Skip to content
This repository has been archived by the owner on Dec 9, 2022. It is now read-only.

Tests for TPCDS queries join ordering #21

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions presto-tpcds/src/main/resources/tpcds/queries/TODO
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Move queries to TPCDS generator library.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We won't find this file. Better to just file an issue somewhere.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Teradata/tpcds#28

Also I asked on presto slack to copy that project under prestodb community group, see https://prestodb.slack.com/archives/C07JH9WMQ/p1519824551000061

30 changes: 30 additions & 0 deletions presto-tpcds/src/main/resources/tpcds/queries/q01.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- database: presto_tpcds; groups: tpcds; requires: io.prestodb.tempto.fulfillment.table.hive.tpcds.ImmutableTpcdsTablesRequirements
-- Q01 (first part): returns up to 100 "c_customer_id" values for customers whose
-- total store returns exceed a per-store threshold computed by the correlated
-- subquery that follows this fragment.
WITH
-- customer_total_return: sum of "sr_return_amt" per (customer, store) over
-- store returns whose return date falls in d_year 2000.
customer_total_return AS (
SELECT
"sr_customer_sk" "ctr_customer_sk"
, "sr_store_sk" "ctr_store_sk"
, "sum"("sr_return_amt") "ctr_total_return"
FROM
store_returns
, date_dim
WHERE ("sr_returned_date_sk" = "d_date_sk")
AND ("d_year" = 2000)
GROUP BY "sr_customer_sk", "sr_store_sk"
)
SELECT "c_customer_id"
FROM
customer_total_return ctr1
, store
, customer
-- Keep only rows whose total return is above the value produced by the
-- parenthesized scalar subquery opened on this line.
WHERE ("ctr1"."ctr_total_return" > (
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not convinced we need yet another copy of TPCDS queries.

$ find -regex '.*ds.*q01.sql' -exec sha1sum {} +
550d3e6497028853792bc46c8500776e92d9971b  ./presto-tpcds/src/main/resources/tpcds/queries/q01.sql
550d3e6497028853792bc46c8500776e92d9971b  ./presto-product-tests/src/main/resources/sql-tests/testcases/tpcds/q01.sql
0169942554f00ce16abf51e98b1730fa8be8e005  ./presto-benchto-benchmarks/src/main/resources/sql/presto/tpcds/q01.sql

I would rather see some reuse (even hacky) than a copy.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

queries in benchmarks are modified (they have variables like ${catalog}.${schema}).
Product tests have their headers. Here we need vanilla queries.

I prefer to copy files which no one should ever modify, instead of dragging and/or playing with all the product-tests deps (like cassandra or kafka).

Copy link

@findepi findepi Mar 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

queries in benchmarks are modified (they have variables like ${catalog}.${schema}).

you could simply find-replace ${catalog} with tpcds ...

I prefer to copy files which no-one ever should modify,

This is wishful thinking. The queries are being modified from time to time. Casts, grouping, .. And they will be modified in the future (casts and chars).

-- Correlated scalar subquery: 1.2 times the average "ctr_total_return" over the
-- customer_total_return rows that share the outer row's store.
SELECT ("avg"("ctr_total_return") * DECIMAL '1.2')
FROM
customer_total_return ctr2
WHERE ("ctr1"."ctr_store_sk" = "ctr2"."ctr_store_sk")
))
-- Join the aggregated returns to their store and customer rows; only stores
-- with "s_state" = 'TN' are kept.
AND ("s_store_sk" = "ctr1"."ctr_store_sk")
AND ("s_state" = 'TN')
AND ("ctr1"."ctr_customer_sk" = "c_customer_sk")
ORDER BY "c_customer_id" ASC
LIMIT 100
81 changes: 81 additions & 0 deletions presto-tpcds/src/main/resources/tpcds/queries/q02.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
-- database: presto_tpcds; groups: tpcds; requires: io.prestodb.tempto.fulfillment.table.hive.tpcds.ImmutableTpcdsTablesRequirements
-- Q02: for each week of d_year 2001, the ratio (rounded to 2 decimals) of that
-- week's per-weekday web+catalog sales to the sales of the week whose
-- "d_week_seq" is 53 higher and that falls in d_year 2001 + 1.
--
-- The comma joins and the order of relations in every FROM clause are kept as
-- written on purpose: this text is used as input for join-ordering tests.
WITH
  -- wscs: web_sales and catalog_sales stacked into one
  -- ("sold_date_sk", "sales_price") row stream.
  wscs AS (
    SELECT
      "sold_date_sk"
    , "sales_price"
    FROM
      (
        SELECT
          "ws_sold_date_sk" "sold_date_sk"
        , "ws_ext_sales_price" "sales_price"
        FROM
          web_sales
      )
    UNION ALL (
      SELECT
        "cs_sold_date_sk" "sold_date_sk"
      , "cs_ext_sales_price" "sales_price"
      FROM
        catalog_sales
    )
  )
  -- wswscs: one row per "d_week_seq" with the sales total of each weekday.
  --
  -- "d_day_name" is CHAR(9) in the TPC-DS schema, so every literal below is
  -- blank-padded to exactly nine characters ('Wednesday' needs no padding).
  -- Uniform nine-character literals match both under pad-space CHAR comparison
  -- and under exact comparison against blank-padded stored values.
  -- NOTE(review): confirm the column type exposed by the target connector.
, wswscs AS (
    SELECT
      "d_week_seq"
    , "sum"((CASE WHEN ("d_day_name" = 'Sunday   ') THEN "sales_price" ELSE null END)) "sun_sales"
    , "sum"((CASE WHEN ("d_day_name" = 'Monday   ') THEN "sales_price" ELSE null END)) "mon_sales"
    , "sum"((CASE WHEN ("d_day_name" = 'Tuesday  ') THEN "sales_price" ELSE null END)) "tue_sales"
    , "sum"((CASE WHEN ("d_day_name" = 'Wednesday') THEN "sales_price" ELSE null END)) "wed_sales"
    , "sum"((CASE WHEN ("d_day_name" = 'Thursday ') THEN "sales_price" ELSE null END)) "thu_sales"
    , "sum"((CASE WHEN ("d_day_name" = 'Friday   ') THEN "sales_price" ELSE null END)) "fri_sales"
    , "sum"((CASE WHEN ("d_day_name" = 'Saturday ') THEN "sales_price" ELSE null END)) "sat_sales"
    FROM
      wscs
    , date_dim
    WHERE ("d_date_sk" = "sold_date_sk")
    GROUP BY "d_week_seq"
  )
SELECT
  "d_week_seq1"
, "round"(("sun_sales1" / "sun_sales2"), 2)
, "round"(("mon_sales1" / "mon_sales2"), 2)
, "round"(("tue_sales1" / "tue_sales2"), 2)
, "round"(("wed_sales1" / "wed_sales2"), 2)
, "round"(("thu_sales1" / "thu_sales2"), 2)
, "round"(("fri_sales1" / "fri_sales2"), 2)
, "round"(("sat_sales1" / "sat_sales2"), 2)
FROM
  -- y: weekly totals joined to date_dim rows of d_year 2001.
  (
    SELECT
      "wswscs"."d_week_seq" "d_week_seq1"
    , "sun_sales" "sun_sales1"
    , "mon_sales" "mon_sales1"
    , "tue_sales" "tue_sales1"
    , "wed_sales" "wed_sales1"
    , "thu_sales" "thu_sales1"
    , "fri_sales" "fri_sales1"
    , "sat_sales" "sat_sales1"
    FROM
      wswscs
    , date_dim
    WHERE ("date_dim"."d_week_seq" = "wswscs"."d_week_seq")
      AND ("d_year" = 2001)
  ) y
  -- z: the same projection for d_year 2001 + 1.
, (
    SELECT
      "wswscs"."d_week_seq" "d_week_seq2"
    , "sun_sales" "sun_sales2"
    , "mon_sales" "mon_sales2"
    , "tue_sales" "tue_sales2"
    , "wed_sales" "wed_sales2"
    , "thu_sales" "thu_sales2"
    , "fri_sales" "fri_sales2"
    , "sat_sales" "sat_sales2"
    FROM
      wswscs
    , date_dim
    WHERE ("date_dim"."d_week_seq" = "wswscs"."d_week_seq")
      AND ("d_year" = (2001 + 1))
  ) z
-- Pair each week of y with the week of z whose sequence number is 53 higher.
WHERE ("d_week_seq1" = ("d_week_seq2" - 53))
ORDER BY "d_week_seq1" ASC
17 changes: 17 additions & 0 deletions presto-tpcds/src/main/resources/tpcds/queries/q03.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- database: presto_tpcds; groups: tpcds; requires: io.prestodb.tempto.fulfillment.table.hive.tpcds.ImmutableTpcdsTablesRequirements
-- Q03: total "ss_ext_sales_price" per (year, brand, brand id) for items of
-- manufacturer 128 sold in month 11; top 100 rows by year, then by descending
-- total, then by brand id.
-- Relation order and comma joins are left exactly as in the original text.
SELECT
    "dt"."d_year"
  , "item"."i_brand_id"       AS "brand_id"
  , "item"."i_brand"          AS "brand"
  , "sum"("ss_ext_sales_price") AS "sum_agg"
FROM
    date_dim AS dt
  , store_sales
  , item
WHERE "dt"."d_date_sk" = "store_sales"."ss_sold_date_sk"
  AND "store_sales"."ss_item_sk" = "item"."i_item_sk"
  AND "item"."i_manufact_id" = 128
  AND "dt"."d_moy" = 11
GROUP BY
    "dt"."d_year"
  , "item"."i_brand"
  , "item"."i_brand_id"
ORDER BY
    "dt"."d_year" ASC
  , "sum_agg" DESC
  , "brand_id" ASC
LIMIT 100
94 changes: 94 additions & 0 deletions presto-tpcds/src/main/resources/tpcds/queries/q04.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
-- database: presto_tpcds; groups: tpcds; requires: io.prestodb.tempto.fulfillment.table.hive.tpcds.ImmutableTpcdsTablesRequirements
-- Q04: customers whose catalog-channel year-over-year ratio (2001 -> 2001 + 1)
-- is greater than both their store-channel and their web-channel ratio.
-- Returns up to 100 rows of id, first name, last name and preferred flag.
-- Relation order and comma joins are left exactly as in the original text.
WITH
  -- year_total: per customer and "d_year", the sum of
  -- (list price - wholesale cost - discount + sales price) / 2,
  -- computed separately for store ('s'), catalog ('c') and web ('w') sales.
  year_total AS (
    SELECT
        "c_customer_id"         AS "customer_id"
      , "c_first_name"          AS "customer_first_name"
      , "c_last_name"           AS "customer_last_name"
      , "c_preferred_cust_flag" AS "customer_preferred_cust_flag"
      , "c_birth_country"       AS "customer_birth_country"
      , "c_login"               AS "customer_login"
      , "c_email_address"       AS "customer_email_address"
      , "d_year"                AS "dyear"
      , "sum"(("ss_ext_list_price" - "ss_ext_wholesale_cost" - "ss_ext_discount_amt" + "ss_ext_sales_price") / 2) AS "year_total"
      , 's'                     AS "sale_type"
    FROM
        customer
      , store_sales
      , date_dim
    WHERE "c_customer_sk" = "ss_customer_sk"
      AND "ss_sold_date_sk" = "d_date_sk"
    GROUP BY
        "c_customer_id", "c_first_name", "c_last_name", "c_preferred_cust_flag"
      , "c_birth_country", "c_login", "c_email_address", "d_year"

    UNION ALL

    SELECT
        "c_customer_id"         AS "customer_id"
      , "c_first_name"          AS "customer_first_name"
      , "c_last_name"           AS "customer_last_name"
      , "c_preferred_cust_flag" AS "customer_preferred_cust_flag"
      , "c_birth_country"       AS "customer_birth_country"
      , "c_login"               AS "customer_login"
      , "c_email_address"       AS "customer_email_address"
      , "d_year"                AS "dyear"
      , "sum"(("cs_ext_list_price" - "cs_ext_wholesale_cost" - "cs_ext_discount_amt" + "cs_ext_sales_price") / 2) AS "year_total"
      , 'c'                     AS "sale_type"
    FROM
        customer
      , catalog_sales
      , date_dim
    WHERE "c_customer_sk" = "cs_bill_customer_sk"
      AND "cs_sold_date_sk" = "d_date_sk"
    GROUP BY
        "c_customer_id", "c_first_name", "c_last_name", "c_preferred_cust_flag"
      , "c_birth_country", "c_login", "c_email_address", "d_year"

    UNION ALL

    SELECT
        "c_customer_id"         AS "customer_id"
      , "c_first_name"          AS "customer_first_name"
      , "c_last_name"           AS "customer_last_name"
      , "c_preferred_cust_flag" AS "customer_preferred_cust_flag"
      , "c_birth_country"       AS "customer_birth_country"
      , "c_login"               AS "customer_login"
      , "c_email_address"       AS "customer_email_address"
      , "d_year"                AS "dyear"
      , "sum"(("ws_ext_list_price" - "ws_ext_wholesale_cost" - "ws_ext_discount_amt" + "ws_ext_sales_price") / 2) AS "year_total"
      , 'w'                     AS "sale_type"
    FROM
        customer
      , web_sales
      , date_dim
    WHERE "c_customer_sk" = "ws_bill_customer_sk"
      AND "ws_sold_date_sk" = "d_date_sk"
    GROUP BY
        "c_customer_id", "c_first_name", "c_last_name", "c_preferred_cust_flag"
      , "c_birth_country", "c_login", "c_email_address", "d_year"
  )
SELECT
    "t_s_secyear"."customer_id"
  , "t_s_secyear"."customer_first_name"
  , "t_s_secyear"."customer_last_name"
  , "t_s_secyear"."customer_preferred_cust_flag"
FROM
    -- Six self-joined copies: {store, catalog, web} x {first year, second year}.
    year_total AS t_s_firstyear
  , year_total AS t_s_secyear
  , year_total AS t_c_firstyear
  , year_total AS t_c_secyear
  , year_total AS t_w_firstyear
  , year_total AS t_w_secyear
WHERE "t_s_secyear"."customer_id" = "t_s_firstyear"."customer_id"
  AND "t_s_firstyear"."customer_id" = "t_c_secyear"."customer_id"
  AND "t_s_firstyear"."customer_id" = "t_c_firstyear"."customer_id"
  AND "t_s_firstyear"."customer_id" = "t_w_firstyear"."customer_id"
  AND "t_s_firstyear"."customer_id" = "t_w_secyear"."customer_id"
  -- Pin every copy to its channel.
  AND "t_s_firstyear"."sale_type" = 's'
  AND "t_c_firstyear"."sale_type" = 'c'
  AND "t_w_firstyear"."sale_type" = 'w'
  AND "t_s_secyear"."sale_type" = 's'
  AND "t_c_secyear"."sale_type" = 'c'
  AND "t_w_secyear"."sale_type" = 'w'
  -- Pin every copy to its year.
  AND "t_s_firstyear"."dyear" = 2001
  AND "t_s_secyear"."dyear" = 2001 + 1
  AND "t_c_firstyear"."dyear" = 2001
  AND "t_c_secyear"."dyear" = 2001 + 1
  AND "t_w_firstyear"."dyear" = 2001
  AND "t_w_secyear"."dyear" = 2001 + 1
  -- First-year totals must be positive; they are the divisors below.
  AND "t_s_firstyear"."year_total" > 0
  AND "t_c_firstyear"."year_total" > 0
  AND "t_w_firstyear"."year_total" > 0
  -- Catalog ratio greater than store ratio.
  AND (CASE WHEN "t_c_firstyear"."year_total" > 0 THEN "t_c_secyear"."year_total" / "t_c_firstyear"."year_total" ELSE NULL END)
    > (CASE WHEN "t_s_firstyear"."year_total" > 0 THEN "t_s_secyear"."year_total" / "t_s_firstyear"."year_total" ELSE NULL END)
  -- Catalog ratio greater than web ratio.
  AND (CASE WHEN "t_c_firstyear"."year_total" > 0 THEN "t_c_secyear"."year_total" / "t_c_firstyear"."year_total" ELSE NULL END)
    > (CASE WHEN "t_w_firstyear"."year_total" > 0 THEN "t_w_secyear"."year_total" / "t_w_firstyear"."year_total" ELSE NULL END)
ORDER BY
    "t_s_secyear"."customer_id" ASC
  , "t_s_secyear"."customer_first_name" ASC
  , "t_s_secyear"."customer_last_name" ASC
  , "t_s_secyear"."customer_preferred_cust_flag" ASC
LIMIT 100
145 changes: 145 additions & 0 deletions presto-tpcds/src/main/resources/tpcds/queries/q05.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
-- database: presto_tpcds; groups: tpcds; requires: io.prestodb.tempto.fulfillment.table.hive.tpcds.ImmutableTpcdsTablesRequirements
-- Q05: sales, returns and profit (profit minus profit_loss) per channel and id
-- for store, catalog and web, over the dates from 2000-08-23 through
-- 2000-08-23 + 14 days, with ROLLUP subtotals; first 100 rows by channel, id.
-- Relation order and comma joins are left exactly as in the original text.
WITH
  -- ssr: store sales and store returns, summed per "s_store_id".
  ssr AS (
    SELECT
        "s_store_id"
      , "sum"("sales_price") AS "sales"
      , "sum"("profit")      AS "profit"
      , "sum"("return_amt")  AS "returns"
      , "sum"("net_loss")    AS "profit_loss"
    FROM
        (
          -- Sales rows carry zero return amount and zero net loss.
          SELECT
              "ss_store_sk"             AS "store_sk"
            , "ss_sold_date_sk"         AS "date_sk"
            , "ss_ext_sales_price"      AS "sales_price"
            , "ss_net_profit"           AS "profit"
            , CAST(0 AS DECIMAL(7,2))   AS "return_amt"
            , CAST(0 AS DECIMAL(7,2))   AS "net_loss"
          FROM store_sales
          UNION ALL
          -- Return rows carry zero sales price and zero profit.
          SELECT
              "sr_store_sk"             AS "store_sk"
            , "sr_returned_date_sk"     AS "date_sk"
            , CAST(0 AS DECIMAL(7,2))   AS "sales_price"
            , CAST(0 AS DECIMAL(7,2))   AS "profit"
            , "sr_return_amt"           AS "return_amt"
            , "sr_net_loss"             AS "net_loss"
          FROM store_returns
        ) AS salesreturns
      , date_dim
      , store
    WHERE "date_sk" = "d_date_sk"
      AND "d_date" BETWEEN CAST('2000-08-23' AS DATE) AND (CAST('2000-08-23' AS DATE) + INTERVAL '14' DAY)
      AND "store_sk" = "s_store_sk"
    GROUP BY "s_store_id"
  )
  -- csr: catalog sales and catalog returns, summed per "cp_catalog_page_id".
, csr AS (
    SELECT
        "cp_catalog_page_id"
      , "sum"("sales_price") AS "sales"
      , "sum"("profit")      AS "profit"
      , "sum"("return_amt")  AS "returns"
      , "sum"("net_loss")    AS "profit_loss"
    FROM
        (
          SELECT
              "cs_catalog_page_sk"      AS "page_sk"
            , "cs_sold_date_sk"         AS "date_sk"
            , "cs_ext_sales_price"      AS "sales_price"
            , "cs_net_profit"           AS "profit"
            , CAST(0 AS DECIMAL(7,2))   AS "return_amt"
            , CAST(0 AS DECIMAL(7,2))   AS "net_loss"
          FROM catalog_sales
          UNION ALL
          SELECT
              "cr_catalog_page_sk"      AS "page_sk"
            , "cr_returned_date_sk"     AS "date_sk"
            , CAST(0 AS DECIMAL(7,2))   AS "sales_price"
            , CAST(0 AS DECIMAL(7,2))   AS "profit"
            , "cr_return_amount"        AS "return_amt"
            , "cr_net_loss"             AS "net_loss"
          FROM catalog_returns
        ) AS salesreturns
      , date_dim
      , catalog_page
    WHERE "date_sk" = "d_date_sk"
      AND "d_date" BETWEEN CAST('2000-08-23' AS DATE) AND (CAST('2000-08-23' AS DATE) + INTERVAL '14' DAY)
      AND "page_sk" = "cp_catalog_page_sk"
    GROUP BY "cp_catalog_page_id"
  )
  -- wsr: web sales and web returns, summed per "web_site_id".
, wsr AS (
    SELECT
        "web_site_id"
      , "sum"("sales_price") AS "sales"
      , "sum"("profit")      AS "profit"
      , "sum"("return_amt")  AS "returns"
      , "sum"("net_loss")    AS "profit_loss"
    FROM
        (
          SELECT
              "ws_web_site_sk"          AS "wsr_web_site_sk"
            , "ws_sold_date_sk"         AS "date_sk"
            , "ws_ext_sales_price"      AS "sales_price"
            , "ws_net_profit"           AS "profit"
            , CAST(0 AS DECIMAL(7,2))   AS "return_amt"
            , CAST(0 AS DECIMAL(7,2))   AS "net_loss"
          FROM web_sales
          UNION ALL
          -- The web site key of a return is taken from the web_sales row
          -- matched on item and order number (LEFT JOIN, so it may be NULL).
          SELECT
              "ws_web_site_sk"          AS "wsr_web_site_sk"
            , "wr_returned_date_sk"     AS "date_sk"
            , CAST(0 AS DECIMAL(7,2))   AS "sales_price"
            , CAST(0 AS DECIMAL(7,2))   AS "profit"
            , "wr_return_amt"           AS "return_amt"
            , "wr_net_loss"             AS "net_loss"
          FROM
            (
              web_returns
              LEFT JOIN web_sales
                ON "wr_item_sk" = "ws_item_sk"
               AND "wr_order_number" = "ws_order_number"
            )
        ) AS salesreturns
      , date_dim
      , web_site
    WHERE "date_sk" = "d_date_sk"
      AND "d_date" BETWEEN CAST('2000-08-23' AS DATE) AND (CAST('2000-08-23' AS DATE) + INTERVAL '14' DAY)
      AND "wsr_web_site_sk" = "web_site_sk"
    GROUP BY "web_site_id"
  )
SELECT
    "channel"
  , "id"
  , "sum"("sales")   AS "sales"
  , "sum"("returns") AS "returns"
  , "sum"("profit")  AS "profit"
FROM
    (
      -- One labelled branch per channel; the id is the channel-specific
      -- business id prefixed with a fixed tag.
      SELECT
          'store channel'                                  AS "channel"
        , "concat"('store', "s_store_id")                  AS "id"
        , "sales"
        , "returns"
        , "profit" - "profit_loss"                         AS "profit"
      FROM ssr
      UNION ALL
      SELECT
          'catalog channel'                                AS "channel"
        , "concat"('catalog_page', "cp_catalog_page_id")   AS "id"
        , "sales"
        , "returns"
        , "profit" - "profit_loss"                         AS "profit"
      FROM csr
      UNION ALL
      SELECT
          'web channel'                                    AS "channel"
        , "concat"('web_site', "web_site_id")              AS "id"
        , "sales"
        , "returns"
        , "profit" - "profit_loss"                         AS "profit"
      FROM wsr
    ) AS x
GROUP BY ROLLUP (channel, id)
ORDER BY
    "channel" ASC
  , "id" ASC
LIMIT 100
Loading