[ch-developer] consolidate instructions
This commit adds all of the SQL commands given in the instructions to the
solution files. This consolidates the instructions in one place, making it
easier for a student to follow along and learn. It also simplifies testing the labs.
pmusa committed Apr 23, 2024
1 parent 3d1f52a commit 9caec61
Showing 17 changed files with 431 additions and 5 deletions.
48 changes: 48 additions & 0 deletions developer/02_clickhouse_architecture/lab_2.1.sql
@@ -1,3 +1,6 @@
--Step 1:
DESCRIBE s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/pypi/2023/pypi_0_7_34.snappy.parquet');

--Step 2:
SELECT *
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/pypi/2023/pypi_0_7_34.snappy.parquet')
@@ -80,6 +83,28 @@ ORDER BY c DESC;
* help in skipping granules.
*/
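
/*
 * Illustrative aside, not one of the lab steps: EXPLAIN with
 * indexes = 1 makes granule skipping visible by reporting how many
 * granules the primary index selects out of the total. The date
 * filter below is an arbitrary example value.
 */
EXPLAIN indexes = 1
SELECT count()
FROM pypi
WHERE TIMESTAMP >= toDateTime('2023-04-01 00:00:00');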

--Step 12:
CREATE TABLE pypi2 (
TIMESTAMP DateTime,
COUNTRY_CODE String,
URL String,
PROJECT String
)
ENGINE = MergeTree
PRIMARY KEY (TIMESTAMP, PROJECT);

INSERT INTO pypi2
SELECT *
FROM pypi;

SELECT
PROJECT,
count() AS c
FROM pypi2
WHERE PROJECT LIKE 'boto%'
GROUP BY PROJECT
ORDER BY c DESC;

--Step 13:
/*
* None. Even though PROJECT was added to the primary key, it did not allow
@@ -88,6 +113,29 @@ ORDER BY c DESC;
* difficult to be useful.
*/
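
/*
 * Illustrative check, not one of the lab steps: with
 * PRIMARY KEY (TIMESTAMP, PROJECT), EXPLAIN indexes = 1 should show
 * the boto% filter still selecting most granules, because the index
 * is sorted by TIMESTAMP first.
 */
EXPLAIN indexes = 1
SELECT count()
FROM pypi2
WHERE PROJECT LIKE 'boto%';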


--Step 14:
CREATE OR REPLACE TABLE pypi2 (
TIMESTAMP DateTime,
COUNTRY_CODE String,
URL String,
PROJECT String
)
ENGINE = MergeTree
PRIMARY KEY (PROJECT, TIMESTAMP);

INSERT INTO pypi2
SELECT *
FROM pypi;

SELECT
PROJECT,
count() AS c
FROM pypi2
WHERE PROJECT LIKE 'boto%'
GROUP BY PROJECT
ORDER BY c DESC;

--Step 15:
/*
* The first column of the primary key is an important and powerful design
30 changes: 29 additions & 1 deletion developer/02_clickhouse_architecture/lab_2.2.sql
@@ -1,3 +1,21 @@
--Step 1:
SELECT
formatReadableSize(sum(data_compressed_bytes)) AS compressed_size,
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size,
count() AS num_of_active_parts
FROM system.parts
WHERE (active = 1) AND (table = 'pypi');

--Step 3:
SELECT
table,
formatReadableSize(sum(data_compressed_bytes)) AS compressed_size,
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size,
count() AS num_of_active_parts
FROM system.parts
WHERE (active = 1) AND (table LIKE '%pypi%')
GROUP BY table;

--Step 4:
CREATE TABLE test_pypi (
TIMESTAMP DateTime,
@@ -9,4 +27,14 @@ ENGINE = MergeTree
PRIMARY KEY (PROJECT, COUNTRY_CODE, TIMESTAMP);

INSERT INTO test_pypi
SELECT * FROM pypi2;

--Step 5:
SELECT
table,
formatReadableSize(sum(data_compressed_bytes)) AS compressed_size,
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size,
count() AS num_of_active_parts
FROM system.parts
WHERE (active = 1) AND (table LIKE '%pypi%')
GROUP BY table;
25 changes: 25 additions & 0 deletions developer/03_modeling_data/lab_3.1.sql
@@ -1,3 +1,6 @@
--Step 1:
DESCRIBE pypi;

--Step 2:
SELECT uniqExact(COUNTRY_CODE)
FROM pypi;
@@ -33,3 +36,25 @@ PRIMARY KEY (PROJECT, TIMESTAMP);

INSERT INTO pypi3
SELECT * FROM pypi2;

--Step 5:
SELECT
table,
formatReadableSize(sum(data_compressed_bytes)) AS compressed_size,
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size,
count() AS num_of_active_parts
FROM system.parts
WHERE (active = 1) AND (table LIKE 'pypi%')
GROUP BY table;

--Step 6:
SELECT
toStartOfMonth(TIMESTAMP) AS month,
count() AS count
FROM pypi2
WHERE COUNTRY_CODE = 'US'
GROUP BY
month
ORDER BY
month ASC,
count DESC;
3 changes: 3 additions & 0 deletions developer/03_modeling_data/lab_3.2.sql
@@ -1,3 +1,6 @@
--Step 1:
DESCRIBE s3('https://learnclickhouse.s3.us-east-2.amazonaws.com/datasets/crypto_prices.parquet');

--Step 2:
CREATE TABLE crypto_prices (
trade_date Date,
9 changes: 9 additions & 0 deletions developer/04_inserting_data/lab_4.3.sql
@@ -30,6 +30,15 @@ SELECT
FROM s3('https://learn-clickhouse.s3.us-east-2.amazonaws.com/operating_budget.csv')
SETTINGS format_csv_delimiter = '~';

--Step 6:
SELECT
formatReadableQuantity(sum(approved_amount)),
formatReadableQuantity(sum(recommended_amount))
FROM s3('https://learn-clickhouse.s3.us-east-2.amazonaws.com/operating_budget.csv')
SETTINGS
format_csv_delimiter = '~',
schema_inference_hints = 'approved_amount UInt32, recommended_amount UInt32';

--Step 7:
CREATE TABLE operating_budget (
fiscal_year LowCardinality(String),
13 changes: 13 additions & 0 deletions developer/06_materialized_views/lab_6.1.sql
@@ -20,6 +20,19 @@ WHERE date >= toDate('2022-01-01') AND date <= toDate('2022-12-31');
SELECT count()
FROM london_properties_view;

--Step 4:
SELECT count()
FROM uk_price_paid
WHERE town = 'LONDON';

--Step 6:
EXPLAIN SELECT count()
FROM london_properties_view;

EXPLAIN SELECT count()
FROM uk_price_paid
WHERE town = 'LONDON';

--Step 7:
CREATE VIEW properties_by_town_view
AS
17 changes: 17 additions & 0 deletions developer/06_materialized_views/lab_6.2.sql
@@ -63,6 +63,14 @@ INSERT INTO prices_by_year_dest
SELECT count()
FROM prices_by_year_dest;

--Step 7:
SELECT * FROM system.parts
WHERE table='prices_by_year_dest';

--Step 8:
SELECT * FROM system.parts
WHERE table='uk_price_paid';

--Step 10:
SELECT
count(),
@@ -85,7 +93,16 @@ FROM prices_by_year_dest
WHERE county = 'STAFFORDSHIRE'
AND date >= toDate('2005-06-01') AND date <= toDate('2005-06-30');

--Step 12:
INSERT INTO uk_price_paid VALUES
(125000, '2024-03-07', 'B77', '4JT', 'semi-detached', 0, 'freehold', 10, '', 'CRIGDON', 'WILNECOTE', 'TAMWORTH', 'TAMWORTH', 'STAFFORDSHIRE'),
(440000000, '2024-07-29', 'WC1B', '4JB', 'other', 0, 'freehold', 'VICTORIA HOUSE', '', 'SOUTHAMPTON ROW', '', 'LONDON', 'CAMDEN', 'GREATER LONDON'),
(2000000, '2024-01-22', 'BS40', '5QL', 'detached', 0, 'freehold', 'WEBBSBROOK HOUSE', '', 'SILVER STREET', 'WRINGTON', 'BRISTOL', 'NORTH SOMERSET', 'NORTH SOMERSET');

--Step 13:
SELECT * FROM prices_by_year_dest
WHERE toYear(date) = '2024';

--Step 14:
SELECT * FROM system.parts
WHERE table='prices_by_year_dest';
27 changes: 27 additions & 0 deletions developer/07_aggregations_in_mvs/lab_7.1.sql
@@ -1,3 +1,12 @@
--Step 1:
SELECT
town,
sum(price) AS sum_price,
formatReadableQuantity(sum_price)
FROM uk_price_paid
GROUP BY town
ORDER BY sum_price DESC;

--Step 2:
CREATE TABLE prices_sum_dest
(
@@ -28,6 +37,24 @@ SELECT count()
FROM prices_sum_dest;

--Step 4:
SELECT
town,
sum(price) AS sum_price,
formatReadableQuantity(sum_price)
FROM uk_price_paid
WHERE town = 'LONDON'
GROUP BY town;

SELECT
town,
sum_price AS sum,
formatReadableQuantity(sum)
FROM prices_sum_dest
WHERE town = 'LONDON';

INSERT INTO uk_price_paid (price, date, town, street)
VALUES
(4294967295, toDate('2024-01-01'), 'LONDON', 'My Street1');

/*
* The issue is that prices_sum_dest might have multiple rows with the same
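/*
 * Illustrative workaround, not one of the lab steps: until parts are
 * merged, the same town can appear in multiple rows of prices_sum_dest,
 * so aggregate at query time instead of reading sum_price directly.
 */
SELECT
    town,
    sum(sum_price) AS sum,
    formatReadableQuantity(sum)
FROM prices_sum_dest
WHERE town = 'LONDON'
GROUP BY town;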
49 changes: 48 additions & 1 deletion developer/07_aggregations_in_mvs/lab_7.2.sql
@@ -1,3 +1,32 @@
--Step 1:
WITH
toStartOfMonth(date) AS month
SELECT
month,
min(price) AS min_price,
max(price) AS max_price
FROM uk_price_paid
GROUP BY month
ORDER BY month DESC;

WITH
toStartOfMonth(date) AS month
SELECT
month,
avg(price)
FROM uk_price_paid
GROUP BY month
ORDER BY month DESC;

WITH
toStartOfMonth(date) AS month
SELECT
month,
count()
FROM uk_price_paid
GROUP BY month
ORDER BY month DESC;

--Step 2:
CREATE TABLE uk_prices_aggs_dest (
month Date,
@@ -36,6 +65,9 @@ INSERT INTO uk_prices_aggs_dest
WHERE date < toDate('2024-01-01')
GROUP BY month;

--Step 3:
SELECT * FROM uk_prices_aggs_dest;

--Step 4:
SELECT
month,
@@ -63,4 +95,19 @@ ORDER BY month DESC;
SELECT
countMerge(volume)
FROM uk_prices_aggs_dest
WHERE toYear(month) = '2020';

--Step 7:
INSERT INTO uk_price_paid (date, price, town) VALUES
('2024-08-01', 10000, 'Little Whinging'),
('2024-08-01', 1, 'Little Whinging');

--Step 8:
SELECT
month,
countMerge(volume),
min(min_price),
max(max_price)
FROM uk_prices_aggs_dest
WHERE toYYYYMM(month) = '202408'
GROUP BY month;
31 changes: 31 additions & 0 deletions developer/08_sharding_and_replication/lab_8.1.sql
@@ -1,4 +1,17 @@
--Step 1:
SELECT
cluster,
shard_num,
replica_num,
database_shard_name,
database_replica_name
FROM system.clusters;

--Step 2:
SELECT event_time, query
FROM system.query_log
ORDER BY event_time DESC
LIMIT 20;

/*
* The query returns the last 20 queries executed ordered by time descending.
@@ -31,6 +44,24 @@ SELECT count()
FROM clusterAllReplicas(default, system.query_log)
WHERE positionCaseInsensitive(query, 'insert') > 0;

--Step 7:
SELECT count()
FROM system.parts;

--Step 8:
SELECT count()
FROM clusterAllReplicas(default, system.parts);

--Step 9:
SELECT
instance,
* EXCEPT instance APPLY formatReadableSize
FROM (
SELECT
hostname() AS instance,
sum(primary_key_size),
sum(primary_key_bytes_in_memory),
sum(primary_key_bytes_in_memory_allocated)
FROM clusterAllReplicas(default, system.parts)
GROUP BY instance
);
9 changes: 9 additions & 0 deletions developer/08_sharding_and_replication/lab_8.2.sql
@@ -1,3 +1,12 @@
--Step 1:
SELECT
PROJECT,
count()
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/pypi/2023/pypi_0_0_*.snappy.parquet')
GROUP BY PROJECT
ORDER BY 2 DESC
LIMIT 20;

--Step 2:
SELECT
PROJECT,