Skip to content

Commit

Permalink
impl
Browse files Browse the repository at this point in the history
  • Loading branch information
nickitat committed Jan 3, 2025
1 parent 79fe6a4 commit 6abfa90
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 24 deletions.
2 changes: 1 addition & 1 deletion clickhouse/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
24.8.4.13
24.12.2.29
13 changes: 10 additions & 3 deletions clickhouse/ch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ ch_stop() {

ch_query() {
ENGINE=Memory
if [ $ON_DISK -eq 1 ]; then
ENGINE="MergeTree ORDER BY tuple()"
if [ $COMPRESS -eq 1 ]; then
ENGINE="Memory settings compress=1"
fi
sudo touch '/var/lib/clickhouse/flags/force_drop_table' && sudo chmod 666 '/var/lib/clickhouse/flags/force_drop_table'
clickhouse-client --query "DROP TABLE IF EXISTS ans;"
Expand All @@ -52,8 +52,15 @@ ch_make_2_runs() {
ch_query
ch_logrun

if [ $COMPRESS -eq 1 ]; then
# Memory freed by the Memory engine takes some time to be returned to the system.
# Without a sleep, the second run of the query might fail with a MEMORY_LIMIT_EXCEEDED exception.
# The sleep is applied only when $COMPRESS=1 because that variable is set to 1 only for the largest dataset.
sleep 60
fi

RUN=2
RUNNAME="${TASK}_${SRC_DATANAME}_q${Q}_r${RUN}"
ch_query
ch_logrun
}
}
40 changes: 20 additions & 20 deletions clickhouse/exec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ echo '# clickhouse/exec.sh: creating tables and loading data'
THREADS=$(($(nproc --all) /2))
HAS_NULL=$(clickhouse-client --query "SELECT splitByChar('_','$SRC_DATANAME')[4]>0 FORMAT TSV")
IS_SORTED=$(clickhouse-client --query "SELECT splitByChar('_','$SRC_DATANAME')[5]=1 FORMAT TSV")
ON_DISK=0
COMPRESS=0

if [ $1 == 'groupby' ]; then
clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME"
Expand All @@ -57,39 +57,39 @@ elif [ $1 == 'join' ]; then
RHS1=$(echo $RHS | cut -d' ' -f1)
RHS2=$(echo $RHS | cut -d' ' -f2)
RHS3=$(echo $RHS | cut -d' ' -f3)
ON_DISK=$(clickhouse-client --query "SELECT (splitByChar('_','$SRC_DATANAME')[2])::Float32 >= 1e9::Float32 FORMAT TSV")
COMPRESS=$(clickhouse-client --query "SELECT (splitByChar('_','$SRC_DATANAME')[2])::Float32 >= 1e9::Float32 FORMAT TSV")

# cleanup
clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME"
clickhouse-client --query "DROP TABLE IF EXISTS $RHS1"
clickhouse-client --query "DROP TABLE IF EXISTS $RHS2"
clickhouse-client --query "DROP TABLE IF EXISTS $RHS3"

echo IS_SORTED ${IS_SORTED} HAS_NULL ${HAS_NULL} ON_DISK ${ON_DISK}
echo IS_SORTED ${IS_SORTED} HAS_NULL ${HAS_NULL} COMPRESS ${COMPRESS}
# schemas
if [ $HAS_NULL -eq 1 ]; then
if [ $IS_SORTED -eq 1 ]; then
clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), id6 Nullable(String), v1 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);"
clickhouse-client --query "CREATE TABLE $RHS1 (id1 Nullable(Int32), id4 LowCardinality(Nullable(String)), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id4);"
clickhouse-client --query "CREATE TABLE $RHS2 (id1 Nullable(Int32), id2 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id2, id4, id5);"
clickhouse-client --query "CREATE TABLE $RHS3 (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), id6 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);"
clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v1 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);"
clickhouse-client --query "CREATE TABLE $RHS1 (id1 Nullable(Int32), id4 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id4);"
clickhouse-client --query "CREATE TABLE $RHS2 (id1 Nullable(Int32), id2 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id2, id4, id5);"
clickhouse-client --query "CREATE TABLE $RHS3 (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);"
else
clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), id6 Nullable(String), v1 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS1 (id1 Nullable(Int32), id4 LowCardinality(Nullable(String)), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS2 (id1 Nullable(Int32), id2 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS3 (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), id6 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v1 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS1 (id1 Nullable(Int32), id4 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS2 (id1 Nullable(Int32), id2 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS3 (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();"
fi
else
if [ $IS_SORTED -eq 1 ]; then
clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Int32, id2 Int32, id3 Int32, id4 LowCardinality(String), id5 LowCardinality(String), id6 String, v1 Float64) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);"
clickhouse-client --query "CREATE TABLE $RHS1 (id1 Int32, id4 LowCardinality(String), v2 Float64) ENGINE = MergeTree() ORDER BY (id1, id4);"
clickhouse-client --query "CREATE TABLE $RHS2 (id1 Int32, id2 Int32, id4 LowCardinality(String), id5 LowCardinality(String), v2 Float64) ENGINE = MergeTree() ORDER BY (id1, id2, id4, id5);"
clickhouse-client --query "CREATE TABLE $RHS3 (id1 Int32, id2 Int32, id3 Int32, id4 LowCardinality(String), id5 LowCardinality(String), id6 String, v2 Float64) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);"
clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Int32, id2 Int32, id3 Int32, id4 String, id5 String, id6 String, v1 Float64) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);"
clickhouse-client --query "CREATE TABLE $RHS1 (id1 Int32, id4 String, v2 Float64) ENGINE = MergeTree() ORDER BY (id1, id4);"
clickhouse-client --query "CREATE TABLE $RHS2 (id1 Int32, id2 Int32, id4 String, id5 String, v2 Float64) ENGINE = MergeTree() ORDER BY (id1, id2, id4, id5);"
clickhouse-client --query "CREATE TABLE $RHS3 (id1 Int32, id2 Int32, id3 Int32, id4 String, id5 String, id6 String, v2 Float64) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);"
else
clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Int32, id2 Int32, id3 Int32, id4 LowCardinality(String), id5 LowCardinality(String), id6 String, v1 Float64) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS1 (id1 Int32, id4 LowCardinality(String), v2 Float64) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS2 (id1 Int32, id2 Int32, id4 LowCardinality(String), id5 LowCardinality(String), v2 Float64) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS3 (id1 Int32, id2 Int32, id3 Int32, id4 LowCardinality(String), id5 LowCardinality(String), id6 String, v2 Float64) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Int32, id2 Int32, id3 Int32, id4 String, id5 String, id6 String, v1 Float64) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS1 (id1 Int32, id4 String, v2 Float64) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS2 (id1 Int32, id2 Int32, id4 String, id5 String, v2 Float64) ENGINE = MergeTree() ORDER BY tuple();"
clickhouse-client --query "CREATE TABLE $RHS3 (id1 Int32, id2 Int32, id3 Int32, id4 String, id5 String, id6 String, v2 Float64) ENGINE = MergeTree() ORDER BY tuple();"
fi
fi

Expand All @@ -111,7 +111,7 @@ elif [ $1 == 'join' ]; then
else
echo "clickhouse task $1 not implemented" >&2 && exit 1
fi
export ON_DISK
export COMPRESS
export THREADS

# cleanup timings from last run if they have not been cleaned up after parsing
Expand Down
14 changes: 14 additions & 0 deletions logs.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2727,3 +2727,17 @@ ip-172-31-18-198,1727700986,polars,1.8.2,,join,J1_1e8_NA_0_1,1727708694.5644,sta
ip-172-31-18-198,1727700986,polars,1.8.2,,join,J1_1e8_NA_0_1,1727708890.377,finish,0,0
ip-172-31-18-198,1727700986,polars,1.8.2,,join,J1_1e9_NA_0_0,1727708905.39232,start,,
ip-172-31-18-198,1727700986,polars,1.8.2,,join,J1_1e9_NA_0_0,1727712075.09348,finish,0,0
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e7_NA_0_0,1735925716.02897,start,,
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e7_NA_0_0,1735925866.26595,finish,0,0
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e7_NA_5_0,1735925881.28121,start,,
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e7_NA_5_0,1735926031.27624,finish,0,0
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e7_NA_0_1,1735926046.29138,start,,
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e7_NA_0_1,1735926196.5162,finish,0,0
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e8_NA_0_0,1735926211.53132,start,,
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e8_NA_0_0,1735926412.55834,finish,0,0
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e8_NA_5_0,1735926427.57361,start,,
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e8_NA_5_0,1735926627.83365,finish,0,0
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e8_NA_0_1,1735926642.849,start,,
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e8_NA_0_1,1735926843.44296,finish,0,0
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e9_NA_0_0,1735926858.45824,start,,
ip-172-31-30-41,1735925715,clickhouse,24.12.2.29,,join,J1_1e9_NA_0_0,1735927939.70705,finish,0,0
Loading

0 comments on commit 6abfa90

Please sign in to comment.