Oxla clean-up.
tinybit committed Feb 1, 2024
1 parent aa0e9ca commit c37a35b
Showing 1 changed file with 52 additions and 33 deletions.
85 changes: 52 additions & 33 deletions oxla/benchmark.sh
@@ -1,4 +1,8 @@
#!/bin/bash
#!/bin/bash -e

# cleanup
sudo ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
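# note: each pipeline above is equivalent to a pkill by full command line;
# a shorter form, assuming procps pkill is available: sudo pkill -9 -f fakes3 || true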

# docker
sudo rm /usr/share/keyrings/docker-archive-keyring.gpg
@@ -8,7 +12,7 @@ sudo apt update
sudo apt install -y docker-ce

# base
sudo apt-get install -y postgresql-client curl wget apt-transport-https ca-certificates software-properties-common gnupg2
sudo apt-get install -y postgresql-client curl wget apt-transport-https ca-certificates software-properties-common gnupg2 parallel
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential

# ruby and fake S3
@@ -33,54 +37,60 @@ echo -e "[default]\nregion = none" > ~/.aws/config
echo -e "[default]\naws_access_key_id = none\naws_secret_access_key = none" > ~/.aws/credentials

# run fake S3
ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | xargs -r kill -9

sudo ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
sudo rm -rf /mnt/fakes3_root
sudo mkdir -p /mnt/fakes3_root
sudo chmod a+rw /mnt/fakes3_root -R
fakes3 -r /mnt/fakes3_root -H 0.0.0.0 -p 4569 --license license.pdf > /dev/null 2>&1 &
sleep 10 # waiting for fake S3 server to start
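# optional sanity check (a sketch, not part of the original flow): confirm fake S3
# answers before continuing, using the aws CLI credentials configured above
aws s3 ls --endpoint-url http://localhost:4569 > /dev/null 2>&1 || echo "warning: fake S3 not reachable yet"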

# # download dataset
# wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
# gzip -d hits.tsv.gz
# chmod 777 ~ hits.tsv
# download dataset
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
gzip -d hits.tsv.gz
chmod 777 ~ hits.tsv

# # convert dataset to csv
# rm -f hits_part*.csv
# curl https://clickhouse.com/ | sh
# ./clickhouse local --query "SELECT * FROM 'hits.tsv' INTO OUTFILE 'hits.csv'"
# rm hits.tsv
# convert dataset to csv
rm -f part_*.csv
curl https://clickhouse.com/ | sh
./clickhouse local --query "SELECT * FROM 'hits.tsv' INTO OUTFILE 'hits.csv'"
rm hits.tsv
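# optional sanity check (a sketch): the converted hits.csv should hold roughly 100m
# rows; uncomment to verify (a full scan of the file, so it is slow):
# ./clickhouse local --query "SELECT count() FROM 'hits.csv'"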

# split -l 5000000 hits.csv part_
# for file in part_*; do mv "$file" "${file}.csv"; done
# prepare digestible parts (5m rows each) of hits.csv
split -l 5000000 hits.csv part_
for file in part_*; do mv "$file" "${file}.csv"; done
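# note: split names its chunks part_aa, part_ab, ...; the rename to part_aa.csv etc.
# lets the upload and ingest loops below glob on part_*.csv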

# upload dataset to fake S3 bucket
# upload dataset (prepared parts) to fake S3 bucket
aws s3 mb s3://my-new-bucket --endpoint-url http://localhost:4569

for file in part_*.csv; do
echo "Processing file: $file"

# copy the file to the S3 bucket
aws s3 cp "./$file" s3://my-new-bucket --endpoint-url http://localhost:4569
aws s3 cp "./$file" s3://my-new-bucket --endpoint-url http://localhost:4569 > /dev/null 2>&1

# clean-up tmp pars left after upload
for key in $(aws s3api list-objects --bucket my-new-bucket --query "Contents[?contains(Key, '_${file}_')].Key" --output text --endpoint-url http://localhost:4569); do
aws s3api delete-object --bucket my-new-bucket --key "$key" --endpoint-url http://localhost:4569
done
# clean-up tmp parts left after upload
TMPPARTS=$(aws s3api list-objects --bucket my-new-bucket --query "Contents[?contains(Key, '_${file}_')].Key" --output text --endpoint-url http://localhost:4569)
echo $TMPPARTS | tr ' ' '\n' | grep . | parallel -j16 aws s3api delete-object --bucket my-new-bucket --key {} --endpoint-url http://localhost:4569
done
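# note on the clean-up above: GNU parallel substitutes one key per {} and runs up to
# 16 delete-object calls concurrently; a slower serial equivalent would be:
# for key in $TMPPARTS; do aws s3api delete-object --bucket my-new-bucket --key "$key" --endpoint-url http://localhost:4569; done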

# get and configure Oxla image
sudo docker run --rm -it -p 5432:5432 --name oxlacontainer public.ecr.aws/oxla/release:latest &
sudo docker exec -it oxlacontainer /bin/bash -c "sed -i 's#endpoint: \"\"#endpoint: \"http://localhost:4569\"#g' oxla/default_config.yml"
sudo docker exec -it oxlacontainer /bin/bash -c "sed -i 's#endpoint:.*#endpoint: '\''http://localhost:4569'\''#g' oxla/startup_config/config.yml"
echo "Install and run Oxla."

sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9

sudo docker run --rm -p 5432:5432 --name oxlacontainer public.ecr.aws/oxla/release:latest > /dev/null 2>&1 &
sleep 10 # waiting for container start and db initialisation (leader election, etc.)

sudo docker exec oxlacontainer /bin/bash -c "sed -i 's#endpoint: \"\"#endpoint: \"http://localhost:4569\"#g' oxla/default_config.yml"
sudo docker exec oxlacontainer /bin/bash -c "sed -i 's#endpoint:.*#endpoint: '\''http://localhost:4569'\''#g' oxla/startup_config/config.yml"
sudo docker rmi oxla-configured-image:latest > /dev/null 2>&1 || true
sudo docker commit oxlacontainer oxla-configured-image
sudo docker stop oxlacontainer
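# docker commit bakes the patched config into a local image (oxla-configured-image),
# so the benchmark run below starts already configured; --rm on the run above
# removes oxlacontainer as soon as it is stopped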

# run oxla
sudo docker run --rm -it -p 5432:5432 --net=host --name oxlacontainer oxla-configured-image &
sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9

# sleep, waiting for initialisation (leader election, etc.)
sleep(10)
# run oxla
sudo docker run --rm --net=host --name oxlacontainer oxla-configured-image > /dev/null 2>&1 &
sleep 10 # waiting for container start and db initialisation (leader election, etc.)
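# note: --net=host shares the host's network namespace, so Oxla reaches fake S3 on
# localhost:4569 and psql below reaches Oxla on localhost:5432 without -p mappings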

# create table and ingest data
export PGCLIENTENCODING=UTF8
@@ -90,15 +100,24 @@ psql -h localhost -p 5432 -U postgres -d test -t < create.sql
for file in part_*.csv; do
echo "Processing file: $file"
psql -h localhost -p 5432 -U postgres -d test -t -c '\timing' -c "COPY hits FROM 's3://my-new-bucket/$file';"
aws s3api delete-object --bucket my-new-bucket --key "$file" --endpoint-url http://localhost:4569
done
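# note: \timing above makes psql print each COPY's duration; every part is deleted
# right after ingest so the fake S3 copy of the data is freed as the load progresses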

# kill fake S3
ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | xargs -r kill -9
sudo rm -rf /mnt/fakes3_root
# get ingested data size
echo "data size after ingest:"
sudo docker exec oxlacontainer /bin/bash -c "du -s oxla/data"

sudo docker exec -it oxlacontainer /bin/bash -c "du -sh oxla/data"
# wait for merges to finish
sleep 30

# kill fake S3 and remove its data
ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
sudo rm -rf /mnt/fakes3_root

# run benchmark
echo "running benchmark..."
./run.sh 2>&1 | tee log.txt

# format results
cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' |
awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
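# the grep/sed pull out each reported query time in ms; the awk converts to seconds
# and groups every three timings (the repeated runs of one query) into a JSON-style
# row, e.g.: [0.123,0.045,0.044],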
