Skip to content

Commit

Permalink
Merge pull request #2075 from cBioPortal/fix_ci
Browse files Browse the repository at this point in the history
Fix validation tests in CI
  • Loading branch information
Rima-Waleed authored Nov 11, 2024
2 parents 975fd3b + d4af0e2 commit d0c3dc1
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 78 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ workflows:
workflow_weekly:
triggers:
- schedule:
cron: "30 14 * * 1"
cron: "0 16 * * 6"
filters:
branches:
only:
Expand Down
18 changes: 8 additions & 10 deletions .circleci/install_dependencies.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
#!/usr/bin/env bash
# This script installs the dependencies to download and validate all studies

# Upgrade pip
echo "Upgrading pip..."
python -m pip install --upgrade pip

# Install python dependencies
cd ~/repo/.circleci
sudo pip install -r requirements.txt
pip install -r requirements.txt

# Install and configure Git LFS
echo "Installing Git LFS..."
cd ~/
wget https://github.com/git-lfs/git-lfs/releases/download/v2.3.4/git-lfs-linux-amd64-2.3.4.tar.gz
tar -xvf git-lfs-linux-amd64-2.3.4.tar.gz
Expand All @@ -14,17 +18,11 @@ sudo ./install.sh
cd ~/repo
sudo chown -R circleci .git
git lfs install --skip-smudge

# Clone datahub master branch
cd ~/
git clone --depth 1 -b master https://github.com/cbioportal/cbioportal.git
rm -rf git-lfs-linux-amd64-2.3.4.tar.gz git-lfs-2.3.4

# Clone cBioPortal core
cd ~/
git clone https://github.com/cBioPortal/cbioportal-core.git
# install validator dependencies
sudo pip install -r cbioportal-core/requirements.txt

git clone --depth 1 -b main https://github.com/cBioPortal/cbioportal-core.git

# Make test reports location
cd ~/
Expand Down
10 changes: 6 additions & 4 deletions .circleci/requirements.txt
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
requests==2.27.1
Jinja2==3.1.4
mysqlclient==1.3.13
Jinja2==3.0.3
mysqlclient==2.1.0
docker==3.5.0
urllib3==1.26.19
gitpython==3.1.41
PyYAML==5.4
gitpython==3.1.18
PyYAML==6.0.1
markupsafe==2.0.1
dsnparse==0.1.15
40 changes: 23 additions & 17 deletions .circleci/validate_all_studies.sh
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@
#!/usr/bin/env bash
# This script runs validation on all public studies.

STUDIES_DIR="public/"
STUDIES_DIRS=("public/" "crdc/gdc/")
GIT_REMOTE_URL="[email protected]:cbioportal/datahub.git"
test_reports_location="$HOME/test-reports"

git remote add upstream [email protected]:cbioportal/datahub.git
git remote add upstream "$GIT_REMOTE_URL"
git fetch upstream master

git lfs pull -I "public"

num_studies=${#list_of_study_dirs[@]}

test_reports_location="$HOME/test-reports"
validation_command="$HOME/cbioportal-core/scripts/importer/./validateStudies.py -d $HOME/repo/public/ -p $HOME/repo/.circleci/portalinfo -html $test_reports_location"
echo $'\nExecuting: '; echo $validation_command
if sh -c "$validation_command" ; then
echo "Tests passed successfully"
exit 0
else
echo "Errors found"
# move errors to ERRORS/ folder:
erred_studies=`grep -rnlz $test_reports_location -e 'Validation status.*Failed' `
mv $erred_studies $test_reports_location/ERRORS
exit 1
fi
# Validate every study collection listed in STUDIES_DIRS.
# Reads:   STUDIES_DIRS (array of repo-relative directories),
#          test_reports_location, HOME
# Outputs: HTML reports under $test_reports_location; reports containing
#          'Failed' are moved to $test_reports_location/ERRORS
# Exits:   0 if every collection validated cleanly, 1 if any of them failed
EXIT_STATUS=0 # sticky: once a collection fails, a later success must not reset it
for STUDIES_DIR in "${STUDIES_DIRS[@]}"; do
  # Fetch the LFS objects for this collection only.
  git lfs pull -I "$STUDIES_DIR"
  validation_command="$HOME/cbioportal-core/scripts/importer/./validateStudies.py -d $HOME/repo/$STUDIES_DIR -p $HOME/repo/.circleci/portalinfo -html $test_reports_location"
  echo $'\nExecuting: '
  echo "$validation_command"
  if sh -c "$validation_command"; then
    echo "Tests passed successfully for $STUDIES_DIR"
  else
    echo "Errors found"
    # Any non-zero validator status fails the run, whether or not a report
    # file can be located afterwards.
    EXIT_STATUS=1
    # move errors to ERRORS/ folder:
    mkdir -p "$test_reports_location/ERRORS"
    # Skip ERRORS/ itself so reports moved during an earlier iteration are
    # not found again and moved onto themselves.
    erred_studies=$(grep -rl --exclude-dir=ERRORS "$test_reports_location" -e 'Failed')
    if [ -n "$erred_studies" ]; then
      # One path per line; read them individually so paths with spaces survive.
      while IFS= read -r erred_report; do
        mv -- "$erred_report" "$test_reports_location/ERRORS"
      done <<< "$erred_studies"
    fi
  fi
done

exit "$EXIT_STATUS"
111 changes: 65 additions & 46 deletions .circleci/validate_changed_studies.sh
Original file line number Diff line number Diff line change
@@ -1,68 +1,76 @@
#!/usr/bin/env bash
# This script detects the studies that were changed and triggers the validation accordingly

STUDIES_DIR="public/"
STUDIES_DIRS=("public/" "crdc/gdc/")
REPO_DIR="$HOME/repo/"
TEST_REPORTS_LOCATION="$HOME/test-reports"
ERRORS_DIR="$TEST_REPORTS_LOCATION/ERRORS"
LOG_DIR="$TEST_REPORTS_LOCATION/logs"
VALIDATION_SCRIPT="$HOME/cbioportal-core/scripts/importer/validateStudies.py"
GIT_REMOTE_URL="[email protected]:cbioportal/datahub.git"
MAX_THREADS=7

git remote add upstream [email protected]:cbioportal/datahub.git
git remote add upstream "$GIT_REMOTE_URL"
git fetch upstream master

files_changing=`git diff --name-only --diff-filter=ACMRU upstream/master`
mkdir -p "$LOG_DIR"

files_changing=$(git diff --name-only --diff-filter=ACMRU upstream/master)
list_of_study_dirs=()

for file_changing in $files_changing
do
#echo "file > [$file_changing]"
for file_changing in $files_changing; do
# if file is part of studies_dir, store its directory path (except case_lists)
if [[ $file_changing = *$STUDIES_DIR* ]] && [[ $file_changing != *".htm"* ]]; then
echo "study file changing > [$file_changing]"
dir_name=`dirname $file_changing`
# match case_list*, caselist* as a case list dir (actually only case_lists is valid,
# but this is up to validation script to flag):
if [[ $dir_name != *"/case_list"* ]] && [[ $dir_name != *"/caselist"* ]] && [[ $dir_name != *"/archived_files"* ]] && [[ $dir_name != *"/gene_sets"* ]] && [[ $dir_name != *"/normals"* ]]; then
echo "study dir > [$dir_name]"
else
# get parent dir:
dir_name=`dirname $dir_name`
echo "study dir > [$dir_name]"
fi
found_in_list=`echo ${list_of_study_dirs[@]} | grep $dir_name`
if [[ $found_in_list = "" ]]; then
echo "adding to list..."
list_of_study_dirs+=($dir_name)
echo "downloading files from git lfs..."
git lfs pull -I "$dir_name/*"
git lfs pull -I "$dir_name/case_lists/*"
for STUDIES_DIR in "${STUDIES_DIRS[@]}"; do
if [[ $file_changing = *$STUDIES_DIR* ]] && [[ $file_changing != *".htm"* ]]; then
echo "study file changing > [$file_changing]"
dir_name=$(dirname $file_changing)
# match case_list*, caselist* as a case list dir (actually only case_lists is valid,
# but this is up to validation script to flag):
if [[ $dir_name != *"/case_list"* ]] && [[ $dir_name != *"/caselist"* ]] && [[ $dir_name != *"/archived_files"* ]] && [[ $dir_name != *"/gene_sets"* ]] && [[ $dir_name != *"/normals"* ]] && [[ $dir_name != *"/validation_reports"* ]]; then
echo "study dir > [$dir_name]"
else
# get parent dir:
dir_name=`dirname $dir_name`
echo "study dir > [$dir_name]"
fi
if [[ ! " ${list_of_study_dirs[@]} " =~ " $dir_name " ]]; then
echo "adding to list..."
list_of_study_dirs+=("$dir_name")
echo "downloading files from git lfs..."
git lfs pull -I "$dir_name/*"
git lfs pull -I "$dir_name/case_lists/*"
fi
fi
fi
done
done
num_studies=${#list_of_study_dirs[@]}
if [[ $num_studies > 0 ]]; then
echo $'\n====List of studies:====\n'
list_csv=`echo ${list_of_study_dirs[@]} | tr ' ' ','`
echo $list_csv
list_csv=$(printf "%s," "${list_of_study_dirs[@]}" | sed 's/,$//')
echo "$list_csv"

test_reports_location="$HOME/test-reports"
mkdir -p "$ERRORS_DIR"
validation_command=""
num=0
max_threads=7
break_num=$(($num_studies / $max_threads + 1))
break_num=$((num_studies / MAX_THREADS + 1))
for study in ${list_csv//,/ }
do
# append sleep command between commands
((num=num+1))
mod=$(($num % $break_num))
log_file="$LOG_DIR/$(basename $study).log"
# if [ $mod = 0 ] ; then
# validation_command="${validation_command} && sleep $((num*2))"
# fi
# append the first study
if [ "$validation_command" = "" ] ; then
validation_command="($HOME/cbioportal-core/src/main/resources/scripts/importer/./validateStudies.py -d $HOME/repo/ -l $study -p $HOME/repo/.circleci/portalinfo -html $test_reports_location/$study"
if [[ -z "$validation_command" ]] ; then
validation_command="($VALIDATION_SCRIPT -d $REPO_DIR -l $study -p $REPO_DIR/.circleci/portalinfo -html $TEST_REPORTS_LOCATION/$study > $log_file 2>&1"
else
# run each validation individually in the background
if [ $mod = 0 ] ; then
validation_command="${validation_command}) & ($HOME/cbioportal-core/src/main/resources/scripts/importer/./validateStudies.py -d $HOME/repo/ -l $study -p $HOME/repo/.circleci/portalinfo -html $test_reports_location/$study"
validation_command="${validation_command}) & ($VALIDATION_SCRIPT -d $REPO_DIR -l $study -p $REPO_DIR/.circleci/portalinfo -html $TEST_REPORTS_LOCATION/$study > $log_file 2>&1"
else
validation_command="${validation_command} ; $HOME/cbioportal-core/src/main/resources/scripts/importer/./validateStudies.py -d $HOME/repo/ -l $study -p $HOME/repo/.circleci/portalinfo -html $test_reports_location/$study"
validation_command="${validation_command} ; $VALIDATION_SCRIPT -d $REPO_DIR -l $study -p $REPO_DIR/.circleci/portalinfo -html $TEST_REPORTS_LOCATION/$study > $log_file 2>&1"
fi
fi
done
Expand All @@ -73,25 +81,36 @@ if [[ $num_studies > 0 ]]; then
# Block until every background validation job has finished.
# `wait -n` returns the exit status of the next job to complete. A non-zero
# status is either a failed validation (keep waiting for the others) or 127,
# which bash returns when there are no unwaited-for children left.
while true; do
  wait -n || {
    code="$?"
    echo -e "waiting for all processes to finish...\n\n"
    # exit only when all processes finished
    # (comparison, not assignment: `(( code = 127 ))` is always true and
    # would leave the loop on the first failing job)
    if (( code == 127 )); then
      break
    fi
  }
done

for log in "$LOG_DIR"/*.log; do
if [[ -f "$log" ]]; then
cat "$log"
echo -e "\n----------------------------------------------------\n"
fi
done

# Remove the log directory
if [[ -d "$LOG_DIR" ]]; then
rm -rf "$LOG_DIR"
fi

# find all studies with error
erred_studies=`grep -rnlz $test_reports_location -e 'Validation status.*Failed' `
if [[ $? -eq 0 ]]; then
erred_studies=$(grep -rl "$TEST_REPORTS_LOCATION" -e 'Failed')
if [[ $? -eq 0 ]] && [[ -n "$erred_studies" ]]; then
echo $'\n====List of error studies:====\n'
echo $erred_studies
mv $erred_studies $test_reports_location/ERRORS
echo "$erred_studies"
echo "$erred_studies" | xargs -I {} mv {} "$ERRORS_DIR"
exit 1
else
echo "All tests passed successfully"
exit 0
echo "No error studies found."
fi
else
echo "No studies were changed"
fi
fi

0 comments on commit d0c3dc1

Please sign in to comment.