Skip to content

Commit

Permalink
make submissions folder deletion better (#445)
Browse files Browse the repository at this point in the history
* make submissions folder deletion better
  • Loading branch information
SHuang-Broad authored May 3, 2024
1 parent 3168f8d commit 65018a8
Showing 1 changed file with 36 additions and 7 deletions.
43 changes: 36 additions & 7 deletions wdl/pipelines/TechAgnostic/Utility/CleanupIntermediate.wdl
Original file line number Diff line number Diff line change
workflow CleanupIntermediate {
    # Ironically, this generates intermediate files too, but they are tiny.
    meta {
        description: "A workflow to clean up intermediate files from running workflows on Terra. Use at your own risk."
        warn: "This workflow will delete the whole 'folder's corresponding to each of the specified submissions. So make sure nothing in those are useful anymore."
    }
    parameter_meta {
        # Key matches the declared input name (`workspace_bucket`), not the task-level `bucket_name`.
        workspace_bucket: "The workspace bucket name, without the 'gs://' prefix"
        submissionIDs: "List of submissions whose intermediate files are to be deleted"
        keep_logs: "Whether to keep cromwell log files or not; if true, the process is significantly slower. Default is false."
    }

    input {
        String workspace_bucket
        Array[String] submissionIDs
        Boolean keep_logs = false
    }

    # One independent cleanup task per submission; each call removes
    # gs://<workspace_bucket>/submissions/<sid>.
    scatter (sid in submissionIDs) {
        # A single call per submission, forwarding keep_logs so the task can
        # choose between the selective and the wholesale deletion path.
        call CleanupAFolder { input:
            bucket_name = workspace_bucket,
            submission_id = sid,
            keep_logs = keep_logs
        }
    }
}

task CleanupAFolder {
    meta {
        description: "Clean up intermediate files from running workflows on Terra."
        warn: "This workflow will delete the whole 'folder' corresponding to the specified submission. So make sure nothing in it is useful anymore."
    }
    parameter_meta {
        bucket_name: "The workspace bucket name, without the 'gs://' prefix"
        submission_id: "The submission ID whose intermediate files are to be deleted. Will error out if empty."
        keep_logs: "Whether to keep cromwell log files or not; if true, the process is significantly slower. Default is false."
    }
    input {
        String bucket_name
        String submission_id
        Boolean keep_logs = false
    }

    # Rendered into the command as the shell builtin `true` or `false`.
    Boolean fail = submission_id == ""

    command <<<
        # Guard FIRST, before any deletion: with an empty ID the path below
        # collapses to gs://<bucket>/submissions/, i.e. every submission.
        if ~{fail}; then echo "Please provide a non-empty submission ID" && exit 1; fi

        if ~{keep_logs}; then
            # keep some (presumably) lightweight cromwell log files:
            # list everything, drop the paths to keep, and feed the rest to
            # `rm -I`, which reads the URLs to delete from stdin.
            # 'log$' is a regex, not a glob: a leading '*' would be matched
            # literally by grep and the log files would not be filtered out.
            # Note that `timeout` only bounds the listing command it prefixes;
            # the message below is printed when the final `rm` in the pipeline fails.
            # NOTE(review): recursive listings may also contain blank lines and
            # 'dir/:' header lines -- confirm `rm -I` tolerates them.
            timeout 23h \
                gcloud -q storage ls --recursive \
                    gs://~{bucket_name}/submissions/~{submission_id} \
                | grep -v 'log$' | grep -vF '/stderr' | grep -vF '/stdout' | grep -vF '/script' \
                | gcloud -q storage rm -r -I \
                || echo "Timed out. Please try again."
        else
            # Fast path: remove the whole submission 'folder' in one go.
            timeout 23h \
                gcloud -q storage rm \
                    -r gs://~{bucket_name}/submissions/~{submission_id} \
                || echo "Timed out. Please try again."
        fi
    >>>

    runtime {
        cpu: 1
        memory: "4 GiB"
        disks: "local-disk 10 HDD"
        # Each key is declared exactly once.
        preemptible: 5
        maxRetries: 1
        docker: "us.gcr.io/google.com/cloudsdktool/google-cloud-cli:alpine"
    }
}

0 comments on commit 65018a8

Please sign in to comment.