From 0bcd041d8e596652a718f332204f1948b8453a5d Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 14:20:03 +0100 Subject: [PATCH 01/12] fail when join goes bad --- subworkflows/local/ancestry/ancestry_project.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/ancestry/ancestry_project.nf b/subworkflows/local/ancestry/ancestry_project.nf index 8fcae1fc..4420ea2e 100644 --- a/subworkflows/local/ancestry/ancestry_project.nf +++ b/subworkflows/local/ancestry/ancestry_project.nf @@ -71,7 +71,7 @@ workflow ANCESTRY_PROJECT { // ch_genomes - .join(vmiss) + .join(vmiss, failOnMismatch: true) .combine( ch_db.map{ it.tail() } ) // (drop hashmap) .flatten() .buffer(size: 8) From 67291a6d5118548811c930f96f92d54b0615bf4a Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 14:47:33 +0100 Subject: [PATCH 02/12] make meta key for vmiss match converted vcf --- modules/local/plink2_vcf.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/plink2_vcf.nf b/modules/local/plink2_vcf.nf index bfb91868..93c44659 100644 --- a/modules/local/plink2_vcf.nf +++ b/modules/local/plink2_vcf.nf @@ -23,7 +23,7 @@ process PLINK2_VCF { tuple val(newmeta), path("*.pgen"), emit: pgen tuple val(newmeta), path("*.psam"), emit: psam tuple val(newmeta), path("*.zst") , emit: pvar - tuple val(meta), path("*.vmiss.gz"), emit: vmiss + tuple val(newmeta), path("*.vmiss.gz"), emit: vmiss path "versions.yml" , emit: versions script: From 65c1e7e20cb087538b6c469d636b7fdce2a3011d Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 14:48:13 +0100 Subject: [PATCH 03/12] fix --only_projection --- workflows/pgscalc.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/pgscalc.nf b/workflows/pgscalc.nf index 60304a07..6c7242aa 100644 --- a/workflows/pgscalc.nf +++ b/workflows/pgscalc.nf @@ -128,7 +128,7 @@ if (params.only_projection) { run_ancestry_bootstrap = true 
run_input_check = true run_make_compatible = true - run_match = false + run_match = true run_ancestry_assign = true run_apply_score = false run_report = false From 682526938a99e3ba2b997cff9f38c6193be518c8 Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 16:10:53 +0100 Subject: [PATCH 04/12] add vcf ancestry test --- .github/workflows/ancestry-vcf.yml | 157 ++++++++++++++++++ .github/workflows/ci.yml | 9 + tests/ancestry/samplesheet_vcf.csv | 2 + .../ancestry/{test.yml => test_ancestry.yml} | 0 tests/ancestry/test_ancestry_vcf.yml | 30 ++++ 5 files changed, 198 insertions(+) create mode 100644 .github/workflows/ancestry-vcf.yml create mode 100644 tests/ancestry/samplesheet_vcf.csv rename tests/ancestry/{test.yml => test_ancestry.yml} (100%) create mode 100644 tests/ancestry/test_ancestry_vcf.yml diff --git a/.github/workflows/ancestry-vcf.yml b/.github/workflows/ancestry-vcf.yml new file mode 100644 index 00000000..6c017fd6 --- /dev/null +++ b/.github/workflows/ancestry-vcf.yml @@ -0,0 +1,157 @@ +name: Run ancestry test with singularity or docker profiles + +on: + workflow_call: + inputs: + container-cache-key: + type: string + required: true + ancestry-cache-key: + type: string + required: true + docker: + type: boolean + singularity: + type: boolean + +env: + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/singularity + SINGULARITY_VERSION: 3.8.3 + +jobs: + docker: + if: ${{ inputs.docker }} + runs-on: ubuntu-latest + + steps: + - name: Set environment variables + run: | + echo "ANCESTRY_REF_DIR=$RUNNER_TEMP" >> $GITHUB_ENV + echo "ANCESTRY_TARGET_DIR=$RUNNER_TEMP" >> $GITHUB_ENV + + - name: Check out pipeline code + uses: actions/checkout@v3 + + - uses: nf-core/setup-nextflow@v1 + + - name: Restore docker images + id: restore-docker + uses: actions/cache/restore@v3 + with: + path: ${{ runner.temp }}/docker + key: ${{ inputs.container-cache-key }} + fail-on-cache-miss: true + + - name: Load docker images from cache + run: | + find $HOME 
-name '*.tar' + find ${{ runner.temp }}/docker/ -name '*.tar' -exec sh -c 'docker load < {}' \; + + - name: Restore reference data + uses: actions/cache/restore@v3 + with: + path: | + ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pgen + ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.psam + ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pvar.zst + ${{ env.ANCESTRY_REF_DIR }}/GRCh38_HAPNEST_reference.tar.zst + key: ${{ inputs.ancestry-cache-key }} + fail-on-cache-miss: true + + - name: Install plink2 to recode + run: sudo apt-get install -y plink2 + + - name: Make VCF + run: | + plink2 --pfile ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL vzs \ + --export vcf bgz \ + --out ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL + + - name: Set up test requirements + uses: actions/setup-python@v3 + with: + python-version: '3.10' + cache: 'pip' + + - run: pip install -r ${{ github.workspace }}/tests/requirements.txt + + - name: Run ancestry test + run: TMPDIR=~ PROFILE=docker pytest --kwdof --symlink --git-aware --wt 2 --tag "ancestry vcf" --ignore tests/bin + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v3 + with: + name: logs-docker-ancestry-vcf + path: | + /home/runner/pytest_workflow_*/*/.nextflow.log + /home/runner/pytest_workflow_*/*/log.out + /home/runner/pytest_workflow_*/*/log.err + /home/runner/pytest_workflow_*/*/output/* + + singularity: + if: ${{ inputs.singularity }} + runs-on: ubuntu-latest + + steps: + - name: Set environment variables + run: | + echo "ANCESTRY_REF_DIR=$RUNNER_TEMP" >> $GITHUB_ENV + echo "ANCESTRY_TARGET_DIR=$RUNNER_TEMP" >> $GITHUB_ENV + + - name: Check out pipeline code + uses: actions/checkout@v3 + + - uses: nf-core/setup-nextflow@v1 + + - name: Restore singularity setup + id: restore-singularity-setup + uses: actions/cache@v3 + with: + path: /opt/hostedtoolcache/singularity/${{ env.SINGULARITY_VERSION }}/x64 + key: ${{ runner.os }}-singularity-${{
env.SINGULARITY_VERSION }} + fail-on-cache-miss: true + + - name: Add singularity to path + run: | + echo "/opt/hostedtoolcache/singularity/${{ env.SINGULARITY_VERSION }}/x64/bin" >> $GITHUB_PATH + + - name: Restore singularity container images + id: restore-singularity + uses: actions/cache@v3 + with: + path: ${{ env.NXF_SINGULARITY_CACHEDIR }} + key: ${{ inputs.container-cache-key }} + + - name: Restore reference data + uses: actions/cache/restore@v3 + with: + path: | + ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pgen + ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.psam + ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pvar.zst + ${{ env.ANCESTRY_REF_DIR }}/GRCh38_HAPNEST_reference.tar.zst + key: ${{ inputs.ancestry-cache-key }} + fail-on-cache-miss: true + + - name: Set up test requirements + uses: actions/setup-python@v3 + with: + python-version: '3.10' + cache: 'pip' + + - run: pip install -r ${{ github.workspace }}/tests/requirements.txt + + - name: Run ancestry test + run: TMPDIR=~ PROFILE=singularity pytest --kwdof --symlink --git-aware --wt 2 --tag "ancestry" --ignore tests/bin + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v3 + with: + name: logs-singularity-ancestry + path: | + /home/runner/pytest_workflow_*/*/.nextflow.log + /home/runner/pytest_workflow_*/*/log.out + /home/runner/pytest_workflow_*/*/log.err + /home/runner/pytest_workflow_*/*/output/* diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee98360f..2f6ebc85 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,6 +9,7 @@ on: branches: - dev - main + - fix_vcf release: types: [published] @@ -123,3 +124,11 @@ jobs: container-cache-key: ${{ needs.preload_singularity.outputs.cache-key }} ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }} singularity: true + + ancestry_vcf_docker: + needs: [preload_ancestry, preload_docker] + uses: ./.github/workflows/ancestry-vcf.yml + 
with: + container-cache-key: ${{ needs.preload_docker.outputs.cache-key }} + ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }} + docker: true diff --git a/tests/ancestry/samplesheet_vcf.csv b/tests/ancestry/samplesheet_vcf.csv new file mode 100644 index 00000000..19f13eb0 --- /dev/null +++ b/tests/ancestry/samplesheet_vcf.csv @@ -0,0 +1,2 @@ +sampleset,path_prefix,chrom,format +test,ANCESTRY_TARGET_DIR/GRCh38_HAPNEST_TARGET_ALL,,vcf \ No newline at end of file diff --git a/tests/ancestry/test.yml b/tests/ancestry/test_ancestry.yml similarity index 100% rename from tests/ancestry/test.yml rename to tests/ancestry/test_ancestry.yml diff --git a/tests/ancestry/test_ancestry_vcf.yml b/tests/ancestry/test_ancestry_vcf.yml new file mode 100644 index 00000000..8baf5f83 --- /dev/null +++ b/tests/ancestry/test_ancestry_vcf.yml @@ -0,0 +1,30 @@ +# ancestry test notes: +# need to stage reference in $ANCESTRY_REF_DIR +# extract target in $ANCESTRY_TARGET_DIR +# need to convert target to VCF + +- name: test ancestry projection and scoring + command: > + bash -c " + sed \"s|ANCESTRY_TARGET_DIR|$ANCESTRY_TARGET_DIR|\" tests/ancestry/samplesheet.csv > samplesheet.csv; + nextflow run main.nf -c ./tests/config/nextflow.config \ + --input samplesheet.csv \ + --run_ancestry $ANCESTRY_REF_DIR/GRCh38_HAPNEST_reference.tar.zst \ + --target_build GRCh38 \ + --pgs_id PGS001229 \ + --min_overlap 0.50 \ + --target_build GRCh38 \ + --scorefile false + " + tags: + - ancestry vcf + - slow + stdout: + contains: + - "Pipeline completed successfully" + files: + - path: "output/test/score/pop_summary.csv" + contains: + - "AFR,100 (33.33%)" + - "EAS,100 (33.33%)" + - "EUR,100 (33.33%)" From f27529e9b423200a42bbf9b8a6c824a41471cb5d Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 16:16:31 +0100 Subject: [PATCH 05/12] fix plink call --- .github/workflows/ancestry-vcf.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/ancestry-vcf.yml b/.github/workflows/ancestry-vcf.yml index 6c017fd6..f828f064 100644 --- a/.github/workflows/ancestry-vcf.yml +++ b/.github/workflows/ancestry-vcf.yml @@ -63,7 +63,7 @@ jobs: - name: Make VCF run: | - plink2 --pfile ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL vzs \ + plink2 --pfile ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL vzs \ --export vcf bgz \ --out ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL From 3ad7dfa68934eadd9a917121e3cc2482331704bb Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 16:22:11 +0100 Subject: [PATCH 06/12] what's wrong with my yaml :( --- .github/workflows/ancestry-vcf.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ancestry-vcf.yml b/.github/workflows/ancestry-vcf.yml index f828f064..7a5907c6 100644 --- a/.github/workflows/ancestry-vcf.yml +++ b/.github/workflows/ancestry-vcf.yml @@ -61,11 +61,10 @@ jobs: - name: Install plink2 to recode run: sudo apt-get install -y plink2 - - name: Make VCF run: | - plink2 --pfile ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL vzs \ + plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs \ --export vcf bgz \ - --out ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL + --out ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL - name: Set up test requirements uses: actions/setup-python@v3 From 5c6656700e7d7714c414a4a9e446c5e24d1eb03d Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 16:23:17 +0100 Subject: [PATCH 07/12] arf --- .github/workflows/ancestry-vcf.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ancestry-vcf.yml b/.github/workflows/ancestry-vcf.yml index 7a5907c6..787ecc02 100644 --- a/.github/workflows/ancestry-vcf.yml +++ b/.github/workflows/ancestry-vcf.yml @@ -61,6 +61,7 @@ jobs: - name: Install plink2 to recode run: sudo apt-get install -y plink2 + - name: run plink2 run: | 
plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs \ --export vcf bgz \ From 6733f3dc07ea9df1d9cbe680e1dd7ae57f9f0fb2 Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 16:25:06 +0100 Subject: [PATCH 08/12] double arf --- .github/workflows/ancestry-vcf.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ancestry-vcf.yml b/.github/workflows/ancestry-vcf.yml index 787ecc02..093b903e 100644 --- a/.github/workflows/ancestry-vcf.yml +++ b/.github/workflows/ancestry-vcf.yml @@ -62,9 +62,9 @@ jobs: run: sudo apt-get install -y plink2 - name: run plink2 - run: | - plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs \ - --export vcf bgz \ + run: > + plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs + --export vcf bgz --out ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL - name: Set up test requirements From 080f751b44d6bdc73b5c6e14bd0ad4744ab68bde Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 16:26:54 +0100 Subject: [PATCH 09/12] I hate yaml --- .github/workflows/ancestry-vcf.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ancestry-vcf.yml b/.github/workflows/ancestry-vcf.yml index 093b903e..6d1a2df6 100644 --- a/.github/workflows/ancestry-vcf.yml +++ b/.github/workflows/ancestry-vcf.yml @@ -61,11 +61,8 @@ jobs: - name: Install plink2 to recode run: sudo apt-get install -y plink2 - - name: run plink2 - run: > - plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs - --export vcf bgz - --out ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL + - name: Recode VCF + run: plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs --export vcf bgz --out ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL - name: Set up test requirements uses: actions/setup-python@v3 From f4065dd9e77cf56e80de1dc6b5aaef83fd22009d Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 
Sep 2023 16:42:29 +0100 Subject: [PATCH 10/12] fix test name --- .github/workflows/ancestry-vcf.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ancestry-vcf.yml b/.github/workflows/ancestry-vcf.yml index 6d1a2df6..4961ba5f 100644 --- a/.github/workflows/ancestry-vcf.yml +++ b/.github/workflows/ancestry-vcf.yml @@ -1,4 +1,4 @@ -name: Run ancestry test with singularity or docker profiles +name: Run ancestry test with singularity or docker profiles with VCF input on: workflow_call: From fc9d48485c6b71a9cc7c2f8a50a37545222fbe57 Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 16:42:42 +0100 Subject: [PATCH 11/12] activate ancestry vcf singularity test --- .github/workflows/ancestry-vcf.yml | 10 ++++++++-- .github/workflows/ci.yml | 8 ++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ancestry-vcf.yml b/.github/workflows/ancestry-vcf.yml index 4961ba5f..5454433d 100644 --- a/.github/workflows/ancestry-vcf.yml +++ b/.github/workflows/ancestry-vcf.yml @@ -130,7 +130,13 @@ jobs: ${{ env.ANCESTRY_REF_DIR }}/GRCh38_HAPNEST_reference.tar.zst key: ${{ inputs.ancestry-cache-key }} fail-on-cache-miss: true - + + - name: Install plink2 to recode + run: sudo apt-get install -y plink2 + + - name: Recode VCF + run: plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs --export vcf bgz --out ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL + - name: Set up test requirements uses: actions/setup-python@v3 with: @@ -140,7 +146,7 @@ jobs: - run: pip install -r ${{ github.workspace }}/tests/requirements.txt - name: Run ancestry test - run: TMPDIR=~ PROFILE=singularity pytest --kwdof --symlink --git-aware --wt 2 --tag "ancestry" --ignore tests/bin + run: TMPDIR=~ PROFILE=singularity pytest --kwdof --symlink --git-aware --wt 2 --tag "ancestry vcf" --ignore tests/bin - name: Upload logs on failure if: failure() diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml 
index 2f6ebc85..b5acc420 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -132,3 +132,11 @@ jobs: container-cache-key: ${{ needs.preload_docker.outputs.cache-key }} ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }} docker: true + + ancestry_vcf_singularity: + needs: [preload_ancestry, preload_singularity] + uses: ./.github/workflows/ancestry-vcf.yml + with: + container-cache-key: ${{ needs.preload_singularity.outputs.cache-key }} + ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }} + singularity: true From 47c3c99ab6684ce44fe64df097609f73d60ba5a6 Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Wed, 27 Sep 2023 16:49:41 +0100 Subject: [PATCH 12/12] fix test name and use VCF samplesheet --- tests/ancestry/test_ancestry_vcf.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ancestry/test_ancestry_vcf.yml b/tests/ancestry/test_ancestry_vcf.yml index 8baf5f83..9b0fe90d 100644 --- a/tests/ancestry/test_ancestry_vcf.yml +++ b/tests/ancestry/test_ancestry_vcf.yml @@ -3,10 +3,10 @@ # extract target in $ANCESTRY_TARGET_DIR # need to convert target to VCF -- name: test ancestry projection and scoring +- name: test ancestry projection and scoring with VCF input command: > bash -c " - sed \"s|ANCESTRY_TARGET_DIR|$ANCESTRY_TARGET_DIR|\" tests/ancestry/samplesheet.csv > samplesheet.csv; + sed \"s|ANCESTRY_TARGET_DIR|$ANCESTRY_TARGET_DIR|\" tests/ancestry/samplesheet_vcf.csv > samplesheet.csv; nextflow run main.nf -c ./tests/config/nextflow.config \ --input samplesheet.csv \ --run_ancestry $ANCESTRY_REF_DIR/GRCh38_HAPNEST_reference.tar.zst \