diff --git a/.github/workflows/ancestry-vcf.yml b/.github/workflows/ancestry-vcf.yml
new file mode 100644
index 00000000..5454433d
--- /dev/null
+++ b/.github/workflows/ancestry-vcf.yml
@@ -0,0 +1,167 @@
+# Reusable workflow (workflow_call): recodes the restored plink2 target data to
+# VCF, then runs the pytest tests tagged "ancestry vcf" with the docker and/or
+# singularity profile, depending on which boolean input the caller sets.
+name: Run ancestry test with singularity or docker profiles with VCF input
+
+on:
+  workflow_call:
+    inputs:
+      container-cache-key:
+        type: string
+        required: true
+      ancestry-cache-key:
+        type: string
+        required: true
+      docker:
+        type: boolean
+      singularity:
+        type: boolean
+
+env:
+  NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/singularity
+  SINGULARITY_VERSION: 3.8.3
+
+jobs:
+  docker:
+    if: ${{ inputs.docker }}
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Set environment variables
+        run: |
+          echo "ANCESTRY_REF_DIR=$RUNNER_TEMP" >> $GITHUB_ENV
+          echo "ANCESTRY_TARGET_DIR=$RUNNER_TEMP" >> $GITHUB_ENV
+
+      - name: Check out pipeline code
+        uses: actions/checkout@v3
+
+      - uses: nf-core/setup-nextflow@v1
+
+      - name: Restore docker images
+        id: restore-docker
+        uses: actions/cache/restore@v3
+        with:
+          path: ${{ runner.temp }}/docker
+          key: ${{ inputs.container-cache-key }}
+          fail-on-cache-miss: true
+
+      - name: Load docker images from cache
+        run: |
+          find $HOME -name '*.tar'
+          find ${{ runner.temp }}/docker/ -name '*.tar' -exec sh -c 'docker load < {}' \;
+
+      - name: Restore reference data
+        uses: actions/cache/restore@v3
+        with:
+          path: |
+            ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pgen
+            ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.psam
+            ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pvar.zst
+            ${{ env.ANCESTRY_REF_DIR }}/GRCh38_HAPNEST_reference.tar.zst
+          key: ${{ inputs.ancestry-cache-key }}
+          fail-on-cache-miss: true
+
+      - name: Install plink2 to recode
+        run: sudo apt-get update && sudo apt-get install -y plink2
+
+      # 'vzs' marks the .pvar as zstd-compressed; the export is written next to the pfile as a bgzipped VCF
+      - name: Recode VCF
+        run: plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs --export vcf bgz --out ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL
+
+      - name: Set up test requirements
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+
+      - run: pip install -r ${{ github.workspace }}/tests/requirements.txt
+
+      - name: Run ancestry test
+        run: TMPDIR=~ PROFILE=docker pytest --kwdof --symlink --git-aware --wt 2 --tag "ancestry vcf" --ignore tests/bin
+
+      - name: Upload logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v3
+        with:
+          # Unique per job: v3 uploads that share an artifact name land in the same artifact
+          name: logs-docker-ancestry-vcf
+          path: |
+            /home/runner/pytest_workflow_*/*/.nextflow.log
+            /home/runner/pytest_workflow_*/*/log.out
+            /home/runner/pytest_workflow_*/*/log.err
+            /home/runner/pytest_workflow_*/*/output/*
+
+  singularity:
+    if: ${{ inputs.singularity }}
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Set environment variables
+        run: |
+          echo "ANCESTRY_REF_DIR=$RUNNER_TEMP" >> $GITHUB_ENV
+          echo "ANCESTRY_TARGET_DIR=$RUNNER_TEMP" >> $GITHUB_ENV
+
+      - name: Check out pipeline code
+        uses: actions/checkout@v3
+
+      - uses: nf-core/setup-nextflow@v1
+
+      - name: Restore singularity setup
+        id: restore-singularity-setup
+        uses: actions/cache@v3
+        with:
+          path: /opt/hostedtoolcache/singularity/${{ env.SINGULARITY_VERSION }}/x64
+          key: ${{ runner.os }}-singularity-${{ env.SINGULARITY_VERSION }}
+          fail-on-cache-miss: true
+
+      - name: Add singularity to path
+        run: |
+          echo "/opt/hostedtoolcache/singularity/${{ env.SINGULARITY_VERSION }}/x64/bin" >> $GITHUB_PATH
+
+      - name: Restore singularity container images
+        id: restore-singularity
+        uses: actions/cache@v3
+        with:
+          path: ${{ env.NXF_SINGULARITY_CACHEDIR }}
+          key: ${{ inputs.container-cache-key }}
+
+      - name: Restore reference data
+        uses: actions/cache/restore@v3
+        with:
+          path: |
+            ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pgen
+            ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.psam
+            ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pvar.zst
+            ${{ env.ANCESTRY_REF_DIR }}/GRCh38_HAPNEST_reference.tar.zst
+          key: ${{ inputs.ancestry-cache-key }}
+          fail-on-cache-miss: true
+
+      - name: Install plink2 to recode
+        run: sudo apt-get update && sudo apt-get install -y plink2
+
+      # 'vzs' marks the .pvar as zstd-compressed; the export is written next to the pfile as a bgzipped VCF
+      - name: Recode VCF
+        run: plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs --export vcf bgz --out ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL
+
+      - name: Set up test requirements
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+
+      - run: pip install -r ${{ github.workspace }}/tests/requirements.txt
+
+      - name: Run ancestry test
+        run: TMPDIR=~ PROFILE=singularity pytest --kwdof --symlink --git-aware --wt 2 --tag "ancestry vcf" --ignore tests/bin
+
+      - name: Upload logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v3
+        with:
+          # Unique per job: v3 uploads that share an artifact name land in the same artifact
+          name: logs-singularity-ancestry-vcf
+          path: |
+            /home/runner/pytest_workflow_*/*/.nextflow.log
+            /home/runner/pytest_workflow_*/*/log.out
+            /home/runner/pytest_workflow_*/*/log.err
+            /home/runner/pytest_workflow_*/*/output/*
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ee98360f..b5acc420 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,6 +9,7 @@ on:
     branches:
       - dev
       - main
+      - fix_vcf
   release:
     types: [published]
 
@@ -123,3 +124,19 @@ jobs:
       container-cache-key: ${{ needs.preload_singularity.outputs.cache-key }}
       ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }}
       singularity: true
+
+  ancestry_vcf_docker:
+    needs: [preload_ancestry, preload_docker]
+    uses: ./.github/workflows/ancestry-vcf.yml
+    with:
+      container-cache-key: ${{ needs.preload_docker.outputs.cache-key }}
+      ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }}
+      docker: true
+
+  ancestry_vcf_singularity:
+    needs: [preload_ancestry, preload_singularity]
+    uses: ./.github/workflows/ancestry-vcf.yml
+    with:
+      container-cache-key: ${{ needs.preload_singularity.outputs.cache-key }}
+      ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }}
+      singularity: true
diff --git a/modules/local/plink2_vcf.nf b/modules/local/plink2_vcf.nf
index bfb91868..93c44659 100644
--- a/modules/local/plink2_vcf.nf
+++ b/modules/local/plink2_vcf.nf
@@ -23,7 +23,7 @@ process PLINK2_VCF {
     tuple val(newmeta), path("*.pgen"), emit: pgen
     tuple val(newmeta), path("*.psam"), emit: psam
     tuple val(newmeta), path("*.zst") , emit: pvar
-    tuple val(meta), path("*.vmiss.gz"), emit: vmiss
+    tuple val(newmeta), path("*.vmiss.gz"), emit: vmiss
     path "versions.yml" , emit: versions
 
     script:
diff --git a/subworkflows/local/ancestry/ancestry_project.nf b/subworkflows/local/ancestry/ancestry_project.nf
index 8fcae1fc..4420ea2e 100644
--- a/subworkflows/local/ancestry/ancestry_project.nf
+++ b/subworkflows/local/ancestry/ancestry_project.nf
@@ -71,7 +71,7 @@ workflow ANCESTRY_PROJECT {
 
     //
     ch_genomes
-        .join(vmiss)
+        .join(vmiss, failOnMismatch: true)
         .combine( ch_db.map{ it.tail() } ) // (drop hashmap)
         .flatten()
         .buffer(size: 8)
diff --git a/tests/ancestry/samplesheet_vcf.csv b/tests/ancestry/samplesheet_vcf.csv
new file mode 100644
index 00000000..19f13eb0
--- /dev/null
+++ b/tests/ancestry/samplesheet_vcf.csv
@@ -0,0 +1,2 @@
+sampleset,path_prefix,chrom,format
+test,ANCESTRY_TARGET_DIR/GRCh38_HAPNEST_TARGET_ALL,,vcf
\ No newline at end of file
diff --git a/tests/ancestry/test.yml b/tests/ancestry/test_ancestry.yml
similarity index 100%
rename from tests/ancestry/test.yml
rename to tests/ancestry/test_ancestry.yml
diff --git a/tests/ancestry/test_ancestry_vcf.yml b/tests/ancestry/test_ancestry_vcf.yml
new file mode 100644
index 00000000..9b0fe90d
--- /dev/null
+++ b/tests/ancestry/test_ancestry_vcf.yml
@@ -0,0 +1,29 @@
+# ancestry test notes:
+# need to stage reference in $ANCESTRY_REF_DIR
+# extract target in $ANCESTRY_TARGET_DIR
+# need to convert target to VCF
+
+- name: test ancestry projection and scoring with VCF input
+  command: >
+    bash -c "
+    sed \"s|ANCESTRY_TARGET_DIR|$ANCESTRY_TARGET_DIR|\" tests/ancestry/samplesheet_vcf.csv > samplesheet.csv;
+    nextflow run main.nf -c ./tests/config/nextflow.config \
+      --input samplesheet.csv \
+      --run_ancestry $ANCESTRY_REF_DIR/GRCh38_HAPNEST_reference.tar.zst \
+      --target_build GRCh38 \
+      --pgs_id PGS001229 \
+      --min_overlap 0.50 \
+      --scorefile false
+    "
+  tags:
+    - ancestry vcf
+    - slow
+  stdout:
+    contains:
+      - "Pipeline completed successfully"
+  files:
+    - path: "output/test/score/pop_summary.csv"
+      contains:
+        - "AFR,100 (33.33%)"
+        - "EAS,100 (33.33%)"
+        - "EUR,100 (33.33%)"
diff --git a/workflows/pgscalc.nf b/workflows/pgscalc.nf
index 60304a07..6c7242aa 100644
--- a/workflows/pgscalc.nf
+++ b/workflows/pgscalc.nf
@@ -128,7 +128,7 @@ if (params.only_projection) {
     run_ancestry_bootstrap = true
     run_input_check = true
     run_make_compatible = true
-    run_match = false
+    run_match = true
     run_ancestry_assign = true
     run_apply_score = false
     run_report = false