Skip to content

Commit

Permalink
add --verify_variants support
Browse files Browse the repository at this point in the history
  • Loading branch information
nebfield committed Oct 9, 2024
1 parent d89eb7d commit d4fd211
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 23 deletions.
20 changes: 0 additions & 20 deletions modules/local/plink2_score.nf
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,6 @@ process PLINK2_SCORE {
$input ${geno.baseName} \
--out ${output}
n_missing=\$(comm -3 <(zcat --force $scorefile | tail -n +2 | cut -f 1 | sort) <(sort ${output}.sscore.vars) | wc -l | tr -d ' ')
if [ \$n_missing -gt 0 ]
then
echo "ERROR: \$n_missing variant(s) missing from final calculated score!"
exit 1
else
echo "INFO: Scoring file variants match listed variants in sscore.vars"
fi
cat <<-END_VERSIONS > versions.yml
${task.process.tokenize(':').last()}:
plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' )
Expand All @@ -93,16 +83,6 @@ process PLINK2_SCORE {
$input ${geno.baseName} \
--out ${output}
n_missing=\$(comm -3 <(zcat --force $scorefile | tail -n +2 | cut -f 1 | sort) <(sort ${output}.sscore.vars) | wc -l | tr -d ' ')
if [ \$n_missing -gt 0 ]
then
echo "ERROR: \$n_missing variant(s) missing from final calculated score!"
exit 1
else
echo "INFO: Scoring file variants match listed variants in sscore.vars"
fi
cat <<-END_VERSIONS > versions.yml
${task.process.tokenize(':').last()}:
plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' )
Expand Down
8 changes: 6 additions & 2 deletions modules/local/score_aggregate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ process SCORE_AGGREGATE {
"${task.ext.docker}${task.ext.docker_version}" }"

input:
tuple val(meta), path(scorefiles)
tuple val(meta), path(scorefiles) // calculated polygenic scores
path(scorefile_vars) // PGS scoring file
path(scored_vars) // variants _actually used_ to calculate scores

output:
tuple val(scoremeta), path("aggregated_scores.txt.gz"), emit: scores
Expand All @@ -21,7 +23,9 @@ process SCORE_AGGREGATE {
script:
scoremeta = meta.subMap('id')
"""
pgscatalog-aggregate -s $scorefiles -o . -v --no-split
# variants are always verified, so that variants in the scoring files
# overlap perfectly with the scored variants
pgscatalog-aggregate -s $scorefiles -o . -v --no-split --verify_variants
cat <<-END_VERSIONS > versions.yml
${task.process.tokenize(':').last()}:
Expand Down
15 changes: 14 additions & 1 deletion subworkflows/local/apply_score.nf
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,20 @@ workflow APPLY_SCORE {
.map { [ it.first().subMap("id"), it.tail().findAll { !(it instanceof LinkedHashMap) }]}
.set { ch_scores }

SCORE_AGGREGATE ( ch_scores )
// pgscatalog-aggregate --verify_variants notes:
//   - it checks that the variant IDs in the scoring files exactly match the
//     IDs of the variants that were scored
//   - the supporting files are not joined per sample or per score: each set is
//     collected into a single list and dumped into the same directory

// gather every PLINK2_SCORE.out.vars_scored emission into a single list
PLINK2_SCORE.out.vars_scored
    .collect()
    .set { ch_vars_scored }

// take the last element of each ch_target_scorefile emission, keep only the
// Path items, and gather them into a single list
// NOTE(review): presumably these are the PGS scoring files; confirm against
// the code that builds ch_target_scorefile
ch_target_scorefile
    .flatMap { it.last() }
    .filter(Path)
    .collect()
    .set { ch_scorefile_verify }

// arguments follow the order of the inputs declared by SCORE_AGGREGATE:
// scorefiles, scorefile_vars (PGS scoring files), scored_vars (scored variants)
SCORE_AGGREGATE ( ch_scores, ch_scorefile_verify, ch_vars_scored )

ch_versions = ch_versions.mix(SCORE_AGGREGATE.out.versions)

emit:
Expand Down

0 comments on commit d4fd211

Please sign in to comment.