From 85defc591e0034e481ed6c6a91e17bdc273269cf Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Mon, 16 Sep 2024 14:25:03 +0100
Subject: [PATCH 01/22] warn about --parallel

---
 workflows/pgsc_calc.nf | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/workflows/pgsc_calc.nf b/workflows/pgsc_calc.nf
index 361c9736..e4ad9f95 100644
--- a/workflows/pgsc_calc.nf
+++ b/workflows/pgsc_calc.nf
@@ -117,6 +117,10 @@ if (workflow.profile.contains("test")) {
     }
 }
 
+if (params.parallel) {
+  log.info "INFO: --parallel parameter is deprecated: jobs are automatically parallelised by default"
+}
+
 /*
 ========================================================================================
     IMPORT LOCAL MODULES/SUBWORKFLOWS

From 3e7928097c45680a997e763eae098acfdc2baf0e Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Tue, 17 Sep 2024 12:10:44 +0100
Subject: [PATCH 02/22] fix parallel warning

---
 nextflow.config | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index dc054608..855b5f1f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -18,7 +18,6 @@ params {
     pgp_id                     = null
     efo_direct                 = false
 
-
     // reference params
     run_ancestry = null
     ancestry_checksums = "$projectDir/assets/ancestry/checksums.txt"
@@ -93,6 +92,9 @@ params {
     max_memory                 = '128.GB'
     max_cpus                   = 16
     max_time                   = '240.h'
+
+    // deprecated (doesn't do anything)
+    parallel = null
 }
 
 // Load base.config by default for all pipelines
@@ -317,7 +319,7 @@ validation {
     monochromeLogs         = params.monochrome_logs 
     failUnrecognisedParams = false
     lenientMode            = false
-    defaultIgnoreParams    = ['platform']
+    defaultIgnoreParams    = ['platform', 'parallel']
     ignoreParams           = ['genomes','igenomes_base',',only_bootstrap','only_input','only_compatible','only_match','only_score']
     showHiddenParams       = false
 }

From f17b9479cfa62827c4b54cdd0fda7e75ed34f741 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Tue, 17 Sep 2024 12:11:29 +0100
Subject: [PATCH 03/22] fix parsing VCFs in JSON

---
 lib/SamplesheetParser.groovy | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/lib/SamplesheetParser.groovy b/lib/SamplesheetParser.groovy
index e3e41299..42bb516a 100644
--- a/lib/SamplesheetParser.groovy
+++ b/lib/SamplesheetParser.groovy
@@ -27,15 +27,19 @@ class SamplesheetParser {
     }
 
     def parseJSON(json) {
-        // note: we don't check for file existence here 
+        // note: we don't check for file existence here
         // relative paths won't work, because the JSON object doesn't use path_prefix
-        def parsed = json.subMap("chrom", "vcf_import_dosage", "n_chrom", "format") 
+        def parsed = json.subMap("chrom", "vcf_import_dosage", "n_chrom", "format")
         parsed.id = json.sampleset
         parsed = parsed + getFlagMap(json)
         parsed.build = this.target_build
         parsed.chrom = truncateChrom(json)
 
-        return [parsed, [json.geno, json.variants, json.pheno]]
+        if (parsed.is_vcf) {
+          return [parsed, [json.geno]]
+        } else {
+          return [parsed, [json.geno, json.variants, json.pheno]]
+        }
     }
 
     def verifySamplesheet(rows) {
@@ -192,4 +196,3 @@ class SamplesheetParser {
     }
 
 }
-

From 4d31352f3a0a494042171abbd6b481c83062bf7c Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Thu, 3 Oct 2024 12:45:20 +0100
Subject: [PATCH 04/22] Make report shareable (#379)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update README.md

be extra clear about WGS 👀

* remove individual level information from report
---
 README.md                |  4 ++++
 assets/report/report.qmd | 48 ++++++++++++++++++++++++++--------------
 2 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index f1490fcd..4617e90c 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,10 @@ and/or user-defined PGS/PRS.
 
 ## Pipeline summary
 
+> [!IMPORTANT]  
+> * Whole genome sequencing (WGS) data [are not currently supported by the calculator](https://pgsc-calc.readthedocs.io/en/latest/explanation/match.html#are-your-target-genomes-imputed-are-they-wgs)
+> * It’s possible to [create compatible gVCFs from WGS data](https://github.com/PGScatalog/pgsc_calc/discussions/123#discussioncomment-6469422). We plan to improve support for WGS data in the near future.
+
 <p align="center">
   <img width="80%" src="https://github.com/PGScatalog/pgsc_calc/assets/11425618/f766b28c-0f75-4344-abf3-3463946e36cc">
 </p>
diff --git a/assets/report/report.qmd b/assets/report/report.qmd
index 5c7a69ac..7a89a128 100644
--- a/assets/report/report.qmd
+++ b/assets/report/report.qmd
@@ -36,6 +36,10 @@ library(DT)
 library(tibble)
 library(forcats)
 library(readr)
+
+# prevent plots with small sample sets
+MINIMUM_N_SAMPLES <- 50
+LOW_SAMPLE_SIZE <- TRUE
 ```
 
 ```{r setup_logs, echo=FALSE}
@@ -64,6 +68,14 @@ log_df$sampleset <- gsub("_", " ", log_df$sampleset)  # page breaking issues
 cat command.txt | fold -w 80 -s | awk -F ' ' 'NR==1 { print "$", $0} NR>1 { print "    " $0}' | sed 's/$/\\/' | sed '$ s/.$//' 
 ```
 
+```{asis, echo = any(grepl("-profile test", readLines("command.txt")))}
+:::{.callout-tip}
+* If you're using the test profile, this report and these results are not biologically meaningful 
+* The test profile is only used to check that all software is installed and working correctly 
+* If you're reading this message, then that means everything is OK and you're ready to use your own data!
+:::
+```
+
 ## Version
 
 ```{r, echo=FALSE}
@@ -386,10 +398,15 @@ pop_summary %>%
 scores <- readr::read_tsv(params$score_path) 
 n_scores <- length(unique(scores$PGS))
 n_samples <- length(unique(scores$IID))
-print(n_samples)
+if (n_samples < MINIMUM_N_SAMPLES) {
+  LOW_SAMPLE_SIZE <- TRUE
+} else {
+  LOW_SAMPLE_SIZE <- FALSE
+}
 ```
 
-```{asis, echo = any(table(scores$sampleset) < 50) && !params$run_ancestry}
+
+```{asis, echo = (LOW_SAMPLE_SIZE && !params$run_ancestry)}
 
 ::: {.callout-important title="Warning: small sampleset size (n < 50) detected"}
 * plink2 uses allele frequency data to [mean impute](https://www.cog-genomics.org/plink/2.0/score) the dosages of missing genotypes
@@ -397,7 +414,7 @@ print(n_samples)
 * With a small sample size, the resulting score sums may be inconsistent between samples
 * The average `([scorename]_AVG)` may be more applicable as it calculates an average weighting over all genotypes present
 
-In the future mean-imputation will be supported in small samplesets using ancestry-matched reference samplesets to ensure consistent calculation of score sums (e.g. 1000G Genomes).
+It's recommended to use `--run_ancestry` with a reference panel (e.g. 1000 Genomes) for small samplesets to ensure consistent calculation of score sums.
 :::
 
 ```
@@ -419,24 +436,21 @@ In the future mean-imputation will be supported in small samplesets using ancest
 
 ### Score data 
 
-#### Score extract
+#### Density plot(s)
 
+```{asis, echo = !LOW_SAMPLE_SIZE}
 ::: {.callout-note}
-Below is a summary of the aggregated scores, which might be useful for debugging. See here for an explanation of [plink2](https://www.cog-genomics.org/plink/2.0/formats#sscore) column names
+The summary density plots show up to six scoring files
 :::
-
-```{r, echo = FALSE}
-scores %>%
-  tibble::as_tibble(.)
 ```
 
-#### Density plot(s)
-
-::: {.callout-note}
-The summary density plots show up to six scoring files
+```{asis, echo = LOW_SAMPLE_SIZE}
+::: {.callout-warning}
+Density plots are disabled for low sample sizes
 :::
+```
 
-```{r density_ancestry, echo=FALSE, message=FALSE, warning=FALSE, eval=params$run_ancestry}
+```{r density_ancestry, echo=FALSE, message=FALSE, warning=FALSE, eval=(!LOW_SAMPLE_SIZE & params$run_ancestry)}
 # Select which PGS to plot
 uscores <- unique(scores$PGS)
 uscores_plot <- uscores[1:min(length(uscores), 6)] # plot max 6 PGS
@@ -454,7 +468,7 @@ for(current_pgs in uscores_plot){
 }
 ```
 
-```{r, echo = FALSE, message=FALSE, warning=FALSE, eval=!params$run_ancestry}
+```{r, echo = FALSE, message=FALSE, warning=FALSE, eval=(!LOW_SAMPLE_SIZE & !params$run_ancestry)}
 scores %>%
   ungroup() %>%
   select(IID, sampleset, PGS, SUM) %>%
@@ -488,7 +502,9 @@ stringr::str_glue("{params$sampleset}/score/aggregated_scores.txt.gz")
 
 # Citation
 
-> Lambert, Wingfield, et al. (2024) The Polygenic Score Catalog: new functionality and tools to enable FAIR research. medRxiv. doi:[10.1101/2024.05.29.24307783](https://doi.org/10.1101/2024.05.29.24307783).
+> Samuel A. Lambert, Benjamin Wingfield, Joel T. Gibson, Laurent Gil, Santhi Ramachandran, Florent Yvon, Shirin Saverimuttu, Emily Tinsley, Elizabeth Lewis, Scott C. Ritchie, Jingqin Wu, Rodrigo Canovas, Aoife McMahon, Laura W. Harris, Helen Parkinson, Michael Inouye.
+Enhancing the Polygenic Score Catalog with tools for score calculation and ancestry normalization.
+Nature Genetics | doi: [10.1038/s41588-024-01937-x](https://doi.org/10.1038/s41588-024-01937-x)
 
 ::: {.callout-important}
 For scores from the PGS Catalog, please remember to cite the original publications from which they came (these are listed in the metadata table).

From 0d1597b493717975fca97c75034179bd49941adc Mon Sep 17 00:00:00 2001
From: smlmbrt <sam.a.lambert@gmail.com>
Date: Fri, 4 Oct 2024 11:51:59 +0100
Subject: [PATCH 05/22] Update citation to published article

---
 CITATIONS.md               | 6 +++---
 README.md                  | 6 +++---
 assets/report/report.qmd   | 2 +-
 docs/explanation/match.rst | 2 +-
 docs/index.rst             | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/CITATIONS.md b/CITATIONS.md
index 7cba0759..b79a4f98 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -1,6 +1,6 @@
 # PGScatalog/pgsc_calc: Citations
 
-> Lambert, Wingfield _et al._ (2024) The Polygenic Score Catalog: new functionality and tools to enable FAIR research. medRxiv. doi:[10.1101/2024.05.29.24307783](https://doi.org/10.1101/2024.05.29.24307783).
+> Lambert, Wingfield _et al._ (2024) Enhancing the Polygenic Score Catalog with tools for score calculation and ancestry normalization. Nature Genetics. doi:[10.1038/s41588-024-01937-x](https://doi.org/10.1038/s41588-024-01937-x).
   
 ## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/)
 
@@ -13,11 +13,11 @@
 ## Pipeline tools
 
 * [PGS Catalog API](https://pubmed.ncbi.nlm.nih.gov/33692568/)
-    > Lambert SA, Gil L, Jupp S, Ritchie SC, Xu Y, Buniello A, McMahon A, Abraham G, Chapman M, Parkinson H, Danesh J. The Polygenic Score Catalog as an open database for reproducibility and systematic evaluation. Nature Genetics. 2021 Apr;53(4):420-5. doi: 10.1038/s41588-021-00783-5. PubMed PMID: 33692568.
+    > Lambert, Wingfield _et al._ (2024) Enhancing the Polygenic Score Catalog with tools for score calculation and ancestry normalization. Nature Genetics. doi:[10.1038/s41588-024-01937-x](https://doi.org/10.1038/s41588-024-01937-x).
 
 * [pygscatalog](https://github.com/PGScatalog/pygscatalog)
 
-    > Lambert, Wingfield _et al._ (2024) The Polygenic Score Catalog: new functionality and tools to enable FAIR research. medRxiv. doi:[10.1101/2024.05.29.24307783](https://doi.org/10.1101/2024.05.29.24307783).
+    > Lambert, Wingfield _et al._ (2024) Enhancing the Polygenic Score Catalog with tools for score calculation and ancestry normalization. Nature Genetics. doi:[10.1038/s41588-024-01937-x](https://doi.org/10.1038/s41588-024-01937-x).
     
 * [PLINK 2](https://pubmed.ncbi.nlm.nih.gov/25722852/)
     > Chang CC, Chow CC, Tellier LC, Vattikuti S, Purcell SM, Lee JJ. Second-generation PLINK: rising to the challenge of larger and richer datasets. Gigascience. 2015 Dec 1;4(1):s13742-015. doi: 10.1186/s13742-015-0047-8. PubMed PMID: 25722852. PubMed Central PMCID: PMC4342193.
diff --git a/README.md b/README.md
index 4617e90c..e94455df 100644
--- a/README.md
+++ b/README.md
@@ -104,9 +104,9 @@ from Aoife McMahon (EBI). Development of new features, testing, and code review
 is ongoing including Inouye lab members (Rodrigo Canovas, Scott Ritchie) and others. If 
 you use the tool we ask you to cite our paper describing software and updated PGS Catalog resource:
 
-- >Lambert, Wingfield _et al._ (2024) The Polygenic Score Catalog: new functionality
-  and tools to enable FAIR research.  medRxiv.
-  doi:[10.1101/2024.05.29.24307783](https://doi.org/10.1101/2024.05.29.24307783).
+- >Lambert, Wingfield _et al._ (2024) Enhancing the Polygenic Score Catalog with tools for score 
+  calculation and ancestry normalization. Nature Genetics.
+  doi:[10.1038/s41588-024-01937-x](https://doi.org/10.1038/s41588-024-01937-x).
 
 This pipeline is distrubuted under an [Apache License](LICENSE) amd uses code and 
 infrastructure developed and maintained by the [nf-core](https://nf-co.re) community 
diff --git a/assets/report/report.qmd b/assets/report/report.qmd
index 7a89a128..12b7e171 100644
--- a/assets/report/report.qmd
+++ b/assets/report/report.qmd
@@ -504,7 +504,7 @@ stringr::str_glue("{params$sampleset}/score/aggregated_scores.txt.gz")
 
 > Samuel A. Lambert, Benjamin Wingfield, Joel T. Gibson, Laurent Gil, Santhi Ramachandran, Florent Yvon, Shirin Saverimuttu, Emily Tinsley, Elizabeth Lewis, Scott C. Ritchie, Jingqin Wu, Rodrigo Canovas, Aoife McMahon, Laura W. Harris, Helen Parkinson, Michael Inouye.
 Enhancing the Polygenic Score Catalog with tools for score calculation and ancestry normalization.
-Nature Genetics | doi: [10.1038/s41588-024-01937-x](https://doi.org/10.1038/s41588-024-01937-x)
+Nature Genetics (2024) | doi: [10.1038/s41588-024-01937-x](https://doi.org/10.1038/s41588-024-01937-x)
 
 ::: {.callout-important}
 For scores from the PGS Catalog, please remember to cite the original publications from which they came (these are listed in the metadata table).
diff --git a/docs/explanation/match.rst b/docs/explanation/match.rst
index 85449b4c..3bcdb131 100644
--- a/docs/explanation/match.rst
+++ b/docs/explanation/match.rst
@@ -22,7 +22,7 @@ What is matching?
 
 The calculator carefully checks that variants (rows) in a scoring file are present in your target genomes.
 
-The matching procedure `is described in the preprint supplement <https://www.medrxiv.org/content/10.1101/2024.05.29.24307783v1.supplementary-material>`_. 
+The matching procedure `is described in the supplement of our recent publication <https://www.nature.com/articles/s41588-024-01937-x#Sec6>`_.
 
 The matching procedure never makes any changes to target genome data and only seeks to match variants in the scoring file to the genome.  
 
diff --git a/docs/index.rst b/docs/index.rst
index 96863956..1d9932ea 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -164,13 +164,13 @@ Citations
 
 If you use ``pgscatalog/pgsc_calc`` in your analysis, please cite:
 
-    Lambert, Wingfield, `et al.` (2024) The Polygenic Score Catalog: new functionality and tools to enable FAIR research. medRxiv. doi:`10.1101/2024.05.29.24307783`_.
+    Lambert, Wingfield, `et al.` (2024) Enhancing the Polygenic Score Catalog with tools for score calculation and ancestry normalization. Nature Genetics. doi:`10.1038/s41588-024-01937-x`_.
 
 In addition, please remember to cite the primary publications for any PGS Catalog scores
 you use in your analyses, and the underlying data/software tools described in the `citations file`_.
 
 .. _citations file: https://github.com/PGScatalog/pgsc_calc/blob/master/CITATIONS.md
-.. _10.1101/2024.05.29.24307783: https://doi.org/10.1101/2024.05.29.24307783
+.. _10.1038/s41588-024-01937-x: https://doi.org/10.1038/s41588-024-01937-x
 
 
 License Information

From c459592fe6dcb69838dd5e627d458ad73bd0c335 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Mon, 7 Oct 2024 11:37:58 +0100
Subject: [PATCH 06/22] add modern tar to zstd

---
 environments/zstd/environment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/environments/zstd/environment.yml b/environments/zstd/environment.yml
index 856cc981..4b892f91 100644
--- a/environments/zstd/environment.yml
+++ b/environments/zstd/environment.yml
@@ -5,3 +5,4 @@ channels:
   - nodefaults  
 dependencies:
   - zstd=1.4.8
+  - tar=1.34

From d89eb7d1013c23403383720025337e53c24db9f9 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Wed, 9 Oct 2024 14:15:26 +0100
Subject: [PATCH 07/22] update report with support for new log format

---
 assets/report/{img => }/PGS_Logo.png          | Bin
 assets/report/logo.css                        |   2 +-
 .../{img => }/pgs_header_background.png       | Bin
 assets/report/report.qmd                      |  69 +++++++++++-------
 conf/modules.config                           |   4 +-
 environments/pgscatalog_utils/environment.yml |   2 +-
 modules/local/score_report.nf                 |   4 +-
 subworkflows/local/report.nf                  |   8 +-
 8 files changed, 54 insertions(+), 35 deletions(-)
 rename assets/report/{img => }/PGS_Logo.png (100%)
 rename assets/report/{img => }/pgs_header_background.png (100%)

diff --git a/assets/report/img/PGS_Logo.png b/assets/report/PGS_Logo.png
similarity index 100%
rename from assets/report/img/PGS_Logo.png
rename to assets/report/PGS_Logo.png
diff --git a/assets/report/logo.css b/assets/report/logo.css
index 396a5358..bc1499bb 100644
--- a/assets/report/logo.css
+++ b/assets/report/logo.css
@@ -1,5 +1,5 @@
 .quarto-title-block .quarto-title-banner {
-  background-image: url(img/PGS_Logo.png), url(img/pgs_header_background.png);
+  background-image: url(PGS_Logo.png), url(pgs_header_background.png);
   background-size: 90px, 170px;
   background-position: left, right;
   background-repeat: no-repeat;
diff --git a/assets/report/img/pgs_header_background.png b/assets/report/pgs_header_background.png
similarity index 100%
rename from assets/report/img/pgs_header_background.png
rename to assets/report/pgs_header_background.png
diff --git a/assets/report/report.qmd b/assets/report/report.qmd
index 12b7e171..a8058f18 100644
--- a/assets/report/report.qmd
+++ b/assets/report/report.qmd
@@ -88,7 +88,6 @@ message(params$version)
 
 ```{r load_scorefiles}
 json_list <- jsonlite::fromJSON(params$log_scorefiles, simplifyVector = FALSE)
-json_scorefiles <- unlist(json_list, recursive=FALSE)
 
 link_traits <- function(trait_efo, mapped) {
   if (length(trait_efo) == 0) {
@@ -99,12 +98,12 @@ link_traits <- function(trait_efo, mapped) {
 }
 
 extract_traits <- function(x) {
-  trait_efo <- purrr::map(json_scorefiles, ~ extract_chr_handle_null(.x, "trait_efo"))
-  mapped <- purrr::map(json_scorefiles, ~ extract_chr_handle_null(.x, "trait_mapped"))
+  trait_efo <- purrr::map(x, ~ extract_chr_handle_null(.x$header, "trait_efo"))
+  mapped <- purrr::map(x, ~ extract_chr_handle_null(.x$header, "trait_mapped"))
   trait_display <- purrr::map2(trait_efo, mapped, link_traits)
   mapped_trait_links <- purrr::map_chr(trait_display, ~ paste(.x, collapse = "<br />"))
-  reported_traits <- purrr::map(json_scorefiles, ~ extract_chr_handle_null(.x, "trait_reported"))
-  purrr::map2(reported_traits, mapped_trait_links, ~ {
+  reported_traits <- purrr::map(x, ~ extract_chr_handle_null(.x$header, "trait_reported"))  # read from $header, like trait_efo / trait_mapped
+  purrr::map2_chr(reported_traits, mapped_trait_links, ~ {
     stringr::str_glue("<u>Reported trait:</u> {.x} <br /> <u>Mapped trait(s):</u> {.y}")
   })
 }
@@ -133,21 +132,18 @@ annotate_genome_build <- function(original_build, harmonised_build) {
   return(stringr::str_glue("<u>Original build:</u> {original_build} <br /> <u>Harmonised build:</u> {harmonised_build}"))
 }
 
-tibble::tibble(json = json_scorefiles) %>%
-  # extract fields from json list
-  mutate(pgs_id = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "pgs_id")),
-         pgs_name = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "pgs_name")),
-         pgp_id = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "pgp_id")),
-         citation = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "citation")),
-         # trait_efo = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "trait_efo")),
-         # trait_reported = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "trait_reported")),
-         # trait_mapped = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "trait_mapped")),
-         trait_display = extract_traits(.),
-         genome_build = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "genome_build")),
-         harmonised_build = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "HmPOS_build")),
-         n_variants = purrr::map_chr(json, ~ .x$variants_number),
-         accession = stringr::str_replace_all(names(json), "_", " ")
-         ) %>%
+# extract fields from json list
+tibble(
+  pgs_id = map_chr(json_list, "pgs_id"),
+  pgs_name = map_chr(json_list, ~ extract_chr_handle_null(.x$header, "pgs_name")),
+  pgp_id = map_chr(json_list, ~ extract_chr_handle_null(.x$header, "pgp_id")),
+  citation = map_chr(json_list, ~ extract_chr_handle_null(.x$header, "citation")),
+  trait_display = extract_traits(json_list),
+  genome_build = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "genome_build")),
+  harmonised_build = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "HmPOS_build")),
+  n_variants = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "variants_number")),
+  compatible_effect_type = map_lgl(json_list, "compatible_effect_type"),
+  has_complex_alleles = map_lgl(json_list, "has_complex_alleles")) %>%
   # add links to pgs catalog identifiers
   mutate(pgs_id = purrr::map_chr(pgs_id, ~ link_pgscatalog(.x, "score")),
          pgp_id = purrr::map_chr(pgp_id, ~ link_pgscatalog(.x, "publication"))) %>%
@@ -156,7 +152,7 @@ tibble::tibble(json = json_scorefiles) %>%
          pgs_id = purrr::map2_chr(pgs_id, pgs_name, ~ add_note(.x, .y)),
          genome_build = purrr::map2_chr(genome_build, harmonised_build, ~ annotate_genome_build(.x, .y))) %>% 
   # pick columns
-  select(accession, pgs_id, pgp_id, trait_display, n_variants, genome_build) -> scorefile_metadata
+  select(pgs_id, pgp_id, trait_display, n_variants, genome_build, has_complex_alleles, compatible_effect_type) -> scorefile_metadata
 ```
 
 :::{.column-body-outset}
@@ -167,12 +163,13 @@ DT::datatable(
   rownames = FALSE,
   escape = FALSE,
   colnames = c(
-    "Scoring file" = "accession",
     "Polygenic Score ID" = "pgs_id",
     "Publication" = "pgp_id",
     "Traits" = "trait_display",
     "Number of variants" = "n_variants",
-    "Genome build" = "genome_build"
+    "Genome build" = "genome_build",
+    "Complex alleles present?" = "has_complex_alleles",
+    "Effect types compatible?" = "compatible_effect_type"
   ),
   extensions = 'Buttons',
   options = list(dom = 'Bfrtip',
@@ -182,6 +179,25 @@ DT::datatable(
 
 :::
 
+```{asis, echo = any(!scorefile_metadata$compatible_effect_type)}
+::: {.callout-warning title="Incompatible effect types detected"}
+* Some scoring files contain variants with dosage dependent effect weights (for example, [PGS002253](https://www.pgscatalog.org/score/PGS002253/))
+* Scoring files like this are not supported by the PGS Catalog Calculator
+  * Scoring files with non-additive effect types are skipped and not processed
+  * Variants with recessive or dominant effect types are supported
+:::
+```
+
+```{asis, echo = any(scorefile_metadata$has_complex_alleles)}
+::: {.callout-warning title="Complex alleles detected"}
+* Some scoring files contain complex alleles (e.g. APOE / HLA / CYP)
+* It's important to preprocess your genomes correctly to include complex alleles
+    * e.g. using SNP2HLA to include HLA alleles
+* If you don't do this then it's unlikely that complex alleles will be matched. This means that these variants won't contribute to the final calculated PGS.
+* Please check [PGS Catalog Curation Guidelines - Appendix A – Special Cases](https://www.pgscatalog.org/docs/curation) for more detailed information
+:::
+```
+
 # Variant matching 
 
 ## Parameters
@@ -523,13 +539,12 @@ For scores from the PGS Catalog, please remember to cite the original publicatio
 # as of 2023-12-12 only non-default licenses are recorded in the scoring file header
 default_ebi_terms <- "PGS obtained from the Catalog should be cited appropriately, and used in accordance with any licensing restrictions set by the authors. See EBI Terms of Use (https://www.ebi.ac.uk/about/terms-of-use/) for additional details."
 
-tibble::tibble(json = json_scorefiles) %>%
-  mutate(pgs_id = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "pgs_id")),
-         license_text = purrr::map_chr(json, ~ extract_chr_handle_null(.x, "license"))) %>%
+tibble(
+    pgs_id = map_chr(json_list, "pgs_id"),
+    license_text = map_chr(json_list, ~ extract_chr_handle_null(.x$header, "license"))) %>%
   mutate(license_text = ifelse(license_text == "", default_ebi_terms, license_text)) %>%
   # display license terms for files in the PGS Catalog only (with a PGS ID)
   filter(startsWith(pgs_id, "PGS")) %>%
-  select(-json) %>%
   DT::datatable(., colnames = c(
       "PGS ID" = "pgs_id",
       "License text" = "license_text"
diff --git a/conf/modules.config b/conf/modules.config
index 102776a8..2538e91c 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -38,8 +38,8 @@ process {
         ext.conda = "$projectDir/environments/pgscatalog_utils/environment.yml"
         ext.docker = 'ghcr.io/pgscatalog/pygscatalog'
         ext.singularity = 'oras://ghcr.io/pgscatalog/pygscatalog'
-        ext.docker_version = ':pgscatalog-utils-1.3.1'
-        ext.singularity_version = ':pgscatalog-utils-1.3.1-singularity'
+        ext.docker_version = ':pgscatalog-utils-1.4.1'
+        ext.singularity_version = ':pgscatalog-utils-1.4.1-singularity'
     }
 
     withLabel: plink2 {
diff --git a/environments/pgscatalog_utils/environment.yml b/environments/pgscatalog_utils/environment.yml
index 550613a1..a0c5ecd1 100644
--- a/environments/pgscatalog_utils/environment.yml
+++ b/environments/pgscatalog_utils/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - nodefaults  
 dependencies:
-  - pgscatalog-utils=1.3.1
+  - pgscatalog-utils=1.4.1
diff --git a/modules/local/score_report.nf b/modules/local/score_report.nf
index 911b34d6..23814fb8 100644
--- a/modules/local/score_report.nf
+++ b/modules/local/score_report.nf
@@ -16,7 +16,7 @@ process SCORE_REPORT {
     tuple val(meta), path(scorefile), path(score_log), path(match_summary), path(ancestry)
     path intersect_count
     val reference_panel_name
-    path report_path
+    path(report_path, arity: '4') // 4 files expected: report, css, background image x2
 
     output:
     // includeInputs to correctly use $meta.id in publishDir path
@@ -40,7 +40,7 @@ process SCORE_REPORT {
     echo "keep_ambiguous   : $params.keep_ambiguous"    >> params.txt
     echo "min_overlap      : $params.min_overlap"       >> params.txt
     
-    quarto render $report_path -M "self-contained:true" \
+    quarto render report.qmd -M "self-contained:true" \
         -P score_path:$scorefile \
         -P sampleset:$meta.id \
         -P run_ancestry:$run_ancestry \
diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf
index 5fa7ca38..c86ac61a 100644
--- a/subworkflows/local/report.nf
+++ b/subworkflows/local/report.nf
@@ -82,8 +82,12 @@ workflow REPORT {
         .combine(log_scorefiles) // all samplesets have the same scorefile metadata
         .set { ch_report_input }
 
-    Channel.fromPath(file(projectDir / "assets" /"report" / "report.qmd", checkIfExists: true))
-        .set{report_path}
+    Channel.fromPath([file(projectDir / "assets" /"report" / "report.qmd", checkIfExists: true),
+        file(projectDir / "assets" /"report" / "logo.css", checkIfExists: true),
+        file(projectDir / "assets" /"report" / "PGS_Logo.png", checkIfExists: true),
+        file(projectDir / "assets" /"report" / "pgs_header_background.png", checkIfExists: true)])
+      .collect()
+      .set{ report_path }
 
     SCORE_REPORT( ch_report_input, intersect_count, reference_panel_name, report_path )
     ch_versions = ch_versions.mix(SCORE_REPORT.out.versions)

From d4fd211e50bb02f2466f99f5ec6481965bb5cec7 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Wed, 9 Oct 2024 14:30:54 +0100
Subject: [PATCH 08/22] add --verify_variants support

---
 modules/local/plink2_score.nf     | 20 --------------------
 modules/local/score_aggregate.nf  |  8 ++++++--
 subworkflows/local/apply_score.nf | 15 ++++++++++++++-
 3 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/modules/local/plink2_score.nf b/modules/local/plink2_score.nf
index 71bde2d1..5facf37f 100644
--- a/modules/local/plink2_score.nf
+++ b/modules/local/plink2_score.nf
@@ -64,16 +64,6 @@ process PLINK2_SCORE {
             $input ${geno.baseName} \
             --out ${output}
 
-        n_missing=\$(comm -3 <(zcat --force $scorefile | tail -n +2 | cut -f 1 | sort) <(sort ${output}.sscore.vars) | wc -l | tr -d ' ')
-
-        if [ \$n_missing -gt 0 ]
-        then
-          echo "ERROR: \$n_missing variant(s) missing from final calculated score!"
-          exit 1
-        else
-          echo "INFO: Scoring file variants match listed variants in sscore.vars"
-        fi
-
         cat <<-END_VERSIONS > versions.yml
         ${task.process.tokenize(':').last()}:
             plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' )
@@ -93,16 +83,6 @@ process PLINK2_SCORE {
             $input ${geno.baseName} \
             --out ${output}
 
-        n_missing=\$(comm -3 <(zcat --force $scorefile | tail -n +2 | cut -f 1 | sort) <(sort ${output}.sscore.vars) | wc -l | tr -d ' ')
-
-        if [ \$n_missing -gt 0 ]
-        then
-          echo "ERROR: \$n_missing variant(s) missing from final calculated score!"
-          exit 1
-        else
-          echo "INFO: Scoring file variants match listed variants in sscore.vars"
-        fi
-
         cat <<-END_VERSIONS > versions.yml
         ${task.process.tokenize(':').last()}:
             plink2: \$(plink2 --version 2>&1 | sed 's/^PLINK v//; s/ 64.*\$//' )
diff --git a/modules/local/score_aggregate.nf b/modules/local/score_aggregate.nf
index 66e544cf..1e3c4b3e 100644
--- a/modules/local/score_aggregate.nf
+++ b/modules/local/score_aggregate.nf
@@ -12,7 +12,9 @@ process SCORE_AGGREGATE {
         "${task.ext.docker}${task.ext.docker_version}" }"
 
     input:
-    tuple val(meta), path(scorefiles)
+    tuple val(meta), path(scorefiles) // calculated polygenic scores
+    path(scorefile_vars) // PGS scoring file
+    path(scored_vars) // variants _actually used_ to calculate scores
 
     output:
     tuple val(scoremeta), path("aggregated_scores.txt.gz"), emit: scores
@@ -21,7 +23,9 @@ process SCORE_AGGREGATE {
     script:
     scoremeta = meta.subMap('id')
     """
-    pgscatalog-aggregate -s $scorefiles -o . -v --no-split
+    # variants are always verified, so that variants in the scoring files
+    # overlap perfectly with the scored variants
+    pgscatalog-aggregate -s $scorefiles -o . -v --no-split --verify_variants
 
     cat <<-END_VERSIONS > versions.yml
     ${task.process.tokenize(':').last()}:
diff --git a/subworkflows/local/apply_score.nf b/subworkflows/local/apply_score.nf
index 6b81b70b..defb20dd 100644
--- a/subworkflows/local/apply_score.nf
+++ b/subworkflows/local/apply_score.nf
@@ -123,7 +123,20 @@ workflow APPLY_SCORE {
         .map { [ it.first().subMap("id"), it.tail().findAll { !(it instanceof LinkedHashMap) }]}
         .set { ch_scores }
 
-    SCORE_AGGREGATE ( ch_scores )
+    // pgscatalog-aggregate --verify_variants notes:
+    // Checks that variant IDs in the scorefiles match the IDs of scored variants perfectly
+    // Just dump all of the supporting files into the same directory: don't do any fancy joins
+    PLINK2_SCORE.out.vars_scored
+        .collect()
+        .set { ch_vars_scored }
+
+    ch_target_scorefile.flatMap { it.last() }
+        .filter(Path)
+        .collect()
+        .set{ ch_scorefile_verify }
+
+    SCORE_AGGREGATE ( ch_scores, ch_vars_scored, ch_scorefile_verify )
+    
     ch_versions = ch_versions.mix(SCORE_AGGREGATE.out.versions)
 
     emit:

From c9ae1cf364657bf1eab72a3cfe628a1a4031fbeb Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Wed, 9 Oct 2024 14:31:05 +0100
Subject: [PATCH 09/22] update section name

---
 assets/report/report.qmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/report/report.qmd b/assets/report/report.qmd
index a8058f18..36b1b08d 100644
--- a/assets/report/report.qmd
+++ b/assets/report/report.qmd
@@ -194,7 +194,7 @@ DT::datatable(
 * It's important to preprocess your genomes correctly to include complex alleles
     * e.g. using SNP2HLA to include HLA alleles
 * If you don't do this then it's unlikely that complex alleles will be matched. This means that these variants won't contribute to the final calculated PGS.
-* Please check [PGS Catalog Curation Guidelines - Appendix A – Special Cases](https://www.pgscatalog.org/docs/curation) for more detailed information
+* Please check [Appendix A - Curation of PGS including complex alleles](https://www.pgscatalog.org/docs/curation) for more detailed information
 :::
 ```
 

From 831775db5dbf550414259bc72154b7f25c77f0b8 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Wed, 9 Oct 2024 14:54:40 +0100
Subject: [PATCH 10/22] add tooltip text to score summary table

---
 assets/report/report.qmd | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/assets/report/report.qmd b/assets/report/report.qmd
index 36b1b08d..3fcf5cac 100644
--- a/assets/report/report.qmd
+++ b/assets/report/report.qmd
@@ -158,23 +158,33 @@ tibble(
 :::{.column-body-outset}
 
 ```{r, echo=FALSE}
+tooltip_text <- c(
+  "Polygenic Score ID" = "Unique identifier for the polygenic score.",
+  "Publication" = "Reference publication for the score.",
+  "Traits" = "Traits associated with the score.",
+  "Number of variants" = "Total number of genetic variants (defined in the header)",
+  "Genome build" = "The genome assembly version used.",
+  "Complex alleles present?" = "Describes if complex non-SNP alleles included in the scoring file, e.g. APOE/HLA. These variants are excluded from the PGS calculation in the current version",
+  "Effect types compatible?" = "Describes if the scoring file is compatible with the Calculator. Scores with dosage-specific weights are removed."
+)
+
 DT::datatable(
   scorefile_metadata,
   rownames = FALSE,
   escape = FALSE,
-  colnames = c(
-    "Polygenic Score ID" = "pgs_id",
-    "Publication" = "pgp_id",
-    "Traits" = "trait_display",
-    "Number of variants" = "n_variants",
-    "Genome build" = "genome_build",
-    "Complex alleles present?" = "has_complex_alleles",
-    "Effect types compatible?" = "compatible_effect_type"
+  colnames = setNames(
+    paste0('<span title="', tooltip_text, '">', names(tooltip_text), '</span>'), 
+    NULL
   ),
   extensions = 'Buttons',
   options = list(dom = 'Bfrtip',
                  buttons = c('csv'))
-)
+) %>%
+    DT::formatStyle('has_complex_alleles', 
+                  backgroundColor = DT::styleEqual(c(FALSE, TRUE), c('#a6dba0', '#c2a5cf'))) %>% 
+    DT::formatStyle('compatible_effect_type', 
+                  backgroundColor = DT::styleEqual(c(FALSE, TRUE), c('#c2a5cf', '#a6dba0')))
+
 ```
 
 :::
@@ -182,18 +192,15 @@ DT::datatable(
 ```{asis, echo = any(!scorefile_metadata$compatible_effect_type)}
 ::: {.callout-warning title="Incompatible effect types detected"}
 * Some scoring files contain variants with dosage dependent effect weights (for example, [PGS002253](https://www.pgscatalog.org/score/PGS002253/))
-* Scoring files like this are not supported by the PGS Catalog Calculator
-  * Scoring files with non-additive effect types are skipped and not processed
-  * Variants with recessive or dominant effect types are supported
+* Scores with dosage-specific weights are removed from calculation 
+* Scores that contain variants with recessive, dominant, or additive effect types are supported
 :::
 ```
 
 ```{asis, echo = any(scorefile_metadata$has_complex_alleles)}
 ::: {.callout-warning title="Complex alleles detected"}
 * Some scoring files contain complex alleles (e.g. APOE / HLA / CYP)
-* It's important to preprocess your genomes correctly to include complex alleles
-    * e.g. using SNP2HLA to include HLA alleles
-* If you don't do this then it's unlikely that complex alleles will be matched. This means that these variants won't contribute to the final calculated PGS.
+* These variants are excluded from the PGS calculation in the current version
 * Please check [Appendix A - Curation of PGS including complex alleles](https://www.pgscatalog.org/docs/curation) for more detailed information
 :::
 ```

From 676030c823a4b091b8a44cb4d79b96306cf948a1 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Wed, 9 Oct 2024 15:08:08 +0100
Subject: [PATCH 11/22] bump version number to 2.0.0

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 855b5f1f..6b52bd11 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -261,7 +261,7 @@ manifest {
     description     = 'The Polygenic Score Catalog Calculator is a nextflow pipeline for polygenic score calculation'
     mainScript      = 'main.nf'
     nextflowVersion = '>=23.10.0'
-    version         = '2.0.0-beta.3'
+    version         = '2.0.0'
 }
 
 // Load modules.config for DSL2 module specific options

From 66373b1495581935a1e79bb2c5f5460d38609535 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Thu, 10 Oct 2024 10:32:50 +0100
Subject: [PATCH 12/22] fix --run_ancestry variant verify

---
 modules/local/ancestry/relabel_scorefiles.nf | 16 +++++++++++-----
 subworkflows/local/apply_score.nf            | 18 +++++++++++++++---
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/modules/local/ancestry/relabel_scorefiles.nf b/modules/local/ancestry/relabel_scorefiles.nf
index dccbb632..e186c4bb 100644
--- a/modules/local/ancestry/relabel_scorefiles.nf
+++ b/modules/local/ancestry/relabel_scorefiles.nf
@@ -3,7 +3,7 @@ process RELABEL_SCOREFILES {
     label 'process_medium'
     label 'pgscatalog_utils' // controls conda, docker, + singularity options
 
-    tag "$meta.id $meta.effect_type $target_format"
+    tag "reference $meta.effect_type $target_format"
 
     conda "${task.ext.conda}"
 
@@ -16,13 +16,12 @@ process RELABEL_SCOREFILES {
     tuple val(meta), path(target), path(matched)
 
     output:
-    tuple val(relabel_meta), path("${output}"), emit: relabelled
+    tuple val(relabel_meta), path("reference*"), emit: relabelled
     path "versions.yml", emit: versions
 
     script:
     target_format = target.getName().tokenize('.')[1] // test.tar.gz -> tar, test.var -> var
     relabel_meta = meta.plus(['target_format': target_format]) // .plus() returns a new map
-    output_mode = "--split --combined" // always output split and combined data to make life easier
     col_from = "ID_TARGET"
     col_to = "ID_REF"
     output = "${meta.id}.${target_format}*"
@@ -32,11 +31,18 @@ process RELABEL_SCOREFILES {
         --col_to $col_to \
         --target_file $target \
         --target_col ID \
-        --dataset ${meta.id}.${target_format} \
+        --dataset reference \
         --verbose \
-        $output_mode \
+        --combined \
         --outdir \$PWD
 
+    # TODO: improve pgscatalog-relabel so you can set output names precisely
+    # use some unpleasant sed to keep a consistent naming scheme
+    # hgdp_ALL_additive_0.scorefile.gz -> reference_ALL_additive_0.scorefile.gz 
+    output=\$(echo $target | sed 's/^[^_]*_/reference_/')
+
+    mv reference_ALL_relabelled.gz \$output
+
     cat <<-END_VERSIONS > versions.yml
     ${task.process.tokenize(':').last()}:
         pgscatalog.core: \$(echo \$(python -c 'import pgscatalog.core; print(pgscatalog.core.__version__)'))
diff --git a/subworkflows/local/apply_score.nf b/subworkflows/local/apply_score.nf
index defb20dd..f91a6c58 100644
--- a/subworkflows/local/apply_score.nf
+++ b/subworkflows/local/apply_score.nf
@@ -125,7 +125,7 @@ workflow APPLY_SCORE {
 
     // pgscatalog-aggregate --verify_variants notes:
     // Checks that variant IDs in the scorefiles match the IDs of scored variants perfectly
-    // Just dump all of the supporting files into the same directory: don't do any fancy joins
+    // Just dump all of the supporting files into the same directory: don't do any fancy channel manipulation
     PLINK2_SCORE.out.vars_scored
         .collect()
         .set { ch_vars_scored }
@@ -133,10 +133,22 @@ workflow APPLY_SCORE {
     ch_target_scorefile.flatMap { it.last() }
         .filter(Path)
         .collect()
-        .set{ ch_scorefile_verify }
+        .set{ ch_target_scorefile_flat }
 
-    SCORE_AGGREGATE ( ch_scores, ch_vars_scored, ch_scorefile_verify )
+    // note, for the calculated score:
+    // reference_ALL_additive_0.sscore.zst (ch_scores)
+    // --verify_variants expects the following files in the same directory
+    // reference_ALL_additive_0.sscore.vars (ch_vars_scored)
+    // reference_ALL_additive_0.scorefile.gz (ch_verify_scorefiles)
+
+    ch_apply_ref.flatMap { it.last() }
+        .filter(Path)
+        .mix( ch_target_scorefile_flat )
+        .collect()
+        .set{ ch_verify_scorefiles }
     
+    SCORE_AGGREGATE ( ch_scores, ch_vars_scored, ch_verify_scorefiles )
+
     ch_versions = ch_versions.mix(SCORE_AGGREGATE.out.versions)
 
     emit:

From bd72486b196b00c8e2774d9fe03a120ff9bc2561 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Thu, 10 Oct 2024 10:49:15 +0100
Subject: [PATCH 13/22] fix pgscatalog package version checks

---
 tests/modules/combine/test.yml           | 2 +-
 tests/modules/download/test.yml          | 4 ++--
 tests/modules/match/test.yml             | 4 ++--
 tests/subworkflows/test_apply_score.yml  | 2 +-
 tests/subworkflows/test_liftover_run.yml | 4 ++--
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/modules/combine/test.yml b/tests/modules/combine/test.yml
index ba6c16dc..3ddbcdbf 100644
--- a/tests/modules/combine/test.yml
+++ b/tests/modules/combine/test.yml
@@ -14,4 +14,4 @@
         - "effect_weight"
     - path: output/combine/versions.yml
       contains:
-        - "pgscatalog.core: 0.2.2"
\ No newline at end of file
+        - "pgscatalog.core: 0.3.1"
\ No newline at end of file
diff --git a/tests/modules/download/test.yml b/tests/modules/download/test.yml
index 54323166..c1c093a8 100644
--- a/tests/modules/download/test.yml
+++ b/tests/modules/download/test.yml
@@ -8,7 +8,7 @@
     - path: output/download/PGS000001_hmPOS_GRCh37.txt.gz
     - path: output/download/versions.yml
       contains:
-        - "pgscatalog.core: 0.2.2"
+        - "pgscatalog.core: 0.3.1"
       
 - name: pgscatalog test --efo_trait --pgp_id and --pgs_id
   command: nextflow run ./tests/modules/download -entry testmultipleaccessions -c ./tests/config/nextflow.config
@@ -24,7 +24,7 @@
     - path: output/download/PGS002054_hmPOS_GRCh37.txt.gz
     - path: output/download/versions.yml
       contains:
-        - "pgscatalog.core: 0.2.2"
+        - "pgscatalog.core: 0.3.1"
 
 - name: pgscatalog test bad accession
   command: nextflow run ./tests/modules/download -entry testbadaccession -c ./tests/config/nextflow.config
diff --git a/tests/modules/match/test.yml b/tests/modules/match/test.yml
index e39e2b94..d7c477cf 100644
--- a/tests/modules/match/test.yml
+++ b/tests/modules/match/test.yml
@@ -7,7 +7,7 @@
   files:
     - path: output/test/match/versions.yml
       contains:
-        - "pgscatalog.match: 0.3.1"
+        - "pgscatalog.match: 0.3.3"
 # can't test IPC output (not published)        
 
 - name: test match combine module
@@ -20,7 +20,7 @@
   files:
     - path: output/test/match/versions.yml
       contains:
-        - "pgscatalog.match: 0.3.1"
+        - "pgscatalog.match: 0.3.3"
     - path: output/test/match/test_ALL_additive_0.scorefile.gz           
       contains:
         - "PGS001229"     
diff --git a/tests/subworkflows/test_apply_score.yml b/tests/subworkflows/test_apply_score.yml
index 9c322606..43fd3bc5 100644
--- a/tests/subworkflows/test_apply_score.yml
+++ b/tests/subworkflows/test_apply_score.yml
@@ -18,7 +18,7 @@
         - "SUM"
     - path: output/score/versions.yml
       contains:
-        - "pgscatalog.calc: 0.2.2"
+        - "pgscatalog.calc: 0.3.0"
       must_not_contain:
         - "percentile_MostSimilarPop"
         - "Z_MostSimilarPop"
diff --git a/tests/subworkflows/test_liftover_run.yml b/tests/subworkflows/test_liftover_run.yml
index e02c2010..053f6192 100644
--- a/tests/subworkflows/test_liftover_run.yml
+++ b/tests/subworkflows/test_liftover_run.yml
@@ -11,7 +11,7 @@
       - "5297845"
     - path: output/combine/versions.yml      
       contains:
-        - "pgscatalog.core: 0.2.2"
+        - "pgscatalog.core: 0.3.1"
 
 - name: test input check subworkflow with liftover 37to38
   command: nextflow run main.nf --only_input --scorefile ./assets/examples/scorefiles/customgrch37.txt --liftover --target_build GRCh38 -c ./tests/config/nextflow.config --hg19_chain https://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz --hg38_chain https://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/hg38ToHg19.over.chain.gz
@@ -26,4 +26,4 @@
       - "5237785"
     - path: output/combine/versions.yml
       contains:
-        - "pgscatalog.core: 0.2.2"
+        - "pgscatalog.core: 0.3.1"

From 396351fe19f18d8f999eba2cb7fa9bb403e41ad5 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Thu, 10 Oct 2024 11:23:52 +0100
Subject: [PATCH 14/22] fix test_processed_variants

---
 tests/subworkflows/test_apply_score.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/subworkflows/test_apply_score.py b/tests/subworkflows/test_apply_score.py
index 6453caff..f6c4a0d2 100644
--- a/tests/subworkflows/test_apply_score.py
+++ b/tests/subworkflows/test_apply_score.py
@@ -42,7 +42,7 @@ def test_processed_variants(workflow_dir):
     """Make sure n_lines in scorefile == --score XXX variants processed in log"""
     # find directories with scoring file variants in them
     scoring_variants = [
-        pathlib.Path(x)
+        pathlib.Path(workflow_dir / x)
         for x in glob.glob("work/**/**/*.sscore.vars", root_dir=workflow_dir)
     ]
     not_symlinks = [not x.is_symlink() for x in scoring_variants]

From 41d67f27e38e251cab94700532dd9b98ef418931 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Thu, 10 Oct 2024 13:34:32 +0100
Subject: [PATCH 15/22] Remove conda defaults channel from nextflow.config

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 6b52bd11..f1910875 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -128,7 +128,7 @@ profiles {
         podman.enabled         = false
         shifter.enabled        = false
         charliecloud.enabled   = false
-        channels               = ['conda-forge', 'bioconda', 'defaults']
+        channels               = ['conda-forge', 'bioconda', 'nodefaults']
         apptainer.enabled      = false
     }
     mamba {

From 38cad8b5a3d842cfc17f8206d6eddc52aef38814 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Mon, 14 Oct 2024 11:21:17 +0100
Subject: [PATCH 16/22] update process resource definition

---
 docs/how-to/bigjob.rst | 223 +++++++++++++++++++++--------------------
 1 file changed, 116 insertions(+), 107 deletions(-)

diff --git a/docs/how-to/bigjob.rst b/docs/how-to/bigjob.rst
index 8940b616..0a9c1c52 100644
--- a/docs/how-to/bigjob.rst
+++ b/docs/how-to/bigjob.rst
@@ -80,113 +80,122 @@ on UK Biobank with a SLURM cluster:
 .. code-block:: text
 
     process {
-        errorStrategy = 'retry'
-        maxRetries = 3
-        maxErrors = '-1'
-        executor = 'slurm'
-
-        withName: 'DOWNLOAD_SCOREFILES' {
-          cpus = 1
-          memory = { 1.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'COMBINE_SCOREFILES' {
-          cpus = 1
-          memory = { 8.GB * task.attempt }
-          time = { 2.hour * task.attempt }
-        }
-
-        withName: 'PLINK2_MAKEBED' {
-          cpus = 2
-          memory = { 8.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'RELABEL_IDS' {
-          cpus = 1
-          memory = { 16.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'PLINK2_ORIENT' {
-          cpus = 2
-          memory = { 8.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'DUMPSOFTWAREVERSIONS' {
-          cpus = 1
-          memory = { 1.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'ANCESTRY_ANALYSIS' {
-          cpus = { 1 * task.attempt }
-          memory = { 8.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'SCORE_REPORT' {
-          cpus = 2
-          memory = { 8.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'EXTRACT_DATABASE' {
-          cpus = 1
-          memory = { 8.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'PLINK2_RELABELPVAR' {
-          cpus = 2
-          memory = { 16.GB * task.attempt }
-          time = { 2.hour * task.attempt }
-        }
-
-        withName: 'INTERSECT_VARIANTS' {
-          cpus = 2
-          memory = { 8.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'MATCH_VARIANTS' {
-          cpus = 2
-          memory = { 32.GB * task.attempt }
-          time = { 6.hour * task.attempt }
-        }
-
-        withName: 'FILTER_VARIANTS' {
-          cpus = 2
-          memory = { 16.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'MATCH_COMBINE' {
-          cpus = 4
-          memory = { 64.GB * task.attempt }
-          time = { 6.hour * task.attempt }
-        }
-
-        withName: 'FRAPOSA_PCA' {
-          cpus = 2
-          memory = { 8.GB * task.attempt }
-          time = { 1.hour * task.attempt }
-        }
-
-        withName: 'PLINK2_SCORE' {
-          cpus = 2
-          memory = { 8.GB * task.attempt }
-          time = { 12.hour * task.attempt }
-        }
-
-        withName: 'SCORE_AGGREGATE' {
-          cpus = 2
-          memory = { 16.GB * task.attempt }
-          time = { 4.hour * task.attempt }
-        }
-    }
+      errorStrategy = 'retry'
+      maxRetries = 3
+      maxErrors = '-1'
+      executor = 'slurm'
+
+      withName: 'SAMPLESHEET_JSON' {
+        cpus = 1
+        memory = { 1.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'DOWNLOAD_SCOREFILES' {
+        cpus = 1
+        memory = { 1.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'COMBINE_SCOREFILES' {
+        cpus = 1
+        memory = { 16.GB * task.attempt }
+        time = { 2.hour * task.attempt }
+      }
+
+      withName: 'PLINK2_MAKEBED' {
+        cpus = 1
+        memory = { 8.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'RELABEL_IDS' {
+        cpus = 1
+        memory = { 16.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'PLINK2_ORIENT' {
+        cpus = 1
+        memory = { 8.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'DUMPSOFTWAREVERSIONS' {
+        cpus = 1
+        memory = { 1.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'ANCESTRY_ANALYSIS' {
+        cpus = 1
+        memory = { 8.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'SCORE_REPORT' {
+        cpus = 1
+        memory = { 8.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'EXTRACT_DATABASE' {
+        cpus = 1
+        memory = { 8.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'PLINK2_RELABELPVAR' {
+        cpus = 1
+        memory = { 16.GB * task.attempt }
+        time = { 2.hour * task.attempt }
+      }
+
+      withName: 'INTERSECT_VARIANTS' {
+        cpus = 1
+        memory = { 8.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'INTERSECT_THINNED' {
+        cpus = 1
+        memory = { 8.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'MATCH_VARIANTS' {
+        cpus = 2
+        memory = { 32.GB * task.attempt }
+        time = { 6.hour * task.attempt }
+      }
+
+      withName: 'FILTER_VARIANTS' {
+        cpus = 1
+        memory = { 16.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'MATCH_COMBINE' {
+        cpus = 2
+        memory = { 64.GB * task.attempt }
+        time = { 6.hour * task.attempt }
+      }
+
+      withName: 'FRAPOSA_PCA' {
+        cpus = 2
+        memory = { 8.GB * task.attempt }
+        time = { 1.hour * task.attempt }
+      }
+
+      withName: 'PLINK2_SCORE' {
+        cpus = 2
+        memory = { 8.GB * task.attempt }
+        time = { 16.hour * task.attempt }
+      }
+  }
+
+
+.. note:: You'll want to adjust memory usage depending on the complexity of your input scoring files.  Allocating more CPUs probably won't make the workflow complete faster. 
 
 Assuming the configuration file you set up is saved as
 ``my_custom.config`` in your current working directory, you're ready

From 8b20b61f060dda702cf0faab95c538d260090295 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Mon, 14 Oct 2024 12:30:19 +0100
Subject: [PATCH 17/22] add missing processes

---
 docs/how-to/bigjob.rst | 238 ++++++++++++++++++++++-------------------
 1 file changed, 125 insertions(+), 113 deletions(-)

diff --git a/docs/how-to/bigjob.rst b/docs/how-to/bigjob.rst
index 0a9c1c52..22a22655 100644
--- a/docs/how-to/bigjob.rst
+++ b/docs/how-to/bigjob.rst
@@ -79,119 +79,131 @@ on UK Biobank with a SLURM cluster:
 
 .. code-block:: text
 
-    process {
-      errorStrategy = 'retry'
-      maxRetries = 3
-      maxErrors = '-1'
-      executor = 'slurm'
-
-      withName: 'SAMPLESHEET_JSON' {
-        cpus = 1
-        memory = { 1.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'DOWNLOAD_SCOREFILES' {
-        cpus = 1
-        memory = { 1.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'COMBINE_SCOREFILES' {
-        cpus = 1
-        memory = { 16.GB * task.attempt }
-        time = { 2.hour * task.attempt }
-      }
-
-      withName: 'PLINK2_MAKEBED' {
-        cpus = 1
-        memory = { 8.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'RELABEL_IDS' {
-        cpus = 1
-        memory = { 16.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'PLINK2_ORIENT' {
-        cpus = 1
-        memory = { 8.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'DUMPSOFTWAREVERSIONS' {
-        cpus = 1
-        memory = { 1.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'ANCESTRY_ANALYSIS' {
-        cpus = 1
-        memory = { 8.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'SCORE_REPORT' {
-        cpus = 1
-        memory = { 8.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'EXTRACT_DATABASE' {
-        cpus = 1
-        memory = { 8.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'PLINK2_RELABELPVAR' {
-        cpus = 1
-        memory = { 16.GB * task.attempt }
-        time = { 2.hour * task.attempt }
-      }
-
-      withName: 'INTERSECT_VARIANTS' {
-        cpus = 1
-        memory = { 8.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'INTERSECT_THINNED' {
-        cpus = 1
-        memory = { 8.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'MATCH_VARIANTS' {
-        cpus = 2
-        memory = { 32.GB * task.attempt }
-        time = { 6.hour * task.attempt }
-      }
-
-      withName: 'FILTER_VARIANTS' {
-        cpus = 1
-        memory = { 16.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'MATCH_COMBINE' {
-        cpus = 2
-        memory = { 64.GB * task.attempt }
-        time = { 6.hour * task.attempt }
-      }
-
-      withName: 'FRAPOSA_PCA' {
-        cpus = 2
-        memory = { 8.GB * task.attempt }
-        time = { 1.hour * task.attempt }
-      }
-
-      withName: 'PLINK2_SCORE' {
-        cpus = 2
-        memory = { 8.GB * task.attempt }
-        time = { 16.hour * task.attempt }
-      }
+  process {
+    errorStrategy = 'retry'
+    maxRetries = 3
+    maxErrors = '-1'
+    executor = 'slurm'
+
+    withName: 'SAMPLESHEET_JSON' {
+      cpus = 1
+      memory = { 1.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'DOWNLOAD_SCOREFILES' {
+      cpus = 1
+      memory = { 1.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'COMBINE_SCOREFILES' {
+      cpus = 1
+      memory = { 16.GB * task.attempt }
+      time = { 2.hour * task.attempt }
+    }
+
+    withName: 'PLINK2_MAKEBED' {
+      cpus = 1
+      memory = { 8.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'RELABEL_IDS' {
+      cpus = 1
+      memory = { 16.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'PLINK2_ORIENT' {
+      cpus = 1
+      memory = { 8.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'DUMPSOFTWAREVERSIONS' {
+      cpus = 1
+      memory = { 1.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'ANCESTRY_ANALYSIS' {
+      cpus = 1
+      memory = { 8.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'SCORE_REPORT' {
+      cpus = 1
+      memory = { 8.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'EXTRACT_DATABASE' {
+      cpus = 1
+      memory = { 8.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'PLINK2_RELABELPVAR' {
+      cpus = 1
+      memory = { 16.GB * task.attempt }
+      time = { 2.hour * task.attempt }
+    }
+
+    withName: 'INTERSECT_VARIANTS' {
+      cpus = 1
+      memory = { 8.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'INTERSECT_THINNED' {
+      cpus = 1
+      memory = { 8.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'MATCH_VARIANTS' {
+      cpus = 2
+      memory = { 32.GB * task.attempt }
+      time = { 6.hour * task.attempt }
+    }
+
+    withName: 'FILTER_VARIANTS' {
+      cpus = 1
+      memory = { 16.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'MATCH_COMBINE' {
+      cpus = 2
+      memory = { 64.GB * task.attempt }
+      time = { 6.hour * task.attempt }
+    }
+
+    withName: 'FRAPOSA_PCA' {
+      cpus = 2
+      memory = { 8.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'PLINK2_SCORE' {
+      cpus = 2
+      memory = { 8.GB * task.attempt }
+      time = { 16.hour * task.attempt }
+    }
+
+    withName: 'FRAPOSA_PROJECT' {
+      cpus = 1
+      memory = { 8.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }
+
+    withName: 'SCORE_AGGREGATE' {
+      cpus = 1
+      memory = { 16.GB * task.attempt }
+      time = { 1.hour * task.attempt }
+    }    
   }
 
 

From c1f8c25eece3e2afecf41eef9e75ba494c477a32 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Thu, 24 Oct 2024 15:15:06 +0100
Subject: [PATCH 18/22] bump pgscatalog-utils version

---
 conf/modules.config                           | 4 ++--
 environments/pgscatalog_utils/environment.yml | 2 +-
 tests/modules/combine/test.yml                | 2 +-
 tests/modules/download/test.yml               | 6 +++---
 tests/subworkflows/test_liftover_run.yml      | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 2538e91c..ee84d87d 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -38,8 +38,8 @@ process {
         ext.conda = "$projectDir/environments/pgscatalog_utils/environment.yml"
         ext.docker = 'ghcr.io/pgscatalog/pygscatalog'
         ext.singularity = 'oras://ghcr.io/pgscatalog/pygscatalog'
-        ext.docker_version = ':pgscatalog-utils-1.4.1'
-        ext.singularity_version = ':pgscatalog-utils-1.4.1-singularity'
+        ext.docker_version = ':pgscatalog-utils-1.4.2'
+        ext.singularity_version = ':pgscatalog-utils-1.4.2-singularity'
     }
 
     withLabel: plink2 {
diff --git a/environments/pgscatalog_utils/environment.yml b/environments/pgscatalog_utils/environment.yml
index a0c5ecd1..cc9cb9ad 100644
--- a/environments/pgscatalog_utils/environment.yml
+++ b/environments/pgscatalog_utils/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - nodefaults  
 dependencies:
-  - pgscatalog-utils=1.4.1
+  - pgscatalog-utils=1.4.2
diff --git a/tests/modules/combine/test.yml b/tests/modules/combine/test.yml
index 3ddbcdbf..b4434c4d 100644
--- a/tests/modules/combine/test.yml
+++ b/tests/modules/combine/test.yml
@@ -14,4 +14,4 @@
         - "effect_weight"
     - path: output/combine/versions.yml
       contains:
-        - "pgscatalog.core: 0.3.1"
\ No newline at end of file
+        - "pgscatalog.core: 0.3.2"
diff --git a/tests/modules/download/test.yml b/tests/modules/download/test.yml
index c1c093a8..706319da 100644
--- a/tests/modules/download/test.yml
+++ b/tests/modules/download/test.yml
@@ -8,7 +8,7 @@
     - path: output/download/PGS000001_hmPOS_GRCh37.txt.gz
     - path: output/download/versions.yml
       contains:
-        - "pgscatalog.core: 0.3.1"
+        - "pgscatalog.core: 0.3.2"
       
 - name: pgscatalog test --efo_trait --pgp_id and --pgs_id
   command: nextflow run ./tests/modules/download -entry testmultipleaccessions -c ./tests/config/nextflow.config
@@ -24,7 +24,7 @@
     - path: output/download/PGS002054_hmPOS_GRCh37.txt.gz
     - path: output/download/versions.yml
       contains:
-        - "pgscatalog.core: 0.3.1"
+        - "pgscatalog.core: 0.3.2"
 
 - name: pgscatalog test bad accession
   command: nextflow run ./tests/modules/download -entry testbadaccession -c ./tests/config/nextflow.config
@@ -40,4 +40,4 @@
     - module
     - download
     - fast
-  exit_code: 1
\ No newline at end of file
+  exit_code: 1
diff --git a/tests/subworkflows/test_liftover_run.yml b/tests/subworkflows/test_liftover_run.yml
index 053f6192..eee0d599 100644
--- a/tests/subworkflows/test_liftover_run.yml
+++ b/tests/subworkflows/test_liftover_run.yml
@@ -11,7 +11,7 @@
       - "5297845"
     - path: output/combine/versions.yml      
       contains:
-        - "pgscatalog.core: 0.3.1"
+        - "pgscatalog.core: 0.3.2"
 
 - name: test input check subworkflow with liftover 37to38
   command: nextflow run main.nf --only_input --scorefile ./assets/examples/scorefiles/customgrch37.txt --liftover --target_build GRCh38 -c ./tests/config/nextflow.config --hg19_chain https://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz --hg38_chain https://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/hg38ToHg19.over.chain.gz
@@ -26,4 +26,4 @@
       - "5237785"
     - path: output/combine/versions.yml
       contains:
-        - "pgscatalog.core: 0.3.1"
+        - "pgscatalog.core: 0.3.2"

From 73a5d3d3bb01a1aa41d5022c615910b6dbb24fb3 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Tue, 29 Oct 2024 10:29:09 +0000
Subject: [PATCH 19/22] bump utils to 1.4.3

---
 conf/modules.config                           | 4 ++--
 environments/pgscatalog_utils/environment.yml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index ee84d87d..994fc586 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -38,8 +38,8 @@ process {
         ext.conda = "$projectDir/environments/pgscatalog_utils/environment.yml"
         ext.docker = 'ghcr.io/pgscatalog/pygscatalog'
         ext.singularity = 'oras://ghcr.io/pgscatalog/pygscatalog'
-        ext.docker_version = ':pgscatalog-utils-1.4.2'
-        ext.singularity_version = ':pgscatalog-utils-1.4.2-singularity'
+        ext.docker_version = ':pgscatalog-utils-1.4.3'
+        ext.singularity_version = ':pgscatalog-utils-1.4.3-singularity'
     }
 
     withLabel: plink2 {
diff --git a/environments/pgscatalog_utils/environment.yml b/environments/pgscatalog_utils/environment.yml
index cc9cb9ad..5d45db69 100644
--- a/environments/pgscatalog_utils/environment.yml
+++ b/environments/pgscatalog_utils/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - nodefaults  
 dependencies:
-  - pgscatalog-utils=1.4.2
+  - pgscatalog-utils=1.4.3

From d5f27d834915eddb03c0496be2b58d8b018f2f1b Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Tue, 29 Oct 2024 10:42:47 +0000
Subject: [PATCH 20/22] tests: bump core patch version

---
 tests/modules/combine/test.yml           | 2 +-
 tests/modules/download/test.yml          | 4 ++--
 tests/subworkflows/test_liftover_run.yml | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/modules/combine/test.yml b/tests/modules/combine/test.yml
index b4434c4d..4f629ca2 100644
--- a/tests/modules/combine/test.yml
+++ b/tests/modules/combine/test.yml
@@ -14,4 +14,4 @@
         - "effect_weight"
     - path: output/combine/versions.yml
       contains:
-        - "pgscatalog.core: 0.3.2"
+        - "pgscatalog.core: 0.3.3"
diff --git a/tests/modules/download/test.yml b/tests/modules/download/test.yml
index 706319da..18d9d2ef 100644
--- a/tests/modules/download/test.yml
+++ b/tests/modules/download/test.yml
@@ -8,7 +8,7 @@
     - path: output/download/PGS000001_hmPOS_GRCh37.txt.gz
     - path: output/download/versions.yml
       contains:
-        - "pgscatalog.core: 0.3.2"
+        - "pgscatalog.core: 0.3.3"
       
 - name: pgscatalog test --efo_trait --pgp_id and --pgs_id
   command: nextflow run ./tests/modules/download -entry testmultipleaccessions -c ./tests/config/nextflow.config
@@ -24,7 +24,7 @@
     - path: output/download/PGS002054_hmPOS_GRCh37.txt.gz
     - path: output/download/versions.yml
       contains:
-        - "pgscatalog.core: 0.3.2"
+        - "pgscatalog.core: 0.3.3"
 
 - name: pgscatalog test bad accession
   command: nextflow run ./tests/modules/download -entry testbadaccession -c ./tests/config/nextflow.config
diff --git a/tests/subworkflows/test_liftover_run.yml b/tests/subworkflows/test_liftover_run.yml
index eee0d599..8991cb16 100644
--- a/tests/subworkflows/test_liftover_run.yml
+++ b/tests/subworkflows/test_liftover_run.yml
@@ -11,7 +11,7 @@
       - "5297845"
     - path: output/combine/versions.yml      
       contains:
-        - "pgscatalog.core: 0.3.2"
+        - "pgscatalog.core: 0.3.3"
 
 - name: test input check subworkflow with liftover 37to38
   command: nextflow run main.nf --only_input --scorefile ./assets/examples/scorefiles/customgrch37.txt --liftover --target_build GRCh38 -c ./tests/config/nextflow.config --hg19_chain https://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz --hg38_chain https://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/hg38ToHg19.over.chain.gz
@@ -26,4 +26,4 @@
       - "5237785"
     - path: output/combine/versions.yml
       contains:
-        - "pgscatalog.core: 0.3.2"
+        - "pgscatalog.core: 0.3.3"

From 2faa714e1fcb7afa0d908e66999687e7c2d26991 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Tue, 29 Oct 2024 11:10:41 +0000
Subject: [PATCH 21/22] install packages individually

---
 environments/pgscatalog_utils/environment.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/environments/pgscatalog_utils/environment.yml b/environments/pgscatalog_utils/environment.yml
index 5d45db69..5aca3dd4 100644
--- a/environments/pgscatalog_utils/environment.yml
+++ b/environments/pgscatalog_utils/environment.yml
@@ -4,4 +4,6 @@ channels:
   - bioconda
   - nodefaults  
 dependencies:
-  - pgscatalog-utils=1.4.3
+  - pgscatalog.core=0.3.3
+  - pgscatalog.match=0.3.3
+  - pgscatalog.calc=0.3.0

From b02f00b9a055a3d7a1212a8847febe89c6dec554 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Tue, 29 Oct 2024 11:42:29 +0000
Subject: [PATCH 22/22] bump pgscatalog-utils

---
 conf/modules.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 994fc586..7febd013 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -38,8 +38,8 @@ process {
         ext.conda = "$projectDir/environments/pgscatalog_utils/environment.yml"
         ext.docker = 'ghcr.io/pgscatalog/pygscatalog'
         ext.singularity = 'oras://ghcr.io/pgscatalog/pygscatalog'
-        ext.docker_version = ':pgscatalog-utils-1.4.3'
-        ext.singularity_version = ':pgscatalog-utils-1.4.3-singularity'
+        ext.docker_version = ':pgscatalog-utils-1.4.4'
+        ext.singularity_version = ':pgscatalog-utils-1.4.4-singularity'
     }
 
     withLabel: plink2 {