diff --git a/.github/workflows/ancestry-conda.yml b/.github/workflows/ancestry-conda.yml new file mode 100644 index 00000000..532f6ae1 --- /dev/null +++ b/.github/workflows/ancestry-conda.yml @@ -0,0 +1,74 @@ +name: Run ancestry test with mamba profile + +on: + workflow_call: + inputs: + ancestry-cache-key: + type: string + required: true + +jobs: + test_mamba_ancestry: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -el {0} + + steps: + - name: Check out pipeline code + uses: actions/checkout@v3 + + - name: Set environment variables + run: | + echo "ANCESTRY_REF_DIR=$RUNNER_TEMP" >> $GITHUB_ENV + echo "ANCESTRY_TARGET_DIR=$RUNNER_TEMP" >> $GITHUB_ENV + + - name: Restore reference data + uses: actions/cache/restore@v3 + with: + path: | + ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pgen + ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.psam + ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pvar.zst + ${{ env.ANCESTRY_REF_DIR }}/GRCh38_HAPNEST_reference.tar.zst + key: ${{ inputs.ancestry-cache-key }} + fail-on-cache-miss: true + + - uses: conda-incubator/setup-miniconda@v2 + with: + channels: conda-forge,bioconda,defaults + miniforge-variant: Mambaforge + miniforge-version: latest + python-version: "3.10" + + - uses: actions/setup-java@v3 + with: + distribution: 'corretto' + java-version: '17' + + - name: install nxf + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Set up test requirements + uses: actions/setup-python@v3 + with: + python-version: '3.10' + cache: 'pip' + + - run: pip install -r ${{ github.workspace }}/tests/requirements.txt + + - name: Run ancestry test + run: TMPDIR=~ PROFILE=mamba pytest --kwdof --symlink --git-aware --wt 2 --tag "ancestry" --ignore tests/bin + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v3 + with: + name: logs-conda-ancestry + path: | + /home/runner/pytest_workflow_*/*/.nextflow.log + /home/runner/pytest_workflow_*/*/log.out + /home/runner/pytest_workflow_*/*/log.err + /home/runner/pytest_workflow_*/*/output/* diff --git a/.github/workflows/ancestry-vcf.yml b/.github/workflows/ancestry-vcf.yml new file mode 100644 index 00000000..5454433d --- /dev/null +++ b/.github/workflows/ancestry-vcf.yml @@ -0,0 +1,160 @@ +name: Run ancestry test with singularity or docker profiles with VCF input + +on: + workflow_call: + inputs: + container-cache-key: + type: string + required: true + ancestry-cache-key: + type: string + required: true + docker: + type: boolean + singularity: + type: boolean + +env: + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/singularity + SINGULARITY_VERSION: 3.8.3 + +jobs: + docker: + if: ${{ inputs.docker }} + runs-on: ubuntu-latest + + steps: + - name: Set environment variables + run: | + echo "ANCESTRY_REF_DIR=$RUNNER_TEMP" >> $GITHUB_ENV + echo "ANCESTRY_TARGET_DIR=$RUNNER_TEMP" >> $GITHUB_ENV + + - name: Check out pipeline code + uses: actions/checkout@v3 + + - uses: nf-core/setup-nextflow@v1 + + - name: Restore docker images + id: restore-docker + uses: actions/cache/restore@v3 + with: + path: ${{ runner.temp }}/docker + key: ${{ inputs.container-cache-key }} + fail-on-cache-miss: true + + - name: Load docker images from cache + run: | + find $HOME -name '*.tar' + find ${{ runner.temp }}/docker/ -name '*.tar' -exec sh -c 'docker load < {}' \; + + - name: Restore reference data + uses: actions/cache/restore@v3 + with: + path: | + ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pgen + ${{ 
env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.psam
+          ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pvar.zst
+          ${{ env.ANCESTRY_REF_DIR }}/GRCh38_HAPNEST_reference.tar.zst
+        key: ${{ inputs.ancestry-cache-key }}
+        fail-on-cache-miss: true
+
+      - name: Install plink2 to recode
+        run: sudo apt-get install -y plink2
+
+      - name: Recode VCF
+        run: plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs --export vcf bgz --out ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL
+
+      - name: Set up test requirements
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+
+      - run: pip install -r ${{ github.workspace }}/tests/requirements.txt
+
+      - name: Run ancestry test
+        run: TMPDIR=~ PROFILE=docker pytest --kwdof --symlink --git-aware --wt 2 --tag "ancestry vcf" --ignore tests/bin
+
+      - name: Upload logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v3
+        with:
+          name: logs-docker-ancestry
+          path: |
+            /home/runner/pytest_workflow_*/*/.nextflow.log
+            /home/runner/pytest_workflow_*/*/log.out
+            /home/runner/pytest_workflow_*/*/log.err
+            /home/runner/pytest_workflow_*/*/output/*
+
+  singularity:
+    if: ${{ inputs.singularity }}
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Set environment variables
+        run: |
+          echo "ANCESTRY_REF_DIR=$RUNNER_TEMP" >> $GITHUB_ENV
+          echo "ANCESTRY_TARGET_DIR=$RUNNER_TEMP" >> $GITHUB_ENV
+
+      - name: Check out pipeline code
+        uses: actions/checkout@v3
+
+      - uses: nf-core/setup-nextflow@v1
+
+      - name: Restore singularity setup
+        id: restore-singularity-setup
+        uses: actions/cache@v3
+        with:
+          path: /opt/hostedtoolcache/singularity/${{ env.SINGULARITY_VERSION }}/x64
+          key: ${{ runner.os }}-singularity-${{ env.SINGULARITY_VERSION }}
+          fail-on-cache-miss: true
+
+      - name: Add singularity to path
+        run: |
+          echo "/opt/hostedtoolcache/singularity/${{ env.SINGULARITY_VERSION }}/x64/bin" >> $GITHUB_PATH
+
+      - name: Restore singularity container images
+        id: restore-singularity
+        uses: actions/cache@v3
+        with:
+          path: ${{ env.NXF_SINGULARITY_CACHEDIR }}
+          key: ${{ inputs.container-cache-key }}
+
+      - name: Restore reference data
+        uses: actions/cache/restore@v3
+        with:
+          path: |
+            ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pgen
+            ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.psam
+            ${{ env.ANCESTRY_TARGET_DIR }}/GRCh38_HAPNEST_TARGET_ALL.pvar.zst
+            ${{ env.ANCESTRY_REF_DIR }}/GRCh38_HAPNEST_reference.tar.zst
+          key: ${{ inputs.ancestry-cache-key }}
+          fail-on-cache-miss: true
+
+      - name: Install plink2 to recode
+        run: sudo apt-get install -y plink2
+
+      - name: Recode VCF
+        run: plink2 --pfile ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL vzs --export vcf bgz --out ${ANCESTRY_TARGET_DIR}/GRCh38_HAPNEST_TARGET_ALL
+
+      - name: Set up test requirements
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+
+      - run: pip install -r ${{ github.workspace }}/tests/requirements.txt
+
+      - name: Run ancestry test
+        run: TMPDIR=~ PROFILE=singularity pytest --kwdof --symlink --git-aware --wt 2 --tag "ancestry vcf" --ignore tests/bin
+
+      - name: Upload logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v3
+        with:
+          name: logs-singularity-ancestry
+          path: |
+            /home/runner/pytest_workflow_*/*/.nextflow.log
+            /home/runner/pytest_workflow_*/*/log.out
+            /home/runner/pytest_workflow_*/*/log.err
+            /home/runner/pytest_workflow_*/*/output/*
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ee98360f..b5acc420 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,6 +9,7 @@ on:
     branches:
       - dev
       - main
+      - fix_vcf
   release:
     types: [published]
 
@@ -123,3 +124,19 @@ jobs:
       container-cache-key: ${{ needs.preload_singularity.outputs.cache-key }}
       ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }}
       singularity: true
+
+  ancestry_vcf_docker:
+    needs: [preload_ancestry, preload_docker]
+    uses: ./.github/workflows/ancestry-vcf.yml
+    with:
+      container-cache-key: ${{ needs.preload_docker.outputs.cache-key }}
+      ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }}
+      docker: true
+
+  ancestry_vcf_singularity:
+    needs: [preload_ancestry, preload_singularity]
+    uses: ./.github/workflows/ancestry-vcf.yml
+    with:
+      container-cache-key: ${{ needs.preload_singularity.outputs.cache-key }}
+      ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }}
+      singularity: true
diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml
index 87e77667..8b627f5a 100644
--- a/.github/workflows/conda.yml
+++ b/.github/workflows/conda.yml
@@ -1,4 +1,4 @@
-name: test conda on publish
+name: test conda profiles on demand and on publish
 
 on:
   release:
@@ -6,33 +6,50 @@ on:
   workflow_dispatch:
 
 jobs:
+  preload_ancestry:
+    uses: ./.github/workflows/preload-reference.yml
+
+  test_mamba_ancestry:
+    uses: ./.github/workflows/ancestry-conda.yml
+    needs: [preload_ancestry]
+    with:
+      ancestry-cache-key: ${{ needs.preload_ancestry.outputs.cache-key }}
+
   test_mamba_standard:
     runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -el {0}
     strategy:
       fail-fast: false
       matrix:
         test_profile: ["test"]
         profile: ["mamba"]
-        nxf_ver: ["22.10.0", "latest"]
+        nxf_ver: ["22.10.0", ""]
+
+    env:
+      NXF_VER: ${{ matrix.nxf_ver }}
 
     steps:
      - name: Check out pipeline code
        uses: actions/checkout@v3
 
+      - uses: conda-incubator/setup-miniconda@v2
+        with:
+          channels: conda-forge,bioconda,defaults
+          miniforge-variant: Mambaforge
+          miniforge-version: latest
+          python-version: "3.10"
+
       - uses: actions/setup-java@v3
         with:
           distribution: 'corretto'
           java-version: '17'
 
-      - uses: nf-core/setup-nextflow@v1
-        with:
-          version: ${{ matrix.nxf_ver }}
-
-      - uses: conda-incubator/setup-miniconda@v2
-        with:
-          miniforge-variant: Mambaforge
-          miniforge-version: latest
-          channels: conda-forge,bioconda,defaults
+      - name: install nxf
+        run: |
+          wget -qO- get.nextflow.io | bash
+          sudo mv nextflow /usr/local/bin/
 
       - name: Run pipeline with test data
         run: |
diff --git a/.github/workflows/standard-test.yml b/.github/workflows/standard-test.yml
index f8f61872..881f68ec 100644
--- a/.github/workflows/standard-test.yml
+++ b/.github/workflows/standard-test.yml
@@ -14,6 +14,7 @@ on:
 env:
   NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/singularity
   SINGULARITY_VERSION: 3.8.3
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
 jobs:
   docker:
diff --git a/RELEASE-CHECKLIST.md b/RELEASE-CHECKLIST.md
index 2abc6f5b..b12a8d27 100644
--- a/RELEASE-CHECKLIST.md
+++ b/RELEASE-CHECKLIST.md
@@ -31,6 +31,10 @@
 - [ ] Has the changelog been updated?
 - [ ] Update the nextflow schema
 
+# Reference panels
+- [ ] Did anything change in the modules for creating the reference panel? Bump ref_format_version in nextflow.config (see the sketch below)
+  - [ ] Publish new reference panels to FTP, update any documentation.
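For context, the version pin that checklist item refers to is sketched below; the parameter name and default value come from the nextflow.config hunk later in this diff, while the comment is illustrative rather than the repository's own:

    params {
        // bump whenever the reference panel creation modules change, so that
        // EXTRACT_DATABASE can detect and reject stale reference archives
        ref_format_version = "v0.1"
    }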
+
 # Tests
 - [ ] Make sure unit tests pass on singularity, docker, and conda (CI)
diff --git a/assets/examples/samplesheet.csv b/assets/examples/samplesheet.csv
index e4221e60..65fd9dc1 100644
--- a/assets/examples/samplesheet.csv
+++ b/assets/examples/samplesheet.csv
@@ -1,2 +1,2 @@
 sampleset,path_prefix,chrom,format
-cineca,assets/examples/target_genomes/cineca_synthetic_subset,22,pfile
\ No newline at end of file
+cineca,target_genomes/cineca_synthetic_subset,22,pfile
\ No newline at end of file
diff --git a/assets/examples/samplesheet_bfile.csv b/assets/examples/samplesheet_bfile.csv
index 0ac2241d..19f83aba 100644
--- a/assets/examples/samplesheet_bfile.csv
+++ b/assets/examples/samplesheet_bfile.csv
@@ -1,2 +1,2 @@
 sampleset,path_prefix,chrom,format
-cineca,assets/examples/target_genomes/cineca_synthetic_subset,22,bfile
\ No newline at end of file
+cineca,target_genomes/cineca_synthetic_subset,22,bfile
\ No newline at end of file
diff --git a/assets/examples/samplesheet_vcf.csv b/assets/examples/samplesheet_vcf.csv
index 553e907a..80642835 100644
--- a/assets/examples/samplesheet_vcf.csv
+++ b/assets/examples/samplesheet_vcf.csv
@@ -1,2 +1,2 @@
 sampleset,path_prefix,chrom,format
-cineca,assets/examples/target_genomes/cineca_synthetic_subset,22,vcf
\ No newline at end of file
+cineca,target_genomes/cineca_synthetic_subset,22,vcf
\ No newline at end of file
diff --git a/assets/report/report.qmd b/assets/report/report.qmd
index 3001f6a0..dda84749 100644
--- a/assets/report/report.qmd
+++ b/assets/report/report.qmd
@@ -293,12 +293,23 @@ if(params$run_ancestry == TRUE){
 ```{r colour_palette, echo = FALSE, eval=params$run_ancestry}
 # source: https://github.com/PGScatalog/PGS_Catalog/blob/master/catalog/static/catalog/pgs.scss#L2493-L2520
 # $ancestry_colours
-thousand_genomes_colours <- c("#FFD900", "#E41A1C", "#B15928", "#4DAF4A",
-                              "#377EB8", "#00CED1", "#984EA3", "#A6CEE3",
-                              "#FF7F00", "#BBB", "#999")
-names(thousand_genomes_colours) <- c("AFR", "AMR", "ASN", "EAS", "EUR", "GME",
-                                     "SAS", "MAE", "MAO", "NR", "OTH")
-thousand_genomes_palette <- scale_colour_manual(name = "Populations", values = thousand_genomes_colours)
+if (params$reference_panel_name == '1000G') {
+  thousand_genomes_colours <- c("#FFD900", "#E41A1C", "#B15928", "#4DAF4A",
+                                "#377EB8", "#00CED1", "#984EA3", "#A6CEE3",
+                                "#FF7F00", "#BBB", "#999")
+  names(thousand_genomes_colours) <- c("AFR", "AMR", "ASN", "EAS",
+                                       "EUR", "GME", "SAS", "MAE",
+                                       "MAO", "NR", "OTH")
+  current_population_palette <- scale_colour_manual(name = "Populations", values = thousand_genomes_colours)
+} else if (params$reference_panel_name == 'HGDP+1kGP') {
+  gnomAD_pop_colours <- c("#97519d", "#e42523", "#f67e1e", "#48b24b",
+                          "#3280bb", "#a65528", "#9a9c9b")
+  names(gnomAD_pop_colours) <- c("AFR", "AMR", "CSA", "EAS",
+                                 "EUR", "MID", "OCE")
+  current_population_palette <- scale_colour_manual(name = "Populations", values = gnomAD_pop_colours)
+} else {
+  current_population_palette <- scale_colour_brewer(palette="Set3")
+}
 ```
 
 ```{r, echo = FALSE, message = FALSE, eval=params$run_ancestry}
@@ -321,7 +332,7 @@ for(pc in seq.int(1,5,2)){
   if (pcX %in% colnames(popsim)){
     p_pca <- ggplot(popsim[popsim$REFERENCE == TRUE,], aes(x=!!sym(pcX), y=!!sym(pcY))) + geom_point(aes(colour=SuperPop, shape=slabel), alpha=0.25)
     p_pca <- p_pca + geom_point(data=popsim[popsim$REFERENCE != TRUE,], aes(color=MostSimilarPop, shape=slabel))
-    p_pca <- p_pca + theme_bw() + thousand_genomes_palette + scale_shape_manual(values=map_shapes, name='sampleset')
+    p_pca <- p_pca + theme_bw() + current_population_palette + scale_shape_manual(values=map_shapes, name='sampleset')
     print(p_pca)
   }
 }
@@ -492,4 +503,4 @@ For scores from the PGS Catalog, please remember to cite the original publicatio
 
 > PGS Catalog Calculator (in development). PGS Catalog Team. `https://github.com/PGScatalog/pgsc_calc`
 
-> Lambert et al. (2021) The Polygenic Score Catalog as an open database for reproducibility and systematic evaluation. Nature Genetics. 53:420–425 doi:10.1038/s41588-021-00783-5.
\ No newline at end of file
+> Lambert et al. (2021) The Polygenic Score Catalog as an open database for reproducibility and systematic evaluation. Nature Genetics. 53:420–425 doi:10.1038/s41588-021-00783-5.
diff --git a/assets/schemas/samplesheet.json b/assets/schemas/samplesheet.json
index 87afd724..d1fe85f1 100644
--- a/assets/schemas/samplesheet.json
+++ b/assets/schemas/samplesheet.json
@@ -11,8 +11,8 @@
     "properties": {
       "sampleset": {
         "type": "string",
-        "pattern": "^\\S+$",
-        "description": "Sampleset name must be provided and cannot contain spaces"
+        "pattern": "^[a-zA-Z0-9]+$",
+        "description": "Sampleset name must be provided and cannot contain spaces or reserved characters ('_' or '.')"
       },
       "path": {
         "description": "A list of resolved target genome file paths",
diff --git a/conf/base.config b/conf/base.config
index d2a485ce..c4a972a2 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -51,4 +51,7 @@ process {
     withName:DUMPSOFTWAREVERSIONS {
         cache = false
     }
+    withLabel:plink2 {
+        memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+    }
 }
diff --git a/conf/modules.config b/conf/modules.config
index 2d0e267e..eef253af 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -38,8 +38,8 @@ process {
         ext.conda = "$projectDir/environments/pgscatalog_utils/environment.yml"
         ext.docker = 'ghcr.io/pgscatalog/pgscatalog_utils'
         ext.singularity = 'oras://ghcr.io/pgscatalog/pgscatalog_utils'
-        ext.docker_version = ':v0.4.1'
-        ext.singularity_version = ':v0.4.1-singularity'
+        ext.docker_version = ':v0.4.2'
+        ext.singularity_version = ':v0.4.2-singularity'
     }
 
     withLabel: plink2 {
diff --git a/conf/test.config b/conf/test.config
index 4ebee4d6..e64e0d95 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -19,13 +19,10 @@ params {
     max_memory = '6.GB'
     max_time = '6.h'
 
-    input = "https://gitlab.ebi.ac.uk/nebfield/test-datasets/-/raw/master/pgsc_calc/input_v2.json"
-    format = "json"
-    scorefile = "https://gitlab.ebi.ac.uk/nebfield/test-datasets/-/raw/master/pgsc_calc/PGS001229_22.txt"
+    input = "$projectDir/assets/examples/samplesheet.csv"
+    format = "csv"
+    scorefile = "$projectDir/assets/examples/scorefiles/PGS001229_22.txt"
 
-    // TODO: fix local tests with CSV
-    // input = "$projectDir/assets/examples/samplesheet.csv"
-    // scorefile = "$projectDir/assets/examples/scorefiles/PGS001229_22.txt"
     outdir = "$projectDir/results"
     target_build = "GRCh37"
 }
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 29ba8192..4b24045a 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -8,6 +8,26 @@ will only occur in major versions with changes noted in this changelog.
 
 .. _`semantic versioning`: https://semver.org/
 
+pgsc_calc v2.0.0-alpha.3 (2023-10-02)
+-------------------------------------
+
+Improvements:
+
+* Automatically retry scoring with more RAM on larger datasets
+* Describe scoring precision in docs
+* Change handling of VCFs to reduce errors when recoding
+* Internal changes to improve support for custom reference panels
+
+Bug fixes:
+
+* Fix VCF input to ancestry projection subworkflow (thanks `@frahimov`_ and `@AWS-crafter`_ for patiently debugging)
+* Fix scoring options when reading allelic frequencies from a reference panel (thanks `@raimondsre`_ for reporting the changes from v1.3.2 -> 2.0.0-alpha)
+* Fix conda profile action
+
+.. _`@frahimov`: https://github.com/PGScatalog/pgsc_calc/issues/172
+.. _`@AWS-crafter`: https://github.com/PGScatalog/pgsc_calc/issues/155
+.. _`@raimondsre`: https://github.com/PGScatalog/pgsc_calc/pull/139#issuecomment-1736313211
+
 pgsc_calc v2.0.0-alpha.1 (2023-08-11)
 -------------------------------------
diff --git a/docs/explanation/output.rst b/docs/explanation/output.rst
index 7d8c26eb..c9216b47 100644
--- a/docs/explanation/output.rst
+++ b/docs/explanation/output.rst
@@ -34,6 +34,10 @@ If you have run the pipeline **without** using ancestry information the followin
 - ``AVG``: normalizes ``SUM`` by the ``DENOM`` field (displayed when you calculate the PGS on a small
   sample size n<50 to avoid using unreliable allele frequency estimates for missing genotypes in the
   target sample).
 
+.. note:: The PGS ``SUM`` & ``AVG`` are rounded to a precision of 6 decimal places in the output of the PLINK2_SCORE
+   commands; however, the calculation of the PGS is based on the full precision of the effect_weight value in the
+   scoring file.
+
 If you have run the pipeline **using ancestry information** (``--run_ancestry``) the following columns
 may be present depending on the ancestry adjustments that were run (see :ref:`norm` for more details):
diff --git a/docs/how-to/prepare.rst b/docs/how-to/prepare.rst
index eb2fe4eb..d74427bc 100644
--- a/docs/how-to/prepare.rst
+++ b/docs/how-to/prepare.rst
@@ -16,8 +16,8 @@ Target genome data requirements
 - Only human chromosomes 1 -- 22, X, Y, and XY are supported by the pipeline,
   although sex chromosomes are rarely used in scoring files.
 
-- If input data contain other chromosomes (e.g. pseudoautosomal regions) then
-  the pipeline will probably complain loudly and stop calculating.
+- If input data contain other chromosomes (e.g. patch regions) then
+  the pipeline may complain loudly and stop calculating.
 
 Supported file formats
@@ -41,11 +41,17 @@ VCF from an imputation server
     plink2 --vcf \
       --allow-extra-chr \
      --chr 1-22, X, Y, XY \
-      --make-pgen --out <1000G>_axy
+      --make-pgen --out _axy
+
+.. note:: Non-standard chromosomes/patches should not cause errors in versions >v2.0.0-alpha.3;
+   however, they will be filtered out of the list of variants available for PGS scoring.
 
 VCF from WGS
 ------------
 
+See https://github.com/PGScatalog/pgsc_calc/discussions/123 for a discussion of tools
+to convert VCF files into ones suitable for calculating PGS.
+
 ``plink`` binary fileset (bfile)
 --------------------------------
diff --git a/docs/how-to/samplesheet.rst b/docs/how-to/samplesheet.rst
index 05b9a77f..b5f74063 100644
--- a/docs/how-to/samplesheet.rst
+++ b/docs/how-to/samplesheet.rst
@@ -25,11 +25,11 @@ download here <../../assets/examples/samplesheet.csv>`.
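For reference, the bundled example samplesheet referred to in the hunk above now reads as follows (copied from assets/examples/samplesheet.csv as updated in this diff; path_prefix is now resolved relative to the samplesheet's location rather than the project root)::

    sampleset,path_prefix,chrom,format
    cineca,target_genomes/cineca_synthetic_subset,22,pfile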
 There are four mandatory columns:
 
-- **sampleset**: A text string referring to the name of a :term:`target dataset`
-  of genotyping data containing at least one sample/individual (however cohort
-  datasets will often contain many individuals with combined genotyped/imputed
-  data). Data from a sampleset may be input as a single file, or split across
-  chromosomes into multiple files. Scores generated from files with the same
+- **sampleset**: A text string (no spaces or reserved characters ['.' or '_']) referring
+  to the name of a :term:`target dataset` of genotyping data containing at least one
+  sample/individual (however cohort datasets will often contain many individuals with
+  combined genotyped/imputed data). Data from a sampleset may be input as a single file,
+  or split across chromosomes into multiple files. Scores generated from files with the same
   sampleset name are combined in later stages of the analysis.
 
 .. danger::
diff --git a/environments/pgscatalog_utils/environment.yml b/environments/pgscatalog_utils/environment.yml
index a6827249..b2be6204 100644
--- a/environments/pgscatalog_utils/environment.yml
+++ b/environments/pgscatalog_utils/environment.yml
@@ -3,4 +3,4 @@ dependencies:
   - python=3.10
   - pip
   - pip:
-      - pgscatalog_utils==0.4.1
+      - pgscatalog_utils==0.4.2
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
index 28567bd7..8d030f4e 100755
--- a/lib/Utils.groovy
+++ b/lib/Utils.groovy
@@ -21,19 +21,26 @@ class Utils {
         }
 
         // Check that all channels are present
-        def required_channels = ['conda-forge', 'bioconda', 'defaults']
-        def conda_check_failed = !required_channels.every { ch -> ch in channels }
+        // This channel list is ordered by required channel priority.
+        def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
+        def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean
 
         // Check that they are in the right order
-        conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda'))
-        conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults'))
+        def channel_priority_violation = false
+        def n = required_channels_in_order.size()
+        for (int i = 0; i < n - 1; i++) {
+            channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
+        }
 
-        if (conda_check_failed) {
+        if (channels_missing | channel_priority_violation) {
             log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
                 " There is a problem with your Conda configuration!\n\n" +
                 " You will need to set-up the conda-forge and bioconda channels correctly.\n" +
-                " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" +
-                " NB: The order of the channels matters!\n" +
+                " Please refer to https://bioconda.github.io/\n" +
+                " The observed channel order is \n" +
+                " ${channels}\n" +
+                " but the following channel order is required:\n" +
+                " ${required_channels_in_order}\n" +
                 "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
         }
     }
diff --git a/modules/local/ancestry/bootstrap/make_database.nf b/modules/local/ancestry/bootstrap/make_database.nf
index dca93503..e6d2aa16 100644
--- a/modules/local/ancestry/bootstrap/make_database.nf
+++ b/modules/local/ancestry/bootstrap/make_database.nf
@@ -14,6 +14,8 @@ process MAKE_DATABASE {
     input:
     path '*'
+    tuple val(grch37_king_meta), path(grch37_king)
+    tuple val(grch38_king_meta), path(grch38_king)
     path checksums
 
     output:
@@ -24,7
+26,13 @@ process MAKE_DATABASE { """ md5sum -c $checksums - echo $workflow.manifest.version > meta.txt + echo ${params.ref_format_version} > meta.txt + + # can't use meta variables in stageAs + # don't want to use renameTo because it's destructive for the input + cp -L $grch37_king ${grch37_king_meta.build}_${grch37_king_meta.id}.king.cutoff.out.id + cp -L $grch38_king ${grch38_king_meta.build}_${grch38_king_meta.id}.king.cutoff.out.id + rm $grch37_king $grch38_king tar --dereference -acf pgsc_calc.tar.zst * diff --git a/modules/local/ancestry/extract_database.nf b/modules/local/ancestry/extract_database.nf index 6778b28a..b7c2fc1b 100644 --- a/modules/local/ancestry/extract_database.nf +++ b/modules/local/ancestry/extract_database.nf @@ -17,18 +17,27 @@ process EXTRACT_DATABASE { output: tuple val(meta38), path("GRCh38_*_ALL.pgen"), path("GRCh38_*_ALL.psam"), path("GRCh38_*_ALL.pvar.zst"), emit: grch38, optional: true - tuple val(meta38), path("deg2_hg38.king.cutoff.out.id"), emit: grch38_king, optional: true + tuple val(meta38), path("GRCh38_*.king.cutoff.out.id"), emit: grch38_king, optional: true tuple val(meta37), path("GRCh37_*_ALL.pgen"), path("GRCh37_*_ALL.psam"), path("GRCh37_*_ALL.pvar.zst"), emit: grch37, optional: true - tuple val(meta37), path("deg2_phase3.king.cutoff.out.id"), emit: grch37_king, optional: true + tuple val(meta37), path("GRCh37_*.king.cutoff.out.id"), emit: grch37_king, optional: true path "versions.yml", emit: versions script: meta38 = ['build': 'GRCh38'] meta37 = ['build': 'GRCh37'] - def king = params.target_build == "GRCh37" ? "deg2_phase3.king.cutoff.out.id" : "deg2_hg38.king.cutoff.out.id" """ - tar -xvf $reference --wildcards "${params.target_build}*" $king + tar -xf $reference --wildcards "${params.target_build}*" meta.txt 2> /dev/null + + DB_VERSION=\$(cat meta.txt) + + if [ "\$DB_VERSION" != "${params.ref_format_version}" ]; then + echo "Old reference database version detected, please redownload the latest version and try again" + echo "See https://pgsc-calc.readthedocs.io/en/latest/how-to/ancestry.html" + exit 1 + else + echo "Database version good" + fi cat <<-END_VERSIONS > versions.yml ${task.process.tokenize(':').last()}: diff --git a/modules/local/match_combine.nf b/modules/local/match_combine.nf index 7484001b..1b84709f 100644 --- a/modules/local/match_combine.nf +++ b/modules/local/match_combine.nf @@ -1,6 +1,7 @@ process MATCH_COMBINE { // labels are defined in conf/modules.config label 'process_medium' + label 'error_retry' label 'pgscatalog_utils' // controls conda, docker, + singularity options // first element of tag must be sampleset diff --git a/modules/local/plink2_relabelbim.nf b/modules/local/plink2_relabelbim.nf index d495960f..beb487da 100644 --- a/modules/local/plink2_relabelbim.nf +++ b/modules/local/plink2_relabelbim.nf @@ -5,8 +5,8 @@ process PLINK2_RELABELBIM { label "plink2" // controls conda, docker, + singularity options tag "$meta.id chromosome $meta.chrom" - storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${params.target_build}/${meta.chrom}" : - "$workDir/genomes/${meta.id}/${params.target_build}/${meta.chrom}/") + storeDir ( params.genotypes_cache ? 
"$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" : + "$workDir/genomes/${meta.id}/${meta.build}/${meta.chrom}/") conda "${task.ext.conda}" @@ -20,9 +20,9 @@ process PLINK2_RELABELBIM { tuple val(meta), path(geno), path(variants), path(pheno) output: - tuple val(meta), path("*.bed"), emit: geno - tuple val(meta), path("*.zst"), emit: variants - tuple val(meta), path("*.fam"), emit: pheno + tuple val(meta), path("${meta.build}_*.bed"), emit: geno + tuple val(meta), path("${meta.build}_*.zst"), emit: variants + tuple val(meta), path("${meta.build}_*.fam"), emit: pheno tuple val(meta), path("*.vmiss.gz"), emit: vmiss path "versions.yml" , emit: versions @@ -33,7 +33,7 @@ process PLINK2_RELABELBIM { script: def args = task.ext.args ?: '' def compressed = variants.getName().endsWith("zst") ? 'vzs' : '' - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}_" : "${meta.id}_" + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" def mem_mb = task.memory.toMega() // plink is greedy // if dropping multiallelic variants, set a generic ID that won't match def set_ma_missing = params.keep_multiallelic ? '' : '--var-id-multi @:#' @@ -48,11 +48,11 @@ process PLINK2_RELABELBIM { $set_ma_missing \\ --bfile ${geno.baseName} $compressed \\ --make-just-bim zs \\ - --out ${params.target_build}_${prefix}${meta.chrom} + --out ${meta.build}_${prefix}_${meta.chrom} # cross platform (mac, linux) method of preserving symlinks - cp -a $geno ${params.target_build}_${prefix}${meta.chrom}.bed - cp -a $pheno ${params.target_build}_${prefix}${meta.chrom}.fam + cp -a $geno ${meta.build}_${prefix}_${meta.chrom}.bed + cp -a $pheno ${meta.build}_${prefix}_${meta.chrom}.fam gzip *.vmiss cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/plink2_relabelpvar.nf b/modules/local/plink2_relabelpvar.nf index 67671808..6de23ec3 100644 --- a/modules/local/plink2_relabelpvar.nf +++ b/modules/local/plink2_relabelpvar.nf @@ -6,8 +6,8 @@ process PLINK2_RELABELPVAR { tag "$meta.id chromosome $meta.chrom" - storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${params.target_build}/${meta.chrom}" : - "$workDir/genomes/${meta.id}/${params.target_build}/${meta.chrom}/") + storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" : + "$workDir/genomes/${meta.id}/${meta.build}/${meta.chrom}/") conda "${task.ext.conda}" @@ -21,9 +21,9 @@ process PLINK2_RELABELPVAR { tuple val(meta), path(geno), path(pheno), path(variants) output: - tuple val(meta), path("*.pgen"), emit: geno - tuple val(meta), path("*.zst") , emit: variants - tuple val(meta), path("*.psam"), emit: pheno + tuple val(meta), path("${meta.build}_*.pgen"), emit: geno + tuple val(meta), path("${meta.build}_*.pvar.zst") , emit: variants + tuple val(meta), path("${meta.build}_*.psam"), emit: pheno tuple val(meta), path("*.vmiss.gz"), emit: vmiss path "versions.yml" , emit: versions @@ -34,7 +34,7 @@ process PLINK2_RELABELPVAR { script: def args = task.ext.args ?: '' def compressed = variants.getName().endsWith("zst") ? 'vzs' : '' - def prefix = task.ext.suffix ? "${meta.id}_${task.ext.suffix}_" : "${meta.id}_" + def prefix = task.ext.suffix ? "${meta.id}_${task.ext.suffix}" : "${meta.id}" def mem_mb = task.memory.toMega() // plink is greedy // if dropping multiallelic variants, set a generic ID that won't match def set_ma_missing = params.keep_multiallelic ? 
'' : '--var-id-multi @:#'
@@ -49,11 +49,12 @@
         $set_ma_missing \\
         --pfile ${geno.baseName} $compressed \\
         --make-just-pvar zs \\
-        --out ${params.target_build}_${prefix}${meta.chrom}
+        --out ${meta.build}_${prefix}_${meta.chrom}
 
     # cross platform (mac, linux) method of preserving symlinks
-    cp -a $geno ${params.target_build}_${prefix}${meta.chrom}.pgen
-    cp -a $pheno ${params.target_build}_${prefix}${meta.chrom}.psam
+    cp -a $geno ${meta.build}_${prefix}_${meta.chrom}.pgen
+    cp -a $pheno ${meta.build}_${prefix}_${meta.chrom}.psam
+
     gzip *.vmiss
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/modules/local/plink2_score.nf b/modules/local/plink2_score.nf
index f8b21fc3..2da01f11 100644
--- a/modules/local/plink2_score.nf
+++ b/modules/local/plink2_score.nf
@@ -3,6 +3,7 @@ process PLINK2_SCORE {
     // labels are defined in conf/modules.config
     label 'process_low'
     label 'process_long'
+    label 'error_retry'
     label 'plink2' // controls conda, docker, + singularity options
 
     tag "$meta.id chromosome $meta.chrom effect type $scoremeta.effect_type $scoremeta.n"
@@ -40,22 +41,23 @@ process PLINK2_SCORE {
     // custom args2
     def maxcol = (scoremeta.n_scores.toInteger() + 2) // id + effect allele = 2 cols
-    // if we load allelic frequencies, don't do mean imputation
-    def no_imputation = (ref_afreq.name == 'NO_FILE') ? "no-mean-imputation" : ""
-    // if no-mean-imputation, be more efficient
+    // only skip mean imputation when frequency estimates would be unreliable:
+    // no reference allele frequencies loaded and too few samples (n < 50)
+    def no_imputation = ((ref_afreq.name == 'NO_FILE') && (meta.n_samples.toInteger() < 50)) ? "no-mean-imputation" : ""
     def error_on_freq_calc = (no_imputation == "no-mean-imputation") ? "--error-on-freq-calc" : ""
-    def cols = (meta.n_samples.toInteger() < 50) ? 'header-read cols=+scoresums,+denom,-fid' : 'header-read cols=+scoresums,+denom,-fid'
+    def cols = 'header-read cols=+scoresums,+denom,-fid'
     def recessive = (scoremeta.effect_type == 'recessive') ? ' recessive ' : ''
     def dominant = (scoremeta.effect_type == 'dominant') ? ' dominant ' : ''
     args2 = [args2, cols, 'list-variants', no_imputation, recessive, dominant, error_on_freq_calc].join(' ')
 
+    // speed up the calculation by only considering scoring-file variants for allele frequency calculation (--extract)
     if (scoremeta.n_scores.toInteger() == 1)
         """
         plink2 \
             --threads $task.cpus \
             --memory $mem_mb \
             --seed 31 \
+            --extract $scorefile \
             $load_afreq \
             $args \
             --score $scorefile $args2 \
@@ -73,6 +75,7 @@
             --threads $task.cpus \
             --memory $mem_mb \
             --seed 31 \
+            --extract $scorefile \
             $load_afreq \
             $args \
             --score $scorefile $args2 \
diff --git a/modules/local/plink2_vcf.nf b/modules/local/plink2_vcf.nf
index 10ceb515..d13201d4 100644
--- a/modules/local/plink2_vcf.nf
+++ b/modules/local/plink2_vcf.nf
@@ -6,8 +6,8 @@ process PLINK2_VCF {
 
     tag "$meta.id chromosome $meta.chrom"
 
-    storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${params.target_build}/${meta.chrom}" :
-               "$workDir/genomes/${meta.id}/${params.target_build}/${meta.chrom}/")
+    storeDir ( params.genotypes_cache ?
"$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" : + "$workDir/genomes/${meta.id}/${meta.build}/${meta.chrom}/") conda "${task.ext.conda}" @@ -20,19 +20,20 @@ process PLINK2_VCF { tuple val(meta), path(vcf) output: - tuple val(newmeta), path("*.pgen"), emit: pgen - tuple val(newmeta), path("*.psam"), emit: psam - tuple val(newmeta), path("*.zst") , emit: pvar - tuple val(meta), path("*.vmiss.gz"), emit: vmiss + tuple val(newmeta), path("${meta.build}_*.pgen"), emit: pgen + tuple val(newmeta), path("${meta.build}_*.psam"), emit: psam + tuple val(newmeta), path("${meta.build}_*.zst") , emit: pvar + tuple val(newmeta), path("${meta.build}_*.vmiss.gz"), emit: vmiss path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}_" + def prefix = task.ext.suffix ? "${meta.id}_${task.ext.suffix}" : "${meta.id}" def mem_mb = task.memory.toMega() def dosage_options = meta.vcf_import_dosage ? 'dosage=DS' : '' // rewriting genotypes, so use --max-alleles instead of using generic ID def set_ma_missing = params.keep_multiallelic ? '' : '--max-alleles 2' + def chrom_filter = meta.chrom == "ALL" ? "--chr 1-22, X, Y, XY" : "--chr ${meta.chrom}" // filter to canonical/stated chromosome newmeta = meta.clone() // copy hashmap for updating... newmeta.is_pfile = true // now it's converted to a pfile :) @@ -45,8 +46,9 @@ process PLINK2_VCF { --missing vcols=fmissdosage,fmiss \\ $args \\ --vcf $vcf $dosage_options \\ + --allow-extra-chr $chrom_filter \\ --make-pgen vzs \\ - --out ${params.target_build}_${prefix}${meta.chrom} + --out ${meta.build}_${prefix}_${meta.chrom}_vcf gzip *.vmiss diff --git a/nextflow.config b/nextflow.config index 279c6779..5892f786 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,7 +22,7 @@ params { efo_direct = false // reference params - run_ancestry = null // path to reference database + run_ancestry = null // path to reference database TODO: replace with NO_FILE ancestry_checksums = "$projectDir/assets/ancestry/checksums.txt" // if you want to liftover --scorefiles, set the chain files hg19_chain = null // "https://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz" @@ -37,7 +37,7 @@ params { ld_grch38 = "$projectDir/assets/ancestry/high-LD-regions-hg38-GRCh38.txt" // ancestry params - ancestry_params_file = null + ref_format_version = "v0.1" ref_samplesheet = "$projectDir/assets/ancestry/reference.csv" projection_method = "oadp" ancestry_method = "RandomForest" @@ -47,7 +47,6 @@ params { n_normalization = 4 // compatibility params - compat_params_file = null liftover = false target_build = null min_lift = 0.95 @@ -80,7 +79,7 @@ params { help = false validate_params = true show_hidden_params = false - schema_ignore_params = 'only_bootstrap,only_input,only_compatible,only_match,only_projection,only_score,skip_ancestry,igenomes_ignore' + schema_ignore_params = 'only_bootstrap,only_input,only_compatible,only_match,only_projection,only_score,skip_ancestry' // Max resource options // Defaults only, expecting to be overwritten diff --git a/subworkflows/local/ancestry/ancestry_project.nf b/subworkflows/local/ancestry/ancestry_project.nf index 8fcae1fc..4420ea2e 100644 --- a/subworkflows/local/ancestry/ancestry_project.nf +++ b/subworkflows/local/ancestry/ancestry_project.nf @@ -71,7 +71,7 @@ workflow ANCESTRY_PROJECT { // ch_genomes - .join(vmiss) + .join(vmiss, failOnMismatch: true) .combine( ch_db.map{ it.tail() } ) // (drop hashmap) .flatten() .buffer(size: 8) diff --git 
a/subworkflows/local/ancestry/bootstrap_ancestry.nf b/subworkflows/local/ancestry/bootstrap_ancestry.nf
index 49fd9a07..2dfdbc4a 100644
--- a/subworkflows/local/ancestry/bootstrap_ancestry.nf
+++ b/subworkflows/local/ancestry/bootstrap_ancestry.nf
@@ -2,7 +2,7 @@
 // Create a database containing reference data required for ancestry inference
 //
 include { SETUP_RESOURCE } from '../../../modules/local/ancestry/bootstrap/setup_resource'
-include { PLINK2_RELABELPVAR } from '../../../modules/local/plink2_relabelpvar'
+include { PLINK2_RELABELPVAR as BOOTSTRAP_RELABEL } from '../../../modules/local/plink2_relabelpvar'
 include { MAKE_DATABASE } from '../../../modules/local/ancestry/bootstrap/make_database'
 
 workflow BOOTSTRAP_ANCESTRY {
@@ -33,11 +33,11 @@ workflow BOOTSTRAP_ANCESTRY {
 
     SETUP_RESOURCE.out.plink.dump( tag: 'ref_setup' )
 
-    PLINK2_RELABELPVAR( SETUP_RESOURCE.out.plink )
-    ch_versions = ch_versions.mix(PLINK2_RELABELPVAR.out.versions.first())
+    BOOTSTRAP_RELABEL( SETUP_RESOURCE.out.plink )
+    ch_versions = ch_versions.mix(BOOTSTRAP_RELABEL.out.versions.first())
 
-    PLINK2_RELABELPVAR.out.geno
-        .concat(PLINK2_RELABELPVAR.out.pheno, PLINK2_RELABELPVAR.out.variants)
+    BOOTSTRAP_RELABEL.out.geno
+        .concat(BOOTSTRAP_RELABEL.out.pheno, BOOTSTRAP_RELABEL.out.variants)
         .dump(tag: 'ancestry_relabelled')
         .set { relabelled }
 
@@ -47,12 +47,14 @@ workflow BOOTSTRAP_ANCESTRY {
         .groupTuple(size: 3)
         .dump(tag: 'ancestry_relabelled_grouped')
         .map { drop_meta_keys(it).flatten() }
-        .set{ relabelled_flat }
+        .set{ relabelled_flat }
 
-    ref.king
-        .map { drop_meta_keys(it) }
-        // dropping meta keys simplifies the join
-        .join( relabelled_flat )
+    ref.king.branch {
+        GRCh37: it[0].build == "GRCh37"
+        GRCh38: it[0].build == "GRCh38"
+    }.set { ch_king }
+
+    relabelled_flat
         .flatten()
         .filter(Path)
         .collect()
@@ -62,7 +64,7 @@ workflow BOOTSTRAP_ANCESTRY {
     Channel.fromPath(params.ancestry_checksums, checkIfExists: true)
         .set { ch_checksums }
 
-    MAKE_DATABASE( ch_raw_ref, ch_checksums )
+    MAKE_DATABASE( ch_raw_ref, ch_king.GRCh37, ch_king.GRCh38, ch_checksums )
     ch_versions = ch_versions.mix(MAKE_DATABASE.out.versions)
 
     emit:
diff --git a/tests/ancestry/samplesheet_vcf.csv b/tests/ancestry/samplesheet_vcf.csv
new file mode 100644
index 00000000..19f13eb0
--- /dev/null
+++ b/tests/ancestry/samplesheet_vcf.csv
@@ -0,0 +1,2 @@
+sampleset,path_prefix,chrom,format
+test,ANCESTRY_TARGET_DIR/GRCh38_HAPNEST_TARGET_ALL,,vcf
\ No newline at end of file
diff --git a/tests/ancestry/test.yml b/tests/ancestry/test_ancestry.yml
similarity index 81%
rename from tests/ancestry/test.yml
rename to tests/ancestry/test_ancestry.yml
index b31c2e81..5cf5370e 100644
--- a/tests/ancestry/test.yml
+++ b/tests/ancestry/test_ancestry.yml
@@ -27,4 +27,11 @@
       - "AFR,100 (33.33%)"
       - "EAS,100 (33.33%)"
       - "EUR,100 (33.33%)"
+    - path: output/plink2/test_ALL_additive_0.log
+      contains:
+        - "--read-freq"
+        - "--extract"
+      must_not_contain:
+        - "no-mean-imputation"
+        - "error-on-freq-calc"
 
diff --git a/tests/ancestry/test_ancestry_vcf.yml b/tests/ancestry/test_ancestry_vcf.yml
new file mode 100644
index 00000000..991fcbfb
--- /dev/null
+++ b/tests/ancestry/test_ancestry_vcf.yml
@@ -0,0 +1,38 @@
+# ancestry test notes:
+# need to stage reference in $ANCESTRY_REF_DIR
+# extract target in $ANCESTRY_TARGET_DIR
+# need to convert target to VCF
+
+- name: test ancestry projection and scoring with VCF input
+  command: >
+    bash -c "
+      sed \"s|ANCESTRY_TARGET_DIR|$ANCESTRY_TARGET_DIR|\" tests/ancestry/samplesheet_vcf.csv > samplesheet.csv;
+      nextflow run main.nf -c ./tests/config/nextflow.config \
+        --input samplesheet.csv \
+        --run_ancestry $ANCESTRY_REF_DIR/GRCh38_HAPNEST_reference.tar.zst \
+        --target_build GRCh38 \
+        --pgs_id PGS001229 \
+        --min_overlap 0.50 \
+        --scorefile false
+    "
+  tags:
+    - ancestry vcf
+    - slow
+  stdout:
+    contains:
+      - "Pipeline completed successfully"
+  files:
+    - path: "output/test/score/pop_summary.csv"
+      contains:
+        - "AFR,100 (33.33%)"
+        - "EAS,100 (33.33%)"
+        - "EUR,100 (33.33%)"
+    - path: output/plink2/test_ALL_additive_0.log
+      contains:
+        - "--read-freq"
+        - "--extract"
+      must_not_contain:
+        - "no-mean-imputation"
+        - "error-on-freq-calc"
+
diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config
index 3c530d3c..bfb8aeb7 100644
--- a/tests/config/nextflow.config
+++ b/tests/config/nextflow.config
@@ -25,13 +25,16 @@ process {
 }
 
 def platform = "$PROFILE" == 'arm' ? '--platform linux/arm64' : '--platform linux/amd64'
-def mount_home = "-v $HOME:$HOME"
+def mount_home = "-v $HOME:$HOME -v /private/var/folders/"
 
 if ("$PROFILE" == "singularity") {
     singularity.enabled = true
     singularity.autoMounts = true
 } else if ("$PROFILE" == "conda") {
     conda.enabled = true
+} else if ("$PROFILE" == "mamba") {
+    conda.enabled = true
+    conda.useMamba = true
 } else if ("$PROFILE" == "arm") {
     docker.enabled = true
     docker.userEmulation = false
diff --git a/tests/modules/combine/test.yml b/tests/modules/combine/test.yml
index abd151cf..9f5d708d 100644
--- a/tests/modules/combine/test.yml
+++ b/tests/modules/combine/test.yml
@@ -15,6 +15,6 @@
       - "effect_type"
   - path: output/combine/versions.yml
     contains:
-      - "pgscatalog_utils: 0.4.1"
+      - "pgscatalog_utils: 0.4.2"
 
diff --git a/tests/modules/download/test.yml b/tests/modules/download/test.yml
index 79ced86c..e6a341fe 100644
--- a/tests/modules/download/test.yml
+++ b/tests/modules/download/test.yml
@@ -8,7 +8,7 @@
     - path: output/download/PGS000001_hmPOS_GRCh37.txt.gz
     - path: output/download/versions.yml
       contains:
-        - "pgscatalog_utils: 0.4.1"
+        - "pgscatalog_utils: 0.4.2"
 
 - name: pgscatalog test --efo_trait --pgp_id and --pgs_id
   command: nextflow run ./tests/modules/download -entry testmultipleaccessions -c ./tests/config/nextflow.config
@@ -24,7 +24,7 @@
     - path: output/download/PGS002054_hmPOS_GRCh37.txt.gz
     - path: output/download/versions.yml
       contains:
-        - "pgscatalog_utils: 0.4.1"
+        - "pgscatalog_utils: 0.4.2"
 
 - name: pgscatalog test bad accession
   command: nextflow run ./tests/modules/download -entry testbadaccession -c ./tests/config/nextflow.config
@@ -44,4 +44,4 @@
     - path: output/download/PGS000001_hmPOS_GRCh38.txt.gz
     - path: output/download/versions.yml
       contains:
-        - "pgscatalog_utils: 0.4.1"
+        - "pgscatalog_utils: 0.4.2"
diff --git a/tests/modules/match/test.yml b/tests/modules/match/test.yml
index 689353ff..191d190e 100644
--- a/tests/modules/match/test.yml
+++ b/tests/modules/match/test.yml
@@ -8,7 +8,7 @@
   files:
     - path: output/test/match/versions.yml
       contains:
-        - "pgscatalog_utils: 0.4.1"
+        - "pgscatalog_utils: 0.4.2"
 
 - name: test match combine module
   command: nextflow run ./tests/modules/match -entry testmatchcombine -c ./tests/config/nextflow.config
@@ -20,7 +20,7 @@
   files:
     - path: output/combine/versions.yml
       contains:
-        - "pgscatalog_utils: 0.4.1"
+        - "pgscatalog_utils: 0.4.2"
     - path: output/combine/scorefiles.txt.gz
       contains:
         - "effect_allele"
diff --git a/tests/modules/plink2/relabelbim/main.nf b/tests/modules/plink2/relabelbim/main.nf
index 4ae36f09..9d530119 100644
--- a/tests/modules/plink2/relabelbim/main.nf
+++
b/tests/modules/plink2/relabelbim/main.nf @@ -8,7 +8,7 @@ workflow testrelabelbim { bim = file('https://gitlab.ebi.ac.uk/nebfield/test-datasets/-/raw/master/pgsc_calc/cineca_synthetic_subset.bim') bed = file('https://gitlab.ebi.ac.uk/nebfield/test-datasets/-/raw/master/pgsc_calc/cineca_synthetic_subset.bed') fam = file('https://gitlab.ebi.ac.uk/nebfield/test-datasets/-/raw/master/pgsc_calc/cineca_synthetic_subset.fam') - def meta = [id: 'test', is_bfile: true] + def meta = [id: 'test', build: 'GRCh37', is_bfile: true, chrom: 22] PLINK2_RELABELBIM( Channel.of([meta, bed, bim, fam]) ) } diff --git a/tests/modules/plink2/relabelbim/test.yml b/tests/modules/plink2/relabelbim/test.yml index cd6bbab4..6338f261 100644 --- a/tests/modules/plink2/relabelbim/test.yml +++ b/tests/modules/plink2/relabelbim/test.yml @@ -5,10 +5,10 @@ - fast - module files: - - path: output/plink2/GRCh37_test_null.bed + - path: output/plink2/GRCh37_test_22.bed md5sum: a8be76ae3301d395563784fcbd571ae2 - - path: output/plink2/GRCh37_test_null.bim.zst - - path: output/plink2/GRCh37_test_null.fam + - path: output/plink2/GRCh37_test_22.bim.zst + - path: output/plink2/GRCh37_test_22.fam md5sum: 8915d48959a21e827d1db1b192422ba1 - path: output/plink2/versions.yml contains: diff --git a/tests/modules/plink2/relabelpvar/main.nf b/tests/modules/plink2/relabelpvar/main.nf index e460bafd..8cfa13b8 100644 --- a/tests/modules/plink2/relabelpvar/main.nf +++ b/tests/modules/plink2/relabelpvar/main.nf @@ -7,7 +7,7 @@ include { PLINK2_RELABELPVAR } from '../../../../modules/local/plink2_relabelpva workflow testrelabelpvar { vcf = file('https://gitlab.ebi.ac.uk/nebfield/test-datasets/-/raw/master/pgsc_calc/cineca_synthetic_subset.vcf.gz') - def meta = [id: 'test', chrom: 22] + def meta = [id: 'test', 'build': 'GRCh37', chrom: 22] PLINK2_VCF(Channel.of([meta, vcf])) diff --git a/tests/modules/plink2/relabelpvar/test.yml b/tests/modules/plink2/relabelpvar/test.yml index a13895cc..0b8341b0 100644 --- a/tests/modules/plink2/relabelpvar/test.yml +++ b/tests/modules/plink2/relabelpvar/test.yml @@ -5,11 +5,11 @@ - fast - module files: - - path: output/plink2/GRCh37_test_22.psam + - path: output/plink2/GRCh37_test_22_vcf.psam md5sum: 90f1430b71153d59bc14e9499b0366f4 - - path: output/plink2/GRCh37_test_22.pgen + - path: output/plink2/GRCh37_test_22_vcf.pgen md5sum: be32a51a5509111327a5deb6a3610b2d - - path: output/plink2/GRCh37_test_22.pvar.zst + - path: output/plink2/GRCh37_test_22_vcf.pvar.zst - path: output/plink2/versions.yml contains: - "plink2: 2.00a3.3" diff --git a/tests/modules/plink2/score/test.yml b/tests/modules/plink2/score/test.yml index 4dddd189..b7098e10 100644 --- a/tests/modules/plink2/score/test.yml +++ b/tests/modules/plink2/score/test.yml @@ -11,6 +11,8 @@ - "PGS001229_22_SUM" - path: output/plink2/test_null_null_null.log contains: + - "--extract" + must_not_contain: - "no-mean-imputation" - "error-on-freq-calc" - path: output/plink2/versions.yml @@ -32,6 +34,7 @@ contains: - "no-mean-imputation" - "error-on-freq-calc" + - "--extract" - path: output/plink2/versions.yml contains: - "plink2: 2.00a3.3" @@ -50,9 +53,11 @@ - "second_score_SUM" - path: output/plink2/test_null_null_null.log contains: + - "--score-col-nums" + - "--extract" + must_not_contain: - "error-on-freq-calc" - "no-mean-imputation" - - "--score-col-nums" - path: output/plink2/versions.yml contains: - "plink2: 2.00a3.3" @@ -74,6 +79,7 @@ - "no-mean-imputation" - "error-on-freq-calc" - "--score-col-nums" + - "--extract" - path: output/plink2/versions.yml contains: 
- "plink2: 2.00a3.3" diff --git a/tests/modules/plink2/vcf/main.nf b/tests/modules/plink2/vcf/main.nf index 612715a6..b078074f 100644 --- a/tests/modules/plink2/vcf/main.nf +++ b/tests/modules/plink2/vcf/main.nf @@ -6,7 +6,7 @@ include { PLINK2_VCF } from '../../../../modules/local/plink2_vcf' workflow testvcf { vcf = file('https://gitlab.ebi.ac.uk/nebfield/test-datasets/-/raw/master/pgsc_calc/cineca_synthetic_subset.vcf.gz') - def meta = [id: 'test', is_vcf: true] + def meta = [id: 'test', is_vcf: true, build: 'GRCh37', chrom: '22'] PLINK2_VCF(Channel.of([meta, vcf])) diff --git a/tests/modules/plink2/vcf/test.yml b/tests/modules/plink2/vcf/test.yml index 7174a174..f9440e26 100644 --- a/tests/modules/plink2/vcf/test.yml +++ b/tests/modules/plink2/vcf/test.yml @@ -5,9 +5,9 @@ - plink2 - fast files: - - path: output/plink2/GRCh37_vcf_null.pgen - - path: output/plink2/GRCh37_vcf_null.psam - - path: output/plink2/GRCh37_vcf_null.pvar.zst + - path: output/plink2/GRCh37_test_22_vcf.pgen + - path: output/plink2/GRCh37_test_22_vcf.psam + - path: output/plink2/GRCh37_test_22_vcf.pvar.zst - path: output/plink2/versions.yml contains: - "plink2: 2.00a3.3" diff --git a/tests/subworkflows/test_liftover_run.yml b/tests/subworkflows/test_liftover_run.yml index ddf0d2ef..9c1a8f3c 100644 --- a/tests/subworkflows/test_liftover_run.yml +++ b/tests/subworkflows/test_liftover_run.yml @@ -9,7 +9,7 @@ - path: output/combine/scorefiles.txt.gz - path: output/combine/versions.yml contains: - - "pgscatalog_utils: 0.4.1" + - "pgscatalog_utils: 0.4.2" - name: test input check subworkflow with liftover 37to38 command: nextflow run main.nf --only_input --pgs_id PGS001229 --liftover --target_build GRCh38 -c ./tests/config/nextflow.config --hg19_chain https://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz --hg38_chain https://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/hg38ToHg19.over.chain.gz @@ -22,4 +22,4 @@ - path: output/combine/scorefiles.txt.gz - path: output/combine/versions.yml contains: - - "pgscatalog_utils: 0.4.1" + - "pgscatalog_utils: 0.4.2" diff --git a/tests/subworkflows/test_make_compatible.yml b/tests/subworkflows/test_make_compatible.yml index caa68411..11499f92 100644 --- a/tests/subworkflows/test_make_compatible.yml +++ b/tests/subworkflows/test_make_compatible.yml @@ -21,10 +21,10 @@ files: - path: output/samplesheet/out.json - path: output/combine/scorefiles.txt.gz - - path: output/plink2/GRCh37_vcf_22.pgen - - path: output/plink2/GRCh37_vcf_22.pvar.zst - - path: output/plink2/GRCh37_vcf_22.psam - - path: output/plink2/GRCh37_vcf_22.vmiss.gz + - path: output/plink2/GRCh37_cineca_22_vcf.pgen + - path: output/plink2/GRCh37_cineca_22_vcf.pvar.zst + - path: output/plink2/GRCh37_cineca_22_vcf.psam + - path: output/plink2/GRCh37_cineca_22_vcf.vmiss.gz - name: test make compatible subworkflow with pfile command: nextflow run main.nf --only_compatible -c ./tests/config/nextflow.config diff --git a/workflows/pgscalc.nf b/workflows/pgscalc.nf index 60304a07..6c7242aa 100644 --- a/workflows/pgscalc.nf +++ b/workflows/pgscalc.nf @@ -128,7 +128,7 @@ if (params.only_projection) { run_ancestry_bootstrap = true run_input_check = true run_make_compatible = true - run_match = false + run_match = true run_ancestry_assign = true run_apply_score = false run_report = false