From ef191eb2368dfd79a50fe43aea90c86000bfb00b Mon Sep 17 00:00:00 2001
From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com>
Date: Mon, 3 Mar 2025 09:37:09 +0100
Subject: [PATCH 1/4] fix values docs

---
 kubernetes/loculus/values.schema.json | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/kubernetes/loculus/values.schema.json b/kubernetes/loculus/values.schema.json
index 93802fe2a..4b3687403 100644
--- a/kubernetes/loculus/values.schema.json
+++ b/kubernetes/loculus/values.schema.json
@@ -335,19 +335,19 @@
                 "description": "Fields that should be added to the preprocessing pipeline config file.",
                 "properties": {
                     "alignment_requirement": {
-                      "groups": ["ingest"],
+                      "groups": ["preprocessing.configFile"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "enum": ["ALL", "ANY"],
                       "description": "If multi-segmented viruses should require ALL segments align or ANY segment aligns"
                     },
                     "nextclade_dataset_server": {
-                      "groups": ["ingest"],
+                      "groups": ["preprocessing.configFile"],
                       "docsIncludePrefix": false,
                       "type": "string"
                     },
                     "nextclade_dataset_name": {
-                      "groups": ["ingest"],
+                      "groups": ["preprocessing.configFile"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "description": "Required if sequences should be aligned"
@@ -379,43 +379,43 @@
               "description": "Fields that should be added to the ingest pipeline config file",
               "properties": {
                 "taxon_id": {
-                  "groups": ["ingest"],
+                  "groups": ["ingest.configFile"],
                   "docsIncludePrefix": false,
                   "type": "integer",
                   "description": "NCBI taxon ID for the organism"
                 },
                 "segment_identification": {
-                  "groups": ["ingest"],
+                  "groups": ["ingest.configFile"],
                   "docsIncludePrefix": false,
                   "type": "object",
                   "description": "If multi-segmented organism, how to identify segments",
                   "properties": {
                     "method": {
-                      "groups": ["ingest"],
+                      "groups": ["ingest.configFile"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "enum": ["align", "minimizer"],
                       "description": "Method to identify segments, uses either nextclade align or nextclade sort"
                     },
                     "nextclade_dataset_server": {
-                      "groups": ["ingest"],
+                      "groups": ["ingest.configFile"],
                       "docsIncludePrefix": false,
                       "type": "string"
                     },
                     "nextclade_dataset_name": {
-                      "groups": ["ingest"],
+                      "groups": ["ingest.configFile"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "description": "Required if method is align"
                     },
                     "minimizer_parser": {
-                      "groups": ["ingest"],
+                      "groups": ["ingest.configFile"],
                       "docsIncludePrefix": false,
                       "type": "array",
                       "description": "Required if method is minimizer, list of the name of each '_' - separated metadata field in the minimizer index"
                     },
                     "minimizer_index": {
-                      "groups": ["ingest"],
+                      "groups": ["ingest.configFile"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "description": "Required if method is minimizer"
@@ -424,13 +424,13 @@
                   "required": ["method"]
                 },
                 "grouping_override": {
-                  "groups": ["ingest"],
+                  "groups": ["ingest.configFile"],
                   "docsIncludePrefix": false,
                   "type": "string",
                   "description": "If multi-segmented organism, segment grouping overrides"
                 },
                 "metadata_filter": {
-                  "groups": ["ingest"],
+                  "groups": ["ingest.configFile"],
                   "docsIncludePrefix": false,
                   "type": "object",
                   "description": "Filter ingested sequences based on value in metadata. Filter should be a list of metadata field and value pairs."

From 45f0d9572952135c5b6fc1aeda55c6e1e1040db3 Mon Sep 17 00:00:00 2001
From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com>
Date: Mon, 3 Mar 2025 09:46:30 +0100
Subject: [PATCH 2/4] revert schema docs groups to 'preprocessing' and 'ingest'

---
 kubernetes/loculus/values.schema.json | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/kubernetes/loculus/values.schema.json b/kubernetes/loculus/values.schema.json
index 4b3687403..a6a43d5de 100644
--- a/kubernetes/loculus/values.schema.json
+++ b/kubernetes/loculus/values.schema.json
@@ -335,19 +335,19 @@
                 "description": "Fields that should be added to the preprocessing pipeline config file.",
                 "properties": {
                     "alignment_requirement": {
-                      "groups": ["preprocessing.configFile"],
+                      "groups": ["preprocessing"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "enum": ["ALL", "ANY"],
                       "description": "If multi-segmented viruses should require ALL segments align or ANY segment aligns"
                     },
                     "nextclade_dataset_server": {
-                      "groups": ["preprocessing.configFile"],
+                      "groups": ["preprocessing"],
                       "docsIncludePrefix": false,
                       "type": "string"
                     },
                     "nextclade_dataset_name": {
-                      "groups": ["preprocessing.configFile"],
+                      "groups": ["preprocessing"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "description": "Required if sequences should be aligned"
@@ -379,43 +379,43 @@
               "description": "Fields that should be added to the ingest pipeline config file",
               "properties": {
                 "taxon_id": {
-                  "groups": ["ingest.configFile"],
+                  "groups": ["ingest"],
                   "docsIncludePrefix": false,
                   "type": "integer",
                   "description": "NCBI taxon ID for the organism"
                 },
                 "segment_identification": {
-                  "groups": ["ingest.configFile"],
+                  "groups": ["ingest"],
                   "docsIncludePrefix": false,
                   "type": "object",
                   "description": "If multi-segmented organism, how to identify segments",
                   "properties": {
                     "method": {
-                      "groups": ["ingest.configFile"],
+                      "groups": ["ingest"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "enum": ["align", "minimizer"],
                       "description": "Method to identify segments, uses either nextclade align or nextclade sort"
                     },
                     "nextclade_dataset_server": {
-                      "groups": ["ingest.configFile"],
+                      "groups": ["ingest"],
                       "docsIncludePrefix": false,
                       "type": "string"
                     },
                     "nextclade_dataset_name": {
-                      "groups": ["ingest.configFile"],
+                      "groups": ["ingest"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "description": "Required if method is align"
                     },
                     "minimizer_parser": {
-                      "groups": ["ingest.configFile"],
+                      "groups": ["ingest"],
                       "docsIncludePrefix": false,
                       "type": "array",
                       "description": "Required if method is minimizer, list of the name of each '_' - separated metadata field in the minimizer index"
                     },
                     "minimizer_index": {
-                      "groups": ["ingest.configFile"],
+                      "groups": ["ingest"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "description": "Required if method is minimizer"
@@ -424,13 +424,13 @@
                   "required": ["method"]
                 },
                 "grouping_override": {
-                  "groups": ["ingest.configFile"],
+                  "groups": ["ingest"],
                   "docsIncludePrefix": false,
                   "type": "string",
                   "description": "If multi-segmented organism, segment grouping overrides"
                 },
                 "metadata_filter": {
-                  "groups": ["ingest.configFile"],
+                  "groups": ["ingest"],
                   "docsIncludePrefix": false,
                   "type": "object",
                   "description": "Filter ingested sequences based on value in metadata. Filter should be a list of metadata field and value pairs."

From 2d32f539aee525d7c4eb92cfde7c80a9bed6cd35 Mon Sep 17 00:00:00 2001
From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com>
Date: Mon, 3 Mar 2025 09:53:35 +0100
Subject: [PATCH 3/4] add dedicated docs groups for pipeline config file fields

---
 .../content/docs/reference/helm-chart-config.mdx | 10 +++++-----
 kubernetes/loculus/values.schema.json            | 16 ++++++++--------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/src/content/docs/reference/helm-chart-config.mdx b/docs/src/content/docs/reference/helm-chart-config.mdx
index 22bb1c09e..bf1969abb 100644
--- a/docs/src/content/docs/reference/helm-chart-config.mdx
+++ b/docs/src/content/docs/reference/helm-chart-config.mdx
@@ -79,16 +79,16 @@ The values for `args` and `configFile` depend on the used preprocessing pipeline
 For the Nextclade preprocessing pipeline, please see
 [here](../../for-administrators/existing-preprocessing-pipelines/#nextclade-based-pipeline).
 
+<SchemaDocs group='nextcladePipelineConfigFile' fieldColumnClass='w-28' />
+
 ### Ingest (type)
 
 <SchemaDocs group='ingest' fieldColumnClass='w-28' />
 
 The values for `configFile` depend on the used preprocessing pipeline.
-For [our ingest pipeline](https://github.com/loculus-project/loculus/tree/main/ingest)
-which downloads data from NCBI GenBank using NCBI Datasets, the config file needs to
-contain the taxon_id of the organism and additionally, if the organism is multi-segmented,
-it requires a list of segment names (nucleotide_sequences) and the nextclade_dataset that
-can be used for segment identification and alignment.
+For [our ingest pipeline](https://github.com/loculus-project/loculus/tree/main/ingest) we require the following fields:
+
+<SchemaDocs group='ingestPipelineConfigFile' fieldColumnClass='w-28' />
 
 ### NucleotideSequence (type)
 
diff --git a/kubernetes/loculus/values.schema.json b/kubernetes/loculus/values.schema.json
index a6a43d5de..242433743 100644
--- a/kubernetes/loculus/values.schema.json
+++ b/kubernetes/loculus/values.schema.json
@@ -335,19 +335,19 @@
                 "description": "Fields that should be added to the preprocessing pipeline config file.",
                 "properties": {
                     "alignment_requirement": {
-                      "groups": ["preprocessing"],
+                      "groups": ["nextcladePipelineConfigFile"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "enum": ["ALL", "ANY"],
                       "description": "If multi-segmented viruses should require ALL segments align or ANY segment aligns"
                     },
                     "nextclade_dataset_server": {
-                      "groups": ["preprocessing"],
+                      "groups": ["nextcladePipelineConfigFile"],
                       "docsIncludePrefix": false,
                       "type": "string"
                     },
                     "nextclade_dataset_name": {
-                      "groups": ["preprocessing"],
+                      "groups": ["nextcladePipelineConfigFile"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "description": "Required if sequences should be aligned"
@@ -391,31 +391,31 @@
                   "description": "If multi-segmented organism, how to identify segments",
                   "properties": {
                     "method": {
-                      "groups": ["ingest"],
+                      "groups": ["ingestPipelineConfigFile"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "enum": ["align", "minimizer"],
                       "description": "Method to identify segments, uses either nextclade align or nextclade sort"
                     },
                     "nextclade_dataset_server": {
-                      "groups": ["ingest"],
+                      "groups": ["ingestPipelineConfigFile"],
                       "docsIncludePrefix": false,
                       "type": "string"
                     },
                     "nextclade_dataset_name": {
-                      "groups": ["ingest"],
+                      "groups": ["ingestPipelineConfigFile"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "description": "Required if method is align"
                     },
                     "minimizer_parser": {
-                      "groups": ["ingest"],
+                      "groups": ["ingestPipelineConfigFile"],
                       "docsIncludePrefix": false,
                       "type": "array",
                       "description": "Required if method is minimizer, list of the name of each '_' - separated metadata field in the minimizer index"
                     },
                     "minimizer_index": {
-                      "groups": ["ingest"],
+                      "groups": ["ingestPipelineConfigFile"],
                       "docsIncludePrefix": false,
                       "type": "string",
                       "description": "Required if method is minimizer"

From e21bf91f666b25c637df58718668372d7dba1af0 Mon Sep 17 00:00:00 2001
From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com>
Date: Mon, 3 Mar 2025 10:02:28 +0100
Subject: [PATCH 4/4] clean up

---
 docs/src/content/docs/reference/helm-chart-config.mdx | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/src/content/docs/reference/helm-chart-config.mdx b/docs/src/content/docs/reference/helm-chart-config.mdx
index bf1969abb..84938e0d6 100644
--- a/docs/src/content/docs/reference/helm-chart-config.mdx
+++ b/docs/src/content/docs/reference/helm-chart-config.mdx
@@ -76,16 +76,22 @@ Definition of metadata fields for sequence entries of an organism, for example t
 <SchemaDocs group='preprocessing' fieldColumnClass='w-28' />
 
 The values for `args` and `configFile` depend on the used preprocessing pipeline.
-For the Nextclade preprocessing pipeline, please see
-[here](../../for-administrators/existing-preprocessing-pipelines/#nextclade-based-pipeline).
+
+#### Nextclade Preprocessing Pipeline ConfigFile (type)
 
 <SchemaDocs group='nextcladePipelineConfigFile' fieldColumnClass='w-28' />
 
+For more details, see the
+[Nextclade preprocessing pipeline documentation](../../for-administrators/existing-preprocessing-pipelines/#nextclade-based-pipeline).
+
 ### Ingest (type)
 
 <SchemaDocs group='ingest' fieldColumnClass='w-28' />
 
 The values for `configFile` depend on the used preprocessing pipeline.
+
+#### Ingest ConfigFile (type)
+
 For [our ingest pipeline](https://github.com/loculus-project/loculus/tree/main/ingest) we require the following fields:
 
 <SchemaDocs group='ingestPipelineConfigFile' fieldColumnClass='w-28' />