From ef191eb2368dfd79a50fe43aea90c86000bfb00b Mon Sep 17 00:00:00 2001
From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com>
Date: Mon, 3 Mar 2025 09:37:09 +0100
Subject: [PATCH 1/4] fix values docs
---
kubernetes/loculus/values.schema.json | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/kubernetes/loculus/values.schema.json b/kubernetes/loculus/values.schema.json
index 93802fe2a..4b3687403 100644
--- a/kubernetes/loculus/values.schema.json
+++ b/kubernetes/loculus/values.schema.json
@@ -335,19 +335,19 @@
"description": "Fields that should be added to the preprocessing pipeline config file.",
"properties": {
"alignment_requirement": {
- "groups": ["ingest"],
+ "groups": ["preprocessing.configFile"],
"docsIncludePrefix": false,
"type": "string",
"enum": ["ALL", "ANY"],
"description": "If multi-segmented viruses should require ALL segments align or ANY segment aligns"
},
"nextclade_dataset_server": {
- "groups": ["ingest"],
+ "groups": ["preprocessing.configFile"],
"docsIncludePrefix": false,
"type": "string"
},
"nextclade_dataset_name": {
- "groups": ["ingest"],
+ "groups": ["preprocessing.configFile"],
"docsIncludePrefix": false,
"type": "string",
"description": "Required if sequences should be aligned"
@@ -379,43 +379,43 @@
"description": "Fields that should be added to the ingest pipeline config file",
"properties": {
"taxon_id": {
- "groups": ["ingest"],
+ "groups": ["ingest.configFile"],
"docsIncludePrefix": false,
"type": "integer",
"description": "NCBI taxon ID for the organism"
},
"segment_identification": {
- "groups": ["ingest"],
+ "groups": ["ingest.configFile"],
"docsIncludePrefix": false,
"type": "object",
"description": "If multi-segmented organism, how to identify segments",
"properties": {
"method": {
- "groups": ["ingest"],
+ "groups": ["ingest.configFile"],
"docsIncludePrefix": false,
"type": "string",
"enum": ["align", "minimizer"],
"description": "Method to identify segments, uses either nextclade align or nextclade sort"
},
"nextclade_dataset_server": {
- "groups": ["ingest"],
+ "groups": ["ingest.configFile"],
"docsIncludePrefix": false,
"type": "string"
},
"nextclade_dataset_name": {
- "groups": ["ingest"],
+ "groups": ["ingest.configFile"],
"docsIncludePrefix": false,
"type": "string",
"description": "Required if method is align"
},
"minimizer_parser": {
- "groups": ["ingest"],
+ "groups": ["ingest.configFile"],
"docsIncludePrefix": false,
"type": "array",
"description": "Required if method is minimizer, list of the name of each '_' - separated metadata field in the minimizer index"
},
"minimizer_index": {
- "groups": ["ingest"],
+ "groups": ["ingest.configFile"],
"docsIncludePrefix": false,
"type": "string",
"description": "Required if method is minimizer"
@@ -424,13 +424,13 @@
"required": ["method"]
},
"grouping_override": {
- "groups": ["ingest"],
+ "groups": ["ingest.configFile"],
"docsIncludePrefix": false,
"type": "string",
"description": "If multi-segmented organism, segment grouping overrides"
},
"metadata_filter": {
- "groups": ["ingest"],
+ "groups": ["ingest.configFile"],
"docsIncludePrefix": false,
"type": "object",
"description": "Filter ingested sequences based on value in metadata. Filter should be a list of metadata field and value pairs."
From 45f0d9572952135c5b6fc1aeda55c6e1e1040db3 Mon Sep 17 00:00:00 2001
From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com>
Date: Mon, 3 Mar 2025 09:46:30 +0100
Subject: [PATCH 2/4] fix bug
---
kubernetes/loculus/values.schema.json | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/kubernetes/loculus/values.schema.json b/kubernetes/loculus/values.schema.json
index 4b3687403..a6a43d5de 100644
--- a/kubernetes/loculus/values.schema.json
+++ b/kubernetes/loculus/values.schema.json
@@ -335,19 +335,19 @@
"description": "Fields that should be added to the preprocessing pipeline config file.",
"properties": {
"alignment_requirement": {
- "groups": ["preprocessing.configFile"],
+ "groups": ["preprocessing"],
"docsIncludePrefix": false,
"type": "string",
"enum": ["ALL", "ANY"],
"description": "If multi-segmented viruses should require ALL segments align or ANY segment aligns"
},
"nextclade_dataset_server": {
- "groups": ["preprocessing.configFile"],
+ "groups": ["preprocessing"],
"docsIncludePrefix": false,
"type": "string"
},
"nextclade_dataset_name": {
- "groups": ["preprocessing.configFile"],
+ "groups": ["preprocessing"],
"docsIncludePrefix": false,
"type": "string",
"description": "Required if sequences should be aligned"
@@ -379,43 +379,43 @@
"description": "Fields that should be added to the ingest pipeline config file",
"properties": {
"taxon_id": {
- "groups": ["ingest.configFile"],
+ "groups": ["ingest"],
"docsIncludePrefix": false,
"type": "integer",
"description": "NCBI taxon ID for the organism"
},
"segment_identification": {
- "groups": ["ingest.configFile"],
+ "groups": ["ingest"],
"docsIncludePrefix": false,
"type": "object",
"description": "If multi-segmented organism, how to identify segments",
"properties": {
"method": {
- "groups": ["ingest.configFile"],
+ "groups": ["ingest"],
"docsIncludePrefix": false,
"type": "string",
"enum": ["align", "minimizer"],
"description": "Method to identify segments, uses either nextclade align or nextclade sort"
},
"nextclade_dataset_server": {
- "groups": ["ingest.configFile"],
+ "groups": ["ingest"],
"docsIncludePrefix": false,
"type": "string"
},
"nextclade_dataset_name": {
- "groups": ["ingest.configFile"],
+ "groups": ["ingest"],
"docsIncludePrefix": false,
"type": "string",
"description": "Required if method is align"
},
"minimizer_parser": {
- "groups": ["ingest.configFile"],
+ "groups": ["ingest"],
"docsIncludePrefix": false,
"type": "array",
"description": "Required if method is minimizer, list of the name of each '_' - separated metadata field in the minimizer index"
},
"minimizer_index": {
- "groups": ["ingest.configFile"],
+ "groups": ["ingest"],
"docsIncludePrefix": false,
"type": "string",
"description": "Required if method is minimizer"
@@ -424,13 +424,13 @@
"required": ["method"]
},
"grouping_override": {
- "groups": ["ingest.configFile"],
+ "groups": ["ingest"],
"docsIncludePrefix": false,
"type": "string",
"description": "If multi-segmented organism, segment grouping overrides"
},
"metadata_filter": {
- "groups": ["ingest.configFile"],
+ "groups": ["ingest"],
"docsIncludePrefix": false,
"type": "object",
"description": "Filter ingested sequences based on value in metadata. Filter should be a list of metadata field and value pairs."
From 2d32f539aee525d7c4eb92cfde7c80a9bed6cd35 Mon Sep 17 00:00:00 2001
From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com>
Date: Mon, 3 Mar 2025 09:53:35 +0100
Subject: [PATCH 3/4] second try
---
.../content/docs/reference/helm-chart-config.mdx | 10 +++++-----
kubernetes/loculus/values.schema.json | 16 ++++++++--------
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/docs/src/content/docs/reference/helm-chart-config.mdx b/docs/src/content/docs/reference/helm-chart-config.mdx
index 22bb1c09e..bf1969abb 100644
--- a/docs/src/content/docs/reference/helm-chart-config.mdx
+++ b/docs/src/content/docs/reference/helm-chart-config.mdx
@@ -79,16 +79,16 @@ The values for `args` and `configFile` depend on the used preprocessing pipeline
For the Nextclade preprocessing pipeline, please see
[here](../../for-administrators/existing-preprocessing-pipelines/#nextclade-based-pipeline).
+
+
### Ingest (type)
The values for `configFile` depend on the used preprocessing pipeline.
-For [our ingest pipeline](https://github.com/loculus-project/loculus/tree/main/ingest)
-which downloads data from NCBI GenBank using NCBI Datasets, the config file needs to
-contain the taxon_id of the organism and additionally, if the organism is multi-segmented,
-it requires a list of segment names (nucleotide_sequences) and the nextclade_dataset that
-can be used for segment identification and alignment.
+For [our ingest pipeline](https://github.com/loculus-project/loculus/tree/main/ingest) we require the following fields:
+
+
### NucleotideSequence (type)
diff --git a/kubernetes/loculus/values.schema.json b/kubernetes/loculus/values.schema.json
index a6a43d5de..242433743 100644
--- a/kubernetes/loculus/values.schema.json
+++ b/kubernetes/loculus/values.schema.json
@@ -335,19 +335,19 @@
"description": "Fields that should be added to the preprocessing pipeline config file.",
"properties": {
"alignment_requirement": {
- "groups": ["preprocessing"],
+ "groups": ["nextcladePipelineConfigFile"],
"docsIncludePrefix": false,
"type": "string",
"enum": ["ALL", "ANY"],
"description": "If multi-segmented viruses should require ALL segments align or ANY segment aligns"
},
"nextclade_dataset_server": {
- "groups": ["preprocessing"],
+ "groups": ["nextcladePipelineConfigFile"],
"docsIncludePrefix": false,
"type": "string"
},
"nextclade_dataset_name": {
- "groups": ["preprocessing"],
+ "groups": ["nextcladePipelineConfigFile"],
"docsIncludePrefix": false,
"type": "string",
"description": "Required if sequences should be aligned"
@@ -391,31 +391,31 @@
"description": "If multi-segmented organism, how to identify segments",
"properties": {
"method": {
- "groups": ["ingest"],
+ "groups": ["ingestPipelineConfigFile"],
"docsIncludePrefix": false,
"type": "string",
"enum": ["align", "minimizer"],
"description": "Method to identify segments, uses either nextclade align or nextclade sort"
},
"nextclade_dataset_server": {
- "groups": ["ingest"],
+ "groups": ["ingestPipelineConfigFile"],
"docsIncludePrefix": false,
"type": "string"
},
"nextclade_dataset_name": {
- "groups": ["ingest"],
+ "groups": ["ingestPipelineConfigFile"],
"docsIncludePrefix": false,
"type": "string",
"description": "Required if method is align"
},
"minimizer_parser": {
- "groups": ["ingest"],
+ "groups": ["ingestPipelineConfigFile"],
"docsIncludePrefix": false,
"type": "array",
"description": "Required if method is minimizer, list of the name of each '_' - separated metadata field in the minimizer index"
},
"minimizer_index": {
- "groups": ["ingest"],
+ "groups": ["ingestPipelineConfigFile"],
"docsIncludePrefix": false,
"type": "string",
"description": "Required if method is minimizer"
From e21bf91f666b25c637df58718668372d7dba1af0 Mon Sep 17 00:00:00 2001
From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com>
Date: Mon, 3 Mar 2025 10:02:28 +0100
Subject: [PATCH 4/4] clean up
---
docs/src/content/docs/reference/helm-chart-config.mdx | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/docs/src/content/docs/reference/helm-chart-config.mdx b/docs/src/content/docs/reference/helm-chart-config.mdx
index bf1969abb..84938e0d6 100644
--- a/docs/src/content/docs/reference/helm-chart-config.mdx
+++ b/docs/src/content/docs/reference/helm-chart-config.mdx
@@ -76,16 +76,22 @@ Definition of metadata fields for sequence entries of an organism, for example t
The values for `args` and `configFile` depend on the used preprocessing pipeline.
-For the Nextclade preprocessing pipeline, please see
-[here](../../for-administrators/existing-preprocessing-pipelines/#nextclade-based-pipeline).
+
+#### Nextclade Preprocessing Pipeline ConfigFile (type)
+For more details, see the documentation for the
+[Nextclade-based preprocessing pipeline](../../for-administrators/existing-preprocessing-pipelines/#nextclade-based-pipeline).
+
### Ingest (type)
The values for `configFile` depend on the used preprocessing pipeline.
+
+#### Ingest ConfigFile (type)
+
For [our ingest pipeline](https://github.com/loculus-project/loculus/tree/main/ingest) we require the following fields: