From ef191eb2368dfd79a50fe43aea90c86000bfb00b Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Mon, 3 Mar 2025 09:37:09 +0100 Subject: [PATCH 1/4] fix values docs --- kubernetes/loculus/values.schema.json | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/kubernetes/loculus/values.schema.json b/kubernetes/loculus/values.schema.json index 93802fe2a..4b3687403 100644 --- a/kubernetes/loculus/values.schema.json +++ b/kubernetes/loculus/values.schema.json @@ -335,19 +335,19 @@ "description": "Fields that should be added to the preprocessing pipeline config file.", "properties": { "alignment_requirement": { - "groups": ["ingest"], + "groups": ["preprocessing.configFile"], "docsIncludePrefix": false, "type": "string", "enum": ["ALL", "ANY"], "description": "If multi-segmented viruses should require ALL segments align or ANY segment aligns" }, "nextclade_dataset_server": { - "groups": ["ingest"], + "groups": ["preprocessing.configFile"], "docsIncludePrefix": false, "type": "string" }, "nextclade_dataset_name": { - "groups": ["ingest"], + "groups": ["preprocessing.configFile"], "docsIncludePrefix": false, "type": "string", "description": "Required if sequences should be aligned" @@ -379,43 +379,43 @@ "description": "Fields that should be added to the ingest pipeline config file", "properties": { "taxon_id": { - "groups": ["ingest"], + "groups": ["ingest.configFile"], "docsIncludePrefix": false, "type": "integer", "description": "NCBI taxon ID for the organism" }, "segment_identification": { - "groups": ["ingest"], + "groups": ["ingest.configFile"], "docsIncludePrefix": false, "type": "object", "description": "If multi-segmented organism, how to identify segments", "properties": { "method": { - "groups": ["ingest"], + "groups": ["ingest.configFile"], "docsIncludePrefix": false, "type": "string", "enum": ["align", "minimizer"], "description": "Method to identify segments, uses either nextclade align or nextclade sort" }, "nextclade_dataset_server": { - "groups": ["ingest"], + "groups": ["ingest.configFile"], "docsIncludePrefix": false, "type": "string" }, "nextclade_dataset_name": { - "groups": ["ingest"], + "groups": ["ingest.configFile"], "docsIncludePrefix": false, "type": "string", "description": "Required if method is align" }, "minimizer_parser": { - "groups": ["ingest"], + "groups": ["ingest.configFile"], "docsIncludePrefix": false, "type": "array", "description": "Required if method is minimizer, list of the name of each '_' - separated metadata field in the minimizer index" }, "minimizer_index": { - "groups": ["ingest"], + "groups": ["ingest.configFile"], "docsIncludePrefix": false, "type": "string", "description": "Required if method is minimizer" @@ -424,13 +424,13 @@ "required": ["method"] }, "grouping_override": { - "groups": ["ingest"], + "groups": ["ingest.configFile"], "docsIncludePrefix": false, "type": "string", "description": "If multi-segmented organism, segment grouping overrides" }, "metadata_filter": { - "groups": ["ingest"], + "groups": ["ingest.configFile"], "docsIncludePrefix": false, "type": "object", "description": "Filter ingested sequences based on value in metadata. Filter should be a list of metadata field and value pairs." From 45f0d9572952135c5b6fc1aeda55c6e1e1040db3 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Mon, 3 Mar 2025 09:46:30 +0100 Subject: [PATCH 2/4] fix bug --- kubernetes/loculus/values.schema.json | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/kubernetes/loculus/values.schema.json b/kubernetes/loculus/values.schema.json index 4b3687403..a6a43d5de 100644 --- a/kubernetes/loculus/values.schema.json +++ b/kubernetes/loculus/values.schema.json @@ -335,19 +335,19 @@ "description": "Fields that should be added to the preprocessing pipeline config file.", "properties": { "alignment_requirement": { - "groups": ["preprocessing.configFile"], + "groups": ["preprocessing"], "docsIncludePrefix": false, "type": "string", "enum": ["ALL", "ANY"], "description": "If multi-segmented viruses should require ALL segments align or ANY segment aligns" }, "nextclade_dataset_server": { - "groups": ["preprocessing.configFile"], + "groups": ["preprocessing"], "docsIncludePrefix": false, "type": "string" }, "nextclade_dataset_name": { - "groups": ["preprocessing.configFile"], + "groups": ["preprocessing"], "docsIncludePrefix": false, "type": "string", "description": "Required if sequences should be aligned" @@ -379,43 +379,43 @@ "description": "Fields that should be added to the ingest pipeline config file", "properties": { "taxon_id": { - "groups": ["ingest.configFile"], + "groups": ["ingest"], "docsIncludePrefix": false, "type": "integer", "description": "NCBI taxon ID for the organism" }, "segment_identification": { - "groups": ["ingest.configFile"], + "groups": ["ingest"], "docsIncludePrefix": false, "type": "object", "description": "If multi-segmented organism, how to identify segments", "properties": { "method": { - "groups": ["ingest.configFile"], + "groups": ["ingest"], "docsIncludePrefix": false, "type": "string", "enum": ["align", "minimizer"], "description": "Method to identify segments, uses either nextclade align or nextclade sort" }, "nextclade_dataset_server": { - "groups": ["ingest.configFile"], + "groups": ["ingest"], "docsIncludePrefix": false, "type": "string" }, "nextclade_dataset_name": { - "groups": ["ingest.configFile"], + "groups": ["ingest"], "docsIncludePrefix": false, "type": "string", "description": "Required if method is align" }, "minimizer_parser": { - "groups": ["ingest.configFile"], + "groups": ["ingest"], "docsIncludePrefix": false, "type": "array", "description": "Required if method is minimizer, list of the name of each '_' - separated metadata field in the minimizer index" }, "minimizer_index": { - "groups": ["ingest.configFile"], + "groups": ["ingest"], "docsIncludePrefix": false, "type": "string", "description": "Required if method is minimizer" @@ -424,13 +424,13 @@ "required": ["method"] }, "grouping_override": { - "groups": ["ingest.configFile"], + "groups": ["ingest"], "docsIncludePrefix": false, "type": "string", "description": "If multi-segmented organism, segment grouping overrides" }, "metadata_filter": { - "groups": ["ingest.configFile"], + "groups": ["ingest"], "docsIncludePrefix": false, "type": "object", "description": "Filter ingested sequences based on value in metadata. Filter should be a list of metadata field and value pairs." From 2d32f539aee525d7c4eb92cfde7c80a9bed6cd35 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Mon, 3 Mar 2025 09:53:35 +0100 Subject: [PATCH 3/4] second try --- .../content/docs/reference/helm-chart-config.mdx | 10 +++++----- kubernetes/loculus/values.schema.json | 16 ++++++++-------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/src/content/docs/reference/helm-chart-config.mdx b/docs/src/content/docs/reference/helm-chart-config.mdx index 22bb1c09e..bf1969abb 100644 --- a/docs/src/content/docs/reference/helm-chart-config.mdx +++ b/docs/src/content/docs/reference/helm-chart-config.mdx @@ -79,16 +79,16 @@ The values for `args` and `configFile` depend on the used preprocessing pipeline For the Nextclade preprocessing pipeline, please see [here](../../for-administrators/existing-preprocessing-pipelines/#nextclade-based-pipeline). + + ### Ingest (type) The values for `configFile` depend on the used preprocessing pipeline. -For [our ingest pipeline](https://github.com/loculus-project/loculus/tree/main/ingest) -which downloads data from NCBI GenBank using NCBI Datasets, the config file needs to -contain the taxon_id of the organism and additionally, if the organism is multi-segmented, -it requires a list of segment names (nucleotide_sequences) and the nextclade_dataset that -can be used for segment identification and alignment. +For [our ingest pipeline](https://github.com/loculus-project/loculus/tree/main/ingest) we require the following fields: + + ### NucleotideSequence (type) diff --git a/kubernetes/loculus/values.schema.json b/kubernetes/loculus/values.schema.json index a6a43d5de..242433743 100644 --- a/kubernetes/loculus/values.schema.json +++ b/kubernetes/loculus/values.schema.json @@ -335,19 +335,19 @@ "description": "Fields that should be added to the preprocessing pipeline config file.", "properties": { "alignment_requirement": { - "groups": ["preprocessing"], + "groups": ["nextcladePipelineConfigFile"], "docsIncludePrefix": false, "type": "string", "enum": ["ALL", "ANY"], "description": "If multi-segmented viruses should require ALL segments align or ANY segment aligns" }, "nextclade_dataset_server": { - "groups": ["preprocessing"], + "groups": ["nextcladePipelineConfigFile"], "docsIncludePrefix": false, "type": "string" }, "nextclade_dataset_name": { - "groups": ["preprocessing"], + "groups": ["nextcladePipelineConfigFile"], "docsIncludePrefix": false, "type": "string", "description": "Required if sequences should be aligned" @@ -391,31 +391,31 @@ "description": "If multi-segmented organism, how to identify segments", "properties": { "method": { - "groups": ["ingest"], + "groups": ["ingestPipelineConfigFile"], "docsIncludePrefix": false, "type": "string", "enum": ["align", "minimizer"], "description": "Method to identify segments, uses either nextclade align or nextclade sort" }, "nextclade_dataset_server": { - "groups": ["ingest"], + "groups": ["ingestPipelineConfigFile"], "docsIncludePrefix": false, "type": "string" }, "nextclade_dataset_name": { - "groups": ["ingest"], + "groups": ["ingestPipelineConfigFile"], "docsIncludePrefix": false, "type": "string", "description": "Required if method is align" }, "minimizer_parser": { - "groups": ["ingest"], + "groups": ["ingestPipelineConfigFile"], "docsIncludePrefix": false, "type": "array", "description": "Required if method is minimizer, list of the name of each '_' - separated metadata field in the minimizer index" }, "minimizer_index": { - "groups": ["ingest"], + "groups": ["ingestPipelineConfigFile"], "docsIncludePrefix": false, "type": "string", "description": "Required if method is minimizer" From e21bf91f666b25c637df58718668372d7dba1af0 Mon Sep 17 00:00:00 2001 From: "Anna (Anya) Parker" <50943381+anna-parker@users.noreply.github.com> Date: Mon, 3 Mar 2025 10:02:28 +0100 Subject: [PATCH 4/4] clean up --- docs/src/content/docs/reference/helm-chart-config.mdx | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/src/content/docs/reference/helm-chart-config.mdx b/docs/src/content/docs/reference/helm-chart-config.mdx index bf1969abb..84938e0d6 100644 --- a/docs/src/content/docs/reference/helm-chart-config.mdx +++ b/docs/src/content/docs/reference/helm-chart-config.mdx @@ -76,16 +76,22 @@ Definition of metadata fields for sequence entries of an organism, for example t The values for `args` and `configFile` depend on the used preprocessing pipeline. -For the Nextclade preprocessing pipeline, please see -[here](../../for-administrators/existing-preprocessing-pipelines/#nextclade-based-pipeline). + +#### Nextclade Preprocessing Pipeline ConfigFile (type) +For more details on the Nextclade preprocessing pipeline, please see +[here](../../for-administrators/existing-preprocessing-pipelines/#nextclade-based-pipeline). + ### Ingest (type) The values for `configFile` depend on the used preprocessing pipeline. + +#### Ingest ConfigFile (type) + For [our ingest pipeline](https://github.com/loculus-project/loculus/tree/main/ingest) we require the following fields: