Skip to content

Commit

Permalink
Updated config.yaml & .rst for cmpb #5 #8 #11
Browse files Browse the repository at this point in the history
  • Loading branch information
GwennyGit committed Jul 10, 2024
1 parent 798f40a commit 6a18bb7
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 104 deletions.
135 changes: 68 additions & 67 deletions docs/source/cmpb/cmpb-config.rst
Original file line number Diff line number Diff line change
@@ -1,100 +1,101 @@
CMPB Configuration File
=======================
``CMPB`` Configuration File
===========================

Below, the configuration file with the underlying defaults, is listed.
The configuration file, with its underlying defaults, is displayed below.

.. code-block:: yaml
# Configuration file for the SPECIMEN CMPB pipeline
# Explaination for default parameters:
# with the value __USER__ are required to be specified by the user
# with the value USER are required only under specific cases
# Meaning of the default parameters:
# The value __USER__ indicates parameters required to be specified by the user
# The value USER indicates parameters required only in specific cases
# meta info:
# Meta info:
# model: USER
# organism: USER
# date: USER
# author: USER
# input for the pipeline
# Input for the pipeline
# ----------------------
input:
modelpath: NULL # optional, path to a model.
# If not given, runs CarveMe
annotated_genome: __USER__ # required, path to the annotated genome file
mediapath: __USER__ # path to a media config to tests growth with
modelpath: NULL # Optional, path to a model.
# If not given, runs CarveMe -> Future update!
annotated_genome: __USER__ # Required, path to the annotated genome file
mediapath: __USER__ # Path to a media config to test growth with
# general options
# General options
# ---------------
general:
dir: './' # Path/Name of a directory to save output to
colours: 'YlGn' # set the colour scheme for the plots
# should be a valid matplotlib continuous color palette
namespace: BiGG # Namespace to use for the model
# Possible identifiers, currently: BiGG
save_all_models: True # save all models (models for each step)
memote_always_on: False # run memote after every step
stats_always_on: False # calculate the model statistics after every step
# below are options used by multiple steps
refseq_gff: USER # Path to RefSeq GFF file: Required for gap analysis with 'KEGG'.
# Can be optionally provided for cm-polish.
kegg_organism_id: USER # KEGG ID of the organism: Required for gap analysis with KEGG.
# Can be optionally provided for cm-polish.
# part-specific options
dir: './' # Path/Name of a directory to save output to
colours: 'YlGn' # Set the colour scheme for the plots
# should be a valid matplotlib continuous color palette
namespace: BiGG # Namespace to use for the model
# Possible identifiers, currently: BiGG
save_all_models: True # Save a model per step
memote_always_on: False # Run MEMOTE after every step
stats_always_on: False # Calculate the model statistics after every step
# Below are options used by multiple steps
refseq_gff: USER # Path to RefSeq GFF file: Required for gap analysis with 'KEGG'.
# Can be optionally provided for cm-polish.
kegg_organism_id: USER # KEGG ID of the organism: Required for gap analysis with 'KEGG'.
# Can be optionally provided for cm-polish.
# Part-specific options
# ---------------------
# polish a CarveMe model
# only neccessary, if the mode will or has been build with CarveMe
# will only be used, if model is indeed a CarveMe model
# Polish a CarveMe model
# Only necessary if the model will be or has been built with CarveMe
# Will only be used, if model is indeed a CarveMe model
cm-polish:
email: USER # User Mail to use for Entrez
protein_fasta: USER # optional, except for is_lab_strain: True.
# The path to the protein FASTA used to create the CarveMe model.
is_lab_strain: False # whether the users strain originates from a lab
# Needs to be set to ensure that protein IDs get the 'bqbiol:isHomologTo' qualifier
# & to set the locus_tag to the ones obtained by the annotation
email: USER # User Mail to use for Entrez
protein_fasta: USER # Optional, except for 'is_lab_strain: True'.
# The path to the protein FASTA used to create the CarveMe model.
is_lab_strain: False # Whether the user's strain originates from a lab
# Needs to be set to ensure that protein IDs get the 'bqbiol:isHomologTo' qualifier
# & to set the locus_tag to the ones obtained by the annotation
# (Warning: Might cause issues if annotation was not performed with NCBI PGAP!)
# gapfilling, optional
# Filling gaps, optional
gapfilling:
### Automatic gap filling ###
# All parameters are required for all db_to_compare choices except biocyc_files which is not required for 'KEGG'
gap_fill_params:
### Automatic gap filling ###
# All parameters are required for all 'db_to_compare' choices except 'biocyc_files' which is not required for 'KEGG'
gap_fill_params:
db_to_compare: USER # One of the choices KEGG|BioCyc|KEGG+BioCyc
biocyc_files:
- USER # Path to TXT file containing a SmartTable from BioCyc with the columns 'Accession-2' 'Reaction of gene'
- USER # Path to TXT file containing a SmartTable with all reaction relevant information (*)
- USER # Path to TXT file containing a SmartTable with all metabolite relevant information (+)
- USER # Path to protein FASTA file used as input for CarveMe (Required to get the protein IDs from the locus tags)
# (*) 'Reaction' 'Reactants of reaction' 'Products of reaction' 'EC-Number' 'KEGG Reaction' 'MetaNetX' 'Reaction-Direction' 'Spontaneous?'
# (+) 'Compound' 'Object ID' 'Chemical Formula' 'InChI-Key' 'ChEBI'
gap_fill_file: NULL # Path to Excel file with which gaps in model should be filled
# Either obtained by running gapfilling/Created by hand with the same structure as the result file from gapfilling
# Example Excel file to fill in by hand: data/modelName_gapfill_analysis_date_example.xlsx
- USER # Path to TXT file containing a SmartTable from BioCyc with the columns 'Accession-2' 'Reaction of gene'
- USER # Path to TXT file containing a SmartTable with all reaction relevant information (*)
- USER # Path to TXT file containing a SmartTable with all metabolite relevant information (+)
- USER # Path to protein FASTA file used as input for CarveMe (Required to get the protein IDs from the locus tags)
# (*) 'Reaction' 'Reactants of reaction' 'Products of reaction' 'EC-Number' 'KEGG Reaction' 'MetaNetX' 'Reaction-Direction' 'Spontaneous?'
# (+) 'Compound' 'Object ID' 'Chemical Formula' 'InChI-Key' 'ChEBI'
gap_fill_file: NULL # Path to Excel file with which gaps in model should be filled
# Either obtained by running gapfilling/Created by hand with the same structure as the result file from gapfilling
# Example Excel file to fill in by hand: refinegems/src/refinegems/example/example_inputs/modelName_gapfill_analysis_date_example.xlsx
# add KEGG pathways as groups, optional
kegg_pathway_groups: True # decide, whether to run this or not
# Add KEGG pathways as groups, optional
kegg_pathway_groups: True
# resolve duplicates
# Resolve duplicates
duplicates:
# three possible option for the resolvement of duplicates for the following model entities:
# - check: check for duplicates and simply report them
# - remove: check for and remove duplicates from the model (if possible)
# - skip: skip the resolvement
reactions: remove
metabolites: remove
# additional remove unused metabolites (reduces possible knowledge base)
remove_unused_metabs: False
# Three possible options for the resolution of duplicates for the following model entities:
# - check: Check for duplicates and simply report them
# - remove: Check for and remove duplicates from the model (if possible)
# - skip: Skip the resolution
reactions: remove
metabolites: remove
# Additionally, remove unused metabolites (possibly reduces knowledge-base)
remove_unused_metabs: False
# BOFdat / Biomass objective function
BOF:
run_bofdat: False
# if BOFdat should be run,
# fill out the params below
bofdat_params:
full_genome_sequence: USER # whole genome sequence
run_bofdat: False
# if BOFdat should be run,
# fill out the params below
bofdat_params:
full_genome_sequence: USER # Whole genome sequence
dna_weight_fraction: USER # DNA weight fraction for the organism
weight_fraction: USER # Enzyme/ion weight fractions for the organism
74 changes: 37 additions & 37 deletions src/specimen/data/config/cmpb_config.yaml
Original file line number Diff line number Diff line change
@@ -1,58 +1,60 @@
# Configuration file for the SPECIMEN CMPB pipeline

# Explaination for default parameters:
# with the value __USER__ are required to be specified by the user
# with the value USER are required only under specific cases
# Meaning of the default parameters:
# The value __USER__ indicates parameters required to be specified by the user
# The value USER indicates parameters required only in specific cases

# meta info:
# Meta info:
# model: USER
# organism: USER
# date: USER
# author: USER

# input for the pipeline
# Input for the pipeline
# ----------------------
input:
modelpath: NULL # optional, path to a model.
# If not given, runs CarveMe
annotated_genome: __USER__ # required, path to the annotated genome file
mediapath: __USER__ # path to a media config to tests growth with
modelpath: NULL # Optional, path to a model.
# If not given, runs CarveMe -> Future update!
annotated_genome: __USER__ # Required, path to the annotated genome file
mediapath: __USER__ # Path to a media config to test growth with

# general options
# General options
# ---------------
general:
dir: './' # Path/Name of a directory to save output to
colours: 'YlGn' # set the colour scheme for the plots
colours: 'YlGn' # Set the colour scheme for the plots
# should be a valid matplotlib continuous color palette
namespace: BiGG # Namespace to use for the model
# Possible identifiers, currently: BiGG
save_all_models: True # save all models (models for each step)
memote_always_on: False # run memote after every step
stats_always_on: False # calculate the model statistics after every step
# below are options used by multiple steps
save_all_models: True # Save a model per step
memote_always_on: False # Run MEMOTE after every step
stats_always_on: False # Calculate the model statistics after every step

# Below are options used by multiple steps
refseq_gff: USER # Path to RefSeq GFF file: Required for gap analysis with 'KEGG'.
# Can be optionally provided for cm-polish.
kegg_organism_id: USER # KEGG ID of the organism: Required for gap analysis with KEGG.
kegg_organism_id: USER # KEGG ID of the organism: Required for gap analysis with 'KEGG'.
# Can be optionally provided for cm-polish.
# part-specific options

# Part-specific options
# ---------------------

# polish a CarveMe model
# only neccessary, if the mode will or has been build with CarveMe
# will only be used, if model is indeed a CarveMe model
# Polish a CarveMe model
# Only necessary if the model will be or has been built with CarveMe
# Will only be used, if model is indeed a CarveMe model
cm-polish:
email: USER # User Mail to use for Entrez
protein_fasta: USER # optional, except for is_lab_strain: True.
protein_fasta: USER # Optional, except for 'is_lab_strain: True'.
# The path to the protein FASTA used to create the CarveMe model.
is_lab_strain: False # whether the users strain originates from a lab
is_lab_strain: False # Whether the user's strain originates from a lab
# Needs to be set to ensure that protein IDs get the 'bqbiol:isHomologTo' qualifier
# & to set the locus_tag to the ones obtained by the annotation
# (Warning: Might cause issues if annotation was not performed with NCBI PGAP!)

# gapfilling, optional
# Filling gaps, optional
gapfilling:
### Automatic gap filling ###
# All parameters are required for all db_to_compare choices except biocyc_files which is not required for 'KEGG'
# All parameters are required for all 'db_to_compare' choices except 'biocyc_files' which is not required for 'KEGG'
gap_fill_params:
db_to_compare: USER # One of the choices KEGG|BioCyc|KEGG+BioCyc
biocyc_files:
Expand All @@ -64,20 +66,20 @@ gapfilling:
# (+) 'Compound' 'Object ID' 'Chemical Formula' 'InChI-Key' 'ChEBI'
gap_fill_file: NULL # Path to Excel file with which gaps in model should be filled
# Either obtained by running gapfilling/Created by hand with the same structure as the result file from gapfilling
# Example Excel file to fill in by hand: data/modelName_gapfill_analysis_date_example.xlsx
# Example Excel file to fill in by hand: refinegems/src/refinegems/example/example_inputs/modelName_gapfill_analysis_date_example.xlsx

# add KEGG pathways as groups, optional
kegg_pathway_groups: True # decide, whether to run this or not
# Add KEGG pathways as groups, optional
kegg_pathway_groups: True

# resolve duplicates
# Resolve duplicates
duplicates:
# three possible option for the resolvement of duplicates for the following model entities:
# - check: check for duplicates and simply report them
# - remove: check for and remove duplicates from the model (if possible)
# - skip: skip the resolvement
# Three possible options for the resolution of duplicates for the following model entities:
# - check: Check for duplicates and simply report them
# - remove: Check for and remove duplicates from the model (if possible)
# - skip: Skip the resolution
reactions: remove
metabolites: remove
# additional remove unused metabolites (reduces possible knowledge base)
# Additionally, remove unused metabolites (possibly reduces knowledge-base)
remove_unused_metabs: False

# BOFdat / Biomass objective function
Expand All @@ -86,8 +88,6 @@ BOF:
# if BOFdat should be run,
# fill out the params below
bofdat_params:
full_genome_sequence: USER # whole genome sequence
full_genome_sequence: USER # Whole genome sequence
dna_weight_fraction: USER # DNA weight fraction for the organism
weight_fraction: USER # Enzyme/ion weight fractions for the organism


0 comments on commit 6a18bb7

Please sign in to comment.