Skip to content

Commit

Permalink
Update workflow cmpb #8
Browse files Browse the repository at this point in the history
  • Loading branch information
cb-Hades committed May 23, 2024
1 parent 339ab0b commit 50c6d0d
Show file tree
Hide file tree
Showing 3 changed files with 257 additions and 264 deletions.
228 changes: 228 additions & 0 deletions dev/old_workflow_cmpb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
###########
# old stuff
###########
# @TODO:
# despite some changes, this is still mainly the old version,
# which WILL NOT work properly with the new refinegems update - yet
# address TODOs before release!!!!!

def run_old(configpath=None):
"""main function to run the program"""
print("RefineGEMs provides functions to curate and investigate genome-scale metabolic models!")
print("Author:", __author__)

config = save_cmpb_user_input(configpath)
today = date.today().strftime("%Y%m%d")
log_file = Path(config["out_path"],f'rg_{str(today)}.log')

# check if the output directory is already present, if not create it
if not os.path.isdir(config['out_path']):
logging.info('Given out_path is not yet a directory, creating ' + config['out_path'])
os.makedirs(config['out_path'])
if config['visualize']:
dir = Path(config['out_path'],'visualization')
if not os.path.isdir(dir):
os.makedirs(dir)

print(f'The following logs are saved to: {log_file}')

logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s:%(name)s] %(message)s",
handlers=[
logging.FileHandler(log_file),
logging.StreamHandler()
]
)
logging.getLogger('cobra').setLevel(logging.WARNING)
logging.getLogger('requests').setLevel(logging.WARNING)
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.info('----------- New run of refineGEMs -----------')

logging.info(f'Your output will be saved to: {config["out_path"]}')

if config['multiple']:
logging.info('Growth simulation for multiple models: ')
models_cobra = rg.utility.io.load_model(config['multiple_paths'], 'cobra')
growth_all = rg.analysis.growth.growth_analysis(model_cobra,
config['mediapath'],
namespace = config['namespace'],
retrieve='report')
growth_all.save(Path(config['out_path']))
logging.info('Multiple model growth simulation results are saved to ' + str(Path(config['out_path'], 'GrowthSimReport')))

###########

# visualizations
# .....................................
# @TODO: more than half of the functions work differently now
if config['visualize']:
models_libsbml = rg.io.load_multiple_models(config['multiple_paths'], 'libsbml')
ini_plot = rg.comparison.plot_initial_analysis(models_libsbml).get_figure()
sbo_fig_all = rg.comparison.plot_rea_sbo_multiple(models_libsbml).get_figure()
venn_reac = rg.comparison.plot_venn(models_cobra, 'reaction', True).get_figure()
venn_metab = rg.comparison.plot_venn(models_cobra, 'metabolite', True).get_figure()
heatmap = rg.comparison.plot_heatmap_dt(growth_all[['model', 'medium', 'doubling_time [min]']])
native_heatmap = rg.comparison.plot_heatmap_native(growth_all)
# saving them
sbo_fig_all.savefig(config['out_path'] + 'visualization/' + 'all_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight')
venn_reac.savefig(config['out_path'] + 'visualization/' + 'all_ReacOverlap_' + str(today) + '.png', bbox_inches='tight')
venn_metab.savefig(config['out_path'] + 'visualization/' + 'all_MetabOverlap_' + str(today) + '.png', bbox_inches='tight')
heatmap_dt_prefix = 'heatmap_dt_additives_anaerobic_' if config['anaerobic_growth'] else 'heatmap_dt_additives_'
heatmap.savefig(config['out_path'] + 'visualization/' + heatmap_dt_prefix + str(today) + '.png')
native_heatmap_prefix = 'heatmap_native_anaerobic_' if config['anaerobic_growth'] else 'heatmap_native_'
native_heatmap.savefig(config['out_path'] + 'visualization/' + native_heatmap_prefix + str(today) + '.png', bbox_inches='tight')
ini_plot.savefig(config['out_path'] + 'visualization/' + 'model_status_' + str(today) + '.png', bbox_inches='tight')
# .....................................

if config['single']:
try:
model_cobra, errors = cobra.io.sbml.validate_sbml_model(config['model'])
#logging.info(errors)
except (OSError):
model_cobra = None
logging.info('No or no valid model given, please enter a valid path in the model field in the config file.')

if config['keggpathways']:
model_libsbml, non_kegg = rg.curation.pathways.kegg_pathways(config['model'])
file = open(config['out_path'] + model_libsbml.getId() + '_reac_wo_kegg_' + str(today) + '.txt','w')
for reaction in non_kegg:
file.write(reaction+"\n")
file.close()
logging.info('Kegg Pathways were added to the model as groups. Reactions that have no KEGG annotation are denoted in ' + model_libsbml.getId() + '_reac_wo_kegg.txt')

else:
model_libsbml = rg.utility.io.load_model(config['model'],'libsbml')

if config['sboterms']:
if config['visualize']:
# .....................................
# @TODO: where us the functions now?
sbo_fig = rg.investigate.plot_rea_sbo_single(model_libsbml).get_figure()
# .....................................
# saving the created visualizations
sbo_fig.savefig(config['out_path'] + 'visualization/' + str(model_cobra.id) + '_ReacPerSBO_beforeUpdate_' + str(today) + '.png', bbox_inches='tight')
model_libsbml = rg.sboann.sbo_annotation(model_libsbml)
logging.info('SBO Terms updated for ' + model_libsbml.getId())

if config['charge_corr']:
model_libsbml, multiple_charges = rg.curation.charges.correct_charges_modelseed(model_libsbml)
pd.DataFrame.from_dict(multiple_charges, orient='index').to_csv(config['out_path'] + model_libsbml.getId() + '_mulchar_' + str(today) + '.csv', sep=',', header=False)
logging.info('Charges were corrected for ' + model_libsbml.getId() + '. A table with metabolites with multiple charges can be found under ' + model_libsbml.getId() + '_mulchar_' + str(today) + '.csv')

if config['man_cur']:
if config['man_cur_type'] == 'gapfill':
gapfill = rg.utility.io.load_manual_gapfill(config['man_cur_table'])
model_libsbml = rg.curation.curate.add_reactions_from_table(model_libsbml, gapfill, config['entrez_email'])
logging.info('Manual gap filling was done for ' + model_libsbml.getId())
elif config['man_cur_type'] == 'metabs':
man_ann = rg.utility.io.load_manual_annotations(config['man_cur_table'])
model_libsbml = rg.curation.curate.update_annotations_from_table(model_libsbml, man_ann)
model_libsbml = rg.curation.curate.update_annotations_from_others(model_libsbml)
logging.info('Manual update of annotations was done for ' + model_libsbml.getId())

if config['gap_analysis'] and config['gapfill_model']:
filename = f'{config["out_path"]}{model_libsbml.getId()}_gap_analysis_{str(today)}'
if config['gap_analysis_params'].get('db_to_compare') not in ('BioCyc', 'KEGG+BioCyc'):
logging.warning('Currently, only the result from the \'BioCyc\' or \'KEGG+BioCyc\' runs can be directly added to a model.')
gap_analysis = rg.curation.gapfill.gap_analysis(model_libsbml, config['gap_analysis_params'], filename)
logging.info(f'Gap analysis for {model_libsbml.getId()} with {config["gap_analysis_params"].get("db_to_compare")} was performed.')
logging.info(f'Complete Excel table is in file: {filename}.')
else:
gapfill = rg.curation.gapfill.gapfill(model_libsbml, config['gap_analysis_params'], filename)
gap_analysis_stats = gapfill[0][0]
logging.info(f'Statistics on missing entites for {model_libsbml.getId()}:')
logging.info(gap_analysis_stats)
logging.info(f'Complete Excel table is in file: {filename}.')
model_libsbml = gapfill[-1]
logging.info(f'Gaps were filled in {model_libsbml.getId()}.')
elif config['gap_analysis']:
filename = f'{config["out_path"]}{model_libsbml.getId()}_gap_analysis_{str(today)}'
gap_analysis = rg.curation.gapfill.gap_analysis(model_libsbml, config['gap_analysis_params'], filename)
logging.info(f'Gap analysis for {model_libsbml.getId()} with {config["gap_analysis_params"].get("db_to_compare")} was performed.')
if config["gap_analysis_params"].get("db_to_compare") != 'KEGG':
logging.info(f'Statistics on missing entites for {model_libsbml.getId()}:')
logging.info(gap_analysis[0])
logging.info(f'Complete Excel table is in file: {filename}.')
elif config['gapfill_model']:
model_libsbml = rg.curation.gapfill.gapfill_model(model_libsbml, config['gap_analysis_file'])
logging.info(f'Gaps were filled in {model_libsbml.getId()}.')

if config['polish']:
model_libsbml = rg.curation.polish.polish(model_libsbml, config['entrez_email'], config['id_db'], config['gff_file'],
config['protein_fasta'], config['lab_strain'], config['organismid'], config['out_path'])
logging.info(model_libsbml.getId() + ' has been polished')

if config['biomass']:
result = rg.curation.biomass.check_normalise_biomass(model_cobra)
if result:
model_libsbml = result
logging.info(model_libsbml.getId() + '\'s biomass function has been checked.')

mods = [config['keggpathways'], config['sboterms'], config['charge_corr'], config['man_cur'], config['gapfill_model'], config['polish'], config['biomass']]

if any(mods):
if config['model_out'] == 'stdout':
config['model_out'] = config['out_path'] + model_libsbml.getId() + '_modified_' + str(today) + '.xml'

try:
rg.utility.io.write_model_to_file(model_libsbml, config['model_out'])
except (OSError) as e:
logging.info(e)
logging.info("Model could not be saved...")

if model_cobra:
try:
model_cobra, errors = cobra.io.sbml.validate_sbml_model(config['model_out'])
logging.info(errors)
except (OSError) as e:
logging.info(e)
model_cobra = None
logging.info('Model was invalidated during curation steps.')

if model_cobra:
logging.info(model_cobra.id + ' will be investigated.')
# .....................................................
# @TODO: different !!!!!
name, reac, metab, genes = rg.analysis.investigate.initial_analysis(model_libsbml)
orphans, deadends, disconnected = rg.investigate.get_orphans_deadends_disconnected(model_cobra)
mass_unbal, charge_unbal = rg.investigate.get_mass_charge_unbalanced(model_cobra)
egc = rg.investigate.get_egc(model_cobra)
if config['visualize']:
logging.info('All visualizations can be found in the subfolder "visualization".')
sbo_fig = rg.investigate.plot_rea_sbo_single(model_libsbml).get_figure()

# saving the created visualizations
sbo_fig.savefig(config['out_path'] + 'visualization/' + str(model_cobra.id) + '_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight')
# .....................................................
# .....................................................
# @TODO different function
if config['memote']:
score = rg.investigate.get_memote_score(rg.investigate.run_memote(model_cobra))
# .....................................................
if config['modelseed']:
charge_mismatch, formula_mismatch = rg.curation.db_access.modelseed.compare_to_modelseed(model_cobra)
# .....................................................
# @TODO function changed
if config['media']:
growth_sim = rg.growth.get_growth_selected_media(model_cobra, config['media'], config['growth_basis'], config['anaerobic_growth'])
# .....................................................
if config['memote']:
information = [[name], [reac], [metab], [genes], [score], orphans, deadends, disconnected, mass_unbal, charge_unbal]
model_params = pd.DataFrame(information, ['model name', '#reactions', '#metabolites', '#genes', 'memote score', 'orphans', 'deadends', 'disconnected', 'mass unbalanced', 'charge unbalanced']).T
else:
information = [[name], [reac], [metab], [genes], orphans, deadends, disconnected, mass_unbal, charge_unbal]
model_params = pd.DataFrame(information, ['model name', '#reactions', '#metabolites', '#genes', 'orphans', 'deadends', 'disconnected', 'mass unbalanced', 'charge unbalanced']).T
with pd.ExcelWriter(config['out_path'] + name + '_' + str(today) + '.xlsx') as writer:
model_params.to_excel(writer, sheet_name='model params', index=False)
growth_sim_name = 'anaerobic growth simulation' if config['anaerobic_growth'] else 'growth simulation'
if config['media']: #@TODO: needed???
growth_sim.to_excel(writer, sheet_name=growth_sim_name, index=False)
egc.to_excel(writer, sheet_name='EGC test', index=False)
if config['modelseed']:
charge_mismatch.to_excel(writer, sheet_name='charge mismatches', index=False)
formula_mismatch.to_excel(writer, sheet_name='formula mismatches', index=False)
logging.info('Single model analyses results are saved to ' + name + '_analyses_' + str(today) + '.xlsx')
else:
logging.info('No valid model, investigation aborted!')

Loading

0 comments on commit 50c6d0d

Please sign in to comment.