-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
257 additions
and
264 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,228 @@ | ||
########### | ||
# old stuff | ||
########### | ||
# @TODO: | ||
# despite some changes, this is still mainly the old version, | ||
# which WILL NOT work properly with the new refinegems update - yet | ||
# address TODOs before release!!!!! | ||
|
||
def run_old(configpath=None): | ||
"""main function to run the program""" | ||
print("RefineGEMs provides functions to curate and investigate genome-scale metabolic models!") | ||
print("Author:", __author__) | ||
|
||
config = save_cmpb_user_input(configpath) | ||
today = date.today().strftime("%Y%m%d") | ||
log_file = Path(config["out_path"],f'rg_{str(today)}.log') | ||
|
||
# check if the output directory is already present, if not create it | ||
if not os.path.isdir(config['out_path']): | ||
logging.info('Given out_path is not yet a directory, creating ' + config['out_path']) | ||
os.makedirs(config['out_path']) | ||
if config['visualize']: | ||
dir = Path(config['out_path'],'visualization') | ||
if not os.path.isdir(dir): | ||
os.makedirs(dir) | ||
|
||
print(f'The following logs are saved to: {log_file}') | ||
|
||
logging.basicConfig( | ||
level=logging.INFO, | ||
format="%(asctime)s [%(levelname)s:%(name)s] %(message)s", | ||
handlers=[ | ||
logging.FileHandler(log_file), | ||
logging.StreamHandler() | ||
] | ||
) | ||
logging.getLogger('cobra').setLevel(logging.WARNING) | ||
logging.getLogger('requests').setLevel(logging.WARNING) | ||
logging.getLogger('urllib3').setLevel(logging.WARNING) | ||
logging.info('----------- New run of refineGEMs -----------') | ||
|
||
logging.info(f'Your output will be saved to: {config["out_path"]}') | ||
|
||
if config['multiple']: | ||
logging.info('Growth simulation for multiple models: ') | ||
models_cobra = rg.utility.io.load_model(config['multiple_paths'], 'cobra') | ||
growth_all = rg.analysis.growth.growth_analysis(model_cobra, | ||
config['mediapath'], | ||
namespace = config['namespace'], | ||
retrieve='report') | ||
growth_all.save(Path(config['out_path'])) | ||
logging.info('Multiple model growth simulation results are saved to ' + str(Path(config['out_path'], 'GrowthSimReport'))) | ||
|
||
########### | ||
|
||
# visualizations | ||
# ..................................... | ||
# @TODO: more than half of the functions work differently now | ||
if config['visualize']: | ||
models_libsbml = rg.io.load_multiple_models(config['multiple_paths'], 'libsbml') | ||
ini_plot = rg.comparison.plot_initial_analysis(models_libsbml).get_figure() | ||
sbo_fig_all = rg.comparison.plot_rea_sbo_multiple(models_libsbml).get_figure() | ||
venn_reac = rg.comparison.plot_venn(models_cobra, 'reaction', True).get_figure() | ||
venn_metab = rg.comparison.plot_venn(models_cobra, 'metabolite', True).get_figure() | ||
heatmap = rg.comparison.plot_heatmap_dt(growth_all[['model', 'medium', 'doubling_time [min]']]) | ||
native_heatmap = rg.comparison.plot_heatmap_native(growth_all) | ||
# saving them | ||
sbo_fig_all.savefig(config['out_path'] + 'visualization/' + 'all_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight') | ||
venn_reac.savefig(config['out_path'] + 'visualization/' + 'all_ReacOverlap_' + str(today) + '.png', bbox_inches='tight') | ||
venn_metab.savefig(config['out_path'] + 'visualization/' + 'all_MetabOverlap_' + str(today) + '.png', bbox_inches='tight') | ||
heatmap_dt_prefix = 'heatmap_dt_additives_anaerobic_' if config['anaerobic_growth'] else 'heatmap_dt_additives_' | ||
heatmap.savefig(config['out_path'] + 'visualization/' + heatmap_dt_prefix + str(today) + '.png') | ||
native_heatmap_prefix = 'heatmap_native_anaerobic_' if config['anaerobic_growth'] else 'heatmap_native_' | ||
native_heatmap.savefig(config['out_path'] + 'visualization/' + native_heatmap_prefix + str(today) + '.png', bbox_inches='tight') | ||
ini_plot.savefig(config['out_path'] + 'visualization/' + 'model_status_' + str(today) + '.png', bbox_inches='tight') | ||
# ..................................... | ||
|
||
if config['single']: | ||
try: | ||
model_cobra, errors = cobra.io.sbml.validate_sbml_model(config['model']) | ||
#logging.info(errors) | ||
except (OSError): | ||
model_cobra = None | ||
logging.info('No or no valid model given, please enter a valid path in the model field in the config file.') | ||
|
||
if config['keggpathways']: | ||
model_libsbml, non_kegg = rg.curation.pathways.kegg_pathways(config['model']) | ||
file = open(config['out_path'] + model_libsbml.getId() + '_reac_wo_kegg_' + str(today) + '.txt','w') | ||
for reaction in non_kegg: | ||
file.write(reaction+"\n") | ||
file.close() | ||
logging.info('Kegg Pathways were added to the model as groups. Reactions that have no KEGG annotation are denoted in ' + model_libsbml.getId() + '_reac_wo_kegg.txt') | ||
|
||
else: | ||
model_libsbml = rg.utility.io.load_model(config['model'],'libsbml') | ||
|
||
if config['sboterms']: | ||
if config['visualize']: | ||
# ..................................... | ||
# @TODO: where us the functions now? | ||
sbo_fig = rg.investigate.plot_rea_sbo_single(model_libsbml).get_figure() | ||
# ..................................... | ||
# saving the created visualizations | ||
sbo_fig.savefig(config['out_path'] + 'visualization/' + str(model_cobra.id) + '_ReacPerSBO_beforeUpdate_' + str(today) + '.png', bbox_inches='tight') | ||
model_libsbml = rg.sboann.sbo_annotation(model_libsbml) | ||
logging.info('SBO Terms updated for ' + model_libsbml.getId()) | ||
|
||
if config['charge_corr']: | ||
model_libsbml, multiple_charges = rg.curation.charges.correct_charges_modelseed(model_libsbml) | ||
pd.DataFrame.from_dict(multiple_charges, orient='index').to_csv(config['out_path'] + model_libsbml.getId() + '_mulchar_' + str(today) + '.csv', sep=',', header=False) | ||
logging.info('Charges were corrected for ' + model_libsbml.getId() + '. A table with metabolites with multiple charges can be found under ' + model_libsbml.getId() + '_mulchar_' + str(today) + '.csv') | ||
|
||
if config['man_cur']: | ||
if config['man_cur_type'] == 'gapfill': | ||
gapfill = rg.utility.io.load_manual_gapfill(config['man_cur_table']) | ||
model_libsbml = rg.curation.curate.add_reactions_from_table(model_libsbml, gapfill, config['entrez_email']) | ||
logging.info('Manual gap filling was done for ' + model_libsbml.getId()) | ||
elif config['man_cur_type'] == 'metabs': | ||
man_ann = rg.utility.io.load_manual_annotations(config['man_cur_table']) | ||
model_libsbml = rg.curation.curate.update_annotations_from_table(model_libsbml, man_ann) | ||
model_libsbml = rg.curation.curate.update_annotations_from_others(model_libsbml) | ||
logging.info('Manual update of annotations was done for ' + model_libsbml.getId()) | ||
|
||
if config['gap_analysis'] and config['gapfill_model']: | ||
filename = f'{config["out_path"]}{model_libsbml.getId()}_gap_analysis_{str(today)}' | ||
if config['gap_analysis_params'].get('db_to_compare') not in ('BioCyc', 'KEGG+BioCyc'): | ||
logging.warning('Currently, only the result from the \'BioCyc\' or \'KEGG+BioCyc\' runs can be directly added to a model.') | ||
gap_analysis = rg.curation.gapfill.gap_analysis(model_libsbml, config['gap_analysis_params'], filename) | ||
logging.info(f'Gap analysis for {model_libsbml.getId()} with {config["gap_analysis_params"].get("db_to_compare")} was performed.') | ||
logging.info(f'Complete Excel table is in file: {filename}.') | ||
else: | ||
gapfill = rg.curation.gapfill.gapfill(model_libsbml, config['gap_analysis_params'], filename) | ||
gap_analysis_stats = gapfill[0][0] | ||
logging.info(f'Statistics on missing entites for {model_libsbml.getId()}:') | ||
logging.info(gap_analysis_stats) | ||
logging.info(f'Complete Excel table is in file: {filename}.') | ||
model_libsbml = gapfill[-1] | ||
logging.info(f'Gaps were filled in {model_libsbml.getId()}.') | ||
elif config['gap_analysis']: | ||
filename = f'{config["out_path"]}{model_libsbml.getId()}_gap_analysis_{str(today)}' | ||
gap_analysis = rg.curation.gapfill.gap_analysis(model_libsbml, config['gap_analysis_params'], filename) | ||
logging.info(f'Gap analysis for {model_libsbml.getId()} with {config["gap_analysis_params"].get("db_to_compare")} was performed.') | ||
if config["gap_analysis_params"].get("db_to_compare") != 'KEGG': | ||
logging.info(f'Statistics on missing entites for {model_libsbml.getId()}:') | ||
logging.info(gap_analysis[0]) | ||
logging.info(f'Complete Excel table is in file: {filename}.') | ||
elif config['gapfill_model']: | ||
model_libsbml = rg.curation.gapfill.gapfill_model(model_libsbml, config['gap_analysis_file']) | ||
logging.info(f'Gaps were filled in {model_libsbml.getId()}.') | ||
|
||
if config['polish']: | ||
model_libsbml = rg.curation.polish.polish(model_libsbml, config['entrez_email'], config['id_db'], config['gff_file'], | ||
config['protein_fasta'], config['lab_strain'], config['organismid'], config['out_path']) | ||
logging.info(model_libsbml.getId() + ' has been polished') | ||
|
||
if config['biomass']: | ||
result = rg.curation.biomass.check_normalise_biomass(model_cobra) | ||
if result: | ||
model_libsbml = result | ||
logging.info(model_libsbml.getId() + '\'s biomass function has been checked.') | ||
|
||
mods = [config['keggpathways'], config['sboterms'], config['charge_corr'], config['man_cur'], config['gapfill_model'], config['polish'], config['biomass']] | ||
|
||
if any(mods): | ||
if config['model_out'] == 'stdout': | ||
config['model_out'] = config['out_path'] + model_libsbml.getId() + '_modified_' + str(today) + '.xml' | ||
|
||
try: | ||
rg.utility.io.write_model_to_file(model_libsbml, config['model_out']) | ||
except (OSError) as e: | ||
logging.info(e) | ||
logging.info("Model could not be saved...") | ||
|
||
if model_cobra: | ||
try: | ||
model_cobra, errors = cobra.io.sbml.validate_sbml_model(config['model_out']) | ||
logging.info(errors) | ||
except (OSError) as e: | ||
logging.info(e) | ||
model_cobra = None | ||
logging.info('Model was invalidated during curation steps.') | ||
|
||
if model_cobra: | ||
logging.info(model_cobra.id + ' will be investigated.') | ||
# ..................................................... | ||
# @TODO: different !!!!! | ||
name, reac, metab, genes = rg.analysis.investigate.initial_analysis(model_libsbml) | ||
orphans, deadends, disconnected = rg.investigate.get_orphans_deadends_disconnected(model_cobra) | ||
mass_unbal, charge_unbal = rg.investigate.get_mass_charge_unbalanced(model_cobra) | ||
egc = rg.investigate.get_egc(model_cobra) | ||
if config['visualize']: | ||
logging.info('All visualizations can be found in the subfolder "visualization".') | ||
sbo_fig = rg.investigate.plot_rea_sbo_single(model_libsbml).get_figure() | ||
|
||
# saving the created visualizations | ||
sbo_fig.savefig(config['out_path'] + 'visualization/' + str(model_cobra.id) + '_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight') | ||
# ..................................................... | ||
# ..................................................... | ||
# @TODO different function | ||
if config['memote']: | ||
score = rg.investigate.get_memote_score(rg.investigate.run_memote(model_cobra)) | ||
# ..................................................... | ||
if config['modelseed']: | ||
charge_mismatch, formula_mismatch = rg.curation.db_access.modelseed.compare_to_modelseed(model_cobra) | ||
# ..................................................... | ||
# @TODO function changed | ||
if config['media']: | ||
growth_sim = rg.growth.get_growth_selected_media(model_cobra, config['media'], config['growth_basis'], config['anaerobic_growth']) | ||
# ..................................................... | ||
if config['memote']: | ||
information = [[name], [reac], [metab], [genes], [score], orphans, deadends, disconnected, mass_unbal, charge_unbal] | ||
model_params = pd.DataFrame(information, ['model name', '#reactions', '#metabolites', '#genes', 'memote score', 'orphans', 'deadends', 'disconnected', 'mass unbalanced', 'charge unbalanced']).T | ||
else: | ||
information = [[name], [reac], [metab], [genes], orphans, deadends, disconnected, mass_unbal, charge_unbal] | ||
model_params = pd.DataFrame(information, ['model name', '#reactions', '#metabolites', '#genes', 'orphans', 'deadends', 'disconnected', 'mass unbalanced', 'charge unbalanced']).T | ||
with pd.ExcelWriter(config['out_path'] + name + '_' + str(today) + '.xlsx') as writer: | ||
model_params.to_excel(writer, sheet_name='model params', index=False) | ||
growth_sim_name = 'anaerobic growth simulation' if config['anaerobic_growth'] else 'growth simulation' | ||
if config['media']: #@TODO: needed??? | ||
growth_sim.to_excel(writer, sheet_name=growth_sim_name, index=False) | ||
egc.to_excel(writer, sheet_name='EGC test', index=False) | ||
if config['modelseed']: | ||
charge_mismatch.to_excel(writer, sheet_name='charge mismatches', index=False) | ||
formula_mismatch.to_excel(writer, sheet_name='formula mismatches', index=False) | ||
logging.info('Single model analyses results are saved to ' + name + '_analyses_' + str(today) + '.xlsx') | ||
else: | ||
logging.info('No valid model, investigation aborted!') | ||
|
Oops, something went wrong.