From 0c012301345948915202c3e9f135e620d922cba1 Mon Sep 17 00:00:00 2001 From: famosab Date: Tue, 28 Feb 2023 16:50:53 +0100 Subject: [PATCH] commented vis functions #56 --- main.py | 31 +++++++----- refinegems/comparison.py | 101 +++++++++++++++++++++++++++++++------- refinegems/investigate.py | 44 ++++++++++++----- refinegems/io.py | 10 +++- 4 files changed, 142 insertions(+), 44 deletions(-) diff --git a/main.py b/main.py index e678df0c..3839034d 100644 --- a/main.py +++ b/main.py @@ -70,19 +70,26 @@ def main(): else: if (config['multiple']): - growth_all = rg.comparison.simulate_all(config['multiple_paths'], config['media'], config['growth_basis']) + models_cobra = rg.io.load_multiple_models(config['multiple_paths'], 'cobra') + growth_all = rg.comparison.simulate_all(models_cobra, config['media'], config['growth_basis']) growth_all.to_csv(config['out_path'] + 'growth_' + str(today) + '_' + config['growth_basis'] + '.csv', index=False) # visualizations - sbo_fig_all = rg.comparison.get_sbo_plot_multiple(config['multiple_paths']).get_figure() - venn_reac = rg.comparison.create_venn(config['multiple_paths'], 'reaction', True).get_figure() - venn_metab = rg.comparison.create_venn(config['multiple_paths'], 'metabolite', True).get_figure() - heatmap = rg.comparison.create_heatmap(growth_all[['model', 'medium', 'doubling_time [min]']]) - # saving them - sbo_fig_all.savefig(config['out_path'] + 'visualization/' + 'all_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight') - venn_reac.savefig(config['out_path'] + 'visualization/' + 'all_ReacOverlap_' + str(today) + '.png', bbox_inches='tight') - venn_metab.savefig(config['out_path'] + 'visualization/' + 'all_MetabOverlap_' + str(today) + '.png', bbox_inches='tight') - heatmap.savefig(config['out_path'] + 'visualization/' + 'heatmap_dt_additives_' + str(today) + '.png') - + if (config['visualize']): + models_libsbml = rg.io.load_multiple_models(config['multiple_paths'], 'libsbml') + ini_plot = rg.comparison.plot_initial_analysis(models_libsbml).get_figure() + sbo_fig_all = rg.comparison.plot_rea_sbo_multiple(models_libsbml).get_figure() + venn_reac = rg.comparison.plot_venn(models_cobra, 'reaction', True).get_figure() + venn_metab = rg.comparison.plot_venn(models_cobra, 'metabolite', True).get_figure() + heatmap = rg.comparison.plot_heatmap_dt(growth_all[['model', 'medium', 'doubling_time [min]']]) + binary_heatmap = rg.comparison.plot_heatmap_binary(growth_all) + # saving them + sbo_fig_all.savefig(config['out_path'] + 'visualization/' + 'all_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight') + venn_reac.savefig(config['out_path'] + 'visualization/' + 'all_ReacOverlap_' + str(today) + '.png', bbox_inches='tight') + venn_metab.savefig(config['out_path'] + 'visualization/' + 'all_MetabOverlap_' + str(today) + '.png', bbox_inches='tight') + heatmap.savefig(config['out_path'] + 'visualization/' + 'heatmap_dt_additives_' + str(today) + '.png') + binary_heatmap.savefig(config['out_path'] + 'visualization/' + 'heatmap_native_' + str(today) + '.png', bbox_inches='tight') + ini_plot.savefig(config['out_path'] + 'visualization/' + 'model_status_' + str(today) + '.png', bbox_inches='tight') + try: model_cobra, errors = cobra.io.sbml.validate_sbml_model(config['model']) print(errors) @@ -96,7 +103,7 @@ def main(): orphans, deadends, disconnected = rg.investigate.get_orphans_deadends_disconnected(model_cobra) mass_unbal, charge_unbal = rg.investigate.get_mass_charge_unbalanced(model_cobra) egc = rg.investigate.get_egc(model_cobra) - sbo_fig = rg.investigate.get_sbo_plot_single(model_libsbml).get_figure() + sbo_fig = rg.investigate.plot_rea_sbo_single(model_libsbml).get_figure() # saving the created visualizations sbo_fig.savefig(config['out_path'] + 'visualization/' + str(model_cobra.id) + '_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight') diff --git a/refinegems/comparison.py b/refinegems/comparison.py index 5e897c89..7d500cd1 100644 --- a/refinegems/comparison.py +++ b/refinegems/comparison.py @@ -10,13 +10,46 @@ import numpy as np from tqdm import tqdm from venn import venn -from refinegems.io import load_multiple_models, load_medium_from_db, search_sbo_label +from libsbml import Model as libModel +from cobra import Model as cobraModel +from refinegems.io import load_medium_from_db, search_sbo_label from refinegems.growth import growth_one_medium_from_default, growth_one_medium_from_minimal from refinegems.investigate import initial_analysis, get_reactions_per_sbo __author__ = "Famke Baeuerle" -def get_sbo_mapping_multiple(models): +def plot_initial_analysis(models: list[libModel]): + """Creates bar plot of number of entities per Model + + Args: + models (list[libModel]): Models loaded with libSBML + + Returns: + plot: pandas plot object + """ + numbers = pd.DataFrame([initial_analysis(model) for model in models], columns=['model', 'metabolites', 'reactions', 'genes']) + ax = numbers.set_index('model').plot.bar(y=['metabolites', 'reactions', 'genes'], figsize=(8, 5), cmap='Paired', rot=0) + # commented is possibility to integrate memote scores + #numbers.set_index('model').plot(y='Memote score', ax=ax, use_index=False, linestyle=':', secondary_y='Memote score', color='k', marker='D', legend=True) + #ax.right_ax.set_ylabel('Memote score [%]') + #ax.right_ax.legend(loc='upper right', bbox_to_anchor=[0.98, 0.9]) + #ax.right_ax.set_ylim([75, 95]) + ax.legend(title=False, loc='upper left', ncol=3, frameon=False) + ylim = numbers.drop('model', axis=1).max().max() + 200 + ax.set_ylim([0,ylim]) + ax.set_xlabel('') + ax.tick_params(axis='x',which='both', bottom=False,top=False) + return ax + +def get_sbo_mapping_multiple(models: list[libModel]) -> pd.DataFrame: + """Determines number of reactions per SBO Term and adss label of SBO Terms + + Args: + models (list[libModel]): Models loaded with libSBML + + Returns: + pd.DataFrame: SBO Terms, no of reactions per model and SBO Label + """ mappings = {} for model in models: mappings[model.id] = get_reactions_per_sbo(model) @@ -25,8 +58,16 @@ def get_sbo_mapping_multiple(models): df['SBO-Name'] = df['SBO-Term'].apply(search_sbo_label) return df -def get_sbo_plot_multiple(model_list: list[str], rename=None): - models = load_multiple_models(model_list, package='libsbml') +def plot_rea_sbo_multiple(models: list[libModel], rename=None): + """Plots reactions per SBO Term in horizontal bar chart with stacked bars for the models + + Args: + models (list[libModel]): Models loaded with libSBML + rename (dict, optional): Rename model ids to custom names. Defaults to None. + + Returns: + plot: pandas plot object + """ map = get_sbo_mapping_multiple(models) id_list = [mod.id for mod in models] map = map[(map[id_list]>3).all(axis=1)] @@ -39,10 +80,19 @@ def get_sbo_plot_multiple(model_list: list[str], rename=None): fig.legend(loc='lower right') return fig -def create_venn(model_list: list[str], entity: str, perc: bool=False): - all_models = load_multiple_models(model_list, package='cobra') +def plot_venn(models: list[cobraModel], entity: str, perc: bool=False): + """Creates venn diagram to show the overlap of model entities + + Args: + models (list[cobraModel]): Models loaded with cobrapy + entity (str): Compare on metabolite|reaction + perc (bool, optional): True if percentages should be used. Defaults to False. + + Returns: + plot: venn diagram + """ intersec = {} - for model in all_models: + for model in models: reas = [] if entity == 'metabolite': for rea in model.metabolites: @@ -57,7 +107,15 @@ def create_venn(model_list: list[str], entity: str, perc: bool=False): fig = venn(intersec) return fig -def create_heatmap(growth: pd.DataFrame): +def plot_heatmap_dt(growth: pd.DataFrame): + """Creates heatmap of simulated doubling times with additives + + Args: + growth (pd.DataFrame): Containing growth data from simulate_all + + Returns: + plot: sns heatmap plot + """ growth=growth.set_index(['medium', 'model']).sort_index().T.stack() growth.columns.name=None growth.index.names = (None,None) @@ -95,7 +153,15 @@ def create_heatmap(growth: pd.DataFrame): ) return fig -def create_binary_heatmap(growth: pd.DataFrame): +def plot_heatmap_binary(growth: pd.DataFrame): + """Creates a plot were if growth without additives is possible is marked blue otherwise red + + Args: + growth (pd.DataFrame): Containing growth data from simulate_all + + Returns: + plot: sns heatmap plot + """ def get_native_growth(row): if row == True: return 1 @@ -131,23 +197,22 @@ def get_native_growth(row): ) return fig -def simulate_all(model_list: list[str], media: list[str], basis: str) -> pd.DataFrame: - """does a run of growth simulation for multiple models on different media +def simulate_all(models: list[cobraModel], media: list[str], basis: str) -> pd.DataFrame: + """Does a run of growth simulation for multiple models on different media Args: - model_list (list): paths to the models of interest (xml files) - mediumpath (string): path to csv containing medium definitions - media (list): media of interest (f.ex. LB, M9, ...) - basis (string): either default_uptake (adding metabs from default) or minimal_uptake (adding metabs from minimal medium) + models (list[cobraModel]): Models loaded with cobrapy + mediumpath (string): Path to csv containing medium definitions + media (list): Media of interest (f.ex. LB, M9, ...) + basis (string): Either default_uptake (adding metabs from default) or minimal_uptake (adding metabs from minimal medium) Returns: - df: table containing the results of the growth simulation + pd.DataFrame: table containing the results of the growth simulation """ growth = pd.DataFrame() - all_models = load_multiple_models(model_list, package='cobra') for medium_id in tqdm(media): medium = load_medium_from_db(medium_id) - for model in all_models: + for model in models: essentials_given = False if (basis=='default_uptake'): growth_one = growth_one_medium_from_default(model, medium).drop('missing exchanges', axis=1) diff --git a/refinegems/investigate.py b/refinegems/investigate.py index 4bbf877e..30b3f162 100644 --- a/refinegems/investigate.py +++ b/refinegems/investigate.py @@ -11,6 +11,8 @@ import pandas as pd import numpy as np from cobra import Reaction +from libsbml import Model as libModel +from cobra import Model as cobraModel from memote.support import consistency # needed by memote.support.consitency from memote.support import consistency_helpers as con_helpers @@ -37,17 +39,17 @@ } -def run_memote_sys(modelfile): +def run_memote_sys(model: cobraModel): """run memote on linux machine Args: - modelfile (cobra-model): model loaded with cobrapy + model (cobra-model): model loaded with cobrapy """ - cmd = 'memote report snapshot ' + str(modelfile) + cmd = 'memote report snapshot ' + str(model) os.system(cmd) -def run_memote(model): +def run_memote(model: cobraModel) -> float: """runs memote to obtain total score Args: @@ -65,7 +67,7 @@ def run_memote(model): return totalScore -def initial_analysis(model): +def initial_analysis(model: libModel): """extracts most important numbers of GEM Args: @@ -81,7 +83,7 @@ def initial_analysis(model): return name, reactions, metabolites, genes -def get_orphans_deadends_disconnected(model): +def get_orphans_deadends_disconnected(model: cobraModel): """Uses memote functions to extract orphans, deadends and disconnected metabolites Args: @@ -112,7 +114,7 @@ def get_orphans_deadends_disconnected(model): return orphan_list, deadend_list, disconnected_list -def get_mass_charge_unbalanced(model): +def get_mass_charge_unbalanced(model: cobraModel): """creates lists of mass and charge unbalanced reactions, without exchange reactions since they are unbalanced per definition @@ -143,7 +145,7 @@ def get_mass_charge_unbalanced(model): return mass_list, charge_list -def get_model_info(modelpath): +def get_model_info(modelpath: str): """Reports core information of given model Args: @@ -179,7 +181,7 @@ def get_model_info(modelpath): return model_info -def parse_reaction(eq, model): # from alina +def parse_reaction(eq: str, model: cobraModel) -> dict: # from alina """Parses a reaction equation string to dictionary Args: @@ -210,7 +212,7 @@ def parse_reaction(eq, model): # from alina coeff = 1 return eq_matrix -def get_egc(model): +def get_egc(model: cobraModel) -> pd.DataFrame: """Energy-generating cycles represent thermodynamically infeasible states. Charging of energy metabolites without any energy source causes such cycles. Detection method is based on (Fritzemeier et al., 2017) Args: @@ -264,7 +266,7 @@ def get_egc(model): df_fluxes = pd.concat([df_fluxes,pd.DataFrame.from_dict([objval])]) return df_fluxes.T.reset_index().rename({'index':'BOF', 0:'objective value'}, axis=1).fillna('') -def get_metabs_with_one_cvterm(model): +def get_metabs_with_one_cvterm(model: libModel) -> list: """reports metabolites which have only one annotation, can be used as basis for further annotation research @@ -285,7 +287,15 @@ def get_metabs_with_one_cvterm(model): return only_one -def get_reactions_per_sbo(model): +def get_reactions_per_sbo(model: libModel) -> dict: + """Counts number of reactions of all SBO Terms present + + Args: + model (libModel): model loaded with libsbml + + Returns: + dict: SBO Term as keys and number of reactions as values + """ sbos_dict = {} for react in model.getListOfReactions(): sbo = react.getSBOTerm() @@ -295,7 +305,15 @@ def get_reactions_per_sbo(model): sbos_dict[sbo] = 1 return sbos_dict -def get_sbo_plot_single(model): +def plot_rea_sbo_single(model: libModel): + """Plots reactions per SBO Term in horizontal bar chart + + Args: + model (libModel): model loaded with libsbml + + Returns: + plot: pandas plot object + """ df = pd.DataFrame(get_reactions_per_sbo(model), index=[0]).T.reset_index().rename({0:model.id, 'index': 'SBO-Term'}, axis=1) df = df[df[model.id]>3] df['SBO-Name'] = df['SBO-Term'].apply(search_sbo_label) diff --git a/refinegems/io.py b/refinegems/io.py index 20967aeb..3782b1d5 100644 --- a/refinegems/io.py +++ b/refinegems/io.py @@ -331,7 +331,15 @@ def extract_locus(feature): lambda row: extract_locus(row['Parent']), axis=1) return mapping_df.drop('Parent', axis=1) -def search_sbo_label(sbo_number: str): +def search_sbo_label(sbo_number: str) -> str: + """looks up the SBO label corresponding to a given SBO Term number + + Args: + sbo_number (str): Last three digits of SBO-Term as str + + Returns: + str: denoted label for given SBO Term + """ sbo_number = str(sbo_number) client = EBIClient() sbo = client.get_term('sbo', 'http://biomodels.net/SBO/SBO_0000' + sbo_number)