From 0c012301345948915202c3e9f135e620d922cba1 Mon Sep 17 00:00:00 2001
From: famosab <famke.baeuerle@gmail.com>
Date: Tue, 28 Feb 2023 16:50:53 +0100
Subject: [PATCH] commented vis functions #56

---
 main.py                   |  31 +++++++-----
 refinegems/comparison.py  | 101 +++++++++++++++++++++++++++++++-------
 refinegems/investigate.py |  44 ++++++++++++-----
 refinegems/io.py          |  10 +++-
 4 files changed, 142 insertions(+), 44 deletions(-)

diff --git a/main.py b/main.py
index e678df0c..3839034d 100644
--- a/main.py
+++ b/main.py
@@ -70,19 +70,26 @@ def main():
     
     else:
         if (config['multiple']):
-            growth_all = rg.comparison.simulate_all(config['multiple_paths'], config['media'], config['growth_basis'])
+            models_cobra = rg.io.load_multiple_models(config['multiple_paths'], 'cobra')
+            growth_all = rg.comparison.simulate_all(models_cobra, config['media'], config['growth_basis'])
             growth_all.to_csv(config['out_path'] + 'growth_' + str(today) + '_' + config['growth_basis'] + '.csv', index=False)
             # visualizations
-            sbo_fig_all = rg.comparison.get_sbo_plot_multiple(config['multiple_paths']).get_figure()
-            venn_reac = rg.comparison.create_venn(config['multiple_paths'], 'reaction', True).get_figure()
-            venn_metab = rg.comparison.create_venn(config['multiple_paths'], 'metabolite', True).get_figure()
-            heatmap = rg.comparison.create_heatmap(growth_all[['model', 'medium', 'doubling_time [min]']])
-            # saving them
-            sbo_fig_all.savefig(config['out_path'] + 'visualization/' + 'all_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight')
-            venn_reac.savefig(config['out_path'] + 'visualization/' + 'all_ReacOverlap_' + str(today) + '.png', bbox_inches='tight')
-            venn_metab.savefig(config['out_path'] + 'visualization/' + 'all_MetabOverlap_' + str(today) + '.png', bbox_inches='tight')
-            heatmap.savefig(config['out_path'] + 'visualization/' + 'heatmap_dt_additives_' + str(today) + '.png')
-        
+            if (config['visualize']):
+                models_libsbml = rg.io.load_multiple_models(config['multiple_paths'], 'libsbml')
+                ini_plot = rg.comparison.plot_initial_analysis(models_libsbml).get_figure()
+                sbo_fig_all = rg.comparison.plot_rea_sbo_multiple(models_libsbml).get_figure()
+                venn_reac = rg.comparison.plot_venn(models_cobra, 'reaction', True).get_figure()
+                venn_metab = rg.comparison.plot_venn(models_cobra, 'metabolite', True).get_figure()
+                heatmap = rg.comparison.plot_heatmap_dt(growth_all[['model', 'medium', 'doubling_time [min]']])
+                binary_heatmap = rg.comparison.plot_heatmap_binary(growth_all)
+                # saving them
+                sbo_fig_all.savefig(config['out_path'] + 'visualization/' + 'all_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight')
+                venn_reac.savefig(config['out_path'] + 'visualization/' + 'all_ReacOverlap_' + str(today) + '.png', bbox_inches='tight')
+                venn_metab.savefig(config['out_path'] + 'visualization/' + 'all_MetabOverlap_' + str(today) + '.png', bbox_inches='tight')
+                heatmap.savefig(config['out_path'] + 'visualization/' + 'heatmap_dt_additives_' + str(today) + '.png')
+                binary_heatmap.savefig(config['out_path'] + 'visualization/' + 'heatmap_native_' + str(today) + '.png', bbox_inches='tight')
+                ini_plot.savefig(config['out_path'] + 'visualization/' + 'model_status_' + str(today) + '.png', bbox_inches='tight')
+                                                    
         try:    
             model_cobra, errors = cobra.io.sbml.validate_sbml_model(config['model'])
             print(errors)
@@ -96,7 +103,7 @@ def main():
             orphans, deadends, disconnected = rg.investigate.get_orphans_deadends_disconnected(model_cobra)
             mass_unbal, charge_unbal = rg.investigate.get_mass_charge_unbalanced(model_cobra)
             egc = rg.investigate.get_egc(model_cobra)
-            sbo_fig = rg.investigate.get_sbo_plot_single(model_libsbml).get_figure()
+            sbo_fig = rg.investigate.plot_rea_sbo_single(model_libsbml).get_figure()
             
             # saving the created visualizations
             sbo_fig.savefig(config['out_path'] + 'visualization/' + str(model_cobra.id) + '_ReacPerSBO_' + str(today) + '.png', bbox_inches='tight')
diff --git a/refinegems/comparison.py b/refinegems/comparison.py
index 5e897c89..7d500cd1 100644
--- a/refinegems/comparison.py
+++ b/refinegems/comparison.py
@@ -10,13 +10,46 @@
 import numpy as np
 from tqdm import tqdm
 from venn import venn
-from refinegems.io import load_multiple_models, load_medium_from_db, search_sbo_label
+from libsbml import Model as libModel
+from cobra import Model as cobraModel
+from refinegems.io import load_medium_from_db, search_sbo_label
 from refinegems.growth import growth_one_medium_from_default, growth_one_medium_from_minimal
 from refinegems.investigate import initial_analysis, get_reactions_per_sbo
 
 __author__ = "Famke Baeuerle"
 
-def get_sbo_mapping_multiple(models):
+def plot_initial_analysis(models: list[libModel]):
+    """Creates bar plot of number of entities per Model
+
+    Args:
+        models (list[libModel]): Models loaded with libSBML
+
+    Returns:
+        plot: pandas plot object
+    """
+    numbers = pd.DataFrame([initial_analysis(model) for model in models], columns=['model', 'metabolites', 'reactions', 'genes'])
+    ax = numbers.set_index('model').plot.bar(y=['metabolites', 'reactions', 'genes'], figsize=(8, 5), cmap='Paired', rot=0)
+    # commented is possibility to integrate memote scores
+    #numbers.set_index('model').plot(y='Memote score', ax=ax, use_index=False, linestyle=':', secondary_y='Memote score', color='k', marker='D', legend=True)
+    #ax.right_ax.set_ylabel('Memote score [%]')
+    #ax.right_ax.legend(loc='upper right', bbox_to_anchor=[0.98, 0.9])
+    #ax.right_ax.set_ylim([75, 95])
+    ax.legend(title=False, loc='upper left', ncol=3, frameon=False)
+    ylim = numbers.drop('model', axis=1).max().max() + 200
+    ax.set_ylim([0,ylim])
+    ax.set_xlabel('')
+    ax.tick_params(axis='x',which='both', bottom=False,top=False)
+    return ax
+
+def get_sbo_mapping_multiple(models: list[libModel]) -> pd.DataFrame:
+    """Determines number of reactions per SBO Term and adss label of SBO Terms
+
+    Args:
+        models (list[libModel]): Models loaded with libSBML
+
+    Returns:
+        pd.DataFrame: SBO Terms, no of reactions per model and SBO Label
+    """
     mappings = {}
     for model in models:
         mappings[model.id] = get_reactions_per_sbo(model)
@@ -25,8 +58,16 @@ def get_sbo_mapping_multiple(models):
     df['SBO-Name'] = df['SBO-Term'].apply(search_sbo_label)
     return df
 
-def get_sbo_plot_multiple(model_list: list[str], rename=None):
-    models = load_multiple_models(model_list, package='libsbml')
+def plot_rea_sbo_multiple(models: list[libModel], rename=None):
+    """Plots reactions per SBO Term in horizontal bar chart with stacked bars for the models
+
+    Args:
+        models (list[libModel]): Models loaded with libSBML
+        rename (dict, optional): Rename model ids to custom names. Defaults to None.
+
+    Returns:
+        plot: pandas plot object
+    """
     map = get_sbo_mapping_multiple(models)
     id_list = [mod.id for mod in models]
     map = map[(map[id_list]>3).all(axis=1)]
@@ -39,10 +80,19 @@ def get_sbo_plot_multiple(model_list: list[str], rename=None):
     fig.legend(loc='lower right')
     return fig
 
-def create_venn(model_list: list[str], entity: str, perc: bool=False): 
-    all_models = load_multiple_models(model_list, package='cobra')
+def plot_venn(models: list[cobraModel], entity: str, perc: bool=False):
+    """Creates venn diagram to show the overlap of model entities
+
+    Args:
+        models (list[cobraModel]): Models loaded with cobrapy
+        entity (str): Compare on metabolite|reaction
+        perc (bool, optional): True if percentages should be used. Defaults to False.
+
+    Returns:
+        plot: venn diagram
+    """
     intersec = {}
-    for model in all_models:
+    for model in models:
         reas = []
         if entity == 'metabolite':
             for rea in model.metabolites:
@@ -57,7 +107,15 @@ def create_venn(model_list: list[str], entity: str, perc: bool=False):
         fig = venn(intersec)
     return fig
 
-def create_heatmap(growth: pd.DataFrame):
+def plot_heatmap_dt(growth: pd.DataFrame):
+    """Creates heatmap of simulated doubling times with additives
+    
+    Args:
+        growth (pd.DataFrame): Containing growth data from simulate_all
+        
+    Returns:
+        plot: sns heatmap plot
+    """
     growth=growth.set_index(['medium', 'model']).sort_index().T.stack()
     growth.columns.name=None
     growth.index.names = (None,None)
@@ -95,7 +153,15 @@ def create_heatmap(growth: pd.DataFrame):
         )
     return fig
 
-def create_binary_heatmap(growth: pd.DataFrame):
+def plot_heatmap_binary(growth: pd.DataFrame):
+    """Creates a plot were if growth without additives is possible is marked blue otherwise red
+
+    Args:
+        growth (pd.DataFrame): Containing growth data from simulate_all
+        
+    Returns:
+        plot: sns heatmap plot
+    """
     def get_native_growth(row):
         if row == True:
             return 1
@@ -131,23 +197,22 @@ def get_native_growth(row):
         )
     return fig
 
-def simulate_all(model_list: list[str], media: list[str], basis: str) -> pd.DataFrame:
-    """does a run of growth simulation for multiple models on different media
+def simulate_all(models: list[cobraModel], media: list[str], basis: str) -> pd.DataFrame:
+    """Does a run of growth simulation for multiple models on different media
 
     Args:
-        model_list (list): paths to the models of interest (xml files)
-        mediumpath (string): path to csv containing medium definitions
-        media (list): media of interest (f.ex. LB, M9, ...)
-        basis (string): either default_uptake (adding metabs from default) or minimal_uptake (adding metabs from minimal medium)
+        models (list[cobraModel]): Models loaded with cobrapy
+        mediumpath (string): Path to csv containing medium definitions
+        media (list): Media of interest (f.ex. LB, M9, ...)
+        basis (string): Either default_uptake (adding metabs from default) or minimal_uptake (adding metabs from minimal medium)
 
     Returns:
-        df: table containing the results of the growth simulation
+        pd.DataFrame: table containing the results of the growth simulation
     """
     growth = pd.DataFrame()
-    all_models = load_multiple_models(model_list, package='cobra')
     for medium_id in tqdm(media):
         medium = load_medium_from_db(medium_id)
-        for model in all_models:
+        for model in models:
             essentials_given = False
             if (basis=='default_uptake'):
                 growth_one = growth_one_medium_from_default(model, medium).drop('missing exchanges', axis=1)
diff --git a/refinegems/investigate.py b/refinegems/investigate.py
index 4bbf877e..30b3f162 100644
--- a/refinegems/investigate.py
+++ b/refinegems/investigate.py
@@ -11,6 +11,8 @@
 import pandas as pd
 import numpy as np
 from cobra import Reaction
+from libsbml import Model as libModel
+from cobra import Model as cobraModel
 from memote.support import consistency
 # needed by memote.support.consitency
 from memote.support import consistency_helpers as con_helpers
@@ -37,17 +39,17 @@
     }
 
 
-def run_memote_sys(modelfile):
+def run_memote_sys(model: cobraModel):
     """run memote on linux machine
 
     Args:
-        modelfile (cobra-model): model loaded with cobrapy
+        model (cobra-model): model loaded with cobrapy
     """
-    cmd = 'memote report snapshot ' + str(modelfile)
+    cmd = 'memote report snapshot ' + str(model)
     os.system(cmd)
 
 
-def run_memote(model):
+def run_memote(model: cobraModel) -> float:
     """runs memote to obtain total score
 
     Args:
@@ -65,7 +67,7 @@ def run_memote(model):
     return totalScore
 
 
-def initial_analysis(model):
+def initial_analysis(model: libModel):
     """extracts most important numbers of GEM
 
     Args:
@@ -81,7 +83,7 @@ def initial_analysis(model):
     return name, reactions, metabolites, genes
 
 
-def get_orphans_deadends_disconnected(model):
+def get_orphans_deadends_disconnected(model: cobraModel):
     """Uses memote functions to extract orphans, deadends and disconnected metabolites
 
     Args:
@@ -112,7 +114,7 @@ def get_orphans_deadends_disconnected(model):
     return orphan_list, deadend_list, disconnected_list
 
 
-def get_mass_charge_unbalanced(model):
+def get_mass_charge_unbalanced(model: cobraModel):
     """creates lists of mass and charge unbalanced reactions,
        without exchange reactions since they are unbalanced per definition
 
@@ -143,7 +145,7 @@ def get_mass_charge_unbalanced(model):
     return mass_list, charge_list
 
 
-def get_model_info(modelpath):
+def get_model_info(modelpath: str):
     """Reports core information of given model
 
     Args:
@@ -179,7 +181,7 @@ def get_model_info(modelpath):
 
     return model_info
 
-def parse_reaction(eq, model): # from alina
+def parse_reaction(eq: str, model: cobraModel) -> dict: # from alina
     """Parses a reaction equation string to dictionary
 
     Args:
@@ -210,7 +212,7 @@ def parse_reaction(eq, model): # from alina
             coeff = 1
     return eq_matrix
 
-def get_egc(model):
+def get_egc(model: cobraModel) -> pd.DataFrame:
     """Energy-generating cycles represent thermodynamically infeasible states. Charging of energy metabolites without any energy source causes such cycles. Detection method is based on (Fritzemeier et al., 2017)
 
     Args:
@@ -264,7 +266,7 @@ def get_egc(model):
         df_fluxes = pd.concat([df_fluxes,pd.DataFrame.from_dict([objval])])
     return df_fluxes.T.reset_index().rename({'index':'BOF', 0:'objective value'}, axis=1).fillna('')
 
-def get_metabs_with_one_cvterm(model):
+def get_metabs_with_one_cvterm(model: libModel) -> list:
     """reports metabolites which have only one annotation, 
     can be used as basis for further annotation research
 
@@ -285,7 +287,15 @@ def get_metabs_with_one_cvterm(model):
                 
     return only_one
 
-def get_reactions_per_sbo(model):
+def get_reactions_per_sbo(model: libModel) -> dict:
+    """Counts number of reactions of all SBO Terms present
+
+    Args:
+        model (libModel): model loaded with libsbml
+
+    Returns:
+        dict: SBO Term as keys and number of reactions as values
+    """
     sbos_dict = {}
     for react in model.getListOfReactions():
         sbo = react.getSBOTerm()
@@ -295,7 +305,15 @@ def get_reactions_per_sbo(model):
             sbos_dict[sbo] = 1
     return sbos_dict
 
-def get_sbo_plot_single(model):
+def plot_rea_sbo_single(model: libModel):
+    """Plots reactions per SBO Term in horizontal bar chart
+
+    Args:
+        model (libModel): model loaded with libsbml
+
+    Returns:
+        plot: pandas plot object
+    """
     df = pd.DataFrame(get_reactions_per_sbo(model), index=[0]).T.reset_index().rename({0:model.id, 'index': 'SBO-Term'}, axis=1)
     df = df[df[model.id]>3]
     df['SBO-Name'] = df['SBO-Term'].apply(search_sbo_label)
diff --git a/refinegems/io.py b/refinegems/io.py
index 20967aeb..3782b1d5 100644
--- a/refinegems/io.py
+++ b/refinegems/io.py
@@ -331,7 +331,15 @@ def extract_locus(feature):
         lambda row: extract_locus(row['Parent']), axis=1)
     return mapping_df.drop('Parent', axis=1)
 
-def search_sbo_label(sbo_number: str):
+def search_sbo_label(sbo_number: str) -> str:
+    """looks up the SBO label corresponding to a given SBO Term number
+
+    Args:
+        sbo_number (str): Last three digits of SBO-Term as str
+
+    Returns:
+        str: denoted label for given SBO Term
+    """
     sbo_number = str(sbo_number)
     client = EBIClient()
     sbo = client.get_term('sbo', 'http://biomodels.net/SBO/SBO_0000' + sbo_number)