return samples and fba, fva solutions as df with reactions as indices…

…; also fixes GeomScale#96
hariszaf · Apr 25, 2024 · 6526db7 · 6526db7
1 parent 727c0c2
commit 6526db7
Show file tree

Hide file tree

Showing 6 changed files with 176 additions and 117 deletions.
diff --git a/dingo/MetabolicNetwork.py b/dingo/MetabolicNetwork.py
@@ -7,12 +7,15 @@
 # Licensed under GNU LGPL.3, see LICENCE file
 
 import numpy as np
+import pandas as pd
 import sys
 from typing import Dict
 import cobra
 from dingo.loading_models import read_json_file, read_mat_file, read_sbml_file, parse_cobra_model
 from dingo.fva import slow_fva
 from dingo.fba import slow_fba
+import logging
+logger = logging.getLogger(__name__)
 
 try:
     import gurobipy
@@ -33,10 +36,12 @@ def __init__(self, tuple_args):
             import gurobipy
 
             self._parameters["fast_computations"] = True
+            self._parameters["tol"] = 1e-06
         except ImportError as e:
             self._parameters["fast_computations"] = False
+            self._parameters["tol"] = 1e-03
 
-        if len(tuple_args) != 10:
+        if len(tuple_args) != 12:
             raise Exception(
                 "An unknown input format given to initialize a metabolic network object."
             )
@@ -51,6 +56,8 @@ def __init__(self, tuple_args):
         self._medium = tuple_args[7]
         self._medium_indices = tuple_args[8]
         self._exchanges = tuple_args[9]
+        self._reactions_map = tuple_args[10]
+        self._metabolites_map = tuple_args[11]
 
         try:
             if self._biomass_index is not None and (
@@ -108,30 +115,51 @@ def fva(self):
         """A member function to apply the FVA method on the metabolic network."""
 
         if self._parameters["fast_computations"]:
-            return fast_fva(
+            min_fluxes, max_fluxes, max_biomass_flux_vector, max_biomass_objective = fast_fva(
                 self._lb,
                 self._ub,
                 self._S,
                 self._biomass_function,
                 self._parameters["opt_percentage"],
             )
         else:
-            return slow_fva(
+            min_fluxes, max_fluxes, max_biomass_flux_vector, max_biomass_objective = slow_fva(
                 self._lb,
                 self._ub,
                 self._S,
                 self._biomass_function,
                 self._parameters["opt_percentage"],
             )
+        self._min_fluxes = min_fluxes
+        self._max_fluxes = max_fluxes
+        self._opt_vector = max_biomass_flux_vector
+        self._opt_value = max_biomass_objective
+        return min_fluxes, max_fluxes, max_biomass_flux_vector, max_biomass_objective
 
     def fba(self):
         """A member function to apply the FBA method on the metabolic network."""
 
         if self._parameters["fast_computations"]:
-            return fast_fba(self._lb, self._ub, self._S, self._biomass_function)
+            opt_vector, opt_value = fast_fba(self._lb, self._ub, self._S, self._biomass_function)
         else:
-            return slow_fba(self._lb, self._ub, self._S, self._biomass_function)
-
+            opt_vector, opt_value = slow_fba(self._lb, self._ub, self._S, self._biomass_function)
+        self._opt_vector = opt_vector
+        self._opt_value = opt_value
+        return opt_vector, opt_value
+
+    def fba_to_df(self):
+        if not hasattr(self, '_opt_vector'):
+            self.fba()
+        fba_df = pd.DataFrame({'fluxes': self._opt_vector}, index=self._reactions)
+        return fba_df
+
+    def fva_to_df(self):
+        if not hasattr(self, '_min_fluxes'):
+            self.fva()
+        fva_df = pd.DataFrame({'minimum': self._min_fluxes, 'maximum': self._max_fluxes}, index=self._reactions)
+        return fva_df
+
+    # Descriptors
     @property
     def lb(self):
         return self._lb
@@ -172,6 +200,30 @@ def exchanges(self):
     def parameters(self):
         return self._parameters
 
+    @property
+    def reactions_map(self):
+        return self._reactions_map
+
+    @property
+    def metabolites_map(self):
+        return self._metabolites_map
+
+    @property
+    def opt_value(self, value):
+        self._opt_value = value
+
+    @property
+    def opt_vector(self, value):
+        self._opt_vector = value
+
+    @property
+    def min_fluxes(self, value):
+        self._min_fluxes = value
+
+    @property
+    def max_fluxes(self, value):
+        self._max_fluxes = value
+
     @property
     def get_as_tuple(self):
         return (
@@ -183,16 +235,11 @@ def get_as_tuple(self):
             self._biomass_index,
             self._biomass_function,
             self._medium,
-            self._inter_medium,
-            self._exchanges
+            self._exchanges,
+            self._reactions_map,
+            self._metabolites_map
         )
 
-    def num_of_reactions(self):
-        return len(self._reactions)
-
-    def num_of_metabolites(self):
-        return len(self._metabolites)
-
     @lb.setter
     def lb(self, value):
         self._lb = value
@@ -221,7 +268,6 @@ def biomass_index(self, value):
     def biomass_function(self, value):
         self._biomass_function = value
 
-
     @medium.setter
     def medium(self, medium: Dict[str, float]) -> None:
         """Set the constraints on the model exchanges.
@@ -275,18 +321,50 @@ def set_active_bound(reaction: str, reac_index: int, bound: float) -> None:
         # Turn off reactions not present in media
         for rxn_id in exchange_rxns - frozen_media_rxns:
             """
-            is_export for us, needs to check on the S 
-            order reactions to their lb and ub 
+            is_export for us, needs to check on the S
+            order reactions to their lb and ub
             """
             # is_export = rxn.reactants and not rxn.products
             reac_index = self._reactions.index(rxn_id)
-            products = np.any(self._S[:,reac_index] > 0) 
+            products = np.any(self._S[:,reac_index] > 0)
             reactants_exist = np.any(self._S[:,reac_index] < 0)
             is_export = True if not products and reactants_exist else False
             set_active_bound(
                 rxn_id, reac_index, min(0.0, -self._lb[reac_index] if is_export else self._ub[reac_index])
             )
 
+    @reactions_map.setter
+    def reactions_map(self, value):
+        self._reactions_map = value
+
+    @metabolites_map.setter
+    def metabolites_map(self, value):
+        self._metabolites_map = value
+
+    @min_fluxes.setter
+    def min_fluxes(self, value):
+        self._min_fluxes = value
+
+    @max_fluxes.setter
+    def max_fluxes(self, value):
+        self._max_fluxes = value
+
+    @opt_value.setter
+    def opt_value(self, value):
+        self._opt_value = value
+
+    @opt_vector.setter
+    def opt_vector(self, value):
+        self._opt_vector = value
+
+
+    # Attributes
+    def num_of_reactions(self):
+        return len(self._reactions)
+
+    def num_of_metabolites(self):
+        return len(self._metabolites)
+
     def set_fast_mode(self):
 
         try:

diff --git a/dingo/PolytopeSampler.py b/dingo/PolytopeSampler.py
@@ -17,6 +17,7 @@
     get_matrices_of_low_dim_polytope,
     get_matrices_of_full_dim_polytope,
 )
+import pandas as pd
 
 try:
     import gurobipy
@@ -39,12 +40,12 @@ def __init__(self, metabol_net):
             raise Exception("An unknown input object given for initialization.")
 
         self._metabolic_network = copy.deepcopy(metabol_net)
-        self._A = []
-        self._b = []
-        self._N = []
-        self._N_shift = []
-        self._T = []
-        self._T_shift = []
+        self._A = np.empty( shape=(0, 0) )
+        self._b = np.empty( shape=(0, 0) )
+        self._N = np.empty( shape=(0, 0) )
+        self._N_shift = np.empty( shape=(0, 0) )
+        self._T = np.empty( shape=(0, 0) )
+        self._T_shift = np.empty( shape=(0, 0) )
         self._parameters = {}
         self._parameters["nullspace_method"] = "sparseQR"
         self._parameters["opt_percentage"] = self.metabolic_network.parameters[
@@ -69,12 +70,12 @@ def get_polytope(self):
         """
 
         if (
-            self._A == []
-            or self._b == []
-            or self._N == []
-            or self._N_shift == []
-            or self._T == []
-            or self._T_shift == []
+            self._A.size == 0
+            or self._b.size == 0
+            or self._N.size == 0
+            or self._N_shift.size == 0
+            or self._T.size == 0
+            or self._T_shift.size == 0
         ):
 
             (
@@ -186,8 +187,10 @@ def generate_steady_states(
         self._T = np.dot(self._T, Tr)
         self._T_shift = np.add(self._T_shift, Tr_shift)
 
-        return steady_states
-
+        steady_states_df = pd.DataFrame(steady_states, index = self._metabolic_network.reactions)
+
+        return steady_states_df
+
     def generate_steady_states_no_multiphase(
         self, method = 'billiard_walk', n=1000, burn_in=0, thinning=1, variance=1.0, bias_vector=None
     ):
@@ -199,11 +202,11 @@ def generate_steady_states_no_multiphase(
         burn_in -- the number of points to burn before sampling
         thinning -- the walk length of the chain
         """
-        	
+
         self.get_polytope()
 
         P = HPolytope(self._A, self._b)
-        
+
         if bias_vector is None:
             bias_vector = np.ones(self._A.shape[1], dtype=np.float64)
         else:
@@ -215,8 +218,9 @@ def generate_steady_states_no_multiphase(
         steady_states = map_samples_to_steady_states(
                 samples_T, self._N, self._N_shift
             )
+        steady_states_df = pd.DataFrame(steady_states, index = self._metabolic_network.reactions)
 
-        return steady_states
+        return steady_states_df
 
     @staticmethod
     def sample_from_polytope(
@@ -247,7 +251,7 @@ def sample_from_polytope(
             )
 
         return samples
-    
+
     @staticmethod
     def sample_from_polytope_no_multiphase(
         A, b, method = 'billiard_walk', n=1000, burn_in=0, thinning=1, variance=1.0, bias_vector=None
@@ -266,7 +270,7 @@ def sample_from_polytope_no_multiphase(
             bias_vector = np.ones(A.shape[1], dtype=np.float64)
         else:
             bias_vector = bias_vector.astype('float64')
-            
+
         P = HPolytope(A, b)
 
         try:
@@ -288,16 +292,13 @@ def round_polytope(
             A, b, Tr, Tr_shift, round_value = P.rounding(method, True)
         except ImportError as e:
             A, b, Tr, Tr_shift, round_value = P.rounding(method, False)
-        
+
         return A, b, Tr, Tr_shift
 
+
     @staticmethod
     def sample_from_fva_output(
-        min_fluxes,
-        max_fluxes,
-        biomass_function,
-        max_biomass_objective,
-        S,
+        model,
         opt_percentage=100,
         ess=1000,
         psrf=False,
@@ -307,11 +308,7 @@ def sample_from_fva_output(
         """A static function to sample steady states when the output of FVA is given.
 
         Keyword arguments:
-        min_fluxes -- minimum values of the fluxes, i.e., a n-dimensional vector
-        max_fluxes -- maximum values for the fluxes, i.e., a n-dimensional vector
-        biomass_function -- the biomass objective function
-        max_biomass_objective -- the maximum value of the biomass objective function
-        S -- stoichiometric matrix
+        model -- a dingo.MetabolicNetwork() object
         opt_percentage -- consider solutions that give you at least a certain
                       percentage of the optimal solution (default is to consider
                       optimal solutions only)
@@ -321,16 +318,18 @@ def sample_from_fva_output(
         num_threads -- the number of threads to use for parallel mmcs
         """
 
+        min_fluxes, max_fluxes, opt_vector, opt_value = model.fva()
+
         A, b, Aeq, beq = get_matrices_of_low_dim_polytope(
-            S, min_fluxes, max_fluxes, opt_percentage, tol
+            model.S, min_fluxes, max_fluxes, opt_percentage, model._parameters["tol"]
         )
 
-        A = np.vstack((A, -biomass_function))
+        A = np.vstack((A, -model.biomass_function))
         b = np.append(
             b,
             -(opt_percentage / 100)
-            * self._parameters["tol"]
-            * math.floor(max_biomass_objective / self._parameters["tol"]),
+            * model._parameters["tol"]
+            * math.floor(opt_value / model._parameters["tol"]),
         )
 
         A, b, N, N_shift = get_matrices_of_full_dim_polytope(A, b, Aeq, beq)
@@ -352,6 +351,17 @@ def sample_from_fva_output(
 
         return steady_states
 
+    @staticmethod
+    def samples_as_df(model, samples):
+        """A static function to convert the samples numpy ndarray to a pandas DataFrame with model's reactions as indices
+
+        Keyword arguments:
+        model --
+        samples --
+        """
+        samples_df = pd.DataFrame(samples, index = model.reactions)
+        return samples_df
+
     @property
     def A(self):
         return self._A
@@ -413,3 +423,4 @@ def set_tol(self, value):
     def set_opt_percentage(self, value):
 
         self._parameters["opt_percentage"] = value
+