added docstrings

Becksteinlab · Nov 23, 2024 · 90c63d8 · 90c63d8
1 parent 9099fcf
commit 90c63d8
Show file tree

Hide file tree

Showing 2 changed files with 89 additions and 5 deletions.
diff --git a/basicrta/cluster.py b/basicrta/cluster.py
@@ -8,16 +8,24 @@
 from basicrta.gibbs import Gibbs
 gc.enable()
 
-""" This module provides the ProcessProtein class, which collects and processes
+"""This module provides the ProcessProtein class, which collects and processes
 Gibbs sampler data.
 """
 
 class ProcessProtein(object):
-    r""" ProcessProtein is the class that collects and processes Gibbs sampler
+    r"""ProcessProtein is the class that collects and processes Gibbs sampler
     data. This class collects results for all residues in the 
     `basicrta-{cutoff}` directory and can write out a :math:`\tau` vs resid
     numpy array or plot :math:`\tau` vs resid. If a structure is provided,
     :math:`\tau` will be written as b-factors for visualization.
+
+    :param niter: Number of iterations used in the Gibbs samplers
+    :type niter: int
+    :param prot: Name of protein in `tm_dict.txt`, used to draw TM bars in
+                 :math:`tau` vs resid plot.
+    :type prot: str, optional
+    :param cutoff: Cutoff used in contact analysis.
+    :type cutoff: float
     """
 
     def __init__(self, niter, prot, cutoff):
@@ -44,6 +52,12 @@ def _single_residue(self, adir, process=False):
         return result
 
     def reprocess(self, nproc=1):
+        """Rerun processing and clustering on :class:`Gibbs` data.
+
+        :param nproc: Number of processes to use in clustering results for all
+                      residues.
+        :type nproc: int
+        """
         from glob import glob
 
         dirs = np.array(glob(f'basicrta-{self.cutoff}/?[0-9]*'))
@@ -62,6 +76,9 @@ def reprocess(self, nproc=1):
                 pass
 
     def collect_results(self):
+        """Collect names of results for each residue in the `basicrta-{cutoff}`
+        directory in a dictionary stored in :attr:`ProcessProtein.results`.
+        """
         from glob import glob
 
         dirs = np.array(glob(f'basicrta-{self.cutoff}/?[0-9]*'))
@@ -77,6 +94,14 @@ def collect_results(self):
             pass
 
     def get_taus(self):
+        r"""Get :math:`\tau` and 95\% confidence interval bounds for the slowest
+        process for each residue. 
+        
+        :returns: Returns a tuple of the form (:math:`\tau`, [CI lower bound, 
+                  CI upper bound])
+        :rtype: tuple
+        
+        """
         from basicrta.util import get_bars
 
         taus = []
@@ -95,13 +120,25 @@ def get_taus(self):
         return taus[:, 1], bars
 
     def write_data(self, fname='tausout'):
+        r"""Write :math:`\tau` values with 95\% confidence interval to a numpy
+        file with the format [`sel1` resid, :math:`\tau`, CI lower bound, CI
+        upper bound].
+
+        :param fname: Filename to save data to.
+        :type fname: str, optional
+        """
         taus, bars = self.get_taus()
         keys = self.residues.keys()
         residues = np.array([int(res[1:]) for res in keys])
         data = np.stack((residues, taus, bars[0], bars[1]))
         np.save(fname, data.T)
 
     def plot_protein(self, **kwargs):
+        r"""Plot :math:`\tau` vs resid. kwargs are passed to the 
+        :meth:`plot_protein` method of `util.py`. These can be used to change
+        the labeling cutoff, y-limit of the plot, scale the figure, and set
+        major and minor ticks.
+        """
         from basicrta.util import plot_protein
         if len(self.residues) == 0:
             print('run `collect_residues` then rerun')
@@ -118,6 +155,9 @@ def plot_protein(self, **kwargs):
         plot_protein(residues, taus, bars, self.prot, **kwargs)
 
     def b_color_structure(self, structure):
+        r"""Add :math:`\tau` to b-factors in the specified structure. Saves
+        structure with b-factors to `tau_bcolored.pdb`. 
+        """
         taus, bars = self.get_taus()
         cis = bars[1]+bars[0]
         errs = taus/cis

diff --git a/basicrta/contacts.py b/basicrta/contacts.py
@@ -15,11 +15,37 @@
 
 
 class MapContacts(object):
-    """
-    This class is used to create the map of contacts between two groups of
+    """This class is used to create the map of contacts between two groups of
     atoms. A single cutoff is used to define a contact between the two groups,
     where if any atomic distance between the two groups is less than the cutoff,
     a contact is considered formed.
+    
+    :param u: Universe containing the topology and trajectory for which the
+              contacts will be computed.
+    :type u: `MDAnalysis Universe`
+    :param ag1: Primary AtomGroup for which contacts will be computed, typically a
+                protein.
+    :type ag1: MDAnalysis AtomGroup
+    :param ag2: Secondary AtomGroup which forms contacts with `ag1`, typically
+                lipids, ions, or other small molecules. Each residue of `ag2` 
+                must have the same number of atoms.
+    :type ag2: MDAnalysis AtomGroup
+    :param nproc: Number of processes to use in computing contacts (default is
+                  1).
+    :type nproc: int, optional
+    :param frames: List of frames to use in computing contacts (default is
+                   None, meaning all frames are used).
+    :type frames: list or np.array, optional
+    :param cutoff: Maximum cutoff to use in computing contacts. A primary 
+                   contact map is created upon which multiple cutoffs can be
+                   imposed, i.e. in the case where a proper cutoff is being
+                   determined. This can typically be left at its default value,
+                   unless a greater value is needed (default is 10.0).
+    :type cutoff: float, optional
+    :param nslices: Number of slices to break the trajectory into for
+                    processing. If device memory is limited, try increasing
+                    `nslices` (default is 100).
+    :type nslices: int, optional
     """
 
     def __init__(self, u, ag1, ag2, nproc=1, frames=None, cutoff=10.0,
@@ -29,6 +55,8 @@ def __init__(self, u, ag1, ag2, nproc=1, frames=None, cutoff=10.0,
         self.cutoff, self.frames, self.nslices = cutoff, frames, nslices
 
     def run(self):
+        """Run contact analysis and save to `contacts.pkl`
+        """
         if self.frames is not None:
             sliced_frames = np.array_split(self.frames, self.nslices)
         else:
@@ -105,12 +133,28 @@ def _run_contacts(self, i, sliced_traj):
 
 
 class ProcessContacts(object):
-    def __init__(self, cutoff, nproc, map_name='contacts.pkl'):
+    """The :class:`ProcessProtein` class takes the primary contact map
+    (default is `contacts.pkl`) and collects contacts based on a prescribed 
+    cutoff. 
+
+    :param cutoff: Collect all contacts between `ag1` and `ag2` within this
+                   value.
+    :type cutoff: float
+    :param nproc: Number of processes to use in collecting contacts (default is
+                  1). 
+    :type nproc: int, optional
+    :param map_name: Name of primary contact map (default is `contacts.pkl`)
+    :type map_name: str, optional
+    """
+    def __init__(self, cutoff, nproc=1, map_name='contacts.pkl'):
         self.nproc = nproc
         self.map_name = map_name
         self.cutoff = cutoff
 
     def run(self):
+        """Process contacts using the prescribed cutoff and write to
+           contacts-{cutoff}.pkl
+        """
         if os.path.exists(self.map_name):
             with open(self.map_name, 'r+b') as f:
                 memmap = pickle.load(f)