Skip to content

Commit

Permalink
added docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
rsexton2 committed Nov 23, 2024
1 parent 9099fcf commit 90c63d8
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 5 deletions.
44 changes: 42 additions & 2 deletions basicrta/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,24 @@
from basicrta.gibbs import Gibbs
gc.enable()

""" This module provides the ProcessProtein class, which collects and processes
"""This module provides the ProcessProtein class, which collects and processes
Gibbs sampler data.
"""

class ProcessProtein(object):
r""" ProcessProtein is the class that collects and processes Gibbs sampler
r"""ProcessProtein is the class that collects and processes Gibbs sampler
data. This class collects results for all residues in the
`basicrta-{cutoff}` directory and can write out a :math:`\tau` vs resid
numpy array or plot :math:`\tau` vs resid. If a structure is provided,
:math:`\tau` will be written as b-factors for visualization.
:param niter: Number of iterations used in the Gibbs samplers
:type niter: int
:param prot: Name of protein in `tm_dict.txt`, used to draw TM bars in
:math:`tau` vs resid plot.
:type prot: str, optional
:param cutoff: Cutoff used in contact analysis.
:type cutoff: float
"""

def __init__(self, niter, prot, cutoff):
Expand All @@ -44,6 +52,12 @@ def _single_residue(self, adir, process=False):
return result

def reprocess(self, nproc=1):
"""Rerun processing and clustering on :class:`Gibbs` data.
:param nproc: Number of processes to use in clustering results for all
residues.
:type nproc: int
"""
from glob import glob

dirs = np.array(glob(f'basicrta-{self.cutoff}/?[0-9]*'))
Expand All @@ -62,6 +76,9 @@ def reprocess(self, nproc=1):
pass

def collect_results(self):
"""Collect names of results for each residue in the `basicrta-{cutoff}`
directory in a dictionary stored in :attr:`ProcessProtein.results`.
"""
from glob import glob

dirs = np.array(glob(f'basicrta-{self.cutoff}/?[0-9]*'))
Expand All @@ -77,6 +94,14 @@ def collect_results(self):
pass

def get_taus(self):
r"""Get :math:`\tau` and 95\% confidence interval bounds for the slowest
process for each residue.
:returns: Returns a tuple of the form (:math:`\tau`, [CI lower bound,
CI upper bound])
:rtype: tuple
"""
from basicrta.util import get_bars

taus = []
Expand All @@ -95,13 +120,25 @@ def get_taus(self):
return taus[:, 1], bars

def write_data(self, fname='tausout'):
r"""Write :math:`\tau` values with 95\% confidence interval to a numpy
file with the format [`sel1` resid, :math:`\tau`, CI lower bound, CI
upper bound].
:param fname: Filename to save data to.
:type fname: str, optional
"""
taus, bars = self.get_taus()
keys = self.residues.keys()
residues = np.array([int(res[1:]) for res in keys])
data = np.stack((residues, taus, bars[0], bars[1]))
np.save(fname, data.T)

def plot_protein(self, **kwargs):
r"""Plot :math:`\tau` vs resid. kwargs are passed to the
:meth:`plot_protein` method of `util.py`. These can be used to change
the labeling cutoff, y-limit of the plot, scale the figure, and set
major and minor ticks.
"""
from basicrta.util import plot_protein
if len(self.residues) == 0:
print('run `collect_residues` then rerun')
Expand All @@ -118,6 +155,9 @@ def plot_protein(self, **kwargs):
plot_protein(residues, taus, bars, self.prot, **kwargs)

def b_color_structure(self, structure):
r"""Add :math:`\tau` to b-factors in the specified structure. Saves
structure with b-factors to `tau_bcolored.pdb`.
"""
taus, bars = self.get_taus()
cis = bars[1]+bars[0]
errs = taus/cis
Expand Down
50 changes: 47 additions & 3 deletions basicrta/contacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,37 @@


class MapContacts(object):
"""
This class is used to create the map of contacts between two groups of
"""This class is used to create the map of contacts between two groups of
atoms. A single cutoff is used to define a contact between the two groups,
where if any atomic distance between the two groups is less than the cutoff,
a contact is considered formed.
:param u: Universe containing the topology and trajectory for which the
contacts will be computed.
:type u: `MDAnalysis Universe`
:param ag1: Primary AtomGroup for which contacts will be computed, typically a
protein.
:type ag1: MDAnalysis AtomGroup
:param ag2: Secondary AtomGroup which forms contacts with `ag1`, typically
lipids, ions, or other small molecules. Each residue of `ag2`
must have the same number of atoms.
:type ag2: MDAnalysis AtomGroup
:param nproc: Number of processes to use in computing contacts (default is
1).
:type nproc: int, optional
:param frames: List of frames to use in computing contacts (default is
None, meaning all frames are used).
:type frames: list or np.array, optional
:param cutoff: Maximum cutoff to use in computing contacts. A primary
contact map is created upon which multiple cutoffs can be
imposed, i.e. in the case where a proper cutoff is being
determined. This can typically be left at its default value,
unless a greater value is needed (default is 10.0).
:type cutoff: float, optional
:param nslices: Number of slices to break the trajectory into for
processing. If device memory is limited, try increasing
`nslices` (default is 100).
:type nslices: int, optional
"""

def __init__(self, u, ag1, ag2, nproc=1, frames=None, cutoff=10.0,
Expand All @@ -29,6 +55,8 @@ def __init__(self, u, ag1, ag2, nproc=1, frames=None, cutoff=10.0,
self.cutoff, self.frames, self.nslices = cutoff, frames, nslices

def run(self):
"""Run contact analysis and save to `contacts.pkl`
"""
if self.frames is not None:
sliced_frames = np.array_split(self.frames, self.nslices)
else:
Expand Down Expand Up @@ -105,12 +133,28 @@ def _run_contacts(self, i, sliced_traj):


class ProcessContacts(object):
def __init__(self, cutoff, nproc, map_name='contacts.pkl'):
"""The :class:`ProcessProtein` class takes the primary contact map
(default is `contacts.pkl`) and collects contacts based on a prescribed
cutoff.
:param cutoff: Collect all contacts between `ag1` and `ag2` within this
value.
:type cutoff: float
:param nproc: Number of processes to use in collecting contacts (default is
1).
:type nproc: int, optional
:param map_name: Name of primary contact map (default is `contacts.pkl`)
:type map_name: str, optional
"""
def __init__(self, cutoff, nproc=1, map_name='contacts.pkl'):
self.nproc = nproc
self.map_name = map_name
self.cutoff = cutoff

def run(self):
"""Process contacts using the prescribed cutoff and write to
contacts-{cutoff}.pkl
"""
if os.path.exists(self.map_name):
with open(self.map_name, 'r+b') as f:
memmap = pickle.load(f)
Expand Down

0 comments on commit 90c63d8

Please sign in to comment.