
Commit

Revert "Merge pull request #3 from BioKT/develop"
This reverts commit 87d88e8, reversing
changes made to 95c83b8.
daviddesancho committed Nov 18, 2020
1 parent 87d88e8 commit df64f92
Showing 6 changed files with 21 additions and 392 deletions.
307 changes: 0 additions & 307 deletions examples/alanine_dipeptide/ala_dipeptide_multi.ipynb

This file was deleted.

3 binary files not shown.
55 changes: 2 additions & 53 deletions mastermsm/trajectory/traj.py
@@ -3,7 +3,6 @@
"""
import os
import numpy as np
import mdtraj as md
from ..trajectory import traj_lib

@@ -25,58 +24,6 @@ def _load_mdtraj(top=None, traj=None):
"""
return md.load(traj, top=top)

class MultiTimeSeries(object):
""" A class for generating multiple TimeSeries objects in
a consistent way. In principle this is only needed when
the clustering is not established a priori.
"""
def __init__(self, top=None, trajs=None, dt=None):
"""
Parameters
----------
dt : float
The time step.
top : string
The topology file, may be a PDB or GRO file.
trajs : list
A list of trajectory filenames to be read.
"""
self.file_list = trajs
self.traj_list = []
for traj in self.file_list:
tr = TimeSeries(top=top, traj=traj)
self.traj_list.append(tr)

def joint_discretize(self, mcs=None, ms=None):
"""
Analyzes torsion angles from all trajectories jointly.
Produces a fake trajectory from the concatenated set
so that the labels returned by HDBSCAN can be mapped back.
"""
phi_cum = []
psi_cum = []
for tr in self.traj_list:
phi = md.compute_phi(tr.mdt)
psi = md.compute_psi(tr.mdt)
phi_cum.append(phi[1])
psi_cum.append(psi[1])
phi_cum = np.vstack(phi_cum)
psi_cum = np.vstack(psi_cum)
phi_fake = [phi[0], phi_cum]
psi_fake = [psi[0], psi_cum]

labels = traj_lib.discrete_hdbscan(phi_fake, psi_fake, mcs=mcs, ms=ms)

i = 0
for tr in self.traj_list:
ltraj = tr.mdt.n_frames
tr.distraj = list(labels[i:i+ltraj])
i += ltraj

class TimeSeries(object):
""" A class to read and discretize simulation trajectories.
When simulation trajectories are provided, frames are read
@@ -162,6 +109,8 @@ def _read_distraj(self, distraj=None, dt=None):
cstates = [x.split()[0] for x in raw]
return cstates, 1.



def discretize(self, method="rama", states=None, nbins=20, mcs=185, ms=185):
""" Discretize the simulation data.
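For reference, the reverted MultiTimeSeries class above would have been used roughly as follows. This is a hedged sketch rather than code from the commit; the topology/trajectory file names and the mcs/ms values are placeholders.

# Hypothetical usage of the reverted MultiTimeSeries API (placeholder file names)
from mastermsm.trajectory import traj

# one TimeSeries per trajectory file, built against a common topology
multi = traj.MultiTimeSeries(top="alaTB.gro", trajs=["run0.xtc", "run1.xtc"])

# cluster phi/psi angles from all trajectories jointly with HDBSCAN and
# write the per-frame labels back into each TimeSeries.distraj
multi.joint_discretize(mcs=185, ms=185)

for tr in multi.traj_list:
    print(tr.mdt.n_frames, len(tr.distraj))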
51 changes: 19 additions & 32 deletions mastermsm/trajectory/traj_lib.py
@@ -212,7 +212,7 @@ def _stategrid(phi, psi, nbins):
ibin = int(0.5*nbins*(phi/math.pi + 1.)) + int(0.5*nbins*(psi/math.pi + 1))*nbins
return ibin
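As an aside, the _stategrid context line above maps a (phi, psi) pair in radians onto a flattened nbins x nbins grid index; a quick worked check of the formula with arbitrarily chosen values:

import math

nbins, phi, psi = 20, -1.2, 2.5   # angles in radians
ibin = int(0.5*nbins*(phi/math.pi + 1.)) + int(0.5*nbins*(psi/math.pi + 1))*nbins
# phi bin: int(10*(-1.2/pi + 1)) = int(6.18)  = 6
# psi bin: int(10*( 2.5/pi + 1)) = int(17.96) = 17
print(ibin)   # 6 + 17*20 = 346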

def discrete_hdbscan(phi, psi, mcs=None, ms=None):
def discrete_hdbscan(phi, psi, mcs, ms):
""" Assign a set of phi, psi angles to coarse states by
using the HDBSCAN algorithm
@@ -228,49 +228,36 @@
min_samples for HDBSCAN
"""
if len(phi[0]) != len(psi[0]):
sys.exit()

if len(phi[0]) != len(psi[0]): sys.exit()
ndih = len(phi[0])
phis, psis = [], []
for f, y in zip(phi[1],psi[1]):
for f,y in zip(phi[1],psi[1]):
for n in range(ndih):
phis.append(f[n])
psis.append(y[n])
phis.append(f[n]*180/math.pi)
psis.append(y[n]*180/math.pi)

data = np.column_stack((range(len(phis)),phis,psis))
X = StandardScaler().fit_transform(data[:,[1,2]])
if not mcs:
mcs = 10
ms = mcs

while True:
hb = hdbscan.HDBSCAN(min_cluster_size = mcs, min_samples = ms).fit(X)#fit_predict(X)
labels = hb.labels_
n_micro_clusters = len(set(labels)) - (1 if -1 in labels else 0)
if n_micro_clusters > 0:
print("HDBSCAN mcs value set to %g"%mcs)
break
elif mcs < 200:
mcs += 10
else:
sys.exit("Cannot found any valid HDBSCAN mcs value")
#n_noise = list(labels).count(-1)
hb = hdbscan.HDBSCAN(min_cluster_size = mcs, min_samples = ms).fit(X)#fit_predict(X)

labels = hb.labels_
#n_micro_clusters = len(set(labels)) - (1 if -1 in labels else 0)
#n_noise = list(labels).count(-1)

# remove points with small (<0.1) membership probability from the clusters
for i, x_i in enumerate(labels):
if hb.probabilities_[i] < 0.1:
labels[i] = -1

## plot clusters and corresponding tree
#import matplotlib.pyplot as plt
#colors = ['royalblue', 'maroon', 'forestgreen', 'mediumorchid', \
#'tan', 'deeppink', 'olive', 'goldenrod', 'lightcyan', 'lightgray']
#vectorizer = np.vectorize(lambda x: colors[x % len(colors)])
#plt.scatter(X[:,0],X[:,1], c=vectorizer(labels))
#plt.savefig('alaTB_hdbscan.png')
#hb.condensed_tree_.plot()
#plt.savefig('tree.png')
# plot clusters and corresponding tree
import matplotlib.pyplot as plt
colors = ['royalblue', 'maroon', 'forestgreen', 'mediumorchid', \
'tan', 'deeppink', 'olive', 'goldenrod', 'lightcyan', 'lightgray']
vectorizer = np.vectorize(lambda x: colors[x % len(colors)])
plt.scatter(X[:,0],X[:,1], c=vectorizer(labels))
plt.savefig('alaTB_hdbscan.png')
hb.condensed_tree_.plot()
plt.savefig('tree.png')

# remove noise from microstate trajectory and apply TBA (Buchete JPCB 2008)
i = 0

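The clustering pattern that this revert restores in discrete_hdbscan is a standard scale-then-cluster step followed by pruning of weak assignments. Below is a minimal standalone sketch with synthetic angles, not the package's own code, assuming the hdbscan and scikit-learn libraries that traj_lib already depends on; the two Gaussian basins and the mcs/ms values are illustrative only.

# Minimal sketch of the scale-and-cluster step in discrete_hdbscan (synthetic data)
import numpy as np
import hdbscan
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
# two synthetic basins in the (phi, psi) plane, angles in degrees
phis = np.concatenate([rng.normal(-65., 15., 1000), rng.normal(60., 15., 1000)])
psis = np.concatenate([rng.normal(-45., 15., 1000), rng.normal(45., 15., 1000)])

X = StandardScaler().fit_transform(np.column_stack((phis, psis)))
hb = hdbscan.HDBSCAN(min_cluster_size=100, min_samples=100).fit(X)

labels = hb.labels_.copy()
labels[hb.probabilities_ < 0.1] = -1   # demote weak assignments to noise (-1)
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
print(n_clusters, "clusters;", list(labels).count(-1), "noise points")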