Skip to content

Commit

Permalink
Merge pull request #87 from orionarcher/reviewer_response
Browse files Browse the repository at this point in the history
Response to JOSS feedback.
  • Loading branch information
orionarcher authored Apr 2, 2023
2 parents 9461f84 + 9b96772 commit de4afd4
Show file tree
Hide file tree
Showing 18 changed files with 321 additions and 223 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,19 @@ in understanding the solvation structure of a liquid, this package is for you!

Find the documentation on [readthedocs].

### Installing SolvationAnalysis

SolvationAnalysis is available on PyPI and can be installed with pip:

```bash
pip install solvation-analysis
```

### Contributing

Contributions, both issues and PRs, are welcome. If you'd like to contribute, we ask that you
follow the community guidelines outlined in the [MDAnalysis Code of Conduct](https://www.mdanalysis.org/pages/conduct/).

---

#### Acknowledgements
Expand Down
112 changes: 48 additions & 64 deletions docs/tutorials/basics_tutorial.ipynb

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions docs/tutorials/clustering_and_residence_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,11 @@
"import MDAnalysis as mda\n",
"from solvation_analysis.solute import Solute\n",
"\n",
"# define paths to data\n",
"data = \"../../solvation_analysis/tests/data/bn_fec_data/bn_fec.data\"\n",
"traj = \"../../solvation_analysis/tests/data/bn_fec_data/bn_fec_short_unwrap.dcd\"\n",
"# we will use a trajectory supplied by the package\n",
"from solvation_analysis.tests import datafiles\n",
"\n",
"# instantiate Universe\n",
"u = mda.Universe(data, traj)\n",
"u = mda.Universe(datafiles.bn_fec_data, datafiles.bn_fec_dcd_unwrap)\n",
"\n",
"# define solute AtomGroup\n",
"li_atoms = u.atoms.select_atoms(\"type 22\")\n",
Expand Down
56 changes: 26 additions & 30 deletions docs/tutorials/multi_atom_solutes.ipynb

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions joss_paper/paper.bib
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,23 @@ @software{pandas:2020
url = {https://doi.org/10.5281/zenodo.3509134},
}

@article{nglview:2018,
author = {Nguyen, Hai and Case, David A and Rose, Alexander S},
title = "{NGLview–interactive molecular graphics for Jupyter notebooks}",
journal = {Bioinformatics},
volume = {34},
number = {7},
pages = {1241-1242},
year = {2017},
month = {12},
issn = {1367-4803},
doi = {10.1093/bioinformatics/btx789},
url = {https://doi.org/10.1093/bioinformatics/btx789},
eprint = {https://academic.oup.com/bioinformatics/article-pdf/34/7/1241/48914829/bioinformatics\_34\_7\_1241.pdf},
}



@article{Hou:2019,
title = {The influence of FEC on the solvation structure and reduction reaction of LiPF6/EC electrolytes and its implication for solid electrolyte interphase formation},
author = {Hou, Tingzheng and Yang, Guang and Rajput, Nav Nidhi and Self, Julian and Park, Sang-Won and Nanda, Jagjit and Persson, Kristin A.},
Expand Down
5 changes: 3 additions & 2 deletions joss_paper/paper.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ coordination numbers, solute-solvent pairing, and solute speciation,
SolvationAnalysis uses tools from the SciPy ecosystem [@numpy:2020] [@scipy:2020]
to implement analyses of network formation [@Xie:2023] and residence
times [@Self:2019], summarized in \autoref{fig:summary}. To make visualization fast,
the package includes a robust set of plotting tools built
on top of `Matplotlib` and `Plotly` [@matplotlib:2007] [@plotly:2015].
the package includes a robust set of plotting tools built on top of `Matplotlib` and
`Plotly` [@matplotlib:2007] [@plotly:2015]. Paired with nglview [@nglview:2018], both
exploration and 3D visualization can be done in a Jupyter notebook.
A full set of tutorials based on state-of-the-art battery electrolytes
[@Hou:2019] [@Dong-Joo:2022] is also included to familiarize new researchers
with solvation structure analysis. Together, these features allow for
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@

install_requires=[
'numpy>=1.20.0',
'mdanalysis>=2.0.0b0',
'mdanalysis>=2.0.0',
'pandas',
'matplotlib',
'scipy',
'statsmodels',
'statsmodels',
'plotly',
'rdkit'
],
Expand Down
39 changes: 26 additions & 13 deletions solvation_analysis/coordination.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,6 @@ class Coordination:
n_solutes : int
The number of solutes in solvation_data.
Attributes
----------
cn_dict : dict of {str: float}
a dictionary where keys are residue names (str) and values are the
mean coordination number of that residue (float).
cn_by_frame : pd.DataFrame
a DataFrame of the mean coordination number of in each frame of the trajectory.
coordinating_atoms : pd.DataFrame
fraction of each atom_type participating in solvation, calculated for each solvent.
Examples
--------
Expand All @@ -63,7 +53,7 @@ class Coordination:
# first define Li, BN, and FEC AtomGroups
>>> solute = Solute(Li, {'BN': BN, 'FEC': FEC, 'PF6': PF6})
>>> solute.run()
>>> solute.coordination.cn_dict
>>> solute.coordination.coordination_numbers
{'BN': 4.328, 'FEC': 0.253, 'PF6': 0.128}
"""
Expand All @@ -72,9 +62,9 @@ def __init__(self, solvation_data, n_frames, n_solutes, atom_group):
self.solvation_data = solvation_data
self.n_frames = n_frames
self.n_solutes = n_solutes
self.cn_dict, self.cn_by_frame = self._mean_cn()
self._cn_dict, self._cn_dict_by_frame = self._mean_cn()
self.atom_group = atom_group
self.coordinating_atoms = self._calculate_coordinating_atoms()
self._coordinating_atoms = self._calculate_coordinating_atoms()

@staticmethod
def from_solute(solute):
Expand Down Expand Up @@ -132,3 +122,26 @@ def _calculate_coordinating_atoms(self, tol=0.005):
.set_index(ATOM_TYPE, append=True)
)
return type_fractions[type_fractions[FRACTION] > tol]

@property
def coordination_numbers(self):
"""
A dictionary (dict of {str: float}) where keys are residue names (str)
and values are the mean coordination number of that residue (float).
"""
return self._cn_dict

@property
def coordination_numbers_by_frame(self):
"""
A DataFrame of the mean coordination number in each frame of the trajectory.
"""
return self._cn_dict_by_frame

@property
def coordinating_atoms(self):
"""
A DataFrame of the fraction of each atom_type participating in solvation,
calculated for each solvent.
"""
return self._coordinating_atoms

73 changes: 45 additions & 28 deletions solvation_analysis/networking.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,30 +52,6 @@ class Networking:
res_name_map : pd.Series
a mapping between residue indices and the solute & solvent names in a Solute.
Attributes
----------
network_df : pd.DataFrame
the dataframe containing all networking data. the indices are the frame and
network index, respectively. the columns are the solvent_name and res_ix.
network_sizes : pd.DataFrame
a dataframe of network sizes. the index is the frame. the column headers
are network sizes, or the number of solutes + solvents in the network, so
the columns might be [2, 3, 4, ...]. the values in each column are the
number of networks with that size in each frame.
solute_status : dict of {str: float}
a dictionary where the keys are the "status" of the solute and the values
are the fraction of solute with that status, averaged over all frames.
"isolated" means that the solute not coordinated with any of the networking
solvents, network size is 1.
"paired" means the solute and is coordinated with a single networking
solvent and that solvent is not coordinated to any other solutes, network
size is 2.
"networked" means that the solute is coordinated to more than one solvent
or its solvent is coordinated to more than one solute, network size >= 3.
solute_status_by_frame : pd.DataFrame
as described above, except organized into a dataframe where each
row is a unique frame and the columns are "isolated", "paired", and "networked".
Examples
--------
.. code-block:: python
Expand All @@ -95,10 +71,10 @@ def __init__(self, solvents, solvation_data, solute_res_ix, res_name_map):
self.solute_res_ix = solute_res_ix
self.res_name_map = res_name_map
self.n_solute = len(solute_res_ix)
self.network_df = self._generate_networks()
self.network_sizes = self._calculate_network_sizes()
self.solute_status, self.solute_status_by_frame = self._calculate_solute_status()
self.solute_status = self.solute_status.to_dict()
self._network_df = self._generate_networks()
self._network_sizes = self._calculate_network_sizes()
self._solute_status, self._solute_status_by_frame = self._calculate_solute_status()
self._solute_status = self._solute_status.to_dict()

@staticmethod
def from_solute(solute, solvents):
Expand Down Expand Up @@ -239,3 +215,44 @@ def get_network_res_ix(self, network_index, frame):
"""
res_ix = self.network_df.loc[pd.IndexSlice[frame, network_index], SOLVENT_IX].values
return res_ix.astype(int)

@property
def network_df(self):
"""
The dataframe containing all networking data. The indices are the frame and
network index, respectively. The columns are the solvent_name and res_ix.
"""
return self._network_df

@property
def network_sizes(self):
"""
A dataframe of network sizes. The index is the frame. The column headers
are network sizes, or the number of solutes + solvents in the network, so
the columns might be [2, 3, 4, ...]. The values in each column are the
number of networks with that size in each frame.
"""
return self._network_sizes

@property
def solute_status(self):
"""
A dictionary where the keys are the "status" of the solute and the values
are the fraction of solutes with that status, averaged over all frames.
"isolated" means that the solute is not coordinated with any of the networking
solvents, network size is 1.
"paired" means that the solute is coordinated with a single networking
solvent and that solvent is not coordinated to any other solutes, network
size is 2.
"networked" means that the solute is coordinated to more than one solvent
or its solvent is coordinated to more than one solute, network size >= 3.
"""
return self._solute_status

@property
def solute_status_by_frame(self):
"""
The solute statuses described in ``solute_status``, organized into a
dataframe where each row is a unique frame and the columns are
"isolated", "paired", and "networked".
"""
return self._solute_status_by_frame
71 changes: 48 additions & 23 deletions solvation_analysis/pairing.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,25 +44,6 @@ class Pairing:
n_solvents : dict of {str: int}
The number of each kind of solvent.
Attributes
----------
pairing_dict : dict of {str: float}
a dictionary where keys are residue names (str) and values are the
fraction of solutes that contain that residue (float).
pairing_by_frame : pd.DataFrame
a dictionary tracking the mean fraction of each residue across frames.
fraction_free_solvents : dict of {str: float}
a dictionary containing the fraction of each solvent that is free. e.g.
not coordinated to a solute.
diluent_dict : dict of {str: float}
the fraction of the diluent constituted by each solvent. The diluent is
defined as everything that is not coordinated with the solute.
diluent_by_frame : pd.DataFrame
a DataFrame of the diluent composition in each frame of the trajectory.
diluent_counts : pd.DataFrame
a DataFrame of the raw solvent counts in the diluent in each frame of the trajectory.
Examples
--------
Expand All @@ -71,7 +52,7 @@ class Pairing:
# first define Li, BN, and FEC AtomGroups
>>> solute = Solute(Li, {'BN': BN, 'FEC': FEC, 'PF6': PF6})
>>> solute.run()
>>> solute.pairing.pairing_dict
>>> solute.pairing.solvent_pairing
{'BN': 1.0, 'FEC': 0.210, 'PF6': 0.120}
"""

Expand All @@ -80,9 +61,9 @@ def __init__(self, solvation_data, n_frames, n_solutes, n_solvents):
self.n_frames = n_frames
self.n_solutes = n_solutes
self.solvent_counts = n_solvents
self.pairing_dict, self.pairing_by_frame = self._fraction_coordinated()
self.fraction_free_solvents = self._fraction_free_solvent()
self.diluent_dict, self.diluent_by_frame, self.diluent_counts = self._diluent_composition()
self._solvent_pairing, self._pairing_by_frame = self._fraction_coordinated()
self._fraction_free_solvents = self._fraction_free_solvent()
self._diluent_composition, self._diluent_composition_by_frame, self._diluent_counts = self._diluent_composition()

@staticmethod
def from_solute(solute):
Expand Down Expand Up @@ -135,3 +116,47 @@ def _diluent_composition(self):
diluent_dict = diluent_by_frame.mean(axis=1).to_dict()
return diluent_dict, diluent_by_frame, diluent_counts

@property
def solvent_pairing(self):
"""
A dictionary (dict of {str: float}) where keys are residue names (str)
and values are the fraction of solutes that contain that residue (float).
"""
return self._solvent_pairing

@property
def pairing_by_frame(self):
"""
A pd.DataFrame tracking the mean fraction of each residue across frames.
"""
return self._pairing_by_frame

@property
def fraction_free_solvents(self):
"""
A dictionary containing the fraction of each solvent that is free, i.e.
not coordinated to a solute.
"""
return self._fraction_free_solvents

@property
def diluent_composition(self):
"""
A dictionary (dict of {str: float}) of the fraction of the diluent
constituted by each solvent. The diluent is defined as everything that
is not coordinated with the solute.
"""
return self._diluent_composition

@property
def diluent_composition_by_frame(self):
"""
A pd.DataFrame of the diluent composition in each frame of the trajectory.
"""
return self._diluent_composition_by_frame

@property
def diluent_counts(self):
"""
A pd.DataFrame of the raw solvent counts in the diluent in each frame of
the trajectory.
"""
return self._diluent_counts
8 changes: 4 additions & 4 deletions solvation_analysis/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def compare_func(
)
return fig

arguments_docstring = """
arguments_docstring = """
property_dict : dict of {str: dict}
a dictionary with the solution name as keys and a dict of {str: float} as values, where each key
Expand Down Expand Up @@ -279,7 +279,7 @@ def compare_func(

compare_pairing = _compare_function_generator(
"pairing",
"pairing_dict",
"solvent_pairing",
"Fractional Pairing of Solvents",
"Compare the solute-solvent pairing.",
)
Expand All @@ -295,15 +295,15 @@ def compare_func(

compare_diluent = _compare_function_generator(
"pairing",
"diluent_dict",
"diluent_composition",
"Diluent Composition of Solutes",
"Compare the diluent composition.",
)


compare_coordination_numbers = _compare_function_generator(
"coordination",
"cn_dict",
"coordination_numbers",
"Coordination Numbers of Solvents",
"Compare the coordination numbers.",
)
Expand Down
Loading

0 comments on commit de4afd4

Please sign in to comment.