Merge pull request #87 from orionarcher/reviewer_response

Response to JOSS feedback.
MDAnalysis · Apr 2, 2023 · de4afd4 · de4afd4
2 parents 9461f84 + 9b96772
commit de4afd4
Show file tree

Hide file tree

Showing 18 changed files with 321 additions and 223 deletions.
diff --git a/README.md b/README.md
@@ -21,6 +21,19 @@ in understanding the solvation structure of a liquid, this package is for you!
 
 Find the documentation on [readthedocs].
 
+### Installing SolvationAnalysis
+
+SolvationAnalysis is available on PyPI and can be installed with pip:
+
+```bash
+pip install solvation-analysis
+```
+
+### Contributing
+
+Contributions, both issues and PRs, are welcome. If you'd like to contribute, we ask that you 
+follow the community guidelines outlined in the [MDAnalysis Code of Conduct](https://www.mdanalysis.org/pages/conduct/).
+
 ---
 
 #### Acknowledgements

diff --git a/docs/tutorials/basics_tutorial.ipynb b/docs/tutorials/basics_tutorial.ipynb
diff --git a/docs/tutorials/clustering_and_residence_tutorial.ipynb b/docs/tutorials/clustering_and_residence_tutorial.ipynb
@@ -37,12 +37,11 @@
     "import MDAnalysis as mda\n",
     "from solvation_analysis.solute import Solute\n",
     "\n",
-    "# define paths to data\n",
-    "data = \"../../solvation_analysis/tests/data/bn_fec_data/bn_fec.data\"\n",
-    "traj = \"../../solvation_analysis/tests/data/bn_fec_data/bn_fec_short_unwrap.dcd\"\n",
+    "# we will use a trajectory supplied by the package\n",
+    "from solvation_analysis.tests import datafiles\n",
     "\n",
     "# instantiate Universe\n",
-    "u = mda.Universe(data, traj)\n",
+    "u = mda.Universe(datafiles.bn_fec_data, datafiles.bn_fec_dcd_unwrap)\n",
     "\n",
     "# define solute AtomGroup\n",
     "li_atoms = u.atoms.select_atoms(\"type 22\")\n",

diff --git a/docs/tutorials/multi_atom_solutes.ipynb b/docs/tutorials/multi_atom_solutes.ipynb
diff --git a/joss_paper/paper.bib b/joss_paper/paper.bib
@@ -102,6 +102,23 @@ @software{pandas:2020
 url          = {https://doi.org/10.5281/zenodo.3509134},
 }
 
+@article{nglview:2018,
+    author = {Nguyen, Hai and Case, David A and Rose, Alexander S},
+    title = "{NGLview–interactive molecular graphics for Jupyter notebooks}",
+    journal = {Bioinformatics},
+    volume = {34},
+    number = {7},
+    pages = {1241-1242},
+    year = {2017},
+    month = {12},
+    issn = {1367-4803},
+    doi = {10.1093/bioinformatics/btx789},
+    url = {https://doi.org/10.1093/bioinformatics/btx789},
+    eprint = {https://academic.oup.com/bioinformatics/article-pdf/34/7/1241/48914829/bioinformatics\_34\_7\_1241.pdf},
+}
+
+
+
 @article{Hou:2019,
 title = {The influence of FEC on the solvation structure and reduction reaction of LiPF6/EC electrolytes and its implication for solid electrolyte interphase formation},
 author = {Hou, Tingzheng and Yang, Guang and Rajput, Nav Nidhi and Self, Julian and Park, Sang-Won and Nanda, Jagjit and Persson, Kristin A.},

diff --git a/joss_paper/paper.md b/joss_paper/paper.md
@@ -77,8 +77,9 @@ coordination numbers, solute-solvent pairing, and solute speciation,
 SolvationAnalysis uses tools from the SciPy ecosystem [@numpy:2020] [@scipy:2020]
 to implement analyses of network formation [@Xie:2023] and residence
 times [@Self:2019], summarized in \autoref{fig:summary}. To make visualization fast, 
-the package includes a robust set of plotting tools built
-on top of `Matplotlib` and `Plotly` [@matplotlib:2007] [@plotly:2015].
+the package includes a robust set of plotting tools built on top of `Matplotlib` and 
+`Plotly` [@matplotlib:2007] [@plotly:2015]. Paired with nglview [@nglview:2018], both 
+exploration and 3D visualization can be done in a Jupyter notebook.
 A full set of tutorials based on state-of-the-art battery electrolytes 
 [@Hou:2019] [@Dong-Joo:2022] are also included to familiarize new researchers
 with solvation structure analysis. Together, these features allow for

diff --git a/setup.py b/setup.py
@@ -46,11 +46,11 @@
 
     install_requires=[
         'numpy>=1.20.0',
-        'mdanalysis>=2.0.0b0',
+        'mdanalysis>=2.0.0',
         'pandas',
         'matplotlib',
         'scipy',
-        'statsmodels', 
+        'statsmodels',
         'plotly',
         'rdkit'
     ],

diff --git a/solvation_analysis/coordination.py b/solvation_analysis/coordination.py
@@ -45,16 +45,6 @@ class Coordination:
     n_solutes : int
         The number of solutes in solvation_data.
 
-    Attributes
-    ----------
-    cn_dict : dict of {str: float}
-        a dictionary where keys are residue names (str) and values are the
-        mean coordination number of that residue (float).
-    cn_by_frame : pd.DataFrame
-        a DataFrame of the mean coordination number of in each frame of the trajectory.
-    coordinating_atoms : pd.DataFrame
-        fraction of each atom_type participating in solvation, calculated for each solvent.
-
     Examples
     --------
 
@@ -63,7 +53,7 @@ class Coordination:
         # first define Li, BN, and FEC AtomGroups
         >>> solute = Solute(Li, {'BN': BN, 'FEC': FEC, 'PF6': PF6})
         >>> solute.run()
-        >>> solute.coordination.cn_dict
+        >>> solute.coordination.coordination_numbers
         {'BN': 4.328, 'FEC': 0.253, 'PF6': 0.128}
 
     """
@@ -72,9 +62,9 @@ def __init__(self, solvation_data, n_frames, n_solutes, atom_group):
         self.solvation_data = solvation_data
         self.n_frames = n_frames
         self.n_solutes = n_solutes
-        self.cn_dict, self.cn_by_frame = self._mean_cn()
+        self._cn_dict, self._cn_dict_by_frame = self._mean_cn()
         self.atom_group = atom_group
-        self.coordinating_atoms = self._calculate_coordinating_atoms()
+        self._coordinating_atoms = self._calculate_coordinating_atoms()
 
     @staticmethod
     def from_solute(solute):
@@ -132,3 +122,26 @@ def _calculate_coordinating_atoms(self, tol=0.005):
                          .set_index(ATOM_TYPE, append=True)
                          )
         return type_fractions[type_fractions[FRACTION] > tol]
+
+    @property
+    def coordination_numbers(self):
+        """
+        A dictionary where keys are residue names (str) and values are the
+        mean coordination number of that residue (float).
+        """
+        return self._cn_dict
+
+    @property
+    def coordination_numbers_by_frame(self):
+        """
+        A DataFrame of the mean coordination number in each frame of the trajectory.
+        """
+        return self._cn_dict_by_frame
+
+    @property
+    def coordinating_atoms(self):
+        """
+        Fraction of each atom_type participating in solvation, calculated for each solvent.
+        """
+        return self._coordinating_atoms
+
diff --git a/solvation_analysis/networking.py b/solvation_analysis/networking.py
@@ -52,30 +52,6 @@ class Networking:
     res_name_map : pd.Series
         a mapping between residue indices and the solute & solvent names in a Solute.
 
-    Attributes
-    ----------
-    network_df : pd.DataFrame
-        the dataframe containing all networking data. the indices are the frame and
-        network index, respectively. the columns are the solvent_name and res_ix.
-    network_sizes : pd.DataFrame
-        a dataframe of network sizes. the index is the frame. the column headers
-        are network sizes, or the number of solutes + solvents in the network, so
-        the columns might be [2, 3, 4, ...]. the values in each column are the
-        number of networks with that size in each frame.
-    solute_status : dict of {str: float}
-        a dictionary where the keys are the "status" of the solute and the values
-        are the fraction of solute with that status, averaged over all frames.
-        "isolated" means that the solute not coordinated with any of the networking
-        solvents, network size is 1.
-        "paired" means the solute and is coordinated with a single networking
-        solvent and that solvent is not coordinated to any other solutes, network
-        size is 2.
-        "networked" means that the solute is coordinated to more than one solvent
-        or its solvent is coordinated to more than one solute, network size >= 3.
-    solute_status_by_frame : pd.DataFrame
-        as described above, except organized into a dataframe where each
-        row is a unique frame and the columns are "isolated", "paired", and "networked".
-
     Examples
     --------
      .. code-block:: python
@@ -95,10 +71,10 @@ def __init__(self, solvents, solvation_data, solute_res_ix, res_name_map):
         self.solute_res_ix = solute_res_ix
         self.res_name_map = res_name_map
         self.n_solute = len(solute_res_ix)
-        self.network_df = self._generate_networks()
-        self.network_sizes = self._calculate_network_sizes()
-        self.solute_status, self.solute_status_by_frame = self._calculate_solute_status()
-        self.solute_status = self.solute_status.to_dict()
+        self._network_df = self._generate_networks()
+        self._network_sizes = self._calculate_network_sizes()
+        self._solute_status, self._solute_status_by_frame = self._calculate_solute_status()
+        self._solute_status = self._solute_status.to_dict()
 
     @staticmethod
     def from_solute(solute, solvents):
@@ -239,3 +215,44 @@ def get_network_res_ix(self, network_index, frame):
         """
         res_ix = self.network_df.loc[pd.IndexSlice[frame, network_index], SOLVENT_IX].values
         return res_ix.astype(int)
+
+    @property
+    def network_df(self):
+        """
+        The dataframe containing all networking data. the indices are the frame and
+        network index, respectively. the columns are the solvent_name and res_ix.
+        """
+        return self._network_df
+
+    @property
+    def network_sizes(self):
+        """
+        A dataframe of network sizes. the index is the frame. the column headers
+        are network sizes, or the number of solutes + solvents in the network, so
+        the columns might be [2, 3, 4, ...]. the values in each column are the
+        number of networks with that size in each frame.
+        """
+        return self._network_sizes
+
+    @property
+    def solute_status(self):
+        """
+        A dictionary where the keys are the "status" of the solute and the values
+        are the fraction of solute with that status, averaged over all frames.
+        "isolated" means that the solute is not coordinated with any of the networking
+        solvents, network size is 1.
+        "paired" means the solute is coordinated with a single networking
+        solvent and that solvent is not coordinated to any other solutes, network
+        size is 2.
+        "networked" means that the solute is coordinated to more than one solvent
+        or its solvent is coordinated to more than one solute, network size >= 3.
+        """
+        return self._solute_status
+
+    @property
+    def solute_status_by_frame(self):
+        """
+        The solute status fractions (see ``solute_status``), organized into a dataframe where each
+        row is a unique frame and the columns are "isolated", "paired", and "networked".
+        """
+        return self._solute_status_by_frame
diff --git a/solvation_analysis/pairing.py b/solvation_analysis/pairing.py
@@ -44,25 +44,6 @@ class Pairing:
     n_solvents : dict of {str: int}
         The number of each kind of solvent.
 
-    Attributes
-    ----------
-    pairing_dict : dict of {str: float}
-        a dictionary where keys are residue names (str) and values are the
-        fraction of solutes that contain that residue (float).
-    pairing_by_frame : pd.DataFrame
-        a dictionary tracking the mean fraction of each residue across frames.
-    fraction_free_solvents : dict of {str: float}
-        a dictionary containing the fraction of each solvent that is free. e.g.
-        not coordinated to a solute.
-    diluent_dict : dict of {str: float}
-        the fraction of the diluent constituted by each solvent. The diluent is
-        defined as everything that is not coordinated with the solute.
-    diluent_by_frame : pd.DataFrame
-        a DataFrame of the diluent composition in each frame of the trajectory.
-    diluent_counts : pd.DataFrame
-        a DataFrame of the raw solvent counts in the diluent in each frame of the trajectory.
-
-
     Examples
     --------
 
@@ -71,7 +52,7 @@ class Pairing:
         # first define Li, BN, and FEC AtomGroups
         >>> solute = Solute(Li, {'BN': BN, 'FEC': FEC, 'PF6': PF6})
         >>> solute.run()
-        >>> solute.pairing.pairing_dict
+        >>> solute.pairing.solvent_pairing
         {'BN': 1.0, 'FEC': 0.210, 'PF6': 0.120}
     """
 
@@ -80,9 +61,9 @@ def __init__(self, solvation_data, n_frames, n_solutes, n_solvents):
         self.n_frames = n_frames
         self.n_solutes = n_solutes
         self.solvent_counts = n_solvents
-        self.pairing_dict, self.pairing_by_frame = self._fraction_coordinated()
-        self.fraction_free_solvents = self._fraction_free_solvent()
-        self.diluent_dict, self.diluent_by_frame, self.diluent_counts = self._diluent_composition()
+        self._solvent_pairing, self._pairing_by_frame = self._fraction_coordinated()
+        self._fraction_free_solvents = self._fraction_free_solvent()
+        self._diluent_composition, self._diluent_composition_by_frame, self._diluent_counts = self._diluent_composition()
 
     @staticmethod
     def from_solute(solute):
@@ -135,3 +116,47 @@ def _diluent_composition(self):
         diluent_dict = diluent_by_frame.mean(axis=1).to_dict()
         return diluent_dict, diluent_by_frame, diluent_counts
 
+    @property
+    def solvent_pairing(self):
+        """
+        A dictionary where keys are residue names (str) and values are the
+        fraction of solutes that contain that residue (float).
+        """
+        return self._solvent_pairing
+
+    @property
+    def pairing_by_frame(self):
+        """
+        A pd.DataFrame tracking the mean fraction of each residue across frames.
+        """
+        return self._pairing_by_frame
+
+    @property
+    def fraction_free_solvents(self):
+        """
+        A dictionary containing the fraction of each solvent that is free, i.e.
+        not coordinated to a solute.
+        """
+        return self._fraction_free_solvents
+
+    @property
+    def diluent_composition(self):
+        """
+        The fraction of the diluent constituted by each solvent. The diluent is
+        defined as everything that is not coordinated with the solute.
+        """
+        return self._diluent_composition
+
+    @property
+    def diluent_composition_by_frame(self):
+        """
+        A DataFrame of the diluent composition in each frame of the trajectory.
+        """
+        return self._diluent_composition_by_frame
+
+    @property
+    def diluent_counts(self):
+        """
+        A DataFrame of the raw solvent counts in the diluent in each frame of the trajectory.
+        """
+        return self._diluent_counts
diff --git a/solvation_analysis/plotting.py b/solvation_analysis/plotting.py
@@ -243,7 +243,7 @@ def compare_func(
         )
         return fig
 
-    arguments_docstring = """    
+    arguments_docstring = """
     
     property_dict : dict of {str: dict}
         a dictionary with the solution name as keys and a dict of {str: float} as values, where each key
@@ -279,7 +279,7 @@ def compare_func(
 
 compare_pairing = _compare_function_generator(
     "pairing",
-    "pairing_dict",
+    "solvent_pairing",
     "Fractional Pairing of Solvents",
     "Compare the solute-solvent pairing.",
 )
@@ -295,15 +295,15 @@ def compare_func(
 
 compare_diluent = _compare_function_generator(
     "pairing",
-    "diluent_dict",
+    "diluent_composition",
     "Diluent Composition of Solutes",
     "Compare the diluent composition.",
 )
 
 
 compare_coordination_numbers = _compare_function_generator(
     "coordination",
-    "cn_dict",
+    "coordination_numbers",
     "Coordination Numbers of Solvents",
     "Compare the coordination numbers.",
 )