Merge pull request #68 from mhangaard/ads_atom_types

bag of edges
SUNCAT-Center · Mar 21, 2019 · e584097 · e584097
2 parents 92739d1 + 1a530c8
commit e584097
Show file tree

Hide file tree

Showing 13 changed files with 271 additions and 66 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,7 @@
 # dev
 
+# Version 0.6.0 (January 2019)
+
 -   Added ML-MIN algorithm for energy minimization.
 -   Added ML-NEB algorithm for transition state search.
 -   Changed input format for kernels in the GP.

diff --git a/catlearn/__init__.py b/catlearn/__init__.py
@@ -1 +1 @@
-__version__ = "0.6.0.dev3"
+__version__ = "0.6.0"
diff --git a/catlearn/api/ase_atoms_api.py b/catlearn/api/ase_atoms_api.py
@@ -27,7 +27,7 @@ def database_to_list(fname, selection=None):
         atoms.info['id'] = dbid
         atoms.info['ctime'] = float(d.ctime)
         atoms.subsets = {}
-        if 'data' in d and 'connectivity' in d.data:
+        if hasattr(d, 'data') and 'connectivity' in dict(d.data):
             atoms.connectivity = np.array(d.data.connectivity)
         images.append(atoms)
 

diff --git a/catlearn/featurize/setup.py b/catlearn/featurize/setup.py
@@ -9,6 +9,7 @@
 from collections import defaultdict
 import multiprocessing
 from tqdm import tqdm
+from catlearn.fingerprint.molecule import AutoCorrelationFingerprintGenerator
 from catlearn.fingerprint.adsorbate import (AdsorbateFingerprintGenerator,
                                             default_adsorbate_fingerprinters)
 from catlearn.fingerprint.convoluted import (ConvolutedFingerprintGenerator,
@@ -59,7 +60,8 @@ class FeatureGenerator(
         AdsorbateFingerprintGenerator, ParticleFingerprintGenerator,
         StandardFingerprintGenerator, GraphFingerprintGenerator,
         BulkFingerprintGenerator, ConvolutedFingerprintGenerator,
-        ChalcogenideFingerprintGenerator, CatappFingerprintGenerator):
+        ChalcogenideFingerprintGenerator, CatappFingerprintGenerator,
+        AutoCorrelationFingerprintGenerator):
     """Feature generator class.
 
     It is sometimes necessary to normalize the length of feature vectors when
@@ -303,6 +305,32 @@ def _get_atom_types(self, train_candidates, test_candidates=None):
 
         self.atom_types = atom_types
 
+    def _get_ads_atom_types(self, train_candidates, test_candidates=None):
+        """Collect the atomic numbers present in the adsorbate subsets.
+
+        The result is stored on the instance; nothing is returned.
+
+        Parameters
+        ----------
+        train_candidates : list
+            List of atoms objects. Each must carry
+            ``subsets['ads_atoms']``, the indices of its adsorbate atoms.
+        test_candidates : list
+            Optional list of atoms objects, same requirements.
+
+        Notes
+        -----
+        Sets ``self.ads_atom_types`` to the sorted list of unique atomic
+        numbers found among the adsorbate atoms of all candidates.
+        """
+        candidates = list(train_candidates)
+        if test_candidates is not None:
+            candidates.extend(test_candidates)
+        found = set()
+        for a in candidates:
+            found.update(a.get_atomic_numbers()[a.subsets['ads_atoms']])
+        self.ads_atom_types = sorted(found)
+
     def _get_atom_length(self, train_candidates, test_candidates=None):
         """Function to get all potential system sizes in data.
 

diff --git a/catlearn/fingerprint/adsorbate.py b/catlearn/fingerprint/adsorbate.py
@@ -1,17 +1,14 @@
 """Slab adsorbate fingerprint functions for machine learning."""
 import numpy as np
-import collections
 
 from ase.symbols import string2symbols
 from ase.data import ground_state_magnetic_moments as gs_magmom
 from ase.data import atomic_numbers, chemical_symbols
 
-from catlearn.featurize.periodic_table_data import (get_mendeleev_params,
-                                                    n_outer,
-                                                    list_mendeleev_params,
+from catlearn.featurize.periodic_table_data import (list_mendeleev_params,
                                                     default_params, get_radius,
                                                     electronegativities,
-                                                    block2number, make_labels)
+                                                    make_labels)
 from catlearn.featurize.base import BaseGenerator, check_labels
 
 
@@ -32,8 +29,8 @@
                                     'generalized_cn',
                                     'bag_cn',
                                     'bag_atoms_ads',
-                                    'bag_connections_ads',
-                                    'bag_connections_chemi']
+                                    'bag_edges_ads',
+                                    'bag_edges_chemi']
 
 extra_slab_params = ['atomic_radius',
                      'heat_of_formation',
@@ -645,7 +642,7 @@ def strain(self, atoms=None):
             strain_term = (av_term - av_bulk) / av_bulk
             return [strain_site, strain_term]
 
-    def bag_connections_ads(self, atoms):
+    def bag_edges_ads(self, atoms):
         """Returns bag of connections, counting only the bonds within the
         adsorbate.
 
@@ -693,7 +690,7 @@ def bag_connections_ads(self, atoms):
                 boc[bond_type] += 1
             return list(boc[np.triu_indices_from(boc)])
 
-    def bag_connections_chemi(self, atoms):
+    def bag_edges_chemi(self, atoms):
         """Returns bag of connections, counting only the bonds within the
         adsorbate and the connections between adsorbate and surface.
 
@@ -742,6 +739,63 @@ def bag_connections_chemi(self, atoms):
 
         return list(boc[np.triu_indices_from(boc)])
 
+    def bag_edges_all(self, atoms):
+        """Returns bag of connections, counting all bonds within the
+        adsorbate and between adsorbate atoms and surface. If we assign an
+        energy to each type of bond, considering first neighbors only,
+        this fingerprint would work independently in a linear model. The length
+        of the vector is len(ads_atom_types) * len(atom_types).
+
+        Every bond is counted once. The adsorbate element selects the row and
+        its neighbor the column; a bond between two adsorbate atoms is stored
+        with the lower atomic number as the row.
+
+        Parameters
+        ----------
+        atoms : object
+            ASE Atoms object carrying a connectivity matrix and
+            subsets['ads_atoms'], or None to request the feature labels.
+
+        Returns
+        -------
+        features : list
+            If None was passed, the elements are strings, naming the feature.
+        """
+        # Labels in row-major order, one per (adsorbate element, element)
+        # pair. The text is the neighbor symbol followed by the adsorbate
+        # symbol.
+        symbols = [chemical_symbols[z] for z in self.atom_types]
+        ads_symbols = [chemical_symbols[z] for z in self.ads_atom_types]
+        if atoms is None:
+            return ['bea_' + s + a + '_ads'
+                    for a in ads_symbols for s in symbols]
+
+        # Empty bag of connection types. The matrix is rectangular, so it is
+        # flattened whole; an upper triangle would drop valid bond types.
+        boc = np.zeros([len(ads_symbols), len(symbols)])
+
+        numbers = atoms.numbers
+        # NOTE(review): assumes a symmetric (natoms, natoms) connectivity.
+        cm = np.array(atoms.connectivity)
+        ads_atoms = [int(i) for i in atoms.subsets['ads_atoms']]
+        in_ads = set(ads_atoms)
+        for i in ads_atoms:
+            for j in np.nonzero(cm[i] > 0)[0]:
+                j = int(j)
+                if j == i:
+                    # Ignore self-connections on the true diagonal.
+                    continue
+                z_ads, z_all = numbers[i], numbers[j]
+                if j in in_ads:
+                    if j < i:
+                        # Adsorbate-adsorbate bond, already seen from j.
+                        continue
+                    z_ads, z_all = min(z_ads, z_all), max(z_ads, z_all)
+                # Count the bond under its (adsorbate, neighbor) type.
+                boc[self.ads_atom_types.index(z_ads),
+                    self.atom_types.index(z_all)] += 1
+        return list(boc.ravel())
+
     def en_difference_ads(self, atoms=None):
         """Returns a list of electronegativity metrics, squared and summed over
         bonds within the adsorbate atoms.

diff --git a/catlearn/fingerprint/molecule.py b/catlearn/fingerprint/molecule.py
@@ -1,21 +1,20 @@
 """Functions to build a gas phase molecule fingerprint."""
-from catlearn.utilities.neighborlist import catlearn_neighborlist
+from catlearn.featurize.base import BaseGenerator
 from catlearn.featurize.periodic_table_data import list_mendeleev_params
 import networkx as nx
 import numpy as np
-from ase import Atoms
+
 
 default_parameters = [
     'atomic_number',
     'covalent_radius_cordero',
-    'en_pauling',
-]
+    'en_pauling']
 
 
-class AutoCorrelationFingerprintGenerator():
+class AutoCorrelationFingerprintGenerator(BaseGenerator):
     """Class for constructing an autocorrelation fingerprint."""
 
-    def __init__(self, images, dstar=0, parameters=None):
+    def __init__(self, **kwargs):
         """Initialize.
 
         Parameters
@@ -27,28 +26,24 @@ def __init__(self, images, dstar=0, parameters=None):
         parameters : list
             Parameters to use for the autocorrelation
         """
-        if isinstance(images, Atoms):
-            images = [images]
-
-        self.images = images
-        self.dstar = dstar
+        # dstar setting; falls back to 2 below when not supplied.
+        if not hasattr(self, 'dstar'):
+            self.dstar = kwargs.get('dstar')
 
-        if parameters is None:
-            self.parameters = default_parameters
+        if self.dstar is None:
+            self.dstar = 2
 
-    def generate(self):
-        """Return an (n, m) array of fingerprints."""
-        fp_length = len(self.parameters) * (self.dstar + 1)
-        fingerprints = np.zeros((len(self.images), fp_length))
+        if not hasattr(self, 'parameters'):
+            self.parameters = kwargs.get('parameters')
 
-        for i, atoms in enumerate(self.images):
-            fingerprints[i] = self.get_autocorrelation(atoms)
+        if self.parameters is None:
+            self.parameters = default_parameters
 
-        return fingerprints
+        super(AutoCorrelationFingerprintGenerator, self).__init__(**kwargs)
 
     def get_autocorrelation(self, atoms):
         """Return the autocorrelation fingerprint for a molecule."""
-        connectivity = catlearn_neighborlist(atoms)
+        connectivity = atoms.connectivity
 
         G = nx.Graph(connectivity)
         distance_matrix = nx.floyd_warshall_numpy(G)