Skip to content

Commit

Permalink
Merge pull request #68 from mhangaard/ads_atom_types
Browse files Browse the repository at this point in the history
bag of edges
  • Loading branch information
Martin Hangaard Hansen authored Mar 21, 2019
2 parents 92739d1 + 1a530c8 commit e584097
Show file tree
Hide file tree
Showing 13 changed files with 271 additions and 66 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# dev

# Version 0.6.0 (January 2019)

- Added ML-MIN algorithm for energy minimization.
- Added ML-NEB algorithm for transition state search.
- Changed input format for kernels in the GP.
Expand Down
2 changes: 1 addition & 1 deletion catlearn/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.6.0.dev3"
__version__ = "0.6.0"
2 changes: 1 addition & 1 deletion catlearn/api/ase_atoms_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def database_to_list(fname, selection=None):
atoms.info['id'] = dbid
atoms.info['ctime'] = float(d.ctime)
atoms.subsets = {}
if 'data' in d and 'connectivity' in d.data:
if hasattr(d, 'data') and 'connectivity' in dict(d.data):
atoms.connectivity = np.array(d.data.connectivity)
images.append(atoms)

Expand Down
30 changes: 29 additions & 1 deletion catlearn/featurize/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from collections import defaultdict
import multiprocessing
from tqdm import tqdm
from catlearn.fingerprint.molecule import AutoCorrelationFingerprintGenerator
from catlearn.fingerprint.adsorbate import (AdsorbateFingerprintGenerator,
default_adsorbate_fingerprinters)
from catlearn.fingerprint.convoluted import (ConvolutedFingerprintGenerator,
Expand Down Expand Up @@ -59,7 +60,8 @@ class FeatureGenerator(
AdsorbateFingerprintGenerator, ParticleFingerprintGenerator,
StandardFingerprintGenerator, GraphFingerprintGenerator,
BulkFingerprintGenerator, ConvolutedFingerprintGenerator,
ChalcogenideFingerprintGenerator, CatappFingerprintGenerator):
ChalcogenideFingerprintGenerator, CatappFingerprintGenerator,
AutoCorrelationFingerprintGenerator):
"""Feature generator class.
It is sometimes necessary to normalize the length of feature vectors when
Expand Down Expand Up @@ -303,6 +305,32 @@ def _get_atom_types(self, train_candidates, test_candidates=None):

self.atom_types = atom_types

def _get_ads_atom_types(self, train_candidates, test_candidates=None):
    """Collect the atomic numbers that occur among adsorbate atoms.

    Parameters
    ----------
    train_candidates : list
        List of atoms objects.
    test_candidates : list
        Optional list of atoms objects, pooled with the training set.

    Notes
    -----
    Nothing is returned. The sorted list of atomic numbers found in the
    ``subsets['ads_atoms']`` selection of every candidate is stored on
    ``self.ads_atom_types``.
    """
    # Work on a copy so the caller's list is never extended in place.
    candidates = list(train_candidates)
    if test_candidates is not None:
        candidates.extend(test_candidates)

    found = set()
    for image in candidates:
        numbers = image.get_atomic_numbers()
        found |= set(numbers[image.subsets['ads_atoms']])

    self.ads_atom_types = sorted(found)

def _get_atom_length(self, train_candidates, test_candidates=None):
"""Function to get all potential system sizes in data.
Expand Down
72 changes: 63 additions & 9 deletions catlearn/fingerprint/adsorbate.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
"""Slab adsorbate fingerprint functions for machine learning."""
import numpy as np
import collections

from ase.symbols import string2symbols
from ase.data import ground_state_magnetic_moments as gs_magmom
from ase.data import atomic_numbers, chemical_symbols

from catlearn.featurize.periodic_table_data import (get_mendeleev_params,
n_outer,
list_mendeleev_params,
from catlearn.featurize.periodic_table_data import (list_mendeleev_params,
default_params, get_radius,
electronegativities,
block2number, make_labels)
make_labels)
from catlearn.featurize.base import BaseGenerator, check_labels


Expand All @@ -32,8 +29,8 @@
'generalized_cn',
'bag_cn',
'bag_atoms_ads',
'bag_connections_ads',
'bag_connections_chemi']
'bag_edges_ads',
'bag_edges_chemi']

extra_slab_params = ['atomic_radius',
'heat_of_formation',
Expand Down Expand Up @@ -645,7 +642,7 @@ def strain(self, atoms=None):
strain_term = (av_term - av_bulk) / av_bulk
return [strain_site, strain_term]

def bag_connections_ads(self, atoms):
def bag_edges_ads(self, atoms):
"""Returns bag of connections, counting only the bonds within the
adsorbate.
Expand Down Expand Up @@ -693,7 +690,7 @@ def bag_connections_ads(self, atoms):
boc[bond_type] += 1
return list(boc[np.triu_indices_from(boc)])

def bag_connections_chemi(self, atoms):
def bag_edges_chemi(self, atoms):
"""Returns bag of connections, counting only the bonds within the
adsorbate and the connections between adsorbate and surface.
Expand Down Expand Up @@ -742,6 +739,63 @@ def bag_connections_chemi(self, atoms):

return list(boc[np.triu_indices_from(boc)])

def bag_edges_all(self, atoms):
    """Return a bag of edges for all bonds involving adsorbate atoms.

    Bonds within the adsorbate and bonds between adsorbate atoms and any
    other atom are counted by element pair. Counts are accumulated in a
    matrix with one row per entry of ``self.ads_atom_types`` and one
    column per entry of ``self.atom_types``; the upper-triangular entries
    of that matrix are returned, in the same order as the labels.

    Parameters
    ----------
    atoms : object
        ASE Atoms object carrying ``connectivity`` and
        ``subsets['ads_atoms']``, or None to request the feature labels.

    Returns
    -------
    features : list
        Bond counts. If None was passed, the elements are strings,
        naming the feature.
    """
    # Element symbols for the columns (all atoms) and rows (adsorbate).
    symbols = np.array([chemical_symbols[z] for z in self.atom_types])
    ads_symbols = np.array([chemical_symbols[z] for z
                            in self.ads_atom_types])

    # pairs[i, j] == symbols[j] + ads_symbols[i]; built with the public
    # np.char API instead of the private np.core.defchararray path.
    rows, cols = np.meshgrid(symbols, ads_symbols)
    pairs = np.char.add(rows, cols)

    # NOTE(review): the matrix is rectangular, so this selection drops
    # pairs whose adsorbate-type index exceeds the atom-type index (e.g.
    # an adsorbate atom bonded to a lighter element that never occurs in
    # the adsorbate). The layout is kept so labels and vector length stay
    # unchanged - confirm whether the full matrix was intended.
    keep = np.triu_indices_from(pairs)

    if atoms is None:
        return ['bea_' + c + '_ads' for c in pairs[keep]]

    # Empty bag of connection types.
    boc = np.zeros([len(self.ads_atom_types), len(self.atom_types)])

    numbers = atoms.numbers
    ads_atoms = list(atoms.subsets['ads_atoms'])
    ads_set = set(ads_atoms)

    # Rows: adsorbate atoms (in subset order). Columns: every atom.
    cm = np.array(atoms.connectivity)[ads_atoms, :]

    for k, j in zip(*np.nonzero(cm > 0)):
        i = ads_atoms[k]
        if j == i:
            # Self connection of the adsorbate atom; not a bond.
            continue
        if j in ads_set:
            if j < i:
                # Intra-adsorbate bond, already counted from atom j's row.
                continue
            # Put the lighter element on the adsorbate axis so the count
            # always lands in the retained upper triangle.
            z_row, z_col = sorted((numbers[i], numbers[j]))
        else:
            z_row, z_col = numbers[i], numbers[j]
        bond_type = (self.ads_atom_types.index(z_row),
                     self.atom_types.index(z_col))
        boc[bond_type] += 1

    return list(boc[keep])

def en_difference_ads(self, atoms=None):
"""Returns a list of electronegativity metrics, squared and summed over
bonds within the adsorbate atoms.
Expand Down
37 changes: 16 additions & 21 deletions catlearn/fingerprint/molecule.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
"""Functions to build a gas phase molecule fingerprint."""
from catlearn.utilities.neighborlist import catlearn_neighborlist
from catlearn.featurize.base import BaseGenerator
from catlearn.featurize.periodic_table_data import list_mendeleev_params
import networkx as nx
import numpy as np
from ase import Atoms


default_parameters = [
'atomic_number',
'covalent_radius_cordero',
'en_pauling',
]
'en_pauling']


class AutoCorrelationFingerprintGenerator():
class AutoCorrelationFingerprintGenerator(BaseGenerator):
"""Class for constructing an autocorrelation fingerprint."""

def __init__(self, images, dstar=0, parameters=None):
def __init__(self, **kwargs):
"""Initialize.
Parameters
Expand All @@ -27,28 +26,24 @@ def __init__(self, images, dstar=0, parameters=None):
parameters : list
Parameters to use for the autocorrelation
"""
if isinstance(images, Atoms):
images = [images]

self.images = images
self.dstar = dstar
# Slab periodic table parameters.
if not hasattr(self, 'dstar'):
self.dstar = kwargs.get('dstar')

if parameters is None:
self.parameters = default_parameters
if self.dstar is None:
self.dstar = 2

def generate(self):
"""Return an (n, m) array of fingerprints."""
fp_length = len(self.parameters) * (self.dstar + 1)
fingerprints = np.zeros((len(self.images), fp_length))
if not hasattr(self, 'parameters'):
self.parameters = kwargs.get('parameters')

for i, atoms in enumerate(self.images):
fingerprints[i] = self.get_autocorrelation(atoms)
if self.parameters is None:
self.parameters = default_parameters

return fingerprints
super(AutoCorrelationFingerprintGenerator, self).__init__(**kwargs)

def get_autocorrelation(self, atoms):
"""Return the autocorrelation fingerprint for a molecule."""
connectivity = catlearn_neighborlist(atoms)
connectivity = atoms.connectivity

G = nx.Graph(connectivity)
distance_matrix = nx.floyd_warshall_numpy(G)
Expand Down
Loading

0 comments on commit e584097

Please sign in to comment.