diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 716ab0b4..e9a99e5d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -16,10 +16,10 @@ jobs: shell: bash -l {0} steps: - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: python-version: 3.8 - miniforge-variant: Mambaforge + miniforge-variant: Miniforge3 channels: defaults,mjohnson541,conda-forge channel-priority: true activate-environment: pynta_env diff --git a/environment.yml b/environment.yml index e1bb0e45..850e8532 100644 --- a/environment.yml +++ b/environment.yml @@ -25,3 +25,4 @@ dependencies: - coverage - liblapack !=3.9.0 - pysidt + - ruamel.yaml < 0.18 \ No newline at end of file diff --git a/pynta/coveragedependence.py b/pynta/coveragedependence.py new file mode 100644 index 00000000..abcbfbd3 --- /dev/null +++ b/pynta/coveragedependence.py @@ -0,0 +1,2253 @@ +from molecule.molecule import Molecule,Atom,Bond,Group,GroupAtom,GroupBond +from molecule.exceptions import AtomTypeError +from ase.io import read, write +from ase.geometry import get_distances +from ase.visualize import view +from ase.neighborlist import natural_cutoffs +from acat.adsorption_sites import SlabAdsorptionSites +from pynta.utils import get_unique_sym, get_occupied_sites, sites_match, SiteOccupationException +from pynta.mol import * +from pynta.geometricanalysis import * +from pynta.tasks import * +from pynta.postprocessing import get_adsorbate_energies +from pysidt import * +from pysidt.extensions import split_mols +from pysidt.sidt import * +from fireworks import LaunchPad, Workflow +from fireworks.queue.queue_launcher import rapidfire as rapidfirequeue +from fireworks.features.multi_launcher import launch_multiprocess +from fireworks.utilities.fw_serializers import load_object_from_file +from fireworks.core.rocket_launcher import rapidfire +from fireworks.core.fworker import FWorker +from copy import deepcopy +import numpy as np +import json +import shutil +import os +import itertools +import logging + +def get_unstable_pairs(pairsdir,adsorbate_dir,sites,site_adjacency,nslab,max_dist=3.0,show=False,metal=None,facet=None, + infopath_dict=None,imag_freq_path_dict=None): + out_pairs = [] + coadname_dict = {"O=[Pt]": 1, "N#[Pt]": 1, "O[Pt]": 2, "[Pt]": 1} + allowed_structure_site_structure_map = generate_allowed_structure_site_structures(adsorbate_dir,sites,site_adjacency,nslab,max_dist=max_dist,cut_multidentate_off_num=None) + if show: + config_show = [] + for pair in os.listdir(pairsdir): + if not "_" in pair or pair[0] == ".": + continue + adname = pair.split("_")[0] + coadname = pair.split("_")[1] + if infopath_dict: + info_path = infopath_dict[adname] + else: + info_path = None + if imag_freq_path_dict: + imag_freq_path = imag_freq_path_dict[adname] + else: + imag_freq_path = None + for num in os.listdir(os.path.join(pairsdir,pair)): + p = os.path.join(pairsdir,pair,num) + if num.isdigit() and os.path.isdir(p): + init = read(os.path.join(p,"init.xyz")) + with open(os.path.join(p,"info.json"),'r') as f: + initinfo = json.load(f) + try: + m = Molecule().from_adjacency_list(initinfo["adjlist"],check_consistency=False) + #m.update(sort_atoms=False) + except AtomTypeError: + m = Molecule().from_adjacency_list(initinfo["adjlist"],raise_atomtype_exception=False, + raise_charge_exception=False,check_consistency=False) + for a in m.atoms: + if a.charge != 0: + a.charge = 0 + v = 0 + for a2,edge in a.edges.items(): + v += edge.order*2 + if not a.is_hydrogen(): + a.lone_pairs = int(np.round((8-v)/2)) +# try: +# m.update(sort_atoms=False) +# except Exception as e: +# print(m.to_adjacency_list()) +# raise e + is_ts = any(bd.get_order_str() == "R" for bd in m.get_all_edges()) + g = extract_pair_graph(init,sites,site_adjacency,nslab,max_dist=max_dist, + cut_multidentate_off_num=coadname_dict[coadname], + allowed_structure_site_structures=allowed_structure_site_structure_map, + is_ts=is_ts,metal=metal,facet=facet,info_path=info_path,imag_freq_path=imag_freq_path) + + + #g.update(sort_atoms=False) + outpath = os.path.join(p,"out.xyz") + if not os.path.exists(outpath): + if show: + pass + + out_pairs.append(g.to_group()) + else: + final = read(outpath) + try: + gout = extract_pair_graph(final,sites,site_adjacency,nslab,max_dist=max_dist, + allowed_structure_site_structures=allowed_structure_site_structure_map,is_ts=is_ts, + metal=metal,facet=facet,info_path=info_path,imag_freq_path=imag_freq_path) + if len(gout.atoms) != len(g.atoms): + out_pairs.append(g.to_group()) + if show: + config_show.append(init) + config_show.append(final) + continue + #gout.update(sort_atoms=False) + except (FindingPathError, SiteOccupationException): + continue + + if not gout.is_isomorphic(g,save_order=True,strict=False): + out_pairs.append(g.to_group()) + if show: + config_show.append(init) + config_show.append(final) + if show: + view(config_show) + return out_pairs + +def copy_stable_pairs(pairsdir,sites,site_adjacency,nslab,max_dist=3.0): + out_pairs = [] + count = 0 + coadname_dict = {"O=[Pt]": 1, "N#[Pt]": 1, "O[Pt]": 2, "[Pt]": 1} + for pair in os.listdir(pairsdir): + coadname = pair.split("_")[1] + for num in os.listdir(os.path.join(pairsdir,pair)): + p = os.path.join(pairsdir,pair,num) + if num.isdigit() and os.path.isdir(p): + init = read(os.path.join(p,"init.xyz")) + with open(os.path.join(p,"info.json"),'r') as f: + initinfo = json.load(f) + try: + m = Molecule().from_adjacency_list(initinfo["adjlist"]) + #m.update(sort_atoms=False) + except AtomTypeError: + m = Molecule().from_adjacency_list(initinfo["adjlist"],raise_atomtype_exception=False, + raise_charge_exception=False,check_consistency=False) + for a in m.atoms: + if a.charge != 0: + a.charge = 0 + v = 0 + for a2,edge in a.edges.items(): + v += edge.order*2 + if not a.is_hydrogen(): + a.lone_pairs = int(np.round((8-v)/2)) +# try: +# m.update(sort_atoms=False) +# except Exception as e: +# print(m.to_adjacency_list()) +# raise e + + g = extract_pair_graph(init,sites,site_adjacency,nslab,max_dist=max_dist, + cut_coads_off_num=coadname_dict[coadname]) + + + #g.update(sort_atoms=False) + outpath = os.path.join(p,"out.xyz") + if not os.path.exists(outpath): + out_pairs.append(g.to_group()) + else: + final = read(outpath) + try: + gout = extract_pair_graph(final,sites,site_adjacency,nslab,max_dist=max_dist) + if len(gout.atoms) != len(g.atoms): + out_pairs.append(g.to_group()) + continue + #gout.update(sort_atoms=False) + except FindingPathError: + continue + + if not gout.is_isomorphic(g,save_order=True,strict=False): + out_pairs.append(g.to_group()) + else: #matches + os.makedirs(os.path.join(os.path.split(pairsdir)[0],"sampling",str(count))) + shutil.copyfile(os.path.join(p,"out.xyz"), + os.path.join(os.path.split(pairsdir)[0],"sampling",str(count),"out.xyz") + ) + shutil.copyfile(os.path.join(p,"info.json"), + os.path.join(os.path.split(pairsdir)[0],"sampling",str(count),"info.json") + ) + shutil.copyfile(os.path.join(p,"init.xyz"), + os.path.join(os.path.split(pairsdir)[0],"sampling",str(count),"init.xyz") + ) + count += 1 + + + return None + +def generate_pair_geometries(adpath1,adpath2,slabpath,metal,facet,adinfo1=None,adinfo2=None, + max_dist=3.0,imag_freq_max=150.0,symmetric=None): + """ + adpath1 can be bidentate + adpath2 must be monodentate + """ + #slab information + slab = read(slabpath) + nslab = len(slab) + cas = SlabAdsorptionSites(slab,facet,allow_6fold=False,composition_effect=False, + label_sites=True, + surrogate_metal=metal) + sites = cas.get_sites() + site_adjacency = cas.get_neighbor_site_list() + + adsorbate_dir = os.path.split(adpath2)[0] + allowed_structure_site_structures = generate_allowed_structure_site_structures(adsorbate_dir,sites,site_adjacency,nslab,max_dist=max_dist) + + if symmetric is None: + symmetric = adpath1 == adpath2 + + #extract information about adsorbates and valid adsorbate geometries + if os.path.isfile(os.path.join(adpath1,"opt.xyz")): #TS + is_ts = True + if adinfo1 is None: + adinfo1 = os.path.join(os.path.split(adpath1)[0],"info.json") + + with open(adinfo1,"r") as f: + info1 = json.load(f) + + admol1,neighbor_sites1,ninds1 = generate_TS_2D(read(os.path.join(adpath1,"opt.xyz")), adinfo1, metal, facet, sites, site_adjacency, nslab, max_dist=np.inf, + imag_freq_path=os.path.join(adpath1,"vib.json_vib.json"), + allowed_structure_site_structures=allowed_structure_site_structures, + ) + + aseinds1 = [] + for i,at in enumerate(admol1.atoms): + if (not at.is_surface_site()) and at.is_bonded_to_surface(): + aseinds1.append(i-len(neighbor_sites1)+nslab) + ad1s = [read(os.path.join(adpath1,"opt.xyz"))] + ad12Ds = [admol1] + ad12Dneighbors = [neighbor_sites1] + ad12Dninds = [ninds1] + else: + is_ts = False + if adinfo1 is None: + adinfo1 = os.path.join(adpath1,"info.json") + + with open(adinfo1,"r") as f: + info1 = json.load(f) + + mol1 = Molecule().from_adjacency_list(info1["adjlist"]) + atom_to_molecule_surface_atom_map1 = { int(key):int(val) for key,val in info1["gratom_to_molecule_surface_atom_map"].items()} + ad1s = get_unique_adsorbate_geometries(adpath1,mol1,sites,site_adjacency,atom_to_molecule_surface_atom_map1, + nslab,imag_freq_max=imag_freq_max) + ad12Ds = [] + ad12Dneighbors = [] + ad12Dninds = [] + for a in ad1s: + admol1,neighbor_sites1,ninds1 = generate_adsorbate_2D(a, sites, site_adjacency, nslab, max_dist=np.inf) + ad12Ds.append(admol1) + ad12Dneighbors.append(neighbor_sites1) + ad12Dninds.append(ninds1) + + aseinds1 = [x+nslab for x in atom_to_molecule_surface_atom_map1.keys()] + + if os.path.isfile(adpath2): + raise ValueError + else: + if adinfo2 is None: + adinfo2 = os.path.join(adpath2,"info.json") + + with open(adinfo2,"r") as f: + info2 = json.load(f) + + mol2 = Molecule().from_adjacency_list(info2["adjlist"]) + assert len(mol2.get_adatoms()) == 1 + atom_to_molecule_surface_atom_map2 = { int(key):int(val) for key,val in info2["gratom_to_molecule_surface_atom_map"].items()} + ad2s = get_unique_adsorbate_geometries(adpath2,mol2,sites,site_adjacency,atom_to_molecule_surface_atom_map2, + nslab,imag_freq_max=imag_freq_max) + + + #generate pairs + pairs = [] + ad1_to_ad1_sites = dict() + ad1_to_ad2_sites = dict() + ad1_to_ad2_heights = dict() + ad1_to_actual_occ = dict() + #go through all adsorbate 1 configurations + for j,ad1 in enumerate(ad1s): + #find occupied sites + ad1_to_ad2_sites[j] = dict() + ad1_to_ad2_heights[j] = dict() + occ = get_occupied_sites(ad1,sites,nslab) + actual_occ = [] + for site in occ: + if site["bonding_index"] in aseinds1: + actual_occ.append(site) + + if len(actual_occ) == 0: +# print(adinfo1) +# print(aseinds1) +# print(atom_to_molecule_surface_atom_map1) +# print(mol1.to_adjacency_list()) +# print(occ) +# print(j) + raise ValueError + + ad1_to_ad1_sites[j] = [x for x in actual_occ] + + #find all sites to be resolved with pairs + neighbor_sites = [] + + for site in sites: + if any(sites_match(site,s,slab) for s in actual_occ): + continue + for occ_site in actual_occ: + bd,dist = get_distances([site["position"]], [occ_site["position"]], cell=slab.cell, pbc=slab.pbc) + v = np.linalg.norm(bd[:2]) + if v < max_dist: + neighbor_sites.append(site) + break + + #estimate heights for all placements + stable_neighbor_sites = [] + heights = [] + for site in neighbor_sites: + for i,ad2 in enumerate(ad2s): + if i not in ad1_to_ad2_sites[j].keys(): + ad1_to_ad2_sites[j][i] = [] + ad1_to_ad2_heights[j][i] = [] + + cas = SlabAdsorptionSites(ad2,facet,allow_6fold=False,composition_effect=False, + label_sites=True, + surrogate_metal=metal) + sites2 = cas.get_sites() + occ = get_occupied_sites(ad2,sites2,nslab) + occsite = None + + for s in occ: + if (s['bonding_index'] - nslab) in atom_to_molecule_surface_atom_map2.keys(): + occsite = s + break + else: +# print(occ) +# print(atom_to_molecule_surface_atom_map2.keys()) +# print(s['bonding_index']) + raise ValueError + + if occsite["site"] == site["site"] and occsite["morphology"] == site["morphology"]: + ad1_to_ad2_sites[j][i].append(site) + ad1_to_ad2_heights[j][i].append(occsite["bond_length"]) + stable_neighbor_sites.append(site) + heights.append(occsite["bond_length"]) + break + + + adpairs = [] + pairmols = [] + + if not symmetric: + for j in range(len(ad1s)): + ad1_sites = ad1_to_ad1_sites[j] + + ad2_sites = [] + ad2_geoms = [] + heights = [] + for i,sites in ad1_to_ad2_sites[j].items(): + for k,site in enumerate(sites): + heights.append(ad1_to_ad2_heights[j][i][k]) + ad2_sites.append(ad1_to_ad2_sites[j][i][k]) + ad2_geoms.append(ad2s[i]) + + + inds = get_unique_site_inds(ad2_sites,slab,fixed_point=ad1_sites[0]["position"]) + + for i in inds: +# if any(sites_match(ad2_sites[i],s,slab) for s in ad1_to_ad1_sites[j]): +# continue + + ad = deepcopy(ad1s[j]) + surf_ind = list(atom_to_molecule_surface_atom_map2.keys())[0] + add_adsorbate_to_site(ad, ad2_geoms[i][nslab:], surf_ind, ad2_sites[i], height=heights[i]) + nsites = ad12Dneighbors[j] + ind = [k for k,x in enumerate(nsites) if sites_match(ad2_sites[i],x,slab)][0] + amol = deepcopy(ad12Ds[j]) + satom = amol.atoms[ind] + m2 = deepcopy(mol2) + admol2 = m2.get_desorbed_molecules()[0] + label_atom_dict = admol2.get_all_labeled_atoms() + for label,at in label_atom_dict.items(): #one iteration only + amol = amol.merge(admol2) + if label == "*1": + at.decrement_radical() + order = 1 + elif label == "*2": + at.decrement_radical() + at.decrement_radical() + order = 2 + elif label == "*3": + at.decrement_radical() + at.decrement_lone_pairs() + order = 3 + elif label == "*4": + at.decrement_radical() + at.decrement_radical() + at.decrement_lone_pairs() + order = 4 + else: + raise ValueError(label) + try: + bd = Bond(satom,at,order=order) + except: +# print(ad12Ds[j].to_adjacency_list()) +# print(mol2.to_adjacency_list()) +# print(amol.to_adjacency_list()) +# print(satom) +# print(at) +# print(order) + raise ValueError + amol.add_bond(bd) + amol.clear_labeled_atoms() + if amol.multiplicity == -187: #handle surface molecules + amol.multiplicity = amol.get_radical_count() + 1 + for pmol in pairmols: + if pmol.is_isomorphic(amol,save_order=True): #duplicate + break + else: + adpairs.append(ad) + pairmols.append(amol) + else: #symmetric case, monodentate-monodentate since ad2 must be monodentate + ad2_site_pairs = [] + ad2_geoms = [] + heights = [] + ad1_inds = [] + for j in range(len(ad1s)): + ad1_sites = ad1_to_ad1_sites[j] + + for i,sites in ad1_to_ad2_sites[j].items(): + for k,site in enumerate(sites): + heights.append(ad1_to_ad2_heights[j][i][k]) + ad2_site_pairs.append((ad1_sites[0],site)) + ad2_geoms.append(ad2s[i]) + ad1_inds.append(j) + + inds = get_unique_site_pair_inds(ad2_site_pairs,slab) + + for i in inds: +# if any(sites_match(ad2_site_pairs[i][1],s,slab) for s in ad1_to_ad1_sites[j]): +# print("matched") +# continue + ad = deepcopy(ad1s[ad1_inds[i]]) + surf_ind = list(atom_to_molecule_surface_atom_map2.keys())[0] + add_adsorbate_to_site(ad, ad2_geoms[i][nslab:], surf_ind, ad2_site_pairs[i][1], + height=heights[i]) + nsites = ad12Dneighbors[ad1_inds[i]] + try: + ind = [k for k,x in enumerate(nsites) if sites_match(ad2_site_pairs[i][1],x,slab)][0] + except Exception as e: +# print("site") +# print(ad2_site_pairs[i][1]) +# print("occ") +# print(ad1_to_ad1_sites[j]) +# print("nsites") +# print(nsites) +# bd,dist = get_distances([ad2_site_pairs[i][1]["position"]], [ad1_to_ad1_sites[j][0]["position"]], cell=slab.cell, pbc=slab.pbc) +# print(bd) +# print(dist) + raise e + amol = deepcopy(ad12Ds[j]) + satom = amol.atoms[ind] + m2 = deepcopy(mol2) + admol2 = m2.get_desorbed_molecules()[0] + label_atom_dict = admol2.get_all_labeled_atoms() + for label,at in label_atom_dict.items(): #one iteration only + amol = amol.merge(admol2) + if label == "*1": + at.decrement_radical() + order = 1 + elif label == "*2": + at.decrement_radical() + at.decrement_radical() + order = 2 + elif label == "*3": + at.decrement_radical() + at.decrement_lone_pairs() + order = 3 + elif label == "*4": + at.decrement_radical() + at.decrement_radical() + at.decrement_lone_pairs() + order = 4 + else: + raise ValueError(label) + try: + bd = Bond(satom,at,order=order) + except: +# print(ad12Ds[j].to_adjacency_list()) +# print(mol2.to_adjacency_list()) +# print(amol.to_adjacency_list()) +# print(satom) +# print(at) +# print(order) + raise ValueError + amol.add_bond(bd) + amol.clear_labeled_atoms() + if amol.multiplicity == -187: #handle surface molecules + amol.multiplicity = amol.get_radical_count() + 1 + for pmol in pairmols: + if pmol.is_isomorphic(amol,save_order=True): #duplicate + break + else: + adpairs.append(ad) + pairmols.append(amol) + + + return adpairs,pairmols + +def get_unique_site_inds(sites,slab,fixed_point=None,tol=0.15): + fingerprints = [] + for k,site in enumerate(sites): + if fixed_point is None: + fingerprints.append((site["morphology"],site["site"])) + else: + bd,d = get_distances([site["position"]], [fixed_point], cell=slab.cell, pbc=(True,True,False)) + xydist = np.linalg.norm(bd[0][0][:2]) + zdist = bd[0][0][2] + fingerprints.append((site["morphology"],site["site"],xydist,zdist,)) + + unique_sites = [] + unique_inds = [] + for i,f in enumerate(fingerprints): + boo = False + for uf in unique_sites: + if fingerprints_match(f,uf,tol=tol): + boo = True + break + if boo: + continue + else: + unique_sites.append(f) + unique_inds.append(i) + + return unique_inds + +def fingerprints_match(f1,f2,tol=0.15): + for i in range(len(f1)): + if isinstance(f1[i],str) or isinstance(f1[i],frozenset): + if f1[i] != f2[i]: + return False + elif isinstance(f1[i],float): + if abs(f1[i] - f2[i]) > tol: + return False + else: + raise ValueError + else: + return True + +def get_unique_site_pair_inds(site_pairs,slab,tol=0.15): + fingerprints = [] + for k,sites in enumerate(site_pairs): + site1 = sites[0] + site2 = sites[1] + bd,d = get_distances([site1["position"]], [site2["position"]], cell=slab.cell, pbc=(True,True,False)) + xydist = np.linalg.norm(bd[0][0][:2]) + zdist = bd[0][0][2] + fingerprints.append((frozenset([(site1["morphology"],site1["site"]),(site2["morphology"],site2["site"])]), + xydist,zdist,)) + + unique_sites = [] + unique_inds = [] + for i,f in enumerate(fingerprints): + boo = False + for uf in unique_sites: + if fingerprints_match(f,uf,tol=tol): + boo = True + break + if boo: + continue + else: + unique_sites.append(f) + unique_inds.append(i) + + return unique_inds + +def setup_pair_opts_for_rxns(targetdir,tsdirs,coadnames,metal,facet,max_dist=3.0,imag_freq_max=150.0): + pairdir = os.path.join(targetdir,"pairs") + addir = os.path.join(os.path.split(os.path.split(tsdirs[0])[0])[0],"Adsorbates") + slabpath = os.path.join(os.path.split(os.path.split(tsdirs[0])[0])[0],"slab.xyz") + if not os.path.exists(pairdir): + os.makedirs(pairdir) + + ads = set() + combs = [] + for tsdir in tsdirs: + for coadname in coadnames: + tp = (tsdir,coadname) + combs.append(tp) + + with open(os.path.join(os.path.split(tsdir)[0],"info.json"),"r") as f: + info = json.load(f) + for molname in info["species_names"]+info["reverse_names"]: + with open(os.path.join(addir,molname,"info.json"),"r") as f: + molinfo = json.load(f) + m = Molecule().from_adjacency_list(molinfo["adjlist"]) + if m.contains_surface_site(): + ads.add(molname) + + + for adname in ads: + for coadname in coadnames: + tp = (adname,coadname) + revtp = (coadname,adname) + if (revtp not in combs) and (tp not in combs): + combs.append(tp) + + outdirs_ad = [] + outdirs_ts = [] + for s in combs: + if os.path.exists(s[0]): + is_ts = True + else: + is_ts = False + if not is_ts: + name = "_".join(s) + else: + name = "_".join([os.path.split(os.path.split(s[0])[0])[1],s[1]]) + namedir = os.path.join(pairdir,name) + if not os.path.exists(namedir): + os.makedirs(namedir) + if not is_ts: + ds = [os.path.join(addir,x) for x in s] + else: + ds = [s[0],os.path.join(addir,s[1])] + pairs,pairmols = generate_pair_geometries(ds[0],ds[1],slabpath,metal,facet, + max_dist=max_dist,imag_freq_max=imag_freq_max) + for i,pair in enumerate(pairs): + os.makedirs(os.path.join(namedir,str(i))) + write(os.path.join(namedir,str(i),"init.xyz"), pair) + if not is_ts: + moldict = {"adjlist": pairmols[i].to_adjacency_list()} + else: + moldict = {"adjlist": pairmols[i].to_adjacency_list(),"tsdir": s[0]} + with open(os.path.join(namedir,str(i),"info.json"),'w') as f: + json.dump(moldict,f) + if not is_ts: + outdirs_ad.append(os.path.join(namedir,str(i),"init.xyz")) + else: + outdirs_ts.append(os.path.join(namedir,str(i),"init.xyz")) + + return outdirs_ad,outdirs_ts + +def get_adsorbate_ts_information(xyz,slabxyz,is_ts, + metal,facet,sites,site_adjacency,max_dist=3.0): + slab = read(slabxyz) + + + if is_ts: + ad = read(xyz) + xyzinfo = os.path.join(os.path.split(xyz)[0],"..","info.json") + with open(xyzinfo,"r") as f: + info = json.load(f) + + reactants = Molecule().from_adjacency_list(info["reactants"]) + products = Molecule().from_adjacency_list(info["products"]) + template_mol_map = [{ int(key):int(val) for key,val in x.items()} for x in info["template_mol_map"]] + molecule_to_atom_maps = [{ int(key):int(val) for key,val in x.items()} for x in info["molecule_to_atom_maps"]] + ads_sizes = info["ads_sizes"] + nslab = info["nslab"] + forward = info["forward"] + + broken_bonds,formed_bonds = get_broken_formed_bonds(reactants,products) + + extra_bonds = formed_bonds if forward else broken_bonds + template = reactants if forward else products + adatoms = template.get_adatoms() + adinds = [] + for ind,atom in enumerate(template.atoms): + if atom.is_surface_site(): + if len(atom.bonds) == 0: + s = [bd for bd in extra_bonds if atom.label in bd] + if len(s) > 0: + labels = list(s[0]) + labels.remove(atom.label) + alabel = labels[0] + a = template.get_labeled_atoms(alabel)[0] + adinds.append(template.atoms.index(a)) + else: + a = list(atom.bonds.keys())[0] + adinds.append(template.atoms.index(a)) + + aseinds = [] + for ind in adinds: + aseind = get_ase_index(ind,template_mol_map,molecule_to_atom_maps,nslab,ads_sizes) + aseinds.append(aseind) + + + else: + ad = read(xyz) + with open(os.path.join(os.path.split(os.path.split(xyz)[0])[0],"info.json"),"r") as f: + info = json.load(f) + + mol2D = Molecule().from_adjacency_list(info["adjlist"]) + atom_to_molecule_surface_map = { int(key):int(val) for key,val in info["gratom_to_molecule_surface_atom_map"].items()} + nslab = info["nslab"] + aseinds = [x+nslab for x in atom_to_molecule_surface_map.keys()] + + occ = get_occupied_sites(ad,sites,nslab) + + + actual_occ = [] + for site in occ: + if site["bonding_index"] in aseinds: + actual_occ.append(site) + + neighbor_sites = [] + + for site in sites: + if any(sites_match(site,s,slab) for s in actual_occ): + continue + for occ_site in actual_occ: + v,dist = get_distances([site["position"]], [occ_site["position"]], cell=slab.cell, pbc=slab.pbc) + if np.linalg.norm(v[:2]) < max_dist: + neighbor_sites.append(site) + break + + if is_ts: + admol,neighbor_sites_2D,ninds = generate_TS_2D(ad, xyzinfo, metal, facet, sites, site_adjacency, nslab, max_dist=None) + else: + admol,neighbor_sites_2D,ninds = generate_adsorbate_2D(ad, sites, site_adjacency, nslab, max_dist=None) + + return ad,admol,neighbor_sites_2D,ninds,actual_occ,neighbor_sites,aseinds,slab,nslab + +def get_coadsorbate_information(coadnames,ads_dir,neighbor_sites,sites,site_adjacency,nslab,slab): + coad_to_stable_neighbor_sites = dict() + coad_site_stable_parameters = {name:[] for name in coadnames} + coad_atom_to_molecule_surface_atom_map = dict() + infocoad_dict = dict() + stable_neighbor_sites_total = [] + coads_dict = dict() + coad2Ds = dict() + for coadname in coadnames: + coaddir = os.path.join(ads_dir,coadname) + with open(os.path.join(coaddir,"info.json"),"r") as f: + infocoad = json.load(f) + + coad2D = Molecule().from_adjacency_list(infocoad["adjlist"]) + coad2Ds[coadname] = coad2D + atom_to_molecule_surface_atom_map = {int(key):int(val) for key,val in infocoad["gratom_to_molecule_surface_atom_map"].items()} + coads = get_unique_adsorbate_geometries(coaddir,Molecule().from_adjacency_list(infocoad["adjlist"]), + sites,site_adjacency,atom_to_molecule_surface_atom_map, + nslab) + atom_to_molecule_surface_atom_map = {int(key):int(val) for key,val in infocoad["gratom_to_molecule_surface_atom_map"].items()} + coad_atom_to_molecule_surface_atom_map[coadname] = atom_to_molecule_surface_atom_map + coads_dict[coadname] = coads + infocoad_dict[coadname] = infocoad + stable_neighbor_sites = [] + + for site in neighbor_sites: + + for coad in coads: + occ = get_occupied_sites(coad,sites,nslab) + occsite = None + for s in occ: + if (s['bonding_index'] - nslab) in atom_to_molecule_surface_atom_map.keys(): + occsite = s + break + else: + raise ValueError + + if occsite["site"] == site["site"] and occsite["morphology"] == site["morphology"]: + stable_neighbor_sites.append(site) + if not any(sites_match(s,site,slab) for s in stable_neighbor_sites_total): + stable_neighbor_sites_total.append(site) + if not (site["site"],site["morphology"]) in coad_site_stable_parameters[coadname]: + coad_site_stable_parameters[coadname].append((site["site"],site["morphology"])) + break + + coad_to_stable_neighbor_sites[coadname] = stable_neighbor_sites + + return coad_to_stable_neighbor_sites, coad_site_stable_parameters,coad_atom_to_molecule_surface_atom_map,infocoad_dict,stable_neighbor_sites_total,coads_dict,coad2Ds + +def generate_coadsorbed_xyzs(outdir,ad_xyzs,ts_xyzs,slabxyz,pairsdir,ads_dir, + coadnames,metal,facet,sites,site_adjacency,max_dist=3.0): + slab = read(slabxyz) + nslab = len(slab) + + unstable_pairs = get_unstable_pairs(pairsdir,ads_dir,sites,site_adjacency,nslab,max_dist=None) + + ad_dict = dict() + ad_admol_dict = dict() + ad_neighbor_sites_2D_dict = dict() + ad_ninds_dict = dict() + ad_neighbor_sites_dict = dict() + ad_actual_occ_dict = dict() + ad_aseinds_dict = dict() + + for ad_xyz in ad_xyzs: + ad,admol,neighbor_sites_2D,ninds,actual_occ,neighbor_sites,aseinds,slab,nslab = get_adsorbate_ts_information(ad_xyz,slabxyz,False,metal,facet,sites,site_adjacency,max_dist=max_dist) + ad_dict[ad_xyz] = ad + ad_admol_dict[ad_xyz] = admol + ad_neighbor_sites_2D_dict[ad_xyz] = neighbor_sites_2D + ad_ninds_dict[ad_xyz] = ninds + ad_neighbor_sites_dict[ad_xyz] = neighbor_sites + ad_actual_occ_dict[ad_xyz] = actual_occ + ad_aseinds_dict[ad_xyz] = aseinds + + ts_dict = dict() + ts_admol_dict = dict() + ts_neighbor_sites_2D_dict = dict() + ts_ninds_dict = dict() + ts_neighbor_sites_dict = dict() + ts_actual_occ_dict = dict() + ts_aseinds_dict = dict() + ts_xyz = None + for ts_xyz in ts_xyzs: + ad,admol,neighbor_sites_2D,ninds,actual_occ,neighbor_sites,aseinds,slab,nslab = get_adsorbate_ts_information(ts_xyz,slabxyz,True,metal,facet,sites,site_adjacency,max_dist=max_dist) + ts_dict[ts_xyz] = ad + ts_admol_dict[ts_xyz] = admol + ts_neighbor_sites_2D_dict[ts_xyz] = neighbor_sites_2D + ts_ninds_dict[ts_xyz] = ninds + ts_neighbor_sites_dict[ts_xyz] = neighbor_sites + ts_actual_occ_dict[ts_xyz] = actual_occ + ts_aseinds_dict[ts_xyz] = aseinds + + + outatoms = [] + outmol2Dsts = [] + outmol2Dsad = [] + outxyzsts = [] + outxyzsad = [] + + for coadname in coadnames: + for j,ts_xyz in enumerate(ts_xyzs): + ts_name = "TS" + str(j) + print(ts_name) + if not os.path.exists(os.path.join(outdir,ts_name)): + os.makedirs(os.path.join(outdir,ts_name)) + shutil.copyfile(os.path.join(os.path.split(os.path.split(ts_xyz)[0])[0],"info.json"), + os.path.join(outdir,ts_name,"info.json")) + coad_to_stable_neighbor_sites,coad_site_stable_parameters,coad_atom_to_molecule_surface_atom_map,infocoad_dict,stable_neighbor_sites_total,coads_dict,coad2Ds = get_coadsorbate_information(coadnames, + ads_dir,ts_neighbor_sites_dict[ts_xyz],sites,site_adjacency,nslab,slab) + atoms,mol2Ds = generate_coadsorbed_geoms(ts_dict[ts_xyz], + ts_admol_dict[ts_xyz], + ts_neighbor_sites_2D_dict[ts_xyz],ts_ninds_dict[ts_xyz], + ts_actual_occ_dict[ts_xyz],ts_neighbor_sites_dict[ts_xyz], + ts_aseinds_dict[ts_xyz],slab,nslab,True,ads_dir,unstable_pairs,coadname, + metal,facet,sites,site_adjacency,coad_to_stable_neighbor_sites, coad_site_stable_parameters, + coad_atom_to_molecule_surface_atom_map,infocoad_dict, + stable_neighbor_sites_total,coads_dict,coad2Ds,max_dist=max_dist) + for i,mol2D in enumerate(mol2Ds): #check if we already have it + for mol2Dout in outmol2Dsts: + if mol2Dout.is_isomorphic(mol2D,save_order=True): + break + else: + outatoms.append(atoms[i]) + outmol2Dsts.append(mol2Ds[i]) + if not os.path.exists(os.path.join(outdir,ts_name,coadname,str(i))): + os.makedirs(os.path.join(outdir,ts_name,coadname,str(i))) + with open(os.path.join(outdir,ts_name,coadname,str(i),"info.json"),"w") as f: + d = {"adjlist": mol2D.to_adjacency_list(), + "xyz": ts_xyz} + json.dump(d,f) + write(os.path.join(outdir,ts_name,coadname,str(i),"init.xyz"),atoms[i]) + outxyzsts.append(os.path.join(outdir,ts_name,coadname,str(i),"init.xyz")) + + for j,ad_xyz in enumerate(ad_xyzs): + with open(os.path.join(os.path.split(os.path.split(ad_xyz)[0])[0],"info.json"),"r") as f: + info = json.load(f) + ad_name = info["name"] + print(ad_name) + if not os.path.exists(os.path.join(outdir,ad_name)): + os.makedirs(os.path.join(outdir,ad_name)) + shutil.copyfile(os.path.join(os.path.split(os.path.split(ad_xyz)[0])[0],"info.json"), + os.path.join(outdir,ad_name,"info.json")) + coad_to_stable_neighbor_sites,coad_site_stable_parameters,coad_atom_to_molecule_surface_atom_map,infocoad_dict,stable_neighbor_sites_total,coads_dict,coad2Ds = get_coadsorbate_information(coadnames, + ads_dir,ad_neighbor_sites_dict[ad_xyz],sites,site_adjacency,nslab,slab) + atoms,mol2Ds = generate_coadsorbed_geoms(ad_dict[ad_xyz], + ad_admol_dict[ad_xyz], + ad_neighbor_sites_2D_dict[ad_xyz],ad_ninds_dict[ad_xyz], + ad_actual_occ_dict[ad_xyz],ad_neighbor_sites_dict[ad_xyz], + ad_aseinds_dict[ad_xyz],slab,nslab,False,ads_dir,unstable_pairs,coadname, + metal,facet,sites,site_adjacency,coad_to_stable_neighbor_sites, coad_site_stable_parameters, + coad_atom_to_molecule_surface_atom_map,infocoad_dict, + stable_neighbor_sites_total,coads_dict,coad2Ds,max_dist=max_dist) + for i,mol2D in enumerate(mol2Ds): #check if we already have it + for mol2Dout in outmol2Dsad: + if mol2Dout.is_isomorphic(mol2D): + break + else: + outatoms.append(atoms[i]) + outmol2Dsad.append(mol2Ds[i]) + if not os.path.exists(os.path.join(outdir,ad_name,coadname,str(i))): + os.makedirs(os.path.join(outdir,ad_name,coadname,str(i))) + with open(os.path.join(outdir,ad_name,coadname,str(i),"info.json"),"w") as f: + d = {"adjlist": mol2D.to_adjacency_list(), + "xyz": ad_xyz} + json.dump(d,f) + write(os.path.join(outdir,ad_name,coadname,str(i),"init.xyz"),atoms[i]) + outxyzsad.append(os.path.join(outdir,ad_name,coadname,str(i),"init.xyz")) + + return outatoms,outmol2Dsad,outmol2Dsts,outxyzsad,outxyzsts + +def generate_coadsorbed_geoms(ad,admol,neighbor_sites_2D,ninds,actual_occ,neighbor_sites, + aseinds,slab,nslab,is_ts,ads_dir,unstable_pairs,coadname, + metal,facet,sites,site_adjacency,coad_to_stable_neighbor_sites, coad_site_stable_parameters, + coad_atom_to_molecule_surface_atom_map,infocoad_dict, + stable_neighbor_sites_total,coads_dict,coad2Ds,max_dist=3.0): + + logging.error("stable neighbor sites: {}".format(len(stable_neighbor_sites_total))) + + coads = coads_dict[coadname] + atom_to_molecule_surface_atom_map = coad_atom_to_molecule_surface_atom_map[coadname] + infocoad = infocoad_dict[coadname] + coad2D = coad2Ds[coadname] + site_stable_parameters = coad_site_stable_parameters[coadname] + coad_occ_dict = {coads.index(coad): get_occupied_sites(coad,sites,nslab) for coad in coads} + coad_height_map = {coads.index(coad): list(get_bond_lengths_sites(Molecule().from_adjacency_list(infocoad["adjlist"]), + coad, + {int(x):y for x,y in infocoad["atom_to_molecule_atom_map"].items()}, + {int(x):y for x,y in infocoad["gratom_to_molecule_surface_atom_map"].items()}, + infocoad["nslab"],sites,site_adjacency, + facet=facet,metal=metal)[2].values())[0] for coad in coads} + outgeoms = [ad] + outmol2Ds = [admol] + geo_fails = 0 + mol2D_fails = 0 + config_fails = 0 + site_fails = 0 + unique_fails = 0 + for i,site in enumerate(stable_neighbor_sites_total): + logging.error("doing site {}".format(i)) + newoutgeoms = [] + newoutmol2Ds = [] + site_2D_inds = [i for i,x in enumerate(neighbor_sites_2D) if sites_match(site,x,slab)] + if not site_2D_inds: + site_fails += 1 + continue + + for j,geo in enumerate(outgeoms): + mol2D = outmol2Ds[j] + geo = deepcopy(geo) + geo,coad2D = add_coadsorbate_3D(geo,site,ad,coads,site_stable_parameters, + atom_to_molecule_surface_atom_map,infocoad,coad_occ_dict,coad_height_map,coad2D, + metal,facet,sites,site_adjacency,nslab) + if geo is None: + geo_fails += 1 + continue + mol2D = mol2D.copy(deep=True) + try: + mol2D = add_coadsorbate_2D(mol2D,site,coad2D,slab,neighbor_sites_2D,site_2D_inds) + except Exception as e: + print(mol2D.to_adjacency_list()) + print(site) + print(site_2D_inds) + raise e + + if mol2D is None: + mol2D_fails += 1 + continue + + if configuration_is_valid(mol2D,admol,is_ts,unstable_pairs): + for m in outmol2Ds: + if mol2D.is_isomorphic(m,save_order=True): + unique_fails += 1 + break + else: + assert len(geo) - nslab == len(mol2D.atoms) - len([a for a in mol2D.atoms if a.is_surface_site()]) + newoutgeoms.append(geo) + newoutmol2Ds.append(mol2D) + + outgeoms.extend(newoutgeoms) + outmol2Ds.extend(newoutmol2Ds) + logging.error("added so far: {}".format(len(outgeoms))) + + + outgeoms.remove(ad) #do not include the configuration with no coadsorbates in output + outmol2Ds.remove(admol) + return outgeoms,outmol2Ds + + +def add_coadsorbate_3D(geo,site,ad,coads,site_stable_parameters, + atom_to_molecule_surface_atom_map,infocoad,coad_occ_dict,coad_height_map,coad2D, + metal,facet,sites,site_adjacency,nslab): + if (site["site"],site["morphology"]) not in site_stable_parameters: + return None + for i,coad in enumerate(coads): + #occ = get_occupied_sites(coad,sites,nslab) + occ = coad_occ_dict[i] + occsite = None + for s in occ: + if (s['bonding_index'] - nslab) in atom_to_molecule_surface_atom_map.keys(): + occsite = s + break + else: + raise ValueError + if occsite["site"] == site["site"] and occsite["morphology"] == site["morphology"]: + #assert len(mol2D.get_all_labeled_atoms()) == 0, mol2D.to_adjacency_list() + surf_ind = list(atom_to_molecule_surface_atom_map.keys())[0] + h = coad_height_map[i] + add_adsorbate_to_site(geo, coad[nslab:], surf_ind, site, height=h) + return geo,coad2D + else: + return None,None + +def add_coadsorbate_2D(mol2D,site,coad2D,slab,neighbor_sites_2D,site_2D_inds): + if site_2D_inds: + ind2D = site_2D_inds[0] + else: + return None + siteatom = mol2D.atoms[ind2D] + assert siteatom.site == site["site"] + for a in siteatom.edges.keys(): + if not a.is_surface_site(): + return None + c = coad2D.get_desorbed_molecules()[0] + mol2D = mol2D.merge(c) + ldict = mol2D.get_all_labeled_atoms() + label = list(ldict.keys())[0] + catom = list(ldict.values())[0] + catom.label = '' + if label == "*1": + bd = Bond(siteatom,catom,order=1) + catom.radical_electrons -= 1 + elif label == "*2": + bd = Bond(siteatom,catom,order=2) + if catom.radical_electrons >= 2: + catom.radical_electrons -= 2 + else: + catom.lone_pairs -= 1 + elif label == "*3": + bd = Bond(siteatom,catom,order=3) + if catom.radical_electrons >= 3: + catom.radical_electrons -= 3 + elif catom.radical_electrons == 1: + catom.radical_electrons = 0 + catom.lone_pairs -= 1 + elif catom.radical_electrons == 2: + catom.radical_electrons = 1 + catom.lone_pairs -= 1 + elif label == "*4": + bd = Bond(siteatom,catom,order=4) + if catom.radical_electrons >= 4: + catom.radical_electrons -= 4 + elif catom.radical_electrons == 1: + catom.lone_pairs -= 2 + elif catom.radical_electrons == 2: + catom.radical_electrons = 0 + catom.lone_pairs -= 1 + elif catom.radical_electrons == 3: + catom.radical_electrons = 1 + catom.lone_pairs -= 1 + else: + raise ValueError + mol2D.add_bond(bd) + mol2D.multiplicity = mol2D.get_radical_count() + 1 + mol2D.update_atomtypes() + mol2D.update_connectivity_values() + return mol2D + +def configuration_is_valid(mol2D,admol,is_ts,unstable_pairs): + unstable_ind_pairs = set() + if is_ts: + admol_splits = split_ts_to_reactants(admol,tagatoms=False) + for asplit in admol_splits: + snum = len([a for a in asplit.atoms if a.is_surface_site()]) + for unstable_pair in unstable_pairs: + iso = asplit.find_subgraph_isomorphisms(unstable_pair,save_order=True) + if iso: + inds = [] + for a in iso[0].keys(): + assert a in asplit.atoms + if a.is_bonded_to_surface() and not a.is_surface_site(): + inds.append(asplit.atoms.index(a)-snum) + unstable_ind_pairs.add(frozenset(inds)) #groups of atom inds that if they are in a isomorphism indicate to allow anything for that ts split + + struct = mol2D + if is_ts: + structspl = split_ts_to_reactants(struct,tagatoms=False) + else: + structspl = [struct] + + validity_judgements = [] + failed = False + for st in structspl: + snum = len([a for a in st.atoms if a.is_surface_site()]) + failed = False + for unstable_pair in unstable_pairs: + iso = st.find_subgraph_isomorphisms(unstable_pair,save_order=True) + + if iso: + inds = [] + for a in iso[0].keys(): + if a.is_bonded_to_surface() and not a.is_surface_site(): + inds.append(st.atoms.index(a)-snum) + if frozenset(inds) in unstable_ind_pairs: + pass + else: + failed = True + if not failed: + validity_judgements.append(True) + else: + validity_judgements.append(False) + + return all(validity_judgements) + +def get_best_adsorbate_xyz(adsorbate_path,sites,nslab): + """ + load the adsorbates associated with the reaction and find the unique optimized + adsorbate structures for each species + returns a dictionary mapping each adsorbate name to a list of ase.Atoms objects + """ + prefixes = os.listdir(adsorbate_path) + with open(os.path.join(adsorbate_path,"info.json"),"r") as f: + info = json.load(f) + ase_to_mol_surface_atom_map = {int(k):int(v) for k,v in info["gratom_to_molecule_surface_atom_map"].items()} + mol = Molecule().from_adjacency_list(info["adjlist"]) + geoms = [] + for prefix in prefixes: + path = os.path.join(adsorbate_path,prefix,prefix+".xyz") + if os.path.exists(path): + geoms.append(path) + xyzs = get_unique_sym(geoms) + adsorbate = None + min_energy = np.inf + Es,_,freq = get_adsorbate_energies(adsorbate_path) + for xyz in xyzs: + geo = read(xyz) + strind = os.path.split(xyz)[1].split(".")[0] + occ = get_occupied_sites(geo,sites,nslab) + required_surface_inds = set([ind+nslab for ind in ase_to_mol_surface_atom_map.keys()]) + found_surface_inds = set([site["bonding_index"] for site in occ]) + if len(occ) >= len(mol.get_adatoms()) and required_surface_inds.issubset(found_surface_inds): + if Es[strind] < min_energy: + adsorbate = xyz + min_energy = Es[strind] + + return adsorbate + +def adsorbate_interaction_decomposition(mol,local_radius=5): + surface_bonded_inds = [] + for i,at in enumerate(mol.atoms): + if at.is_bonded_to_surface() and not at.is_surface_site(): + surface_bonded_inds.append(i) + + structs = [] + for i,indi in enumerate(surface_bonded_inds): + for j,indj in enumerate(surface_bonded_inds): + if i > j: + st = mol.copy(deep=True) + st.atoms[indi].label = "*" + st.atoms[indj].label = "*" + paths = find_shortest_paths_sites(st.atoms[indi],st.atoms[indj]) + ad_atoms_i,ad_surf_sites_i = find_adsorbate_atoms_surface_sites(st.atoms[indi],st) + ad_atoms_j,ad_surf_sites_j = find_adsorbate_atoms_surface_sites(st.atoms[indj],st) + if st.atoms[indi] in ad_atoms_j: #i & j are part of same adsorbate/TS structure + continue + for p in paths: + for a in p: + if a.is_surface_site(): + a.label = "*" + + for ind in [indi,indj]: + for a in st.atoms: + if a.is_surface_site() and len(find_shortest_paths_sites(st.atoms[indi],a)[0]) - 2 <= local_radius: + a.label = "*" + + ats_to_delete = [a for a in st.atoms if a.is_surface_site() and a.label != "*" and a not in ad_surf_sites_i and a not in ad_surf_sites_j] + for a in ats_to_delete: + st.remove_atom(a) + try: + stout = [x for x in st.split() if x.contains_surface_site()][0] + except IndexError as e: + print(mol.to_adjacency_list()) + print(st.to_adjacency_list()) + raise e + for a in stout.atoms: + if a.is_surface_site(): + a.label = "" + + structs.append(stout) + + return structs + +def adsorbate_site_decomposition(mol): + surface_bonded_inds = [] + for i,at in enumerate(mol.atoms): + if at.is_bonded_to_surface() and not at.is_surface_site(): + surface_bonded_inds.append(i) + + structs = [] + for i,indi in enumerate(surface_bonded_inds): + st = mol.copy(deep=True) + st.atoms[indi].label = "*" + structs.append(st) + + return structs + +def get_adsorbed_atom_pairs(length=7, r_bonds=None): + """ + length is number of site atoms between adsorbates + """ + if r_bonds is None: + r_bonds = [1, 2, 3, 0.05] + groups = [] + for j in range(2,length+1): + g = Group().from_adjacency_list("""1 * R u0 px cx""") + a2 = g.atoms[0] + for i in range(j): + a = GroupAtom(atomtype=["X"],radical_electrons=[0],lone_pairs=[0],charge=[0]) + if i == 0: + b = GroupBond(a2, a, order=r_bonds) + else: + b = GroupBond(a2, a, order=["S"]) + g.add_atom(a) + g.add_bond(b) + a2 = a + a = GroupAtom(atomtype=["R"], label="*", radical_electrons=[0]) + b = GroupBond(a2, a, order=r_bonds) + g.add_atom(a) + g.add_bond(b) + groups.append(g) + + return groups + +import itertools +def get_adsorbed_atom_groups(Nad=3, length=7, r_bonds=None): + """ + length is number of site atoms between adsorbates + """ + assert Nad == 3, "Doesn't work for Nad=2 and the combination ordering seems to matter for Nad > 3, so only Nad=3" + if r_bonds is None: + r_bonds = [1, 2, 3, 0.05] + groups = [] + lengths = list(range(2,length+1)) + for comb in itertools.combinations(lengths,Nad): + g = Group().from_adjacency_list("""1 * R u0 px cx""") + a2 = g.atoms[0] + for k,j in enumerate(comb): + for i in range(j): + a = GroupAtom(atomtype=["X"],radical_electrons=[0],lone_pairs=[0],charge=[0]) + if i == 0: + b = GroupBond(a2, a, order=r_bonds) + else: + b = GroupBond(a2, a, order=["S"]) + g.add_atom(a) + g.add_bond(b) + a2 = a + if k+1 < len(comb): + a = GroupAtom(atomtype=["R"], label="*", radical_electrons=[0]) + b = GroupBond(a2, a, order=r_bonds) + g.add_atom(a) + g.add_bond(b) + else: + b = GroupBond(g.atoms[1],a2, order=["S"]) + g.add_bond(b) + + groups.append(g) + + return groups + +def get_atom_centered_correction(m,coadmol_E_dict): + out_structs = split_adsorbed_structures(m,clear_site_info=False) + correction = 0.0 + minE = min(coadmol_E_dict.values()) + for struct in out_structs: + for coadmol,E in coadmol_E_dict.items(): + if struct.is_isomorphic(coadmol,save_order=True): + correction += E - minE + break + return correction + +def get_atom_center_stability(m,coadmol_stability_dict): + out_structs = split_adsorbed_structures(m,clear_site_info=False) + for struct in out_structs: + for coadmol,v in coadmol_stability_dict.items(): + if struct.is_isomorphic(coadmol,save_order=True): + if not v: + return False + + return True + +def break_train_val_test(datums,test_fract=0.1,val_fract=0.1): + ds = datums[:] + N = len(datums) + Ntest = round(N*test_fract) + Nval = round(N*val_fract) + np.random.shuffle(ds) + test = ds[:Ntest] + val = ds[Ntest:Ntest+Nval] + train = ds[Nval+Ntest:] + return train,val,test + +def split_triad(tagged_grp): + labeled_atoms = tagged_grp.get_labeled_atoms("*") + + pair_atoms_inds = [] + a = labeled_atoms[0] + path = [a] + notatom = None + c = 0 + while c < 3: + if len(path) == 1: + anew = list(path[-1].bonds.keys())[0] + elif len(path[-1].bonds) == 3 and len(path) > 2: + anew = [x for x in path[-1].bonds.keys() if not x.is_surface_site()][0] + elif len(path[-1].bonds) == 3: + ats = list(path[-1].bonds.keys()) + for a in ats: + if a not in path and a is not notatom: + anew = a + break + else: + raise ValueError + elif len(path[-1].bonds) == 2: + ats = list(path[-1].bonds.keys()) + if ats[0] in path: + anew = ats[1] + else: + anew = ats[0] + path.append(anew) + + if not anew.is_surface_site(): + pair_atoms_inds.append([tagged_grp.atoms.index(a) for a in path]) + c += 1 + notatom = path[-3] + path = path[-2:][::-1] + + out_pairs = [] + for pair_inds in pair_atoms_inds: + g = tagged_grp.copy(deep=True) + ats = [g.atoms[ind] for ind in pair_inds] + atom_to_remove = [] + for a in g.atoms: + if a not in ats: + atom_to_remove.append(a) + for a in atom_to_remove: + g.remove_atom(a) + g.update() + out_pairs.append(g) + + return out_pairs + +def is_descendent_of_or_is(node,ancestor_node): + n = node + while n.parent is not None: + if n is ancestor_node: + return True + else: + n = n.parent + return False + +#currently not used anymore as the default algorithm is sufficient for pair-wise decompositions +class CoverageDependenceRegressor(MultiEvalSubgraphIsomorphicDecisionTreeRegressor): + def fit_rule(self, alpha=0.1): + max_depth = max([node.depth for node in self.nodes.values()]) + y = np.array([datum.value for datum in self.datums]) + preds = np.zeros(len(self.datums)) + self.node_uncertainties = dict() + weights = self.weights + W = self.W + triad_node = self.nodes["Root_Triad"] + for pair in [True,False]: + for depth in range(max_depth + 1): + if depth == 0: + self.nodes["Root"].rule = Rule(value=0.0,num_data=0) + continue #skip Root node + else: + if pair: + nodes = [node for node in self.nodes.values() if node.depth == depth and not is_descendent_of_or_is(node,triad_node)] + else: + nodes = [node for node in self.nodes.values() if node.depth == depth and is_descendent_of_or_is(node,triad_node)] + + if len(nodes) == 0: + continue + + # generate matrix + A = sp.lil_matrix((len(self.datums), len(nodes))) + y -= preds + + for i, datum in enumerate(self.datums): + for node in self.mol_node_maps[datum]["nodes"]: + while node is not None: + if node in nodes: + j = nodes.index(node) + A[i, j] += 1.0 + node = node.parent + + clf = linear_model.Lasso( + alpha=alpha, + fit_intercept=False, + tol=1e-4, + max_iter=1000000000, + selection="random", + ) + if weights is not None: + lasso = clf.fit(A, y, sample_weight=weights) + else: + lasso = clf.fit(A, y) + + preds = A * clf.coef_ + self.data_delta = preds - y + + for i, val in enumerate(clf.coef_): + nodes[i].rule = Rule(value=val, num_data=np.sum(A[:, i])) + + train_error = [self.evaluate(d.mol, estimate_uncertainty=False) - d.value for d in self.datums] + + logging.info("training MAE: {}".format(np.mean(np.abs(np.array(train_error))))) + + if self.validation_set: + val_error = [self.evaluate(d.mol, estimate_uncertainty=False) - d.value for d in self.validation_set] + val_mae = np.mean(np.abs(np.array(val_error))) + if val_mae < self.min_val_error: + self.min_val_error = val_mae + self.best_tree_nodes = list(self.nodes.keys()) + self.bestA = A + self.best_nodes = {k: v for k, v in self.nodes.items()} + self.best_mol_node_maps = { + k: {"mols": v["mols"][:], "nodes": v["nodes"][:]} + for k, v in self.mol_node_maps.items() + } + self.best_rule_map = {name:self.nodes[name].rule for name in self.best_tree_nodes} + self.val_mae = val_mae + logging.info("validation MAE: {}".format(self.val_mae)) + + if self.test_set: + test_error = [self.evaluate(d.mol) - d.value for d in self.test_set] + test_mae = np.mean(np.abs(np.array(test_error))) + logging.info("test MAE: {}".format(test_mae)) + + logging.info("# nodes: {}".format(len(self.nodes))) + +def train_sidt_cov_dep_regressor(pairs_datums,sampling_datums,r_site=None, + r_atoms=None,node_fract_training=0.7): + + if r_site is None: + r_site = ["","ontop","bridge","hcp","fcc"] + + if r_atoms is None: + r_atoms = ["C","O","N","H","X"] + + root_pair = Group().from_adjacency_list(""" + 1 * R u0 px cx {2,[S,D,T,Q,R]} + 2 X u0 p0 c0 {1,[S,D,T,Q,R]} + 3 * R u0 px cx {4,[S,D,T,Q,R]} + 4 X u0 p0 c0 {3,[S,D,T,Q,R]} + """) + + root_pair_node = Node(group=root_pair,name="Root",parent=None,depth=0) + + pair_nodes = [] + initial_pair_root_splits = get_adsorbed_atom_pairs(length=7,r_bonds=[1,2,3,0.05]) + for i,g in enumerate(initial_pair_root_splits): + gcleared = g.copy(deep=True) + gcleared.clear_labeled_atoms() + for d in pairs_datums+sampling_datums: + if d.mol.is_subgraph_isomorphic(gcleared,save_order=True): #only add if the group exists + n = Node(group=g,name=root_pair_node.name+"_"+str(i),parent=root_pair_node,depth=1) + pair_nodes.append(n) + root_pair_node.children.append(n) + break + + pairnodes = {n.name:n for n in [root_pair_node]+pair_nodes} + + Nfullnodes = (len(pairs_datums)+len(sampling_datums))*node_fract_training + + treepair = MultiEvalSubgraphIsomorphicDecisionTreeRegressor([adsorbate_interaction_decomposition], + nodes=pairnodes, + r=[ATOMTYPES[x] for x in r_atoms], + r_bonds=[1,2,3,0.05], + r_un=[0], + r_site=["","ontop","bridge","hcp","fcc"], + max_structures_to_generate_extensions=100, + fract_nodes_expand_per_iter=0.025, + iter_max=2, + iter_item_cap=100, + ) + + treepair.generate_tree(data=pairs_datums+sampling_datums,max_nodes=Nfullnodes) + + return treepair + +def train_sidt_cov_dep_classifier(datums,r_site=None, + r_atoms=None,node_fract_training=0.5,node_min=50): + + if r_site is None: + r_site = ["","ontop","bridge","hcp","fcc"] + + if r_atoms is None: + r_atoms = ["C","O","N","H","X"] + + root_node = Node(group=None,name="Root",depth=0) + + root_pair = Group().from_adjacency_list(""" + 1 * R u0 px cx {2,[S,D,T,Q,R]} + 2 X u0 p0 c0 {1,[S,D,T,Q,R]} + 3 * R u0 px cx {4,[S,D,T,Q,R]} + 4 X u0 p0 c0 {3,[S,D,T,Q,R]} + """) + + root_triad = Group().from_adjacency_list(""" + 1 * R u0 px cx {2,[S,D,T,Q,R]} + 2 X u0 p0 c0 {1,[S,D,T,Q,R]} + 3 * R u0 px cx {4,[S,D,T,Q,R]} + 4 X u0 p0 c0 {3,[S,D,T,Q,R]} + 5 * R u0 px cx {6,[S,D,T,Q,R]} + 6 X u0 p0 c0 {5,[S,D,T,Q,R]} + """) + + root_pair_node = Node(group=root_pair,name="Root_Pair",parent=root_node,depth=1) + root_triad_node = Node(group=root_triad,name="Root_Triad",parent=root_node,depth=1) + root_node.children.extend([root_pair_node,root_triad_node]) + + pair_nodes = [] + initial_root_splits = get_adsorbed_atom_pairs(length=7,r_bonds=[1,2,3,0.05]) + for i,g in enumerate(initial_root_splits): + n = Node(group=g,name=root_pair_node.name+"_"+str(i),parent=root_pair_node,depth=2) + pair_nodes.append(n) + root_pair_node.children.append(n) + + triad_nodes = [] + initial_root_splits = get_adsorbed_atom_groups(Nad=3,length=7,r_bonds=[1,2,3,0.05]) + for i,g in enumerate(initial_root_splits): + n = Node(group=g,name=root_triad_node.name+"_"+str(i),parent=root_triad_node,depth=2) + triad_nodes.append(n) + root_triad_node.children.append(n) + + nodes = {n.name:n for n in [root_node,root_pair_node,root_triad_node]+pair_nodes+triad_nodes} + + node_len = len(nodes) + + tree = MultiEvalSubgraphIsomorphicDecisionTreeBinaryClassifier([adsorbate_interaction_decomposition,adsorbate_triad_interaction_decomposition], + nodes=nodes, + r=[ATOMTYPES[x] for x in r_atoms], + r_bonds=[1,2,3,0.05], + r_un=[0], + r_site=r_site, + iter_max=2, + iter_item_cap=100, + max_structures_to_generate_extensions=100, + ) + + train_sample,val,test = break_train_val_test(datums,test_fract=0.0,val_fract=0.1) + + if len(train_sample)*node_fract_training > node_min: + tree.generate_tree(data=train_sample,validation_set=val,max_nodes=len(train_sample)*node_fract_training, + postpruning_based_on_val=True) + else: + tree.generate_tree(data=train_sample,validation_set=val,max_nodes=node_min) + + return tree + +def add_ad_to_site(admol,coad,site): + c = coad.copy(deep=True) + at = [a for a in c.atoms if a.is_bonded_to_surface()][0] + bd = [bd for a,bd in at.bonds.items() if a.is_surface_site()][0] + order = bd.order + for a in c.atoms: + if a.is_surface_site(): + c.remove_atom(a) + + atind = c.atoms.index(at) + sind = admol.atoms.index(site) + admol_length = len(admol.atoms) + + admolout = admol.copy(deep=True).merge(c) + snew = admolout.atoms[sind] + atnew = admolout.atoms[len(admol.atoms)+atind] + assert site.site == snew.site and site.morphology == snew.morphology + assert atnew.element == at.element + + bd = Bond(atnew,snew,order=order) + admolout.add_bond(bd) + + admolout.clear_labeled_atoms() + admolout.multiplicity=1 + try: + admolout.update(sort_atoms=False) + except Exception as e: + raise ValueError((e,admol.to_adjacency_list(),coad.to_adjacency_list(),admolout.to_adjacency_list())) + admolout.update_connectivity_values() + + return admolout + +def get_configurations(admol, coad, coad_stable_sites, tree_interaction_classifier=None, coadmol_stability_dict=None, unstable_groups=None, + tree_interaction_regressor=None, tree_atom_regressor=None, coadmol_E_dict=None, energy_tol=None): + empty_sites = [a for a in admol.atoms if a.is_surface_site() and a.site in coad_stable_sites and not any([not a2.is_surface_site() for a2 in a.bonds.keys()])] + print("empty sites") + print(len(empty_sites)) + empty_site_inds = [admol.atoms.index(s) for s in empty_sites] + outmols = [admol] + outmols_split = [[admol]] + lowest_energy_surface_bond_dict = dict() + print(len(outmols)) + for i in range(len(empty_sites)): + newoutmols = [] + for m in outmols_split[i]: #all configurations with a fixed number of co-adsorbates (highest number hit yet) + for sind in empty_site_inds: + if any(not a.is_surface_site() for a in m.atoms[sind].bonds.keys()): #site is already occupied + continue + + outmol = add_ad_to_site(m,coad,m.atoms[sind]) + + if (tree_interaction_classifier and not tree_interaction_classifier.evaluate(outmol)) or \ + (coadmol_stability_dict and not get_atom_center_stability(outmol,coadmol_stability_dict)): #unstable configuration + continue + + if unstable_groups: + is_unstable = False + for grp in unstable_groups: + if outmol.is_subgraph_isomorphic(grp,save_order=True): + is_unstable = True + break + if is_unstable: + continue + + for mol in newoutmols: + if outmol.is_isomorphic(mol,save_order=True): + break + else: + if energy_tol: #skip configurations that are more than energy_tol higher than the lowest energy configuraiton for a given coverage found + if tree_atom_regressor is not None: + E = tree_atom_regressor.evaluate(m) + tree_interaction_regressor.evaluate(m) + elif coadmol_E_dict is not None: + E = get_atom_centered_correction(m,coadmol_E_dict) + tree_interaction_regressor.evaluate(m) + else: + raise ValueError + + Nsurfbonds = len([b for b in m.get_all_edges() if (b.atom1.is_surface_site() and not b.atom2.is_surface_site()) or (b.atom2.is_surface_site() and not b.atom1.is_surface_site())]) + if Nsurfbonds not in lowest_energy_surface_bond_dict.keys() or lowest_energy_surface_bond_dict[Nsurfbonds] > E: + lowest_energy_surface_bond_dict[Nsurfbonds] = E + newoutmols.append(outmol) + elif lowest_energy_surface_bond_dict[Nsurfbonds] + energy_tol < E: + pass #don't add configuration + else: + newoutmols.append(outmol) + else: + newoutmols.append(outmol) + + outmols.extend(newoutmols) + outmols_split.append(newoutmols) + print(len(outmols)) + + return outmols + +def check_stable(config,stability_datums): + for d in stability_datums: + if d.value == False and config.is_isomorphic(d.mol,save_order=True): + return False + else: + return True + +def evaluate_from_datums(config,energy_datums): + for d in energy_datums: + if config.is_isomorphic(d.mol,save_order=True): + return d.value + else: + return None + +def get_cov_energies_configs_concern_tree(tree_interaction_regressor, configs, coad_stable_sites, Ncoad_isolated, concern_energy_tol=None, tree_atom_regressor=None, coadmol_E_dict=None, + stability_datums=None): + Ncoad_energy_dict = dict() + Ncoad_config_dict = dict() + config_to_Eunctr = dict() + configs_of_concern = {} + Nempty = len([a for a in configs[0].atoms if a.is_surface_site() and a.site in coad_stable_sites]) + for i,m in enumerate(configs): + Ncoad = len([a for a in m.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) - Ncoad_isolated + + if stability_datums: + stable = check_stable(m,stability_datums) + else: + stable = True + + if not stable: + continue + + if tree_atom_regressor is not None: + Einteraction,std,tr = tree_interaction_regressor.evaluate(m,trace=True, estimate_uncertainty=True) + E = tree_atom_regressor.evaluate(m) + Einteraction + elif coadmol_E_dict is not None: + Einteraction,std,tr = tree_interaction_regressor.evaluate(m,trace=True, estimate_uncertainty=True) + E = get_atom_centered_correction(m,coadmol_E_dict) + Einteraction + else: + raise ValueError + + config_to_Eunctr[i] = (m,E,std,tr) + + if Ncoad not in Ncoad_energy_dict.keys(): + Ncoad_energy_dict[Ncoad] = E + Ncoad_config_dict[Ncoad] = m.to_adjacency_list() + elif E < Ncoad_energy_dict[Ncoad]: + Ncoad_energy_dict[Ncoad] = E + Ncoad_config_dict[Ncoad] = m.to_adjacency_list() + elif E == Ncoad_energy_dict[Ncoad]: + if isinstance(Ncoad_config_dict[Ncoad],list): + Ncoad_config_dict[Ncoad].append(m.to_adjacency_list()) + else: + Ncoad_config_dict[Ncoad] = [Ncoad_config_dict[Ncoad], m.to_adjacency_list()] + + for i in config_to_Eunctr.keys(): + Ncoad = len([a for a in m.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) - Ncoad_isolated + m,E,std,tr = config_to_Eunctr[i] + if ((concern_energy_tol is None) or (Ncoad_energy_dict[Ncoad] + concern_energy_tol > E)): + configs_of_concern[i] = (m,E,tr,std) + + return Ncoad_energy_dict,Ncoad_config_dict,configs_of_concern + +def get_cov_energies(tree_interaction_regressor, configs, coad_stable_sites, Ncoad_isolated, tree_atom_regressor=None, coadmol_E_dict=None, + stability_datums=None): + Ncoad_energy_dict = dict() + Ncoad_config_dict = dict() + Nempty = len([a for a in configs[0].atoms if a.is_surface_site() and a.site in coad_stable_sites]) + for m in configs: + Ncoad = len([a for a in m.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) - Ncoad_isolated + + if tree_atom_regressor is not None: + E = tree_atom_regressor.evaluate(m) + tree_interaction_regressor.evaluate(m) + elif coadmol_E_dict is not None: + E = get_atom_centered_correction(m,coadmol_E_dict) + tree_interaction_regressor.evaluate(m) + else: + raise ValueError + if Ncoad not in Ncoad_energy_dict.keys(): + if (not stability_datums) or check_stable(m,stability_datums): + Ncoad_energy_dict[Ncoad] = E + Ncoad_config_dict[Ncoad] = m.to_adjacency_list() + elif E < Ncoad_energy_dict[Ncoad]: + if (not stability_datums) or check_stable(m,stability_datums): + Ncoad_energy_dict[Ncoad] = E + Ncoad_config_dict[Ncoad] = m.to_adjacency_list() + elif E == Ncoad_energy_dict[Ncoad]: + if (not stability_datums) or check_stable(m,stability_datums): + if isinstance(Ncoad_config_dict[Ncoad],list): + Ncoad_config_dict[Ncoad].append(m.to_adjacency_list()) + else: + Ncoad_config_dict[Ncoad] = [Ncoad_config_dict[Ncoad], m.to_adjacency_list()] + return Ncoad_energy_dict,Ncoad_config_dict + +def get_configs_of_concern(tree_interaction_regressor,configs,coad_stable_sites,Ncoad_energy_dict,Nocc_isolated,concern_energy_tol,tree_atom_regressor=None, + coadmol_E_dict=None): + configs_of_concern = {} + Nempty = len([a for a in configs[0].atoms if a.is_surface_site() and a.site in coad_stable_sites]) + for m in configs: + Ncoad = len([a for a in m.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) - Nocc_isolated + if tree_atom_regressor is not None: + Einteraction,std,tr = tree_interaction_regressor.evaluate(m,trace=True, estimate_uncertainty=True) + E = tree_atom_regressor.evaluate(m) + Einteraction + elif coadmol_E_dict is not None: + Einteraction,std,tr = tree_interaction_regressor.evaluate(m,trace=True, estimate_uncertainty=True) + E = get_atom_centered_correction(m,coadmol_E_dict) + Einteraction + else: + raise ValueError + if Ncoad_energy_dict[Ncoad] + concern_energy_tol > E: + configs_of_concern[m] = (E,tr,std) + + return configs_of_concern + +def load_coverage_delta(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,ts_pynta_dir=None,allowed_structure_site_structures=None, + out_file_name="out",vib_file_name="vib",is_ad=None,keep_binding_vdW_bonds=False,keep_vdW_surface_bonds=False): + + try: + info = json.load(open(os.path.join(d,"info.json"))) + except: + info = None + + if is_ad is None: + is_ts = "xyz" in info.keys() and os.path.split(os.path.split(os.path.split(info["xyz"])[0])[0])[1][:2] == "TS" + else: + is_ts = not is_ad + + atoms = read(os.path.join(d,out_file_name+".xyz")) + + if not is_ts: + try: + admol,neighbor_sites,ninds = generate_adsorbate_2D(atoms, sites, site_adjacency, len(slab), max_dist=np.inf, cut_multidentate_off_num=None, + allowed_structure_site_structures=allowed_structure_site_structures, + keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + except (SiteOccupationException,TooManyElectronsException,ValueError) as e: + return None,None,None,None + + logging.error(admol.to_adjacency_list()) + split_structs = split_adsorbed_structures(admol) + try: + vibdata = get_vibdata(os.path.join(d,out_file_name+".xyz"),os.path.join(d,vib_file_name+".json"),len(slab)) + + Ecad = atoms.get_potential_energy() - slab.get_potential_energy() + vibdata.get_zero_point_energy() + + Esep = 0.0 + for split_struct in split_structs: + # if not split_struct.contains_surface_site(): #if gas phase species don't extract + # return None,None,None,None + for ad,E in ad_energy_dict.items(): + if split_struct.is_isomorphic(ad,save_order=True): + Esep += E + break + else: + logging.error("no matching adsorbate for {}".format(split_struct.to_smiles())) + return None,None,None,None + + dE = Ecad - Esep + except: + dE = None + else: + if ts_pynta_dir is None: + xyz = info["xyz"] + ts_path = os.path.split(os.path.split(info["xyz"])[0])[0] + else: + ts_path = os.path.join(ts_pynta_dir,os.path.split(os.path.split(os.path.split(info["xyz"])[0])[0])[1]) + xyz = os.path.join(ts_path,os.path.split(os.path.split(info["xyz"])[0])[1],os.path.split(info["xyz"])[1]) + + ts_info_path = os.path.join(ts_path,"info.json") + + try: + admol,neighbor_sites,ninds = generate_TS_2D(atoms, ts_info_path, metal, facet, sites, site_adjacency, len(slab), imag_freq_path=os.path.join(d,"vib.0.traj"), + max_dist=np.inf, allowed_structure_site_structures=allowed_structure_site_structures, + keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + except (SiteOccupationException,TooManyElectronsException, ValueError) as e: + return None,None,None,None + try: + vibdata = get_vibdata(os.path.join(d,out_file_name+".xyz"),os.path.join(d,vib_file_name+".json"),len(slab)) + except FileNotFoundError: + return admol,neighbor_sites,ninds,None + + Ecad = atoms.get_potential_energy() - slab.get_potential_energy() + vibdata.get_zero_point_energy() + + + ts = read(xyz) + ts_vibdata = get_vibdata(xyz,os.path.join(os.path.split(xyz)[0],"vib.json_vib.json"),len(slab)) + Ets = ts.get_potential_energy() - slab.get_potential_energy() + ts_vibdata.get_zero_point_energy() + + num_ts_atoms = len(ts) - len(slab) + + coatoms = atoms.copy() + for i in range(num_ts_atoms): #remove ts atoms before we identify coadsorbates + coatoms.pop(len(slab)) + try: + coadmol,coneighbor_sites,coninds = generate_adsorbate_2D(coatoms, sites, site_adjacency, len(slab), max_dist=np.inf, cut_multidentate_off_num=None) + except (SiteOccupationException,TooManyElectronsException,ValueError) as e: + return None,None,None,None + + split_structs = split_adsorbed_structures(coadmol) + Esep = Ets + for split_struct in split_structs: + for ad,E in ad_energy_dict.items(): + if split_struct.is_isomorphic(ad,save_order=True): + Esep += E + break + else: + logging.error("no matching adsorbate for {}".format(split_struct.to_smiles())) + return None,None,None,None + + dE = Ecad - Esep + + return admol,neighbor_sites,ninds,dE + + + +def check_TS_coadsorbate_disruption(atoms,admol,nslab3D,nslab2D,ntsatoms,mult=1.3): + ad = atoms[nslab3D:] + cutoffs = natural_cutoffs(ad,mult=mult) + adanalysis = Analysis(ad,cutoffs=cutoffs) + adadj = adanalysis.adjacency_matrix[0] + for i in range(nslab2D+ntsatoms,len(admol.atoms)): + at = admol.atoms[i] + ind = i - nslab2D + if adadj[ind,:ntsatoms].sum() > 0 or adadj[:ntsatoms,ind].sum() > 0 : + for k in range(ntsatoms): + if (not adadj[ind,k]) and (not adadj[k,ind]): + continue + else: + at = admol.atoms[nslab2D+k] + for bd in admol.get_bonds(at).values(): + if bd.get_order_str() == 'R': + return True + + return False + +def tagsites(atoms,sites): + aview = deepcopy(atoms) + anames = ['He','Ne','Ar','Kr','Xe','Rn'] + for i,site in enumerate(sites): + add_adsorbate_to_site(aview,Atoms(anames[i], [(0, 0, 0)]), 0, sites[i], height=1.0) + return aview + +def extract_sample(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,pynta_dir,max_dist=3.0,rxn_alignment_min=0.7, + coad_disruption_tol=1.1,out_file_name="out",init_file_name="init",vib_file_name="vib",is_ad=None, + use_allowed_site_structures=True): + out_dict = dict() + + atoms_init = read(os.path.join(d,init_file_name+".xyz")) + if not os.path.exists(os.path.join(d,out_file_name+".xyz")): + atoms = None + else: + atoms = read(os.path.join(d,out_file_name+".xyz")) + + nslab = len(slab) + + if len(atoms_init) < len(slab): #gas phase + return None + #view(atoms_init) + #view(atoms) + # try: + # vibdata = get_vibdata(os.path.join(d,out_file_name+".xyz"),os.path.join(d,vib_file_name+".json"),len(slab)) + # except: + # vibdata = None + keep_binding_vdW_bonds=False + keep_vdW_surface_bonds=False + + try: + with open(os.path.join(d,"info.json"),'r') as f: + info = json.load(f) + mol = Molecule().from_adjacency_list(info["adjlist"]) + for bd in mol.get_all_edges(): + if bd.order == 0: + if bd.atom1.is_surface_site() or bd.atom2.is_surface_site(): + keep_binding_vdW_bonds = True + m = mol.copy(deep=True) + b = m.get_bond(m.atoms[mol.atoms.index(bd.atom1)],m.atoms[mol.atoms.index(bd.atom2)]) + m.remove_bond(b) + out = m.split() + if len(out) == 1: #vdW bond is not only thing connecting adsorbate to surface + keep_vdW_surface_bonds = True + except FileNotFoundError: + info = None + + if is_ad is None: + is_ad = os.path.split(os.path.split(os.path.split(os.path.split(info['xyz'])[0])[0])[0])[1] == 'Adsorbates' + + if is_ad and info: + orig_xyz = os.path.join(pynta_dir,os.sep.join(os.path.normpath(info['xyz']).split(os.sep)[-4:])) + else: + orig_xyz = os.path.join(pynta_dir,os.sep.join(os.path.normpath(info['xyz']).split(os.sep)[-3:])) + + if atoms is None: + out_dict["isomorphic"] = False + out_dict["init_info"] = info["adjlist"] + out_dict["out"] = None + out_dict["init_extracted"] = None + out_dict["sample_dir"] = d + out_dict["orig_xyz"] = orig_xyz + out_dict["valid"] = False + out_dict["dE"] = None + return out_dict + + if info: + with open(os.path.join(os.path.split(os.path.split(orig_xyz)[0])[0],"info.json"),'r') as f: + info_clean = json.load(f) + if orig_xyz: + if not is_ad: + reactants_clean = Molecule().from_adjacency_list(info_clean["reactants"]) + cut_multidentate_off_num = len(atoms) - nslab - len([a for a in reactants_clean.atoms if not a.is_surface_site()]) + else: + ad_clean = Molecule().from_adjacency_list(info_clean["adjlist"]) + cut_multidentate_off_num = len(atoms) - nslab - len([a for a in ad_clean.atoms if not a.is_surface_site()]) + else: + cut_multidentate_off_num = None + + if use_allowed_site_structures: + allowed_structure_site_structures = generate_allowed_structure_site_structures(os.path.join(pynta_dir,"Adsorbates"), + sites,site_adjacency,nslab,max_dist=max_dist, + cut_multidentate_off_num=None) + else: + allowed_structure_site_structures = None + + admol,neighbor_sites,ninds,dE = load_coverage_delta(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency, + ts_pynta_dir=pynta_dir,allowed_structure_site_structures=allowed_structure_site_structures, + out_file_name=out_file_name,vib_file_name=vib_file_name,is_ad=is_ad, + keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + + + if is_ad: + try: + admol_init,neighbor_sites_init,ninds_init = generate_adsorbate_2D(atoms_init, sites, site_adjacency, nslab, + max_dist=None, cut_multidentate_off_num=cut_multidentate_off_num, allowed_structure_site_structures=allowed_structure_site_structures, + keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + except TooManyElectronsException: + admol_init = None + neighbor_sites_init = None + ninds_init = None + + if info: + admol_info = Molecule().from_adjacency_list(info["adjlist"],check_consistency=False) + + valid = admol is not None #and len(admol.split()) == 1 + + else: + valid = True + info_path = os.path.join(os.path.split(os.path.split(orig_xyz)[0])[0],"info.json") + imag_freq_path = os.path.join(os.path.split(orig_xyz)[0],"vib.0.traj") + + try: + admol_init,neighbor_sites_init,ninds_init = generate_TS_2D(atoms_init, info_path, metal, facet, sites, site_adjacency, nslab, + imag_freq_path=imag_freq_path, max_dist=None, cut_multidentate_off_num=cut_multidentate_off_num, + allowed_structure_site_structures=allowed_structure_site_structures, + keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + except TooManyElectronsException as e: + valid = False + admol_init = None + except SiteOccupationException as e: + logging.error("raised SiteOccupationException on init structure") + valid = False + admol_init = None + + admol_info = Molecule().from_adjacency_list(info["adjlist"],check_consistency=False) + + if valid: + valid = not (admol is None) + if not valid: + logging.error("TS admol has disconnected graph") + + if valid: + #convert TS path + ts_path = os.path.join(pynta_dir, os.path.split(os.path.split(os.path.split(orig_xyz)[0])[0])[1], os.path.split(os.path.split(orig_xyz)[0])[1], os.path.split(orig_xyz)[1]) + + vibdata_ts = get_vibdata(ts_path,os.path.join(os.path.split(ts_path)[0],"vib.json_vib.json"),len(slab)) + with open(os.path.join(os.path.split(os.path.split(ts_path)[0])[0],"info.json"),'r') as f: + ts_info = json.load(f) + + nslab3D = ts_info["nslab"] + ntsatoms = len([a for a in Molecule().from_adjacency_list(ts_info['reactants']).atoms if not a.is_surface_site()]) + nslab2D = len(ninds) + + if os.path.exists(os.path.join(d,vib_file_name+".json")): + coad_disrupt = check_TS_coadsorbate_disruption(atoms,admol,nslab3D,nslab2D,ntsatoms,mult=coad_disruption_tol) + tr_sample = Trajectory(os.path.join(d,"vib.0.traj")) + + #view(tr_sample) + tr_ts = Trajectory(os.path.join(os.path.split(ts_path)[0],"vib.0.traj")) + v_ts = tr_ts[15].positions[nslab:] - tr_ts[16].positions[nslab:] + v_ts /= np.linalg.norm(v_ts) + v_sample = tr_sample[15].positions[nslab:] - tr_sample[16].positions[nslab:] + v_sample /= np.linalg.norm(v_sample) + v_prod = np.abs(np.sum(v_sample[:len(v_ts)]*v_ts)) + + if len(admol.split()) > 1 or v_prod < rxn_alignment_min or coad_disrupt: + valid = False + else: + valid = True + else: + coad_disrupt = None + valid = None + + out_dict["valid"] = valid + out_dict["dE"] = dE + if admol is None or admol_init is None: + out_dict["isomorphic"] = False + else: + out_dict["isomorphic"] = admol.is_isomorphic(admol_init,save_order=True) + if admol: + out_dict["out"] = admol.to_adjacency_list() + else: + out_dict["out"] = None + if info: + out_dict["init_info"] = admol_info.to_adjacency_list() + if admol_init: + out_dict["init_extracted"] = admol_init.to_adjacency_list() + else: + out_dict["init_extracted"] = None + out_dict['orig_xyz'] = orig_xyz + out_dict['sample_dir'] = d + return out_dict + +def process_calculation(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,pynta_dir,coadmol_E_dict,max_dist=3.0,rxn_alignment_min=0.7, + coad_disruption_tol=1.1,out_file_name="out",init_file_name="init",vib_file_name="vib",is_ad=None): + + datums_stability = [] + datum_E = None + + outdict = extract_sample(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,pynta_dir,max_dist=max_dist,rxn_alignment_min=rxn_alignment_min, + coad_disruption_tol=coad_disruption_tol, + out_file_name=out_file_name,init_file_name=init_file_name,vib_file_name=vib_file_name,is_ad=is_ad) + + if outdict["init_extracted"]: + mol_init = Molecule().from_adjacency_list(outdict["init_extracted"],check_consistency=False) + else: + mol_init = Molecule().from_adjacency_list(outdict["init_info"],check_consistency=False) + + if (outdict["valid"] is not None) and (not outdict["isomorphic"]): + datums_stability.append(Datum(mol_init,False)) + + if outdict["out"] is not None: + mol_out = Molecule().from_adjacency_list(outdict["out"],check_consistency=False) + + if len(mol_out.split()) == 1: #ignore if desorbed species + if outdict["valid"] and outdict["dE"] is not None: #there is an energy value + skip = False + for at in mol_out.atoms: + n = 0 + if at.is_surface_site(): + for k,v in mol_out.get_edges(at).items(): + if not k.is_surface_site(): + n += 1 + if n > 1: + skip = True + if not skip: + try: + datum_E = Datum(mol_out,outdict["dE"]*96.48530749925793*1000.0 - get_atom_centered_correction(mol_out,coadmol_E_dict)) #eV to J/mol + datums_stability.append(Datum(mol_out,True)) + except KeyError: + pass + + + return datum_E,datums_stability + +def get_configs_for_calculation(configs_of_concern_by_admol,Ncoad_energy_by_admol,admol_name_structure_dict, + computed_configs,tree_regressor,Ncalc_per_iter,T=np.inf,calculation_selection_iterations=10): + group_to_occurence = dict() + configs_of_concern = [] + for admol_name in configs_of_concern_by_admol.keys(): + admol = admol_name_structure_dict[admol_name] + Nocc_isolated = len([a for a in admol.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) + configs_of_concern_admol = configs_of_concern_by_admol[admol_name] + group_to_occurence_admol = dict() + N = 0 #number of group contributions associated with given admol + for v in configs_of_concern_admol: + m = v[0] + E = v[1] + sigma = v[3] + grps = v[2] + Nocc = len([a for a in m.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) + configs_of_concern.append(v) + Emin = Ncoad_energy_by_admol[admol_name][Nocc-Nocc_isolated] + for grp in grps: + if grp in group_to_occurence_admol: + group_to_occurence_admol[grp] += np.exp(-(E-Emin)/(8.314*T)) + N += 1 + else: + group_to_occurence_admol[grp] = np.exp(-(E-Emin)/(8.314*T)) + N += 1 + for g,n in group_to_occurence_admol.items(): + if grp in group_to_occurence: + group_to_occurence[grp] += n/N + else: + group_to_occurence[grp] = n/N + + concern_groups = list(group_to_occurence.keys()) #selected ordering + + group_to_weight = np.array([group_to_occurence[g]*tree_regressor.nodes[g].rule.uncertainty for g in concern_groups]) + + config_to_group_fract = dict() + for j,v in enumerate(configs_of_concern): + config,E,tr,std = v + config_group_unc = np.array([tr.count(g)*tree_regressor.nodes[g].rule.uncertainty for g in concern_groups]) + s = config_group_unc.sum() + if s == 0: + config_to_group_fract[j] = config_group_unc + else: + config_to_group_fract[j] = config_group_unc/s + + + configs_for_calculation = [] + group_fract_for_calculation = [] + maxval = 0.0 + config_list = [x[0] for x in configs_of_concern] + #sort lower numbers of coadsorbates first, larger numbers of coadsorbates later + sorted_config_list = sorted(config_list[:], key = lambda x: len([bd for bd in x.get_all_edges() if (bd.atom1.is_surface_site() and not bd.atom2.is_surface_site()) or (bd.atom2.is_surface_site() and not bd.atom1.is_surface_site())])) + + for q in range(calculation_selection_iterations): #loop the greedy optimization a number of times to get close to local min + for config in sorted_config_list: + ind = [i for i,c in enumerate(config_list) if c is config][0] + if ind not in config_to_group_fract.keys(): + logging.error("config not in config_to_group_fract") + continue + for c in computed_configs: + if config.is_isomorphic(c,save_order=True): + break + else: + if len(configs_for_calculation) < Ncalc_per_iter: + configs_for_calculation = configs_for_calculation + [config] + group_fract = config_to_group_fract[ind] + group_fract_for_calculation.append(group_fract) + maxval = np.linalg.norm(sum(group_fract_for_calculation) * group_to_weight, ord=1) + else: + group_fract = config_to_group_fract[j] + g_old_sum = sum(group_fract_for_calculation) + maxarglocal = None + maxvallocal = maxval + for i in range(Ncalc_per_iter): + val = np.linalg.norm((g_old_sum - group_fract_for_calculation[i] + group_fract) * group_to_weight, ord=1) + if val > maxvallocal: + maxarglocal = i + maxvallocal = val + + if maxarglocal is not None: + group_fract_for_calculation[maxarglocal] = group_fract + configs_for_calculation[maxarglocal] = config + maxval = maxvallocal + + admol_to_config_for_calculation = dict() + for config in configs_for_calculation: + for admol_name,v in configs_of_concern_by_admol.items(): + if any(x[0] is config for x in v): + if admol_name in admol_to_config_for_calculation.keys(): + admol_to_config_for_calculation[admol_name].append(config) + else: + admol_to_config_for_calculation[admol_name] = [config] + break + else: + raise ValueError + + return configs_for_calculation,admol_to_config_for_calculation + +def mol_to_atoms(admol,slab,sites,metal,partial_atoms=None,partial_admol=None): + """Generate a 3D initial guess for a given 2D admol configuration + + Args: + admol (_type_): 2D representation of the configuration + slab (_type_): 3D Atoms object for the slab + sites (_type_): list of sites + metal (_type_): metal of the slab + partial_atoms (_type_, optional): a 3D Atoms object representing part of the target configuraitons. Defaults to None. + partial_admol (_type_, optional): 2D representation of the partial_atoms Atoms object. Defaults to None. + + Raises: + ValueError: _description_ + ValueError: _description_ + + Returns: + _type_: Atoms object corresponding to the admol 2D configuration + """ + if partial_atoms and partial_admol: + atoms = deepcopy(partial_atoms) + gpartial_admol = partial_admol.to_group() + admol_atom_order = admol.atoms[:] + gpartial_admol_order = gpartial_admol.atoms[:] + try: + subisos = admol.find_subgraph_isomorphisms(gpartial_admol,save_order=True) + except ValueError: + subisos = admol.find_subgraph_isomorphisms(gpartial_admol) + admol.atoms = admol_atom_order + gpartial_admol.atoms = gpartial_admol_order + if len(subisos) == 0: + raise ValueError("partial_admol is not subgraph isomorphic to admol: {0}, {1}".format(gpartial_admol.to_adjacency_list(),admol.to_adjacency_list())) + subiso = subisos[0] + atoms_in_partial = [a for a in subiso.keys() if not a.is_surface_site()] + split_structs,adsorbed_atom_dict = split_adsorbed_structures(admol,clear_site_info=False,adsorption_info=True,atoms_to_skip=atoms_in_partial) + elif partial_atoms is None and partial_admol is None: + atoms = deepcopy(slab) + split_structs,adsorbed_atom_dict = split_adsorbed_structures(admol,clear_site_info=False,adsorption_info=True) + else: + raise ValueError("Must include both partial_atoms, partial_admol to start from partial") + + for st in split_structs: + if len(st.atoms) == 0: + continue + ad,mol_to_atoms_map = get_adsorbate(st) + + if len(st.atoms) > 2: #not atomic adsorbate + rot_vec = np.array([0.0,0.0,0.0]) + for atst in st.atoms: + if atst.is_bonded_to_surface(): + adatom_molind = st.atoms.index(atst) + for bdat in atst.bonds.keys(): + if not bdat.is_surface_site(): + rot_vec += ad.positions[mol_to_atoms_map[st.atoms.index(bdat)]] - ad.positions[mol_to_atoms_map[adatom_molind]] + rot_vec /= np.linalg.norm(rot_vec) + ad.rotate(rot_vec,[0.0,0.0,1.0]) #rotate bonds toward the +z-axis + else: + adatom_molind = st.atoms.index([a for a in st.atoms if not a.is_surface_site()][0]) + + atom_surf_inds = [] + ad_sites = [] + for a,sind in adsorbed_atom_dict.items(): + if a in st.atoms: + assert sites[sind]["site"] == a.site, (admol.to_adjacency_list(),partial_admol.to_adjacency_list()) + atom_surf_inds.append(mol_to_atoms_map[adatom_molind]) + ad_sites.append(sites[sind]) + atoms,_,_ = place_adsorbate(ad,atoms,atom_surf_inds,ad_sites,metal) + + return atoms \ No newline at end of file diff --git a/pynta/geometricanalysis.py b/pynta/geometricanalysis.py new file mode 100644 index 00000000..b49d34ed --- /dev/null +++ b/pynta/geometricanalysis.py @@ -0,0 +1,1165 @@ +from molecule.molecule import Molecule,Atom,Bond +from ase.io import read, write +from ase.geometry import get_distances +from ase.geometry.analysis import Analysis +from ase.io.trajectory import Trajectory +from ase.vibrations import VibrationsData +from ase.thermochemistry import HarmonicThermo, IdealGasThermo +from pynta.utils import get_unique_sym, get_occupied_sites, sites_match, SiteOccupationException +from pynta.mol import * +import numpy as np +import json +import shutil +import os +import logging + +def generate_site_molecule(slab, target_sites, sites, site_adjacency, max_dist=None): + + #find overall neighboring sites + if max_dist: + neighbor_sites = [] + ninds = [] + for i,site in enumerate(sites): + for target_site in target_sites: + v,dist = get_distances([site["position"]], [target_site["position"]], cell=slab.cell, pbc=slab.pbc) + + if np.linalg.norm(v[:2]) < max_dist: + neighbor_sites.append(site) + ninds.append(i) + break + else: + neighbor_sites = sites[:] + ninds = list(range(len(sites))) + + atoms = [] + + for nsite in neighbor_sites: + atoms.append(Atom(element="X", lone_pairs=0, site=nsite["site"], morphology=nsite["morphology"])) + + mol = Molecule(atoms=atoms,multiplicity=1) + + + for i,nind in enumerate(ninds): + adjinds = site_adjacency[nind] + for adjind in adjinds: + if adjind in ninds: + j = ninds.index(adjind) + if not mol.has_bond(mol.atoms[i],mol.atoms[j]): + mol.add_bond(Bond(mol.atoms[i],mol.atoms[j],1.0)) + + return mol,neighbor_sites,ninds + + +def generate_adsorbate_molecule(adslab, sites, site_adjacency, nslab, max_dist=None, allowed_site_dict=dict()): + #adsorbate + ad = adslab[nslab:] + adanalysis = Analysis(ad) + adadj = adanalysis.adjacency_matrix[0] + adatoms = [] + for at in ad: + sym = at.symbol + if sym in ['N','P']: + adatoms.append(Atom(element=sym, lone_pairs=1)) + elif sym in ['O','S']: + adatoms.append(Atom(element=sym, lone_pairs=2)) + elif sym in ["F","Cl","Br","I"]: + adatoms.append(Atom(element=sym, lone_pairs=3)) + elif sym in ["Si","C","H","Li"]: + adatoms.append(Atom(element=sym, lone_pairs=0)) + else: + raise ValueError("Cannot handle atom {}".format(sym)) + + admol = Molecule(atoms=adatoms) + for i in range(len(adatoms)): + for j in range(len(adatoms)): + if i == j: + continue + if adadj[i,j] and not admol.has_bond(admol.atoms[i],admol.atoms[j]): + admol.add_bond(Bond(admol.atoms[i],admol.atoms[j],1.0)) + + #slab + occ = get_occupied_sites(adslab,sites,nslab,allowed_site_dict=allowed_site_dict) + target_sites = [site for site in sites if any((oc["position"] == site["position"]).all() for oc in occ)] + + #find overall neighboring sites + if max_dist: + neighbor_sites = [] + ninds = [] + for i,site in enumerate(sites): + for target_site in target_sites: + v,dist = get_distances([site["position"]], [target_site["position"]], cell=adslab.cell, pbc=adslab.pbc) + if np.linalg.norm(v[:2]) < max_dist: + neighbor_sites.append(site) + ninds.append(i) + break + else: + neighbor_sites = sites[:] + ninds = list(range(len(sites))) + + atoms = [] + + for nsite in neighbor_sites: + atoms.append(Atom(element="X", lone_pairs=0, site=nsite["site"], morphology=nsite["morphology"])) + + mol = Molecule(atoms=atoms,multiplicity=1) + + + for i,nind in enumerate(ninds): + adjinds = site_adjacency[nind] + for adjind in adjinds: + if adjind in ninds: + j = ninds.index(adjind) + if not mol.has_bond(mol.atoms[i],mol.atoms[j]): + mol.add_bond(Bond(mol.atoms[i],mol.atoms[j],1.0)) + + #merge adsorbate and slab + fullmol = Molecule(atoms=mol.atoms+admol.atoms) + fullmol.multiplicity = 1 + + for oc in occ: + adind = oc["bonding_index"] - nslab + len(neighbor_sites) + nind = [ninds[i] for i,nsite in enumerate(neighbor_sites) if (nsite["position"]==oc["position"]).all()][0] + site_index = ninds.index(nind) + fullmol.add_bond(Bond(fullmol.atoms[site_index],fullmol.atoms[adind],0)) + + return fullmol,neighbor_sites,ninds + +def fix_bond_orders(mol,allow_failure=False,keep_binding_vdW_bonds=False,keep_vdW_surface_bonds=False): + atom_order = ["C","Si","N","P","O","S","F","Cl","Br","I","Li","H"]#["H","Li","I","Br","Cl","F","S","O","P","N","Si","C"]#["C","Si","N","P","O","S","F","Cl","Br","I","Li","H"] + for target in atom_order: + for a in mol.atoms: + if a.symbol == target: + if keep_vdW_surface_bonds or keep_binding_vdW_bonds: + fix_atom(mol,a,allow_failure=allow_failure,cleanup_surface_bonds=False) + else: + fix_atom(mol,a,allow_failure=allow_failure,cleanup_surface_bonds=True) + + if keep_vdW_surface_bonds: + vdW_bonds = [] + for bd in mol.get_all_edges(): + if bd.order == 0: + if not bd.atom1.is_surface_bond() and not bd.atom2.is_surface_bond(): + vdW_bonds.append(bd) + for bd in vdW_bonds: + mol.remove_bond(bd) + + elif keep_binding_vdW_bonds: + mtest = mol.copy(deep=True) + + vdW_bonds = [] + for bd in mtest.get_all_edges(): + if bd.order == 0: + vdW_bonds.append(bd) + for bd in vdW_bonds: + mtest.remove_bond(bd) + + split_structs,atom_mapping = split_adsorbed_structures(mtest,clear_site_info=True,atom_mapping=True) + + gas_cluster_inds = [] + for sstruct in split_structs: + if not any(a.is_surface_site() for a in sstruct.atoms): #nominal gas phase construct + gas_cluster_inds.extend([atom_mapping[a] for a in sstruct.atoms]) + + bd_to_remove = [] + for bd in mol.get_all_edges(): + if bd.order == 0: + if (bd.atom1.is_surface_site() and mol.atoms.index(bd.atom2) in gas_cluster_inds) or (bd.atom2.is_surface_site() and mol.atoms.index(bd.atom1) in gas_cluster_inds): + pass + else: + bd_to_remove.append(bd) + + for bd in bd_to_remove: + mol.remove_bond(bd) + + return None + +def get_octet_deviation(atom): + if atom.symbol in ["Li","H"]: + return 2 - (atom.lone_pairs*2 + 2*sum(x.order for x in atom.bonds.values() if x.order >= 0.5)) + else: + return 8 - (atom.lone_pairs*2 + 2*sum(x.order for x in atom.bonds.values() if x.order >= 0.5)) + + +def choose_bond_to_fix(symbols,octets,orders,bonded_to_surface): + atom_order = [["X"],["C","Si"],["N","P"],["O","S"]] + priority = [] + for i in range(len(symbols)): + sym = symbols[i] + if (sym != "X" and octets[i] <= 0) or orders[i] == "R": + priority.append(0) + else: + for j,atoms in enumerate(atom_order): + if sym in atoms: + if not bonded_to_surface[i]: + priority.append(j+11) + else: + priority.append(j+1) + return np.argmax(priority),np.max(priority) + +class TooManyElectronsException(Exception): + pass + +def fix_atom(mol,atom,allow_failure=False,cleanup_surface_bonds=True): + delta = get_octet_deviation(atom) + + if delta < 0: #too many electrons + logging.error(mol.to_adjacency_list()) + raise TooManyElectronsException("Cannot solve problem of too many electrons not counting surface bonds and all ad bonds are 1") + elif delta > 0: #too few electrons + atoms = [k for k,v in atom.bonds.items()] + symbols = [k.symbol for k in atoms] + octets = [get_octet_deviation(k) for k in atoms] + orders = [v.get_order_str() for k,v in atom.bonds.items()] + bonded_to_surface = [a.is_bonded_to_surface() for a in atoms] + deviation = True + while delta > 0: + ind,val = choose_bond_to_fix(symbols,octets,orders,bonded_to_surface) + if val == 0: + if allow_failure: + break + else: + raise ValueError("Could not fix bonds") + bd = atom.bonds[atoms[ind]] + bd.increment_order() + delta -= 2 + octets[ind] -= 2 + + #at end clean up improper surface bonds + if cleanup_surface_bonds: + to_remove = [] + for k,v in atom.bonds.items(): + if k.symbol == 'X' and v.order == 0: + to_remove.append(v) + + for v in to_remove: + mol.remove_bond(v) + +def generate_adsorbate_2D(atoms, sites, site_adjacency, nslab, max_dist=3.0, cut_multidentate_off_num=None, allowed_structure_site_structures=None, + keep_binding_vdW_bonds=False, keep_vdW_surface_bonds=False): + admol,neighbor_sites,ninds = generate_adsorbate_molecule(atoms, sites, site_adjacency, nslab, max_dist=max_dist) + + if cut_multidentate_off_num: + bds_to_remove = [] + for i in range(len(admol.atoms)-cut_multidentate_off_num,len(admol.atoms)): + if admol.atoms[i].is_bonded_to_surface(): + for a,b in admol.atoms[i].edges.items(): + if not a.is_surface_site() and (a.is_bonded_to_surface() or admol.atoms.index(a) < len(admol.atoms)-cut_multidentate_off_num): + if b not in bds_to_remove: + bds_to_remove.append(b) + for b in bds_to_remove: + admol.remove_bond(b) + + fix_bond_orders(admol,keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + + if allowed_structure_site_structures: #we are going to analyze the initial occupational analysis and update it based on expectations + allowed_site_dict = dict() + slabless = remove_slab(admol) + for site_structs in allowed_structure_site_structures: + struct = generate_without_site_info(site_structs[0]) + grp_struct = struct.to_group() + mappings = slabless.find_subgraph_isomorphisms(grp_struct,save_order=True) + considered_sites = [] + for mapping in mappings: + atouts = [at for at in mapping.keys() if at.is_surface_site()] + if set(atouts) in considered_sites: + continue + else: + considered_sites.append(set(atouts)) + + subgraph,inds = pluck_subgraph(slabless,atouts[0]) + for site_struct in site_structs: + if subgraph.is_isomorphic(site_struct,save_order=True): + break + else: + for atout in atouts: + adatom = [a for a in atout.bonds.keys() if not a.is_surface_site()][0] + ind = slabless.atoms.index(adatom) - len([a for a in slabless.atoms if a.is_surface_site()]) + struct_ind = [grp_struct.atoms.index(a) for aout,a in mapping.items() if aout==atout][0] + sitetype = [(site_struct.atoms[struct_ind].site,site_struct.atoms[struct_ind].morphology) for site_struct in site_structs] + if ind+nslab in allowed_site_dict.keys(): + allowed_site_dict[ind+nslab].extend(sitetype) + else: + allowed_site_dict[ind+nslab] = sitetype + + admol,neighbor_sites,ninds = generate_adsorbate_molecule(atoms, sites, site_adjacency, nslab, max_dist=max_dist, allowed_site_dict=allowed_site_dict) + + if cut_multidentate_off_num: + bds_to_remove = [] + for i in range(len(admol.atoms)-cut_multidentate_off_num,len(admol.atoms)): + if admol.atoms[i].is_bonded_to_surface(): + for a,b in admol.atoms[i].edges.items(): + if not a.is_surface_site() and (a.is_bonded_to_surface() or admol.atoms.index(a) < len(admol.atoms)-cut_multidentate_off_num): + if b not in bds_to_remove: + bds_to_remove.append(b) + for b in bds_to_remove: + admol.remove_bond(b) + + fix_bond_orders(admol,keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + + admol.update_atomtypes() + admol.update_connectivity_values() + + return admol,neighbor_sites,ninds + +def generate_TS_2D(atoms, info_path, metal, facet, sites, site_adjacency, nslab, imag_freq_path=None, + max_dist=3.0, cut_multidentate_off_num=None, allowed_structure_site_structures=None, + site_bond_cutoff=3.0,keep_binding_vdW_bonds=False,keep_vdW_surface_bonds=False): + + admol,neighbor_sites,ninds = generate_adsorbate_molecule(atoms, sites, site_adjacency, + nslab, max_dist=max_dist) + + if cut_multidentate_off_num: + bds_to_remove = [] + for i in range(len(admol.atoms)-cut_multidentate_off_num,len(admol.atoms)): + if admol.atoms[i].is_bonded_to_surface(): + for a,b in admol.atoms[i].edges.items(): + if not a.is_surface_site() and (a.is_bonded_to_surface() or admol.atoms.index(a) < len(admol.atoms)-cut_multidentate_off_num): + if b not in bds_to_remove: + bds_to_remove.append(b) + for b in bds_to_remove: + admol.remove_bond(b) + + with open(info_path) as f: + info = json.load(f) + + occ = get_occupied_sites(atoms,sites,nslab,site_bond_cutoff=site_bond_cutoff) + + template_mol_map = [{int(k):v for k,v in x.items()} for x in info["template_mol_map"]] + molecule_to_atom_maps = [{int(k):v for k,v in x.items()} for x in info["molecule_to_atom_maps"]] + broken_bonds,formed_bonds = get_broken_formed_bonds(Molecule().from_adjacency_list(info["reactants"]), + Molecule().from_adjacency_list(info["products"])) + rbonds = broken_bonds | formed_bonds + if info["forward"]: + template = Molecule().from_adjacency_list(info["reactants"]) + else: + template = Molecule().from_adjacency_list(info["products"]) + + for bd in rbonds: + inds2D = [] + for label in bd: + a = template.get_labeled_atoms(label)[0] + aind = template.atoms.index(a) + aseind = get_ase_index(aind,template_mol_map,molecule_to_atom_maps,nslab,info["ads_sizes"]) + if aseind: + inds2D.append(aseind - nslab + len(neighbor_sites)) + + if len(inds2D) == 1: + i = inds2D[0] + for batom in admol.atoms[i].bonds.keys(): #try to find a surface bond + if batom.is_surface_site(): + inds2D.append(admol.atoms.index(batom)) + break + else: #didn't find surface bond, need to identify the site + pos = None + for site in occ: + if site['bonding_index'] == i - len(neighbor_sites) + nslab: + pos = site["position"] + break + else: + logging.error("couldn't find right site bond") + logging.error(admol.to_adjacency_list()) + logging.error(i - len(neighbor_sites)) + raise SiteOccupationException + for j,s in enumerate(neighbor_sites): + if (s["position"] == pos).all(): + inds2D.append(j) + break + else: + raise IndexError("could not find matching site") + + if admol.has_bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]]): + b = admol.get_bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]]) + if b.get_order_str() != "R": + b.set_order_str("R") + else: + b.set_order_str("S") #this ensures the the split_ts leaves this bond connected + else: + admol.add_bond(Bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]],"R")) + + fix_bond_orders(admol,allow_failure=True,keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + + + if allowed_structure_site_structures: #we are going to analyze the initial occupational analysis and update it based on expectations + allowed_site_dicts = [] + restricted_inds = set() + rs = split_ts_to_reactants(admol) #get reactants/products + for r in rs: + allowed_site_dict = dict() + slabless = remove_slab(r) #take the individual component + for site_structs in allowed_structure_site_structures: + struct = generate_without_site_info(site_structs[0]) + grp_struct = struct.to_group() #known stable adsorbates + mappings = slabless.find_subgraph_isomorphisms(grp_struct,save_order=True) #try to find mappings to known stable structures + for mapping in mappings: #go through each mapping + atouts = [at for at in mapping.keys() if at.is_surface_site()] + subgraph,inds = pluck_subgraph(slabless,atouts[0]) #extract the matching subgraph + for site_struct in site_structs: #go through the known stable structures + if subgraph.is_isomorphic(site_struct,save_order=True): + break + else: + + for atout in atouts: + adatom = [a for a in atout.bonds.keys() if not a.is_surface_site()][0] + ind = slabless.atoms.index(adatom) - len([a for a in slabless.atoms if a.is_surface_site()]) + struct_ind = [grp_struct.atoms.index(a) for aout,a in mapping.items() if aout==atout][0] + assert site_structs[0].atoms[struct_ind].is_surface_site() + sitetype = [(site_struct.atoms[struct_ind].site,site_struct.atoms[struct_ind].morphology) for site_struct in site_structs] + if ind+nslab in allowed_site_dict.keys(): + allowed_site_dict[ind+nslab].extend(sitetype) + else: + allowed_site_dict[ind+nslab] = sitetype + + allowed_site_dicts.append(allowed_site_dict) + restricted_inds |= set(allowed_site_dict.keys()) + + allowed_site_dict = dict() + for ind in restricted_inds: + for d in allowed_site_dicts: + if ind in d.keys(): + if ind not in allowed_site_dict.keys(): + allowed_site_dict[ind] = set(d[ind]) + else: + allowed_site_dict[ind] &= set(d[ind]) + + allowed_site_dict = {k: list(v) for k,v in allowed_site_dict.items()} + else: + allowed_site_dict = dict() + + admol,neighbor_sites,ninds = generate_adsorbate_molecule(atoms, sites, site_adjacency, + nslab, max_dist=max_dist, allowed_site_dict=allowed_site_dict) + + if cut_multidentate_off_num: + bds_to_remove = [] + for i in range(len(admol.atoms)-cut_multidentate_off_num,len(admol.atoms)): + if admol.atoms[i].is_bonded_to_surface(): + for a,b in admol.atoms[i].edges.items(): + if not a.is_surface_site() and (a.is_bonded_to_surface() or admol.atoms.index(a) < len(admol.atoms)-cut_multidentate_off_num): + if b not in bds_to_remove: + bds_to_remove.append(b) + for b in bds_to_remove: + admol.remove_bond(b) + + for bd in rbonds: + inds2D = [] + for label in bd: + a = template.get_labeled_atoms(label)[0] + aind = template.atoms.index(a) + aseind = get_ase_index(aind,template_mol_map,molecule_to_atom_maps,nslab,info["ads_sizes"]) + if aseind: + inds2D.append(aseind - nslab + len(neighbor_sites)) + + if len(inds2D) == 1: + i = inds2D[0] + for batom in admol.atoms[i].bonds.keys(): #try to find a surface bond + if batom.is_surface_site(): + inds2D.append(admol.atoms.index(batom)) + break + else: #didn't find surface bond, need to identify the site + pos = None + for site in occ: + if site['bonding_index'] == i - len(neighbor_sites) + nslab: + pos = site["position"] + break + else: + raise ValueError("didn't find occupied site") + for j,s in enumerate(neighbor_sites): + if (s["position"] == pos).all(): + inds2D.append(j) + break + else: + raise IndexError("could not find matching site") + + if admol.has_bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]]): + b = admol.get_bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]]) + if b.get_order_str() != "R": + b.set_order_str("R") + else: #diffusion on surface results in two R bonds to different sites + if imag_freq_path is None: + logging.warning("Could not resolve diffusion type R-bonds due to lack of imaginary frequency info") + continue + if admol.atoms[inds2D[0]].is_surface_site(): + site_atom_center = admol.atoms[inds2D[0]] + Ratom = admol.atoms[inds2D[1]] + if allowed_structure_site_structures: + valid_sites = allowed_site_dict[inds2D[1] - len(neighbor_sites) + nslab] + else: + site_atom_center = admol.atoms[inds2D[1]] + Ratom = admol.atoms[inds2D[0]] + if allowed_structure_site_structures: + valid_sites = allowed_site_dict[inds2D[0] - len(neighbor_sites) + nslab] + site_center_ind = admol.atoms.index(site_atom_center) + + #get imaginary frequency motion + tr_ts = Trajectory(imag_freq_path) + v_ts,_ = get_distances(tr_ts[15].positions[admol.atoms.index(Ratom) - len(neighbor_sites) + nslab], tr_ts[16].positions[admol.atoms.index(Ratom) - len(neighbor_sites) + nslab], cell=atoms.cell, pbc=atoms.pbc) + v_ts = v_ts[0,0,:] + v_ts /= np.linalg.norm(v_ts) + #position of atom + target_pos = atoms.positions[admol.atoms.index(Ratom) - len(neighbor_sites) + nslab] + site_pair = None + best_metric = None + iters = 1 + for i,site1 in enumerate(neighbor_sites): + pos1 = site1["position"] + site1_id = (site1["site"],site1["morphology"]) + for j in range(i): + site2 = neighbor_sites[j] + if i == j: + continue + site2_id = (site2["site"],site2["morphology"]) + pos2 = site2["position"] + v,dist = get_distances([pos1],[pos2],cell=atoms.cell,pbc=atoms.pbc) + dist = dist[0][0] + v = v[0,0,:] + v /= np.linalg.norm(v) + motion_alignment = abs(np.dot(v_ts,v)) + v1t,dist1t = get_distances([pos1],[target_pos],cell=atoms.cell,pbc=atoms.pbc) + dist1t = dist1t[0][0] + v1t = v1t[0,0,:] + v2t,dist2t = get_distances([pos2],[target_pos],cell=atoms.cell,pbc=atoms.pbc) + v2t = v2t[0,0,:] + dist2t = dist2t[0][0] + halfwayness = np.linalg.norm(v1t+v2t)/(dist2t+dist1t) + dist_site = max(dist1t,dist2t) + if not np.isnan(motion_alignment) and motion_alignment > 0.95 and dist < 3.0 and dist_site < 2.0 and (not allowed_structure_site_structures or (site1_id in valid_sites and site2_id in valid_sites)): + iters += 1 + metric = 1.0 / (halfwayness*dist_site) + if site_pair is None: + site_pair = [i,j] + best_metric = metric + elif metric > best_metric: + site_pair = [i,j] + best_metric = metric + if site_pair is None: + raise ValueError("Imaginary frequency doesn't mean minimal constraints for proper TS motion") + admol.remove_bond(b) + admol.add_bond(Bond(admol.atoms[site_pair[0]],Ratom,"R")) + admol.add_bond(Bond(admol.atoms[site_pair[1]],Ratom,"R")) + else: + admol.add_bond(Bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]],"R")) + + fix_bond_orders(admol,allow_failure=True,keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) #Reaction bonds prevent proper octet completion + + #remove surface bonds that abused for octet completion in reactions + for atom in admol.atoms: + surfbds = [bd for at,bd in atom.bonds.items() if at.is_surface_site() and bd.get_order_str() != 'R'] + if len(surfbds) == 1: + Nrxnbd = len([bd for at,bd in atom.bonds.items() if bd.get_order_str() == 'R' and not at.is_surface_site()]) + if Nrxnbd >= 2 and abs(Nrxnbd/2 - surfbds[0].order) < 1e-3: + admol.remove_edge(surfbds[0]) + + admol.update_atomtypes() + admol.update_connectivity_values() + for i in range(len(neighbor_sites)): + assert neighbor_sites[i]["site"] == admol.atoms[i].site + assert neighbor_sites[i]["morphology"] == admol.atoms[i].morphology + return admol,neighbor_sites,ninds + +def tagsites(atoms,sites): + aview = deepcopy(atoms) + anames = ['He','Ne','Ar','Kr','Xe','Rn'] + for i,site in enumerate(sites): + add_adsorbate_to_site(aview,Atoms(anames[i], [(0, 0, 0)]), 0, sites[i], height=1.0) + return aview + +def split_ts_to_reactants(ts2d,tagatoms=False,keep_binding_vdW_bonds=False,keep_vdW_surface_bonds=False): + rs = [] + rbondnum = 0 + for bd in ts2d.get_all_edges(): + if bd.is_reaction_bond(): + rbondnum += 1 + + combs = list(itertools.product([0,1],repeat=rbondnum)) + + for comb in combs: + r = deepcopy(ts2d) + iters = 0 + for i,bd in enumerate(r.get_all_edges()): + if bd.is_reaction_bond(): + if tagatoms: + if not bd.atom1.is_surface_site() and bd.atom1.is_bonded_to_surface(): + bd.atom1.label = "*" + if not bd.atom2.is_surface_site() and bd.atom2.is_bonded_to_surface(): + bd.atom2.label = "*" + + if comb[iters] == 0: + r.remove_bond(bd) + else: + bd.set_order_str('S') + iters += 1 + else: + bd.set_order_str('S') + try: + fix_bond_orders(r,keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + r.update(sort_atoms=False) + r.update_connectivity_values() + rs.append(r) + except (TooManyElectronsException,ValueError): + pass + + return rs + +def generate_allowed_structure_site_structures(adsorbate_dir,sites,site_adjacency,nslab,max_dist=3.0,cut_multidentate_off_num=None): + allowed_structure_site_structures = [] + for ad in os.listdir(adsorbate_dir): + if not os.path.isdir(os.path.join(adsorbate_dir,ad)): + continue + with open(os.path.join(adsorbate_dir,ad,"info.json"),'r') as f: + info = json.load(f) + target_mol = Molecule().from_adjacency_list(info["adjlist"]) + atom_to_molecule_surface_atom_map = {int(k): int(v) for k,v in info["gratom_to_molecule_surface_atom_map"].items()} + if not target_mol.contains_surface_site(): + continue + geoms = get_adsorbate_geometries(os.path.join(adsorbate_dir,ad),target_mol,sites,atom_to_molecule_surface_atom_map,nslab) + mols = [] + for geom in geoms: + admol,neighbor_sites,ninds = generate_adsorbate_2D(geom, sites, site_adjacency, nslab, max_dist=max_dist, cut_multidentate_off_num=cut_multidentate_off_num) + m = remove_slab(admol) + if not generate_without_site_info(m).is_isomorphic(target_mol,save_order=True): #geom didn't optimize to target + continue + for mol in mols: + if mol.is_isomorphic(m,save_order=True): + break + else: + m.update_atomtypes() + m.update_connectivity_values() + mols.append(m) + if mols: + allowed_structure_site_structures.append(mols) + + return allowed_structure_site_structures + +def get_best_adsorbate_geometries(adsorbate_path,aseinds,siteinfo,sites,site_adjacency,imag_freq_max=150.0): + """ + load the adsorbates geometries find the best matching unique optimized + adsorbate structures for each species + siteinfo = [("ontop","terrace"),("edge","terrace")] + aseinds = [37,38] + """ + info_path = os.path.join(adsorbate_path,"info.json") + with open(info_path,"r") as f: + info = json.load(f) + nslab = info["nslab"] + atom_to_molecule_surface_atom_map = {int(k):int(v) for k,v in info["gratom_to_molecule_surface_atom_map"].items()} + mol = Molecule().from_adjacency_list(info["adjlist"]) + prefixes = os.listdir(adsorbate_path) + if len(prefixes) == 2: #includes info.json + return read(os.path.join(adsorbate_path,"0","0.xyz")),os.path.join(adsorbate_path,"0","0.xyz") + geoms = [] + bestxyz = None + best = None + best_fingerprint = None + best_score = None + best_energy = None + target_fingerprint = [d for x in siteinfo for d in x] + for prefix in prefixes: + if prefix == "info.json": + continue + path = os.path.join(adsorbate_path,prefix,prefix+".xyz") + freq_path = os.path.join(adsorbate_path,prefix,"vib.json_vib.json") + if os.path.exists(path) and os.path.exists(freq_path): + with open(freq_path,"r") as f: + freq_dict = json.load(f) + freq = np.complex128(freq_dict["frequencies"][0]) + if (np.isreal(freq) or freq.imag < imag_freq_max): + geoms.append(path) + + xyzs = geoms + + adsorbates = [] + for xyz in xyzs: + geo = read(xyz) + occ = get_occupied_sites(geo,sites,nslab) + required_surface_inds = set([ind+nslab for ind in atom_to_molecule_surface_atom_map.keys()]) + found_surface_inds = set([site["bonding_index"] for site in occ]) + if len(occ) >= len(mol.get_adatoms()) and required_surface_inds.issubset(found_surface_inds): + if best is None: + best = geo + bestxyz = xyz + best_energy = geo.get_potential_energy() + assert len(siteinfo) <= len(occ) + for slist in itertools.permutations(occ,r=len(siteinfo)): + sinfo = [(site["site"],site["morphology"]) for site in slist] + fingerprint = [d for x in sinfo for d in x] + if best_fingerprint is None: + best_fingerprint = fingerprint + best_score = np.equal(np.array(best_fingerprint),np.array(target_fingerprint)).sum() + else: + score = np.equal(np.array(fingerprint),np.array(target_fingerprint)).sum() + if score > best_score: + best_fingerprint = fingerprint + best_score = score + + else: + for slist in itertools.permutations(occ,r=len(siteinfo)): + sinfo = [(site["site"],site["morphology"]) for site in slist] + fingerprint = [d for x in sinfo for d in x] + score = np.equal(np.array(fingerprint),np.array(target_fingerprint)).sum() + if score > best_score or score == best_score and geo.get_potential_energy() < best_energy: + best = geo + bestxyz = xyz + best_energy = geo.get_potential_energy() + best_fingerprint = fingerprint + best_score = score + + return best,bestxyz + +def get_best_reaction_adsorbate_geometries(admol,admol_neighbors,nslab2D,adsorbates_path,info,sites, + site_adjacency,imag_freq_max=150.0, + assume_reactant_product_templates_same_order=False): + + assert assume_reactant_product_templates_same_order #otherwise tricky to get formed bonds + + #get reverse template map + if info["forward"]: + template = Molecule().from_adjacency_list(info["reactants"]) + rev_template = Molecule().from_adjacency_list(info["products"]) + else: + template = Molecule().from_adjacency_list(info["products"]) + rev_template = Molecule().from_adjacency_list(info["reactants"]) + + broken_bonds,formed_bonds = get_broken_formed_bonds(template,rev_template) + + rev_mols = [] + for n in info["reverse_names"]: + molinfo_path = os.path.join(adsorbates_path,n,"info.json") + with open(molinfo_path,"r") as f: + molinfo = json.load(f) + mol = Molecule().from_adjacency_list(molinfo["adjlist"]) + rev_mols.append(mol) + + + rev_ads_sizes = [ads_size(m) for m in rev_mols] + molecule_to_atom_rev_maps = [] + for m in rev_mols: + ads,mol_to_atoms_map = get_adsorbate(mol) + molecule_to_atom_rev_maps.append(mol_to_atoms_map) + molecule_to_atom_rev_maps_invert = [{int(v):int(k) for k,v in x.items()} for x in molecule_to_atom_rev_maps] + + template_rev_mol_map = get_template_mol_map(rev_template,rev_mols) + + template_rev_mol_map = get_template_mol_map(rev_template,rev_mols) + template_rev_mol_map_invert = [{int(v):int(k) for k,v in x.items()} for x in template_rev_mol_map] + template_mol_map = [{int(k):int(v) for k,v in x.items()} for x in info["template_mol_map"]] + template_mol_map_invert = [{int(v):int(k) for k,v in x.items()} for x in info["template_mol_map"]] + molecule_to_atom_maps = [{int(k):int(v) for k,v in x.items()} for x in info["molecule_to_atom_maps"]] + molecule_to_atom_maps_invert = [{int(v):int(k) for k,v in x.items()} for x in info["molecule_to_atom_maps"]] + ads_sizes = info["ads_sizes"] + nslab = info["nslab"] + mols = [Molecule().from_adjacency_list(x) for x in info["mols"]] + + #map adsorbed atoms on admol to templates + adatoms = np.unique(np.array([a for a in admol.get_adatoms() if not a.is_surface_site()])).tolist() + adinds = [admol.atoms.index(adatom) - nslab2D for adatom in adatoms] #aseinds + template_adinds = [ase_to_template_index(a,template_mol_map_invert,molecule_to_atom_maps_invert, + ads_sizes) for a in adinds] + forward_geoms = [] + for i,ad in enumerate(info["species_names"]): + tempinds = [k for k in template_adinds if k in template_mol_map[i].keys()] + aseinds = [get_ase_index(k,template_mol_map,molecule_to_atom_maps,info["nslab"],ads_sizes) for k in tempinds] + siteinfo = [] + for j,a in enumerate(adatoms): + for bd in admol.get_bonds(a).values(): + if bd.atom1.is_surface_site(): + ind = admol.atoms.index(bd.atom1) + indad = admol.atoms.index(bd.atom2) + break + elif bd.atom2.is_surface_site(): + ind = admol.atoms.index(bd.atom2) + indad = admol.atoms.index(bd.atom1) + break + indtmp = ase_to_template_index(indad-nslab2D,template_mol_map_invert, + molecule_to_atom_maps_invert, ads_sizes) + molind,mind = get_mol_index(indtmp,template_mol_map) + + if molind == i and mols[i].atoms[mind].is_bonded_to_surface(): + site = admol_neighbors[ind] + siteinfo.append((site["site"],site["morphology"])) + + best = get_best_adsorbate_geometries(os.path.join(adsorbates_path,ad),aseinds,siteinfo,sites,site_adjacency, + imag_freq_max=imag_freq_max)[0] + forward_geoms.append(best) + + reverse_geoms = [] + for j,ad in enumerate(info["reverse_names"]): + tempinds = [k for k in template_adinds if k in template_rev_mol_map[j].keys()] + aseinds = [get_ase_index(k,template_mol_map,molecule_to_atom_maps,info["nslab"],ads_sizes) for k in tempinds] + siteinfo = [] + for i,a in enumerate(adatoms): + for bd in admol.get_bonds(a).values(): + if bd.atom1.is_surface_site(): + ind = admol.atoms.index(bd.atom1) + indad = admol.atoms.index(bd.atom2) + break + elif bd.atom2.is_surface_site(): + ind = admol.atoms.index(bd.atom2) + indad = admol.atoms.index(bd.atom1) + break + #assuming rev and forward template indices are the same + indtmp = ase_to_template_index(indad-nslab2D,template_mol_map_invert, + molecule_to_atom_maps_invert, ads_sizes) + + molind,mind = get_mol_index(indtmp,template_rev_mol_map) + if molind == j and rev_mols[j].atoms[mind].is_bonded_to_surface(): + site = admol_neighbors[ind] + siteinfo.append((site["site"],site["morphology"])) + + best = get_best_adsorbate_geometries(os.path.join(adsorbates_path,ad),aseinds,siteinfo,sites,site_adjacency, + imag_freq_max=imag_freq_max)[0] + reverse_geoms.append(best) + + return forward_geoms,reverse_geoms + +def ase_to_template_index(aseind, template_mol_map_invert, molecule_to_atom_maps_invert, ads_sizes, nslab=None, ): + if nslab: + aseind = aseind - nslab + + #go to molind + n = 0 + for i,d in enumerate(molecule_to_atom_maps_invert): + if aseind > n + len(d) - 1: + n += ads_sizes[i] + continue + else: + molnum = i + molind = molecule_to_atom_maps_invert[molnum][aseind-n] + break + else: + raise ValueError + + return template_mol_map_invert[molnum][molind] + +def get_bond_factors(ts_path,adsorbates_path,metal,facet,sites,site_adjacency,max_dist=3.0,imag_freq_max=150.0): + + info_path = os.path.join(os.path.split(os.path.split(ts_path)[0])[0],"info.json") + with open(info_path,"r") as f: + info = json.load(f) + + template_mol_map = [{int(k):v for k,v in x.items()} for x in info["template_mol_map"]] + molecule_to_atom_maps = [{int(k):v for k,v in x.items()} for x in info["molecule_to_atom_maps"]] + broken_bonds,formed_bonds = get_broken_formed_bonds(Molecule().from_adjacency_list(info["reactants"]), + Molecule().from_adjacency_list(info["products"])) + rbonds = broken_bonds | formed_bonds + nslab = info["nslab"] + + atms = read(ts_path) + + admol,neighbor_sites,ninds = generate_TS_2D(atms, info_path, metal, facet, sites, site_adjacency, + nslab, max_dist=max_dist) + + occ = get_occupied_sites(atms,sites,nslab) + occ_bd_lengths = {site["bonding_index"]:site['bond_length'] for site in occ} + + reactants,products = get_best_reaction_adsorbate_geometries(admol,neighbor_sites,len([a for a in admol.atoms if a.is_surface_site()]), + adsorbates_path,info,sites,site_adjacency, + imag_freq_max=imag_freq_max, + assume_reactant_product_templates_same_order=True) + + if info["forward"]: + template = Molecule().from_adjacency_list(info["reactants"]) + rev_template = Molecule().from_adjacency_list(info["products"]) + else: + template = Molecule().from_adjacency_list(info["products"]) + rev_template = Molecule().from_adjacency_list(info["reactants"]) + + rev_mols = [] + molecule_to_atom_rev_maps = [] + for n in info["reverse_names"]: + molinfo_path = os.path.join(adsorbates_path,n,"info.json") + with open(molinfo_path,"r") as f: + molinfo = json.load(f) + mol = Molecule().from_adjacency_list(molinfo["adjlist"]) + rev_mols.append(mol) + molecule_to_atom_rev_maps.append({int(v):int(k) for k,v in molinfo["atom_to_molecule_atom_map"].items()}) + + template_rev_mol_map = get_template_mol_map(rev_template,rev_mols) + rev_ads_sizes = [ads_size(mol) for mol in rev_mols] + + template_mol_map_invert = [{int(v):int(k) for k,v in x.items()} for x in info["template_mol_map"]] + molecule_to_atom_maps_invert = [{int(v):int(k) for k,v in x.items()} for x in info["molecule_to_atom_maps"]] + ads_sizes = info["ads_sizes"] + mols = [Molecule().from_adjacency_list(x) for x in info["mols"]] + + adjlist_to_length = dict() + adjlist_to_well_length = dict() + + for bd in rbonds: + aseinds = [] + inds2D = [] + + for label in bd: + a = template.get_labeled_atoms(label)[0] + aind = template.atoms.index(a) + aseind = get_ase_index(aind,template_mol_map,molecule_to_atom_maps,nslab,info["ads_sizes"]) + if aseind: + aseinds.append(aseind) + inds2D.append(aseind - nslab + len(neighbor_sites)) + + if len(inds2D) == 1: + i = inds2D[0] + for batom in admol.atoms[i].bonds.keys(): #try to find a surface bond + if batom.is_surface_site(): + inds2D.append(admol.atoms.index(batom)) + break + else: #didn't find surface bond, need to identify the site + pos = None + for site in occ: + if site['bonding_index'] == i - len(neighbor_sites) + nslab: + pos = site["position"] + break + else: + logging.error(admol.to_adjacency_list()) + raise SiteOccupationException + for j,s in enumerate(neighbor_sites): + if (s["position"] == pos).all(): + inds2D.append(j) + break + + m = admol.copy(deep=True) + for i in inds2D: + m.atoms[i].label = "*" + + templateinds = [ase_to_template_index(aind, template_mol_map_invert, molecule_to_atom_maps_invert, ads_sizes, nslab) for aind in aseinds] + if len(templateinds) == 2: + forward = template.has_bond(template.atoms[templateinds[0]],template.atoms[templateinds[1]]) + else: + forward = template.atoms[templateinds[0]].is_bonded_to_surface() + + if len(aseinds) == 2: + L = atms.get_distance(aseinds[0],aseinds[1],mic=True) + if forward: + molind = get_mol_index(templateinds[0],template_mol_map)[0] + aind1 = get_ase_index(templateinds[0],template_mol_map,molecule_to_atom_maps,nslab,ads_sizes) + aind2 = get_ase_index(templateinds[1],template_mol_map,molecule_to_atom_maps,nslab,ads_sizes) + if mols[molind].contains_surface_site(): + Lwell = reactants[molind].get_distance(aind1,aind2,mic=True) + else: + Lwell = reactants[molind].get_distance(aind1-nslab,aind2-nslab,mic=True) + else: + molind,mind = get_mol_index(templateinds[0],template_rev_mol_map) + aind1 = get_ase_index(templateinds[0],template_rev_mol_map,molecule_to_atom_rev_maps,nslab,rev_ads_sizes) + aind2 = get_ase_index(templateinds[1],template_rev_mol_map,molecule_to_atom_rev_maps,nslab,rev_ads_sizes) + if rev_mols[molind].contains_surface_site(): + Lwell = products[molind].get_distance(aind1,aind2,mic=True) + else: + Lwell = products[molind].get_distance(aind1-nslab,aind2-nslab,mic=True) + + elif len(aseinds) == 1: + L = occ_bd_lengths[aseinds[0]] + if forward: + molind,mind = get_mol_index(templateinds[0],template_mol_map) + aind1 = molecule_to_atom_maps[molind][mind]+nslab + mocc = get_occupied_sites(reactants[molind],sites,nslab) + mocc_bd_lengths = {site["bonding_index"]:site['bond_length'] for site in mocc} + Lwell = mocc_bd_lengths[aind1] + else: + molind,mind = get_mol_index(templateinds[0],template_rev_mol_map) + aind1 = molecule_to_atom_maps[molind][mind]+nslab + mocc = get_occupied_sites(products[molind],sites,nslab) + mocc_bd_lengths = {site["bonding_index"]:site['bond_length'] for site in mocc} + Lwell = mocc_bd_lengths[aind1] + + if Lwell > 2.0: + assert False + adjlist_to_length[m.to_adjacency_list()] = L + adjlist_to_well_length[m.to_adjacency_list()] = Lwell + + adjlist_to_bond_factor = dict() + for k in adjlist_to_length.keys(): + adjlist_to_bond_factor[k] = adjlist_to_length[k]/adjlist_to_well_length[k] + + return adjlist_to_bond_factor,adjlist_to_length,adjlist_to_well_length + +def get_unique_adsorbate_geometries(adsorbate_path,mol,sites,site_adjacency,atom_to_molecule_surface_atom_map, + nslab,imag_freq_max=150.0): + """ + load the adsorbates associated with the reaction and find the unique optimized + adsorbate structures for each species + returns a dictionary mapping each adsorbate name to a list of ase.Atoms objects + """ + prefixes = os.listdir(adsorbate_path) + geoms = [] + for prefix in prefixes: + if prefix == "info.json": + continue + path = os.path.join(adsorbate_path,prefix,prefix+".xyz") + freq_path = os.path.join(adsorbate_path,prefix,"vib.json_vib.json") + if os.path.exists(path) and os.path.exists(freq_path): + with open(freq_path,"r") as f: + freq_dict = json.load(f) + freq = np.complex128(freq_dict["frequencies"][0]) + if (np.isreal(freq) or freq.imag < imag_freq_max): + geoms.append(path) + + ase_atoms = [read(geom) for geom in geoms] + mols = [generate_adsorbate_2D(geom, sites, site_adjacency, nslab, max_dist=np.inf)[0] for geom in ase_atoms] + filtered_mols = [] + filtered_geoms = [] + filtered_energies = [] + for j,m in enumerate(mols): + for i,fm in enumerate(filtered_mols): + if fm.is_isomorphic(m,save_order=True): + if filtered_energies[i] > ase_atoms[j].get_potential_energy(): + filtered_mols[i] = m + filtered_geoms[i] = ase_atoms[j] + filtered_energies[i] = ase_atoms[j].get_potential_energy() + else: + filtered_mols.append(m) + filtered_geoms.append(ase_atoms[j]) + filtered_energies.append(ase_atoms[j].get_potential_energy()) + + adsorbates = [] + for j,geo in enumerate(filtered_geoms): + occ = get_occupied_sites(geo,sites,nslab) + required_surface_inds = set([ind+nslab for ind in atom_to_molecule_surface_atom_map.keys()]) + found_surface_inds = set([site["bonding_index"] for site in occ]) + if len(occ) >= len(mol.get_adatoms()) and required_surface_inds.issubset(found_surface_inds): + adsorbates.append(geo) + + return adsorbates + +def get_adsorbate_geometries(adsorbate_path,mol,sites,atom_to_molecule_surface_atom_map, + nslab,imag_freq_max=150.0): + """ + load the adsorbates associated with the reaction and find the unique optimized + adsorbate structures for each species + returns a dictionary mapping each adsorbate name to a list of ase.Atoms objects + """ + prefixes = os.listdir(adsorbate_path) + geoms = [] + for prefix in prefixes: + if prefix == "info.json": + continue + path = os.path.join(adsorbate_path,prefix,prefix+".xyz") + freq_path = os.path.join(adsorbate_path,prefix,"vib.json_vib.json") + if os.path.exists(path) and os.path.exists(freq_path): + with open(freq_path,"r") as f: + freq_dict = json.load(f) + freq = np.complex128(freq_dict["frequencies"][0]) + if (np.isreal(freq) or freq.imag < imag_freq_max): + geoms.append(path) + + xyzs = get_unique_sym(geoms) + adsorbates = [] + for xyz in xyzs: + geo = read(xyz) + occ = get_occupied_sites(geo,sites,nslab) + required_surface_inds = set([ind+nslab for ind in atom_to_molecule_surface_atom_map.keys()]) + found_surface_inds = set([site["bonding_index"] for site in occ]) + if len(occ) >= len(mol.get_adatoms()) and required_surface_inds.issubset(found_surface_inds): + adsorbates.append(geo) + + return adsorbates + +def get_lowest_adsorbate_energies(adsorbates_path): + """ + load the adsorbates geometries find the lowest energy correct structure and the 2D representation + """ + ad_energy_dict = dict() + for ad in os.listdir(adsorbates_path): + path = os.path.join(adsorbates_path,ad) + if not os.path.isdir(path): + continue + with open(os.path.join(path,"info.json"),"r") as f: + info = json.load(f) + mol = Molecule().from_adjacency_list(info["adjlist"]) + Es = get_adsorbate_energies(path)[0] + ad_energy_dict[mol] = np.inf + for E in Es.values(): + if E < ad_energy_dict[mol]: + ad_energy_dict[mol] = E + + return ad_energy_dict + +def extract_pair_graph(atoms,sites,site_adjacency,nslab,max_dist,cut_multidentate_off_num=None,allowed_structure_site_structures=None, + is_ts=False,info_path=None,metal=None,facet=None,imag_freq_path=None): + if is_ts: + admol,neighbor_sites,ninds = generate_TS_2D(atoms, info_path, metal, facet, sites, site_adjacency, nslab, imag_freq_path=imag_freq_path, + max_dist=max_dist, cut_multidentate_off_num=cut_multidentate_off_num, allowed_structure_site_structures=allowed_structure_site_structures, + ) + else: + admol,neighbor_sites,ninds = generate_adsorbate_2D(atoms, sites, site_adjacency, nslab, + max_dist=max_dist,cut_multidentate_off_num=cut_multidentate_off_num,allowed_structure_site_structures=allowed_structure_site_structures) + return reduce_graph_to_pairs(admol) + +def get_adsorbate_energies(ad_path,atom_corrections=None,include_zpe=True): + """ + get ZPE corrected adsorbate energies + """ + dirs = os.listdir(ad_path) + slab = read(os.path.join(os.path.split(os.path.split(ad_path)[0])[0],"slab.xyz")) + Eslab = slab.get_potential_energy() + with open(os.path.join(ad_path,"info.json")) as f: + info = json.load(f) + + m = Molecule().from_adjacency_list(info["adjlist"]) + if atom_corrections: + AEC = 0.0 + for atom in m.atoms: + s = str(atom.element) + if not "X" in s: + AEC += atom_corrections[s] + else: #if no corrections don't add a correction + AEC = 0.0 + + gasphase = len(info["gratom_to_molecule_surface_atom_map"]) == 0 + spin = (Molecule().from_adjacency_list(info["adjlist"]).multiplicity - 1.0)/2.0 + Es = dict() + thermos = dict() + fs = dict() + for d in dirs: + if d == "info.json": + continue + optdir = os.path.join(ad_path,d,d+".xyz") + freqdir = os.path.join(ad_path,d,"vib.json_vib.json") + if not (os.path.exists(optdir) and os.path.exists(freqdir)): + continue + sp = read(optdir) + E = sp.get_potential_energy() + if gasphase: + vibdata = get_vibdata(optdir,freqdir,0) + else: + vibdata = get_vibdata(optdir,freqdir,len(slab)) + + freqs = vibdata.get_frequencies().tolist() + fs[d] = freqs + + ZPE = vibdata.get_zero_point_energy() + + if not gasphase: + + if include_zpe: + Es[d] = E + ZPE - Eslab - AEC + else: + Es[d] = E - Eslab - AEC + thermos[d] = HarmonicThermo(np.array([x for x in np.real(vibdata.get_energies()) if x > 0.0]),potentialenergy=E-Eslab-AEC) + else: + if len(sp) == 1: + geometry = 'monatomic' + elif any([ I < 1.0e-3 for I in sp.get_moments_of_inertia()]): + geometry = "linear" + else: + geometry = "nonlinear" + + if include_zpe: + Es[d] = E + ZPE - AEC + else: + Es[d] = E - AEC + + thermos[d] = IdealGasThermo(np.real(vibdata.get_energies()),geometry, + potentialenergy=E - AEC,atoms=sp,symmetrynumber=1, + natoms=len(sp),spin=spin) + + return Es,thermos,fs + +def get_vibdata(dopt,dvib,nslab): + xyz = read(dopt) + inds = range(nslab,len(xyz)) + with open(dvib,'r') as f: + out = json.load(f) + sh = (len(inds),3,len(inds),3) + H = np.asarray(out["hessian"]) + if H.shape != sh: + H = H.reshape(sh) + vib = VibrationsData(xyz,H,inds) + return vib \ No newline at end of file diff --git a/pynta/main.py b/pynta/main.py index f822af71..a5add503 100644 --- a/pynta/main.py +++ b/pynta/main.py @@ -1,5 +1,6 @@ from pynta.tasks import * from pynta.mol import get_adsorbate, generate_unique_site_additions, generate_adsorbate_guesses, get_name,generate_unique_placements +from pynta.coveragedependence import * from molecule.molecule import Molecule import ase.build from ase.io import read, write @@ -41,7 +42,8 @@ def __init__(self,path,rxns_file,surface_type,metal,label,launchpad_path=None,fw irc_mode="fixed", #choose irc mode: 'skip', 'relaxed', 'fixed' lattice_opt_software_kwargs={'kpts': (25,25,25), 'ecutwfc': 70, 'degauss':0.02, 'mixing_mode': 'plain'}, reset_launchpad=False,queue_adapter_path=None,num_jobs=25,max_num_hfsp_opts=None,#max_num_hfsp_opts is mostly for fast testing - Eharmtol=3.0,Eharmfiltertol=30.0,Ntsmin=5,frozen_layers=2,fmaxopt=0.05,fmaxirc=0.1,fmaxopthard=0.05,c=None): + Eharmtol=3.0,Eharmfiltertol=30.0,Ntsmin=5,frozen_layers=2,fmaxopt=0.05,fmaxirc=0.1,fmaxopthard=0.05,c=None, + surrogate_metal=None): self.surface_type = surface_type if launchpad_path: @@ -64,6 +66,10 @@ def __init__(self,path,rxns_file,surface_type,metal,label,launchpad_path=None,fw self.facet = metal + surface_type self.fws = [] self.metal = metal + if surrogate_metal is None: + self.surrogate_metal = metal + else: + self.surrogate_metal = surrogate_metal self.adsorbate_fw_dict = dict() self.software_kwargs = software_kwargs self.irc_mode = irc_mode @@ -171,7 +177,7 @@ def analyze_slab(self): full_slab = self.slab cas = SlabAdsorptionSites(full_slab, self.surface_type,allow_6fold=False,composition_effect=False, label_sites=True, - surrogate_metal=self.metal) + surrogate_metal=self.surrogate_metal) self.cas = cas @@ -496,7 +502,7 @@ def setup_transition_states(self,adsorbates_finished=False): "gratom_to_molecule_atom_maps":{sm: {str(k):v for k,v in d.items()} for sm,d in self.gratom_to_molecule_atom_maps.items()}, "gratom_to_molecule_surface_atom_maps":{sm: {str(k):v for k,v in d.items()} for sm,d in self.gratom_to_molecule_surface_atom_maps.items()}, "nslab":self.nslab,"Eharmtol":self.Eharmtol,"Eharmfiltertol":self.Eharmfiltertol,"Ntsmin":self.Ntsmin, - "max_num_hfsp_opts":self.max_num_hfsp_opts}) + "max_num_hfsp_opts":self.max_num_hfsp_opts, "surrogate_metal":self.surrogate_metal}) reactants = rxn["reactant_names"] products = rxn["product_names"] parents = [] @@ -572,3 +578,201 @@ def execute_from_initial_ad_guesses(self): self.launch() + +class CoverageDependence: + def __init__(self,path,metal,surface_type,repeats,pynta_run_directory,software,software_kwargs,label,sites,site_adjacency,coad_stable_sites,adsorbates=[],transition_states=dict(),coadsorbates=[], + max_dist=3.0,frozen_layers=2,fmaxopt=0.05,Ncalc_per_iter=6,TS_opt_software_kwargs=None,launchpad_path=None, + fworker_path=None,queue=False,njobs_queue=0,reset_launchpad=False,queue_adapter_path=None, + num_jobs=25,surrogate_metal=None,concern_energy_tol=None,max_iters=np.inf): + self.path = path + self.metal = metal + self.repeats = repeats + self.nslab = np.product(repeats) + self.pynta_run_directory = pynta_run_directory + self.pairs_directory = os.path.join(self.path,"pairs") + self.slab_path = os.path.join(self.pynta_run_directory,"slab.xyz") + self.adsorbates_path = os.path.join(self.pynta_run_directory,"Adsorbates") + self.coad_stable_sites = coad_stable_sites + self.software = software + self.software_kwargs = software_kwargs + self.surface_type = surface_type + self.facet = metal + surface_type + self.sites = sites + self.site_adjacency = site_adjacency + self.Ncalc_per_iter = Ncalc_per_iter + self.software_kwargs_TS = deepcopy(software_kwargs) + self.concern_energy_tol = concern_energy_tol + if TS_opt_software_kwargs: + for key,val in TS_opt_software_kwargs.items(): + self.software_kwargs_TS[key] = val + + if adsorbates != []: + raise ValueError("Not implemented yet") + self.adsorbates = adsorbates + self.transition_states = transition_states + self.coadsorbates = coadsorbates + self.max_dist = max_dist + self.frozen_layers = frozen_layers + self.layers = self.repeats[2] + self.freeze_ind = int((self.nslab/self.layers)*self.frozen_layers) + self.fmaxopt = fmaxopt + self.label = label + self.max_iters = max_iters + + if launchpad_path: + launchpad = LaunchPad.from_file(launchpad_path) + else: + launchpad = LaunchPad() + + if reset_launchpad: + launchpad.reset('', require_password=False) + self.launchpad = launchpad + + self.fworker_path = fworker_path + if fworker_path: + self.fworker = FWorker.from_file(fworker_path) + else: + self.fworker = FWorker() + + self.queue = queue + self.njobs_queue = njobs_queue + self.reset_launchpad = reset_launchpad + if queue: + self.qadapter = load_object_from_file(queue_adapter_path) + self.num_jobs = num_jobs + + if surrogate_metal is None: + self.surrogate_metal = metal + else: + self.surrogate_metal = surrogate_metal + + self.pairs_fws = [] + self.fws = [] + + def setup_pairs_calculations(self): + tsdirs = [os.path.join(self.pynta_run_directory,t,ind) for t,ind in self.transition_states.items()] + outdirs_ad,outdirs_ts = setup_pair_opts_for_rxns(self.path,tsdirs,self.coadsorbates,self.surrogate_metal,self.surface_type,max_dist=self.max_dist) + + for d in outdirs_ad: + fwopt = optimize_firework(d, + self.software,"weakopt", + opt_method="MDMin",opt_kwargs={'dt': 0.05,"trajectory": "weakopt.traj"},software_kwargs=self.software_kwargs,order=0, + run_kwargs={"fmax" : 0.5, "steps" : 30},parents=[], + constraints=["freeze up to {}".format(self.freeze_ind)], + ignore_errors=True, metal=self.metal, facet=self.surface_type, priority=3) + fwopt2 = optimize_firework(os.path.join(os.path.split(d)[0],"weakopt.xyz"), + self.software,"out", + opt_method="QuasiNewton",opt_kwargs={"trajectory": "out.traj"},software_kwargs=self.software_kwargs,order=0, + run_kwargs={"fmax" : self.fmaxopt, "steps" : 70},parents=[fwopt], + constraints=["freeze up to {}".format(self.freeze_ind)], + ignore_errors=True, metal=self.metal, facet=self.surface_type, priority=2) + + fwvib = vibrations_firework(os.path.join(os.path.split(d)[0],"out.xyz"), + self.software,"vib",software_kwargs=self.software_kwargs,parents=[fwopt2], + constraints=["freeze up to "+str(self.nslab)]) + self.pairs_fws.append(fwopt) + self.pairs_fws.append(fwopt2) + self.pairs_fws.append(fwvib) + + for d in outdirs_ts: + fwopt = optimize_firework(d, + self.software,"out", sella=True, + opt_kwargs={"trajectory": "out.traj"},software_kwargs=self.software_kwargs_TS, + order=1, + run_kwargs={"fmax" : self.fmaxopt, "steps" : 70},parents=[], + constraints=["freeze up to {}".format(self.freeze_ind)], + ignore_errors=True, metal=self.metal, facet=self.surface_type, priority=3) + + fwvib = vibrations_firework(os.path.join(os.path.split(d)[0],"out.xyz"), + self.software,"vib",software_kwargs=self.software_kwargs,parents=[fwopt], + constraints=["freeze up to "+str(self.nslab)]) + self.pairs_fws.append(fwopt) + self.pairs_fws.append(fwvib) + + self.fws.extend(self.pairs_fws) + + def setup_active_learning_loop(self): + admol_name_path_dict = {k: os.path.join(self.pynta_run_directory,k,v,"opt.xyz") for k,v in self.transition_states.items()} + admol_name_structure_dict = dict() + ads = self.adsorbates + allowed_structure_site_structures = generate_allowed_structure_site_structures(os.path.join(self.pynta_run_directory,"Adsorbates"),self.sites,self.site_adjacency,self.nslab,max_dist=np.inf) + + for ts in self.transition_states.keys(): + info_path = os.path.join(self.pynta_run_directory,ts,"info.json") + with open(info_path) as f: + info = json.load(f) + mol = Molecule().from_adjacency_list(info["adjlist"]) + keep_binding_vdW_bonds=False + keep_vdW_surface_bonds=False + for bd in mol.get_all_edges(): + if bd.order == 0: + if bd.atom1.is_surface_site() or bd.atom2.is_surface_site(): + keep_binding_vdW_bonds = True + m = mol.copy(deep=True) + b = m.get_bond(m.atoms(mol.atoms.index(bd.atom1)),m.atoms[mol.atoms.index(bd.atom2)]) + m.remove_bond(b) + out = m.split() + if len(out) == 1: #vdW bond is not only thing connecting adsorbate to surface + keep_vdW_surface_bonds = True + + atoms = read(admol_name_path_dict[ts]) + st,_,_ = generate_TS_2D(atoms, info_path, self.metal, self.surface_type, self.sites, self.site_adjacency, self.nslab, + max_dist=np.inf, allowed_structure_site_structures=allowed_structure_site_structures, + keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + admol_name_structure_dict[ts] = st + with open(info_path,"r") as f: + info = json.load(f) + for name in info["species_names"]+info["reverse_names"]: + if name not in ads: + ads.append(name) + + for ad in ads: + p = os.path.join(self.pynta_run_directory,"Adsorbates",ad) + with open(os.path.join(p,"info.json")) as f: + info = json.load(f) + + mol = Molecule().from_adjacency_list(info["adjlist"]) + + if mol.contains_surface_site(): + keep_binding_vdW_bonds=False + keep_vdW_surface_bonds=False + for bd in mol.get_all_edges(): + if bd.order == 0: + if bd.atom1.is_surface_site() or bd.atom2.is_surface_site(): + keep_binding_vdW_bonds = True + m = mol.copy(deep=True) + b = m.get_bond(m.atoms(mol.atoms.index(bd.atom1)),m.atoms[mol.atoms.index(bd.atom2)]) + m.remove_bond(b) + out = m.split() + if len(out) == 1: #vdW bond is not only thing connecting adsorbate to surface + keep_vdW_surface_bonds = True + + ad_xyz = get_best_adsorbate_xyz(p,self.sites,self.nslab) + admol_name_path_dict[ad] = ad_xyz + atoms = read(ad_xyz) + st,_,_ = generate_adsorbate_2D(atoms, self.sites, self.site_adjacency, self.nslab, max_dist=np.inf, allowed_structure_site_structures=allowed_structure_site_structures, + keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) + admol_name_structure_dict[ad] = st + + calculation_directories = [] #identify pairs directories + for p1 in os.listdir(os.path.join(self.path,"pairs")): + for p2 in os.listdir(os.path.join(self.path,"pairs",p1)): + calculation_directories.append(os.path.join(self.path,"pairs",p1,p2)) + + + fw = train_covdep_model_firework(self.path,admol_name_path_dict,admol_name_structure_dict,self.sites,self.site_adjacency, + self.pynta_run_directory, self.metal, self.surface_type, self.slab_path, calculation_directories, self.coadsorbates[0], + self.coad_stable_sites, self.software, self.software_kwargs, self.software_kwargs_TS, self.freeze_ind, self.fmaxopt, + parents=self.fws, max_iters=self.max_iters, + Ncalc_per_iter=self.Ncalc_per_iter,iter=0,concern_energy_tol=self.concern_energy_tol,ignore_errors=True) + + self.fws.append(fw) + + + def execute(self,run_pairs=True,run_active_learning=False): + if run_pairs: + self.setup_pairs_calculations() + if run_active_learning: + self.setup_active_learning_loop() + wf = Workflow(self.fws, name=self.label) + self.launchpad.add_wf(wf) \ No newline at end of file diff --git a/pynta/mol.py b/pynta/mol.py index 54f938b3..9a551093 100644 --- a/pynta/mol.py +++ b/pynta/mol.py @@ -1,4 +1,4 @@ -from molecule.molecule import Molecule +from molecule.molecule import Molecule,Atom,Bond from ase.io import read, write from ase.data import covalent_radii from ase import Atoms @@ -13,12 +13,14 @@ get_rodrigues_rotation_matrix, get_angle_between, get_rejection_between) -from pynta.utils import get_unique_sym_struct_index_clusters, get_unique_sym, get_unique_sym_structs, get_unique_sym_struct_indices +from pynta.utils import get_unique_sym_struct_index_clusters, get_unique_sym, get_unique_sym_structs, get_unique_sym_struct_indices, get_occupied_sites from pynta.calculator import run_harmonically_forced_xtb from rdkit import Chem from copy import deepcopy import numpy as np import random +import itertools + def get_desorbed_with_map(mol): molcopy = mol.copy(deep=True) @@ -748,7 +750,7 @@ def get_ase_index(ind,template_mol_map,molecule_to_gratom_maps,nslab,ads_sizes): else: return None #surface site -def get_bond_lengths_sites(mol,ads,atom_map,surf_atom_map,nslab,facet="fcc111",metal="Cu",cas=None): +def get_bond_lengths_sites(mol,ads,atom_map,surf_atom_map,nslab,sites,site_adjacency,facet="fcc111",metal="Cu",): """ gets bond lengths and site information indexed to the Molecule object atoms bond lengths is a matrix with the distances between all atoms that share bonds @@ -757,20 +759,15 @@ def get_bond_lengths_sites(mol,ads,atom_map,surf_atom_map,nslab,facet="fcc111",m rev_atom_map = {value: key for key,value in atom_map.items()} #map from mol to ads rev_surf_atom_map = { value: key for key,value in surf_atom_map.items()} - if cas is None: - cas = SlabAdsorptionSites(ads,facet,allow_6fold=False,composition_effect=False, - label_sites=True, - surrogate_metal=metal) - adcov = SlabAdsorbateCoverage(ads,adsorption_sites=cas) - occ = adcov.get_sites(occupied_only=True) + occ = get_occupied_sites(ads,sites,nslab) surface_dict = [{"atom_index":x["bonding_index"]-nslab, "site":x["site"], "bond_length":x["bond_length"], "position":x["position"]} for x in occ] if len(occ) < len(surf_atom_map): #number of sites on geometry disagrees with surf_atom_map - print("occupational analysis in get_bond_lengths_sites failed") - print(mol.to_adjacency_list()) - print("expected {} sites".format(len(surf_atom_map))) - print("found {} sites".format(len(occ))) +# print("occupational analysis in get_bond_lengths_sites failed") +# print(mol.to_adjacency_list()) +# print("expected {} sites".format(len(surf_atom_map))) +# print("found {} sites".format(len(occ))) return None,None,None if mol.contains_surface_site(): @@ -817,3 +814,204 @@ def get_name(mol): return mol.to_smiles() except: return mol.to_adjacency_list().replace("\n"," ")[:-1] + + +def remove_slab(mol,remove_slab_bonds=False,update_atomtypes=True): + m = mol.copy(deep=True) + for site in m.get_surface_sites(): + for a in site.bonds.keys(): + if not a.is_surface_site(): + break + else: + m.remove_atom(site) + + if remove_slab_bonds: + bonds_to_remove = [] + for bd in m.get_all_edges(): + if bd.atom1.is_surface_site() and bd.atom2.is_surface_site(): + m.remove_bond(bd) + + if update_atomtypes: + m.update_atomtypes() + m.update_connectivity_values() + + return m + +def pluck_subgraph(mol,atom): + subgraph_atoms = [] + new_subgraph_atoms = [atom] + while new_subgraph_atoms != []: + temp = [] + subgraph_atoms.extend(new_subgraph_atoms) + for a in new_subgraph_atoms: + for a2 in a.bonds.keys(): + if a2 not in subgraph_atoms: + temp.append(a2) + new_subgraph_atoms = temp + + inds = [mol.atoms.index(a) for a in subgraph_atoms] + struct = Molecule(atoms=subgraph_atoms) + struct.update_atomtypes() + struct.update_connectivity_values() + return struct,inds + +def generate_without_site_info(m): + mol = m.copy(deep=True) + for a in mol.atoms: + if a.is_surface_site(): + a.site = "" + a.morphology = "" + return mol + +class FindingPathError(Exception): + pass + +def reduce_graph_to_pairs(admol): + adatoms = [a for a in admol.atoms if a.is_bonded_to_surface() and not a.is_surface_site()] + surface_save = set() + for i,a1 in enumerate(adatoms): + for a2 in adatoms[i+1:]: + paths = find_shortest_paths(a1,a2) + if paths is None: #separation between pair is so large the generated site graphs don't connect + raise FindingPathError(admol.to_adjacency_list()) + for path in paths: + surface_save = surface_save | set([a for a in path if a.is_surface_site()]) + + atoms_to_remove = [] + for i,a in enumerate(admol.atoms): + if a.is_surface_site() and not (a in surface_save): + atoms_to_remove.append(a) + + for a in atoms_to_remove: + admol.remove_atom(a) + + admol.update_atomtypes() + admol.update_connectivity_values() + + return admol + +def find_shortest_paths(start, end, path=None): + paths = [[start]] + outpaths = [] + while paths != []: + newpaths = [] + for path in paths: + for node in path[-1].edges.keys(): + if node in path: + continue + elif node is end: + outpaths.append(path[:]+[node]) + elif outpaths == []: + newpaths.append(path[:]+[node]) + if outpaths: + return outpaths + else: + paths = newpaths + + return None + +def find_shortest_paths_sites(start, end, path=None): + paths = [[start]] + outpaths = [] + while paths != []: + newpaths = [] + for path in paths: + for node in path[-1].edges.keys(): + if node in path: + continue + elif node is end: + outpaths.append(path[:]+[node]) + elif outpaths == [] and node.is_surface_site(): + newpaths.append(path[:]+[node]) + if outpaths: + return outpaths + else: + paths = newpaths + + return None + +def find_adsorbate_atoms_surface_sites(atom,mol): + """ + one atom of the associated adsorbate on the surface Molecule object mol + returns all of the surface atoms bonded to adsorbate + """ + atsout = [atom] + ats = [atom] + surf_sites = [] + while ats != []: + new_ats = [] + for a in ats: + for a2 in atom.edges.keys(): + if a2 in atsout: + continue + elif a2.is_surface_site() and a2 not in surf_sites: + surf_sites.append(a2) + elif not a2.is_surface_site(): + new_ats.append(a2) + atsout.append(a2) + ats = new_ats + + return atsout,surf_sites + +def split_adsorbed_structures(admol,clear_site_info=True,adsorption_info=False,atoms_to_skip=None, + atom_mapping=False): + """_summary_ + + Args: + admol (_type_): a Molecule object resolving a slab + clear_site_info (bool, optional): clears site identify information. Defaults to True. + adsorption_info (bool, optional): also returns the map from atom to index for admol for each surface site structures are split off from. Defaults to False. + atoms_to_skip (_type_, optional): list of atoms in admol to not include in the split structures. Defaults to None. + + Returns: + _type_: _description_ + """ + m = admol.copy(deep=True) + + if atoms_to_skip is None: + atoms_to_remove = [] + skip_atoms = [] + else: + skip_atoms = [m.atoms[admol.atoms.index(a)] for a in atoms_to_skip] + atoms_to_remove = skip_atoms[:] + + for i,at in enumerate(m.atoms): + if at.is_surface_site(): + bdict = m.get_bonds(at) + if len(bdict) > 0: + for a,b in bdict.items(): + if not a.is_surface_site() and (a not in skip_atoms): + break + else: + atoms_to_remove.append(at) + else: + atoms_to_remove.append(at) + + if adsorption_info: + adsorbed_atom_dict = {at: i for i,at in enumerate(m.atoms) if at.is_surface_site() and at not in atoms_to_remove} + + if atom_mapping: + mapping = {at: i for i,at in enumerate(m.atoms) if at not in atoms_to_remove} + + for at in atoms_to_remove: + m.remove_atom(at) + + if clear_site_info: + for at in m.atoms: + if at.is_surface_site(): + at.site = "" + at.morphology = "" + split_structs = m.split() + for s in split_structs: + s.update(sort_atoms=False) + s.update_connectivity_values() + s.multiplicity = 1 + + if atom_mapping and not adsorption_info: + return split_structs,mapping + elif atom_mapping and adsorption_info: + return split_structs,adsorption_info,mapping + elif not atom_mapping and adsorption_info: + return split_structs,adsorbed_atom_dict + else: + return split_structs diff --git a/pynta/postprocessing.py b/pynta/postprocessing.py index 23ab75b7..7d733a47 100644 --- a/pynta/postprocessing.py +++ b/pynta/postprocessing.py @@ -12,6 +12,7 @@ from molecule.molecule import Molecule from acat.adsorption_sites import SlabAdsorptionSites from molecule.thermo import Wilhoit +from pynta.geometricanalysis import get_adsorbate_energies, get_vibdata eV_to_Jmol = 9.648328e4 @@ -192,78 +193,6 @@ def fit_rate_coefficient(thermoRs,thermoTS,dE,rnum,s0,Ts=None): return arr -def get_adsorbate_energies(ad_path,atom_corrections=None,include_zpe=True): - """ - get ZPE corrected adsorbate energies - """ - dirs = os.listdir(ad_path) - slab = read(os.path.join(os.path.split(os.path.split(ad_path)[0])[0],"slab.xyz")) - Eslab = slab.get_potential_energy() - with open(os.path.join(ad_path,"info.json")) as f: - info = json.load(f) - - m = Molecule().from_adjacency_list(info["adjlist"]) - if atom_corrections: - AEC = 0.0 - for atom in m.atoms: - s = str(atom.element) - if not "X" in s: - AEC += atom_corrections[s] - else: #if no corrections don't add a correction - AEC = 0.0 - - gasphase = len(info["gratom_to_molecule_surface_atom_map"]) == 0 - spin = (Molecule().from_adjacency_list(info["adjlist"]).multiplicity - 1.0)/2.0 - Es = dict() - thermos = dict() - fs = dict() - for d in dirs: - if d == "info.json": - continue - optdir = os.path.join(ad_path,d,d+".xyz") - freqdir = os.path.join(ad_path,d,"vib.json_vib.json") - if not (os.path.exists(optdir) and os.path.exists(freqdir)): - continue - sp = read(optdir) - E = sp.get_potential_energy() - if gasphase: - vibdata = get_vibdata(optdir,freqdir,0) - else: - vibdata = get_vibdata(optdir,freqdir,len(slab)) - - freqs = vibdata.get_frequencies().tolist() - fs[d] = freqs - - ZPE = vibdata.get_zero_point_energy() - - if not gasphase: - - if include_zpe: - Es[d] = E + ZPE - Eslab - AEC - else: - Es[d] = E - Eslab - AEC - thermos[d] = HarmonicThermo(np.array([x for x in np.real(vibdata.get_energies()) if x > 0.0]),potentialenergy=E-Eslab-AEC) - else: - if len(sp) == 1: - geometry = 'monatomic' - elif any([ I < 1.0e-3 for I in sp.get_moments_of_inertia()]): - geometry = "linear" - else: - geometry = "nonlinear" - - if include_zpe: - Es[d] = E + ZPE - AEC - else: - Es[d] = E - AEC - - thermos[d] = IdealGasThermo(np.real(vibdata.get_energies()),geometry, - potentialenergy=E - AEC,atoms=sp,symmetrynumber=1, - natoms=len(sp),spin=spin) - - return Es,thermos,fs - - - def get_reactant_products_energy(ts_path,reactants,products): path = os.path.split(ts_path)[0] ads_path = os.path.join(path,"Adsorbates") @@ -302,17 +231,7 @@ def get_reactant_products_energy(ts_path,reactants,products): pthermos.append(pthermo[ind]) return rE,pE,rthermos,pthermos -def get_vibdata(dopt,dvib,nslab): - xyz = read(dopt) - inds = range(nslab,len(xyz)) - with open(dvib,'r') as f: - out = json.load(f) - sh = (len(inds),3,len(inds),3) - H = np.asarray(out["hessian"]) - if H.shape != sh: - H = H.reshape(sh) - vib = VibrationsData(xyz,H,inds) - return vib + def get_animated_mode(dopt,dvib,nslab,n=0): diff --git a/pynta/tasks.py b/pynta/tasks.py index 89eb865a..2cd7a88d 100644 --- a/pynta/tasks.py +++ b/pynta/tasks.py @@ -20,6 +20,8 @@ from pynta.utils import * from pynta.calculator import run_harmonically_forced_xtb, add_sella_constraint from pynta.mol import * +from pynta.coveragedependence import * +from pynta.geometricanalysis import * from xtb.ase.calculator import XTB import numpy as np import multiprocessing as mp @@ -128,6 +130,7 @@ def run_task(self, fw_spec): raise e else: errors.append(e) + return FWAction(stored_data={"error": errors,"converged": False}) if socket and os.path.exists(socket_address): os.unlink(socket_address) @@ -469,7 +472,7 @@ class MolecularTSEstimate(FiretaskBase): required_params = ["rxn","ts_path","slab_path","adsorbates_path","rxns_file","path","metal","facet", "name_to_adjlist_dict", "gratom_to_molecule_atom_maps", "gratom_to_molecule_surface_atom_maps","irc_mode", - "vib_obj_dict","opt_obj_dict","nslab","Eharmtol","Eharmfiltertol","Ntsmin","max_num_hfsp_opts"] + "vib_obj_dict","opt_obj_dict","nslab","Eharmtol","Eharmfiltertol","Ntsmin","max_num_hfsp_opts","surrogate_metal"] optional_params = ["out_path","spawn_jobs","nprocs","IRC_obj_dict"] def run_task(self, fw_spec): gratom_to_molecule_atom_maps = {sm: {int(k):v for k,v in d.items()} for sm,d in self["gratom_to_molecule_atom_maps"].items()} @@ -490,12 +493,13 @@ def run_task(self, fw_spec): Ntsmin = self["Ntsmin"] max_num_hfsp_opts = self["max_num_hfsp_opts"] slab_path = self["slab_path"] + surrogate_metal = self["surrogate_metal"] slab = read(slab_path) irc_mode = self["irc_mode"] cas = SlabAdsorptionSites(slab,facet,allow_6fold=False,composition_effect=False, label_sites=True, - surrogate_metal=metal) + surrogate_metal=surrogate_metal) adsorbates_path = self["adsorbates_path"] @@ -915,6 +919,394 @@ def run_task(self, fw_spec): return FWAction(stored_data={"error": errors,"converged": converged}) +def calculate_configruation_energies_firework(admol_name,tree_file,path,coad_stable_sites,Nocc_isolated, + coadmol_E_dict,concern_energy_tol=None,out_path=None,parents=[],iter=0,ignore_errors=False): + d = {"admol_name": admol_name,"tree_file": tree_file,"path": path, + "coad_stable_sites": coad_stable_sites,"Nocc_isolated": Nocc_isolated,"coadmol_E_dict": coadmol_E_dict, + "concern_energy_tol": concern_energy_tol} + t1 = CalculateConfigurationEnergiesTask(d) + if out_path is None: + out_path_energy = os.path.join(os.path.split(tree_file)[0],"Ncoad_energy_"+admol_name+".json") + out_path_config = os.path.join(os.path.split(tree_file)[0],"Ncoad_config_"+admol_name+".json") + out_path_concern = os.path.join(os.path.split(tree_file)[0],"configs_of_concern_"+admol_name+".json") + else: + out_path_energy = os.path.join(out_path,"Ncoad_energy_"+admol_name+".json") + out_path_config = os.path.join(out_path,"Ncoad_config_"+admol_name+".json") + out_path_concern = os.path.join(out_path,"configs_of_concern_"+admol_name+".json") + t2 = FileTransferTask({'files': [{'src': "Ncoad_energy_"+admol_name+".json", 'dest': out_path_energy}, + {'src': "Ncoad_config_"+admol_name+".json", 'dest': out_path_config}, + {'src': "configs_of_concern_"+admol_name+".json", 'dest': out_path_concern}], 'mode': 'copy', 'ignore_errors': ignore_errors}) + return Firework([t1,t2],parents=parents,name=admol_name+"_energies"+str(iter)) + +@explicit_serialize +class CalculateConfigurationEnergiesTask(FiretaskBase): + required_params = ["admol_name","tree_file","path","coad_stable_sites","Nocc_isolated","coadmol_E_dict"] + optional_params = ["concern_energy_tol","ignore_errors"] + def run_task(self, fw_spec): + admol_name = self['admol_name'] + tree_file = self["tree_file"] + path= self["path"] + coad_stable_sites = self["coad_stable_sites"] + Nocc_isolated = self["Nocc_isolated"] + coadmol_E_dict = {Molecule().from_adjacency_list(k):v for k,v in self["coadmol_E_dict"].items()} + concern_energy_tol = self["concern_energy_tol"] if "concern_energy_tol" in self.keys() else None + ignore_errors = self["ignore_errors"] if "ignore_errors" in self.keys() else False + + try: + nodes = read_nodes(tree_file) + root = [n for n in nodes.values() if n.parent is None][0] + if len(root.children) == 1: #pairwise only tree + tree = MultiEvalSubgraphIsomorphicDecisionTreeRegressor([adsorbate_interaction_decomposition], + nodes=nodes) + elif len(root.children) == 2: + tree = MultiEvalSubgraphIsomorphicDecisionTreeRegressor([adsorbate_interaction_decomposition,adsorbate_triad_interaction_decomposition], + nodes=nodes) + else: + raise ValueError + + with open(os.path.join(path,"Configurations",admol_name+".json"),'r') as f: + configs = [Molecule().from_adjacency_list(m,check_consistency=False) for m in json.load(f)] + + Ncoad_energy_dict,Ncoad_config_dict,configs_of_concern_admol = get_cov_energies_configs_concern_tree(tree, configs, coad_stable_sites, Nocc_isolated, concern_energy_tol, + coadmol_E_dict=coadmol_E_dict) + with open("Ncoad_energy_"+admol_name+".json",'w') as f: + json.dump(Ncoad_energy_dict,f) + with open("Ncoad_config_"+admol_name+".json",'w') as f: + json.dump(Ncoad_config_dict,f) + with open("configs_of_concern_"+admol_name+".json",'w') as f: + json.dump([tuple([v[0].to_adjacency_list(),v[1],v[2],v[3]]) for v in configs_of_concern_admol.values()],f) + + except Exception as e: + if not ignore_errors: + raise e + else: + return FWAction(stored_data={"error": e}, exit=True) + + return FWAction() + +def train_covdep_model_firework(path,admol_name_path_dict,admol_name_structure_dict,sites,site_adjacency, + pynta_dir, metal, facet, slab_path, calculation_directories, coadname, + coad_stable_sites, software, software_kwargs, software_kwargs_TS, freeze_ind, fmaxopt, parents=[],Ncalc_per_iter=6,iter=0,max_iters=6,concern_energy_tol=None,ignore_errors=False): + d = {"path": path, "admol_name_path_dict": admol_name_path_dict, "admol_name_structure_dict": {k : v.to_adjacency_list() for k,v in admol_name_structure_dict.items()}, + "sites": sites, "site_adjacency": {str(k):v for k,v in site_adjacency.items()}, "pynta_dir": pynta_dir, "metal": metal, "facet": facet, "slab_path": slab_path, + "calculation_directories": calculation_directories, "coadname": coadname, "coad_stable_sites": coad_stable_sites, + "Ncalc_per_iter": Ncalc_per_iter, "iter": iter, "max_iters": max_iters, "software": software, "software_kwargs": software_kwargs, "software_kwargs_TS": software_kwargs_TS, "freeze_ind": freeze_ind, + "fmaxopt": fmaxopt, "concern_energy_tol": concern_energy_tol, "ignore_errors": ignore_errors} + t1 = TrainCovdepModelTask(d) + return Firework([t1],parents=parents,name="Training Model "+str(iter),spec={"_allow_fizzled_parents":True, "_priority": 4}) + +@explicit_serialize +class TrainCovdepModelTask(FiretaskBase): + required_params = ["path","admol_name_path_dict","admol_name_structure_dict","sites","site_adjacency", "pynta_dir", "metal", "facet", + "slab_path", "calculation_directories", "coadname", "coad_stable_sites", "Ncalc_per_iter", "iter", "max_iters", "software", + "software_kwargs", "software_kwargs_TS", "freeze_ind", "fmaxopt"] + optional_params = ["concern_energy_tol","ignore_errors"] + def run_task(self, fw_spec): + path = self["path"] + admol_name_path_dict = self["admol_name_path_dict"] + admol_name_structure_dict = {k: Molecule().from_adjacency_list(v,check_consistency=False) for k,v in self["admol_name_structure_dict"].items()} + sites = [] + for site in self["sites"]: + site["normal"] = np.array(site["normal"]) + site["position"] = np.array(site["position"]) + site["indices"] = tuple(site["indices"]) + sites.append(site) + site_adjacency = {int(k):[int(x) for x in v] for k,v in self["site_adjacency"].items()} + pynta_dir = self["pynta_dir"] + metal = self["metal"] + facet = self["facet"] + slab_path = self["slab_path"] + calculation_directories = self["calculation_directories"] + coadname = self["coadname"] + Ncalc_per_iter = self["Ncalc_per_iter"] + iter = self["iter"] + coad_stable_sites = self["coad_stable_sites"] + concern_energy_tol = self["concern_energy_tol"] if "concern_energy_tol" in self.keys() else None + ignore_errors = self["ignore_errors"] if "ignore_errors" in self.keys() else False + software_kwargs = self["software_kwargs"] + software = self["software"] + freeze_ind = self["freeze_ind"] + software_kwargs_TS = self["software_kwargs_TS"] + fmaxopt = self["fmaxopt"] + max_iters = self["max_iters"] + + coad = admol_name_structure_dict[coadname] + coad_simple = remove_slab(coad) + + coad_path = os.path.join(pynta_dir,"Adsorbates",coadname) + slab = read(slab_path) + nslab = len(slab) + allowed_structure_site_structures = generate_allowed_structure_site_structures(os.path.join(pynta_dir,"Adsorbates"),sites,site_adjacency,nslab,max_dist=np.inf) + + ad_energy_dict = get_lowest_adsorbate_energies(os.path.join(pynta_dir,"Adsorbates")) + Es = get_adsorbate_energies(coad_path)[0] + coadmol_E_dict = dict() + coadmol_stability_dict = dict() + for p in os.listdir(coad_path): + if p == "info.json" or (p not in Es.keys()): + continue + admol_init,neighbor_sites_init,ninds_init = generate_adsorbate_2D(read(os.path.join(coad_path,p,p+"_init.xyz")),sites,site_adjacency,nslab,max_dist=np.inf,allowed_structure_site_structures=allowed_structure_site_structures) + admol,neighbor_sites,ninds = generate_adsorbate_2D(read(os.path.join(coad_path,p,p+".xyz")),sites,site_adjacency,nslab,max_dist=np.inf,allowed_structure_site_structures=allowed_structure_site_structures) + out_struct = split_adsorbed_structures(admol,clear_site_info=False)[0] + out_struct_init = split_adsorbed_structures(admol_init,clear_site_info=False)[0] + coadmol_E_dict[out_struct] = Es[p] + if admol_init.is_isomorphic(admol,save_order=True): + coadmol_stability_dict[out_struct_init] = True + else: + coadmol_stability_dict[out_struct_init] = False + + if not os.path.exists(os.path.join(path,"Configurations")): + info_paths = {adname: os.path.join(os.path.split(os.path.split(p)[0])[0],"info.json") for adname,p in admol_name_path_dict.items()} + imag_freq_paths = {adname: os.path.join(os.path.split(p)[0],"vib.json_vib.json") for adname,p in admol_name_path_dict.items()} + unstable_pairs = get_unstable_pairs(os.path.join(path,'pairs'), + os.path.join(pynta_dir,"Adsorbates"), + sites,site_adjacency,nslab,max_dist=np.inf,show=False, infopath_dict=info_paths, imag_freq_path_dict=imag_freq_paths) + + os.makedirs(os.path.join(path,"Configurations")) + for admol_name,admol in admol_name_structure_dict.items(): + configs = get_configurations(admol, coad_simple, coad_stable_sites, coadmol_stability_dict=coadmol_stability_dict, unstable_groups=unstable_pairs, + coadmol_E_dict=coadmol_E_dict) + + with open(os.path.join(path,"Configurations",admol_name+".json"),'w') as f: + json.dump([x.to_adjacency_list() for x in configs],f) + + if iter > 1: + with open(os.path.join(path,"pairs_datums.json"),'r') as f: + pairs_datums = [Datum(mol=Molecule().from_adjacency_list(d["mol"],check_consistency=False), value=d["value"]) for d in json.load(f)] + with open(os.path.join(path,"Iterations",str(iter-1),"cumulative_sample_datums.json"),'r') as f: + old_sample_datums = [Datum(mol=Molecule().from_adjacency_list(d["mol"],check_consistency=False), value=d["value"]) for d in json.load(f)] + elif iter == 1: + with open(os.path.join(path,"pairs_datums.json"),'r') as f: + pairs_datums = [Datum(mol=Molecule().from_adjacency_list(d["mol"],check_consistency=False), value=d["value"]) for d in json.load(f)] + old_sample_datums = [] + + if iter == 0: + computed_configs = [] + else: + with open(os.path.join(path,"Iterations",str(iter-1),"computed_configurations.json"),'r') as f: + computed_configs = [Molecule().from_adjacency_list(x,check_consistency=False) for x in json.load(f)] + + new_datums_E = [] + new_computed_configs = [] + for d in calculation_directories: + with open(os.path.join(d,"info.json"),'r') as f: + info = json.load(f) + adjlist = info['adjlist'] + init_config = Molecule().from_adjacency_list(adjlist,check_consistency=False) + new_computed_configs.append(init_config) + datum_E,datums_stability = process_calculation(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,pynta_dir,coadmol_E_dict,max_dist=3.0,rxn_alignment_min=0.7, + coad_disruption_tol=1.1,out_file_name="out",init_file_name="init",vib_file_name="vib_vib",is_ad=None) + if datum_E: + new_datums_E.append(datum_E) + if datum_E and not datum_E.mol.is_isomorphic(init_config,save_order=True): + new_computed_configs.append(datum_E.mol) + + if not os.path.exists(os.path.join(path,"Iterations",str(iter))): + os.makedirs(os.path.join(path,"Iterations",str(iter))) + + with open(os.path.join(path,"Iterations",str(iter),"computed_configurations.json"),'w') as f: + json.dump([x.to_adjacency_list() for x in computed_configs+new_computed_configs],f) + + if iter == 0: + pairs_datums = new_datums_E + with open(os.path.join(path,"pairs_datums.json"),'w') as f: + json.dump([{"mol": d.mol.to_adjacency_list(),"value": d.value} for d in pairs_datums],f) + sampling_datums = [] + else: + sampling_datums = old_sample_datums + new_datums_E + with open(os.path.join(path,"Iterations",str(iter),"cumulative_sample_datums.json"),'w') as f: + json.dump([{"mol": d.mol.to_adjacency_list(),"value": d.value}for d in sampling_datums],f) + + Nconfigs = len(admol_name_structure_dict) + Ncoads = 1 + tree = train_sidt_cov_dep_regressor(pairs_datums,sampling_datums,r_site=None, + r_atoms=None,node_fract_training=0.7) + + tree_file = os.path.join(path,"Iterations",str(iter),"regressor.json") + write_nodes(tree,tree_file) + + config_E_fws = [] + for admol_name in admol_name_path_dict.keys(): + st = admol_name_structure_dict[admol_name] + Nocc_isolated = len([a for a in st.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) + fw = calculate_configruation_energies_firework(admol_name,tree_file,path,coad_stable_sites,Nocc_isolated, + {k.to_adjacency_list(): v for k,v in coadmol_E_dict.items()},concern_energy_tol=concern_energy_tol,parents=[],iter=iter,ignore_errors=ignore_errors) + config_E_fws.append(fw) + + scfw = select_calculations_firework(path,admol_name_path_dict,admol_name_structure_dict,sites,site_adjacency, + pynta_dir, metal, facet, slab_path, calculation_directories, coadname, + coad_stable_sites, software, software_kwargs, software_kwargs_TS, freeze_ind, fmaxopt, parents=config_E_fws,Ncalc_per_iter=Ncalc_per_iter,iter=iter, + max_iters=max_iters,concern_energy_tol=concern_energy_tol,ignore_errors=ignore_errors) + + newwf = Workflow(config_E_fws+[scfw],name="Select Calculations "+str(iter)) + + return FWAction(detours=newwf) + +def select_calculations_firework(path,admol_name_path_dict,admol_name_structure_dict,sites,site_adjacency, + pynta_dir, metal, facet, slab_path, calculation_directories, coadname, + coad_stable_sites, software, software_kwargs, software_kwargs_TS, freeze_ind, fmaxopt, parents=[],Ncalc_per_iter=6,iter=0,max_iters=6,concern_energy_tol=None,ignore_errors=False): + d = {"path": path,"admol_name_path_dict": admol_name_path_dict,"admol_name_structure_dict": {k:v.to_adjacency_list() for k,v in admol_name_structure_dict.items()}, + "sites": sites, "site_adjacency": {str(k): v for k,v in site_adjacency.items()}, "pynta_dir": pynta_dir, "metal": metal, "facet": facet, "slab_path": slab_path, + "calculation_directories": calculation_directories, "coadname": coadname, "coad_stable_sites": coad_stable_sites, + "Ncalc_per_iter": Ncalc_per_iter, "software": software, "iter": iter, "max_iters": max_iters, + "software_kwargs": software_kwargs, "software_kwargs_TS": software_kwargs_TS, "freeze_ind": freeze_ind, + "fmaxopt": fmaxopt, "concern_energy_tol": concern_energy_tol, "ignore_errors": ignore_errors} + t1 = SelectCalculationsTask(d) + return Firework([t1],parents=parents,name="Selecting Calculations "+str(iter),spec={"_priority": 4}) + +@explicit_serialize +class SelectCalculationsTask(FiretaskBase): + required_params = ["path","admol_name_path_dict","admol_name_structure_dict","sites","site_adjacency", "pynta_dir", "metal", "facet", + "slab_path", "calculation_directories", "coadname", "coad_stable_sites", "iter", "software", "max_iters", + "software_kwargs", "software_kwargs_TS", "freeze_ind", "fmaxopt"] + optional_params = ["concern_energy_tol","ignore_errors"] + def run_task(self, fw_spec): + path = self["path"] + admol_name_path_dict = self["admol_name_path_dict"] + admol_name_structure_dict = {k: Molecule().from_adjacency_list(v,check_consistency=False) for k,v in self["admol_name_structure_dict"].items()} + sites = [] + sites = [] + for site in self["sites"]: + site["normal"] = np.array(site["normal"]) + site["position"] = np.array(site["position"]) + site["indices"] = tuple(site["indices"]) + sites.append(site) + site_adjacency = {int(k):[int(x) for x in v] for k,v in self["site_adjacency"].items()} + pynta_dir = self["pynta_dir"] + metal = self["metal"] + facet = self["facet"] + slab_path = self["slab_path"] + calculation_directories = self["calculation_directories"] + coadname = self["coadname"] + Ncalc_per_iter = self["Ncalc_per_iter"] + iter = self["iter"] + coad_stable_sites = self["coad_stable_sites"] + concern_energy_tol = self["concern_energy_tol"] if "concern_energy_tol" in self.keys() else None + ignore_errors = self["ignore_errors"] if "ignore_errors" in self.keys() else False + software_kwargs = self["software_kwargs"] + software = self["software"] + freeze_ind = self["freeze_ind"] + software_kwargs_TS = self["software_kwargs_TS"] + fmaxopt = self["fmaxopt"] + max_iters = self["max_iters"] + + if iter == max_iters: #terminate + return FWAction() + + coad = admol_name_structure_dict[coadname] + + coad_path = os.path.join(pynta_dir,"Adsorbates",coadname) + + slab = read(slab_path) + nslab = len(slab) + + allowed_structure_site_structures = generate_allowed_structure_site_structures(os.path.join(pynta_dir,"Adsorbates"),sites,site_adjacency,nslab,max_dist=np.inf) + + ad_energy_dict = get_lowest_adsorbate_energies(os.path.join(pynta_dir,"Adsorbates")) + Es = get_adsorbate_energies(coad_path)[0] + coadmol_E_dict = dict() + coadmol_stability_dict = dict() + for p in os.listdir(coad_path): + if p == "info.json" or (p not in Es.keys()): + continue + admol_init,neighbor_sites_init,ninds_init = generate_adsorbate_2D(read(os.path.join(coad_path,p,p+"_init.xyz")),sites,site_adjacency,nslab,max_dist=np.inf) + admol,neighbor_sites,ninds = generate_adsorbate_2D(read(os.path.join(coad_path,p,p+".xyz")),sites,site_adjacency,nslab,max_dist=np.inf) + out_struct = split_adsorbed_structures(admol,clear_site_info=False)[0] + out_struct_init = split_adsorbed_structures(admol_init,clear_site_info=False)[0] + coadmol_E_dict[out_struct] = Es[p] + if admol_init.is_isomorphic(admol,save_order=True): + coadmol_stability_dict[out_struct_init] = True + else: + coadmol_stability_dict[out_struct_init] = False + + + #load configurations and Ncoad_energies + configs_of_concern_by_admol = dict() + Ncoad_energy_by_admol = dict() + for admol_name,st in admol_name_structure_dict.items(): + config_path = os.path.join(path,"Iterations",str(iter),"configs_of_concern_"+admol_name+".json") + with open(config_path,'r') as f: + configs_of_concern_by_admol[admol_name] = [(Molecule().from_adjacency_list(k[0],check_consistency=False),k[1],k[2],k[3]) for k in json.load(f)] + Ncoad_energy_path = os.path.join(path,"Iterations",str(iter),"Ncoad_energy_"+admol_name+".json") + with open(Ncoad_energy_path,'w') as f: + Ncoad_energy_by_admol[admol_name] = json.load(f) + + #load tree + nodes = read_nodes(os.path.join(path,"Iterations",str(iter),"regressor.json")) + tree = MultiEvalSubgraphIsomorphicDecisionTreeRegressor([adsorbate_interaction_decomposition,adsorbate_triad_interaction_decomposition], + nodes=nodes) + + #load computed configs + with open(os.path.join(path,"Iterations",str(iter),"computed_configurations.json"),'r') as f: + computed_configs = [Molecule().from_adjacency_list(x,check_consistency=False) for x in json.load(f)] + + configs_for_calculation,admol_to_config_for_calculation= get_configs_for_calculation(configs_of_concern_by_admol,Ncoad_energy_by_admol,admol_name_structure_dict,computed_configs,tree,Ncalc_per_iter) + + os.makedirs(os.path.join(path,"Iterations",str(iter),"Samples")) + assert len(configs_for_calculation) > 0, configs_for_calculation + sample_fws = [] + calculation_directories = [] + for i,config in enumerate(configs_for_calculation): + adname = None + for admol_name,config_list in admol_to_config_for_calculation.items(): + if any(x is config for x in config_list): + adname = admol_name + break + else: + raise ValueError + + partial_admol = admol_name_structure_dict[adname] + admol_path = admol_name_path_dict[adname] + partial_atoms = read(admol_path) + init_atoms = mol_to_atoms(config,slab,sites,metal,partial_atoms=partial_atoms,partial_admol=partial_admol) + os.makedirs(os.path.join(path,"Iterations",str(iter),"Samples",str(i))) + init_path = os.path.join(path,"Iterations",str(iter),"Samples",str(i),"init.xyz") + write(init_path,init_atoms) + calculation_directories.append(os.path.split(init_path)[0]) + json_out = {"adjlist": config.to_adjacency_list(), "xyz": admol_path} + with open(os.path.join(os.path.split(init_path)[0],'info.json'),'w') as f: + json.dump(json_out,f) + + if not any(bd.get_order_str() == 'R' for bd in config.get_all_edges()): + fwopt = optimize_firework(init_path, + software,"weakopt", + opt_method="MDMin",opt_kwargs={'dt': 0.05,"trajectory": "weakopt.traj"},software_kwargs=software_kwargs,order=0, + run_kwargs={"fmax" : 0.5, "steps" : 30},parents=[], + constraints=["freeze up to {}".format(freeze_ind)], + ignore_errors=True, metal=metal, facet=facet, priority=3) + fwopt2 = optimize_firework(os.path.join(os.path.split(init_path)[0],"weakopt.xyz"), + software,"out", + opt_method="QuasiNewton",opt_kwargs={"trajectory": "out.traj"},software_kwargs=software_kwargs,order=0, + run_kwargs={"fmax" : fmaxopt, "steps" : 70},parents=[fwopt], + constraints=["freeze up to {}".format(freeze_ind)], + ignore_errors=True, metal=metal, facet=facet, priority=2) + + fwvib = vibrations_firework(os.path.join(os.path.split(init_path)[0],"out.xyz"), + software,"vib",software_kwargs=software_kwargs,parents=[fwopt2], + constraints=["freeze up to "+str(nslab)]) + sample_fws.extend([fwopt,fwopt2,fwvib]) + else: + fwopt = optimize_firework(init_path, + software,"out", sella=True, + opt_kwargs={"trajectory": "out.traj"},software_kwargs=software_kwargs_TS, + order=1, + run_kwargs={"fmax" : fmaxopt, "steps" : 70},parents=[], + constraints=["freeze up to {}".format(freeze_ind)], + ignore_errors=True, metal=metal, facet=facet, priority=3) + + fwvib = vibrations_firework(os.path.join(os.path.split(init_path)[0],"out.xyz"), + software,"vib",software_kwargs=software_kwargs,parents=[fwopt], + constraints=["freeze up to "+str(nslab)]) + sample_fws.extend([fwopt,fwvib]) + + tfw = train_covdep_model_firework(path,admol_name_path_dict,admol_name_structure_dict,sites,site_adjacency, + pynta_dir, metal, facet, slab_path, calculation_directories, coadname, + coad_stable_sites, software, software_kwargs, software_kwargs_TS, freeze_ind, fmaxopt, parents=sample_fws, + Ncalc_per_iter=Ncalc_per_iter,iter=iter+1,max_iters=max_iters,concern_energy_tol=concern_energy_tol,ignore_errors=ignore_errors) + newwf = Workflow(sample_fws+[tfw],name="Train Iteration "+str(iter+1)) + + return FWAction(detours=newwf) + def map_harmonically_forced_xtb(input): tsstruct,atom_bond_potentials,site_bond_potentials,nslab,constraints,ts_path,j,molecule_to_atom_maps,ase_to_mol_num = input os.makedirs(os.path.join(ts_path,str(j))) diff --git a/pynta/transitionstate.py b/pynta/transitionstate.py index c8fed6ce..4e456dda 100644 --- a/pynta/transitionstate.py +++ b/pynta/transitionstate.py @@ -168,6 +168,8 @@ def get_unique_TS_structs(adsorbates,species_names,slab,cas,nslab,num_surf_sites Generate unique initial structures for TS guess generation """ tsstructs = [] + slab_sites = cas.get_sites() + site_adjacency = cas.get_neighbor_site_list() ordered_adsorbates = [adsorbates[name] for name in species_names] for adss in itertools.product(*ordered_adsorbates): if num_surf_sites[0] > 0: @@ -184,7 +186,7 @@ def get_unique_TS_structs(adsorbates,species_names,slab,cas,nslab,num_surf_sites ad = adss[1][nslab:] bondlengths1,sites1,site_lengths1 = get_bond_lengths_sites(mol_dict[name],adss[1],gratom_to_molecule_atom_maps[name], gratom_to_molecule_surface_atom_maps[name],nslab, - facet=facet,metal=metal,cas=cas) + slab_sites,site_adjacency,facet=facet,metal=metal) sitetype1 = list(sites1.values())[0] height1 = list(site_lengths1.values())[0] surf_ind1 = list(gratom_to_molecule_surface_atom_maps[name].keys())[0] @@ -201,7 +203,7 @@ def get_unique_TS_structs(adsorbates,species_names,slab,cas,nslab,num_surf_sites ad2 = adss[2][nslab:] bondlengths2,sites2,site_lengths2 = get_bond_lengths_sites(mol_dict[name2],adss[2],gratom_to_molecule_atom_maps[name2], gratom_to_molecule_surface_atom_maps[name2],nslab, - facet=facet,metal=metal,cas=cas) + slab_sites,site_adjacency,facet=facet,metal=metal) sitetype2 = list(sites2.values())[0] height2 = list(site_lengths2.values())[0] surf_ind2 = list(gratom_to_molecule_surface_atom_maps[name2].keys())[0] @@ -263,6 +265,8 @@ def generate_constraints_harmonic_parameters(tsstructs,adsorbates,slab,forward_t site_bond_potential_lists = [] site_bond_dict_list = [] out_structs = [] + slab_sites = cas.get_sites() + site_adjacency = cas.get_neighbor_site_list() nodes_file = os.path.join(os.path.split(pynta.models.__file__)[0],"TS_tree_nodes.json") nodes = read_nodes(nodes_file) @@ -286,7 +290,7 @@ def generate_constraints_harmonic_parameters(tsstructs,adsorbates,slab,forward_t bondlengths,sites,site_lengths = get_bond_lengths_sites(mol_dict[name],ads, gratom_to_molecule_atom_maps[name], gratom_to_molecule_surface_atom_maps[name], - nslab,facet=facet,metal=metal,cas=cas) + nslab,slab_sites,site_adjacency,facet=facet,metal=metal) bdlength_list.append(bondlengths) sites_list.append(sites) site_lengths_list.append(site_lengths) @@ -312,7 +316,7 @@ def generate_constraints_harmonic_parameters(tsstructs,adsorbates,slab,forward_t bondlengths,sites,site_lengths = get_bond_lengths_sites(mol_dict[name],ads, gratom_to_molecule_atom_maps[name], gratom_to_molecule_surface_atom_maps[name], - nslab,facet=facet,metal=metal,cas=cas) + nslab,slab_sites,site_adjacency,facet=facet,metal=metal) bdlength_list.append(bondlengths) sites_list.append(sites) site_lengths_list.append(site_lengths) @@ -566,6 +570,10 @@ def estimate_deq_k(sidt,labels,dwell,forward_template,reverse_template,template_ a.label = '' v = sidt.evaluate(mol) + + if hasattr(v,"value"): + v = v.value + if any([a.is_surface_site() for a in mol.get_labeled_atoms('*')]): return dwell*(v-1.0),100.0 else: diff --git a/pynta/utils.py b/pynta/utils.py index 56264bfd..298db204 100644 --- a/pynta/utils.py +++ b/pynta/utils.py @@ -4,11 +4,107 @@ from ase.utils.structure_comparator import SymmetryEquivalenceCheck from ase.io import write, read import ase.constraints +from ase.geometry import get_distances from copy import deepcopy from importlib import import_module import numpy as np import copy +def sites_match(site1,site2,slab,tol=0.5): + """determine if two sites match + + Args: + site1 (dict): site dictionary for first site + site2 (dict): site dictionary for second site + slab (ase.Atoms): the ase.Atoms object for the slab the sites are on + tol (float, optional): Distance tolerance defaults to 0.5 Angstroms. + + Returns: + _type_: _description_ + """ + v,dist = get_distances([site1["position"]], [site2["position"]], cell=slab.cell, pbc=slab.pbc) + if dist > tol: + return False + elif site1["site"] != site2["site"]: + return False + elif site1["morphology"] != site2["morphology"]: + return False + else: + return True + +def get_occupied_sites(struct,sites,nslab,allowed_site_dict=dict(),site_bond_cutoff=2.5, + site_bond_disruption_cutoff=0.5,cutoff_corrections={"ontop": 0.3}): + """determine what sites are occupied by what atoms + + Args: + struct (ase.Atoms): Atoms object for the structure + sites (list): list of site dictionaries + nslab (int): number of atoms in the surface + allowed_site_dict (dict, optional): dictionary mapping atom index to a list of allowed (site,morphology) for that atom + site_bond_cutoff (float, optional): _description_. Defaults to 2.5. + + Raises: + ValueError: _description_ + + Returns: + _type_: _description_ + """ + occ_sites = [] + for i in range(nslab,len(struct)): + pos = struct.positions[i] + if i in allowed_site_dict.keys(): + allowed_sites = allowed_site_dict[i] + else: + allowed_sites = None + siteout = None + mindist = None + for site in sites: + if allowed_sites and (site["site"],site["morphology"]) not in allowed_sites: #skip disallowed sites + continue + v,dist = get_distances([site["position"]], [pos], cell=struct.cell, pbc=struct.pbc) + if siteout is None: + siteout = site + mindist = dist + n = v + else: + if dist < mindist: + mindist = dist + siteout = site + n = v + + if mindist is None: + #print(i) + #view(struct) + raise ValueError + + if (siteout["site"] not in cutoff_corrections.keys() and mindist < site_bond_cutoff) or (siteout["site"] in cutoff_corrections.keys() and mindist < site_bond_cutoff + cutoff_corrections[siteout["site"]]): + mindn = None + for j in range(nslab,len(struct)): #check for site bond disruption by other adsorbed atoms + if i == j: + continue + else: + AB,ABdist = get_distances([struct.positions[j]], [pos], cell=struct.cell, pbc=struct.pbc) + x = np.dot(AB[0,0,:],n[0,0,:]) + if x < 0: #this means our target atom is closer than the other atom, which is okay (for this atom) + continue + dn = np.linalg.norm(AB[0,0,:] - x/mindist**2*n[0,0,:]) + if mindn is None: + mindn = dn + elif dn < mindn: + mindn = dn + + + if mindn is None or mindn > site_bond_disruption_cutoff: + s = deepcopy(siteout) + s["normal"] = n + s["bonding_index"] = i + s["bond_length"] = mindist + occ_sites.append(s) + + return occ_sites + +class SiteOccupationException(Exception): + pass def get_unique_sym(geoms): ''' Check for the symmetry equivalent structures in the given files @@ -30,6 +126,7 @@ def get_unique_sym(geoms): for geom in geoms: adsorbate_atom_obj = read(geom) + adsorbate_atom_obj.set_constraint() adsorbate_atom_obj.pbc = True adsorbate_atom_obj.set_constraint() # Reset constraints for comparison comparision = comparator.compare( @@ -56,6 +153,7 @@ def get_unique_sym_indices(geoms): for geom in geoms: adsorbate_atom_obj = read(geom) + adsorbate_atom_obj.set_constraint() adsorbate_atom_obj.pbc = True adsorbate_atom_obj.set_constraint() # Reset constraints before comparison comparision = comparator.compare( @@ -87,6 +185,7 @@ def get_unique_sym_structs(geoms): for i,geom in enumerate(geoms_copy): adsorbate_atom_obj = geom + adsorbate_atom_obj.set_constraint() adsorbate_atom_obj.pbc = True adsorbate_atom_obj.set_constraint() # Reset constraints before comparison comparision = comparator.compare( @@ -116,6 +215,7 @@ def get_unique_sym_struct_indices(geoms): for i,geom in enumerate(geoms_copy): adsorbate_atom_obj = geom + adsorbate_atom_obj.set_constraint() adsorbate_atom_obj.pbc = True adsorbate_atom_obj.set_constraint() # Reset constraints before comparison comparision = comparator.compare( @@ -145,6 +245,7 @@ def get_unique_sym_struct_index_clusters(geoms): for i,geom in enumerate(geoms_copy): adsorbate_atom_obj = geom + adsorbate_atom_obj.set_constraint() adsorbate_atom_obj.pbc = True adsorbate_atom_obj.set_constraint() # Reset constraints before comparison comparison = None @@ -180,6 +281,7 @@ def filter_nonunique_TS_guess_indices(geoms,Es): for j,geom in enumerate(geoms): adsorbate_atom_obj = read(geom) + adsorbate_atom_obj.set_constraint() adsorbate_atom_obj.pbc = True adsorbate_atom_obj.set_constraint() # Reset constraints before comparison for i,good_adsorbate in enumerate(good_adsorbates_atom_obj_list):