diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 716ab0b4..e9a99e5d 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -16,10 +16,10 @@ jobs:
         shell: bash -l {0}
     steps:
       - uses: actions/checkout@v2
-      - uses: conda-incubator/setup-miniconda@v2
+      - uses: conda-incubator/setup-miniconda@v3
         with:
           python-version: 3.8
-          miniforge-variant: Mambaforge
+          miniforge-variant: Miniforge3
           channels: defaults,mjohnson541,conda-forge
           channel-priority: true
           activate-environment: pynta_env
diff --git a/environment.yml b/environment.yml
index e1bb0e45..850e8532 100644
--- a/environment.yml
+++ b/environment.yml
@@ -25,3 +25,4 @@ dependencies:
   - coverage
   - liblapack !=3.9.0
   - pysidt
+  - ruamel.yaml < 0.18
\ No newline at end of file
diff --git a/pynta/coveragedependence.py b/pynta/coveragedependence.py
new file mode 100644
index 00000000..abcbfbd3
--- /dev/null
+++ b/pynta/coveragedependence.py
@@ -0,0 +1,2253 @@
+from molecule.molecule import Molecule,Atom,Bond,Group,GroupAtom,GroupBond
+from molecule.exceptions import AtomTypeError
+from ase.io import read, write
+from ase.geometry import get_distances
+from ase.visualize import view
+from ase.neighborlist import natural_cutoffs
+from acat.adsorption_sites import SlabAdsorptionSites
+from pynta.utils import get_unique_sym, get_occupied_sites, sites_match, SiteOccupationException
+from pynta.mol import *
+from pynta.geometricanalysis import *
+from pynta.tasks import *
+from pynta.postprocessing import get_adsorbate_energies
+from pysidt import *
+from pysidt.extensions import split_mols
+from pysidt.sidt import *
+from fireworks import LaunchPad, Workflow
+from fireworks.queue.queue_launcher import rapidfire as rapidfirequeue
+from fireworks.features.multi_launcher import launch_multiprocess
+from fireworks.utilities.fw_serializers import load_object_from_file
+from fireworks.core.rocket_launcher import rapidfire
+from fireworks.core.fworker import FWorker
+from copy import deepcopy
+import numpy as np
+import json
+import shutil
+import os
+import itertools 
+import logging
+
+def get_unstable_pairs(pairsdir,adsorbate_dir,sites,site_adjacency,nslab,max_dist=3.0,show=False,metal=None,facet=None,
+                       infopath_dict=None,imag_freq_path_dict=None):
+    out_pairs = []
+    coadname_dict = {"O=[Pt]": 1, "N#[Pt]": 1, "O[Pt]": 2, "[Pt]": 1}
+    allowed_structure_site_structure_map = generate_allowed_structure_site_structures(adsorbate_dir,sites,site_adjacency,nslab,max_dist=max_dist,cut_multidentate_off_num=None)
+    if show:
+        config_show = []
+    for pair in os.listdir(pairsdir):
+        if not "_" in pair or pair[0] == ".":
+            continue
+        adname = pair.split("_")[0]
+        coadname = pair.split("_")[1]
+        if infopath_dict:
+            info_path = infopath_dict[adname]
+        else:
+            info_path = None
+        if imag_freq_path_dict:
+            imag_freq_path = imag_freq_path_dict[adname]
+        else:
+            imag_freq_path = None
+        for num in os.listdir(os.path.join(pairsdir,pair)):
+            p = os.path.join(pairsdir,pair,num) 
+            if num.isdigit() and os.path.isdir(p):
+                init = read(os.path.join(p,"init.xyz"))
+                with open(os.path.join(p,"info.json"),'r') as f:
+                    initinfo = json.load(f)
+                try:
+                    m = Molecule().from_adjacency_list(initinfo["adjlist"],check_consistency=False)
+                    #m.update(sort_atoms=False)
+                except AtomTypeError:
+                    m = Molecule().from_adjacency_list(initinfo["adjlist"],raise_atomtype_exception=False,
+                            raise_charge_exception=False,check_consistency=False)
+                    for a in m.atoms:
+                        if a.charge != 0:
+                            a.charge = 0
+                            v = 0
+                            for a2,edge in a.edges.items():
+                                v += edge.order*2
+                            if not a.is_hydrogen():
+                                a.lone_pairs = int(np.round((8-v)/2))
+#                     try:
+#                         m.update(sort_atoms=False)
+#                     except Exception as e:
+#                         print(m.to_adjacency_list())
+#                         raise e
+                is_ts = any(bd.get_order_str() == "R" for bd in m.get_all_edges())
+                g = extract_pair_graph(init,sites,site_adjacency,nslab,max_dist=max_dist,
+                                       cut_multidentate_off_num=coadname_dict[coadname],
+                                       allowed_structure_site_structures=allowed_structure_site_structure_map,
+                                       is_ts=is_ts,metal=metal,facet=facet,info_path=info_path,imag_freq_path=imag_freq_path)
+                    
+                
+                #g.update(sort_atoms=False)
+                outpath = os.path.join(p,"out.xyz")
+                if not os.path.exists(outpath):
+                    if show:
+                        pass
+                        
+                    out_pairs.append(g.to_group())
+                else:
+                    final = read(outpath)
+                    try:
+                        gout = extract_pair_graph(final,sites,site_adjacency,nslab,max_dist=max_dist,
+                                                  allowed_structure_site_structures=allowed_structure_site_structure_map,is_ts=is_ts,
+                                                  metal=metal,facet=facet,info_path=info_path,imag_freq_path=imag_freq_path)
+                        if len(gout.atoms) != len(g.atoms):
+                            out_pairs.append(g.to_group())
+                            if show:
+                                config_show.append(init)
+                                config_show.append(final)
+                            continue
+                        #gout.update(sort_atoms=False)
+                    except (FindingPathError, SiteOccupationException):
+                        continue
+                    
+                    if not gout.is_isomorphic(g,save_order=True,strict=False):
+                        out_pairs.append(g.to_group())
+                        if show:
+                            config_show.append(init)
+                            config_show.append(final)
+    if show:
+        view(config_show)
+    return out_pairs
+
+def copy_stable_pairs(pairsdir,sites,site_adjacency,nslab,max_dist=3.0):
+    out_pairs = []
+    count = 0
+    coadname_dict = {"O=[Pt]": 1, "N#[Pt]": 1, "O[Pt]": 2, "[Pt]": 1}
+    for pair in os.listdir(pairsdir):
+        coadname = pair.split("_")[1]
+        for num in os.listdir(os.path.join(pairsdir,pair)):
+            p = os.path.join(pairsdir,pair,num) 
+            if num.isdigit() and os.path.isdir(p):
+                init = read(os.path.join(p,"init.xyz"))
+                with open(os.path.join(p,"info.json"),'r') as f:
+                    initinfo = json.load(f)
+                try:
+                    m = Molecule().from_adjacency_list(initinfo["adjlist"])
+                    #m.update(sort_atoms=False)
+                except AtomTypeError:
+                    m = Molecule().from_adjacency_list(initinfo["adjlist"],raise_atomtype_exception=False,
+                            raise_charge_exception=False,check_consistency=False)
+                    for a in m.atoms:
+                        if a.charge != 0:
+                            a.charge = 0
+                            v = 0
+                            for a2,edge in a.edges.items():
+                                v += edge.order*2
+                            if not a.is_hydrogen():
+                                a.lone_pairs = int(np.round((8-v)/2))
+#                     try:
+#                         m.update(sort_atoms=False)
+#                     except Exception as e:
+#                         print(m.to_adjacency_list())
+#                         raise e
+                
+                g = extract_pair_graph(init,sites,site_adjacency,nslab,max_dist=max_dist,
+                                       cut_coads_off_num=coadname_dict[coadname])
+                    
+                
+                #g.update(sort_atoms=False)
+                outpath = os.path.join(p,"out.xyz")
+                if not os.path.exists(outpath):
+                    out_pairs.append(g.to_group())
+                else:
+                    final = read(outpath)
+                    try:
+                        gout = extract_pair_graph(final,sites,site_adjacency,nslab,max_dist=max_dist)
+                        if len(gout.atoms) != len(g.atoms):
+                            out_pairs.append(g.to_group())
+                            continue
+                        #gout.update(sort_atoms=False)
+                    except FindingPathError:
+                        continue
+                    
+                    if not gout.is_isomorphic(g,save_order=True,strict=False):
+                        out_pairs.append(g.to_group())
+                    else: #matches
+                        os.makedirs(os.path.join(os.path.split(pairsdir)[0],"sampling",str(count)))
+                        shutil.copyfile(os.path.join(p,"out.xyz"),
+                                        os.path.join(os.path.split(pairsdir)[0],"sampling",str(count),"out.xyz")
+                                       )
+                        shutil.copyfile(os.path.join(p,"info.json"),
+                                        os.path.join(os.path.split(pairsdir)[0],"sampling",str(count),"info.json")
+                                       )
+                        shutil.copyfile(os.path.join(p,"init.xyz"),
+                                        os.path.join(os.path.split(pairsdir)[0],"sampling",str(count),"init.xyz")
+                                       )
+                        count += 1
+
+
+    return None
+
+def generate_pair_geometries(adpath1,adpath2,slabpath,metal,facet,adinfo1=None,adinfo2=None,
+                             max_dist=3.0,imag_freq_max=150.0,symmetric=None):
+    """
+    adpath1 can be bidentate
+    adpath2 must be monodentate
+    """
+    #slab information
+    slab = read(slabpath)
+    nslab = len(slab)
+    cas = SlabAdsorptionSites(slab,facet,allow_6fold=False,composition_effect=False,
+                            label_sites=True,
+                            surrogate_metal=metal)
+    sites = cas.get_sites()
+    site_adjacency = cas.get_neighbor_site_list()
+    
+    adsorbate_dir = os.path.split(adpath2)[0]
+    allowed_structure_site_structures = generate_allowed_structure_site_structures(adsorbate_dir,sites,site_adjacency,nslab,max_dist=max_dist)
+
+    if symmetric is None:
+        symmetric = adpath1 == adpath2
+    
+    #extract information about adsorbates and valid adsorbate geometries
+    if os.path.isfile(os.path.join(adpath1,"opt.xyz")): #TS
+        is_ts = True
+        if adinfo1 is None:
+            adinfo1 = os.path.join(os.path.split(adpath1)[0],"info.json")
+        
+        with open(adinfo1,"r") as f:
+            info1 = json.load(f)
+        
+        admol1,neighbor_sites1,ninds1 = generate_TS_2D(read(os.path.join(adpath1,"opt.xyz")), adinfo1, metal, facet, sites, site_adjacency, nslab, max_dist=np.inf,
+                                                    imag_freq_path=os.path.join(adpath1,"vib.json_vib.json"),
+                                                    allowed_structure_site_structures=allowed_structure_site_structures,
+        )
+        
+        aseinds1 = []
+        for i,at in enumerate(admol1.atoms):
+            if (not at.is_surface_site()) and at.is_bonded_to_surface():
+                aseinds1.append(i-len(neighbor_sites1)+nslab)
+        ad1s = [read(os.path.join(adpath1,"opt.xyz"))]
+        ad12Ds = [admol1]
+        ad12Dneighbors = [neighbor_sites1]
+        ad12Dninds = [ninds1]
+    else:
+        is_ts = False
+        if adinfo1 is None:
+            adinfo1 = os.path.join(adpath1,"info.json")
+        
+        with open(adinfo1,"r") as f:
+            info1 = json.load(f)
+
+        mol1 = Molecule().from_adjacency_list(info1["adjlist"])
+        atom_to_molecule_surface_atom_map1 = { int(key):int(val) for key,val in info1["gratom_to_molecule_surface_atom_map"].items()}
+        ad1s = get_unique_adsorbate_geometries(adpath1,mol1,sites,site_adjacency,atom_to_molecule_surface_atom_map1,
+                                    nslab,imag_freq_max=imag_freq_max)
+        ad12Ds = []
+        ad12Dneighbors = []
+        ad12Dninds = []
+        for a in ad1s:
+            admol1,neighbor_sites1,ninds1 = generate_adsorbate_2D(a, sites, site_adjacency, nslab, max_dist=np.inf)
+            ad12Ds.append(admol1)
+            ad12Dneighbors.append(neighbor_sites1)
+            ad12Dninds.append(ninds1)
+        
+        aseinds1 = [x+nslab for x in atom_to_molecule_surface_atom_map1.keys()]
+        
+    if os.path.isfile(adpath2):
+        raise ValueError
+    else:
+        if adinfo2 is None:
+            adinfo2 = os.path.join(adpath2,"info.json")
+        
+        with open(adinfo2,"r") as f:
+            info2 = json.load(f)
+        
+        mol2 = Molecule().from_adjacency_list(info2["adjlist"])
+        assert len(mol2.get_adatoms()) == 1
+        atom_to_molecule_surface_atom_map2 = { int(key):int(val) for key,val in info2["gratom_to_molecule_surface_atom_map"].items()}
+        ad2s = get_unique_adsorbate_geometries(adpath2,mol2,sites,site_adjacency,atom_to_molecule_surface_atom_map2,
+                                    nslab,imag_freq_max=imag_freq_max)
+    
+
+    #generate pairs
+    pairs = []
+    ad1_to_ad1_sites = dict()
+    ad1_to_ad2_sites = dict()
+    ad1_to_ad2_heights = dict()
+    ad1_to_actual_occ = dict()
+    #go through all adsorbate 1 configurations
+    for j,ad1 in enumerate(ad1s):
+        #find occupied sites
+        ad1_to_ad2_sites[j] = dict()
+        ad1_to_ad2_heights[j] = dict()
+        occ = get_occupied_sites(ad1,sites,nslab)
+        actual_occ = []
+        for site in occ:
+            if site["bonding_index"] in aseinds1:
+                actual_occ.append(site)
+        
+        if len(actual_occ) == 0:
+#             print(adinfo1)
+#             print(aseinds1)
+#             print(atom_to_molecule_surface_atom_map1)
+#             print(mol1.to_adjacency_list())
+#             print(occ)
+#             print(j)
+            raise ValueError
+        
+        ad1_to_ad1_sites[j] = [x for x in actual_occ]
+        
+        #find all sites to be resolved with pairs
+        neighbor_sites = []
+        
+        for site in sites:
+            if any(sites_match(site,s,slab) for s in actual_occ):
+                continue
+            for occ_site in actual_occ:
+                bd,dist = get_distances([site["position"]], [occ_site["position"]], cell=slab.cell, pbc=slab.pbc)
+                v = np.linalg.norm(bd[:2])
+                if v < max_dist:
+                    neighbor_sites.append(site)
+                    break
+        
+        #estimate heights for all placements
+        stable_neighbor_sites = []
+        heights = []
+        for site in neighbor_sites:
+            for i,ad2 in enumerate(ad2s):
+                if i not in ad1_to_ad2_sites[j].keys():
+                    ad1_to_ad2_sites[j][i] = []
+                    ad1_to_ad2_heights[j][i] = []
+        
+                cas = SlabAdsorptionSites(ad2,facet,allow_6fold=False,composition_effect=False,
+                                    label_sites=True,
+                                    surrogate_metal=metal)
+                sites2 = cas.get_sites()
+                occ = get_occupied_sites(ad2,sites2,nslab)
+                occsite = None
+                
+                for s in occ:
+                    if (s['bonding_index'] - nslab) in atom_to_molecule_surface_atom_map2.keys():
+                        occsite = s
+                        break
+                else:
+#                     print(occ)
+#                     print(atom_to_molecule_surface_atom_map2.keys())
+#                     print(s['bonding_index'])
+                    raise ValueError
+
+                if occsite["site"] == site["site"] and occsite["morphology"] == site["morphology"]:
+                    ad1_to_ad2_sites[j][i].append(site)
+                    ad1_to_ad2_heights[j][i].append(occsite["bond_length"])
+                    stable_neighbor_sites.append(site)
+                    heights.append(occsite["bond_length"])
+                    break
+
+    
+    adpairs = []
+    pairmols = []
+    
+    if not symmetric:
+        for j in range(len(ad1s)):
+            ad1_sites = ad1_to_ad1_sites[j]
+            
+            ad2_sites = []
+            ad2_geoms = []
+            heights = []
+            for i,sites in ad1_to_ad2_sites[j].items():
+                for k,site in enumerate(sites):
+                    heights.append(ad1_to_ad2_heights[j][i][k])
+                    ad2_sites.append(ad1_to_ad2_sites[j][i][k])
+                    ad2_geoms.append(ad2s[i])
+
+
+            inds = get_unique_site_inds(ad2_sites,slab,fixed_point=ad1_sites[0]["position"])
+
+            for i in inds:
+#                 if any(sites_match(ad2_sites[i],s,slab) for s in ad1_to_ad1_sites[j]):
+#                     continue
+                
+                ad = deepcopy(ad1s[j])
+                surf_ind = list(atom_to_molecule_surface_atom_map2.keys())[0]
+                add_adsorbate_to_site(ad, ad2_geoms[i][nslab:], surf_ind, ad2_sites[i], height=heights[i])
+                nsites = ad12Dneighbors[j]
+                ind = [k for k,x in enumerate(nsites) if sites_match(ad2_sites[i],x,slab)][0]
+                amol = deepcopy(ad12Ds[j])
+                satom = amol.atoms[ind]
+                m2 = deepcopy(mol2)
+                admol2 = m2.get_desorbed_molecules()[0]
+                label_atom_dict = admol2.get_all_labeled_atoms()
+                for label,at in label_atom_dict.items(): #one iteration only
+                    amol = amol.merge(admol2)
+                    if label == "*1":
+                        at.decrement_radical()
+                        order = 1
+                    elif label == "*2":
+                        at.decrement_radical()
+                        at.decrement_radical()
+                        order = 2
+                    elif label == "*3":
+                        at.decrement_radical()
+                        at.decrement_lone_pairs()
+                        order = 3
+                    elif label == "*4":
+                        at.decrement_radical()
+                        at.decrement_radical()
+                        at.decrement_lone_pairs()
+                        order = 4
+                    else:
+                        raise ValueError(label)
+                    try:
+                        bd = Bond(satom,at,order=order)
+                    except:
+#                         print(ad12Ds[j].to_adjacency_list())
+#                         print(mol2.to_adjacency_list())
+#                         print(amol.to_adjacency_list())
+#                         print(satom)
+#                         print(at)
+#                         print(order)
+                        raise ValueError
+                    amol.add_bond(bd)
+                    amol.clear_labeled_atoms()
+                    if amol.multiplicity == -187: #handle surface molecules
+                        amol.multiplicity = amol.get_radical_count() + 1
+                for pmol in pairmols:
+                    if pmol.is_isomorphic(amol,save_order=True): #duplicate
+                        break
+                else:
+                    adpairs.append(ad)
+                    pairmols.append(amol)
+    else: #symmetric case, monodentate-monodentate since ad2 must be monodentate
+        ad2_site_pairs = []
+        ad2_geoms = []
+        heights = []
+        ad1_inds = []
+        for j in range(len(ad1s)):
+            ad1_sites = ad1_to_ad1_sites[j]
+            
+            for i,sites in ad1_to_ad2_sites[j].items():
+                for k,site in enumerate(sites):
+                    heights.append(ad1_to_ad2_heights[j][i][k])
+                    ad2_site_pairs.append((ad1_sites[0],site))
+                    ad2_geoms.append(ad2s[i])
+                    ad1_inds.append(j)
+
+        inds = get_unique_site_pair_inds(ad2_site_pairs,slab)
+
+        for i in inds:
+#             if any(sites_match(ad2_site_pairs[i][1],s,slab) for s in ad1_to_ad1_sites[j]):
+#                 print("matched")
+#                 continue
+            ad = deepcopy(ad1s[ad1_inds[i]])
+            surf_ind = list(atom_to_molecule_surface_atom_map2.keys())[0]
+            add_adsorbate_to_site(ad, ad2_geoms[i][nslab:], surf_ind, ad2_site_pairs[i][1], 
+                                  height=heights[i])
+            nsites = ad12Dneighbors[ad1_inds[i]]
+            try:
+                ind = [k for k,x in enumerate(nsites) if sites_match(ad2_site_pairs[i][1],x,slab)][0]
+            except Exception as e:
+#                 print("site")
+#                 print(ad2_site_pairs[i][1])
+#                 print("occ")
+#                 print(ad1_to_ad1_sites[j])
+#                 print("nsites")
+#                 print(nsites)
+#                 bd,dist = get_distances([ad2_site_pairs[i][1]["position"]], [ad1_to_ad1_sites[j][0]["position"]], cell=slab.cell, pbc=slab.pbc)
+#                 print(bd)
+#                 print(dist)
+                raise e
+            amol = deepcopy(ad12Ds[j])
+            satom = amol.atoms[ind]
+            m2 = deepcopy(mol2)
+            admol2 = m2.get_desorbed_molecules()[0]
+            label_atom_dict = admol2.get_all_labeled_atoms()
+            for label,at in label_atom_dict.items(): #one iteration only
+                amol = amol.merge(admol2)
+                if label == "*1":
+                    at.decrement_radical()
+                    order = 1
+                elif label == "*2":
+                    at.decrement_radical()
+                    at.decrement_radical()
+                    order = 2
+                elif label == "*3":
+                    at.decrement_radical()
+                    at.decrement_lone_pairs()
+                    order = 3
+                elif label == "*4":
+                    at.decrement_radical()
+                    at.decrement_radical()
+                    at.decrement_lone_pairs()
+                    order = 4
+                else:
+                    raise ValueError(label)
+                try:
+                    bd = Bond(satom,at,order=order)
+                except:
+#                     print(ad12Ds[j].to_adjacency_list())
+#                     print(mol2.to_adjacency_list())
+#                     print(amol.to_adjacency_list())
+#                     print(satom)
+#                     print(at)
+#                     print(order)
+                    raise ValueError
+                amol.add_bond(bd)
+                amol.clear_labeled_atoms()
+                if amol.multiplicity == -187: #handle surface molecules
+                    amol.multiplicity = amol.get_radical_count() + 1
+            for pmol in pairmols:
+                if pmol.is_isomorphic(amol,save_order=True): #duplicate
+                    break
+            else:
+                adpairs.append(ad)
+                pairmols.append(amol)
+    
+
+    return adpairs,pairmols
+
+def get_unique_site_inds(sites,slab,fixed_point=None,tol=0.15):
+    fingerprints = []
+    for k,site in enumerate(sites):
+        if fixed_point is None:
+            fingerprints.append((site["morphology"],site["site"]))
+        else:
+            bd,d = get_distances([site["position"]], [fixed_point], cell=slab.cell, pbc=(True,True,False))
+            xydist = np.linalg.norm(bd[0][0][:2])
+            zdist = bd[0][0][2]
+            fingerprints.append((site["morphology"],site["site"],xydist,zdist,))
+    
+    unique_sites = []
+    unique_inds = []
+    for i,f in enumerate(fingerprints):
+        boo = False
+        for uf in unique_sites:  
+            if fingerprints_match(f,uf,tol=tol):
+                boo = True
+                break
+        if boo:
+            continue
+        else:
+            unique_sites.append(f)
+            unique_inds.append(i)
+
+    return unique_inds
+
+def fingerprints_match(f1,f2,tol=0.15):
+    for i in range(len(f1)):
+        if isinstance(f1[i],str) or isinstance(f1[i],frozenset):
+            if f1[i] != f2[i]:
+                return False
+        elif isinstance(f1[i],float):
+            if abs(f1[i] - f2[i]) > tol:
+                return False
+        else:
+            raise ValueError
+    else:
+        return True
+
+def get_unique_site_pair_inds(site_pairs,slab,tol=0.15):
+    fingerprints = []
+    for k,sites in enumerate(site_pairs):
+        site1 = sites[0]
+        site2 = sites[1]
+        bd,d = get_distances([site1["position"]], [site2["position"]], cell=slab.cell, pbc=(True,True,False))
+        xydist = np.linalg.norm(bd[0][0][:2])
+        zdist = bd[0][0][2]
+        fingerprints.append((frozenset([(site1["morphology"],site1["site"]),(site2["morphology"],site2["site"])]),
+                             xydist,zdist,))
+    
+    unique_sites = []
+    unique_inds = []
+    for i,f in enumerate(fingerprints):
+        boo = False
+        for uf in unique_sites:  
+            if fingerprints_match(f,uf,tol=tol):
+                boo = True
+                break
+        if boo:
+            continue
+        else:
+            unique_sites.append(f)
+            unique_inds.append(i)
+
+    return unique_inds
+
+def setup_pair_opts_for_rxns(targetdir,tsdirs,coadnames,metal,facet,max_dist=3.0,imag_freq_max=150.0):
+    pairdir = os.path.join(targetdir,"pairs")
+    addir = os.path.join(os.path.split(os.path.split(tsdirs[0])[0])[0],"Adsorbates")
+    slabpath = os.path.join(os.path.split(os.path.split(tsdirs[0])[0])[0],"slab.xyz")
+    if not os.path.exists(pairdir):
+        os.makedirs(pairdir)
+    
+    ads = set()  
+    combs = []
+    for tsdir in tsdirs:
+        for coadname in coadnames:
+            tp = (tsdir,coadname)
+            combs.append(tp)
+        
+        with open(os.path.join(os.path.split(tsdir)[0],"info.json"),"r") as f:
+            info = json.load(f)
+        for molname in info["species_names"]+info["reverse_names"]:
+            with open(os.path.join(addir,molname,"info.json"),"r") as f:
+                molinfo = json.load(f)
+            m = Molecule().from_adjacency_list(molinfo["adjlist"])
+            if m.contains_surface_site():
+                ads.add(molname)
+    
+    
+    for adname in ads:
+        for coadname in coadnames:
+            tp = (adname,coadname)
+            revtp = (coadname,adname)
+            if (revtp not in combs) and (tp not in combs):
+                combs.append(tp)
+                
+    outdirs_ad = []
+    outdirs_ts = []
+    for s in combs:
+        if os.path.exists(s[0]):
+            is_ts = True
+        else: 
+            is_ts = False
+        if not is_ts:
+            name = "_".join(s)
+        else:
+            name = "_".join([os.path.split(os.path.split(s[0])[0])[1],s[1]])
+        namedir = os.path.join(pairdir,name)
+        if not os.path.exists(namedir):
+            os.makedirs(namedir)
+            if not is_ts:
+                ds = [os.path.join(addir,x) for x in s]
+            else:
+                ds = [s[0],os.path.join(addir,s[1])]
+            pairs,pairmols = generate_pair_geometries(ds[0],ds[1],slabpath,metal,facet,
+                                 max_dist=max_dist,imag_freq_max=imag_freq_max)
+            for i,pair in enumerate(pairs):
+                os.makedirs(os.path.join(namedir,str(i)))
+                write(os.path.join(namedir,str(i),"init.xyz"), pair)
+                if not is_ts:
+                    moldict = {"adjlist": pairmols[i].to_adjacency_list()}
+                else:
+                    moldict = {"adjlist": pairmols[i].to_adjacency_list(),"tsdir": s[0]}
+                with open(os.path.join(namedir,str(i),"info.json"),'w') as f:
+                    json.dump(moldict,f)
+                if not is_ts:
+                    outdirs_ad.append(os.path.join(namedir,str(i),"init.xyz"))
+                else:
+                    outdirs_ts.append(os.path.join(namedir,str(i),"init.xyz"))
+                    
+    return outdirs_ad,outdirs_ts
+
+def get_adsorbate_ts_information(xyz,slabxyz,is_ts,
+                               metal,facet,sites,site_adjacency,max_dist=3.0):
+    slab = read(slabxyz)
+    
+        
+    if is_ts:
+        ad = read(xyz)
+        xyzinfo = os.path.join(os.path.split(xyz)[0],"..","info.json")
+        with open(xyzinfo,"r") as f:
+            info = json.load(f)
+
+        reactants = Molecule().from_adjacency_list(info["reactants"])
+        products = Molecule().from_adjacency_list(info["products"])
+        template_mol_map = [{ int(key):int(val) for key,val in x.items()} for x in info["template_mol_map"]]
+        molecule_to_atom_maps = [{ int(key):int(val) for key,val in x.items()} for x in info["molecule_to_atom_maps"]]
+        ads_sizes = info["ads_sizes"]
+        nslab = info["nslab"]
+        forward = info["forward"]
+
+        broken_bonds,formed_bonds = get_broken_formed_bonds(reactants,products)
+
+        extra_bonds = formed_bonds if forward else broken_bonds
+        template = reactants if forward else products
+        adatoms = template.get_adatoms()
+        adinds = []
+        for ind,atom in enumerate(template.atoms):
+            if atom.is_surface_site():
+                if len(atom.bonds) == 0:
+                    s = [bd for bd in extra_bonds if atom.label in bd]
+                    if len(s) > 0:
+                        labels = list(s[0])
+                        labels.remove(atom.label)
+                        alabel = labels[0]
+                        a = template.get_labeled_atoms(alabel)[0]
+                        adinds.append(template.atoms.index(a))
+                else:
+                    a = list(atom.bonds.keys())[0]
+                    adinds.append(template.atoms.index(a))
+
+        aseinds = []
+        for ind in adinds:
+            aseind = get_ase_index(ind,template_mol_map,molecule_to_atom_maps,nslab,ads_sizes)
+            aseinds.append(aseind)
+            
+        
+    else:
+        ad = read(xyz)
+        with open(os.path.join(os.path.split(os.path.split(xyz)[0])[0],"info.json"),"r") as f:
+            info = json.load(f)
+        
+        mol2D = Molecule().from_adjacency_list(info["adjlist"])
+        atom_to_molecule_surface_map = { int(key):int(val) for key,val in info["gratom_to_molecule_surface_atom_map"].items()}
+        nslab = info["nslab"]
+        aseinds = [x+nslab for x in atom_to_molecule_surface_map.keys()]
+        
+    occ = get_occupied_sites(ad,sites,nslab)
+    
+    
+    actual_occ = []
+    for site in occ:
+        if site["bonding_index"] in aseinds:
+            actual_occ.append(site)
+
+    neighbor_sites = []
+
+    for site in sites:
+        if any(sites_match(site,s,slab) for s in actual_occ):
+            continue
+        for occ_site in actual_occ:
+            v,dist = get_distances([site["position"]], [occ_site["position"]], cell=slab.cell, pbc=slab.pbc)
+            if np.linalg.norm(v[:2]) < max_dist:
+                neighbor_sites.append(site)
+                break
+    
+    if is_ts:
+        admol,neighbor_sites_2D,ninds = generate_TS_2D(ad, xyzinfo, metal, facet, sites, site_adjacency, nslab, max_dist=None)
+    else:
+        admol,neighbor_sites_2D,ninds = generate_adsorbate_2D(ad, sites, site_adjacency, nslab, max_dist=None)
+    
+    return ad,admol,neighbor_sites_2D,ninds,actual_occ,neighbor_sites,aseinds,slab,nslab
+
+def get_coadsorbate_information(coadnames,ads_dir,neighbor_sites,sites,site_adjacency,nslab,slab):
+    coad_to_stable_neighbor_sites = dict()
+    coad_site_stable_parameters = {name:[] for name in coadnames}
+    coad_atom_to_molecule_surface_atom_map = dict()
+    infocoad_dict = dict()
+    stable_neighbor_sites_total = []
+    coads_dict = dict()
+    coad2Ds = dict()
+    for coadname in coadnames:
+        coaddir = os.path.join(ads_dir,coadname)
+        with open(os.path.join(coaddir,"info.json"),"r") as f:
+            infocoad = json.load(f)
+
+        coad2D = Molecule().from_adjacency_list(infocoad["adjlist"])
+        coad2Ds[coadname] = coad2D
+        atom_to_molecule_surface_atom_map = {int(key):int(val) for key,val in infocoad["gratom_to_molecule_surface_atom_map"].items()}
+        coads = get_unique_adsorbate_geometries(coaddir,Molecule().from_adjacency_list(infocoad["adjlist"]),
+                               sites,site_adjacency,atom_to_molecule_surface_atom_map,
+                               nslab)
+        atom_to_molecule_surface_atom_map = {int(key):int(val) for key,val in infocoad["gratom_to_molecule_surface_atom_map"].items()}
+        coad_atom_to_molecule_surface_atom_map[coadname] = atom_to_molecule_surface_atom_map
+        coads_dict[coadname] = coads
+        infocoad_dict[coadname] = infocoad
+        stable_neighbor_sites = []
+
+        for site in neighbor_sites:
+
+            for coad in coads:
+                occ = get_occupied_sites(coad,sites,nslab)
+                occsite = None
+                for s in occ:
+                    if (s['bonding_index'] - nslab) in atom_to_molecule_surface_atom_map.keys():
+                        occsite = s
+                        break
+                else:
+                    raise ValueError
+
+                if occsite["site"] == site["site"] and occsite["morphology"] == site["morphology"]:
+                    stable_neighbor_sites.append(site)
+                    if not any(sites_match(s,site,slab) for s in stable_neighbor_sites_total):
+                        stable_neighbor_sites_total.append(site)
+                    if not (site["site"],site["morphology"]) in coad_site_stable_parameters[coadname]:
+                        coad_site_stable_parameters[coadname].append((site["site"],site["morphology"]))
+                    break
+
+        coad_to_stable_neighbor_sites[coadname] = stable_neighbor_sites
+
+    return coad_to_stable_neighbor_sites, coad_site_stable_parameters,coad_atom_to_molecule_surface_atom_map,infocoad_dict,stable_neighbor_sites_total,coads_dict,coad2Ds
+
+def generate_coadsorbed_xyzs(outdir,ad_xyzs,ts_xyzs,slabxyz,pairsdir,ads_dir,
+                             coadnames,metal,facet,sites,site_adjacency,max_dist=3.0):
+    slab = read(slabxyz)
+    nslab = len(slab)
+
+    unstable_pairs = get_unstable_pairs(pairsdir,ads_dir,sites,site_adjacency,nslab,max_dist=None)
+    
+    ad_dict = dict()
+    ad_admol_dict = dict()
+    ad_neighbor_sites_2D_dict = dict()
+    ad_ninds_dict = dict()
+    ad_neighbor_sites_dict = dict()
+    ad_actual_occ_dict = dict()
+    ad_aseinds_dict = dict()
+    
+    for ad_xyz in ad_xyzs:
+        ad,admol,neighbor_sites_2D,ninds,actual_occ,neighbor_sites,aseinds,slab,nslab = get_adsorbate_ts_information(ad_xyz,slabxyz,False,metal,facet,sites,site_adjacency,max_dist=max_dist)
+        ad_dict[ad_xyz] = ad
+        ad_admol_dict[ad_xyz] = admol
+        ad_neighbor_sites_2D_dict[ad_xyz] = neighbor_sites_2D
+        ad_ninds_dict[ad_xyz] = ninds
+        ad_neighbor_sites_dict[ad_xyz] = neighbor_sites
+        ad_actual_occ_dict[ad_xyz] = actual_occ
+        ad_aseinds_dict[ad_xyz] = aseinds
+        
+    ts_dict = dict()
+    ts_admol_dict = dict()
+    ts_neighbor_sites_2D_dict = dict()
+    ts_ninds_dict = dict()
+    ts_neighbor_sites_dict = dict()
+    ts_actual_occ_dict = dict()
+    ts_aseinds_dict = dict()
+    ts_xyz = None
+    for ts_xyz in ts_xyzs:
+        ad,admol,neighbor_sites_2D,ninds,actual_occ,neighbor_sites,aseinds,slab,nslab = get_adsorbate_ts_information(ts_xyz,slabxyz,True,metal,facet,sites,site_adjacency,max_dist=max_dist)
+        ts_dict[ts_xyz] = ad
+        ts_admol_dict[ts_xyz] = admol
+        ts_neighbor_sites_2D_dict[ts_xyz] = neighbor_sites_2D
+        ts_ninds_dict[ts_xyz] = ninds
+        ts_neighbor_sites_dict[ts_xyz] = neighbor_sites
+        ts_actual_occ_dict[ts_xyz] = actual_occ
+        ts_aseinds_dict[ts_xyz] = aseinds
+
+    
+    outatoms = []
+    outmol2Dsts = []
+    outmol2Dsad = []
+    outxyzsts = []
+    outxyzsad = []
+    
+    for coadname in coadnames:
+        for j,ts_xyz in enumerate(ts_xyzs):
+            ts_name = "TS" + str(j)
+            print(ts_name)
+            if not os.path.exists(os.path.join(outdir,ts_name)):
+                os.makedirs(os.path.join(outdir,ts_name))
+            shutil.copyfile(os.path.join(os.path.split(os.path.split(ts_xyz)[0])[0],"info.json"),
+                            os.path.join(outdir,ts_name,"info.json"))
+            coad_to_stable_neighbor_sites,coad_site_stable_parameters,coad_atom_to_molecule_surface_atom_map,infocoad_dict,stable_neighbor_sites_total,coads_dict,coad2Ds = get_coadsorbate_information(coadnames,
+                                                                        ads_dir,ts_neighbor_sites_dict[ts_xyz],sites,site_adjacency,nslab,slab)
+            atoms,mol2Ds = generate_coadsorbed_geoms(ts_dict[ts_xyz],
+                                                          ts_admol_dict[ts_xyz],
+                                                          ts_neighbor_sites_2D_dict[ts_xyz],ts_ninds_dict[ts_xyz],
+                                                          ts_actual_occ_dict[ts_xyz],ts_neighbor_sites_dict[ts_xyz],
+                               ts_aseinds_dict[ts_xyz],slab,nslab,True,ads_dir,unstable_pairs,coadname,
+                               metal,facet,sites,site_adjacency,coad_to_stable_neighbor_sites, coad_site_stable_parameters,
+                               coad_atom_to_molecule_surface_atom_map,infocoad_dict,
+                               stable_neighbor_sites_total,coads_dict,coad2Ds,max_dist=max_dist)
+            for i,mol2D in enumerate(mol2Ds): #check if we already have it
+                for mol2Dout in outmol2Dsts:
+                    if mol2Dout.is_isomorphic(mol2D,save_order=True):
+                        break
+                else:
+                    outatoms.append(atoms[i])
+                    outmol2Dsts.append(mol2Ds[i])
+                    if not os.path.exists(os.path.join(outdir,ts_name,coadname,str(i))):
+                        os.makedirs(os.path.join(outdir,ts_name,coadname,str(i)))
+                    with open(os.path.join(outdir,ts_name,coadname,str(i),"info.json"),"w") as f:
+                        d = {"adjlist": mol2D.to_adjacency_list(),
+                            "xyz": ts_xyz}
+                        json.dump(d,f)
+                    write(os.path.join(outdir,ts_name,coadname,str(i),"init.xyz"),atoms[i])
+                    outxyzsts.append(os.path.join(outdir,ts_name,coadname,str(i),"init.xyz"))
+        
+        for j,ad_xyz in enumerate(ad_xyzs):
+            with open(os.path.join(os.path.split(os.path.split(ad_xyz)[0])[0],"info.json"),"r") as f:
+                info = json.load(f)
+            ad_name = info["name"]
+            print(ad_name)
+            if not os.path.exists(os.path.join(outdir,ad_name)):
+                os.makedirs(os.path.join(outdir,ad_name))
+            shutil.copyfile(os.path.join(os.path.split(os.path.split(ad_xyz)[0])[0],"info.json"),
+                                         os.path.join(outdir,ad_name,"info.json"))
+            coad_to_stable_neighbor_sites,coad_site_stable_parameters,coad_atom_to_molecule_surface_atom_map,infocoad_dict,stable_neighbor_sites_total,coads_dict,coad2Ds = get_coadsorbate_information(coadnames,
+                                                                        ads_dir,ad_neighbor_sites_dict[ad_xyz],sites,site_adjacency,nslab,slab)
+            atoms,mol2Ds = generate_coadsorbed_geoms(ad_dict[ad_xyz],
+                                                          ad_admol_dict[ad_xyz],
+                                                          ad_neighbor_sites_2D_dict[ad_xyz],ad_ninds_dict[ad_xyz],
+                                                          ad_actual_occ_dict[ad_xyz],ad_neighbor_sites_dict[ad_xyz],
+                               ad_aseinds_dict[ad_xyz],slab,nslab,False,ads_dir,unstable_pairs,coadname,
+                               metal,facet,sites,site_adjacency,coad_to_stable_neighbor_sites, coad_site_stable_parameters,
+                               coad_atom_to_molecule_surface_atom_map,infocoad_dict,
+                               stable_neighbor_sites_total,coads_dict,coad2Ds,max_dist=max_dist)
+            for i,mol2D in enumerate(mol2Ds): #check if we already have it
+                for mol2Dout in outmol2Dsad:
+                    if mol2Dout.is_isomorphic(mol2D):
+                        break
+                else:
+                    outatoms.append(atoms[i])
+                    outmol2Dsad.append(mol2Ds[i])
+                    if not os.path.exists(os.path.join(outdir,ad_name,coadname,str(i))):
+                        os.makedirs(os.path.join(outdir,ad_name,coadname,str(i)))
+                    with open(os.path.join(outdir,ad_name,coadname,str(i),"info.json"),"w") as f:
+                        d = {"adjlist": mol2D.to_adjacency_list(),
+                            "xyz": ad_xyz}
+                        json.dump(d,f)
+                    write(os.path.join(outdir,ad_name,coadname,str(i),"init.xyz"),atoms[i])
+                    outxyzsad.append(os.path.join(outdir,ad_name,coadname,str(i),"init.xyz"))
+        
+    return outatoms,outmol2Dsad,outmol2Dsts,outxyzsad,outxyzsts
+
+def generate_coadsorbed_geoms(ad,admol,neighbor_sites_2D,ninds,actual_occ,neighbor_sites,
+                               aseinds,slab,nslab,is_ts,ads_dir,unstable_pairs,coadname,
+                               metal,facet,sites,site_adjacency,coad_to_stable_neighbor_sites, coad_site_stable_parameters,
+                               coad_atom_to_molecule_surface_atom_map,infocoad_dict,
+                               stable_neighbor_sites_total,coads_dict,coad2Ds,max_dist=3.0):
+    
+    logging.error("stable neighbor sites: {}".format(len(stable_neighbor_sites_total)))
+    
+    coads = coads_dict[coadname]
+    atom_to_molecule_surface_atom_map = coad_atom_to_molecule_surface_atom_map[coadname]
+    infocoad = infocoad_dict[coadname]
+    coad2D = coad2Ds[coadname]
+    site_stable_parameters = coad_site_stable_parameters[coadname]
+    coad_occ_dict = {coads.index(coad): get_occupied_sites(coad,sites,nslab) for coad in coads}
+    coad_height_map = {coads.index(coad): list(get_bond_lengths_sites(Molecule().from_adjacency_list(infocoad["adjlist"]),
+                                                                       coad,
+                                         {int(x):y for x,y in infocoad["atom_to_molecule_atom_map"].items()},
+                                        {int(x):y for x,y in infocoad["gratom_to_molecule_surface_atom_map"].items()},
+                                                                       infocoad["nslab"],sites,site_adjacency,
+                                                                facet=facet,metal=metal)[2].values())[0] for coad in coads}
+    outgeoms = [ad]
+    outmol2Ds = [admol]
+    geo_fails = 0
+    mol2D_fails = 0
+    config_fails = 0
+    site_fails = 0
+    unique_fails = 0
+    for i,site in enumerate(stable_neighbor_sites_total):
+        logging.error("doing site {}".format(i))
+        newoutgeoms = []
+        newoutmol2Ds = []
+        site_2D_inds = [i for i,x in enumerate(neighbor_sites_2D) if sites_match(site,x,slab)]
+        if not site_2D_inds:
+            site_fails += 1
+            continue
+        
+        for j,geo in enumerate(outgeoms):
+            mol2D = outmol2Ds[j]
+            geo = deepcopy(geo)
+            geo,coad2D = add_coadsorbate_3D(geo,site,ad,coads,site_stable_parameters,
+                        atom_to_molecule_surface_atom_map,infocoad,coad_occ_dict,coad_height_map,coad2D,
+                       metal,facet,sites,site_adjacency,nslab)
+            if geo is None:
+                geo_fails += 1
+                continue
+            mol2D = mol2D.copy(deep=True)
+            try:
+                mol2D = add_coadsorbate_2D(mol2D,site,coad2D,slab,neighbor_sites_2D,site_2D_inds)
+            except Exception as e:
+                print(mol2D.to_adjacency_list())
+                print(site)
+                print(site_2D_inds)
+                raise e
+            
+            if mol2D is None:
+                mol2D_fails += 1
+                continue
+            
+            if configuration_is_valid(mol2D,admol,is_ts,unstable_pairs):
+                for m in outmol2Ds:
+                    if mol2D.is_isomorphic(m,save_order=True):
+                        unique_fails += 1
+                        break
+                else:
+                    assert len(geo) - nslab == len(mol2D.atoms) - len([a for a in mol2D.atoms if a.is_surface_site()])
+                    newoutgeoms.append(geo)
+                    newoutmol2Ds.append(mol2D)
+
+        outgeoms.extend(newoutgeoms)
+        outmol2Ds.extend(newoutmol2Ds)
+        logging.error("added so far: {}".format(len(outgeoms)))
+    
+    
+    outgeoms.remove(ad) #do not include the configuration with no coadsorbates in output
+    outmol2Ds.remove(admol)
+    return outgeoms,outmol2Ds
+
+
+def add_coadsorbate_3D(geo,site,ad,coads,site_stable_parameters,
+                        atom_to_molecule_surface_atom_map,infocoad,coad_occ_dict,coad_height_map,coad2D,
+                       metal,facet,sites,site_adjacency,nslab):
+    if (site["site"],site["morphology"]) not in site_stable_parameters:
+        return None
+    for i,coad in enumerate(coads):
+        #occ = get_occupied_sites(coad,sites,nslab)
+        occ = coad_occ_dict[i]
+        occsite = None
+        for s in occ:
+            if (s['bonding_index'] - nslab) in atom_to_molecule_surface_atom_map.keys():
+                occsite = s
+                break
+        else:
+            raise ValueError
+        if occsite["site"] == site["site"] and occsite["morphology"] == site["morphology"]:
+            #assert len(mol2D.get_all_labeled_atoms()) == 0, mol2D.to_adjacency_list()
+            surf_ind = list(atom_to_molecule_surface_atom_map.keys())[0]
+            h = coad_height_map[i]
+            add_adsorbate_to_site(geo, coad[nslab:], surf_ind, site, height=h)
+            return geo,coad2D
+    else:
+        return None,None
+
+def add_coadsorbate_2D(mol2D,site,coad2D,slab,neighbor_sites_2D,site_2D_inds):
+    if site_2D_inds:
+        ind2D = site_2D_inds[0]
+    else:
+        return None
+    siteatom = mol2D.atoms[ind2D]
+    assert siteatom.site == site["site"]
+    for a in siteatom.edges.keys():
+        if not a.is_surface_site():
+            return None
+    c = coad2D.get_desorbed_molecules()[0]
+    mol2D = mol2D.merge(c)
+    ldict = mol2D.get_all_labeled_atoms()
+    label = list(ldict.keys())[0]
+    catom = list(ldict.values())[0]
+    catom.label = ''
+    if label == "*1":
+        bd = Bond(siteatom,catom,order=1)
+        catom.radical_electrons -= 1
+    elif label == "*2":
+        bd = Bond(siteatom,catom,order=2)
+        if catom.radical_electrons >= 2:
+            catom.radical_electrons -= 2
+        else:
+            catom.lone_pairs -= 1
+    elif label == "*3":
+        bd = Bond(siteatom,catom,order=3)
+        if catom.radical_electrons >= 3:
+            catom.radical_electrons -= 3
+        elif catom.radical_electrons == 1:
+            catom.radical_electrons = 0
+            catom.lone_pairs -= 1
+        elif catom.radical_electrons == 2:
+            catom.radical_electrons = 1
+            catom.lone_pairs -= 1
+    elif label == "*4":
+        bd = Bond(siteatom,catom,order=4)
+        if catom.radical_electrons >= 4:
+            catom.radical_electrons -= 4
+        elif catom.radical_electrons == 1:
+            catom.lone_pairs -= 2
+        elif catom.radical_electrons == 2:
+            catom.radical_electrons = 0
+            catom.lone_pairs -= 1
+        elif catom.radical_electrons == 3:
+            catom.radical_electrons = 1
+            catom.lone_pairs -= 1
+    else:
+        raise ValueError
+    mol2D.add_bond(bd)
+    mol2D.multiplicity = mol2D.get_radical_count() + 1
+    mol2D.update_atomtypes()
+    mol2D.update_connectivity_values()
+    return mol2D
+
+def configuration_is_valid(mol2D,admol,is_ts,unstable_pairs):
+    unstable_ind_pairs = set()
+    if is_ts:
+        admol_splits = split_ts_to_reactants(admol,tagatoms=False)
+        for asplit in admol_splits:
+            snum = len([a for a in asplit.atoms if a.is_surface_site()])
+            for unstable_pair in unstable_pairs:
+                iso = asplit.find_subgraph_isomorphisms(unstable_pair,save_order=True)
+                if iso:
+                    inds = []
+                    for a in iso[0].keys():
+                        assert a in asplit.atoms
+                        if a.is_bonded_to_surface() and not a.is_surface_site():
+                            inds.append(asplit.atoms.index(a)-snum)
+                    unstable_ind_pairs.add(frozenset(inds)) #groups of atom inds that if they are in a isomorphism indicate to allow anything for that ts split
+
+    struct = mol2D
+    if is_ts:
+        structspl = split_ts_to_reactants(struct,tagatoms=False)
+    else:
+        structspl = [struct]
+    
+    validity_judgements = []
+    failed = False
+    for st in structspl:
+        snum = len([a for a in st.atoms if a.is_surface_site()])
+        failed = False
+        for unstable_pair in unstable_pairs:
+            iso = st.find_subgraph_isomorphisms(unstable_pair,save_order=True)
+
+            if iso:
+                inds = []
+                for a in iso[0].keys():
+                    if a.is_bonded_to_surface() and not a.is_surface_site():
+                        inds.append(st.atoms.index(a)-snum)
+                if frozenset(inds) in unstable_ind_pairs:
+                    pass
+                else:
+                    failed = True
+        if not failed:
+            validity_judgements.append(True)
+        else:
+            validity_judgements.append(False)
+
+    return all(validity_judgements)
+
+def get_best_adsorbate_xyz(adsorbate_path,sites,nslab):
+    """
+    load the adsorbates associated with the reaction and find the unique optimized
+    adsorbate structures for each species
+    returns a dictionary mapping each adsorbate name to a list of ase.Atoms objects
+    """
+    prefixes = os.listdir(adsorbate_path)
+    with open(os.path.join(adsorbate_path,"info.json"),"r") as f:
+        info = json.load(f)
+    ase_to_mol_surface_atom_map = {int(k):int(v) for k,v in info["gratom_to_molecule_surface_atom_map"].items()}
+    mol = Molecule().from_adjacency_list(info["adjlist"])
+    geoms = []
+    for prefix in prefixes:
+        path = os.path.join(adsorbate_path,prefix,prefix+".xyz")
+        if os.path.exists(path):
+            geoms.append(path)
+    xyzs = get_unique_sym(geoms)
+    adsorbate = None
+    min_energy = np.inf
+    Es,_,freq = get_adsorbate_energies(adsorbate_path)
+    for xyz in xyzs:
+        geo = read(xyz)
+        strind = os.path.split(xyz)[1].split(".")[0]
+        occ = get_occupied_sites(geo,sites,nslab)
+        required_surface_inds = set([ind+nslab for ind in ase_to_mol_surface_atom_map.keys()])
+        found_surface_inds = set([site["bonding_index"] for site in occ])
+        if len(occ) >= len(mol.get_adatoms()) and required_surface_inds.issubset(found_surface_inds):
+            if Es[strind] < min_energy:
+                adsorbate = xyz
+                min_energy = Es[strind]
+
+    return adsorbate
+
+def adsorbate_interaction_decomposition(mol,local_radius=5):
+    surface_bonded_inds = []
+    for i,at in enumerate(mol.atoms):
+        if at.is_bonded_to_surface() and not at.is_surface_site():
+            surface_bonded_inds.append(i)
+    
+    structs = []
+    for i,indi in enumerate(surface_bonded_inds):
+        for j,indj in enumerate(surface_bonded_inds):
+            if i > j:
+                st = mol.copy(deep=True)
+                st.atoms[indi].label = "*"
+                st.atoms[indj].label = "*"
+                paths = find_shortest_paths_sites(st.atoms[indi],st.atoms[indj])
+                ad_atoms_i,ad_surf_sites_i = find_adsorbate_atoms_surface_sites(st.atoms[indi],st)
+                ad_atoms_j,ad_surf_sites_j = find_adsorbate_atoms_surface_sites(st.atoms[indj],st)
+                if st.atoms[indi] in ad_atoms_j: #i & j are part of same adsorbate/TS structure
+                    continue
+                for p in paths:
+                    for a in p:
+                        if a.is_surface_site():
+                            a.label = "*"
+                            
+                for ind in [indi,indj]:
+                    for a in st.atoms:
+                        if a.is_surface_site() and len(find_shortest_paths_sites(st.atoms[indi],a)[0]) - 2 <= local_radius:
+                            a.label = "*"
+                
+                ats_to_delete = [a for a in st.atoms if a.is_surface_site() and a.label != "*" and a not in ad_surf_sites_i and a not in ad_surf_sites_j]
+                for a in ats_to_delete:
+                    st.remove_atom(a)
+                try:
+                    stout = [x for x in st.split() if x.contains_surface_site()][0]
+                except IndexError as e:
+                    print(mol.to_adjacency_list())
+                    print(st.to_adjacency_list())
+                    raise e
+                for a in stout.atoms:
+                    if a.is_surface_site():
+                        a.label = ""
+                
+                structs.append(stout)
+    
+    return structs
+    
+def adsorbate_site_decomposition(mol):
+    surface_bonded_inds = []
+    for i,at in enumerate(mol.atoms):
+        if at.is_bonded_to_surface() and not at.is_surface_site():
+            surface_bonded_inds.append(i)
+    
+    structs = []
+    for i,indi in enumerate(surface_bonded_inds):
+        st = mol.copy(deep=True)
+        st.atoms[indi].label = "*"
+        structs.append(st)
+    
+    return structs
+
+def get_adsorbed_atom_pairs(length=7, r_bonds=None):
+    """
+    length is number of site atoms between adsorbates
+    """
+    if r_bonds is None:
+        r_bonds = [1, 2, 3, 0.05]
+    groups = []
+    for j in range(2,length+1):
+        g = Group().from_adjacency_list("""1 * R u0 px cx""")
+        a2 = g.atoms[0]
+        for i in range(j):
+            a = GroupAtom(atomtype=["X"],radical_electrons=[0],lone_pairs=[0],charge=[0])
+            if i == 0:
+                b = GroupBond(a2, a, order=r_bonds)
+            else:
+                b = GroupBond(a2, a, order=["S"])
+            g.add_atom(a)
+            g.add_bond(b)
+            a2 = a
+        a = GroupAtom(atomtype=["R"], label="*", radical_electrons=[0])
+        b = GroupBond(a2, a, order=r_bonds)
+        g.add_atom(a)
+        g.add_bond(b)
+        groups.append(g)
+
+    return groups
+
+import itertools
+def get_adsorbed_atom_groups(Nad=3, length=7, r_bonds=None):
+    """
+    length is number of site atoms between adsorbates
+    """
+    assert Nad == 3, "Doesn't work for Nad=2 and the combination ordering seems to matter for Nad > 3, so only Nad=3"
+    if r_bonds is None:
+        r_bonds = [1, 2, 3, 0.05]
+    groups = []
+    lengths = list(range(2,length+1))
+    for comb in itertools.combinations(lengths,Nad):
+        g = Group().from_adjacency_list("""1 * R u0 px cx""")
+        a2 = g.atoms[0]
+        for k,j in enumerate(comb):
+            for i in range(j):
+                a = GroupAtom(atomtype=["X"],radical_electrons=[0],lone_pairs=[0],charge=[0])
+                if i == 0:
+                    b = GroupBond(a2, a, order=r_bonds)
+                else:
+                    b = GroupBond(a2, a, order=["S"])
+                g.add_atom(a)
+                g.add_bond(b)
+                a2 = a
+            if k+1 < len(comb):
+                a = GroupAtom(atomtype=["R"], label="*", radical_electrons=[0])
+                b = GroupBond(a2, a, order=r_bonds)
+                g.add_atom(a)
+                g.add_bond(b)
+            else:
+                b = GroupBond(g.atoms[1],a2, order=["S"])
+                g.add_bond(b)
+        
+        groups.append(g)
+
+    return groups
+
+def get_atom_centered_correction(m,coadmol_E_dict):
+    out_structs = split_adsorbed_structures(m,clear_site_info=False)
+    correction = 0.0
+    minE = min(coadmol_E_dict.values())
+    for struct in out_structs:
+        for coadmol,E in coadmol_E_dict.items():
+            if struct.is_isomorphic(coadmol,save_order=True):
+                correction += E - minE
+                break
+    return correction
+
+def get_atom_center_stability(m,coadmol_stability_dict):
+    out_structs = split_adsorbed_structures(m,clear_site_info=False)
+    for struct in out_structs:
+        for coadmol,v in coadmol_stability_dict.items():
+            if struct.is_isomorphic(coadmol,save_order=True):
+                if not v:
+                    return False
+
+    return True
+
+def break_train_val_test(datums,test_fract=0.1,val_fract=0.1):
+    ds = datums[:]
+    N = len(datums)
+    Ntest = round(N*test_fract)
+    Nval = round(N*val_fract)
+    np.random.shuffle(ds)
+    test = ds[:Ntest]
+    val = ds[Ntest:Ntest+Nval]
+    train = ds[Nval+Ntest:]
+    return train,val,test
+
+def split_triad(tagged_grp):
+    labeled_atoms = tagged_grp.get_labeled_atoms("*")
+    
+    pair_atoms_inds = []
+    a = labeled_atoms[0]
+    path = [a]
+    notatom = None
+    c = 0
+    while c < 3:
+        if len(path) == 1:
+            anew = list(path[-1].bonds.keys())[0]
+        elif len(path[-1].bonds) == 3 and len(path) > 2:
+            anew = [x for x in path[-1].bonds.keys() if not x.is_surface_site()][0]
+        elif len(path[-1].bonds) == 3:
+            ats = list(path[-1].bonds.keys())
+            for a in ats:
+                if a not in path and a is not notatom:
+                    anew = a
+                    break
+            else:
+                raise ValueError
+        elif len(path[-1].bonds) == 2:
+            ats = list(path[-1].bonds.keys())
+            if ats[0] in path:
+                anew = ats[1]
+            else:
+                anew = ats[0]
+        path.append(anew)
+        
+        if not anew.is_surface_site():
+            pair_atoms_inds.append([tagged_grp.atoms.index(a) for a in path])
+            c += 1
+            notatom = path[-3]
+            path = path[-2:][::-1]
+            
+    out_pairs = []
+    for pair_inds in pair_atoms_inds:
+        g = tagged_grp.copy(deep=True)
+        ats = [g.atoms[ind] for ind in pair_inds]
+        atom_to_remove = []
+        for a in g.atoms:
+            if a not in ats:
+                atom_to_remove.append(a)
+        for a in atom_to_remove:
+            g.remove_atom(a)
+        g.update()
+        out_pairs.append(g)
+        
+    return out_pairs
+
+def is_descendent_of_or_is(node,ancestor_node):
+    n = node
+    while n.parent is not None:
+        if n is ancestor_node:
+            return True
+        else:
+            n = n.parent
+    return False
+
+#currently not used anymore as the default algorithm is sufficient for pair-wise decompositions
+class CoverageDependenceRegressor(MultiEvalSubgraphIsomorphicDecisionTreeRegressor):
+    def fit_rule(self, alpha=0.1):
+        max_depth = max([node.depth for node in self.nodes.values()])
+        y = np.array([datum.value for datum in self.datums])
+        preds = np.zeros(len(self.datums))
+        self.node_uncertainties = dict()
+        weights = self.weights
+        W = self.W
+        triad_node = self.nodes["Root_Triad"]
+        for pair in [True,False]:
+            for depth in range(max_depth + 1):
+                if depth == 0:
+                    self.nodes["Root"].rule = Rule(value=0.0,num_data=0)
+                    continue #skip Root node
+                else:
+                    if pair:
+                        nodes = [node for node in self.nodes.values() if node.depth == depth and not is_descendent_of_or_is(node,triad_node)]
+                    else:
+                        nodes = [node for node in self.nodes.values() if node.depth == depth and is_descendent_of_or_is(node,triad_node)]
+                
+                if len(nodes) == 0:
+                    continue
+                    
+                # generate matrix
+                A = sp.lil_matrix((len(self.datums), len(nodes)))
+                y -= preds
+    
+                for i, datum in enumerate(self.datums):
+                    for node in self.mol_node_maps[datum]["nodes"]:
+                        while node is not None:
+                            if node in nodes:
+                                j = nodes.index(node)
+                                A[i, j] += 1.0
+                            node = node.parent
+    
+                clf = linear_model.Lasso(
+                    alpha=alpha,
+                    fit_intercept=False,
+                    tol=1e-4,
+                    max_iter=1000000000,
+                    selection="random",
+                )
+                if weights is not None:
+                    lasso = clf.fit(A, y, sample_weight=weights)
+                else:
+                    lasso = clf.fit(A, y)
+                
+                preds = A * clf.coef_
+                self.data_delta = preds - y
+    
+                for i, val in enumerate(clf.coef_):
+                    nodes[i].rule = Rule(value=val, num_data=np.sum(A[:, i]))
+
+        train_error = [self.evaluate(d.mol, estimate_uncertainty=False) - d.value for d in self.datums]
+
+        logging.info("training MAE: {}".format(np.mean(np.abs(np.array(train_error)))))
+
+        if self.validation_set:
+            val_error = [self.evaluate(d.mol, estimate_uncertainty=False) - d.value for d in self.validation_set]
+            val_mae = np.mean(np.abs(np.array(val_error)))
+            if val_mae < self.min_val_error:
+                self.min_val_error = val_mae
+                self.best_tree_nodes = list(self.nodes.keys())
+                self.bestA = A
+                self.best_nodes = {k: v for k, v in self.nodes.items()}
+                self.best_mol_node_maps = {
+                    k: {"mols": v["mols"][:], "nodes": v["nodes"][:]}
+                    for k, v in self.mol_node_maps.items()
+                }
+                self.best_rule_map = {name:self.nodes[name].rule for name in self.best_tree_nodes}
+            self.val_mae = val_mae
+            logging.info("validation MAE: {}".format(self.val_mae))
+
+        if self.test_set:
+            test_error = [self.evaluate(d.mol) - d.value for d in self.test_set]
+            test_mae = np.mean(np.abs(np.array(test_error)))
+            logging.info("test MAE: {}".format(test_mae))
+            
+        logging.info("# nodes: {}".format(len(self.nodes)))
+        
+def train_sidt_cov_dep_regressor(pairs_datums,sampling_datums,r_site=None,
+                                r_atoms=None,node_fract_training=0.7):
+
+    if r_site is None:
+        r_site = ["","ontop","bridge","hcp","fcc"]
+
+    if r_atoms is None:
+        r_atoms = ["C","O","N","H","X"]
+    
+    root_pair = Group().from_adjacency_list("""
+    1 * R u0 px cx {2,[S,D,T,Q,R]}
+    2   X u0 p0 c0 {1,[S,D,T,Q,R]}
+    3 * R u0 px cx {4,[S,D,T,Q,R]}
+    4   X u0 p0 c0 {3,[S,D,T,Q,R]}
+    """)
+
+    root_pair_node = Node(group=root_pair,name="Root",parent=None,depth=0)
+
+    pair_nodes = []
+    initial_pair_root_splits = get_adsorbed_atom_pairs(length=7,r_bonds=[1,2,3,0.05])
+    for i,g in enumerate(initial_pair_root_splits):
+        gcleared = g.copy(deep=True)
+        gcleared.clear_labeled_atoms()
+        for d in pairs_datums+sampling_datums:
+            if d.mol.is_subgraph_isomorphic(gcleared,save_order=True): #only add if the group exists
+                n = Node(group=g,name=root_pair_node.name+"_"+str(i),parent=root_pair_node,depth=1)
+                pair_nodes.append(n)
+                root_pair_node.children.append(n)
+                break
+
+    pairnodes = {n.name:n for n in [root_pair_node]+pair_nodes}
+
+    Nfullnodes = (len(pairs_datums)+len(sampling_datums))*node_fract_training
+
+    treepair = MultiEvalSubgraphIsomorphicDecisionTreeRegressor([adsorbate_interaction_decomposition],
+                                                   nodes=pairnodes,
+                                                   r=[ATOMTYPES[x] for x in r_atoms],
+                                                   r_bonds=[1,2,3,0.05],
+                                                             r_un=[0],
+                                                   r_site=["","ontop","bridge","hcp","fcc"],
+                                                   max_structures_to_generate_extensions=100,
+                                                   fract_nodes_expand_per_iter=0.025,
+                                                   iter_max=2,
+                                                   iter_item_cap=100,
+                                                  )
+        
+    treepair.generate_tree(data=pairs_datums+sampling_datums,max_nodes=Nfullnodes)
+    
+    return treepair
+
+def train_sidt_cov_dep_classifier(datums,r_site=None,
+                                r_atoms=None,node_fract_training=0.5,node_min=50):
+
+    if r_site is None:
+        r_site = ["","ontop","bridge","hcp","fcc"]
+
+    if r_atoms is None:
+        r_atoms = ["C","O","N","H","X"]
+    
+    root_node = Node(group=None,name="Root",depth=0)
+    
+    root_pair = Group().from_adjacency_list("""
+    1 * R u0 px cx {2,[S,D,T,Q,R]}
+    2   X u0 p0 c0 {1,[S,D,T,Q,R]}
+    3 * R u0 px cx {4,[S,D,T,Q,R]}
+    4   X u0 p0 c0 {3,[S,D,T,Q,R]}
+    """)
+    
+    root_triad = Group().from_adjacency_list("""
+    1 * R u0 px cx {2,[S,D,T,Q,R]}
+    2   X u0 p0 c0 {1,[S,D,T,Q,R]}
+    3 * R u0 px cx {4,[S,D,T,Q,R]}
+    4   X u0 p0 c0 {3,[S,D,T,Q,R]}
+    5 * R u0 px cx {6,[S,D,T,Q,R]}
+    6   X u0 p0 c0 {5,[S,D,T,Q,R]}
+    """)
+
+    root_pair_node = Node(group=root_pair,name="Root_Pair",parent=root_node,depth=1)
+    root_triad_node = Node(group=root_triad,name="Root_Triad",parent=root_node,depth=1)
+    root_node.children.extend([root_pair_node,root_triad_node])
+
+    pair_nodes = []
+    initial_root_splits = get_adsorbed_atom_pairs(length=7,r_bonds=[1,2,3,0.05])
+    for i,g in enumerate(initial_root_splits):
+        n = Node(group=g,name=root_pair_node.name+"_"+str(i),parent=root_pair_node,depth=2)
+        pair_nodes.append(n)
+        root_pair_node.children.append(n)
+
+    triad_nodes = []
+    initial_root_splits = get_adsorbed_atom_groups(Nad=3,length=7,r_bonds=[1,2,3,0.05])
+    for i,g in enumerate(initial_root_splits):
+        n = Node(group=g,name=root_triad_node.name+"_"+str(i),parent=root_triad_node,depth=2)
+        triad_nodes.append(n)
+        root_triad_node.children.append(n)
+
+    nodes = {n.name:n for n in [root_node,root_pair_node,root_triad_node]+pair_nodes+triad_nodes}
+    
+    node_len = len(nodes)
+
+    tree = MultiEvalSubgraphIsomorphicDecisionTreeBinaryClassifier([adsorbate_interaction_decomposition,adsorbate_triad_interaction_decomposition],
+                                                                   nodes=nodes,
+                                               r=[ATOMTYPES[x] for x in r_atoms],
+                                               r_bonds=[1,2,3,0.05],
+                                                         r_un=[0],
+                                               r_site=r_site,
+                                                iter_max=2,
+                                                iter_item_cap=100,
+                                                max_structures_to_generate_extensions=100,
+                                              )
+
+    train_sample,val,test = break_train_val_test(datums,test_fract=0.0,val_fract=0.1)
+
+    if len(train_sample)*node_fract_training > node_min:
+        tree.generate_tree(data=train_sample,validation_set=val,max_nodes=len(train_sample)*node_fract_training,
+                       postpruning_based_on_val=True)
+    else:
+        tree.generate_tree(data=train_sample,validation_set=val,max_nodes=node_min)
+
+    return tree
+
+def add_ad_to_site(admol,coad,site):
+    c = coad.copy(deep=True)
+    at = [a for a in c.atoms if a.is_bonded_to_surface()][0]
+    bd = [bd for a,bd in at.bonds.items() if a.is_surface_site()][0]
+    order = bd.order
+    for a in c.atoms:
+        if a.is_surface_site():
+            c.remove_atom(a)
+    
+    atind = c.atoms.index(at)
+    sind = admol.atoms.index(site)
+    admol_length = len(admol.atoms)
+    
+    admolout = admol.copy(deep=True).merge(c)
+    snew = admolout.atoms[sind]
+    atnew = admolout.atoms[len(admol.atoms)+atind]
+    assert site.site == snew.site and site.morphology == snew.morphology
+    assert atnew.element == at.element
+    
+    bd = Bond(atnew,snew,order=order)
+    admolout.add_bond(bd)
+    
+    admolout.clear_labeled_atoms()
+    admolout.multiplicity=1
+    try:
+        admolout.update(sort_atoms=False)
+    except Exception as e:
+        raise ValueError((e,admol.to_adjacency_list(),coad.to_adjacency_list(),admolout.to_adjacency_list()))
+    admolout.update_connectivity_values()
+    
+    return admolout
+    
+def get_configurations(admol, coad, coad_stable_sites, tree_interaction_classifier=None, coadmol_stability_dict=None, unstable_groups=None,
+                       tree_interaction_regressor=None, tree_atom_regressor=None, coadmol_E_dict=None, energy_tol=None):
+    empty_sites = [a for a in admol.atoms if a.is_surface_site() and a.site in coad_stable_sites and not any([not a2.is_surface_site() for a2 in a.bonds.keys()])]
+    print("empty sites")
+    print(len(empty_sites))
+    empty_site_inds = [admol.atoms.index(s) for s in empty_sites]
+    outmols = [admol]
+    outmols_split = [[admol]]
+    lowest_energy_surface_bond_dict = dict()
+    print(len(outmols))
+    for i in range(len(empty_sites)):
+        newoutmols = []
+        for m in outmols_split[i]: #all configurations with a fixed number of co-adsorbates (highest number hit yet)
+            for sind in empty_site_inds:
+                if any(not a.is_surface_site() for a in m.atoms[sind].bonds.keys()): #site is already occupied 
+                    continue
+                
+                outmol = add_ad_to_site(m,coad,m.atoms[sind])
+    
+                if (tree_interaction_classifier and not tree_interaction_classifier.evaluate(outmol)) or \
+                    (coadmol_stability_dict and not get_atom_center_stability(outmol,coadmol_stability_dict)): #unstable configuration
+                    continue
+
+                if unstable_groups:
+                    is_unstable = False
+                    for grp in unstable_groups:
+                        if outmol.is_subgraph_isomorphic(grp,save_order=True):
+                            is_unstable = True 
+                            break
+                    if is_unstable:
+                        continue
+                    
+                for mol in newoutmols:
+                    if outmol.is_isomorphic(mol,save_order=True):
+                        break
+                else:
+                    if energy_tol: #skip configurations that are more than energy_tol higher than the lowest energy configuraiton for a given coverage found
+                        if tree_atom_regressor is not None:
+                            E = tree_atom_regressor.evaluate(m) + tree_interaction_regressor.evaluate(m)
+                        elif coadmol_E_dict is not None:
+                            E = get_atom_centered_correction(m,coadmol_E_dict) + tree_interaction_regressor.evaluate(m)
+                        else:
+                            raise ValueError
+    
+                        Nsurfbonds = len([b for b in m.get_all_edges() if (b.atom1.is_surface_site() and not b.atom2.is_surface_site()) or (b.atom2.is_surface_site() and not b.atom1.is_surface_site())])
+                        if Nsurfbonds not in lowest_energy_surface_bond_dict.keys() or lowest_energy_surface_bond_dict[Nsurfbonds] > E:
+                            lowest_energy_surface_bond_dict[Nsurfbonds] = E
+                            newoutmols.append(outmol)
+                        elif lowest_energy_surface_bond_dict[Nsurfbonds] + energy_tol < E:
+                            pass #don't add configuration
+                        else:
+                            newoutmols.append(outmol)
+                    else:
+                        newoutmols.append(outmol)
+                        
+        outmols.extend(newoutmols)
+        outmols_split.append(newoutmols)
+        print(len(outmols))
+
+    return outmols
+
+def check_stable(config,stability_datums):
+    for d in stability_datums:
+        if d.value == False and config.is_isomorphic(d.mol,save_order=True):
+            return False
+    else:
+        return True
+
+def evaluate_from_datums(config,energy_datums):
+    for d in energy_datums:
+        if config.is_isomorphic(d.mol,save_order=True):
+            return d.value
+    else:
+        return None
+
+def get_cov_energies_configs_concern_tree(tree_interaction_regressor, configs, coad_stable_sites, Ncoad_isolated, concern_energy_tol=None, tree_atom_regressor=None, coadmol_E_dict=None, 
+                     stability_datums=None):
+    Ncoad_energy_dict = dict()
+    Ncoad_config_dict = dict()
+    config_to_Eunctr = dict()
+    configs_of_concern = {}
+    Nempty = len([a for a in configs[0].atoms if a.is_surface_site() and a.site in coad_stable_sites])
+    for i,m in enumerate(configs):
+        Ncoad = len([a for a in m.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) - Ncoad_isolated
+
+        if stability_datums:
+            stable = check_stable(m,stability_datums)
+        else:
+            stable = True
+            
+        if not stable:
+            continue
+            
+        if tree_atom_regressor is not None:
+            Einteraction,std,tr = tree_interaction_regressor.evaluate(m,trace=True, estimate_uncertainty=True)
+            E = tree_atom_regressor.evaluate(m) + Einteraction
+        elif coadmol_E_dict is not None:
+            Einteraction,std,tr = tree_interaction_regressor.evaluate(m,trace=True, estimate_uncertainty=True)
+            E = get_atom_centered_correction(m,coadmol_E_dict) + Einteraction
+        else:
+            raise ValueError
+        
+        config_to_Eunctr[i] = (m,E,std,tr)
+        
+        if Ncoad not in Ncoad_energy_dict.keys():
+            Ncoad_energy_dict[Ncoad] = E
+            Ncoad_config_dict[Ncoad] = m.to_adjacency_list()
+        elif E < Ncoad_energy_dict[Ncoad]:
+            Ncoad_energy_dict[Ncoad] = E
+            Ncoad_config_dict[Ncoad] = m.to_adjacency_list()
+        elif E == Ncoad_energy_dict[Ncoad]:
+            if isinstance(Ncoad_config_dict[Ncoad],list):
+                Ncoad_config_dict[Ncoad].append(m.to_adjacency_list())
+            else:
+                Ncoad_config_dict[Ncoad] = [Ncoad_config_dict[Ncoad], m.to_adjacency_list()]
+    
+    for i in config_to_Eunctr.keys():
+        Ncoad = len([a for a in m.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) - Ncoad_isolated
+        m,E,std,tr = config_to_Eunctr[i]
+        if ((concern_energy_tol is None) or (Ncoad_energy_dict[Ncoad] + concern_energy_tol > E)):
+            configs_of_concern[i] = (m,E,tr,std)
+    
+    return Ncoad_energy_dict,Ncoad_config_dict,configs_of_concern
+    
+def get_cov_energies(tree_interaction_regressor, configs, coad_stable_sites, Ncoad_isolated, tree_atom_regressor=None, coadmol_E_dict=None, 
+                     stability_datums=None):
+    Ncoad_energy_dict = dict()
+    Ncoad_config_dict = dict()
+    Nempty = len([a for a in configs[0].atoms if a.is_surface_site() and a.site in coad_stable_sites])
+    for m in configs:
+        Ncoad = len([a for a in m.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) - Ncoad_isolated
+
+        if tree_atom_regressor is not None:
+            E = tree_atom_regressor.evaluate(m) + tree_interaction_regressor.evaluate(m)
+        elif coadmol_E_dict is not None:
+            E = get_atom_centered_correction(m,coadmol_E_dict) + tree_interaction_regressor.evaluate(m)
+        else:
+            raise ValueError
+        if Ncoad not in Ncoad_energy_dict.keys():
+            if (not stability_datums) or check_stable(m,stability_datums):
+                Ncoad_energy_dict[Ncoad] = E
+                Ncoad_config_dict[Ncoad] = m.to_adjacency_list()
+        elif E < Ncoad_energy_dict[Ncoad]:
+            if (not stability_datums) or check_stable(m,stability_datums):
+                Ncoad_energy_dict[Ncoad] = E
+                Ncoad_config_dict[Ncoad] = m.to_adjacency_list()
+        elif E == Ncoad_energy_dict[Ncoad]:
+            if (not stability_datums) or check_stable(m,stability_datums):
+                if isinstance(Ncoad_config_dict[Ncoad],list):
+                    Ncoad_config_dict[Ncoad].append(m.to_adjacency_list())
+                else:
+                    Ncoad_config_dict[Ncoad] = [Ncoad_config_dict[Ncoad], m.to_adjacency_list()]
+    return Ncoad_energy_dict,Ncoad_config_dict
+
+def get_configs_of_concern(tree_interaction_regressor,configs,coad_stable_sites,Ncoad_energy_dict,Nocc_isolated,concern_energy_tol,tree_atom_regressor=None,
+                           coadmol_E_dict=None):
+    configs_of_concern = {}
+    Nempty = len([a for a in configs[0].atoms if a.is_surface_site() and a.site in coad_stable_sites])
+    for m in configs:
+        Ncoad = len([a for a in m.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())]) - Nocc_isolated
+        if tree_atom_regressor is not None:
+            Einteraction,std,tr = tree_interaction_regressor.evaluate(m,trace=True, estimate_uncertainty=True)
+            E = tree_atom_regressor.evaluate(m) + Einteraction
+        elif coadmol_E_dict is not None:
+            Einteraction,std,tr = tree_interaction_regressor.evaluate(m,trace=True, estimate_uncertainty=True)
+            E = get_atom_centered_correction(m,coadmol_E_dict) + Einteraction
+        else:
+            raise ValueError
+        if Ncoad_energy_dict[Ncoad] + concern_energy_tol > E:
+            configs_of_concern[m] = (E,tr,std)
+        
+    return configs_of_concern
+
+def load_coverage_delta(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,ts_pynta_dir=None,allowed_structure_site_structures=None,
+                       out_file_name="out",vib_file_name="vib",is_ad=None,keep_binding_vdW_bonds=False,keep_vdW_surface_bonds=False):
+
+    try:
+        info = json.load(open(os.path.join(d,"info.json")))
+    except:
+        info = None
+
+    if is_ad is None:
+        is_ts =  "xyz" in info.keys() and os.path.split(os.path.split(os.path.split(info["xyz"])[0])[0])[1][:2] == "TS"
+    else:
+        is_ts = not is_ad
+    
+    atoms = read(os.path.join(d,out_file_name+".xyz"))
+    
+    if not is_ts:
+        try:
+            admol,neighbor_sites,ninds = generate_adsorbate_2D(atoms, sites, site_adjacency, len(slab), max_dist=np.inf, cut_multidentate_off_num=None, 
+                                                               allowed_structure_site_structures=allowed_structure_site_structures,
+                                                               keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+        except (SiteOccupationException,TooManyElectronsException,ValueError) as e:
+            return None,None,None,None
+
+        logging.error(admol.to_adjacency_list())
+        split_structs = split_adsorbed_structures(admol)
+        try:
+            vibdata = get_vibdata(os.path.join(d,out_file_name+".xyz"),os.path.join(d,vib_file_name+".json"),len(slab))
+
+            Ecad = atoms.get_potential_energy() - slab.get_potential_energy() + vibdata.get_zero_point_energy()
+    
+            Esep = 0.0
+            for split_struct in split_structs:
+                # if not split_struct.contains_surface_site(): #if gas phase species don't extract
+                #     return None,None,None,None
+                for ad,E in ad_energy_dict.items():
+                    if split_struct.is_isomorphic(ad,save_order=True):
+                        Esep += E
+                        break
+                else:
+                    logging.error("no matching adsorbate for {}".format(split_struct.to_smiles()))
+                    return None,None,None,None
+            
+            dE = Ecad - Esep
+        except:
+            dE = None
+    else:
+        if ts_pynta_dir is None:
+            xyz = info["xyz"]
+            ts_path = os.path.split(os.path.split(info["xyz"])[0])[0]
+        else:
+            ts_path = os.path.join(ts_pynta_dir,os.path.split(os.path.split(os.path.split(info["xyz"])[0])[0])[1])
+            xyz = os.path.join(ts_path,os.path.split(os.path.split(info["xyz"])[0])[1],os.path.split(info["xyz"])[1])
+        
+        ts_info_path = os.path.join(ts_path,"info.json")
+
+        try:
+            admol,neighbor_sites,ninds = generate_TS_2D(atoms, ts_info_path, metal, facet, sites, site_adjacency, len(slab), imag_freq_path=os.path.join(d,"vib.0.traj"), 
+                                                        max_dist=np.inf, allowed_structure_site_structures=allowed_structure_site_structures,
+                                                        keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+        except (SiteOccupationException,TooManyElectronsException, ValueError) as e:
+            return None,None,None,None
+        try:
+            vibdata = get_vibdata(os.path.join(d,out_file_name+".xyz"),os.path.join(d,vib_file_name+".json"),len(slab))
+        except FileNotFoundError:
+            return admol,neighbor_sites,ninds,None
+        
+        Ecad = atoms.get_potential_energy() - slab.get_potential_energy() + vibdata.get_zero_point_energy()
+        
+        
+        ts = read(xyz)
+        ts_vibdata = get_vibdata(xyz,os.path.join(os.path.split(xyz)[0],"vib.json_vib.json"),len(slab))
+        Ets = ts.get_potential_energy() - slab.get_potential_energy()  + ts_vibdata.get_zero_point_energy()
+        
+        num_ts_atoms = len(ts) - len(slab)
+        
+        coatoms = atoms.copy()
+        for i in range(num_ts_atoms): #remove ts atoms before we identify coadsorbates
+            coatoms.pop(len(slab))
+        try:
+            coadmol,coneighbor_sites,coninds = generate_adsorbate_2D(coatoms, sites, site_adjacency, len(slab), max_dist=np.inf, cut_multidentate_off_num=None)
+        except (SiteOccupationException,TooManyElectronsException,ValueError) as e:
+            return None,None,None,None
+            
+        split_structs = split_adsorbed_structures(coadmol)
+        Esep = Ets
+        for split_struct in split_structs:
+            for ad,E in ad_energy_dict.items():
+                if split_struct.is_isomorphic(ad,save_order=True):
+                    Esep += E
+                    break
+            else:
+                logging.error("no matching adsorbate for {}".format(split_struct.to_smiles()))
+                return None,None,None,None
+                
+        dE = Ecad - Esep
+    
+    return admol,neighbor_sites,ninds,dE
+
+
+
+def check_TS_coadsorbate_disruption(atoms,admol,nslab3D,nslab2D,ntsatoms,mult=1.3):
+    ad = atoms[nslab3D:]
+    cutoffs = natural_cutoffs(ad,mult=mult)
+    adanalysis = Analysis(ad,cutoffs=cutoffs)
+    adadj = adanalysis.adjacency_matrix[0]
+    for i in range(nslab2D+ntsatoms,len(admol.atoms)):
+        at = admol.atoms[i]
+        ind = i - nslab2D
+        if adadj[ind,:ntsatoms].sum() > 0 or adadj[:ntsatoms,ind].sum() > 0 :
+            for k in range(ntsatoms):
+                if (not adadj[ind,k]) and (not adadj[k,ind]):
+                    continue
+                else:
+                    at = admol.atoms[nslab2D+k]
+                    for bd in admol.get_bonds(at).values():
+                        if bd.get_order_str() == 'R':
+                            return True
+                        
+    return False
+    
+def tagsites(atoms,sites):
+    aview = deepcopy(atoms)
+    anames = ['He','Ne','Ar','Kr','Xe','Rn']
+    for i,site in enumerate(sites):
+        add_adsorbate_to_site(aview,Atoms(anames[i], [(0, 0, 0)]), 0, sites[i], height=1.0)
+    return aview
+
+def extract_sample(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,pynta_dir,max_dist=3.0,rxn_alignment_min=0.7,
+                   coad_disruption_tol=1.1,out_file_name="out",init_file_name="init",vib_file_name="vib",is_ad=None,
+                  use_allowed_site_structures=True):
+    out_dict = dict()
+    
+    atoms_init = read(os.path.join(d,init_file_name+".xyz"))
+    if not os.path.exists(os.path.join(d,out_file_name+".xyz")):
+        atoms = None
+    else:
+        atoms = read(os.path.join(d,out_file_name+".xyz"))
+
+    nslab = len(slab)
+    
+    if len(atoms_init) < len(slab): #gas phase
+        return None
+    #view(atoms_init)
+    #view(atoms)
+    # try:
+    #     vibdata = get_vibdata(os.path.join(d,out_file_name+".xyz"),os.path.join(d,vib_file_name+".json"),len(slab))
+    # except:
+    #     vibdata = None
+    keep_binding_vdW_bonds=False 
+    keep_vdW_surface_bonds=False
+    
+    try:
+        with open(os.path.join(d,"info.json"),'r') as f:
+            info = json.load(f)
+            mol = Molecule().from_adjacency_list(info["adjlist"])
+            for bd in mol.get_all_edges():
+                if bd.order == 0:
+                    if bd.atom1.is_surface_site() or bd.atom2.is_surface_site():
+                        keep_binding_vdW_bonds = True
+                        m = mol.copy(deep=True)
+                        b = m.get_bond(m.atoms[mol.atoms.index(bd.atom1)],m.atoms[mol.atoms.index(bd.atom2)])
+                        m.remove_bond(b)
+                        out = m.split()
+                        if len(out) == 1: #vdW bond is not only thing connecting adsorbate to surface
+                            keep_vdW_surface_bonds = True
+    except FileNotFoundError:
+        info = None
+        
+    if is_ad is None:
+        is_ad = os.path.split(os.path.split(os.path.split(os.path.split(info['xyz'])[0])[0])[0])[1] == 'Adsorbates'
+
+    if is_ad and info:
+        orig_xyz = os.path.join(pynta_dir,os.sep.join(os.path.normpath(info['xyz']).split(os.sep)[-4:]))
+    else:
+        orig_xyz = os.path.join(pynta_dir,os.sep.join(os.path.normpath(info['xyz']).split(os.sep)[-3:]))
+    
+    if atoms is None:
+        out_dict["isomorphic"] = False 
+        out_dict["init_info"] = info["adjlist"]
+        out_dict["out"] = None 
+        out_dict["init_extracted"] = None 
+        out_dict["sample_dir"] = d
+        out_dict["orig_xyz"] = orig_xyz
+        out_dict["valid"] = False
+        out_dict["dE"] = None
+        return out_dict
+        
+    if info:
+        with open(os.path.join(os.path.split(os.path.split(orig_xyz)[0])[0],"info.json"),'r') as f:
+            info_clean = json.load(f)
+    if orig_xyz:
+        if not is_ad:
+            reactants_clean = Molecule().from_adjacency_list(info_clean["reactants"])
+            cut_multidentate_off_num = len(atoms) - nslab - len([a for a in reactants_clean.atoms if not a.is_surface_site()])
+        else:
+            ad_clean = Molecule().from_adjacency_list(info_clean["adjlist"])
+            cut_multidentate_off_num = len(atoms) - nslab - len([a for a in ad_clean.atoms if not a.is_surface_site()])
+    else:
+        cut_multidentate_off_num = None
+
+    if use_allowed_site_structures:
+        allowed_structure_site_structures = generate_allowed_structure_site_structures(os.path.join(pynta_dir,"Adsorbates"),
+                                                                                         sites,site_adjacency,nslab,max_dist=max_dist,
+                                                                                                 cut_multidentate_off_num=None)
+    else:
+        allowed_structure_site_structures = None
+    
+    admol,neighbor_sites,ninds,dE = load_coverage_delta(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,
+                                            ts_pynta_dir=pynta_dir,allowed_structure_site_structures=allowed_structure_site_structures,
+                                                       out_file_name=out_file_name,vib_file_name=vib_file_name,is_ad=is_ad,
+                                                       keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+    
+    
+    if is_ad:
+        try:
+            admol_init,neighbor_sites_init,ninds_init = generate_adsorbate_2D(atoms_init, sites, site_adjacency, nslab, 
+                     max_dist=None, cut_multidentate_off_num=cut_multidentate_off_num, allowed_structure_site_structures=allowed_structure_site_structures,
+                     keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+        except TooManyElectronsException:
+            admol_init = None
+            neighbor_sites_init = None
+            ninds_init = None
+            
+        if info:
+            admol_info = Molecule().from_adjacency_list(info["adjlist"],check_consistency=False)
+            
+        valid = admol is not None #and len(admol.split()) == 1
+
+    else:
+        valid = True
+        info_path = os.path.join(os.path.split(os.path.split(orig_xyz)[0])[0],"info.json")
+        imag_freq_path = os.path.join(os.path.split(orig_xyz)[0],"vib.0.traj")
+
+        try:
+            admol_init,neighbor_sites_init,ninds_init = generate_TS_2D(atoms_init, info_path, metal, facet, sites, site_adjacency, nslab, 
+                     imag_freq_path=imag_freq_path, max_dist=None, cut_multidentate_off_num=cut_multidentate_off_num, 
+                                                                   allowed_structure_site_structures=allowed_structure_site_structures,
+                                                                   keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+        except TooManyElectronsException as e:
+            valid = False
+            admol_init = None
+        except SiteOccupationException as e:
+            logging.error("raised SiteOccupationException on init structure")
+            valid = False
+            admol_init = None
+
+        admol_info = Molecule().from_adjacency_list(info["adjlist"],check_consistency=False)
+        
+        if valid:
+            valid = not (admol is None)
+            if not valid:
+                logging.error("TS admol has disconnected graph")
+
+        if valid:
+            #convert TS path
+            ts_path = os.path.join(pynta_dir, os.path.split(os.path.split(os.path.split(orig_xyz)[0])[0])[1], os.path.split(os.path.split(orig_xyz)[0])[1], os.path.split(orig_xyz)[1])
+            
+            vibdata_ts = get_vibdata(ts_path,os.path.join(os.path.split(ts_path)[0],"vib.json_vib.json"),len(slab))
+            with open(os.path.join(os.path.split(os.path.split(ts_path)[0])[0],"info.json"),'r') as f:
+                ts_info = json.load(f)
+        
+            nslab3D = ts_info["nslab"]
+            ntsatoms = len([a for a in Molecule().from_adjacency_list(ts_info['reactants']).atoms if not a.is_surface_site()])
+            nslab2D = len(ninds)
+
+            if os.path.exists(os.path.join(d,vib_file_name+".json")):
+                coad_disrupt = check_TS_coadsorbate_disruption(atoms,admol,nslab3D,nslab2D,ntsatoms,mult=coad_disruption_tol)
+                tr_sample = Trajectory(os.path.join(d,"vib.0.traj"))
+
+                #view(tr_sample)
+                tr_ts = Trajectory(os.path.join(os.path.split(ts_path)[0],"vib.0.traj"))
+                v_ts = tr_ts[15].positions[nslab:] - tr_ts[16].positions[nslab:]
+                v_ts /= np.linalg.norm(v_ts)
+                v_sample = tr_sample[15].positions[nslab:] - tr_sample[16].positions[nslab:]
+                v_sample /= np.linalg.norm(v_sample)
+                v_prod = np.abs(np.sum(v_sample[:len(v_ts)]*v_ts))
+
+                if len(admol.split()) > 1 or v_prod < rxn_alignment_min or coad_disrupt:
+                    valid = False
+                else:
+                    valid = True
+            else:
+                coad_disrupt = None
+                valid = None
+            
+    out_dict["valid"] = valid
+    out_dict["dE"] = dE
+    if admol is None or admol_init is None:
+        out_dict["isomorphic"] = False
+    else:
+        out_dict["isomorphic"] = admol.is_isomorphic(admol_init,save_order=True)
+    if admol:
+        out_dict["out"] = admol.to_adjacency_list()
+    else:
+        out_dict["out"] = None
+    if info:
+        out_dict["init_info"] = admol_info.to_adjacency_list()
+    if admol_init:
+        out_dict["init_extracted"] = admol_init.to_adjacency_list()
+    else:
+        out_dict["init_extracted"] = None
+    out_dict['orig_xyz'] = orig_xyz
+    out_dict['sample_dir'] = d
+    return out_dict
+
+def process_calculation(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,pynta_dir,coadmol_E_dict,max_dist=3.0,rxn_alignment_min=0.7,
+                    coad_disruption_tol=1.1,out_file_name="out",init_file_name="init",vib_file_name="vib",is_ad=None):
+    
+    datums_stability = []
+    datum_E = None
+    
+    outdict = extract_sample(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,pynta_dir,max_dist=max_dist,rxn_alignment_min=rxn_alignment_min,
+                    coad_disruption_tol=coad_disruption_tol,
+                    out_file_name=out_file_name,init_file_name=init_file_name,vib_file_name=vib_file_name,is_ad=is_ad)
+    
+    if outdict["init_extracted"]:
+        mol_init = Molecule().from_adjacency_list(outdict["init_extracted"],check_consistency=False)
+    else:
+        mol_init = Molecule().from_adjacency_list(outdict["init_info"],check_consistency=False)
+    
+    if (outdict["valid"] is not None) and (not outdict["isomorphic"]):
+        datums_stability.append(Datum(mol_init,False))
+
+    if outdict["out"] is not None:
+        mol_out = Molecule().from_adjacency_list(outdict["out"],check_consistency=False)
+        
+        if len(mol_out.split()) == 1: #ignore if desorbed species
+            if outdict["valid"] and outdict["dE"] is not None: #there is an energy value
+                skip = False
+                for at in mol_out.atoms:
+                    n = 0
+                    if at.is_surface_site():
+                        for k,v in mol_out.get_edges(at).items():
+                            if not k.is_surface_site():
+                                n += 1
+                    if n > 1:
+                        skip = True
+                if not skip:
+                    try:
+                        datum_E = Datum(mol_out,outdict["dE"]*96.48530749925793*1000.0 - get_atom_centered_correction(mol_out,coadmol_E_dict)) #eV to J/mol
+                        datums_stability.append(Datum(mol_out,True))
+                    except KeyError:
+                        pass
+                    
+    
+    return datum_E,datums_stability
+
+def get_configs_for_calculation(configs_of_concern_by_admol,Ncoad_energy_by_admol,admol_name_structure_dict,
+                                computed_configs,tree_regressor,Ncalc_per_iter,T=np.inf,calculation_selection_iterations=10):
+    group_to_occurence = dict()
+    configs_of_concern = []
+    for admol_name in configs_of_concern_by_admol.keys():
+        admol = admol_name_structure_dict[admol_name]
+        Nocc_isolated = len([a for a in admol.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())])
+        configs_of_concern_admol = configs_of_concern_by_admol[admol_name]
+        group_to_occurence_admol = dict()
+        N = 0 #number of group contributions associated with given admol
+        for v in configs_of_concern_admol:
+            m = v[0]
+            E = v[1]
+            sigma = v[3]
+            grps = v[2]
+            Nocc = len([a for a in m.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())])
+            configs_of_concern.append(v)
+            Emin = Ncoad_energy_by_admol[admol_name][Nocc-Nocc_isolated]
+            for grp in grps:
+                if grp in group_to_occurence_admol:
+                    group_to_occurence_admol[grp] += np.exp(-(E-Emin)/(8.314*T))
+                    N += 1
+                else:
+                    group_to_occurence_admol[grp] = np.exp(-(E-Emin)/(8.314*T))
+                    N += 1
+        for g,n in group_to_occurence_admol.items():
+            if grp in group_to_occurence:
+                group_to_occurence[grp] += n/N
+            else:
+                group_to_occurence[grp] = n/N 
+                    
+    concern_groups = list(group_to_occurence.keys()) #selected ordering
+        
+    group_to_weight = np.array([group_to_occurence[g]*tree_regressor.nodes[g].rule.uncertainty for g in concern_groups])
+    
+    config_to_group_fract = dict()
+    for j,v in enumerate(configs_of_concern):
+        config,E,tr,std = v
+        config_group_unc = np.array([tr.count(g)*tree_regressor.nodes[g].rule.uncertainty for g in concern_groups])
+        s = config_group_unc.sum()
+        if s == 0:
+            config_to_group_fract[j] = config_group_unc
+        else:
+            config_to_group_fract[j] = config_group_unc/s
+            
+    
+    configs_for_calculation = []
+    group_fract_for_calculation = []
+    maxval = 0.0
+    config_list = [x[0] for x in configs_of_concern]
+    #sort lower numbers of coadsorbates first, larger numbers of coadsorbates later
+    sorted_config_list = sorted(config_list[:], key = lambda x: len([bd for bd in x.get_all_edges() if (bd.atom1.is_surface_site() and not bd.atom2.is_surface_site()) or (bd.atom2.is_surface_site() and not bd.atom1.is_surface_site())]))
+
+    for q in range(calculation_selection_iterations): #loop the greedy optimization a number of times to get close to local min
+        for config in sorted_config_list:
+            ind = [i for i,c in enumerate(config_list) if c is config][0]
+            if ind not in config_to_group_fract.keys():
+                logging.error("config not in config_to_group_fract")
+                continue
+            for c in computed_configs:
+                if config.is_isomorphic(c,save_order=True):
+                    break
+            else:
+                if len(configs_for_calculation) < Ncalc_per_iter:
+                    configs_for_calculation = configs_for_calculation + [config]
+                    group_fract = config_to_group_fract[ind]
+                    group_fract_for_calculation.append(group_fract)
+                    maxval = np.linalg.norm(sum(group_fract_for_calculation) * group_to_weight, ord=1)
+                else:
+                    group_fract = config_to_group_fract[j]
+                    g_old_sum = sum(group_fract_for_calculation)
+                    maxarglocal = None
+                    maxvallocal = maxval
+                    for i in range(Ncalc_per_iter):
+                        val = np.linalg.norm((g_old_sum - group_fract_for_calculation[i] + group_fract) * group_to_weight, ord=1)
+                        if val > maxvallocal:
+                            maxarglocal = i
+                            maxvallocal = val
+                            
+                    if maxarglocal is not None:
+                        group_fract_for_calculation[maxarglocal] = group_fract
+                        configs_for_calculation[maxarglocal] = config
+                        maxval = maxvallocal
+        
+    admol_to_config_for_calculation = dict()
+    for config in configs_for_calculation:
+        for admol_name,v in configs_of_concern_by_admol.items():
+            if any(x[0] is config for x in v):
+                if admol_name in admol_to_config_for_calculation.keys():
+                    admol_to_config_for_calculation[admol_name].append(config)
+                else:
+                    admol_to_config_for_calculation[admol_name] = [config]
+                break
+        else:
+            raise ValueError
+
+    return configs_for_calculation,admol_to_config_for_calculation
+
+def mol_to_atoms(admol,slab,sites,metal,partial_atoms=None,partial_admol=None):
+    """Generate a 3D initial guess for a given 2D admol configuration
+
+    Args:
+        admol (_type_): 2D representation of the configuration
+        slab (_type_): 3D Atoms object for the slab
+        sites (_type_): list of sites
+        metal (_type_): metal of the slab
+        partial_atoms (_type_, optional): a 3D Atoms object representing part of the target configuraitons. Defaults to None.
+        partial_admol (_type_, optional): 2D representation of the partial_atoms Atoms object. Defaults to None.
+
+    Raises:
+        ValueError: _description_
+        ValueError: _description_
+
+    Returns:
+        _type_: Atoms object corresponding to the admol 2D configuration
+    """
+    if partial_atoms and partial_admol:
+        atoms = deepcopy(partial_atoms)
+        gpartial_admol = partial_admol.to_group()
+        admol_atom_order = admol.atoms[:]
+        gpartial_admol_order = gpartial_admol.atoms[:]
+        try:
+            subisos = admol.find_subgraph_isomorphisms(gpartial_admol,save_order=True)
+        except ValueError:
+            subisos = admol.find_subgraph_isomorphisms(gpartial_admol)
+            admol.atoms = admol_atom_order
+            gpartial_admol.atoms = gpartial_admol_order
+        if len(subisos) == 0:
+            raise ValueError("partial_admol is not subgraph isomorphic to admol: {0}, {1}".format(gpartial_admol.to_adjacency_list(),admol.to_adjacency_list()))
+        subiso = subisos[0]
+        atoms_in_partial = [a for a in subiso.keys() if not a.is_surface_site()]
+        split_structs,adsorbed_atom_dict = split_adsorbed_structures(admol,clear_site_info=False,adsorption_info=True,atoms_to_skip=atoms_in_partial)
+    elif partial_atoms is None and partial_admol is None:
+        atoms = deepcopy(slab)
+        split_structs,adsorbed_atom_dict = split_adsorbed_structures(admol,clear_site_info=False,adsorption_info=True)
+    else:
+        raise ValueError("Must include both partial_atoms, partial_admol to start from partial")
+    
+    for st in split_structs:
+        if len(st.atoms) == 0:
+            continue
+        ad,mol_to_atoms_map = get_adsorbate(st)
+        
+        if len(st.atoms) > 2: #not atomic adsorbate
+            rot_vec = np.array([0.0,0.0,0.0])
+            for atst in st.atoms:
+                if atst.is_bonded_to_surface():
+                    adatom_molind = st.atoms.index(atst)
+                    for bdat in atst.bonds.keys():
+                        if not bdat.is_surface_site():
+                            rot_vec += ad.positions[mol_to_atoms_map[st.atoms.index(bdat)]] - ad.positions[mol_to_atoms_map[adatom_molind]]
+            rot_vec /= np.linalg.norm(rot_vec)
+            ad.rotate(rot_vec,[0.0,0.0,1.0]) #rotate bonds toward the +z-axis
+        else:
+            adatom_molind = st.atoms.index([a for a in st.atoms if not a.is_surface_site()][0])
+
+        atom_surf_inds = []
+        ad_sites = []
+        for a,sind in adsorbed_atom_dict.items():
+            if a in st.atoms:
+                assert sites[sind]["site"] == a.site, (admol.to_adjacency_list(),partial_admol.to_adjacency_list())
+                atom_surf_inds.append(mol_to_atoms_map[adatom_molind])
+                ad_sites.append(sites[sind])
+        atoms,_,_ = place_adsorbate(ad,atoms,atom_surf_inds,ad_sites,metal)
+
+    return atoms
\ No newline at end of file
diff --git a/pynta/geometricanalysis.py b/pynta/geometricanalysis.py
new file mode 100644
index 00000000..b49d34ed
--- /dev/null
+++ b/pynta/geometricanalysis.py
@@ -0,0 +1,1165 @@
+from molecule.molecule import Molecule,Atom,Bond
+from ase.io import read, write
+from ase.geometry import get_distances
+from ase.geometry.analysis import Analysis
+from ase.io.trajectory import Trajectory
+from ase.vibrations import VibrationsData
+from ase.thermochemistry import HarmonicThermo, IdealGasThermo
+from pynta.utils import get_unique_sym, get_occupied_sites, sites_match, SiteOccupationException
+from pynta.mol import *
+import numpy as np
+import json
+import shutil
+import os
+import logging
+
+def generate_site_molecule(slab, target_sites, sites, site_adjacency, max_dist=None):
+    
+    #find overall neighboring sites
+    if max_dist:
+        neighbor_sites = []
+        ninds = []
+        for i,site in enumerate(sites):
+            for target_site in target_sites:
+                v,dist = get_distances([site["position"]], [target_site["position"]], cell=slab.cell, pbc=slab.pbc)
+
+                if np.linalg.norm(v[:2]) < max_dist:
+                    neighbor_sites.append(site)
+                    ninds.append(i)
+                    break
+    else:
+        neighbor_sites = sites[:]
+        ninds = list(range(len(sites)))
+    
+    atoms = []
+    
+    for nsite in neighbor_sites:
+        atoms.append(Atom(element="X", lone_pairs=0, site=nsite["site"], morphology=nsite["morphology"]))
+    
+    mol = Molecule(atoms=atoms,multiplicity=1)
+    
+    
+    for i,nind in enumerate(ninds):
+        adjinds = site_adjacency[nind]
+        for adjind in adjinds:
+            if adjind in ninds:
+                j = ninds.index(adjind)
+                if not mol.has_bond(mol.atoms[i],mol.atoms[j]):
+                    mol.add_bond(Bond(mol.atoms[i],mol.atoms[j],1.0))
+    
+    return mol,neighbor_sites,ninds
+
+
+def generate_adsorbate_molecule(adslab, sites, site_adjacency, nslab, max_dist=None, allowed_site_dict=dict()):
+    #adsorbate
+    ad = adslab[nslab:]
+    adanalysis = Analysis(ad)
+    adadj = adanalysis.adjacency_matrix[0]
+    adatoms = []
+    for at in ad:
+        sym = at.symbol
+        if sym in ['N','P']:
+            adatoms.append(Atom(element=sym, lone_pairs=1))
+        elif sym in ['O','S']:
+            adatoms.append(Atom(element=sym, lone_pairs=2))
+        elif sym in ["F","Cl","Br","I"]:
+            adatoms.append(Atom(element=sym, lone_pairs=3))
+        elif sym in ["Si","C","H","Li"]:
+            adatoms.append(Atom(element=sym, lone_pairs=0))
+        else:
+            raise ValueError("Cannot handle atom {}".format(sym))
+            
+    admol = Molecule(atoms=adatoms)
+    for i in range(len(adatoms)):
+        for j in range(len(adatoms)):
+            if i == j:
+                continue
+            if adadj[i,j] and not admol.has_bond(admol.atoms[i],admol.atoms[j]):
+                admol.add_bond(Bond(admol.atoms[i],admol.atoms[j],1.0))
+    
+    #slab
+    occ = get_occupied_sites(adslab,sites,nslab,allowed_site_dict=allowed_site_dict)
+    target_sites = [site for site in sites if any((oc["position"] == site["position"]).all() for oc in occ)]
+    
+    #find overall neighboring sites
+    if max_dist:
+        neighbor_sites = []
+        ninds = []
+        for i,site in enumerate(sites):
+            for target_site in target_sites:
+                v,dist = get_distances([site["position"]], [target_site["position"]], cell=adslab.cell, pbc=adslab.pbc)
+                if np.linalg.norm(v[:2]) < max_dist:
+                    neighbor_sites.append(site)
+                    ninds.append(i)
+                    break
+    else:
+        neighbor_sites = sites[:]
+        ninds = list(range(len(sites)))
+        
+    atoms = []
+    
+    for nsite in neighbor_sites:
+        atoms.append(Atom(element="X", lone_pairs=0, site=nsite["site"], morphology=nsite["morphology"]))
+    
+    mol = Molecule(atoms=atoms,multiplicity=1)
+    
+    
+    for i,nind in enumerate(ninds):
+        adjinds = site_adjacency[nind]
+        for adjind in adjinds:
+            if adjind in ninds:
+                j = ninds.index(adjind)
+                if not mol.has_bond(mol.atoms[i],mol.atoms[j]):
+                    mol.add_bond(Bond(mol.atoms[i],mol.atoms[j],1.0))
+    
+    #merge adsorbate and slab
+    fullmol = Molecule(atoms=mol.atoms+admol.atoms)
+    fullmol.multiplicity = 1
+    
+    for oc in occ:
+        adind = oc["bonding_index"] - nslab + len(neighbor_sites)
+        nind = [ninds[i] for i,nsite in enumerate(neighbor_sites) if (nsite["position"]==oc["position"]).all()][0]
+        site_index = ninds.index(nind)
+        fullmol.add_bond(Bond(fullmol.atoms[site_index],fullmol.atoms[adind],0))
+    
+    return fullmol,neighbor_sites,ninds
+
+def fix_bond_orders(mol,allow_failure=False,keep_binding_vdW_bonds=False,keep_vdW_surface_bonds=False):
+    atom_order = ["C","Si","N","P","O","S","F","Cl","Br","I","Li","H"]#["H","Li","I","Br","Cl","F","S","O","P","N","Si","C"]#["C","Si","N","P","O","S","F","Cl","Br","I","Li","H"]
+    for target in atom_order:
+        for a in mol.atoms:
+            if a.symbol == target:
+                if keep_vdW_surface_bonds or keep_binding_vdW_bonds:
+                    fix_atom(mol,a,allow_failure=allow_failure,cleanup_surface_bonds=False)
+                else:
+                    fix_atom(mol,a,allow_failure=allow_failure,cleanup_surface_bonds=True)
+    
+    if keep_vdW_surface_bonds:
+        vdW_bonds = []
+        for bd in mol.get_all_edges():
+            if bd.order == 0:
+                if not bd.atom1.is_surface_bond() and not bd.atom2.is_surface_bond():
+                    vdW_bonds.append(bd)
+        for bd in vdW_bonds:
+            mol.remove_bond(bd)
+            
+    elif keep_binding_vdW_bonds:
+        mtest = mol.copy(deep=True)
+        
+        vdW_bonds = []
+        for bd in mtest.get_all_edges():
+            if bd.order == 0:
+                vdW_bonds.append(bd)
+        for bd in vdW_bonds:
+            mtest.remove_bond(bd)
+            
+        split_structs,atom_mapping = split_adsorbed_structures(mtest,clear_site_info=True,atom_mapping=True)
+        
+        gas_cluster_inds = []
+        for sstruct in split_structs:
+            if not any(a.is_surface_site() for a in sstruct.atoms): #nominal gas phase construct
+                gas_cluster_inds.extend([atom_mapping[a] for a in sstruct.atoms])
+        
+        bd_to_remove = []
+        for bd in mol.get_all_edges():
+            if bd.order == 0:
+                if (bd.atom1.is_surface_site() and mol.atoms.index(bd.atom2) in gas_cluster_inds) or (bd.atom2.is_surface_site() and mol.atoms.index(bd.atom1) in gas_cluster_inds):
+                    pass
+                else:
+                    bd_to_remove.append(bd)
+        
+        for bd in bd_to_remove:
+            mol.remove_bond(bd)
+    
+    return None
+
+def get_octet_deviation(atom):
+    if atom.symbol in ["Li","H"]:
+        return 2 - (atom.lone_pairs*2 + 2*sum(x.order for x in atom.bonds.values() if x.order >= 0.5))
+    else:
+        return 8 - (atom.lone_pairs*2 + 2*sum(x.order for x in atom.bonds.values() if x.order >= 0.5))
+    
+
+def choose_bond_to_fix(symbols,octets,orders,bonded_to_surface):
+    atom_order = [["X"],["C","Si"],["N","P"],["O","S"]]
+    priority = []
+    for i in range(len(symbols)):
+        sym = symbols[i]
+        if (sym != "X" and octets[i] <= 0) or orders[i] == "R":
+            priority.append(0)
+        else:
+            for j,atoms in enumerate(atom_order):
+                if sym in atoms:
+                    if not bonded_to_surface[i]:
+                        priority.append(j+11)
+                    else:
+                        priority.append(j+1)
+    return np.argmax(priority),np.max(priority)
+
+class TooManyElectronsException(Exception):
+    pass
+
+def fix_atom(mol,atom,allow_failure=False,cleanup_surface_bonds=True):
+    delta = get_octet_deviation(atom)
+    
+    if delta < 0: #too many electrons
+        logging.error(mol.to_adjacency_list())
+        raise TooManyElectronsException("Cannot solve problem of too many electrons not counting surface bonds and all ad bonds are 1")
+    elif delta > 0: #too few electrons
+        atoms = [k for k,v in atom.bonds.items()]
+        symbols = [k.symbol for k in atoms]
+        octets = [get_octet_deviation(k) for k in atoms]
+        orders = [v.get_order_str() for k,v in atom.bonds.items()]
+        bonded_to_surface = [a.is_bonded_to_surface() for a in atoms]
+        deviation = True
+        while delta > 0:
+            ind,val = choose_bond_to_fix(symbols,octets,orders,bonded_to_surface)
+            if val == 0:
+                if allow_failure:
+                    break
+                else:
+                    raise ValueError("Could not fix bonds")
+            bd = atom.bonds[atoms[ind]]
+            bd.increment_order()
+            delta -= 2
+            octets[ind] -= 2
+        
+    #at end clean up improper surface bonds
+    if cleanup_surface_bonds:
+        to_remove = []
+        for k,v in atom.bonds.items():
+            if k.symbol == 'X' and v.order == 0:    
+                to_remove.append(v)
+        
+        for v in to_remove:
+            mol.remove_bond(v)
+        
+def generate_adsorbate_2D(atoms, sites, site_adjacency, nslab, max_dist=3.0, cut_multidentate_off_num=None, allowed_structure_site_structures=None,
+                          keep_binding_vdW_bonds=False, keep_vdW_surface_bonds=False):
+    admol,neighbor_sites,ninds = generate_adsorbate_molecule(atoms, sites, site_adjacency, nslab, max_dist=max_dist)
+    
+    if cut_multidentate_off_num:
+        bds_to_remove = []
+        for i in range(len(admol.atoms)-cut_multidentate_off_num,len(admol.atoms)):
+            if admol.atoms[i].is_bonded_to_surface():         
+                for a,b in admol.atoms[i].edges.items():
+                    if not a.is_surface_site() and (a.is_bonded_to_surface() or admol.atoms.index(a) < len(admol.atoms)-cut_multidentate_off_num):
+                        if b not in bds_to_remove:
+                            bds_to_remove.append(b)
+        for b in bds_to_remove:
+            admol.remove_bond(b)
+    
+    fix_bond_orders(admol,keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+    
+    if allowed_structure_site_structures: #we are going to analyze the initial occupational analysis and update it based on expectations
+        allowed_site_dict = dict()
+        slabless = remove_slab(admol)
+        for site_structs in allowed_structure_site_structures:
+            struct = generate_without_site_info(site_structs[0])
+            grp_struct = struct.to_group()
+            mappings = slabless.find_subgraph_isomorphisms(grp_struct,save_order=True)
+            considered_sites = []
+            for mapping in mappings:
+                atouts = [at for at in mapping.keys() if at.is_surface_site()]
+                if set(atouts) in considered_sites:
+                    continue
+                else:
+                    considered_sites.append(set(atouts))
+                
+                subgraph,inds = pluck_subgraph(slabless,atouts[0])
+                for site_struct in site_structs:
+                    if subgraph.is_isomorphic(site_struct,save_order=True):
+                        break
+                else:
+                    for atout in atouts:
+                        adatom = [a for a in atout.bonds.keys() if not a.is_surface_site()][0]
+                        ind = slabless.atoms.index(adatom) - len([a for a in slabless.atoms if a.is_surface_site()])
+                        struct_ind = [grp_struct.atoms.index(a) for aout,a in mapping.items() if aout==atout][0]
+                        sitetype = [(site_struct.atoms[struct_ind].site,site_struct.atoms[struct_ind].morphology) for site_struct in site_structs]
+                        if ind+nslab in allowed_site_dict.keys():
+                            allowed_site_dict[ind+nslab].extend(sitetype)
+                        else:
+                            allowed_site_dict[ind+nslab] = sitetype
+
+        admol,neighbor_sites,ninds = generate_adsorbate_molecule(atoms, sites, site_adjacency, nslab, max_dist=max_dist, allowed_site_dict=allowed_site_dict)
+
+        if cut_multidentate_off_num:
+            bds_to_remove = []
+            for i in range(len(admol.atoms)-cut_multidentate_off_num,len(admol.atoms)):
+                if admol.atoms[i].is_bonded_to_surface():         
+                    for a,b in admol.atoms[i].edges.items():
+                        if not a.is_surface_site() and (a.is_bonded_to_surface() or admol.atoms.index(a) < len(admol.atoms)-cut_multidentate_off_num):
+                            if b not in bds_to_remove:
+                                bds_to_remove.append(b)
+            for b in bds_to_remove:
+                admol.remove_bond(b)
+            
+        fix_bond_orders(admol,keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+    
+    admol.update_atomtypes()
+    admol.update_connectivity_values()
+    
+    return admol,neighbor_sites,ninds
+
+def generate_TS_2D(atoms, info_path,  metal, facet, sites, site_adjacency, nslab, imag_freq_path=None,
+                     max_dist=3.0, cut_multidentate_off_num=None, allowed_structure_site_structures=None,
+                     site_bond_cutoff=3.0,keep_binding_vdW_bonds=False,keep_vdW_surface_bonds=False):
+
+    admol,neighbor_sites,ninds = generate_adsorbate_molecule(atoms, sites, site_adjacency, 
+                                                             nslab, max_dist=max_dist) 
+
+    if cut_multidentate_off_num:
+        bds_to_remove = []
+        for i in range(len(admol.atoms)-cut_multidentate_off_num,len(admol.atoms)):
+            if admol.atoms[i].is_bonded_to_surface():         
+                for a,b in admol.atoms[i].edges.items():
+                    if not a.is_surface_site() and (a.is_bonded_to_surface() or admol.atoms.index(a) < len(admol.atoms)-cut_multidentate_off_num):
+                        if b not in bds_to_remove:
+                            bds_to_remove.append(b)
+        for b in bds_to_remove:
+            admol.remove_bond(b)
+    
+    with open(info_path) as f:
+        info = json.load(f)
+    
+    occ = get_occupied_sites(atoms,sites,nslab,site_bond_cutoff=site_bond_cutoff)
+    
+    template_mol_map = [{int(k):v for k,v in x.items()}  for x in info["template_mol_map"]]
+    molecule_to_atom_maps = [{int(k):v for k,v in x.items()}  for x in info["molecule_to_atom_maps"]]
+    broken_bonds,formed_bonds = get_broken_formed_bonds(Molecule().from_adjacency_list(info["reactants"]),
+                                                        Molecule().from_adjacency_list(info["products"]))
+    rbonds = broken_bonds | formed_bonds
+    if info["forward"]:
+        template = Molecule().from_adjacency_list(info["reactants"])
+    else:
+        template = Molecule().from_adjacency_list(info["products"])
+    
+    for bd in rbonds:
+        inds2D = []
+        for label in bd:
+            a = template.get_labeled_atoms(label)[0]
+            aind = template.atoms.index(a)
+            aseind = get_ase_index(aind,template_mol_map,molecule_to_atom_maps,nslab,info["ads_sizes"])
+            if aseind:
+                inds2D.append(aseind - nslab + len(neighbor_sites))
+
+        if len(inds2D) == 1:
+            i = inds2D[0]
+            for batom in admol.atoms[i].bonds.keys(): #try to find a surface bond
+                if batom.is_surface_site():
+                    inds2D.append(admol.atoms.index(batom))
+                    break
+            else: #didn't find surface bond, need to identify the site
+                pos = None
+                for site in occ:
+                    if site['bonding_index'] == i - len(neighbor_sites) + nslab:
+                        pos = site["position"]
+                        break
+                else:
+                    logging.error("couldn't find right site bond")
+                    logging.error(admol.to_adjacency_list())
+                    logging.error(i - len(neighbor_sites))
+                    raise SiteOccupationException
+                for j,s in enumerate(neighbor_sites):
+                    if (s["position"] == pos).all():
+                        inds2D.append(j)
+                        break
+                else:
+                    raise IndexError("could not find matching site")
+
+        if admol.has_bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]]):
+            b = admol.get_bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]])
+            if b.get_order_str() != "R":
+                b.set_order_str("R")
+            else: 
+                b.set_order_str("S") #this ensures the the split_ts leaves this bond connected 
+        else:
+            admol.add_bond(Bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]],"R"))
+
+    fix_bond_orders(admol,allow_failure=True,keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+
+    
+    if allowed_structure_site_structures: #we are going to analyze the initial occupational analysis and update it based on expectations
+        allowed_site_dicts = []
+        restricted_inds = set()
+        rs = split_ts_to_reactants(admol) #get reactants/products
+        for r in rs:
+            allowed_site_dict = dict()
+            slabless = remove_slab(r) #take the individual component
+            for site_structs in allowed_structure_site_structures:
+                struct = generate_without_site_info(site_structs[0])
+                grp_struct = struct.to_group() #known stable adsorbates
+                mappings = slabless.find_subgraph_isomorphisms(grp_struct,save_order=True) #try to find mappings to known stable structures 
+                for mapping in mappings: #go through each mapping
+                    atouts = [at for at in mapping.keys() if at.is_surface_site()]
+                    subgraph,inds = pluck_subgraph(slabless,atouts[0]) #extract the matching subgraph
+                    for site_struct in site_structs: #go through the known stable structures
+                        if subgraph.is_isomorphic(site_struct,save_order=True):
+                            break
+                    else:
+                        
+                        for atout in atouts:
+                            adatom = [a for a in atout.bonds.keys() if not a.is_surface_site()][0]
+                            ind = slabless.atoms.index(adatom) - len([a for a in slabless.atoms if a.is_surface_site()])
+                            struct_ind = [grp_struct.atoms.index(a) for aout,a in mapping.items() if aout==atout][0]
+                            assert site_structs[0].atoms[struct_ind].is_surface_site()
+                            sitetype = [(site_struct.atoms[struct_ind].site,site_struct.atoms[struct_ind].morphology) for site_struct in site_structs]
+                            if ind+nslab in allowed_site_dict.keys():
+                                allowed_site_dict[ind+nslab].extend(sitetype)
+                            else:
+                                allowed_site_dict[ind+nslab] = sitetype
+
+            allowed_site_dicts.append(allowed_site_dict)
+            restricted_inds |= set(allowed_site_dict.keys())
+            
+        allowed_site_dict = dict()
+        for ind in restricted_inds:
+            for d in allowed_site_dicts:
+                if ind in d.keys():
+                    if ind not in allowed_site_dict.keys():
+                        allowed_site_dict[ind] = set(d[ind])
+                    else:
+                        allowed_site_dict[ind] &= set(d[ind])
+
+        allowed_site_dict = {k: list(v) for k,v in allowed_site_dict.items()}
+    else:
+        allowed_site_dict = dict()
+
+    admol,neighbor_sites,ninds = generate_adsorbate_molecule(atoms, sites, site_adjacency, 
+                                                             nslab, max_dist=max_dist, allowed_site_dict=allowed_site_dict) 
+
+    if cut_multidentate_off_num:
+        bds_to_remove = []
+        for i in range(len(admol.atoms)-cut_multidentate_off_num,len(admol.atoms)):
+            if admol.atoms[i].is_bonded_to_surface():         
+                for a,b in admol.atoms[i].edges.items():
+                    if not a.is_surface_site() and (a.is_bonded_to_surface() or admol.atoms.index(a) < len(admol.atoms)-cut_multidentate_off_num):
+                        if b not in bds_to_remove:
+                            bds_to_remove.append(b)
+        for b in bds_to_remove:
+            admol.remove_bond(b)
+
+    for bd in rbonds:
+        inds2D = []
+        for label in bd:
+            a = template.get_labeled_atoms(label)[0]
+            aind = template.atoms.index(a)
+            aseind = get_ase_index(aind,template_mol_map,molecule_to_atom_maps,nslab,info["ads_sizes"])
+            if aseind:
+                inds2D.append(aseind - nslab + len(neighbor_sites))
+
+        if len(inds2D) == 1:
+            i = inds2D[0]
+            for batom in admol.atoms[i].bonds.keys(): #try to find a surface bond
+                if batom.is_surface_site():
+                    inds2D.append(admol.atoms.index(batom))
+                    break
+            else: #didn't find surface bond, need to identify the site
+                pos = None
+                for site in occ:
+                    if site['bonding_index'] == i - len(neighbor_sites) + nslab:
+                        pos = site["position"]
+                        break
+                else:
+                    raise ValueError("didn't find occupied site")
+                for j,s in enumerate(neighbor_sites):
+                    if (s["position"] == pos).all():
+                        inds2D.append(j)
+                        break
+                else:
+                    raise IndexError("could not find matching site")
+
+        if admol.has_bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]]):
+            b = admol.get_bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]])
+            if b.get_order_str() != "R":
+                b.set_order_str("R")
+            else: #diffusion on surface results in two R bonds to different sites
+                if imag_freq_path is None:
+                    logging.warning("Could not resolve diffusion type R-bonds due to lack of imaginary frequency info")
+                    continue
+                if admol.atoms[inds2D[0]].is_surface_site():
+                    site_atom_center = admol.atoms[inds2D[0]]
+                    Ratom = admol.atoms[inds2D[1]]
+                    if allowed_structure_site_structures:
+                        valid_sites = allowed_site_dict[inds2D[1] - len(neighbor_sites) + nslab]
+                else:
+                    site_atom_center = admol.atoms[inds2D[1]]
+                    Ratom = admol.atoms[inds2D[0]]
+                    if allowed_structure_site_structures:
+                        valid_sites = allowed_site_dict[inds2D[0] - len(neighbor_sites) + nslab]
+                site_center_ind = admol.atoms.index(site_atom_center)
+                
+                #get imaginary frequency motion
+                tr_ts = Trajectory(imag_freq_path)
+                v_ts,_ = get_distances(tr_ts[15].positions[admol.atoms.index(Ratom) - len(neighbor_sites) + nslab], tr_ts[16].positions[admol.atoms.index(Ratom) - len(neighbor_sites) + nslab], cell=atoms.cell, pbc=atoms.pbc)
+                v_ts = v_ts[0,0,:]
+                v_ts /= np.linalg.norm(v_ts)
+                #position of atom
+                target_pos = atoms.positions[admol.atoms.index(Ratom) - len(neighbor_sites) + nslab]
+                site_pair = None
+                best_metric = None
+                iters = 1
+                for i,site1 in enumerate(neighbor_sites):
+                    pos1 = site1["position"]
+                    site1_id = (site1["site"],site1["morphology"])
+                    for j in range(i):
+                        site2 = neighbor_sites[j]
+                        if i == j:
+                            continue
+                        site2_id = (site2["site"],site2["morphology"])
+                        pos2 = site2["position"]
+                        v,dist = get_distances([pos1],[pos2],cell=atoms.cell,pbc=atoms.pbc)
+                        dist = dist[0][0]
+                        v = v[0,0,:]
+                        v /= np.linalg.norm(v)
+                        motion_alignment = abs(np.dot(v_ts,v))
+                        v1t,dist1t = get_distances([pos1],[target_pos],cell=atoms.cell,pbc=atoms.pbc)
+                        dist1t = dist1t[0][0]
+                        v1t = v1t[0,0,:]
+                        v2t,dist2t = get_distances([pos2],[target_pos],cell=atoms.cell,pbc=atoms.pbc)
+                        v2t = v2t[0,0,:]
+                        dist2t = dist2t[0][0]
+                        halfwayness = np.linalg.norm(v1t+v2t)/(dist2t+dist1t)
+                        dist_site = max(dist1t,dist2t)
+                        if not np.isnan(motion_alignment) and motion_alignment > 0.95 and dist < 3.0 and dist_site < 2.0 and (not allowed_structure_site_structures or (site1_id in valid_sites and site2_id in valid_sites)):
+                            iters += 1
+                            metric = 1.0 / (halfwayness*dist_site)
+                            if site_pair is None:
+                                site_pair = [i,j]
+                                best_metric = metric
+                            elif metric > best_metric:
+                                site_pair = [i,j]
+                                best_metric = metric
+                if site_pair is None:
+                    raise ValueError("Imaginary frequency doesn't mean minimal constraints for proper TS motion")
+                admol.remove_bond(b)
+                admol.add_bond(Bond(admol.atoms[site_pair[0]],Ratom,"R"))
+                admol.add_bond(Bond(admol.atoms[site_pair[1]],Ratom,"R")) 
+        else:
+            admol.add_bond(Bond(admol.atoms[inds2D[0]],admol.atoms[inds2D[1]],"R"))
+
+    fix_bond_orders(admol,allow_failure=True,keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds) #Reaction bonds prevent proper octet completion
+    
+    #remove surface bonds that abused for octet completion in reactions
+    for atom in admol.atoms:
+        surfbds = [bd for at,bd in atom.bonds.items() if at.is_surface_site() and bd.get_order_str() != 'R']
+        if len(surfbds) == 1:
+            Nrxnbd = len([bd for at,bd in atom.bonds.items() if bd.get_order_str() == 'R' and not at.is_surface_site()])
+            if Nrxnbd >= 2 and abs(Nrxnbd/2 - surfbds[0].order) < 1e-3:
+                admol.remove_edge(surfbds[0])
+        
+    admol.update_atomtypes()
+    admol.update_connectivity_values()
+    for i in range(len(neighbor_sites)):
+        assert neighbor_sites[i]["site"] == admol.atoms[i].site
+        assert neighbor_sites[i]["morphology"] == admol.atoms[i].morphology
+    return admol,neighbor_sites,ninds
+
+def tagsites(atoms,sites):
+    aview = deepcopy(atoms)
+    anames = ['He','Ne','Ar','Kr','Xe','Rn']
+    for i,site in enumerate(sites):
+        add_adsorbate_to_site(aview,Atoms(anames[i], [(0, 0, 0)]), 0, sites[i], height=1.0)
+    return aview
+
+def split_ts_to_reactants(ts2d,tagatoms=False,keep_binding_vdW_bonds=False,keep_vdW_surface_bonds=False):
+    rs = []
+    rbondnum = 0
+    for bd in ts2d.get_all_edges():
+        if bd.is_reaction_bond():
+            rbondnum += 1
+    
+    combs = list(itertools.product([0,1],repeat=rbondnum))
+
+    for comb in combs:
+        r = deepcopy(ts2d)
+        iters = 0
+        for i,bd in enumerate(r.get_all_edges()):
+            if bd.is_reaction_bond():
+                if tagatoms:
+                    if not bd.atom1.is_surface_site() and bd.atom1.is_bonded_to_surface():
+                        bd.atom1.label = "*"
+                    if not bd.atom2.is_surface_site() and bd.atom2.is_bonded_to_surface():
+                        bd.atom2.label = "*"
+                    
+                if comb[iters] == 0:
+                    r.remove_bond(bd)
+                else:
+                    bd.set_order_str('S')
+                iters += 1
+            else:
+                bd.set_order_str('S')
+        try:
+            fix_bond_orders(r,keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+            r.update(sort_atoms=False)
+            r.update_connectivity_values()
+            rs.append(r)
+        except (TooManyElectronsException,ValueError):
+            pass
+    
+    return rs
+      
+def generate_allowed_structure_site_structures(adsorbate_dir,sites,site_adjacency,nslab,max_dist=3.0,cut_multidentate_off_num=None):
+    allowed_structure_site_structures = []
+    for ad in os.listdir(adsorbate_dir):
+        if not os.path.isdir(os.path.join(adsorbate_dir,ad)):
+            continue
+        with open(os.path.join(adsorbate_dir,ad,"info.json"),'r') as f:
+            info = json.load(f)
+            target_mol = Molecule().from_adjacency_list(info["adjlist"])
+            atom_to_molecule_surface_atom_map = {int(k): int(v) for k,v in info["gratom_to_molecule_surface_atom_map"].items()}
+        if not target_mol.contains_surface_site():
+            continue
+        geoms = get_adsorbate_geometries(os.path.join(adsorbate_dir,ad),target_mol,sites,atom_to_molecule_surface_atom_map,nslab)
+        mols = []
+        for geom in geoms:
+            admol,neighbor_sites,ninds = generate_adsorbate_2D(geom, sites, site_adjacency, nslab, max_dist=max_dist, cut_multidentate_off_num=cut_multidentate_off_num)
+            m = remove_slab(admol)
+            if not generate_without_site_info(m).is_isomorphic(target_mol,save_order=True): #geom didn't optimize to target
+                continue
+            for mol in mols:
+                if mol.is_isomorphic(m,save_order=True):
+                    break
+            else:
+                m.update_atomtypes()
+                m.update_connectivity_values()
+                mols.append(m)
+        if mols:
+            allowed_structure_site_structures.append(mols)
+        
+    return allowed_structure_site_structures
+
+def get_best_adsorbate_geometries(adsorbate_path,aseinds,siteinfo,sites,site_adjacency,imag_freq_max=150.0):
+    """
+    load the adsorbates geometries find the best matching unique optimized
+    adsorbate structures for each species
+    siteinfo = [("ontop","terrace"),("edge","terrace")]
+    aseinds = [37,38]
+    """
+    info_path = os.path.join(adsorbate_path,"info.json")
+    with open(info_path,"r") as f:
+        info = json.load(f)
+    nslab = info["nslab"]
+    atom_to_molecule_surface_atom_map = {int(k):int(v) for k,v in info["gratom_to_molecule_surface_atom_map"].items()}
+    mol = Molecule().from_adjacency_list(info["adjlist"])
+    prefixes = os.listdir(adsorbate_path)
+    if len(prefixes) == 2: #includes info.json
+        return read(os.path.join(adsorbate_path,"0","0.xyz")),os.path.join(adsorbate_path,"0","0.xyz")
+    geoms = []
+    bestxyz = None
+    best = None
+    best_fingerprint = None
+    best_score = None
+    best_energy = None
+    target_fingerprint = [d for x in siteinfo for d in x]
+    for prefix in prefixes:
+        if prefix == "info.json":
+            continue
+        path = os.path.join(adsorbate_path,prefix,prefix+".xyz")
+        freq_path = os.path.join(adsorbate_path,prefix,"vib.json_vib.json")
+        if os.path.exists(path) and os.path.exists(freq_path):
+            with open(freq_path,"r") as f:
+                freq_dict = json.load(f)
+            freq = np.complex128(freq_dict["frequencies"][0])
+            if (np.isreal(freq) or freq.imag < imag_freq_max):
+                geoms.append(path)
+                
+    xyzs = geoms
+    
+    adsorbates = []
+    for xyz in xyzs:
+        geo = read(xyz)
+        occ = get_occupied_sites(geo,sites,nslab)
+        required_surface_inds = set([ind+nslab for ind in atom_to_molecule_surface_atom_map.keys()])
+        found_surface_inds = set([site["bonding_index"] for site in occ])
+        if len(occ) >= len(mol.get_adatoms()) and required_surface_inds.issubset(found_surface_inds):
+            if best is None:
+                best = geo
+                bestxyz = xyz
+                best_energy = geo.get_potential_energy()
+                assert len(siteinfo) <= len(occ)
+                for slist in itertools.permutations(occ,r=len(siteinfo)):
+                    sinfo = [(site["site"],site["morphology"]) for site in slist]
+                    fingerprint = [d for x in sinfo for d in x]
+                    if best_fingerprint is None:
+                        best_fingerprint = fingerprint
+                        best_score = np.equal(np.array(best_fingerprint),np.array(target_fingerprint)).sum()
+                    else:
+                        score = np.equal(np.array(fingerprint),np.array(target_fingerprint)).sum()
+                        if score > best_score:
+                            best_fingerprint = fingerprint
+                            best_score = score
+                                  
+            else:
+                for slist in itertools.permutations(occ,r=len(siteinfo)):
+                    sinfo = [(site["site"],site["morphology"]) for site in slist]
+                    fingerprint = [d for x in sinfo for d in x]
+                    score = np.equal(np.array(fingerprint),np.array(target_fingerprint)).sum()
+                    if score > best_score or score == best_score and geo.get_potential_energy() < best_energy:
+                        best = geo
+                        bestxyz = xyz
+                        best_energy = geo.get_potential_energy()
+                        best_fingerprint = fingerprint
+                        best_score = score
+    
+    return best,bestxyz
+
+def get_best_reaction_adsorbate_geometries(admol,admol_neighbors,nslab2D,adsorbates_path,info,sites,
+                                           site_adjacency,imag_freq_max=150.0,
+                                           assume_reactant_product_templates_same_order=False):
+
+    assert assume_reactant_product_templates_same_order #otherwise tricky to get formed bonds
+    
+    #get reverse template map
+    if info["forward"]:
+        template = Molecule().from_adjacency_list(info["reactants"])
+        rev_template = Molecule().from_adjacency_list(info["products"])
+    else:
+        template = Molecule().from_adjacency_list(info["products"])
+        rev_template = Molecule().from_adjacency_list(info["reactants"])
+    
+    broken_bonds,formed_bonds = get_broken_formed_bonds(template,rev_template)
+    
+    rev_mols = []
+    for n in info["reverse_names"]:
+        molinfo_path = os.path.join(adsorbates_path,n,"info.json")
+        with open(molinfo_path,"r") as f:
+            molinfo = json.load(f)
+        mol = Molecule().from_adjacency_list(molinfo["adjlist"])
+        rev_mols.append(mol)
+    
+    
+    rev_ads_sizes = [ads_size(m) for m in rev_mols]
+    molecule_to_atom_rev_maps = []
+    for m in rev_mols:
+        ads,mol_to_atoms_map = get_adsorbate(mol)
+        molecule_to_atom_rev_maps.append(mol_to_atoms_map)
+    molecule_to_atom_rev_maps_invert = [{int(v):int(k) for k,v in x.items()} for x in molecule_to_atom_rev_maps]
+    
+    template_rev_mol_map = get_template_mol_map(rev_template,rev_mols)
+    
+    template_rev_mol_map = get_template_mol_map(rev_template,rev_mols)
+    template_rev_mol_map_invert = [{int(v):int(k) for k,v in x.items()} for x in template_rev_mol_map]
+    template_mol_map = [{int(k):int(v) for k,v in x.items()} for x in info["template_mol_map"]]
+    template_mol_map_invert = [{int(v):int(k) for k,v in x.items()} for x in info["template_mol_map"]]
+    molecule_to_atom_maps = [{int(k):int(v) for k,v in x.items()} for x in info["molecule_to_atom_maps"]]
+    molecule_to_atom_maps_invert = [{int(v):int(k) for k,v in x.items()} for x in info["molecule_to_atom_maps"]]
+    ads_sizes = info["ads_sizes"]
+    nslab = info["nslab"]
+    mols = [Molecule().from_adjacency_list(x) for x in info["mols"]]
+    
+    #map adsorbed atoms on admol to templates
+    adatoms = np.unique(np.array([a for a in admol.get_adatoms() if not a.is_surface_site()])).tolist()
+    adinds = [admol.atoms.index(adatom) - nslab2D for adatom in adatoms] #aseinds
+    template_adinds = [ase_to_template_index(a,template_mol_map_invert,molecule_to_atom_maps_invert,
+                                             ads_sizes) for a in adinds]
+    forward_geoms = []
+    for i,ad in enumerate(info["species_names"]):
+        tempinds = [k for k in template_adinds if k in template_mol_map[i].keys()] 
+        aseinds = [get_ase_index(k,template_mol_map,molecule_to_atom_maps,info["nslab"],ads_sizes) for k in tempinds]
+        siteinfo = []
+        for j,a in enumerate(adatoms):
+            for bd in admol.get_bonds(a).values():
+                if bd.atom1.is_surface_site():
+                    ind = admol.atoms.index(bd.atom1)
+                    indad = admol.atoms.index(bd.atom2)
+                    break
+                elif bd.atom2.is_surface_site():
+                    ind = admol.atoms.index(bd.atom2)
+                    indad = admol.atoms.index(bd.atom1)
+                    break
+            indtmp = ase_to_template_index(indad-nslab2D,template_mol_map_invert, 
+                                           molecule_to_atom_maps_invert, ads_sizes)
+            molind,mind = get_mol_index(indtmp,template_mol_map)
+            
+            if molind == i and mols[i].atoms[mind].is_bonded_to_surface():
+                site = admol_neighbors[ind]
+                siteinfo.append((site["site"],site["morphology"]))
+        
+        best = get_best_adsorbate_geometries(os.path.join(adsorbates_path,ad),aseinds,siteinfo,sites,site_adjacency,
+                                             imag_freq_max=imag_freq_max)[0]
+        forward_geoms.append(best)
+        
+    reverse_geoms = []  
+    for j,ad in enumerate(info["reverse_names"]):
+        tempinds = [k for k in template_adinds if k in template_rev_mol_map[j].keys()] 
+        aseinds = [get_ase_index(k,template_mol_map,molecule_to_atom_maps,info["nslab"],ads_sizes) for k in tempinds]
+        siteinfo = []
+        for i,a in enumerate(adatoms):
+            for bd in admol.get_bonds(a).values():
+                if bd.atom1.is_surface_site():
+                    ind = admol.atoms.index(bd.atom1)
+                    indad = admol.atoms.index(bd.atom2)
+                    break
+                elif bd.atom2.is_surface_site():
+                    ind = admol.atoms.index(bd.atom2)
+                    indad = admol.atoms.index(bd.atom1)
+                    break
+            #assuming rev and forward template indices are the same
+            indtmp = ase_to_template_index(indad-nslab2D,template_mol_map_invert, 
+                                           molecule_to_atom_maps_invert, ads_sizes)
+
+            molind,mind = get_mol_index(indtmp,template_rev_mol_map)
+            if molind == j and rev_mols[j].atoms[mind].is_bonded_to_surface():
+                site = admol_neighbors[ind]
+                siteinfo.append((site["site"],site["morphology"]))
+
+        best = get_best_adsorbate_geometries(os.path.join(adsorbates_path,ad),aseinds,siteinfo,sites,site_adjacency,
+                                             imag_freq_max=imag_freq_max)[0]
+        reverse_geoms.append(best)
+    
+    return forward_geoms,reverse_geoms
+
+def ase_to_template_index(aseind, template_mol_map_invert, molecule_to_atom_maps_invert, ads_sizes, nslab=None, ):
+    if nslab:
+        aseind = aseind - nslab
+        
+    #go to molind
+    n = 0
+    for i,d in enumerate(molecule_to_atom_maps_invert):
+        if aseind > n + len(d) - 1:
+            n += ads_sizes[i]
+            continue
+        else:
+            molnum = i
+            molind = molecule_to_atom_maps_invert[molnum][aseind-n]
+            break
+    else:
+        raise ValueError
+    
+    return template_mol_map_invert[molnum][molind]
+
+def get_bond_factors(ts_path,adsorbates_path,metal,facet,sites,site_adjacency,max_dist=3.0,imag_freq_max=150.0):
+    
+    info_path = os.path.join(os.path.split(os.path.split(ts_path)[0])[0],"info.json")
+    with open(info_path,"r") as f:
+        info = json.load(f)
+    
+    template_mol_map = [{int(k):v for k,v in x.items()}  for x in info["template_mol_map"]]
+    molecule_to_atom_maps = [{int(k):v for k,v in x.items()}  for x in info["molecule_to_atom_maps"]]
+    broken_bonds,formed_bonds = get_broken_formed_bonds(Molecule().from_adjacency_list(info["reactants"]),
+                                                        Molecule().from_adjacency_list(info["products"]))
+    rbonds = broken_bonds | formed_bonds
+    nslab = info["nslab"]
+    
+    atms = read(ts_path)
+    
+    admol,neighbor_sites,ninds = generate_TS_2D(atms, info_path, metal, facet, sites, site_adjacency, 
+                                                nslab, max_dist=max_dist)
+        
+    occ = get_occupied_sites(atms,sites,nslab)
+    occ_bd_lengths = {site["bonding_index"]:site['bond_length'] for site in occ}
+    
+    reactants,products = get_best_reaction_adsorbate_geometries(admol,neighbor_sites,len([a for a in admol.atoms if a.is_surface_site()]),
+                                                adsorbates_path,info,sites,site_adjacency,
+                                                                imag_freq_max=imag_freq_max,
+                                               assume_reactant_product_templates_same_order=True)
+
+    if info["forward"]:
+        template = Molecule().from_adjacency_list(info["reactants"])
+        rev_template = Molecule().from_adjacency_list(info["products"])
+    else:
+        template = Molecule().from_adjacency_list(info["products"])
+        rev_template = Molecule().from_adjacency_list(info["reactants"])
+
+    rev_mols = []
+    molecule_to_atom_rev_maps = []
+    for n in info["reverse_names"]:
+        molinfo_path = os.path.join(adsorbates_path,n,"info.json")
+        with open(molinfo_path,"r") as f:
+            molinfo = json.load(f)
+        mol = Molecule().from_adjacency_list(molinfo["adjlist"])
+        rev_mols.append(mol)
+        molecule_to_atom_rev_maps.append({int(v):int(k) for k,v in molinfo["atom_to_molecule_atom_map"].items()})
+
+    template_rev_mol_map = get_template_mol_map(rev_template,rev_mols)
+    rev_ads_sizes = [ads_size(mol) for mol in rev_mols]
+
+    template_mol_map_invert = [{int(v):int(k) for k,v in x.items()} for x in info["template_mol_map"]]
+    molecule_to_atom_maps_invert = [{int(v):int(k) for k,v in x.items()} for x in info["molecule_to_atom_maps"]]
+    ads_sizes = info["ads_sizes"]
+    mols = [Molecule().from_adjacency_list(x) for x in info["mols"]]
+    
+    adjlist_to_length = dict()
+    adjlist_to_well_length = dict()
+    
+    for bd in rbonds:
+        aseinds = []
+        inds2D = []
+            
+        for label in bd:
+            a = template.get_labeled_atoms(label)[0]
+            aind = template.atoms.index(a)
+            aseind = get_ase_index(aind,template_mol_map,molecule_to_atom_maps,nslab,info["ads_sizes"])
+            if aseind:
+                aseinds.append(aseind)
+                inds2D.append(aseind - nslab + len(neighbor_sites))
+
+        if len(inds2D) == 1:
+            i = inds2D[0]
+            for batom in admol.atoms[i].bonds.keys(): #try to find a surface bond
+                if batom.is_surface_site():
+                    inds2D.append(admol.atoms.index(batom))
+                    break
+            else: #didn't find surface bond, need to identify the site
+                pos = None
+                for site in occ:
+                    if site['bonding_index'] == i - len(neighbor_sites) + nslab:
+                        pos = site["position"]
+                        break
+                else:
+                    logging.error(admol.to_adjacency_list())
+                    raise SiteOccupationException
+                for j,s in enumerate(neighbor_sites):
+                    if (s["position"] == pos).all():
+                        inds2D.append(j)
+                        break
+        
+        m = admol.copy(deep=True)
+        for i in inds2D:
+            m.atoms[i].label = "*"
+        
+        templateinds = [ase_to_template_index(aind, template_mol_map_invert, molecule_to_atom_maps_invert, ads_sizes, nslab) for aind in aseinds]
+        if len(templateinds) == 2:
+            forward = template.has_bond(template.atoms[templateinds[0]],template.atoms[templateinds[1]])
+        else:
+            forward = template.atoms[templateinds[0]].is_bonded_to_surface()
+
+        if len(aseinds) == 2:
+            L = atms.get_distance(aseinds[0],aseinds[1],mic=True)
+            if forward:
+                molind = get_mol_index(templateinds[0],template_mol_map)[0]
+                aind1 = get_ase_index(templateinds[0],template_mol_map,molecule_to_atom_maps,nslab,ads_sizes)
+                aind2 = get_ase_index(templateinds[1],template_mol_map,molecule_to_atom_maps,nslab,ads_sizes)
+                if mols[molind].contains_surface_site():
+                    Lwell = reactants[molind].get_distance(aind1,aind2,mic=True)
+                else:
+                    Lwell = reactants[molind].get_distance(aind1-nslab,aind2-nslab,mic=True)
+            else:
+                molind,mind = get_mol_index(templateinds[0],template_rev_mol_map)
+                aind1 = get_ase_index(templateinds[0],template_rev_mol_map,molecule_to_atom_rev_maps,nslab,rev_ads_sizes)
+                aind2 = get_ase_index(templateinds[1],template_rev_mol_map,molecule_to_atom_rev_maps,nslab,rev_ads_sizes)                
+                if rev_mols[molind].contains_surface_site():
+                    Lwell = products[molind].get_distance(aind1,aind2,mic=True)
+                else:
+                    Lwell = products[molind].get_distance(aind1-nslab,aind2-nslab,mic=True)
+
+        elif len(aseinds) == 1:
+            L = occ_bd_lengths[aseinds[0]]
+            if forward:
+                molind,mind = get_mol_index(templateinds[0],template_mol_map)
+                aind1 = molecule_to_atom_maps[molind][mind]+nslab
+                mocc = get_occupied_sites(reactants[molind],sites,nslab)
+                mocc_bd_lengths = {site["bonding_index"]:site['bond_length'] for site in mocc}
+                Lwell = mocc_bd_lengths[aind1]
+            else:
+                molind,mind = get_mol_index(templateinds[0],template_rev_mol_map)
+                aind1 = molecule_to_atom_maps[molind][mind]+nslab
+                mocc = get_occupied_sites(products[molind],sites,nslab)
+                mocc_bd_lengths = {site["bonding_index"]:site['bond_length'] for site in mocc}
+                Lwell = mocc_bd_lengths[aind1]
+                
+        if Lwell > 2.0:
+            assert False
+        adjlist_to_length[m.to_adjacency_list()] = L
+        adjlist_to_well_length[m.to_adjacency_list()] = Lwell
+    
+    adjlist_to_bond_factor = dict()
+    for k in adjlist_to_length.keys():
+        adjlist_to_bond_factor[k] = adjlist_to_length[k]/adjlist_to_well_length[k]
+    
+    return adjlist_to_bond_factor,adjlist_to_length,adjlist_to_well_length
+
+def get_unique_adsorbate_geometries(adsorbate_path,mol,sites,site_adjacency,atom_to_molecule_surface_atom_map,
+                                    nslab,imag_freq_max=150.0):
+    """
+    load the adsorbates associated with the reaction and find the unique optimized
+    adsorbate structures for each species
+    returns a dictionary mapping each adsorbate name to a list of ase.Atoms objects
+    """
+    prefixes = os.listdir(adsorbate_path)
+    geoms = []
+    for prefix in prefixes:
+        if prefix == "info.json":
+            continue
+        path = os.path.join(adsorbate_path,prefix,prefix+".xyz")
+        freq_path = os.path.join(adsorbate_path,prefix,"vib.json_vib.json")
+        if os.path.exists(path) and os.path.exists(freq_path):
+            with open(freq_path,"r") as f:
+                freq_dict = json.load(f)
+            freq = np.complex128(freq_dict["frequencies"][0])
+            if (np.isreal(freq) or freq.imag < imag_freq_max):
+                geoms.append(path)
+    
+    ase_atoms = [read(geom) for geom in geoms]
+    mols = [generate_adsorbate_2D(geom, sites, site_adjacency, nslab, max_dist=np.inf)[0] for geom in ase_atoms]
+    filtered_mols = []
+    filtered_geoms = []
+    filtered_energies = []
+    for j,m in enumerate(mols):
+        for i,fm in enumerate(filtered_mols):
+            if fm.is_isomorphic(m,save_order=True):
+                if filtered_energies[i] > ase_atoms[j].get_potential_energy():
+                    filtered_mols[i] = m 
+                    filtered_geoms[i] = ase_atoms[j]
+                    filtered_energies[i] = ase_atoms[j].get_potential_energy()
+        else:
+            filtered_mols.append(m)
+            filtered_geoms.append(ase_atoms[j])
+            filtered_energies.append(ase_atoms[j].get_potential_energy())
+            
+    adsorbates = []
+    for j,geo in enumerate(filtered_geoms):
+        occ = get_occupied_sites(geo,sites,nslab)
+        required_surface_inds = set([ind+nslab for ind in atom_to_molecule_surface_atom_map.keys()])
+        found_surface_inds = set([site["bonding_index"] for site in occ])
+        if len(occ) >= len(mol.get_adatoms()) and required_surface_inds.issubset(found_surface_inds):
+            adsorbates.append(geo)
+
+    return adsorbates
+
+def get_adsorbate_geometries(adsorbate_path,mol,sites,atom_to_molecule_surface_atom_map,
+                                    nslab,imag_freq_max=150.0):
+    """
+    load the adsorbates associated with the reaction and find the unique optimized
+    adsorbate structures for each species
+    returns a dictionary mapping each adsorbate name to a list of ase.Atoms objects
+    """
+    prefixes = os.listdir(adsorbate_path)
+    geoms = []
+    for prefix in prefixes:
+        if prefix == "info.json":
+            continue
+        path = os.path.join(adsorbate_path,prefix,prefix+".xyz")
+        freq_path = os.path.join(adsorbate_path,prefix,"vib.json_vib.json")
+        if os.path.exists(path) and os.path.exists(freq_path):
+            with open(freq_path,"r") as f:
+                freq_dict = json.load(f)
+            freq = np.complex128(freq_dict["frequencies"][0])
+            if (np.isreal(freq) or freq.imag < imag_freq_max):
+                geoms.append(path)
+                
+    xyzs = get_unique_sym(geoms)
+    adsorbates = []
+    for xyz in xyzs:
+        geo = read(xyz)
+        occ = get_occupied_sites(geo,sites,nslab)
+        required_surface_inds = set([ind+nslab for ind in atom_to_molecule_surface_atom_map.keys()])
+        found_surface_inds = set([site["bonding_index"] for site in occ])
+        if len(occ) >= len(mol.get_adatoms()) and required_surface_inds.issubset(found_surface_inds):
+            adsorbates.append(geo)
+
+    return adsorbates
+
+def get_lowest_adsorbate_energies(adsorbates_path):
+    """
+    load the adsorbates geometries find the lowest energy correct structure and the 2D representation
+    """
+    ad_energy_dict = dict()
+    for ad in os.listdir(adsorbates_path):
+        path = os.path.join(adsorbates_path,ad)
+        if not os.path.isdir(path):
+            continue
+        with open(os.path.join(path,"info.json"),"r") as f:
+            info = json.load(f)
+            mol = Molecule().from_adjacency_list(info["adjlist"])
+        Es = get_adsorbate_energies(path)[0]
+        ad_energy_dict[mol] = np.inf
+        for E in Es.values():
+            if E < ad_energy_dict[mol]:
+                ad_energy_dict[mol] = E
+        
+    return ad_energy_dict
+
+def extract_pair_graph(atoms,sites,site_adjacency,nslab,max_dist,cut_multidentate_off_num=None,allowed_structure_site_structures=None,
+                       is_ts=False,info_path=None,metal=None,facet=None,imag_freq_path=None):
+    if is_ts:
+        admol,neighbor_sites,ninds = generate_TS_2D(atoms, info_path, metal, facet, sites, site_adjacency, nslab, imag_freq_path=imag_freq_path,
+                        max_dist=max_dist, cut_multidentate_off_num=cut_multidentate_off_num, allowed_structure_site_structures=allowed_structure_site_structures,
+                        )
+    else:
+        admol,neighbor_sites,ninds = generate_adsorbate_2D(atoms, sites, site_adjacency, nslab, 
+                                                       max_dist=max_dist,cut_multidentate_off_num=cut_multidentate_off_num,allowed_structure_site_structures=allowed_structure_site_structures)
+    return reduce_graph_to_pairs(admol)
+
+def get_adsorbate_energies(ad_path,atom_corrections=None,include_zpe=True):
+    """
+    get ZPE corrected adsorbate energies
+    """
+    dirs = os.listdir(ad_path)
+    slab = read(os.path.join(os.path.split(os.path.split(ad_path)[0])[0],"slab.xyz"))
+    Eslab = slab.get_potential_energy()
+    with open(os.path.join(ad_path,"info.json")) as f:
+        info = json.load(f)
+
+    m = Molecule().from_adjacency_list(info["adjlist"])
+    if atom_corrections:
+        AEC = 0.0
+        for atom in m.atoms:
+            s = str(atom.element)
+            if not "X" in s:
+                AEC += atom_corrections[s]
+    else: #if no corrections don't add a correction
+        AEC = 0.0
+    
+    gasphase = len(info["gratom_to_molecule_surface_atom_map"]) == 0
+    spin = (Molecule().from_adjacency_list(info["adjlist"]).multiplicity - 1.0)/2.0
+    Es = dict()
+    thermos = dict()
+    fs = dict()
+    for d in dirs:
+        if d == "info.json":
+            continue
+        optdir = os.path.join(ad_path,d,d+".xyz")
+        freqdir = os.path.join(ad_path,d,"vib.json_vib.json")
+        if not (os.path.exists(optdir) and os.path.exists(freqdir)):
+            continue
+        sp = read(optdir)
+        E = sp.get_potential_energy()
+        if gasphase:
+            vibdata = get_vibdata(optdir,freqdir,0)
+        else:
+            vibdata = get_vibdata(optdir,freqdir,len(slab))
+
+        freqs = vibdata.get_frequencies().tolist()
+        fs[d] = freqs
+
+        ZPE = vibdata.get_zero_point_energy()
+
+        if not gasphase:
+
+            if include_zpe:
+                Es[d] = E + ZPE - Eslab - AEC
+            else:
+                Es[d] = E - Eslab - AEC
+            thermos[d] = HarmonicThermo(np.array([x for x in np.real(vibdata.get_energies()) if x > 0.0]),potentialenergy=E-Eslab-AEC)
+        else:
+            if len(sp) == 1:
+                geometry = 'monatomic'
+            elif any([ I < 1.0e-3 for I in sp.get_moments_of_inertia()]):
+                geometry = "linear"
+            else:
+                geometry = "nonlinear"
+
+            if include_zpe:
+                Es[d] = E + ZPE - AEC
+            else:
+                Es[d] = E - AEC
+
+            thermos[d] = IdealGasThermo(np.real(vibdata.get_energies()),geometry,
+                                        potentialenergy=E - AEC,atoms=sp,symmetrynumber=1,
+                                        natoms=len(sp),spin=spin)
+
+    return Es,thermos,fs
+
+def get_vibdata(dopt,dvib,nslab):
+    xyz = read(dopt)
+    inds = range(nslab,len(xyz))
+    with open(dvib,'r') as f:
+        out = json.load(f)
+    sh = (len(inds),3,len(inds),3)
+    H = np.asarray(out["hessian"])
+    if H.shape != sh:
+        H = H.reshape(sh)
+    vib = VibrationsData(xyz,H,inds)
+    return vib
\ No newline at end of file
diff --git a/pynta/main.py b/pynta/main.py
index f822af71..a5add503 100644
--- a/pynta/main.py
+++ b/pynta/main.py
@@ -1,5 +1,6 @@
 from pynta.tasks import *
 from pynta.mol import get_adsorbate, generate_unique_site_additions, generate_adsorbate_guesses, get_name,generate_unique_placements
+from pynta.coveragedependence import *
 from molecule.molecule import Molecule
 import ase.build
 from ase.io import read, write
@@ -41,7 +42,8 @@ def __init__(self,path,rxns_file,surface_type,metal,label,launchpad_path=None,fw
         irc_mode="fixed", #choose irc mode: 'skip', 'relaxed', 'fixed'
         lattice_opt_software_kwargs={'kpts': (25,25,25), 'ecutwfc': 70, 'degauss':0.02, 'mixing_mode': 'plain'},
         reset_launchpad=False,queue_adapter_path=None,num_jobs=25,max_num_hfsp_opts=None,#max_num_hfsp_opts is mostly for fast testing
-        Eharmtol=3.0,Eharmfiltertol=30.0,Ntsmin=5,frozen_layers=2,fmaxopt=0.05,fmaxirc=0.1,fmaxopthard=0.05,c=None):
+        Eharmtol=3.0,Eharmfiltertol=30.0,Ntsmin=5,frozen_layers=2,fmaxopt=0.05,fmaxirc=0.1,fmaxopthard=0.05,c=None,
+        surrogate_metal=None):
 
         self.surface_type = surface_type
         if launchpad_path:
@@ -64,6 +66,10 @@ def __init__(self,path,rxns_file,surface_type,metal,label,launchpad_path=None,fw
         self.facet = metal + surface_type
         self.fws = []
         self.metal = metal
+        if surrogate_metal is None:
+            self.surrogate_metal = metal
+        else:
+            self.surrogate_metal = surrogate_metal
         self.adsorbate_fw_dict = dict()
         self.software_kwargs = software_kwargs
         self.irc_mode = irc_mode
@@ -171,7 +177,7 @@ def analyze_slab(self):
         full_slab = self.slab
         cas = SlabAdsorptionSites(full_slab, self.surface_type,allow_6fold=False,composition_effect=False,
                         label_sites=True,
-                        surrogate_metal=self.metal)
+                        surrogate_metal=self.surrogate_metal)
 
         self.cas = cas
 
@@ -496,7 +502,7 @@ def setup_transition_states(self,adsorbates_finished=False):
                     "gratom_to_molecule_atom_maps":{sm: {str(k):v for k,v in d.items()} for sm,d in self.gratom_to_molecule_atom_maps.items()},
                     "gratom_to_molecule_surface_atom_maps":{sm: {str(k):v for k,v in d.items()} for sm,d in self.gratom_to_molecule_surface_atom_maps.items()},
                     "nslab":self.nslab,"Eharmtol":self.Eharmtol,"Eharmfiltertol":self.Eharmfiltertol,"Ntsmin":self.Ntsmin,
-                    "max_num_hfsp_opts":self.max_num_hfsp_opts})
+                    "max_num_hfsp_opts":self.max_num_hfsp_opts, "surrogate_metal":self.surrogate_metal})
             reactants = rxn["reactant_names"]
             products = rxn["product_names"]
             parents = []
@@ -572,3 +578,201 @@ def execute_from_initial_ad_guesses(self):
 
 
         self.launch()
+
+class CoverageDependence:
+    def __init__(self,path,metal,surface_type,repeats,pynta_run_directory,software,software_kwargs,label,sites,site_adjacency,coad_stable_sites,adsorbates=[],transition_states=dict(),coadsorbates=[],
+                 max_dist=3.0,frozen_layers=2,fmaxopt=0.05,Ncalc_per_iter=6,TS_opt_software_kwargs=None,launchpad_path=None,
+                 fworker_path=None,queue=False,njobs_queue=0,reset_launchpad=False,queue_adapter_path=None,
+                 num_jobs=25,surrogate_metal=None,concern_energy_tol=None,max_iters=np.inf):
+        self.path = path
+        self.metal = metal
+        self.repeats = repeats
+        self.nslab = np.product(repeats)
+        self.pynta_run_directory = pynta_run_directory
+        self.pairs_directory = os.path.join(self.path,"pairs")
+        self.slab_path = os.path.join(self.pynta_run_directory,"slab.xyz")
+        self.adsorbates_path = os.path.join(self.pynta_run_directory,"Adsorbates")
+        self.coad_stable_sites = coad_stable_sites
+        self.software = software
+        self.software_kwargs = software_kwargs
+        self.surface_type = surface_type
+        self.facet = metal + surface_type
+        self.sites = sites
+        self.site_adjacency = site_adjacency
+        self.Ncalc_per_iter = Ncalc_per_iter
+        self.software_kwargs_TS = deepcopy(software_kwargs)
+        self.concern_energy_tol = concern_energy_tol
+        if TS_opt_software_kwargs:
+            for key,val in TS_opt_software_kwargs.items():
+                self.software_kwargs_TS[key] = val
+        
+        if adsorbates != []:
+            raise ValueError("Not implemented yet")
+        self.adsorbates = adsorbates
+        self.transition_states = transition_states
+        self.coadsorbates = coadsorbates
+        self.max_dist = max_dist
+        self.frozen_layers = frozen_layers
+        self.layers = self.repeats[2]
+        self.freeze_ind = int((self.nslab/self.layers)*self.frozen_layers)
+        self.fmaxopt = fmaxopt
+        self.label = label
+        self.max_iters = max_iters
+        
+        if launchpad_path:
+            launchpad = LaunchPad.from_file(launchpad_path)
+        else:
+            launchpad = LaunchPad()
+
+        if reset_launchpad:
+            launchpad.reset('', require_password=False)
+        self.launchpad = launchpad
+        
+        self.fworker_path = fworker_path
+        if fworker_path:
+            self.fworker = FWorker.from_file(fworker_path)
+        else:
+            self.fworker = FWorker()
+            
+        self.queue = queue
+        self.njobs_queue = njobs_queue
+        self.reset_launchpad = reset_launchpad
+        if queue:
+            self.qadapter = load_object_from_file(queue_adapter_path)
+        self.num_jobs = num_jobs
+        
+        if surrogate_metal is None:
+            self.surrogate_metal = metal
+        else:
+            self.surrogate_metal = surrogate_metal 
+            
+        self.pairs_fws = []
+        self.fws = []
+        
+    def setup_pairs_calculations(self):
+        tsdirs = [os.path.join(self.pynta_run_directory,t,ind) for t,ind in self.transition_states.items()]
+        outdirs_ad,outdirs_ts = setup_pair_opts_for_rxns(self.path,tsdirs,self.coadsorbates,self.surrogate_metal,self.surface_type,max_dist=self.max_dist)
+        
+        for d in outdirs_ad:
+            fwopt = optimize_firework(d,
+                            self.software,"weakopt",
+                            opt_method="MDMin",opt_kwargs={'dt': 0.05,"trajectory": "weakopt.traj"},software_kwargs=self.software_kwargs,order=0,
+                            run_kwargs={"fmax" : 0.5, "steps" : 30},parents=[],
+                              constraints=["freeze up to {}".format(self.freeze_ind)],
+                            ignore_errors=True, metal=self.metal, facet=self.surface_type, priority=3)
+            fwopt2 = optimize_firework(os.path.join(os.path.split(d)[0],"weakopt.xyz"),
+                            self.software,"out",
+                            opt_method="QuasiNewton",opt_kwargs={"trajectory": "out.traj"},software_kwargs=self.software_kwargs,order=0,
+                            run_kwargs={"fmax" : self.fmaxopt, "steps" : 70},parents=[fwopt],
+                              constraints=["freeze up to {}".format(self.freeze_ind)],
+                            ignore_errors=True, metal=self.metal, facet=self.surface_type, priority=2)
+        
+            fwvib = vibrations_firework(os.path.join(os.path.split(d)[0],"out.xyz"),
+                                        self.software,"vib",software_kwargs=self.software_kwargs,parents=[fwopt2],
+                                        constraints=["freeze up to "+str(self.nslab)])
+            self.pairs_fws.append(fwopt)
+            self.pairs_fws.append(fwopt2)
+            self.pairs_fws.append(fwvib)
+        
+        for d in outdirs_ts:
+            fwopt = optimize_firework(d,
+                            self.software,"out", sella=True, 
+                            opt_kwargs={"trajectory": "out.traj"},software_kwargs=self.software_kwargs_TS,
+                            order=1,
+                            run_kwargs={"fmax" : self.fmaxopt, "steps" : 70},parents=[],
+                              constraints=["freeze up to {}".format(self.freeze_ind)],
+                            ignore_errors=True, metal=self.metal, facet=self.surface_type, priority=3)
+            
+            fwvib = vibrations_firework(os.path.join(os.path.split(d)[0],"out.xyz"),
+                                        self.software,"vib",software_kwargs=self.software_kwargs,parents=[fwopt],
+                                        constraints=["freeze up to "+str(self.nslab)])
+            self.pairs_fws.append(fwopt)
+            self.pairs_fws.append(fwvib)
+            
+        self.fws.extend(self.pairs_fws)
+    
+    def setup_active_learning_loop(self):
+        admol_name_path_dict = {k: os.path.join(self.pynta_run_directory,k,v,"opt.xyz") for k,v in self.transition_states.items()}
+        admol_name_structure_dict = dict()
+        ads = self.adsorbates
+        allowed_structure_site_structures = generate_allowed_structure_site_structures(os.path.join(self.pynta_run_directory,"Adsorbates"),self.sites,self.site_adjacency,self.nslab,max_dist=np.inf)
+
+        for ts in self.transition_states.keys():
+            info_path = os.path.join(self.pynta_run_directory,ts,"info.json")
+            with open(info_path) as f:
+                info = json.load(f)
+            mol = Molecule().from_adjacency_list(info["adjlist"])
+            keep_binding_vdW_bonds=False 
+            keep_vdW_surface_bonds=False
+            for bd in mol.get_all_edges():
+                if bd.order == 0:
+                    if bd.atom1.is_surface_site() or bd.atom2.is_surface_site():
+                        keep_binding_vdW_bonds = True
+                        m = mol.copy(deep=True)
+                        b = m.get_bond(m.atoms(mol.atoms.index(bd.atom1)),m.atoms[mol.atoms.index(bd.atom2)])
+                        m.remove_bond(b)
+                        out = m.split()
+                        if len(out) == 1: #vdW bond is not only thing connecting adsorbate to surface
+                            keep_vdW_surface_bonds = True
+                        
+            atoms = read(admol_name_path_dict[ts])
+            st,_,_ = generate_TS_2D(atoms, info_path,  self.metal, self.surface_type, self.sites, self.site_adjacency, self.nslab,
+                     max_dist=np.inf, allowed_structure_site_structures=allowed_structure_site_structures,
+                     keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+            admol_name_structure_dict[ts] = st
+            with open(info_path,"r") as f:
+                info = json.load(f)
+                for name in info["species_names"]+info["reverse_names"]:
+                    if name not in ads:
+                        ads.append(name)
+        
+        for ad in ads:
+            p = os.path.join(self.pynta_run_directory,"Adsorbates",ad)
+            with open(os.path.join(p,"info.json")) as f:
+                info = json.load(f)
+                
+            mol = Molecule().from_adjacency_list(info["adjlist"])
+            
+            if mol.contains_surface_site():
+                keep_binding_vdW_bonds=False 
+                keep_vdW_surface_bonds=False
+                for bd in mol.get_all_edges():
+                    if bd.order == 0:
+                        if bd.atom1.is_surface_site() or bd.atom2.is_surface_site():
+                            keep_binding_vdW_bonds = True
+                            m = mol.copy(deep=True)
+                            b = m.get_bond(m.atoms(mol.atoms.index(bd.atom1)),m.atoms[mol.atoms.index(bd.atom2)])
+                            m.remove_bond(b)
+                            out = m.split()
+                            if len(out) == 1: #vdW bond is not only thing connecting adsorbate to surface
+                                keep_vdW_surface_bonds = True
+                        
+                ad_xyz = get_best_adsorbate_xyz(p,self.sites,self.nslab)
+                admol_name_path_dict[ad] = ad_xyz 
+                atoms = read(ad_xyz)
+                st,_,_ = generate_adsorbate_2D(atoms, self.sites, self.site_adjacency, self.nslab, max_dist=np.inf, allowed_structure_site_structures=allowed_structure_site_structures,
+                                               keep_binding_vdW_bonds=keep_binding_vdW_bonds,keep_vdW_surface_bonds=keep_vdW_surface_bonds)
+                admol_name_structure_dict[ad] = st
+                
+        calculation_directories = [] #identify pairs directories
+        for p1 in os.listdir(os.path.join(self.path,"pairs")):
+            for p2 in os.listdir(os.path.join(self.path,"pairs",p1)):
+                calculation_directories.append(os.path.join(self.path,"pairs",p1,p2))
+        
+        
+        fw = train_covdep_model_firework(self.path,admol_name_path_dict,admol_name_structure_dict,self.sites,self.site_adjacency,
+                                self.pynta_run_directory, self.metal, self.surface_type, self.slab_path, calculation_directories, self.coadsorbates[0], 
+                                self.coad_stable_sites, self.software, self.software_kwargs, self.software_kwargs_TS, self.freeze_ind, self.fmaxopt,
+                                parents=self.fws, max_iters=self.max_iters,
+                                Ncalc_per_iter=self.Ncalc_per_iter,iter=0,concern_energy_tol=self.concern_energy_tol,ignore_errors=True)
+
+        self.fws.append(fw)
+        
+        
+    def execute(self,run_pairs=True,run_active_learning=False):
+        if run_pairs:
+            self.setup_pairs_calculations()
+        if run_active_learning:
+            self.setup_active_learning_loop()
+        wf = Workflow(self.fws, name=self.label)
+        self.launchpad.add_wf(wf)
\ No newline at end of file
diff --git a/pynta/mol.py b/pynta/mol.py
index 54f938b3..9a551093 100644
--- a/pynta/mol.py
+++ b/pynta/mol.py
@@ -1,4 +1,4 @@
-from molecule.molecule import Molecule
+from molecule.molecule import Molecule,Atom,Bond
 from ase.io import read, write
 from ase.data import covalent_radii
 from ase import Atoms
@@ -13,12 +13,14 @@
                          get_rodrigues_rotation_matrix,
                          get_angle_between,
                          get_rejection_between)
-from pynta.utils import get_unique_sym_struct_index_clusters, get_unique_sym, get_unique_sym_structs, get_unique_sym_struct_indices
+from pynta.utils import get_unique_sym_struct_index_clusters, get_unique_sym, get_unique_sym_structs, get_unique_sym_struct_indices, get_occupied_sites
 from pynta.calculator import run_harmonically_forced_xtb
 from rdkit import Chem
 from copy import deepcopy
 import numpy as np
 import random
+import itertools
+
 
 def get_desorbed_with_map(mol):
     molcopy = mol.copy(deep=True)
@@ -748,7 +750,7 @@ def get_ase_index(ind,template_mol_map,molecule_to_gratom_maps,nslab,ads_sizes):
     else:
         return None #surface site
 
-def get_bond_lengths_sites(mol,ads,atom_map,surf_atom_map,nslab,facet="fcc111",metal="Cu",cas=None):
+def get_bond_lengths_sites(mol,ads,atom_map,surf_atom_map,nslab,sites,site_adjacency,facet="fcc111",metal="Cu",):
     """
     gets bond lengths and site information indexed to the Molecule object atoms
     bond lengths is a matrix with the distances between all atoms that share bonds
@@ -757,20 +759,15 @@ def get_bond_lengths_sites(mol,ads,atom_map,surf_atom_map,nslab,facet="fcc111",m
     rev_atom_map = {value: key for key,value in atom_map.items()} #map from mol to ads
     rev_surf_atom_map = { value: key for key,value in surf_atom_map.items()}
 
-    if cas is None:
-        cas = SlabAdsorptionSites(ads,facet,allow_6fold=False,composition_effect=False,
-                            label_sites=True,
-                            surrogate_metal=metal)
-    adcov = SlabAdsorbateCoverage(ads,adsorption_sites=cas)
-    occ = adcov.get_sites(occupied_only=True)
+    occ = get_occupied_sites(ads,sites,nslab)
     surface_dict = [{"atom_index":x["bonding_index"]-nslab, "site":x["site"],
                        "bond_length":x["bond_length"], "position":x["position"]} for x in occ]
 
     if len(occ) < len(surf_atom_map): #number of sites on geometry disagrees with surf_atom_map
-        print("occupational analysis in get_bond_lengths_sites failed")
-        print(mol.to_adjacency_list())
-        print("expected {} sites".format(len(surf_atom_map)))
-        print("found {} sites".format(len(occ)))
+#         print("occupational analysis in get_bond_lengths_sites failed")
+#         print(mol.to_adjacency_list())
+#         print("expected {} sites".format(len(surf_atom_map)))
+#         print("found {} sites".format(len(occ)))
         return None,None,None
 
     if mol.contains_surface_site():
@@ -817,3 +814,204 @@ def get_name(mol):
         return mol.to_smiles()
     except:
         return mol.to_adjacency_list().replace("\n"," ")[:-1]
+
+
+def remove_slab(mol,remove_slab_bonds=False,update_atomtypes=True):
+    m = mol.copy(deep=True)
+    for site in m.get_surface_sites():
+        for a in site.bonds.keys():
+            if not a.is_surface_site():
+                break
+        else:
+            m.remove_atom(site)
+            
+    if remove_slab_bonds:
+        bonds_to_remove = []
+        for bd in m.get_all_edges():
+            if bd.atom1.is_surface_site() and bd.atom2.is_surface_site():
+                m.remove_bond(bd)
+    
+    if update_atomtypes:           
+        m.update_atomtypes()
+    m.update_connectivity_values()
+    
+    return m
+
+def pluck_subgraph(mol,atom):
+    subgraph_atoms = []
+    new_subgraph_atoms = [atom]
+    while new_subgraph_atoms != []:
+        temp = []
+        subgraph_atoms.extend(new_subgraph_atoms)
+        for a in new_subgraph_atoms:
+            for a2 in a.bonds.keys():
+                if a2 not in subgraph_atoms:
+                    temp.append(a2)
+        new_subgraph_atoms = temp
+    
+    inds = [mol.atoms.index(a) for a in subgraph_atoms]
+    struct = Molecule(atoms=subgraph_atoms)
+    struct.update_atomtypes()
+    struct.update_connectivity_values()
+    return struct,inds
+    
+def generate_without_site_info(m):
+    mol = m.copy(deep=True)
+    for a in mol.atoms:
+        if a.is_surface_site():
+            a.site = ""
+            a.morphology = ""
+    return mol
+
+class FindingPathError(Exception):
+    pass
+
+def reduce_graph_to_pairs(admol):
+    adatoms = [a for a in admol.atoms if a.is_bonded_to_surface() and not a.is_surface_site()]
+    surface_save = set()
+    for i,a1 in enumerate(adatoms):
+        for a2 in adatoms[i+1:]:
+            paths = find_shortest_paths(a1,a2)
+            if paths is None: #separation between pair is so large the generated site graphs don't connect
+                raise FindingPathError(admol.to_adjacency_list())
+            for path in paths:
+                surface_save = surface_save | set([a for a in path if a.is_surface_site()])
+    
+    atoms_to_remove = []
+    for i,a in enumerate(admol.atoms):
+        if a.is_surface_site() and not (a in surface_save):
+            atoms_to_remove.append(a)
+    
+    for a in atoms_to_remove:
+        admol.remove_atom(a)
+
+    admol.update_atomtypes()
+    admol.update_connectivity_values()
+    
+    return admol
+
+def find_shortest_paths(start, end, path=None):
+    paths = [[start]]
+    outpaths = []
+    while paths != []:
+        newpaths = []
+        for path in paths:
+            for node in path[-1].edges.keys():
+                if node in path:
+                    continue
+                elif node is end:
+                    outpaths.append(path[:]+[node])
+                elif outpaths == []:
+                    newpaths.append(path[:]+[node])
+        if outpaths:
+            return outpaths
+        else:
+            paths = newpaths
+    
+    return None
+
+def find_shortest_paths_sites(start, end, path=None):
+    paths = [[start]]
+    outpaths = []
+    while paths != []:
+        newpaths = []
+        for path in paths:
+            for node in path[-1].edges.keys():
+                if node in path:
+                    continue
+                elif node is end:
+                    outpaths.append(path[:]+[node])
+                elif outpaths == [] and node.is_surface_site():
+                    newpaths.append(path[:]+[node])
+        if outpaths:
+            return outpaths
+        else:
+            paths = newpaths
+    
+    return None
+    
+def find_adsorbate_atoms_surface_sites(atom,mol):
+    """
+    one atom of the associated adsorbate on the surface Molecule object mol
+    returns all of the surface atoms bonded to adsorbate
+    """
+    atsout = [atom]
+    ats = [atom]
+    surf_sites = []
+    while ats != []:
+        new_ats = []
+        for a in ats:
+            for a2 in atom.edges.keys():
+                if a2 in atsout:
+                    continue
+                elif a2.is_surface_site() and a2 not in surf_sites:
+                    surf_sites.append(a2)
+                elif not a2.is_surface_site():
+                    new_ats.append(a2)
+                    atsout.append(a2)
+        ats = new_ats
+
+    return atsout,surf_sites
+
+def split_adsorbed_structures(admol,clear_site_info=True,adsorption_info=False,atoms_to_skip=None,
+                              atom_mapping=False):
+    """_summary_
+
+    Args:
+        admol (_type_): a Molecule object resolving a slab
+        clear_site_info (bool, optional): clears site identify information. Defaults to True.
+        adsorption_info (bool, optional): also returns the map from atom to index for admol for each surface site structures are split off from. Defaults to False.
+        atoms_to_skip (_type_, optional): list of atoms in admol to not include in the split structures. Defaults to None.
+
+    Returns:
+        _type_: _description_
+    """
+    m = admol.copy(deep=True)
+    
+    if atoms_to_skip is None:
+        atoms_to_remove = []
+        skip_atoms = []
+    else:
+        skip_atoms = [m.atoms[admol.atoms.index(a)] for a in atoms_to_skip]
+        atoms_to_remove = skip_atoms[:]
+    
+    for i,at in enumerate(m.atoms):
+        if at.is_surface_site():
+            bdict = m.get_bonds(at)
+            if len(bdict) > 0:
+                for a,b in bdict.items():
+                    if not a.is_surface_site() and (a not in skip_atoms):
+                        break
+                else:
+                    atoms_to_remove.append(at)
+            else:
+                atoms_to_remove.append(at)
+
+    if adsorption_info:
+        adsorbed_atom_dict = {at: i for i,at in enumerate(m.atoms) if at.is_surface_site() and at not in atoms_to_remove}
+
+    if atom_mapping:
+        mapping = {at: i for i,at in enumerate(m.atoms) if at not in atoms_to_remove}
+    
+    for at in atoms_to_remove:
+        m.remove_atom(at)
+    
+    if clear_site_info:
+        for at in m.atoms:
+            if at.is_surface_site():
+                at.site = ""
+                at.morphology = ""
+    split_structs = m.split()
+    for s in split_structs:
+        s.update(sort_atoms=False)
+        s.update_connectivity_values()
+        s.multiplicity = 1
+
+    if atom_mapping and not adsorption_info:
+        return split_structs,mapping 
+    elif atom_mapping and adsorption_info:
+        return split_structs,adsorption_info,mapping 
+    elif not atom_mapping and adsorption_info:
+        return split_structs,adsorbed_atom_dict
+    else:
+        return split_structs
diff --git a/pynta/postprocessing.py b/pynta/postprocessing.py
index 23ab75b7..7d733a47 100644
--- a/pynta/postprocessing.py
+++ b/pynta/postprocessing.py
@@ -12,6 +12,7 @@
 from molecule.molecule import Molecule
 from acat.adsorption_sites import SlabAdsorptionSites
 from molecule.thermo import Wilhoit
+from pynta.geometricanalysis import get_adsorbate_energies, get_vibdata
 
 eV_to_Jmol = 9.648328e4
 
@@ -192,78 +193,6 @@ def fit_rate_coefficient(thermoRs,thermoTS,dE,rnum,s0,Ts=None):
 
     return arr
 
-def get_adsorbate_energies(ad_path,atom_corrections=None,include_zpe=True):
-    """
-    get ZPE corrected adsorbate energies
-    """
-    dirs = os.listdir(ad_path)
-    slab = read(os.path.join(os.path.split(os.path.split(ad_path)[0])[0],"slab.xyz"))
-    Eslab = slab.get_potential_energy()
-    with open(os.path.join(ad_path,"info.json")) as f:
-        info = json.load(f)
-
-    m = Molecule().from_adjacency_list(info["adjlist"])
-    if atom_corrections:
-        AEC = 0.0
-        for atom in m.atoms:
-            s = str(atom.element)
-            if not "X" in s:
-                AEC += atom_corrections[s]
-    else: #if no corrections don't add a correction
-        AEC = 0.0
-    
-    gasphase = len(info["gratom_to_molecule_surface_atom_map"]) == 0
-    spin = (Molecule().from_adjacency_list(info["adjlist"]).multiplicity - 1.0)/2.0
-    Es = dict()
-    thermos = dict()
-    fs = dict()
-    for d in dirs:
-        if d == "info.json":
-            continue
-        optdir = os.path.join(ad_path,d,d+".xyz")
-        freqdir = os.path.join(ad_path,d,"vib.json_vib.json")
-        if not (os.path.exists(optdir) and os.path.exists(freqdir)):
-            continue
-        sp = read(optdir)
-        E = sp.get_potential_energy()
-        if gasphase:
-            vibdata = get_vibdata(optdir,freqdir,0)
-        else:
-            vibdata = get_vibdata(optdir,freqdir,len(slab))
-
-        freqs = vibdata.get_frequencies().tolist()
-        fs[d] = freqs
-
-        ZPE = vibdata.get_zero_point_energy()
-
-        if not gasphase:
-
-            if include_zpe:
-                Es[d] = E + ZPE - Eslab - AEC
-            else:
-                Es[d] = E - Eslab - AEC
-            thermos[d] = HarmonicThermo(np.array([x for x in np.real(vibdata.get_energies()) if x > 0.0]),potentialenergy=E-Eslab-AEC)
-        else:
-            if len(sp) == 1:
-                geometry = 'monatomic'
-            elif any([ I < 1.0e-3 for I in sp.get_moments_of_inertia()]):
-                geometry = "linear"
-            else:
-                geometry = "nonlinear"
-
-            if include_zpe:
-                Es[d] = E + ZPE - AEC
-            else:
-                Es[d] = E - AEC
-
-            thermos[d] = IdealGasThermo(np.real(vibdata.get_energies()),geometry,
-                                        potentialenergy=E - AEC,atoms=sp,symmetrynumber=1,
-                                        natoms=len(sp),spin=spin)
-
-    return Es,thermos,fs
-
-
-
 def get_reactant_products_energy(ts_path,reactants,products):
     path = os.path.split(ts_path)[0]
     ads_path = os.path.join(path,"Adsorbates")
@@ -302,17 +231,7 @@ def get_reactant_products_energy(ts_path,reactants,products):
         pthermos.append(pthermo[ind])
     return rE,pE,rthermos,pthermos
 
-def get_vibdata(dopt,dvib,nslab):
-    xyz = read(dopt)
-    inds = range(nslab,len(xyz))
-    with open(dvib,'r') as f:
-        out = json.load(f)
-    sh = (len(inds),3,len(inds),3)
-    H = np.asarray(out["hessian"])
-    if H.shape != sh:
-        H = H.reshape(sh)
-    vib = VibrationsData(xyz,H,inds)
-    return vib
+
 
 def get_animated_mode(dopt,dvib,nslab,n=0):
 
diff --git a/pynta/tasks.py b/pynta/tasks.py
index 89eb865a..2cd7a88d 100644
--- a/pynta/tasks.py
+++ b/pynta/tasks.py
@@ -20,6 +20,8 @@
 from pynta.utils import *
 from pynta.calculator import run_harmonically_forced_xtb, add_sella_constraint
 from pynta.mol import *
+from pynta.coveragedependence import *
+from pynta.geometricanalysis import *
 from xtb.ase.calculator import XTB
 import numpy as np
 import multiprocessing as mp
@@ -128,6 +130,7 @@ def run_task(self, fw_spec):
                 raise e
             else:
                 errors.append(e)
+                return FWAction(stored_data={"error": errors,"converged": False})
 
         if socket and os.path.exists(socket_address):
             os.unlink(socket_address)
@@ -469,7 +472,7 @@ class MolecularTSEstimate(FiretaskBase):
     required_params = ["rxn","ts_path","slab_path","adsorbates_path","rxns_file","path","metal","facet",
                         "name_to_adjlist_dict", "gratom_to_molecule_atom_maps",
                         "gratom_to_molecule_surface_atom_maps","irc_mode",
-                        "vib_obj_dict","opt_obj_dict","nslab","Eharmtol","Eharmfiltertol","Ntsmin","max_num_hfsp_opts"]
+                        "vib_obj_dict","opt_obj_dict","nslab","Eharmtol","Eharmfiltertol","Ntsmin","max_num_hfsp_opts","surrogate_metal"]
     optional_params = ["out_path","spawn_jobs","nprocs","IRC_obj_dict"]
     def run_task(self, fw_spec):
         gratom_to_molecule_atom_maps = {sm: {int(k):v for k,v in d.items()} for sm,d in self["gratom_to_molecule_atom_maps"].items()}
@@ -490,12 +493,13 @@ def run_task(self, fw_spec):
         Ntsmin = self["Ntsmin"]
         max_num_hfsp_opts = self["max_num_hfsp_opts"]
         slab_path = self["slab_path"]
+        surrogate_metal = self["surrogate_metal"]
         slab = read(slab_path)
         irc_mode = self["irc_mode"]
 
         cas = SlabAdsorptionSites(slab,facet,allow_6fold=False,composition_effect=False,
                             label_sites=True,
-                            surrogate_metal=metal)
+                            surrogate_metal=surrogate_metal)
 
         adsorbates_path = self["adsorbates_path"]
 
@@ -915,6 +919,394 @@ def run_task(self, fw_spec):
         
         return FWAction(stored_data={"error": errors,"converged": converged})
 
+def calculate_configruation_energies_firework(admol_name,tree_file,path,coad_stable_sites,Nocc_isolated,
+                                              coadmol_E_dict,concern_energy_tol=None,out_path=None,parents=[],iter=0,ignore_errors=False):
+    d = {"admol_name": admol_name,"tree_file": tree_file,"path": path,
+         "coad_stable_sites": coad_stable_sites,"Nocc_isolated": Nocc_isolated,"coadmol_E_dict": coadmol_E_dict, 
+         "concern_energy_tol": concern_energy_tol}
+    t1 = CalculateConfigurationEnergiesTask(d)
+    if out_path is None: 
+        out_path_energy = os.path.join(os.path.split(tree_file)[0],"Ncoad_energy_"+admol_name+".json")
+        out_path_config = os.path.join(os.path.split(tree_file)[0],"Ncoad_config_"+admol_name+".json")
+        out_path_concern = os.path.join(os.path.split(tree_file)[0],"configs_of_concern_"+admol_name+".json")
+    else:
+        out_path_energy = os.path.join(out_path,"Ncoad_energy_"+admol_name+".json")
+        out_path_config = os.path.join(out_path,"Ncoad_config_"+admol_name+".json")
+        out_path_concern = os.path.join(out_path,"configs_of_concern_"+admol_name+".json")
+    t2 = FileTransferTask({'files': [{'src': "Ncoad_energy_"+admol_name+".json", 'dest': out_path_energy},
+                                     {'src': "Ncoad_config_"+admol_name+".json", 'dest': out_path_config},
+                                     {'src': "configs_of_concern_"+admol_name+".json", 'dest': out_path_concern}], 'mode': 'copy', 'ignore_errors': ignore_errors})
+    return Firework([t1,t2],parents=parents,name=admol_name+"_energies"+str(iter))
+
+@explicit_serialize
+class CalculateConfigurationEnergiesTask(FiretaskBase):
+    required_params = ["admol_name","tree_file","path","coad_stable_sites","Nocc_isolated","coadmol_E_dict"]
+    optional_params = ["concern_energy_tol","ignore_errors"]
+    def run_task(self, fw_spec):
+        admol_name = self['admol_name']
+        tree_file = self["tree_file"]
+        path= self["path"]
+        coad_stable_sites = self["coad_stable_sites"]
+        Nocc_isolated = self["Nocc_isolated"]
+        coadmol_E_dict = {Molecule().from_adjacency_list(k):v for k,v in self["coadmol_E_dict"].items()}
+        concern_energy_tol = self["concern_energy_tol"] if "concern_energy_tol" in self.keys() else None
+        ignore_errors = self["ignore_errors"] if "ignore_errors" in self.keys() else False
+        
+        try:
+            nodes = read_nodes(tree_file)
+            root = [n for n in nodes.values() if n.parent is None][0]
+            if len(root.children) == 1: #pairwise only tree
+                tree = MultiEvalSubgraphIsomorphicDecisionTreeRegressor([adsorbate_interaction_decomposition],
+                                                        nodes=nodes)
+            elif len(root.children) == 2:
+                tree = MultiEvalSubgraphIsomorphicDecisionTreeRegressor([adsorbate_interaction_decomposition,adsorbate_triad_interaction_decomposition],
+                                                        nodes=nodes)
+            else:
+                raise ValueError
+            
+            with open(os.path.join(path,"Configurations",admol_name+".json"),'r') as f:
+                configs = [Molecule().from_adjacency_list(m,check_consistency=False) for m in json.load(f)]
+            
+            Ncoad_energy_dict,Ncoad_config_dict,configs_of_concern_admol = get_cov_energies_configs_concern_tree(tree, configs, coad_stable_sites, Nocc_isolated, concern_energy_tol, 
+                                                coadmol_E_dict=coadmol_E_dict)
+            with open("Ncoad_energy_"+admol_name+".json",'w') as f:
+                json.dump(Ncoad_energy_dict,f)
+            with open("Ncoad_config_"+admol_name+".json",'w') as f:
+                json.dump(Ncoad_config_dict,f)
+            with open("configs_of_concern_"+admol_name+".json",'w') as f:
+                json.dump([tuple([v[0].to_adjacency_list(),v[1],v[2],v[3]]) for v in configs_of_concern_admol.values()],f)
+                
+        except Exception as e:
+            if not ignore_errors:
+                raise e
+            else:
+                return FWAction(stored_data={"error": e}, exit=True)
+
+        return FWAction()
+
+def train_covdep_model_firework(path,admol_name_path_dict,admol_name_structure_dict,sites,site_adjacency,
+                                pynta_dir, metal, facet, slab_path, calculation_directories, coadname,
+                                coad_stable_sites, software, software_kwargs, software_kwargs_TS, freeze_ind, fmaxopt,  parents=[],Ncalc_per_iter=6,iter=0,max_iters=6,concern_energy_tol=None,ignore_errors=False):
+    d = {"path": path, "admol_name_path_dict": admol_name_path_dict, "admol_name_structure_dict": {k : v.to_adjacency_list() for k,v in admol_name_structure_dict.items()},
+         "sites": sites, "site_adjacency": {str(k):v for k,v in site_adjacency.items()}, "pynta_dir": pynta_dir, "metal": metal, "facet": facet, "slab_path": slab_path,
+         "calculation_directories": calculation_directories, "coadname": coadname, "coad_stable_sites": coad_stable_sites, 
+        "Ncalc_per_iter": Ncalc_per_iter, "iter": iter, "max_iters": max_iters, "software": software, "software_kwargs": software_kwargs, "software_kwargs_TS": software_kwargs_TS, "freeze_ind": freeze_ind, 
+        "fmaxopt": fmaxopt, "concern_energy_tol": concern_energy_tol, "ignore_errors": ignore_errors}
+    t1 = TrainCovdepModelTask(d)
+    return Firework([t1],parents=parents,name="Training Model "+str(iter),spec={"_allow_fizzled_parents":True, "_priority": 4})
+
+@explicit_serialize
+class TrainCovdepModelTask(FiretaskBase):
+    required_params = ["path","admol_name_path_dict","admol_name_structure_dict","sites","site_adjacency", "pynta_dir", "metal", "facet",
+                       "slab_path", "calculation_directories", "coadname", "coad_stable_sites", "Ncalc_per_iter", "iter", "max_iters", "software", 
+                       "software_kwargs", "software_kwargs_TS", "freeze_ind", "fmaxopt"]
+    optional_params = ["concern_energy_tol","ignore_errors"]
+    def run_task(self, fw_spec):
+        path = self["path"]
+        admol_name_path_dict = self["admol_name_path_dict"]
+        admol_name_structure_dict = {k: Molecule().from_adjacency_list(v,check_consistency=False) for k,v in self["admol_name_structure_dict"].items()}
+        sites = []
+        for site in self["sites"]:
+            site["normal"] = np.array(site["normal"])
+            site["position"] = np.array(site["position"])
+            site["indices"] = tuple(site["indices"])
+            sites.append(site)
+        site_adjacency = {int(k):[int(x) for x in v] for k,v in self["site_adjacency"].items()}
+        pynta_dir = self["pynta_dir"]
+        metal = self["metal"]
+        facet = self["facet"]
+        slab_path = self["slab_path"]
+        calculation_directories = self["calculation_directories"]
+        coadname  = self["coadname"]
+        Ncalc_per_iter = self["Ncalc_per_iter"]
+        iter = self["iter"]
+        coad_stable_sites = self["coad_stable_sites"]
+        concern_energy_tol = self["concern_energy_tol"] if "concern_energy_tol" in self.keys() else None
+        ignore_errors = self["ignore_errors"] if "ignore_errors" in self.keys() else False
+        software_kwargs = self["software_kwargs"]
+        software = self["software"]
+        freeze_ind = self["freeze_ind"]
+        software_kwargs_TS = self["software_kwargs_TS"]
+        fmaxopt = self["fmaxopt"]
+        max_iters = self["max_iters"]
+        
+        coad = admol_name_structure_dict[coadname]
+        coad_simple = remove_slab(coad)
+        
+        coad_path = os.path.join(pynta_dir,"Adsorbates",coadname)
+        slab = read(slab_path)
+        nslab = len(slab)
+        allowed_structure_site_structures = generate_allowed_structure_site_structures(os.path.join(pynta_dir,"Adsorbates"),sites,site_adjacency,nslab,max_dist=np.inf)
+        
+        ad_energy_dict = get_lowest_adsorbate_energies(os.path.join(pynta_dir,"Adsorbates"))
+        Es = get_adsorbate_energies(coad_path)[0]
+        coadmol_E_dict = dict()
+        coadmol_stability_dict = dict()
+        for p in os.listdir(coad_path):
+            if p == "info.json" or (p not in Es.keys()):
+                continue
+            admol_init,neighbor_sites_init,ninds_init = generate_adsorbate_2D(read(os.path.join(coad_path,p,p+"_init.xyz")),sites,site_adjacency,nslab,max_dist=np.inf,allowed_structure_site_structures=allowed_structure_site_structures)
+            admol,neighbor_sites,ninds = generate_adsorbate_2D(read(os.path.join(coad_path,p,p+".xyz")),sites,site_adjacency,nslab,max_dist=np.inf,allowed_structure_site_structures=allowed_structure_site_structures)
+            out_struct = split_adsorbed_structures(admol,clear_site_info=False)[0]
+            out_struct_init = split_adsorbed_structures(admol_init,clear_site_info=False)[0]
+            coadmol_E_dict[out_struct] = Es[p] 
+            if admol_init.is_isomorphic(admol,save_order=True):
+                coadmol_stability_dict[out_struct_init] = True
+            else:
+                coadmol_stability_dict[out_struct_init] = False
+    
+        if not os.path.exists(os.path.join(path,"Configurations")):
+            info_paths = {adname: os.path.join(os.path.split(os.path.split(p)[0])[0],"info.json") for adname,p in admol_name_path_dict.items()}
+            imag_freq_paths = {adname: os.path.join(os.path.split(p)[0],"vib.json_vib.json") for adname,p in admol_name_path_dict.items()}
+            unstable_pairs = get_unstable_pairs(os.path.join(path,'pairs'),
+                                os.path.join(pynta_dir,"Adsorbates"),
+                                sites,site_adjacency,nslab,max_dist=np.inf,show=False, infopath_dict=info_paths, imag_freq_path_dict=imag_freq_paths)
+            
+            os.makedirs(os.path.join(path,"Configurations"))
+            for admol_name,admol in admol_name_structure_dict.items():
+                configs = get_configurations(admol, coad_simple, coad_stable_sites,  coadmol_stability_dict=coadmol_stability_dict, unstable_groups=unstable_pairs,
+                    coadmol_E_dict=coadmol_E_dict)
+                
+                with open(os.path.join(path,"Configurations",admol_name+".json"),'w') as f:
+                    json.dump([x.to_adjacency_list() for x in configs],f)
+        
+        if iter > 1:
+            with open(os.path.join(path,"pairs_datums.json"),'r') as f:
+                pairs_datums = [Datum(mol=Molecule().from_adjacency_list(d["mol"],check_consistency=False), value=d["value"]) for d in json.load(f)]
+            with open(os.path.join(path,"Iterations",str(iter-1),"cumulative_sample_datums.json"),'r') as f:
+                old_sample_datums = [Datum(mol=Molecule().from_adjacency_list(d["mol"],check_consistency=False), value=d["value"]) for d in json.load(f)]
+        elif iter == 1:
+            with open(os.path.join(path,"pairs_datums.json"),'r') as f:
+                pairs_datums = [Datum(mol=Molecule().from_adjacency_list(d["mol"],check_consistency=False), value=d["value"]) for d in json.load(f)]
+            old_sample_datums = [] 
+        
+        if iter == 0:
+            computed_configs = []
+        else:
+            with open(os.path.join(path,"Iterations",str(iter-1),"computed_configurations.json"),'r') as f:
+                computed_configs = [Molecule().from_adjacency_list(x,check_consistency=False) for x in json.load(f)]
+        
+        new_datums_E = []
+        new_computed_configs = []
+        for d in calculation_directories:
+            with open(os.path.join(d,"info.json"),'r') as f:
+                info = json.load(f)
+            adjlist = info['adjlist']
+            init_config = Molecule().from_adjacency_list(adjlist,check_consistency=False)
+            new_computed_configs.append(init_config)
+            datum_E,datums_stability = process_calculation(d,ad_energy_dict,slab,metal,facet,sites,site_adjacency,pynta_dir,coadmol_E_dict,max_dist=3.0,rxn_alignment_min=0.7,
+                coad_disruption_tol=1.1,out_file_name="out",init_file_name="init",vib_file_name="vib_vib",is_ad=None)
+            if datum_E:
+                new_datums_E.append(datum_E)
+            if datum_E and not datum_E.mol.is_isomorphic(init_config,save_order=True):
+                new_computed_configs.append(datum_E.mol)
+        
+        if not os.path.exists(os.path.join(path,"Iterations",str(iter))):
+            os.makedirs(os.path.join(path,"Iterations",str(iter)))
+        
+        with open(os.path.join(path,"Iterations",str(iter),"computed_configurations.json"),'w') as f:
+            json.dump([x.to_adjacency_list() for x in computed_configs+new_computed_configs],f)
+        
+        if iter == 0:
+            pairs_datums = new_datums_E
+            with open(os.path.join(path,"pairs_datums.json"),'w') as f:
+                json.dump([{"mol": d.mol.to_adjacency_list(),"value": d.value} for d in pairs_datums],f)
+            sampling_datums = []
+        else:
+            sampling_datums = old_sample_datums + new_datums_E
+            with open(os.path.join(path,"Iterations",str(iter),"cumulative_sample_datums.json"),'w') as f:
+                json.dump([{"mol": d.mol.to_adjacency_list(),"value": d.value}for d in sampling_datums],f)
+            
+        Nconfigs = len(admol_name_structure_dict)
+        Ncoads = 1
+        tree = train_sidt_cov_dep_regressor(pairs_datums,sampling_datums,r_site=None,
+                            r_atoms=None,node_fract_training=0.7)
+        
+        tree_file = os.path.join(path,"Iterations",str(iter),"regressor.json")
+        write_nodes(tree,tree_file)
+        
+        config_E_fws = []
+        for admol_name in admol_name_path_dict.keys():
+            st = admol_name_structure_dict[admol_name]
+            Nocc_isolated = len([a for a in st.atoms if a.is_surface_site() and any(not a2.is_surface_site() for a2 in a.bonds.keys())])
+            fw = calculate_configruation_energies_firework(admol_name,tree_file,path,coad_stable_sites,Nocc_isolated,
+                                            {k.to_adjacency_list(): v for k,v in coadmol_E_dict.items()},concern_energy_tol=concern_energy_tol,parents=[],iter=iter,ignore_errors=ignore_errors)
+            config_E_fws.append(fw)
+        
+        scfw = select_calculations_firework(path,admol_name_path_dict,admol_name_structure_dict,sites,site_adjacency,
+                            pynta_dir, metal, facet, slab_path, calculation_directories, coadname,
+                            coad_stable_sites, software, software_kwargs, software_kwargs_TS, freeze_ind, fmaxopt, parents=config_E_fws,Ncalc_per_iter=Ncalc_per_iter,iter=iter,
+                            max_iters=max_iters,concern_energy_tol=concern_energy_tol,ignore_errors=ignore_errors)
+        
+        newwf = Workflow(config_E_fws+[scfw],name="Select Calculations "+str(iter))
+        
+        return FWAction(detours=newwf)
+
+def select_calculations_firework(path,admol_name_path_dict,admol_name_structure_dict,sites,site_adjacency,
+                                pynta_dir, metal, facet, slab_path, calculation_directories, coadname,
+                                coad_stable_sites, software, software_kwargs, software_kwargs_TS, freeze_ind, fmaxopt, parents=[],Ncalc_per_iter=6,iter=0,max_iters=6,concern_energy_tol=None,ignore_errors=False):
+    d = {"path": path,"admol_name_path_dict": admol_name_path_dict,"admol_name_structure_dict": {k:v.to_adjacency_list() for k,v in admol_name_structure_dict.items()},
+         "sites": sites, "site_adjacency": {str(k): v for k,v in site_adjacency.items()}, "pynta_dir": pynta_dir, "metal": metal, "facet": facet, "slab_path": slab_path,
+         "calculation_directories": calculation_directories, "coadname": coadname, "coad_stable_sites": coad_stable_sites, 
+         "Ncalc_per_iter": Ncalc_per_iter, "software": software, "iter": iter, "max_iters": max_iters,
+                       "software_kwargs": software_kwargs, "software_kwargs_TS": software_kwargs_TS, "freeze_ind": freeze_ind, 
+                       "fmaxopt": fmaxopt, "concern_energy_tol": concern_energy_tol, "ignore_errors": ignore_errors}
+    t1 = SelectCalculationsTask(d)
+    return Firework([t1],parents=parents,name="Selecting Calculations "+str(iter),spec={"_priority": 4})
+
+@explicit_serialize
+class SelectCalculationsTask(FiretaskBase):
+    required_params = ["path","admol_name_path_dict","admol_name_structure_dict","sites","site_adjacency", "pynta_dir", "metal", "facet",
+                       "slab_path", "calculation_directories", "coadname", "coad_stable_sites", "iter", "software", "max_iters",
+                       "software_kwargs", "software_kwargs_TS", "freeze_ind", "fmaxopt"]
+    optional_params = ["concern_energy_tol","ignore_errors"]
+    def run_task(self, fw_spec):
+        path = self["path"]
+        admol_name_path_dict = self["admol_name_path_dict"]
+        admol_name_structure_dict = {k: Molecule().from_adjacency_list(v,check_consistency=False) for k,v in self["admol_name_structure_dict"].items()}
+        sites = []
+        sites = []
+        for site in self["sites"]:
+            site["normal"] = np.array(site["normal"])
+            site["position"] = np.array(site["position"])
+            site["indices"] = tuple(site["indices"])
+            sites.append(site)
+        site_adjacency = {int(k):[int(x) for x in v] for k,v in self["site_adjacency"].items()}
+        pynta_dir = self["pynta_dir"]
+        metal = self["metal"]
+        facet = self["facet"]
+        slab_path = self["slab_path"]
+        calculation_directories = self["calculation_directories"]
+        coadname  = self["coadname"]
+        Ncalc_per_iter = self["Ncalc_per_iter"]
+        iter = self["iter"]
+        coad_stable_sites = self["coad_stable_sites"]
+        concern_energy_tol = self["concern_energy_tol"] if "concern_energy_tol" in self.keys() else None
+        ignore_errors = self["ignore_errors"] if "ignore_errors" in self.keys() else False
+        software_kwargs = self["software_kwargs"]
+        software = self["software"]
+        freeze_ind = self["freeze_ind"]
+        software_kwargs_TS = self["software_kwargs_TS"]
+        fmaxopt = self["fmaxopt"]
+        max_iters = self["max_iters"]
+        
+        if iter == max_iters: #terminate
+            return FWAction()
+        
+        coad = admol_name_structure_dict[coadname]
+        
+        coad_path = os.path.join(pynta_dir,"Adsorbates",coadname)
+        
+        slab = read(slab_path)
+        nslab = len(slab)
+        
+        allowed_structure_site_structures = generate_allowed_structure_site_structures(os.path.join(pynta_dir,"Adsorbates"),sites,site_adjacency,nslab,max_dist=np.inf)
+        
+        ad_energy_dict = get_lowest_adsorbate_energies(os.path.join(pynta_dir,"Adsorbates"))
+        Es = get_adsorbate_energies(coad_path)[0]
+        coadmol_E_dict = dict()
+        coadmol_stability_dict = dict()
+        for p in os.listdir(coad_path):
+            if p == "info.json" or (p not in Es.keys()):
+                continue
+            admol_init,neighbor_sites_init,ninds_init = generate_adsorbate_2D(read(os.path.join(coad_path,p,p+"_init.xyz")),sites,site_adjacency,nslab,max_dist=np.inf)
+            admol,neighbor_sites,ninds = generate_adsorbate_2D(read(os.path.join(coad_path,p,p+".xyz")),sites,site_adjacency,nslab,max_dist=np.inf)
+            out_struct = split_adsorbed_structures(admol,clear_site_info=False)[0]
+            out_struct_init = split_adsorbed_structures(admol_init,clear_site_info=False)[0]
+            coadmol_E_dict[out_struct] = Es[p] 
+            if admol_init.is_isomorphic(admol,save_order=True):
+                coadmol_stability_dict[out_struct_init] = True
+            else:
+                coadmol_stability_dict[out_struct_init] = False
+                
+        
+        #load configurations and Ncoad_energies
+        configs_of_concern_by_admol = dict()
+        Ncoad_energy_by_admol = dict()
+        for admol_name,st in admol_name_structure_dict.items():
+            config_path = os.path.join(path,"Iterations",str(iter),"configs_of_concern_"+admol_name+".json")
+            with open(config_path,'r') as f:
+                configs_of_concern_by_admol[admol_name] = [(Molecule().from_adjacency_list(k[0],check_consistency=False),k[1],k[2],k[3]) for k in json.load(f)]
+            Ncoad_energy_path = os.path.join(path,"Iterations",str(iter),"Ncoad_energy_"+admol_name+".json")
+            with open(Ncoad_energy_path,'w') as f:
+                Ncoad_energy_by_admol[admol_name] = json.load(f)
+            
+        #load tree
+        nodes = read_nodes(os.path.join(path,"Iterations",str(iter),"regressor.json"))
+        tree = MultiEvalSubgraphIsomorphicDecisionTreeRegressor([adsorbate_interaction_decomposition,adsorbate_triad_interaction_decomposition],
+                                            nodes=nodes)
+        
+        #load computed configs
+        with open(os.path.join(path,"Iterations",str(iter),"computed_configurations.json"),'r') as f:
+            computed_configs = [Molecule().from_adjacency_list(x,check_consistency=False) for x in json.load(f)]
+        
+        configs_for_calculation,admol_to_config_for_calculation= get_configs_for_calculation(configs_of_concern_by_admol,Ncoad_energy_by_admol,admol_name_structure_dict,computed_configs,tree,Ncalc_per_iter)
+
+        os.makedirs(os.path.join(path,"Iterations",str(iter),"Samples"))
+        assert len(configs_for_calculation) > 0, configs_for_calculation
+        sample_fws = []
+        calculation_directories = []
+        for i,config in enumerate(configs_for_calculation):
+            adname = None
+            for admol_name,config_list in admol_to_config_for_calculation.items():
+                if any(x is config for x in config_list):
+                    adname = admol_name
+                    break
+            else:
+                raise ValueError
+            
+            partial_admol = admol_name_structure_dict[adname]
+            admol_path = admol_name_path_dict[adname]
+            partial_atoms = read(admol_path)
+            init_atoms = mol_to_atoms(config,slab,sites,metal,partial_atoms=partial_atoms,partial_admol=partial_admol)
+            os.makedirs(os.path.join(path,"Iterations",str(iter),"Samples",str(i)))
+            init_path = os.path.join(path,"Iterations",str(iter),"Samples",str(i),"init.xyz")
+            write(init_path,init_atoms)
+            calculation_directories.append(os.path.split(init_path)[0])
+            json_out = {"adjlist": config.to_adjacency_list(), "xyz": admol_path}
+            with open(os.path.join(os.path.split(init_path)[0],'info.json'),'w') as f:
+                json.dump(json_out,f)
+            
+            if not any(bd.get_order_str() == 'R' for bd in config.get_all_edges()):
+                fwopt = optimize_firework(init_path,
+                        software,"weakopt",
+                        opt_method="MDMin",opt_kwargs={'dt': 0.05,"trajectory": "weakopt.traj"},software_kwargs=software_kwargs,order=0,
+                        run_kwargs={"fmax" : 0.5, "steps" : 30},parents=[],
+                            constraints=["freeze up to {}".format(freeze_ind)],
+                        ignore_errors=True, metal=metal, facet=facet, priority=3)
+                fwopt2 = optimize_firework(os.path.join(os.path.split(init_path)[0],"weakopt.xyz"),
+                                software,"out",
+                                opt_method="QuasiNewton",opt_kwargs={"trajectory": "out.traj"},software_kwargs=software_kwargs,order=0,
+                                run_kwargs={"fmax" : fmaxopt, "steps" : 70},parents=[fwopt],
+                                constraints=["freeze up to {}".format(freeze_ind)],
+                                ignore_errors=True, metal=metal, facet=facet, priority=2)
+            
+                fwvib = vibrations_firework(os.path.join(os.path.split(init_path)[0],"out.xyz"),
+                                            software,"vib",software_kwargs=software_kwargs,parents=[fwopt2],
+                                            constraints=["freeze up to "+str(nslab)])
+                sample_fws.extend([fwopt,fwopt2,fwvib])
+            else:
+                fwopt = optimize_firework(init_path,
+                        software,"out", sella=True, 
+                        opt_kwargs={"trajectory": "out.traj"},software_kwargs=software_kwargs_TS,
+                        order=1,
+                        run_kwargs={"fmax" : fmaxopt, "steps" : 70},parents=[],
+                            constraints=["freeze up to {}".format(freeze_ind)],
+                        ignore_errors=True, metal=metal, facet=facet, priority=3)
+        
+                fwvib = vibrations_firework(os.path.join(os.path.split(init_path)[0],"out.xyz"),
+                                            software,"vib",software_kwargs=software_kwargs,parents=[fwopt],
+                                            constraints=["freeze up to "+str(nslab)])
+                sample_fws.extend([fwopt,fwvib])
+            
+        tfw = train_covdep_model_firework(path,admol_name_path_dict,admol_name_structure_dict,sites,site_adjacency,
+                            pynta_dir, metal, facet, slab_path, calculation_directories, coadname,
+                            coad_stable_sites, software, software_kwargs, software_kwargs_TS, freeze_ind, fmaxopt, parents=sample_fws,
+                            Ncalc_per_iter=Ncalc_per_iter,iter=iter+1,max_iters=max_iters,concern_energy_tol=concern_energy_tol,ignore_errors=ignore_errors)
+        newwf = Workflow(sample_fws+[tfw],name="Train Iteration "+str(iter+1))
+        
+        return FWAction(detours=newwf)
+        
 def map_harmonically_forced_xtb(input):
     tsstruct,atom_bond_potentials,site_bond_potentials,nslab,constraints,ts_path,j,molecule_to_atom_maps,ase_to_mol_num = input
     os.makedirs(os.path.join(ts_path,str(j)))
diff --git a/pynta/transitionstate.py b/pynta/transitionstate.py
index c8fed6ce..4e456dda 100644
--- a/pynta/transitionstate.py
+++ b/pynta/transitionstate.py
@@ -168,6 +168,8 @@ def get_unique_TS_structs(adsorbates,species_names,slab,cas,nslab,num_surf_sites
     Generate unique initial structures for TS guess generation
     """
     tsstructs = []
+    slab_sites = cas.get_sites()
+    site_adjacency = cas.get_neighbor_site_list()
     ordered_adsorbates = [adsorbates[name] for name in species_names]
     for adss in itertools.product(*ordered_adsorbates):
         if num_surf_sites[0] > 0:
@@ -184,7 +186,7 @@ def get_unique_TS_structs(adsorbates,species_names,slab,cas,nslab,num_surf_sites
                 ad = adss[1][nslab:]
                 bondlengths1,sites1,site_lengths1 = get_bond_lengths_sites(mol_dict[name],adss[1],gratom_to_molecule_atom_maps[name],
                                                                         gratom_to_molecule_surface_atom_maps[name],nslab,
-                                                                        facet=facet,metal=metal,cas=cas)
+                                                                        slab_sites,site_adjacency,facet=facet,metal=metal)
                 sitetype1 = list(sites1.values())[0]
                 height1 = list(site_lengths1.values())[0]
                 surf_ind1 = list(gratom_to_molecule_surface_atom_maps[name].keys())[0]
@@ -201,7 +203,7 @@ def get_unique_TS_structs(adsorbates,species_names,slab,cas,nslab,num_surf_sites
                                 ad2 = adss[2][nslab:]
                                 bondlengths2,sites2,site_lengths2 = get_bond_lengths_sites(mol_dict[name2],adss[2],gratom_to_molecule_atom_maps[name2],
                                                                         gratom_to_molecule_surface_atom_maps[name2],nslab,
-                                                                        facet=facet,metal=metal,cas=cas)
+                                                                        slab_sites,site_adjacency,facet=facet,metal=metal)
                                 sitetype2 = list(sites2.values())[0]
                                 height2 = list(site_lengths2.values())[0]
                                 surf_ind2 = list(gratom_to_molecule_surface_atom_maps[name2].keys())[0]
@@ -263,6 +265,8 @@ def generate_constraints_harmonic_parameters(tsstructs,adsorbates,slab,forward_t
     site_bond_potential_lists = []
     site_bond_dict_list = []
     out_structs = []
+    slab_sites = cas.get_sites()
+    site_adjacency = cas.get_neighbor_site_list()
     
     nodes_file = os.path.join(os.path.split(pynta.models.__file__)[0],"TS_tree_nodes.json")
     nodes = read_nodes(nodes_file)
@@ -286,7 +290,7 @@ def generate_constraints_harmonic_parameters(tsstructs,adsorbates,slab,forward_t
             bondlengths,sites,site_lengths = get_bond_lengths_sites(mol_dict[name],ads,
                                                                 gratom_to_molecule_atom_maps[name],
                                                                 gratom_to_molecule_surface_atom_maps[name],
-                                                                nslab,facet=facet,metal=metal,cas=cas)
+                                                                nslab,slab_sites,site_adjacency,facet=facet,metal=metal)
             bdlength_list.append(bondlengths)
             sites_list.append(sites)
             site_lengths_list.append(site_lengths)
@@ -312,7 +316,7 @@ def generate_constraints_harmonic_parameters(tsstructs,adsorbates,slab,forward_t
             bondlengths,sites,site_lengths = get_bond_lengths_sites(mol_dict[name],ads,
                                                                 gratom_to_molecule_atom_maps[name],
                                                                 gratom_to_molecule_surface_atom_maps[name],
-                                                                nslab,facet=facet,metal=metal,cas=cas)
+                                                                nslab,slab_sites,site_adjacency,facet=facet,metal=metal)
             bdlength_list.append(bondlengths)
             sites_list.append(sites)
             site_lengths_list.append(site_lengths)
@@ -566,6 +570,10 @@ def estimate_deq_k(sidt,labels,dwell,forward_template,reverse_template,template_
             a.label = ''
     
     v = sidt.evaluate(mol)
+    
+    if hasattr(v,"value"):
+        v = v.value
+    
     if any([a.is_surface_site() for a in mol.get_labeled_atoms('*')]):
         return dwell*(v-1.0),100.0
     else:
diff --git a/pynta/utils.py b/pynta/utils.py
index 56264bfd..298db204 100644
--- a/pynta/utils.py
+++ b/pynta/utils.py
@@ -4,11 +4,107 @@
 from ase.utils.structure_comparator import SymmetryEquivalenceCheck
 from ase.io import write, read
 import ase.constraints
+from ase.geometry import get_distances
 from copy import deepcopy
 from importlib import import_module
 import numpy as np
 import copy
 
+def sites_match(site1,site2,slab,tol=0.5):
+    """determine if two sites match
+
+    Args:
+        site1 (dict): site dictionary for first site
+        site2 (dict): site dictionary for second site
+        slab (ase.Atoms): the ase.Atoms object for the slab the sites are on
+        tol (float, optional): Distance tolerance defaults to 0.5 Angstroms.
+
+    Returns:
+        _type_: _description_
+    """
+    v,dist = get_distances([site1["position"]], [site2["position"]], cell=slab.cell, pbc=slab.pbc)
+    if dist > tol:
+        return False
+    elif site1["site"] != site2["site"]:
+        return False
+    elif site1["morphology"] != site2["morphology"]:
+        return False
+    else:
+        return True
+    
+def get_occupied_sites(struct,sites,nslab,allowed_site_dict=dict(),site_bond_cutoff=2.5,
+                       site_bond_disruption_cutoff=0.5,cutoff_corrections={"ontop": 0.3}):
+    """determine what sites are occupied by what atoms
+
+    Args:
+        struct (ase.Atoms): Atoms object for the structure
+        sites (list): list of site dictionaries
+        nslab (int): number of atoms in the surface
+        allowed_site_dict (dict, optional): dictionary mapping atom index to a list of allowed (site,morphology) for that atom
+        site_bond_cutoff (float, optional): _description_. Defaults to 2.5.
+
+    Raises:
+        ValueError: _description_
+
+    Returns:
+        _type_: _description_
+    """
+    occ_sites = []
+    for i in range(nslab,len(struct)):
+        pos = struct.positions[i]
+        if i in allowed_site_dict.keys():
+            allowed_sites = allowed_site_dict[i]
+        else:
+            allowed_sites = None
+        siteout = None
+        mindist = None
+        for site in sites:
+            if allowed_sites and (site["site"],site["morphology"]) not in allowed_sites: #skip disallowed sites
+                continue
+            v,dist = get_distances([site["position"]], [pos], cell=struct.cell, pbc=struct.pbc)
+            if siteout is None:
+                siteout = site
+                mindist = dist
+                n = v
+            else:
+                if dist < mindist:
+                    mindist = dist
+                    siteout = site
+                    n = v
+        
+        if mindist is None:
+            #print(i)
+            #view(struct)
+            raise ValueError
+        
+        if (siteout["site"] not in cutoff_corrections.keys() and mindist < site_bond_cutoff) or (siteout["site"] in cutoff_corrections.keys() and mindist < site_bond_cutoff + cutoff_corrections[siteout["site"]]):
+            mindn = None
+            for j in range(nslab,len(struct)): #check for site bond disruption by other adsorbed atoms
+                if i == j:
+                    continue
+                else:
+                    AB,ABdist = get_distances([struct.positions[j]], [pos], cell=struct.cell, pbc=struct.pbc)
+                    x = np.dot(AB[0,0,:],n[0,0,:])
+                    if x < 0: #this means our target atom is closer than the other atom, which is okay (for this atom)
+                        continue
+                    dn = np.linalg.norm(AB[0,0,:] - x/mindist**2*n[0,0,:])
+                    if mindn is None:
+                        mindn = dn
+                    elif dn < mindn:
+                        mindn = dn
+                    
+                
+            if mindn is None or mindn > site_bond_disruption_cutoff:
+                s = deepcopy(siteout)
+                s["normal"] = n
+                s["bonding_index"] = i
+                s["bond_length"] = mindist
+                occ_sites.append(s)
+    
+    return occ_sites
+
+class SiteOccupationException(Exception):
+    pass
 
 def get_unique_sym(geoms):
     ''' Check for the symmetry equivalent structures in the given files
@@ -30,6 +126,7 @@ def get_unique_sym(geoms):
 
     for geom in geoms:
         adsorbate_atom_obj = read(geom)
+        adsorbate_atom_obj.set_constraint()
         adsorbate_atom_obj.pbc = True
         adsorbate_atom_obj.set_constraint() # Reset constraints for comparison
         comparision = comparator.compare(
@@ -56,6 +153,7 @@ def get_unique_sym_indices(geoms):
 
     for geom in geoms:
         adsorbate_atom_obj = read(geom)
+        adsorbate_atom_obj.set_constraint()
         adsorbate_atom_obj.pbc = True
         adsorbate_atom_obj.set_constraint() # Reset constraints before comparison
         comparision = comparator.compare(
@@ -87,6 +185,7 @@ def get_unique_sym_structs(geoms):
 
     for i,geom in enumerate(geoms_copy):
         adsorbate_atom_obj = geom
+        adsorbate_atom_obj.set_constraint()
         adsorbate_atom_obj.pbc = True
         adsorbate_atom_obj.set_constraint() # Reset constraints before comparison
         comparision = comparator.compare(
@@ -116,6 +215,7 @@ def get_unique_sym_struct_indices(geoms):
 
     for i,geom in enumerate(geoms_copy):
         adsorbate_atom_obj = geom
+        adsorbate_atom_obj.set_constraint()
         adsorbate_atom_obj.pbc = True
         adsorbate_atom_obj.set_constraint() # Reset constraints before comparison
         comparision = comparator.compare(
@@ -145,6 +245,7 @@ def get_unique_sym_struct_index_clusters(geoms):
 
     for i,geom in enumerate(geoms_copy):
         adsorbate_atom_obj = geom
+        adsorbate_atom_obj.set_constraint()
         adsorbate_atom_obj.pbc = True
         adsorbate_atom_obj.set_constraint() # Reset constraints before comparison
         comparison = None
@@ -180,6 +281,7 @@ def filter_nonunique_TS_guess_indices(geoms,Es):
 
     for j,geom in enumerate(geoms):
         adsorbate_atom_obj = read(geom)
+        adsorbate_atom_obj.set_constraint()
         adsorbate_atom_obj.pbc = True
         adsorbate_atom_obj.set_constraint() # Reset constraints before comparison
         for i,good_adsorbate in enumerate(good_adsorbates_atom_obj_list):