usnistgov · knc6 · Oct 24, 2023 · Aug 6, 2023 · Aug 11, 2023 · Aug 11, 2023
diff --git a/alignn/__init__.py b/alignn/__init__.py
@@ -1,2 +1,2 @@
 """Version number."""
-__version__ = "2023.08.01"
+__version__ = "2023.10.01"
diff --git a/alignn/config.py b/alignn/config.py
@@ -199,6 +199,7 @@ class TrainingConfig(BaseSettings):
     use_canonize: bool = True
     num_workers: int = 4
     cutoff: float = 8.0
+    cutoff_extra: float = 3.0
     max_neighbors: int = 12
     keep_data_order: bool = True
     normalize_graph_level_loss: bool = False

diff --git a/alignn/data.py b/alignn/data.py
@@ -73,9 +73,12 @@ def load_graphs(
     name: str = "dft_3d",
     neighbor_strategy: str = "k-nearest",
     cutoff: float = 8,
+    cutoff_extra: float = 3,
     max_neighbors: int = 12,
     cachedir: Optional[Path] = None,
     use_canonize: bool = False,
+    id_tag="jid",
+    # extra_feats_json=None,
 ):
     """Construct crystal graphs.
 
@@ -98,6 +101,7 @@ def atoms_to_graph(atoms):
         return Graph.atom_dgl_multigraph(
             structure,
             cutoff=cutoff,
+            cutoff_extra=cutoff_extra,
             atom_features="atomic_number",
             max_neighbors=max_neighbors,
             compute_line_graph=False,
@@ -113,9 +117,41 @@ def atoms_to_graph(atoms):
     if cachefile is not None and cachefile.is_file():
         graphs, labels = dgl.load_graphs(str(cachefile))
     else:
-        df = pd.DataFrame(dataset)
-
-        graphs = df["atoms"].progress_apply(atoms_to_graph).values
+        # print('dataset',dataset,type(dataset))
+        print("Converting to graphs!")
+        graphs = []
+        # columns=dataset.columns
+        for ii, i in tqdm(dataset.iterrows()):
+            # print('iooooo',i)
+            atoms = i["atoms"]
+            structure = (
+                Atoms.from_dict(atoms) if isinstance(atoms, dict) else atoms
+            )
+            g = Graph.atom_dgl_multigraph(
+                structure,
+                cutoff=cutoff,
+                cutoff_extra=cutoff_extra,
+                atom_features="atomic_number",
+                max_neighbors=max_neighbors,
+                compute_line_graph=False,
+                use_canonize=use_canonize,
+                neighbor_strategy=neighbor_strategy,
+                id=i[id_tag],
+            )
+            # print ('ii',ii)
+            if "extra_features" in i:
+                natoms = len(atoms["elements"])
+                # if "extra_features" in columns:
+                g.ndata["extra_features"] = torch.tensor(
+                    [i["extra_features"] for n in range(natoms)]
+                ).type(torch.get_default_dtype())
+            graphs.append(g)
+
+        # df = pd.DataFrame(dataset)
+        # print ('df',df)
+
+        # graphs = df["atoms"].progress_apply(atoms_to_graph).values
+        # print ('graphs',graphs,graphs[0])
         if cachefile is not None:
             dgl.save_graphs(str(cachefile), graphs.tolist())
 
@@ -174,7 +210,11 @@ def get_id_train_val_test(
     # full train/val test split
     # ids = ids[::-1]
     id_train = ids[:n_train]
-    id_val = ids[-(n_val + n_test) : -n_test] if n_test > 0 else ids[-(n_val + n_test) :]  # noqa:E203
+    id_val = (
+        ids[-(n_val + n_test) : -n_test]
+        if n_test > 0
+        else ids[-(n_val + n_test) :]
+    )  # noqa:E203
     id_test = ids[-n_test:] if n_test > 0 else []
     return id_train, id_val, id_test
 
@@ -192,6 +232,7 @@ def get_torch_dataset(
     name="",
     line_graph="",
     cutoff=8.0,
+    cutoff_extra=3.0,
     max_neighbors=12,
     classification=False,
     output_dir=".",
@@ -216,7 +257,9 @@ def get_torch_dataset(
         neighbor_strategy=neighbor_strategy,
         use_canonize=use_canonize,
         cutoff=cutoff,
+        cutoff_extra=cutoff_extra,
         max_neighbors=max_neighbors,
+        id_tag=id_tag,
     )
     data = StructureDataset(
         df,
@@ -259,6 +302,7 @@ def get_train_val_loaders(
     id_tag: str = "jid",
     use_canonize: bool = False,
     cutoff: float = 8.0,
+    cutoff_extra: float = 3.0,
     max_neighbors: int = 12,
     classification_threshold: Optional[float] = None,
     target_multiplication_factor: Optional[float] = None,
@@ -467,47 +511,58 @@ def get_train_val_loaders(
             name=dataset,
             line_graph=line_graph,
             cutoff=cutoff,
+            cutoff_extra=cutoff_extra,
             max_neighbors=max_neighbors,
             classification=classification_threshold is not None,
             output_dir=output_dir,
             tmp_name="train_data",
         )
-        val_data = get_torch_dataset(
-            dataset=dataset_val,
-            id_tag=id_tag,
-            atom_features=atom_features,
-            target=target,
-            target_atomwise=target_atomwise,
-            target_grad=target_grad,
-            target_stress=target_stress,
-            neighbor_strategy=neighbor_strategy,
-            use_canonize=use_canonize,
-            name=dataset,
-            line_graph=line_graph,
-            cutoff=cutoff,
-            max_neighbors=max_neighbors,
-            classification=classification_threshold is not None,
-            output_dir=output_dir,
-            tmp_name="val_data",
-        ) if len(dataset_val) > 0 else None
-        test_data = get_torch_dataset(
-            dataset=dataset_test,
-            id_tag=id_tag,
-            atom_features=atom_features,
-            target=target,
-            target_atomwise=target_atomwise,
-            target_grad=target_grad,
-            target_stress=target_stress,
-            neighbor_strategy=neighbor_strategy,
-            use_canonize=use_canonize,
-            name=dataset,
-            line_graph=line_graph,
-            cutoff=cutoff,
-            max_neighbors=max_neighbors,
-            classification=classification_threshold is not None,
-            output_dir=output_dir,
-            tmp_name="test_data",
-        ) if len(dataset_test) > 0 else None
+        val_data = (
+            get_torch_dataset(
+                dataset=dataset_val,
+                id_tag=id_tag,
+                atom_features=atom_features,
+                target=target,
+                target_atomwise=target_atomwise,
+                target_grad=target_grad,
+                target_stress=target_stress,
+                neighbor_strategy=neighbor_strategy,
+                use_canonize=use_canonize,
+                name=dataset,
+                line_graph=line_graph,
+                cutoff=cutoff,
+                cutoff_extra=cutoff_extra,
+                max_neighbors=max_neighbors,
+                classification=classification_threshold is not None,
+                output_dir=output_dir,
+                tmp_name="val_data",
+            )
+            if len(dataset_val) > 0
+            else None
+        )
+        test_data = (
+            get_torch_dataset(
+                dataset=dataset_test,
+                id_tag=id_tag,
+                atom_features=atom_features,
+                target=target,
+                target_atomwise=target_atomwise,
+                target_grad=target_grad,
+                target_stress=target_stress,
+                neighbor_strategy=neighbor_strategy,
+                use_canonize=use_canonize,
+                name=dataset,
+                line_graph=line_graph,
+                cutoff=cutoff,
+                cutoff_extra=cutoff_extra,
+                max_neighbors=max_neighbors,
+                classification=classification_threshold is not None,
+                output_dir=output_dir,
+                tmp_name="test_data",
+            )
+            if len(dataset_test) > 0
+            else None
+        )
 
         collate_fn = train_data.collate
         # print("line_graph,line_dih_graph", line_graph, line_dih_graph)
@@ -535,15 +590,19 @@ def get_train_val_loaders(
             pin_memory=pin_memory,
         )
 
-        test_loader = DataLoader(
-            test_data,
-            batch_size=1,
-            shuffle=False,
-            collate_fn=collate_fn,
-            drop_last=False,
-            num_workers=workers,
-            pin_memory=pin_memory,
-        ) if len(dataset_test) > 0 else None
+        test_loader = (
+            DataLoader(
+                test_data,
+                batch_size=1,
+                shuffle=False,
+                collate_fn=collate_fn,
+                drop_last=False,
+                num_workers=workers,
+                pin_memory=pin_memory,
+            )
+            if len(dataset_test) > 0
+            else None
+        )
 
         if save_dataloader:
             torch.save(train_loader, train_sample)
@@ -553,10 +612,10 @@ def get_train_val_loaders(
                 torch.save(test_loader, test_sample)
 
     print("n_train:", len(train_loader.dataset))
-    print("n_val  :", len(val_loader.dataset)
-          if val_loader is not None else 0)
-    print("n_test :", len(test_loader.dataset)
-          if test_loader is not None else 0)
+    print("n_val  :", len(val_loader.dataset) if val_loader is not None else 0)
+    print(
+        "n_test :", len(test_loader.dataset) if test_loader is not None else 0
+    )
     return (
         train_loader,
         val_loader,

diff --git a/alignn/examples/sample_data_ff/mlearn_data/Ni/config_rad.json b/alignn/examples/sample_data_ff/mlearn_data/Ni/config_rad.json
@@ -0,0 +1,68 @@
+{
+    "version": "112bbedebdaecf59fb18e11c929080fb2f358246",
+    "dataset": "user_data",
+    "target": "target",
+    "atom_features": "cgcnn",
+    "neighbor_strategy": "radius_graph",
+    "id_tag": "jid",
+    "random_seed": 123,
+    "classification_threshold": null,
+    "n_val": 31,
+    "n_test": 31,
+    "n_train": 263,
+    "train_ratio": 0.9,
+    "val_ratio": 0.05,
+    "test_ratio": 0.05,
+    "target_multiplication_factor": null,
+    "epochs": 50,
+    "batch_size": 5,
+    "weight_decay": 1e-05,
+    "learning_rate": 0.001,
+    "filename": "sample",
+    "warmup_steps": 2000,
+    "criterion": "l1",
+    "optimizer": "adamw",
+    "scheduler": "onecycle",
+    "pin_memory": false,
+    "save_dataloader": false,
+    "write_checkpoint": true,
+    "write_predictions": true,
+    "store_outputs": false,
+    "progress": true,
+    "log_tensorboard": false,
+    "standard_scalar_and_pca": false,
+    "use_canonize": false,
+    "num_workers": 0,
+    "cutoff": 4.0,
+    "max_neighbors": 12,
+    "keep_data_order": true,
+    "normalize_graph_level_loss": false,
+    "distributed": false,
+    "n_early_stopping": null,
+    "output_dir": "./",
+    "model": {
+        "name": "alignn_atomwise",
+        "alignn_layers": 2,
+        "gcn_layers": 4,
+        "atom_input_features": 92,
+        "edge_input_features": 80,
+        "triplet_input_features": 40,
+        "embedding_features": 64,
+        "hidden_features": 256,
+        "output_features": 1,
+        "grad_multiplier": -1,
+        "force_mult_natoms": true,
+        "calculate_gradient": true,
+        "atomwise_output_features": 0,
+        "graphwise_weight": 1.0,
+        "gradwise_weight": 10.0,
+        "stresswise_weight": 0.0,
+        "atomwise_weight": 0.0,
+        "link": "identity",
+        "use_cutoff_function": true,
+        "zero_inflated": false,
+        "classification": false,
+        "inner_cutoff": 3.0,
+        "lg_on_fly": false
+    }
+}
diff --git a/alignn/examples/sample_data_ff/mlearn_data/Ni/example.py b/alignn/examples/sample_data_ff/mlearn_data/Ni/example.py
@@ -0,0 +1,64 @@
+"""Example: probe a trained ALIGNN-FF model on FCC Ni.
+
+Scans energy vs. lattice constant, then calls ``ev_curve`` and ``phonons``.
+Typical workflow (commands kept from the original notes):
+    train_folder_ff.py --root_dir "/mlearn_data/Ni/"
+        --config config_ni.json --output_dir=temp_ni --epochs 30
+    python ev_phonon_nvt_test.py
+"""
+
+from ase import Atom, Atoms as AseAtoms
+from jarvis.core.atoms import ase_to_atoms
+import numpy as np
+import matplotlib.pyplot as plt
+from alignn.ff.ff import AlignnAtomwiseCalculator, phonons, ev_curve
+
+# Directory holding the trained model. Earlier runs used "temp_ni",
+# "temp_ni_radius" and "temp_ni_radius_cutoff".
+model_path = "temp_ni_radius_cutoff_newfunc"
+calc = AlignnAtomwiseCalculator(path=model_path)
+
+# Primitive FCC lattice vectors for a unit lattice constant.
+FCC_CELL = 0.5 * np.array([[1.0, 1.0, 0.0], [0.0, 1.0, 1.0], [1.0, 0.0, 1.0]])
+
+
+def fcc_ni(a):
+    """Return a one-atom periodic FCC Ni cell with lattice constant ``a``."""
+    return AseAtoms([Atom("Ni", (0, 0, 0))], cell=a * FCC_CELL, pbc=True)
+
+
+# Energy vs. lattice constant; np.linspace(3.5, 3.8) gives a narrower scan.
+lattice_params = np.linspace(3.1, 4.8)
+fcc_energies = []
+for a in lattice_params:
+    atoms = fcc_ni(a)
+    atoms.set_tags(np.ones(len(atoms)))
+    atoms.calc = calc
+    fcc_energies.append(atoms.get_potential_energy())
+
+# Plot the scan and save it as ni.png.
+plt.plot(lattice_params, fcc_energies)
+plt.title("1x1x1")
+plt.xlabel("Lattice constant (A)")
+plt.ylabel("Total energy (eV)")
+plt.savefig("ni.png")
+plt.close()
+
+# The structure below is written to POSCAR_Ni and evaluated with the Ni
+# model, so it must be Ni (the original built a "Cu" atom here).
+atoms = fcc_ni(3.51)
+
+x, y, eos, kv = ev_curve(atoms=ase_to_atoms(atoms), model_path=model_path)
+print(kv)
+
+ase_to_atoms(atoms).write_poscar("POSCAR_Ni")
+phonons(atoms=ase_to_atoms(atoms), model_path=model_path, enforce_c_size=3)
+
+# Follow-up MD command for the written POSCAR; it is only printed, not run.
+cmd = (
+    'run_alignn_ff.py --file_path POSCAR_Ni --task="nve_velocity_verlet" '
+    + "--timestep=0.1 --md_steps=2000 --temperature_K=305"
+    + " --initial_temperature_K=305 --model_path "
+    + model_path
+)
+print(cmd)