-
Notifications
You must be signed in to change notification settings - Fork 101
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add qchem files * remove __all__ * refactor * remove dead code in qchem/sets/core.py * delete solitary qchem/schemas/__init__.py * delete dead code in tests/qchem/jobs/test_core.py * remove dead code in tests/qchem/sets/test_core.py * refactored the InputSetGenerator to dataclass args * added the test for get_valid_paths in drones * increased test coverage for sets: * increased test coverage for input generators * increases coverage for .write_input and .from_directory * fixed linting * increased test coverage for plots and nbo * small changes to names of the QChem makers * correcting single_point maker name in tests * added a test for run functionality * fixed linting for test_run * increasing the test coverage for qche run.py * corrected error in test_run * deleted extra unnecessary files in test_dir --------- Co-authored-by: Janosh Riebesell <[email protected]>
- Loading branch information
1 parent
ad8257c
commit b55cf83
Showing
109 changed files
with
1,921 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Module for QChem workflows.""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
"""Drones for parsing QChem calculations and related outputs.""" | ||
|
||
from __future__ import annotations | ||
|
||
import logging | ||
import os | ||
from pathlib import Path | ||
|
||
from emmet.core.qc_tasks import TaskDoc | ||
from pymatgen.apps.borg.hive import AbstractDrone | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class QChemDrone(AbstractDrone):
    """
    A QChem drone to parse QChem outputs.

    Parameters
    ----------
    **task_document_kwargs
        Additional keyword args passed to :obj:`.TaskDoc.from_directory`.
    """

    def __init__(self, **task_document_kwargs) -> None:
        self.task_document_kwargs = task_document_kwargs

    def assimilate(self, path: str | Path | None = None) -> TaskDoc:
        """
        Parse QChem output files and return the output document.

        Parameters
        ----------
        path : str or Path or None
            Path to the directory containing mol.qout and other output files.
            Falls back to the current working directory when falsy.

        Returns
        -------
        TaskDoc
            A QChem task document.

        Raises
        ------
        Exception
            Any error raised by ``TaskDoc.from_directory`` is logged and then
            re-raised unchanged.
        """
        path = path or Path.cwd()
        try:
            return TaskDoc.from_directory(path, **self.task_document_kwargs)
        except Exception:
            # ``logger.exception`` attaches the active traceback on its own, so the
            # message only needs to identify the offending directory.
            logger.exception("Error in %s", Path(path).absolute())
            raise

    def get_valid_paths(self, path: tuple[str, list[str], list[str]]) -> list[str]:
        """
        Get valid paths to assimilate.

        The parent directory is considered valid when either one of its immediate
        subdirectories contains a file whose name starts with ``mol.qout.``, or the
        parent itself contains a file matching ``mol.qout*``. The parent is returned
        at most once, even when several of these conditions hold.

        Parameters
        ----------
        path : tuple of (str, list of str, list of str)
            Input path as a tuple generated from ``os.walk``, i.e., (parent, subdirs,
            files).

        Returns
        -------
        list of str
            A list of paths to assimilate (either empty or ``[parent]``).
        """
        parent, subdirs, _ = path
        # NOTE(review): ``str.endswith`` compares literally, so this entry only
        # excludes a directory that is literally named "mol.qout.*" -- confirm
        # whether a glob-style match was intended.
        task_names = ["mol.qout.*"]
        valid_paths: list[str] = []

        # One matching subdirectory is enough; stop early so that the parent is not
        # added once per subdirectory.
        for sdir in subdirs:
            fnames = os.listdir(os.path.join(parent, sdir))
            if any(name.startswith("mol.qout.") for name in fnames):
                valid_paths.append(parent)
                break

        if (
            parent not in valid_paths
            and not any(parent.endswith(os.sep + r) for r in task_names)
            and any(Path(parent).glob("mol.qout*"))
        ):
            valid_paths.append(parent)
        return valid_paths
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
"""Functions for manipulating QChem files.""" | ||
|
||
from __future__ import annotations | ||
|
||
import logging | ||
import re | ||
from pathlib import Path | ||
from typing import TYPE_CHECKING | ||
|
||
from atomate2.common.files import copy_files, get_zfile, gunzip_files, rename_files | ||
from atomate2.utils.file_client import FileClient, auto_fileclient | ||
from atomate2.utils.path import strip_hostname | ||
|
||
if TYPE_CHECKING: | ||
from collections.abc import Sequence | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@auto_fileclient
def copy_qchem_outputs(
    src_dir: Path | str,
    src_host: str | None = None,
    additional_qchem_files: Sequence[str] = (),
    file_client: FileClient | None = None,
) -> None:
    """
    Copy QChem output files to the current directory.

    When the source folder holds several calculations (files suffixed with
    ``.opt_1``, ``.opt_2``, ...), only the files carrying the highest numbered
    suffix are copied and the suffix is stripped afterwards. The same suffix is
    applied to any additional files that are requested. Copied files are gunzipped.

    Parameters
    ----------
    src_dir : str or Path
        The source directory.
    src_host : str or None
        The source hostname used to specify a remote filesystem. Can be given as
        either "username@remote_host" or just "remote_host" in which case the username
        will be inferred from the current user. If ``None``, the local filesystem will
        be used as the source.
    additional_qchem_files : list of str
        Additional files to copy.
    file_client : .FileClient
        A file client to use for performing file operations.
    """
    src_dir = strip_hostname(src_dir)  # TODO: Handle hostnames properly.

    logger.info(f"Copying QChem inputs from {src_dir}")
    opt_ext = get_largest_opt_extension(src_dir, src_host, file_client=file_client)
    directory_listing = file_client.listdir(src_dir, host=src_host)

    # Resolve every wanted file (with the opt suffix appended) against the listing.
    wanted_names = ["mol.qin", "mol.qout"]
    wanted_names.extend(additional_qchem_files)
    required_files = []
    for wanted in wanted_names:
        required_files.append(get_zfile(directory_listing, wanted + opt_ext))

    copy_files(
        src_dir,
        src_host=src_host,
        include_files=required_files,
        file_client=file_client,
    )

    gunzip_files(
        include_files=required_files,
        allow_missing=True,
        file_client=file_client,
    )

    # Drop the opt suffix from the (now gunzipped) copies.
    if opt_ext:
        files_to_rename = {}
        for zfile in required_files:
            gunzipped_name = zfile.name.replace(".gz", "")
            final_name = zfile.name.replace(opt_ext, "").replace(".gz", "")
            files_to_rename[gunzipped_name] = final_name
        rename_files(files_to_rename, allow_missing=True, file_client=file_client)

    logger.info("Finished copying inputs")
|
||
|
||
@auto_fileclient
def get_largest_opt_extension(
    directory: Path | str,
    host: str | None = None,
    file_client: FileClient | None = None,
) -> str:
    """
    Get the largest numbered opt extension of files in a directory.

    For example, if listdir gives ["mol.qout.opt_0.gz", "mol.qout.opt_1.gz"],
    this function will return ".opt_1".

    Parameters
    ----------
    directory : str or Path
        A directory to search.
    host : str or None
        The hostname used to specify a remote filesystem. Can be given as either
        "username@remote_host" or just "remote_host" in which case the username will be
        inferred from the current user. If ``None``, the local filesystem will be used.
    file_client : .FileClient
        A file client to use for performing file operations.

    Returns
    -------
    str
        The opt extension, or an empty string when no file name carries a
        ``.opt_<number>`` suffix.
    """
    candidates = file_client.glob(Path(directory) / "*.opt*", host=host)

    suffix_pattern = re.compile(r"\.opt_(\d+)")
    found = (suffix_pattern.search(candidate.name) for candidate in candidates)
    indices = [hit.group(1) for hit in found if hit]

    # Covers both "nothing globbed" and "globbed files without a numeric suffix".
    if not indices:
        return ""

    # Indices stay as the matched strings; they are only compared numerically.
    largest = max(indices, key=int)
    return f".opt_{largest}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Jobs for running QChem calculations.""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
"""Definition of a base QChem Maker.""" | ||
|
||
from __future__ import annotations | ||
|
||
from dataclasses import dataclass, field | ||
from pathlib import Path | ||
from typing import TYPE_CHECKING, Callable | ||
|
||
from emmet.core.qc_tasks import TaskDoc | ||
from jobflow import Maker, Response, job | ||
from monty.serialization import dumpfn | ||
from monty.shutil import gzip_dir | ||
from pymatgen.io.qchem.inputs import QCInput | ||
|
||
from atomate2.qchem.files import copy_qchem_outputs | ||
from atomate2.qchem.run import run_qchem, should_stop_children | ||
from atomate2.qchem.sets.base import QCInputGenerator | ||
|
||
if TYPE_CHECKING: | ||
from pymatgen.core.structure import Molecule | ||
|
||
|
||
def qchem_job(method: Callable) -> job:
    """
    Decorate the ``make`` method of QChem job makers.

    This is a thin wrapper around the jobflow ``job`` decorator that applies the
    same settings to every QChem job: ``QCInput`` is passed as the ``data``
    argument and :obj:`.TaskDoc` is used as the output schema.

    Any makers that return QChem jobs (not flows) should decorate the ``make`` method
    with @qchem_job. For example:

    .. code-block:: python

        class MyQChemMaker(BaseQCMaker):
            @qchem_job
            def make(self, molecule):
                # code to run QChem job.
                pass

    Parameters
    ----------
    method : callable
        A BaseQCMaker.make method. This should not be specified directly and is
        implied by the decorator.

    Returns
    -------
    callable
        A decorated version of the make function that will generate QChem jobs.
    """
    shared_settings = {"data": QCInput, "output_schema": TaskDoc}
    return job(method, **shared_settings)
|
||
|
||
@dataclass
class BaseQCMaker(Maker):
    """
    Base QChem job maker.

    Parameters
    ----------
    name : str
        The job name.
    input_set_generator : .QCInputGenerator
        A generator used to make the input set.
    write_input_set_kwargs : dict
        Keyword arguments intended for writing the input set. ``make`` only sets a
        ``from_prev`` default on this dict.
    copy_qchem_kwargs : dict
        Keyword arguments that will get passed to :obj:`.copy_qchem_outputs`.
    run_qchem_kwargs : dict
        Keyword arguments that will get passed to :obj:`.run_qchem`.
    task_document_kwargs : dict
        Keyword arguments that will get passed to :obj:`.TaskDoc.from_directory`.
    stop_children_kwargs : dict
        Keyword arguments that will get passed to :obj:`.should_stop_children`.
    write_additional_data : dict
        Additional data to write to the current directory. Given as a dict of
        {filename: data}. Note that if using FireWorks, dictionary keys cannot contain
        the "." character which is typically used to denote file extensions. To avoid
        this, use the ":" character, which will automatically be converted to ".". E.g.
        ``{"my_file:txt": "contents of the file"}``.
    """

    name: str = "base qchem job"
    input_set_generator: QCInputGenerator = field(
        default_factory=lambda: QCInputGenerator(
            job_type="sp", scf_algorithm="diis", basis_set="def2-qzvppd"
        )
    )
    write_input_set_kwargs: dict = field(default_factory=dict)
    copy_qchem_kwargs: dict = field(default_factory=dict)
    run_qchem_kwargs: dict = field(default_factory=dict)
    task_document_kwargs: dict = field(default_factory=dict)
    stop_children_kwargs: dict = field(default_factory=dict)
    write_additional_data: dict = field(default_factory=dict)

    @qchem_job
    def make(
        self, molecule: Molecule, prev_qchem_dir: str | Path | None = None
    ) -> Response:
        """
        Run a QChem calculation.

        Parameters
        ----------
        molecule : Molecule
            A pymatgen molecule object.
        prev_qchem_dir : str or Path or None
            A previous QChem calculation directory to copy output files from.

        Returns
        -------
        Response
            A response whose output is the parsed task document, whose stored data
            holds the document's ``custodian`` attribute, and whose
            ``stop_children`` flag comes from ``should_stop_children``.
        """
        # Pull in files from an earlier calculation when one was supplied.
        has_previous = prev_qchem_dir is not None
        if has_previous:
            copy_qchem_outputs(prev_qchem_dir, **self.copy_qchem_kwargs)

        # NOTE(review): this only sets a default on the maker's own dict; the
        # kwargs are not forwarded to anything in this method -- confirm intent.
        self.write_input_set_kwargs.setdefault("from_prev", has_previous)

        # NOTE(review): the generated input set is discarded here (a
        # ``.write_inputs()`` call was previously commented out) -- presumably the
        # inputs are written elsewhere; verify.
        self.input_set_generator.get_input_set(molecule)

        # Dump user-supplied extras, mapping ":" in the key back to ".".
        for raw_name, payload in self.write_additional_data.items():
            dumpfn(payload, raw_name.replace(":", "."))

        # Execute QChem in the current directory.
        run_qchem(**self.run_qchem_kwargs)

        # Parse the results; the maker's name is recorded as the task type.
        parsed_doc = TaskDoc.from_directory(Path.cwd(), **self.task_document_kwargs)
        parsed_doc.task_type = self.name

        # Decide whether downstream jobs may continue.
        halt_children = should_stop_children(parsed_doc, **self.stop_children_kwargs)

        # Compress everything in the working directory.
        gzip_dir(".")

        return Response(
            stop_children=halt_children,
            stored_data={"custodian": parsed_doc.custodian},
            output=parsed_doc,
        )
Oops, something went wrong.