Merge pull request #272 from amcadmus/master

Merge devel into master
deepmodeling · Apr 21, 2022 · 7029230 · 7029230
2 parents 81c656c + 17af7f7
commit 7029230
Show file tree

Hide file tree

Showing 122 changed files with 6,112 additions and 3,036 deletions.
diff --git a/.gitignore b/.gitignore
@@ -22,3 +22,6 @@ _version.py
 !tests/cp2k/aimd/cp2k.log
 !tests/cp2k/restart_aimd/ch4.log
 __pycache__
+docs/_build
+docs/formats.csv
+docs/api/
diff --git a/docs/conf.py b/docs/conf.py
@@ -14,14 +14,15 @@
 #
 import os
 import sys
+import subprocess as sp
 from datetime import date
 sys.path.insert(0, os.path.abspath('..'))
 
 
 # -- Project information -----------------------------------------------------
 
 project = 'dpdata'
-copyright = '2019-%d, Deep Modeling ' % date.today().year
+copyright = '2019-%d, DeepModeling ' % date.today().year
 author = 'Han Wang'
 
 # The short X.Y version
@@ -40,6 +41,7 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
+    'deepmodeling_sphinx',
     'sphinx_rtd_theme',
     'sphinx.ext.mathjax',
     'sphinx.ext.viewcode',
@@ -170,8 +172,12 @@ def run_apidoc(_):
     module = os.path.join(cur_dir, "..", "dpdata")
     main(['-M', '--tocfile', 'api', '-H', 'API documentation', '-o', os.path.join(cur_dir, "api"), module, '--force'])
 
+def run_formats(_):
+    """Regenerate ``formats.csv`` by running ``make_format.py`` in a subprocess.
+
+    Connected to the ``builder-inited`` event in ``setup``; the event argument
+    is unused.  The script is addressed by a bare relative name and writes its
+    output relative to the working directory, so the subprocess is started in
+    the directory that contains this ``conf.py`` rather than in whatever
+    directory the documentation build happened to be launched from.
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        if the script exits with a non-zero status
+    """
+    docs_dir = os.path.dirname(os.path.abspath(__file__))
+    sp.check_output([sys.executable, "make_format.py"], cwd=docs_dir)
+
 def setup(app):
+    # Connect both generators to the 'builder-inited' event so that the API
+    # pages and the formats table are produced by the callbacks below.
     app.connect('builder-inited', run_apidoc)
+    app.connect('builder-inited', run_formats)
 
 
 intersphinx_mapping = {

diff --git a/docs/formats.rst b/docs/formats.rst
@@ -0,0 +1,9 @@
+Supported Formats
+=================
+
+dpdata supports the following formats:
+
+.. csv-table:: Supported Formats
+   :file: formats.csv
+   :header-rows: 1
+
diff --git a/docs/index.rst b/docs/index.rst
@@ -10,6 +10,7 @@ Welcome to dpdata's documentation!
    :maxdepth: 2
    :caption: Contents:
 
+   formats
    api/api
 
 .. mdinclude:: ../README.md

diff --git a/docs/make_format.py b/docs/make_format.py
@@ -0,0 +1,90 @@
+import csv
+from collections import defaultdict
+
+# ensure all plugins are loaded!
+import dpdata.plugins
+from dpdata.format import Format
+from dpdata.system import get_cls_name
+
+
+def get_formats() -> dict:
+    """Group the registered format keys by the format they map to.
+
+    Returns
+    -------
+    dict
+        maps every value of ``Format.get_formats()`` to the list of keys
+        registered for it
+    """
+    aliases_by_format = defaultdict(list)
+    registered = Format.get_formats()
+    for alias in registered:
+        aliases_by_format[registered[alias]].append(alias)
+    return aliases_by_format
+
+def detect_overridden(cls: Format, method: str) -> bool:
+    """Check whether a class defines a method itself.
+
+    Only the class's own namespace is inspected, so a method that is merely
+    inherited from a base class does not count.
+
+    Parameters
+    ----------
+    cls : Format
+        a format
+    method : str
+        method name
+
+    Returns
+    -------
+    bool
+        whether the method is overridden
+    """
+    own_namespace = vars(cls)
+    return method in own_namespace
+
+def get_cls_link(cls: object) -> str:
+    """Build the reST ``:class:`` cross-reference for a class.
+
+    Parameters
+    ----------
+    cls : object
+        the class
+
+    Returns
+    -------
+    str
+        a ``:class:`` role whose title is the class name and whose target is
+        the module-qualified class name
+    """
+    title = cls.__name__
+    target = cls.__module__ + "." + title
+    return ':class:`{} <{}>`'.format(title, target)
+
+def check_supported(fmt: Format):
+    """Collect the names of the conversion methods a format supports.
+
+    A method counts as supported when the format class defines it itself.
+    Defining ``to_system`` also marks ``to_labeled_system`` as supported, and
+    a ``MultiMode`` other than ``MultiModes.NotImplemented`` marks both
+    multi-systems methods as supported.
+
+    Parameters
+    ----------
+    fmt : Format
+        a format
+
+    Returns
+    -------
+    set
+        names of the supported methods
+    """
+    candidates = (
+        'from_system', 'to_system',
+        'from_labeled_system', 'to_labeled_system',
+        'from_bond_order_system', 'to_bond_order_system',
+        'from_multi_systems', 'to_multi_systems',
+    )
+    supported = {name for name in candidates if detect_overridden(fmt, name)}
+    if 'to_system' in supported:
+        supported.add('to_labeled_system')
+    if fmt.MultiMode != fmt.MultiModes.NotImplemented:
+        supported.update(('from_multi_systems', 'to_multi_systems'))
+    return supported
+
+# reST cross-reference shown in the table for each supported method name.
+# The declaration order below is also the order used inside a table cell.
+method_links = {
+    "from_system": ":func:`System() <dpdata.system.System>`",
+    "to_system": ":func:`System.to() <dpdata.system.System.to>`",
+    "from_labeled_system": ":func:`LabeledSystem() <dpdata.system.LabeledSystem>`",
+    "to_labeled_system": ":func:`LabeledSystem.to() <dpdata.system.System.to>`",
+    "from_bond_order_system": ":func:`BondOrderSystem() <dpdata.bond_order_system.BondOrderSystem>`",
+    "to_bond_order_system": ":func:`BondOrderSystem.to() <dpdata.system.System.to>`",
+    "from_multi_systems": ":func:`MultiSystems.load_systems_from_file() <dpdata.system.MultiSystems.load_systems_from_file>`",
+    "to_multi_systems": ":func:`MultiSystems.to() <dpdata.system.MultiSystems.to>`",
+}
+
+if __name__ == "__main__":
+    # Write one CSV row per format: class link, aliases, supported functions.
+    formats = get_formats()
+    with open('formats.csv', 'w', newline='') as csvfile:
+        fieldnames = [
+            'Class', 'Alias', 'Supported Functions',
+            ]
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+
+        writer.writeheader()
+        for kk, vv in formats.items():
+            # check_supported() returns a set, and the iteration order of a
+            # set of strings differs between interpreter runs.  Walk
+            # method_links instead so the generated file is reproducible.
+            supported = check_supported(kk)
+            writer.writerow({
+                'Class': get_cls_link(kk),
+                'Alias': '\n'.join('``%s``' % vvv for vvv in vv),
+                'Supported Functions': '\n'.join(
+                    link for mtd, link in method_links.items() if mtd in supported
+                ),
+            })
diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py
@@ -1,6 +1,8 @@
+from ast import dump
 import os,sys
 import numpy as np
-from .scf import ry2ev, kbar2evperang3, get_block, get_geometry_in, get_cell, get_coords
+from .scf import ry2ev, bohr2ang, kbar2evperang3, get_block, get_geometry_in, get_cell, get_coords
+import re
 
 # Read in geometries from an ABACUS MD trajectory.
 # The atomic coordinates are read in from generated files in OUT.XXXX.
@@ -24,126 +26,49 @@ def get_path_out(fname, inlines):
 
 def get_coord_dump_freq(inlines):
+    # Return the integer that follows the ``md_dumpfreq`` keyword on the first
+    # line whose first token is that keyword; fall back to 1 when no such
+    # line exists.
     for line in inlines:
-        if  len(line)>0 and "md_dumpmdfred" in line and "md_dumpmdfred" == line.split()[0]:
+        if  len(line)>0 and "md_dumpfreq" in line and "md_dumpfreq" == line.split()[0]:
             return int(line.split()[1])
     return 1
 
-# set up a cell according to cell info in cif file.
-# maybe useful later
-'''
-def setup_cell(a, b, c, alpha, beta, gamma):
-    cell = np.zeros(3, 3)
-    cell[0, 0] = a
-    cell[1, 0] = b*np.cos(gamma/180*np.pi)
-    cell[1, 1] = b*np.sin(gamma/180*np.pi)
-    cell[2, 0] = c*np.cos(beta/180*np.pi)
-    cell[2, 1] = c*(b*np.cos(alpha/180*np.pi) - cell[1, 0]*np.cos(beta/180*np.pi))/cell[1, 1]
-    cell[2, 2] = np.sqrt(c**2 - cell[2, 0]**2 - cell[2, 1]**2)
-    return cell
-'''
-
-def get_single_coord_from_cif(pos_file, atom_names, natoms, cell):
-    assert(len(atom_names) == len(natoms))
-    nele = len(atom_names)
+def get_coords_from_dump(dumplines, natoms):
+    """Parse coordinates, cells, forces and stresses from MD dump lines.
+
+    A frame starts at every line containing ``MDSTEP``.  Relative to that
+    line, the lattice constant is the last field of line +1, the three
+    lattice vectors are on lines +3..+5, the three stress rows on lines
+    +7..+9 and one line per atom starts at line +11.  On an atom line the
+    last three fields are read as the force and the three fields before
+    them as the position.
+
+    Parameters
+    ----------
+    dumplines : list of str
+        lines of the dump file
+    natoms : list of int
+        number of atoms of each type; only their sum is used
+
+    Returns
+    -------
+    coords : np.ndarray
+        positions, multiplied by the lattice constant and by ``bohr2ang``
+    cells : np.ndarray
+        lattice vectors, multiplied by the lattice constant and by ``bohr2ang``
+    forces : np.ndarray
+        forces exactly as written in the dump
+    stresses : np.ndarray
+        stress rows multiplied by ``kbar2evperang3``
+    """
+    nlines = len(dumplines)
     total_natoms = sum(natoms)
-    coord = np.zeros([total_natoms, 3])
-    a = 0
-    b = 0
-    c = 0
-    alpha = 0
-    beta = 0
-    gamma = 0
-    with open(pos_file, "r") as fp:
-        lines = fp.read().split("\n")
-    for line in lines:
-        if "_cell_length_a" in line:
-            a = float(line.split()[1])
-        if "_cell_length_b" in line:
-            b = float(line.split()[1])
-        if "_cell_length_c" in line:
-            c = float(line.split()[1])  
-        if "_cell_angle_alpha" in line:
-            alpha = float(line.split()[1])
-        if "_cell_angle_beta" in line:
-            beta = float(line.split()[1])
-        if "_cell_angle_gamma" in line:
-            gamma = float(line.split()[1])
-    assert(a > 0 and b > 0 and c > 0 and alpha > 0 and beta > 0 and gamma > 0)
-    #cell = setup_cell(a, b, c, alpha, beta, gamma)
-    coord_lines = get_block(lines=lines, keyword="_atom_site_fract_z", skip=0, nlines = total_natoms)
-
-    ia_idx = 0
-    for it in range(nele):
-        for ia in range(natoms[it]):
-            coord_line = coord_lines[ia_idx].split()
-            assert(coord_line[0] == atom_names[it])
-            coord[ia_idx, 0] = float(coord_line[1])
-            coord[ia_idx, 1] = float(coord_line[2])
-            coord[ia_idx, 2] = float(coord_line[3])
-            ia_idx+=1
-    coord = np.matmul(coord, cell)
-    # important! Coordinates are converted to Cartesian coordinate.
-    return coord
+    # every frame is taken to occupy total_natoms + 13 lines of the dump
+    nframes_dump = int(nlines/(total_natoms + 13))
 
-
-def get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell):
-    total_natoms = sum(natoms)
-    #cell = np.zeros(ndump, 3, 3)
-    coords = np.zeros([ndump, total_natoms, 3])
-    pos_file = os.path.join(path_out, "STRU_READIN_ADJUST.cif")
-    # frame 0 file is different from any other frames
-    coords[0] = get_single_coord_from_cif(pos_file, atom_names, natoms, cell)
-    for dump_idx in range(1, ndump):
-        pos_file = os.path.join(path_out, "md_pos_%d.cif" %(dump_idx*dump_freq))
-        #print("dump_idx = %s" %dump_idx)
-        coords[dump_idx] = get_single_coord_from_cif(pos_file, atom_names, natoms, cell)
-    return coords
+    cells = np.zeros([nframes_dump, 3, 3])
+    stresses = np.zeros([nframes_dump, 3, 3])
+    forces = np.zeros([nframes_dump, total_natoms, 3])
+    coords = np.zeros([nframes_dump, total_natoms, 3])
+    iframe = 0
+    for iline in range(nlines):
+        if "MDSTEP" in dumplines[iline]:
+            # str.split() with no argument splits on any whitespace run and
+            # ignores leading/trailing blanks, so no regular expression is needed
+            # read in LATTICE_CONSTANT
+            celldm = float(dumplines[iline+1].split()[-1])
+            # read in LATTICE_VECTORS
+            for ix in range(3):
+                cells[iframe, ix] = np.array([float(i) for i in dumplines[iline+3+ix].split()[-3:]]) * celldm
+                stresses[iframe, ix] = np.array([float(i) for i in dumplines[iline+7+ix].split()[-3:]])
+            for iat in range(total_natoms):
+                # split each atom line once and slice position and force from it
+                atom_fields = dumplines[iline+11+iat].split()
+                coords[iframe, iat] = np.array([float(i) for i in atom_fields[-6:-3]])*celldm
+                forces[iframe, iat] = np.array([float(i) for i in atom_fields[-3:]])
+            iframe += 1
+    # the number of MDSTEP markers must agree with the line-count estimate
+    assert(iframe == nframes_dump)
+    cells *= bohr2ang
+    coords *= bohr2ang
+    stresses *= kbar2evperang3
+    return coords, cells, forces, stresses
 
-def get_energy_force_stress(outlines, inlines, dump_freq, ndump, natoms, atom_names):
-    stress = None
-    total_natoms = sum(natoms)
-    for line in inlines:
-        if len(line)>0 and "stress" in line and "stress" == line.split()[0] and "1" == line.split()[1]:
-            stress = np.zeros([ndump, 3, 3])
-            break
-    if type(stress) != np.ndarray:
-        print("The ABACUS program has no stress output. Stress will not be read.")
+def get_energy(outlines, ndump, dump_freq):
+    # Collect the energy of every ``dump_freq``-th line that contains
+    # "final etot is" (counting from the first such line); the value is the
+    # second-to-last whitespace-separated field of the line.  The number of
+    # collected values must equal ``ndump``.  Returns them as a numpy array.
+    energy = []
     nenergy = 0
-    nforce = 0
-    nstress = 0
-    energy = np.zeros(ndump)
-    force = np.zeros([ndump, total_natoms, 3])
-
     for line_idx, line in enumerate(outlines):
         if "final etot is" in line:
             if nenergy%dump_freq == 0:
-                energy[int(nenergy/dump_freq)] = float(line.split()[-2])
+                energy.append(float(line.split()[-2]))
             nenergy+=1
-        if "TOTAL-FORCE (eV/Angstrom)" in line:
-            for iatom in range(0, total_natoms):
-                force_line = outlines[line_idx+5+iatom]
-                atom_force = [float(i) for i in force_line.split()[1:]]
-                assert(len(atom_force) == 3)
-                atom_force = np.array(atom_force)
-                if nforce%dump_freq == 0:
-                    force[int(nforce/dump_freq), iatom] = atom_force
-            nforce+=1
-            assert(nforce==nenergy)
-        if "TOTAL-STRESS (KBAR)" in line:
-            for idx in range(0, 3):
-                stress_line = outlines[line_idx+4+idx]
-                single_stress = [float(i) for i in stress_line.split()]
-                if len(single_stress) != 3:
-                    print(single_stress)
-                assert(len(single_stress) == 3)
-                single_stress = np.array(single_stress)
-                if nstress%dump_freq == 0:
-                    stress[int(nstress/dump_freq), idx] = single_stress
-            nstress+=1
-            assert(nstress==nforce)
-    if type(stress) == np.ndarray:
-        stress *= kbar2evperang3
-    return energy, force, stress
+    assert(ndump == len(energy))
+    energy = np.array(energy)
+    return energy
 
 
 def get_frame (fname):
@@ -164,23 +89,27 @@ def get_frame (fname):
     atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines, inlines) 
     # This coords is not to be used.
     dump_freq = get_coord_dump_freq(inlines = inlines)
-    ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0])
+    #ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0])
     # number of dumped geometry files
-    coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell)
-
-    # TODO: Read in energies, forces and pressures.
+    #coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell)
+    with open(os.path.join(path_out, "MD_dump"), 'r') as fp:
+        dumplines = fp.read().split('\n')
+    coords, cells, force, stress = get_coords_from_dump(dumplines, natoms)
+    ndump = np.shape(coords)[0]
     with open(os.path.join(path_out, "running_md.log"), 'r') as fp:
         outlines = fp.read().split('\n')
-    energy, force, stress = get_energy_force_stress(outlines, inlines, dump_freq, ndump, natoms, atom_names)
-    if type(stress) == np.ndarray:
-        stress *= np.linalg.det(cell)
+    energy = get_energy(outlines, ndump, dump_freq)
+    for iframe in range(ndump):
+        stress[iframe] *= np.linalg.det(cells[iframe, :, :].reshape([3, 3]))
+    if np.sum(np.abs(stress[0])) < 1e-10:
+        stress = None
     data = {}
     data['atom_names'] = atom_names
     data['atom_numbs'] = natoms
     data['atom_types'] = types
-    data['cells'] = np.zeros([ndump, 3, 3])
-    for idx in range(ndump):
-        data['cells'][:, :, :] = cell
+    data['cells'] = cells
+    #for idx in range(ndump):
+    #    data['cells'][:, :, :] = cell
     data['coords'] = coords
     data['energies'] = energy
     data['forces'] = force