Merge pull request #82 from deepmodeling/devel

support for pwmat
deepmodeling · Feb 5, 2020 · 2dac5ff · 2dac5ff
2 parents 94022da + b931cd0
commit 2dac5ff
Show file tree

Hide file tree

Showing 22 changed files with 28,478 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -107,7 +107,9 @@ xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/')
 | QE      | log    | True         | False   | System        | 'qe/cp/traj'  |
 | QE      | log    | True         | True    | LabeledSystem | 'qe/cp/traj'  |
 |quip/gap|xyz|True|True|MultiSystems|'quip/gap/xyz'|
-
+| PWmat   | atom.config | False        | False   | System        | 'pwmat/atom.config'  |
+| PWmat   | movement    | True         | True    | LabeledSystem | 'pwmat/movement'     |
+| PWmat   | OUT.MLMD    | True         | True    | LabeledSystem | 'pwmat/out.mlmd'     |
 ## Access data
 These properties stored in `System` and `LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example
 ```python

diff --git a/dpdata/pwmat/atomconfig.py b/dpdata/pwmat/atomconfig.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python3 
+
+import numpy as np
+
+def _to_system_data_lower(lines) :
+    system = {}
+    natoms = int(lines[0].split()[0])
+    cell = []
+    for idx, ii in enumerate(lines):
+        if 'lattice' in ii or 'Lattice' in ii or 'LATTICE' in ii:
+            for kk in range(idx+1,idx+1+3):
+                vector=[float(jj) for jj in lines[kk].split()[0:3]]
+                cell.append(vector)
+    system['cells'] = [np.array(cell)]
+    coord = []
+    atomic_number = []
+    atom_numbs = []
+    for idx, ii in enumerate(lines):
+        if 'Position' in ii or 'POSITION' in ii or 'position' in ii:
+            for kk in range(idx+1,idx+1+natoms):
+                min = kk
+                for jj in range(kk+1,idx+1+natoms):
+                    if int(lines[jj].split()[0]) < int(lines[min].split()[0]):
+                        min = jj
+                        lines[min], lines[kk] = lines[kk],lines[min]
+            for gg in range(idx+1,idx+1+natoms):
+                tmpv = [float(jj) for jj in lines[gg].split()[1:4]]
+                tmpv = np.matmul(np.array(tmpv), system['cells'][0])
+                coord.append(tmpv)
+                tmpn = int(lines[gg].split()[0])
+                atomic_number.append(tmpn)
+    for ii in np.unique(sorted(atomic_number)) :
+        atom_numbs.append(atomic_number.count(ii))
+    system['atom_numbs'] = [int(ii) for ii in atom_numbs]
+    system['coords'] = [np.array(coord)]
+    system['orig'] = np.zeros(3)
+    atom_types = []
+    for idx,ii in enumerate(system['atom_numbs']) :
+        for jj in range(ii) :
+            atom_types.append(idx)
+    system['atom_types'] = np.array(atom_types, dtype = int)
+    ELEMENTS=['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', \
+            'Sc', 'Ti', 'V', 'Cr','Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', \
+            'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',\
+            'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', \
+            'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', \
+            'Md', 'No', 'Lr']
+
+    system['atom_names'] = [ELEMENTS[ii-1] for ii in np.unique(sorted(atomic_number))]
+    return system
+
+
+def to_system_data(lines) :
+    return _to_system_data_lower(lines)
+
+
+def from_system_data(system, f_idx = 0, skip_zeros = True) :
+    ret = ''
+    natoms = sum(system['atom_numbs'])
+    ret += '%d' % natoms
+    ret += '\n'
+    ret += 'LATTICE'
+    ret += '\n'
+    for ii in system['cells'][f_idx] :
+        for jj in ii :
+            ret += '%.16e ' % jj
+        ret += '\n'
+    ret += 'POSITION'
+    ret += '\n'
+    atom_numbs = system['atom_numbs']
+    atom_names = system['atom_names']
+    atype = system['atom_types']
+    posis = system['coords'][f_idx]    
+    # atype_idx = [[idx,tt] for idx,tt in enumerate(atype)]
+    # sort_idx = np.argsort(atype, kind = 'mergesort')
+    sort_idx = np.lexsort((np.arange(len(atype)), atype))
+    atype = atype[sort_idx]
+    posis = posis[sort_idx]
+    symbal = []
+    for ii, jj in zip(atom_numbs, atom_names):
+        for kk in range(ii):
+            symbal.append(jj) 
+    ELEMENTS=['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', \
+            'Sc', 'Ti', 'V', 'Cr','Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', \
+            'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',\
+            'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', \
+            'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', \
+            'Md', 'No', 'Lr']
+    atomic_numbers = []
+    for ii in symbal:
+        atomic_numbers.append(ELEMENTS.index(ii)+1)
+    posi_list = []
+    for jj, ii in zip(atomic_numbers,posis) :
+        ii = np.matmul(ii, np.linalg.inv(system['cells'][0]))
+        posi_list.append('%d %15.10f %15.10f %15.10f 1 1 1' % \
+                         (jj, ii[0], ii[1], ii[2])
+        )
+    for kk in range(len(posi_list)):
+        min = kk
+        for jj in range(kk,len(posi_list)):
+            if int(posi_list[jj].split()[0]) < int(posi_list[min].split()[0]):
+                min = jj
+                posi_list[min], posi_list[kk] = posi_list[kk],posi_list[min]
+    posi_list.append('')
+    ret += '\n'.join(posi_list)
+    return ret
diff --git a/dpdata/pwmat/movement.py b/dpdata/pwmat/movement.py
@@ -0,0 +1,180 @@
+import numpy as np
+
+def system_info (lines, type_idx_zero = False) :
+    atom_names = []
+    atom_numbs = []
+    nelm = 0
+    natoms = int(lines[0].split()[0])
+    iteration = float(lines[0].split('Etot')[0].split('=')[1].split(',')[0])
+#    print(iteration)
+    if iteration > 0 :
+        nelm = 40
+    else:
+        nelm = 100
+    atomic_number = []
+    for idx,ii in enumerate(lines):
+        if 'Position' in ii:
+            for kk in range(idx+1,idx+1+natoms) :
+                min = kk
+                for jj in range(kk+1,idx+1+natoms):
+                    if int(lines[jj].split()[0]) < int(lines[min].split()[0]):
+                        min = jj
+                        lines[min], lines[kk] = lines[kk],lines[min]
+            for gg in range(idx+1,idx+1+natoms):
+                tmpn = int(lines[gg].split()[0])
+                atomic_number.append(tmpn)
+    for ii in np.unique(sorted(atomic_number)) :
+        atom_numbs.append(atomic_number.count(ii))
+    atom_types = []
+    for idx,ii in enumerate(atom_numbs) :
+        for jj in range(ii) :
+            if type_idx_zero :
+                atom_types.append(idx)
+            else :
+                atom_types.append(idx+1)
+    ELEMENTS=['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', \
+            'Sc', 'Ti', 'V', 'Cr','Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', \
+            'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',\
+            'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', \
+            'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', \
+            'Md', 'No', 'Lr']
+    for ii in np.unique(sorted(atomic_number)):
+        atom_names.append(ELEMENTS[ii-1])
+    return atom_names, atom_numbs, np.array(atom_types, dtype = int), nelm
+
+
+def get_movement_block(fp) :
+    blk = []
+    for ii in fp :
+        if not ii:
+            return blk
+        blk.append(ii.rstrip('\n'))
+        if '------------' in ii:
+            return blk
+    return blk
+
+# we assume that the force is printed ...
+def get_frames (fname, begin = 0, step = 1) :
+    fp = open(fname)
+    blk = get_movement_block(fp)
+
+    atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero = True)
+    ntot = sum(atom_numbs)
+
+    all_coords = []
+    all_cells = []
+    all_energies = []
+    all_atomic_energy = []
+    all_forces = []
+    all_virials = []    
+
+    cc = 0
+    while len(blk) > 0 :
+        if cc >= begin and (cc - begin) % step == 0 :
+            coord, cell, energy, force, virial, is_converge = analyze_block(blk, ntot, nelm)
+            if is_converge : 
+                if len(coord) == 0:
+                    break
+                all_coords.append(coord)
+                all_cells.append(cell)
+                all_energies.append(energy)
+                all_forces.append(force)
+                if virial is not None :
+                    all_virials.append(virial)
+        blk = get_movement_block(fp)
+        cc += 1
+
+    if len(all_virials) == 0 :
+        all_virials = None
+    else :
+        all_virials = np.array(all_virials)
+    fp.close()
+    return atom_names, atom_numbs, atom_types, np.array(all_cells), np.array(all_coords), \
+            np.array(all_energies), np.array(all_forces), all_virials
+
+
+def analyze_block(lines, ntot, nelm) :
+    coord = []
+    cell = []
+    energy = None
+#    atomic_energy = []
+    force = []
+    virial = None
+    is_converge = True
+    sc_index = 0
+    for idx,ii in enumerate(lines) :
+        if 'Iteration' in ii:
+            sc_index = int(ii.split('SCF =')[1])
+            if sc_index >= nelm:
+                is_converge = False
+            energy = float(ii.split('Etot,Ep,Ek (eV)')[1].split()[1])
+        elif '----------' in ii:
+            assert((force is not None) and len(coord) > 0 and len(cell) > 0)
+            # all_coords.append(coord)
+            # all_cells.append(cell)
+            # all_energies.append(energy)
+            # all_forces.append(force)
+            # if virial is not None :
+            #     all_virials.append(virial)
+            return coord, cell, energy, force, virial, is_converge
+#        elif 'NPT' in ii:
+#            tmp_v = []
+        elif 'Lattice vector' in ii:
+            if 'stress' in lines[idx+1]:
+                tmp_v = []
+                for dd in range(3) :
+                    tmp_l = lines[idx+1+dd]
+                    cell.append([float(ss)
+                         for ss in tmp_l.split()[0:3]])
+                    tmp_v.append([float(stress) for stress in tmp_l.split()[5:8]])
+                virial = np.zeros([3,3])
+                virial[0][0] = tmp_v[0][0]
+                virial[0][1] = tmp_v[0][1]
+                virial[0][2] = tmp_v[0][2]
+                virial[1][0] = tmp_v[1][0]
+                virial[1][1] = tmp_v[1][1]
+                virial[1][2] = tmp_v[1][2]
+                virial[2][0] = tmp_v[2][0]
+                virial[2][1] = tmp_v[2][1]
+                virial[2][2] = tmp_v[2][2]
+                volume = np.linalg.det(np.array(cell))
+                virial = virial*160.2*10.0/volume
+            else:
+                for dd in range(3) :
+                    tmp_l = lines[idx+1+dd]
+                    cell.append([float(ss)
+                         for ss in tmp_l.split()[0:3]])
+
+#            else :
+#                for dd in range(3) :
+#                    tmp_l = lines[idx+1+dd]
+#                    cell.append([float(ss) 
+#                                 for ss in tmp_l.split()[0:3]])
+#                virial = np.zeros([3,3])
+        elif 'Position' in ii:
+            for kk in range(idx+1, idx+1+ntot):
+                min = kk
+                for jj in range(kk+1,idx+1+ntot):
+                    if int(lines[jj].split()[0]) < int(lines[min].split()[0]):
+                        min = jj
+                        lines[min], lines[kk] = lines[kk],lines[min]
+            for gg in range(idx+1,idx+1+ntot):
+                info = [float(jj) for jj in lines[gg].split()[1:4]]
+                info = np.matmul(np.array(info),np.array(cell))
+                coord.append(info)
+        elif 'Force' in ii:
+            for kk in range(idx+1, idx+1+ntot):
+                min = kk
+                for jj in range(kk+1,idx+1+ntot):
+                    if int(lines[jj].split()[0]) < int(lines[min].split()[0]):
+                        min = jj
+                        lines[min], lines[kk] = lines[kk],lines[min]
+            for gg in range(idx+1,idx+1+ntot):
+                info = [float(ss) for ss in lines[gg].split()]
+                force.append(info[1:4])
+#        elif 'Atomic-Energy' in ii:
+#            for jj in range(idx+1, idx+1+ntot) :
+#                tmp_l = lines[jj]
+#                info = [float(ss) for ss in tmp_l.split()]
+#                atomic_energy.append(info[1])
+    return coord, cell, energy, force, virial, is_converge
diff --git a/dpdata/system.py b/dpdata/system.py
@@ -18,6 +18,8 @@
 import dpdata.gaussian.log
 import dpdata.cp2k.output
 from dpdata.cp2k.output import Cp2kSystems
+import dpdata.pwmat.movement
+import dpdata.pwmat.atomconfig
 from copy import deepcopy
 from monty.json import MSONable
 from monty.serialization import loadfn,dumpfn
@@ -86,6 +88,7 @@ def __init__ (self,
                 - ``qe/cp/traj``: Quantum Espresso CP trajectory files. should have: file_name+'.in' and file_name+'.pos'
                 - ``siesta/output``: siesta SCF output file
                 - ``siesta/aimd_output``: siesta aimd output file
+                - ``pwmat/atom.config``: pwmat atom.config
         type_map : list of str
             Needed by formats lammps/lmp and lammps/dump. Maps atom type to name. The atom with type `ii` is mapped to `type_map[ii]`.
             If not provided the atom names are assigned to `'Type_1'`, `'Type_2'`, `'Type_3'`...
@@ -652,7 +655,34 @@ def from_siesta_aiMD_output(self, fname):
         self.data['coords'], \
         _e, _f, _v \
             = dpdata.siesta.aiMD_output.get_aiMD_frame(fname)
-
+    @register_from_funcs.register_funcs('atom.config')
+    @register_from_funcs.register_funcs('final.config')
+    @register_from_funcs.register_funcs('pwmat/atom.config')
+    @register_from_funcs.register_funcs('pwmat/final.config')
+    def from_pwmat_atomconfig(self, file_name) :
+        with open(file_name) as fp:
+            lines = [line.rstrip('\n') for line in fp]
+            self.data = dpdata.pwmat.atomconfig.to_system_data(lines)
+        self.rot_lower_triangular()
+
+    @register_to_funcs.register_funcs("pwmat/atom.config")
+    def to_pwmat_atomconfig(self, file_name, frame_idx = 0) :
+        """
+        Dump the system in pwmat atom.config format
+
+        Parameters
+        ----------
+        file_name : str
+            The output file name
+        frame_idx : int
+            The index of the frame to dump
+        """
+        assert(frame_idx < len(self.data['coords']))
+        w_str = dpdata.pwmat.atomconfig.from_system_data(self.data, frame_idx)
+        with open(file_name, 'w') as fp:
+            fp.write(w_str)
+
+
     def affine_map(self, trans, f_idx = 0) :
         assert(np.linalg.det(trans) != 0)
         self.data['cells'][f_idx] = np.matmul(self.data['cells'][f_idx], trans)
@@ -891,6 +921,8 @@ def __init__ (self,
                 - ``gaussian/md``: gaussian ab initio molecular dynamics
                 - ``cp2k/output``: cp2k output file
                 - ``cp2k/aimd_output``: cp2k aimd output  dir(contains *pos*.xyz and *.log file)
+                - ``pwmat/movement``: pwmat md output file
+                - ``pwmat/out.mlmd``: pwmat scf output file
 
         type_map : list of str
             Needed by formats deepmd/raw and deepmd/npy. Maps atom type to name. The atom with type `ii` is mapped to `type_map[ii]`.
@@ -1111,6 +1143,34 @@ def from_cp2k_output(self, file_name) :
             self.data['energies'], \
             self.data['forces'], \
             = dpdata.cp2k.output.get_frames(file_name)
+    @register_from_funcs.register_funcs('movement')
+    @register_from_funcs.register_funcs('MOVEMENT')
+    @register_from_funcs.register_funcs('mlmd')
+    @register_from_funcs.register_funcs('MLMD')
+    @register_from_funcs.register_funcs('pwmat/movement')
+    @register_from_funcs.register_funcs('pwmat/MOVEMENT')
+    @register_from_funcs.register_funcs('pwmat/mlmd')
+    @register_from_funcs.register_funcs('pwmat/MLMD')
+    def from_pwmat_output(self, file_name, begin = 0, step = 1) :
+        self.data['atom_names'], \
+            self.data['atom_numbs'], \
+            self.data['atom_types'], \
+            self.data['cells'], \
+            self.data['coords'], \
+            self.data['energies'], \
+            self.data['forces'], \
+            tmp_virial, \
+            = dpdata.pwmat.movement.get_frames(file_name, begin = begin, step = step)
+        if tmp_virial is not None :
+            self.data['virials'] = tmp_virial
+        # scale virial to the unit of eV
+        if 'virials' in self.data :
+            v_pref = 1 * 1e3 / 1.602176621e6
+            for ii in range (self.get_nframes()) :
+                vol = np.linalg.det(np.reshape(self.data['cells'][ii], [3,3]))
+                self.data['virials'][ii] *= v_pref * vol
+        # rotate the system to lammps convention
+        self.rot_lower_triangular()
 
 
     def sub_system(self, f_idx) :