From 3a7171c811835a2cbba8c16e04c08455d19ddac9 Mon Sep 17 00:00:00 2001 From: SchrodingersCattt Date: Wed, 3 Jul 2024 18:25:35 +0800 Subject: [PATCH 1/6] quipGapXYZ: add unit convert and synonym matching --- dpdata/xyz/quip_gap_xyz.py | 189 ++++++++++++++++++++++++++----------- 1 file changed, 136 insertions(+), 53 deletions(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index b23b27e0..e1892837 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -6,6 +6,14 @@ from collections import OrderedDict import numpy as np +from ..unit import EnergyConversion, ForceConversion, LengthConversion + +e_conv_kcalpermol2eV = EnergyConversion("kcal_mol", "eV").value() +e_conv_au2eV = EnergyConversion("hartree", "eV").value() +f_conv_kcalpermolperang2eVperang = ForceConversion("kcal_mol/angstrom", "eV/angstrom").value() +f_conv_auperang2eVperang = ForceConversion("hartree/angstrom", "eV/angstrom").value() +f_conv_kcalpermolperbohr2eVperang = ForceConversion("kcal_mol/bohr", "eV/angstrom").value() +f_conv_au2eVperang = ForceConversion("hartree/bohr", "eV/angstrom").value() class QuipGapxyzSystems: @@ -38,10 +46,13 @@ def get_block_generator(self): lines.append(self.file_object.readline()) if not lines[-1]: raise RuntimeError( - f"this xyz file may lack of lines, should be {atom_num + 2};lines:{lines}" + "this xyz file may lack of lines, should be {};lines:{}".format( + atom_num + 2, lines + ) ) yield lines + @staticmethod def handle_single_xyz_frame(lines): atom_num = int(lines[0].strip("\n").strip()) @@ -82,56 +93,68 @@ def handle_single_xyz_frame(lines): force_array = None virials = None for kv_dict in prop_list: - if kv_dict["key"] == "species": - if kv_dict["datatype"] != "S": - raise RuntimeError( - "datatype for species must be 'S' instead of {}".format( - kv_dict["datatype"] + try: + if kv_dict["key"] == "species": + if kv_dict["datatype"] != "S": + raise RuntimeError( + "datatype for species must be 'S' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - type_array = data_array[ - :, used_colomn : used_colomn + field_length - ].flatten() - used_colomn += field_length - continue - elif kv_dict["key"] == "pos": - if kv_dict["datatype"] != "R": - raise RuntimeError( - "datatype for pos must be 'R' instead of {}".format( - kv_dict["datatype"] + field_length = int(kv_dict["value"]) + type_array = data_array[ + :, used_colomn : used_colomn + field_length + ].flatten() + used_colomn += field_length + continue + elif kv_dict["key"] == "pos": + if kv_dict["datatype"] != "R": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - coords_array = data_array[:, used_colomn : used_colomn + field_length] - used_colomn += field_length - continue - elif kv_dict["key"] == "Z": - if kv_dict["datatype"] != "I": - raise RuntimeError( - "datatype for pos must be 'R' instead of {}".format( - kv_dict["datatype"] + field_length = int(kv_dict["value"]) + coords_array = data_array[:, used_colomn : used_colomn + field_length] + used_colomn += field_length + continue + elif kv_dict["key"] == "Z": + if kv_dict["datatype"] != "I": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - Z_array = data_array[ - :, used_colomn : used_colomn + field_length - ].flatten() - used_colomn += field_length - continue - elif kv_dict["key"] == "force": - if kv_dict["datatype"] != "R": - raise RuntimeError( - "datatype for pos must be 'R' instead of {}".format( - kv_dict["datatype"] + field_length = int(kv_dict["value"]) + Z_array = data_array[ + :, used_colomn : used_colomn + field_length + ].flatten() + used_colomn += field_length + continue + elif kv_dict["key"] == "tags": + if kv_dict["datatype"] != "I": + raise RuntimeError( + "datatype for tags must be 'I' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - force_array = data_array[:, used_colomn : used_colomn + field_length] - used_colomn += field_length + field_length = int(kv_dict["value"]) + used_colomn += field_length + continue + elif kv_dict["key"] == "force" or kv_dict["key"] == "forces" : + if kv_dict["datatype"] != "R": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + force_array = data_array[:, used_colomn : used_colomn + field_length] + used_colomn += field_length + continue + except Exception as e: + print("unknown field {}".format(kv_dict["key"]), e) continue - else: - raise RuntimeError("unknown field {}".format(kv_dict["key"])) type_num_dict = OrderedDict() atom_type_list = [] @@ -164,22 +187,82 @@ def handle_single_xyz_frame(lines): ).astype("float32") else: virials = None + + try: + e_units = np.array([field_dict["energy-unit"].lower()]) + f_units = np.array([field_dict["force-unit"].lower()]) + except: + pass + #print('No units information contained.') + info_dict = {} + info_dict["nopbc"] = False info_dict["atom_names"] = list(type_num_array[:, 0]) info_dict["atom_numbs"] = list(type_num_array[:, 1].astype(int)) info_dict["atom_types"] = np.array(atom_type_list).astype(int) - info_dict["cells"] = np.array( - [ - np.array(list(filter(bool, field_dict["Lattice"].split(" ")))).reshape( - 3, 3 - ) - ] - ).astype("float32") + info_dict["coords"] = np.array([coords_array]).astype("float32") + ''' info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") info_dict["forces"] = np.array([force_array]).astype("float32") + ''' + + try: + if e_units == "kcal/mol": + info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") * e_conv_kcalpermol2eV + elif e_units in ["hartree", "au", "a.u."]: + info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") * e_conv_au2eV + elif e_units == "ev": + info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") + else: info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") + except Exception: + try: + possible_fields = [ + "energy", + "energies", + "Energies", + "potential-energy.energy", + "Energy" + ] + for key in possible_fields: + if key in field_dict: + info_dict["energies"] = np.array([field_dict[key]], dtype="float32") + break + else: + raise ValueError("No valid energy field found in field_dict.") + except KeyError: + raise ValueError("Error while accessing energy fields in field_dict.") + + try: + if f_units == "kcal/mol/angstrom": + info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang + elif f_units == "hartree/angstrom" or f_units == "hartree/ang" or f_units == "hartree/ang.": + info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_auperang2eVperang + elif f_units == "kcal/mol/bohr": + info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperbohr2eVperang + elif f_units == "kcal/mol/bohr": + info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_au2eVperang + elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.": + info_dict["forces"] = np.array([force_array]).astype("float32") + else: info_dict["forces"] = np.array([force_array]).astype("float32") + except: + info_dict["forces"] = np.array([force_array]).astype("float32") + if virials is not None: info_dict["virials"] = virials info_dict["orig"] = np.zeros(3) + if "Lattice" in field_dict and field_dict["Lattice"].strip(): + lattice_values = list(filter(bool, field_dict["Lattice"].split(" "))) + info_dict["cells"] = np.array( + [np.array(lattice_values).reshape(3, 3)] + ).astype("float32") + else: + lattice_values = np.array([[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]]) + + info_dict["nopbc"] = True + info_dict["cells"] = np.array( + [np.array(lattice_values).reshape(3, 3)] + ).astype("float32") + return info_dict From 974c1b25c4c236614a648c2806b79fd5c614c33b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Jul 2024 10:29:38 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpdata/xyz/quip_gap_xyz.py | 106 +++++++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 35 deletions(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index e1892837..304f4239 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -6,13 +6,18 @@ from collections import OrderedDict import numpy as np -from ..unit import EnergyConversion, ForceConversion, LengthConversion + +from ..unit import EnergyConversion, ForceConversion e_conv_kcalpermol2eV = EnergyConversion("kcal_mol", "eV").value() e_conv_au2eV = EnergyConversion("hartree", "eV").value() -f_conv_kcalpermolperang2eVperang = ForceConversion("kcal_mol/angstrom", "eV/angstrom").value() +f_conv_kcalpermolperang2eVperang = ForceConversion( + "kcal_mol/angstrom", "eV/angstrom" +).value() f_conv_auperang2eVperang = ForceConversion("hartree/angstrom", "eV/angstrom").value() -f_conv_kcalpermolperbohr2eVperang = ForceConversion("kcal_mol/bohr", "eV/angstrom").value() +f_conv_kcalpermolperbohr2eVperang = ForceConversion( + "kcal_mol/bohr", "eV/angstrom" +).value() f_conv_au2eVperang = ForceConversion("hartree/bohr", "eV/angstrom").value() @@ -46,13 +51,10 @@ def get_block_generator(self): lines.append(self.file_object.readline()) if not lines[-1]: raise RuntimeError( - "this xyz file may lack of lines, should be {};lines:{}".format( - atom_num + 2, lines - ) + f"this xyz file may lack of lines, should be {atom_num + 2};lines:{lines}" ) yield lines - @staticmethod def handle_single_xyz_frame(lines): atom_num = int(lines[0].strip("\n").strip()) @@ -115,7 +117,9 @@ def handle_single_xyz_frame(lines): ) ) field_length = int(kv_dict["value"]) - coords_array = data_array[:, used_colomn : used_colomn + field_length] + coords_array = data_array[ + :, used_colomn : used_colomn + field_length + ] used_colomn += field_length continue elif kv_dict["key"] == "Z": @@ -140,8 +144,8 @@ def handle_single_xyz_frame(lines): ) field_length = int(kv_dict["value"]) used_colomn += field_length - continue - elif kv_dict["key"] == "force" or kv_dict["key"] == "forces" : + continue + elif kv_dict["key"] == "force" or kv_dict["key"] == "forces": if kv_dict["datatype"] != "R": raise RuntimeError( "datatype for pos must be 'R' instead of {}".format( @@ -149,7 +153,9 @@ def handle_single_xyz_frame(lines): ) ) field_length = int(kv_dict["value"]) - force_array = data_array[:, used_colomn : used_colomn + field_length] + force_array = data_array[ + :, used_colomn : used_colomn + field_length + ] used_colomn += field_length continue except Exception as e: @@ -187,14 +193,13 @@ def handle_single_xyz_frame(lines): ).astype("float32") else: virials = None - + try: e_units = np.array([field_dict["energy-unit"].lower()]) f_units = np.array([field_dict["force-unit"].lower()]) except: pass - #print('No units information contained.') - + # print('No units information contained.') info_dict = {} info_dict["nopbc"] = False @@ -203,19 +208,29 @@ def handle_single_xyz_frame(lines): info_dict["atom_types"] = np.array(atom_type_list).astype(int) info_dict["coords"] = np.array([coords_array]).astype("float32") - ''' + """ info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") info_dict["forces"] = np.array([force_array]).astype("float32") - ''' + """ try: if e_units == "kcal/mol": - info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") * e_conv_kcalpermol2eV + info_dict["energies"] = ( + np.array([field_dict["energy"]]).astype("float32") + * e_conv_kcalpermol2eV + ) elif e_units in ["hartree", "au", "a.u."]: - info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") * e_conv_au2eV + info_dict["energies"] = ( + np.array([field_dict["energy"]]).astype("float32") * e_conv_au2eV + ) elif e_units == "ev": - info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") - else: info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") + info_dict["energies"] = np.array([field_dict["energy"]]).astype( + "float32" + ) + else: + info_dict["energies"] = np.array([field_dict["energy"]]).astype( + "float32" + ) except Exception: try: possible_fields = [ @@ -223,31 +238,50 @@ def handle_single_xyz_frame(lines): "energies", "Energies", "potential-energy.energy", - "Energy" + "Energy", ] for key in possible_fields: if key in field_dict: - info_dict["energies"] = np.array([field_dict[key]], dtype="float32") + info_dict["energies"] = np.array( + [field_dict[key]], dtype="float32" + ) break else: raise ValueError("No valid energy field found in field_dict.") except KeyError: raise ValueError("Error while accessing energy fields in field_dict.") - + try: if f_units == "kcal/mol/angstrom": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang - elif f_units == "hartree/angstrom" or f_units == "hartree/ang" or f_units == "hartree/ang.": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_auperang2eVperang - elif f_units == "kcal/mol/bohr": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperbohr2eVperang - elif f_units == "kcal/mol/bohr": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_au2eVperang - elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.": - info_dict["forces"] = np.array([force_array]).astype("float32") - else: info_dict["forces"] = np.array([force_array]).astype("float32") + info_dict["forces"] = ( + np.array([force_array]).astype("float32") + * f_conv_kcalpermolperang2eVperang + ) + elif ( + f_units == "hartree/angstrom" + or f_units == "hartree/ang" + or f_units == "hartree/ang." + ): + info_dict["forces"] = ( + np.array([force_array]).astype("float32") * f_conv_auperang2eVperang + ) + elif f_units == "kcal/mol/bohr": + info_dict["forces"] = ( + np.array([force_array]).astype("float32") + * f_conv_kcalpermolperbohr2eVperang + ) + elif f_units == "kcal/mol/bohr": + info_dict["forces"] = ( + np.array([force_array]).astype("float32") * f_conv_au2eVperang + ) + elif ( + f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang." + ): + info_dict["forces"] = np.array([force_array]).astype("float32") + else: + info_dict["forces"] = np.array([force_array]).astype("float32") except: - info_dict["forces"] = np.array([force_array]).astype("float32") + info_dict["forces"] = np.array([force_array]).astype("float32") if virials is not None: info_dict["virials"] = virials @@ -258,7 +292,9 @@ def handle_single_xyz_frame(lines): [np.array(lattice_values).reshape(3, 3)] ).astype("float32") else: - lattice_values = np.array([[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]]) + lattice_values = np.array( + [[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]] + ) info_dict["nopbc"] = True info_dict["cells"] = np.array( From da807b88812090a5db28ce5fbeb064b638b44ffd Mon Sep 17 00:00:00 2001 From: SchrodingersCattt Date: Wed, 3 Jul 2024 18:36:12 +0800 Subject: [PATCH 3/6] style: add Exception after except --- dpdata/xyz/quip_gap_xyz.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index e1892837..6f40f75f 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -191,7 +191,7 @@ def handle_single_xyz_frame(lines): try: e_units = np.array([field_dict["energy-unit"].lower()]) f_units = np.array([field_dict["force-unit"].lower()]) - except: + except Exception: pass #print('No units information contained.') @@ -246,7 +246,7 @@ def handle_single_xyz_frame(lines): elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.": info_dict["forces"] = np.array([force_array]).astype("float32") else: info_dict["forces"] = np.array([force_array]).astype("float32") - except: + except Exception: info_dict["forces"] = np.array([force_array]).astype("float32") if virials is not None: From 37b96b8cce07a27022cd6b18e253505591de2f66 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Jul 2024 10:46:01 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpdata/xyz/quip_gap_xyz.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index 28846ec8..d08791dd 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -257,15 +257,15 @@ def handle_single_xyz_frame(lines): info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang elif f_units == "hartree/angstrom" or f_units == "hartree/ang" or f_units == "hartree/ang.": info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_auperang2eVperang - elif f_units == "kcal/mol/bohr": + elif f_units == "kcal/mol/bohr": info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperbohr2eVperang - elif f_units == "kcal/mol/bohr": + elif f_units == "kcal/mol/bohr": info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_au2eVperang elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.": - info_dict["forces"] = np.array([force_array]).astype("float32") + info_dict["forces"] = np.array([force_array]).astype("float32") else: info_dict["forces"] = np.array([force_array]).astype("float32") except Exception: - info_dict["forces"] = np.array([force_array]).astype("float32") + info_dict["forces"] = np.array([force_array]).astype("float32") ======= info_dict["forces"] = ( np.array([force_array]).astype("float32") From aebec7c0ac5ce842e2da4a7ed7ff2caacafb8170 Mon Sep 17 00:00:00 2001 From: SchrodingersCattt Date: Wed, 3 Jul 2024 18:53:52 +0800 Subject: [PATCH 5/6] fix: removed redundant --- dpdata/xyz/quip_gap_xyz.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index d08791dd..11b91a23 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -253,20 +253,6 @@ def handle_single_xyz_frame(lines): try: if f_units == "kcal/mol/angstrom": -<<<<<<< HEAD - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang - elif f_units == "hartree/angstrom" or f_units == "hartree/ang" or f_units == "hartree/ang.": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_auperang2eVperang - elif f_units == "kcal/mol/bohr": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperbohr2eVperang - elif f_units == "kcal/mol/bohr": - info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_au2eVperang - elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.": - info_dict["forces"] = np.array([force_array]).astype("float32") - else: info_dict["forces"] = np.array([force_array]).astype("float32") - except Exception: - info_dict["forces"] = np.array([force_array]).astype("float32") -======= info_dict["forces"] = ( np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang @@ -296,7 +282,6 @@ def handle_single_xyz_frame(lines): info_dict["forces"] = np.array([force_array]).astype("float32") except: info_dict["forces"] = np.array([force_array]).astype("float32") ->>>>>>> origin/devel if virials is not None: info_dict["virials"] = virials From 81a27d84b7cc74553e87078863f115063860369e Mon Sep 17 00:00:00 2001 From: SchrodingersCattt Date: Wed, 3 Jul 2024 19:00:50 +0800 Subject: [PATCH 6/6] style: add Exception after except --- dpdata/xyz/quip_gap_xyz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index 11b91a23..6c5db29f 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -280,7 +280,7 @@ def handle_single_xyz_frame(lines): info_dict["forces"] = np.array([force_array]).astype("float32") else: info_dict["forces"] = np.array([force_array]).astype("float32") - except: + except Exception: info_dict["forces"] = np.array([force_array]).astype("float32") if virials is not None: