diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index b23b27e0..6c5db29f 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -7,6 +7,19 @@ import numpy as np +from ..unit import EnergyConversion, ForceConversion + +e_conv_kcalpermol2eV = EnergyConversion("kcal_mol", "eV").value() +e_conv_au2eV = EnergyConversion("hartree", "eV").value() +f_conv_kcalpermolperang2eVperang = ForceConversion( + "kcal_mol/angstrom", "eV/angstrom" +).value() +f_conv_auperang2eVperang = ForceConversion("hartree/angstrom", "eV/angstrom").value() +f_conv_kcalpermolperbohr2eVperang = ForceConversion( + "kcal_mol/bohr", "eV/angstrom" +).value() +f_conv_au2eVperang = ForceConversion("hartree/bohr", "eV/angstrom").value() + class QuipGapxyzSystems: """deal with QuipGapxyzFile.""" @@ -82,56 +95,72 @@ def handle_single_xyz_frame(lines): force_array = None virials = None for kv_dict in prop_list: - if kv_dict["key"] == "species": - if kv_dict["datatype"] != "S": - raise RuntimeError( - "datatype for species must be 'S' instead of {}".format( - kv_dict["datatype"] + try: + if kv_dict["key"] == "species": + if kv_dict["datatype"] != "S": + raise RuntimeError( + "datatype for species must be 'S' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - type_array = data_array[ - :, used_colomn : used_colomn + field_length - ].flatten() - used_colomn += field_length - continue - elif kv_dict["key"] == "pos": - if kv_dict["datatype"] != "R": - raise RuntimeError( - "datatype for pos must be 'R' instead of {}".format( - kv_dict["datatype"] + field_length = int(kv_dict["value"]) + type_array = data_array[ + :, used_colomn : used_colomn + field_length + ].flatten() + used_colomn += field_length + continue + elif kv_dict["key"] == "pos": + if kv_dict["datatype"] != "R": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - coords_array = data_array[:, used_colomn : used_colomn + field_length] - used_colomn += field_length - continue - elif kv_dict["key"] == "Z": - if kv_dict["datatype"] != "I": - raise RuntimeError( - "datatype for pos must be 'R' instead of {}".format( - kv_dict["datatype"] + field_length = int(kv_dict["value"]) + coords_array = data_array[ + :, used_colomn : used_colomn + field_length + ] + used_colomn += field_length + continue + elif kv_dict["key"] == "Z": + if kv_dict["datatype"] != "I": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - Z_array = data_array[ - :, used_colomn : used_colomn + field_length - ].flatten() - used_colomn += field_length - continue - elif kv_dict["key"] == "force": - if kv_dict["datatype"] != "R": - raise RuntimeError( - "datatype for pos must be 'R' instead of {}".format( - kv_dict["datatype"] + field_length = int(kv_dict["value"]) + Z_array = data_array[ + :, used_colomn : used_colomn + field_length + ].flatten() + used_colomn += field_length + continue + elif kv_dict["key"] == "tags": + if kv_dict["datatype"] != "I": + raise RuntimeError( + "datatype for tags must be 'I' instead of {}".format( + kv_dict["datatype"] + ) ) - ) - field_length = int(kv_dict["value"]) - force_array = data_array[:, used_colomn : used_colomn + field_length] - used_colomn += field_length + field_length = int(kv_dict["value"]) + used_colomn += field_length + continue + elif kv_dict["key"] == "force" or kv_dict["key"] == "forces": + if kv_dict["datatype"] != "R": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + force_array = data_array[ + :, used_colomn : used_colomn + field_length + ] + used_colomn += field_length + continue + except Exception as e: + print("unknown field {}".format(kv_dict["key"]), e) continue - else: - raise RuntimeError("unknown field {}".format(kv_dict["key"])) type_num_dict = OrderedDict() atom_type_list = [] @@ -165,21 +194,111 @@ def handle_single_xyz_frame(lines): else: virials = None + try: + e_units = np.array([field_dict["energy-unit"].lower()]) + f_units = np.array([field_dict["force-unit"].lower()]) + except Exception: + pass + # print('No units information contained.') + info_dict = {} + info_dict["nopbc"] = False info_dict["atom_names"] = list(type_num_array[:, 0]) info_dict["atom_numbs"] = list(type_num_array[:, 1].astype(int)) info_dict["atom_types"] = np.array(atom_type_list).astype(int) - info_dict["cells"] = np.array( - [ - np.array(list(filter(bool, field_dict["Lattice"].split(" ")))).reshape( - 3, 3 - ) - ] - ).astype("float32") + info_dict["coords"] = np.array([coords_array]).astype("float32") + """ info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") info_dict["forces"] = np.array([force_array]).astype("float32") + """ + + try: + if e_units == "kcal/mol": + info_dict["energies"] = ( + np.array([field_dict["energy"]]).astype("float32") + * e_conv_kcalpermol2eV + ) + elif e_units in ["hartree", "au", "a.u."]: + info_dict["energies"] = ( + np.array([field_dict["energy"]]).astype("float32") * e_conv_au2eV + ) + elif e_units == "ev": + info_dict["energies"] = np.array([field_dict["energy"]]).astype( + "float32" + ) + else: + info_dict["energies"] = np.array([field_dict["energy"]]).astype( + "float32" + ) + except Exception: + try: + possible_fields = [ + "energy", + "energies", + "Energies", + "potential-energy.energy", + "Energy", + ] + for key in possible_fields: + if key in field_dict: + info_dict["energies"] = np.array( + [field_dict[key]], dtype="float32" + ) + break + else: + raise ValueError("No valid energy field found in field_dict.") + except KeyError: + raise ValueError("Error while accessing energy fields in field_dict.") + + try: + if f_units == "kcal/mol/angstrom": + info_dict["forces"] = ( + np.array([force_array]).astype("float32") + * f_conv_kcalpermolperang2eVperang + ) + elif ( + f_units == "hartree/angstrom" + or f_units == "hartree/ang" + or f_units == "hartree/ang." + ): + info_dict["forces"] = ( + np.array([force_array]).astype("float32") * f_conv_auperang2eVperang + ) + elif f_units == "kcal/mol/bohr": + info_dict["forces"] = ( + np.array([force_array]).astype("float32") + * f_conv_kcalpermolperbohr2eVperang + ) + elif f_units == "kcal/mol/bohr": + info_dict["forces"] = ( + np.array([force_array]).astype("float32") * f_conv_au2eVperang + ) + elif ( + f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang." + ): + info_dict["forces"] = np.array([force_array]).astype("float32") + else: + info_dict["forces"] = np.array([force_array]).astype("float32") + except Exception: + info_dict["forces"] = np.array([force_array]).astype("float32") + if virials is not None: info_dict["virials"] = virials info_dict["orig"] = np.zeros(3) + if "Lattice" in field_dict and field_dict["Lattice"].strip(): + lattice_values = list(filter(bool, field_dict["Lattice"].split(" "))) + info_dict["cells"] = np.array( + [np.array(lattice_values).reshape(3, 3)] + ).astype("float32") + else: + lattice_values = np.array( + [[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]] + ) + + info_dict["nopbc"] = True + info_dict["cells"] = np.array( + [np.array(lattice_values).reshape(3, 3)] + ).astype("float32") + return info_dict