Skip to content

Commit

Permalink
Refactor property fitting interface (#4471)
Browse files Browse the repository at this point in the history
Two main changes:
1. The program can read data files in `npy` format with any
prefix (`tc.npy`, `band_gap.npy`, ...). One just needs to write the name of
the property and the corresponding dimension in `model/fitting` in
`input.json`.
2. Data normalisation has been added to the program. Specifically, the
mean and standard deviation of the properties are calculated when
calculating the stat, the output of `fitting_net` is multiplied by the
standard deviation and then added to the mean to get the output of the
`PropertyModel`, and when calculating the loss, the loss is again
normalised.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

## Release Notes

- **New Features**
- Introduced new parameters for property loss calculations and model
fitting, enhancing flexibility and functionality.
- Added methods for retrieving property names and checking property
intensity across various classes.

- **Bug Fixes**
- Improved validation and error handling for property-related
calculations.

- **Documentation**
- Enhanced documentation for property fitting, including detailed
parameter explanations and structured training examples.
- Added documentation for new parameters in the JSON configuration
related to property fitting.

- **Tests**
- Added new test cases to validate the functionality of updated methods
and properties.
- Updated existing tests to utilize dynamic property names instead of
hardcoded strings.

- **Chores**
- Updated configuration files and test data to align with new property
handling features.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Chenqqian Zhang <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
Chengqian-Zhang and pre-commit-ci[bot] authored Dec 25, 2024
1 parent f8605ee commit f9f1759
Show file tree
Hide file tree
Showing 43 changed files with 652 additions and 146 deletions.
4 changes: 4 additions & 0 deletions deepmd/dpmodel/atomic_model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
from .polar_atomic_model import (
DPPolarAtomicModel,
)
from .property_atomic_model import (
DPPropertyAtomicModel,
)

__all__ = [
"BaseAtomicModel",
Expand All @@ -50,6 +53,7 @@
"DPDipoleAtomicModel",
"DPEnergyAtomicModel",
"DPPolarAtomicModel",
"DPPropertyAtomicModel",
"DPZBLLinearEnergyAtomicModel",
"LinearEnergyAtomicModel",
"PairTabAtomicModel",
Expand Down
24 changes: 24 additions & 0 deletions deepmd/dpmodel/atomic_model/property_atomic_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import numpy as np

from deepmd.dpmodel.fitting.property_fitting import (
PropertyFittingNet,
)
Expand All @@ -15,3 +17,25 @@ def __init__(self, descriptor, fitting, type_map, **kwargs):
"fitting must be an instance of PropertyFittingNet for DPPropertyAtomicModel"
)
super().__init__(descriptor, fitting, type_map, **kwargs)

def apply_out_stat(
    self,
    ret: dict[str, np.ndarray],
    atype: np.ndarray,
):
    """Denormalize each atomic output using the stored label statistics.

    In property fitting, every output is scaled by the label standard
    deviation and then shifted by the label mean, in place.

    Parameters
    ----------
    ret
        The returned dict by the forward_atomic method
    atype
        The atom types. nf x nloc. It is useless in property fitting.
    """
    bias, std = self._fetch_out_stat(self.bias_keys)
    for key in self.bias_keys:
        # scale by the label std, then shift by the label mean
        scaled = ret[key] * std[key][0]
        ret[key] = scaled + bias[key][0]
    return ret
18 changes: 9 additions & 9 deletions deepmd/dpmodel/fitting/property_fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,9 @@ class PropertyFittingNet(InvarFitting):
this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable.
intensive
Whether the fitting property is intensive.
bias_method
The method of applying the bias to each atomic output, user can select 'normal' or 'no_bias'.
If 'normal' is used, the computed bias will be added to the atomic output.
If 'no_bias' is used, no bias will be added to the atomic output.
property_name:
The name of fitting property, which should be consistent with the property name in the dataset.
If the data file is named `humo.npy`, this parameter should be "humo".
resnet_dt
Time-step `dt` in the resnet construction:
:math:`y = x + dt * \phi (Wx + b)`
Expand Down Expand Up @@ -74,7 +73,7 @@ def __init__(
rcond: Optional[float] = None,
trainable: Union[bool, list[bool]] = True,
intensive: bool = False,
bias_method: str = "normal",
property_name: str = "property",
resnet_dt: bool = True,
numb_fparam: int = 0,
numb_aparam: int = 0,
Expand All @@ -89,9 +88,8 @@ def __init__(
) -> None:
self.task_dim = task_dim
self.intensive = intensive
self.bias_method = bias_method
super().__init__(
var_name="property",
var_name=property_name,
ntypes=ntypes,
dim_descrpt=dim_descrpt,
dim_out=task_dim,
Expand All @@ -113,9 +111,9 @@ def __init__(
@classmethod
def deserialize(cls, data: dict) -> "PropertyFittingNet":
data = data.copy()
check_version_compatibility(data.pop("@version"), 3, 1)
check_version_compatibility(data.pop("@version"), 4, 1)
data.pop("dim_out")
data.pop("var_name")
data["property_name"] = data.pop("var_name")
data.pop("tot_ener_zero")
data.pop("layer_name")
data.pop("use_aparam_as_mask", None)
Expand All @@ -131,6 +129,8 @@ def serialize(self) -> dict:
**InvarFitting.serialize(self),
"type": "property",
"task_dim": self.task_dim,
"intensive": self.intensive,
}
dd["@version"] = 4

return dd
6 changes: 3 additions & 3 deletions deepmd/dpmodel/model/property_model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
from deepmd.dpmodel.atomic_model.dp_atomic_model import (
DPAtomicModel,
from deepmd.dpmodel.atomic_model import (
DPPropertyAtomicModel,
)
from deepmd.dpmodel.model.base_model import (
BaseModel,
Expand All @@ -13,7 +13,7 @@
make_model,
)

DPPropertyModel_ = make_model(DPAtomicModel)
DPPropertyModel_ = make_model(DPPropertyAtomicModel)


@BaseModel.register("property")
Expand Down
20 changes: 14 additions & 6 deletions deepmd/entrypoints/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,9 +779,17 @@ def test_property(
tuple[list[np.ndarray], list[int]]
arrays with results and their shapes
"""
data.add("property", dp.task_dim, atomic=False, must=True, high_prec=True)
var_name = dp.get_var_name()
assert isinstance(var_name, str)
data.add(var_name, dp.task_dim, atomic=False, must=True, high_prec=True)
if has_atom_property:
data.add("atom_property", dp.task_dim, atomic=True, must=False, high_prec=True)
data.add(
f"atom_{var_name}",
dp.task_dim,
atomic=True,
must=False,
high_prec=True,
)

if dp.get_dim_fparam() > 0:
data.add(
Expand Down Expand Up @@ -832,12 +840,12 @@ def test_property(
aproperty = ret[1]
aproperty = aproperty.reshape([numb_test, natoms * dp.task_dim])

diff_property = property - test_data["property"][:numb_test]
diff_property = property - test_data[var_name][:numb_test]
mae_property = mae(diff_property)
rmse_property = rmse(diff_property)

if has_atom_property:
diff_aproperty = aproperty - test_data["atom_property"][:numb_test]
diff_aproperty = aproperty - test_data[f"atom_{var_name}"][:numb_test]
mae_aproperty = mae(diff_aproperty)
rmse_aproperty = rmse(diff_aproperty)

Expand All @@ -854,7 +862,7 @@ def test_property(
detail_path = Path(detail_file)

for ii in range(numb_test):
test_out = test_data["property"][ii].reshape(-1, 1)
test_out = test_data[var_name][ii].reshape(-1, 1)
pred_out = property[ii].reshape(-1, 1)

frame_output = np.hstack((test_out, pred_out))
Expand All @@ -868,7 +876,7 @@ def test_property(

if has_atom_property:
for ii in range(numb_test):
test_out = test_data["atom_property"][ii].reshape(-1, 1)
test_out = test_data[f"atom_{var_name}"][ii].reshape(-1, 1)
pred_out = aproperty[ii].reshape(-1, 1)

frame_output = np.hstack((test_out, pred_out))
Expand Down
6 changes: 4 additions & 2 deletions deepmd/infer/deep_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ class DeepEvalBackend(ABC):
"dipole_derv_c_redu": "virial",
"dos": "atom_dos",
"dos_redu": "dos",
"property": "atom_property",
"property_redu": "property",
"mask_mag": "mask_mag",
"mask": "mask",
# old models in v1
Expand Down Expand Up @@ -276,6 +274,10 @@ def get_has_spin(self) -> bool:
"""Check if the model has spin atom types."""
return False

def get_var_name(self) -> str:
    """Get the name of the fitting property.

    The base implementation is intentionally unimplemented; backends
    that support property fitting override this method.
    """
    raise NotImplementedError

@abstractmethod
def get_ntypes_spin(self) -> int:
"""Get the number of spin atom types of this model. Only used in old implement."""
Expand Down
44 changes: 33 additions & 11 deletions deepmd/infer/deep_property.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,41 @@ class DeepProperty(DeepEval):
Keyword arguments.
"""

@property
def output_def(self) -> ModelOutputDef:
"""Get the output definition of this model."""
return ModelOutputDef(
"""
Get the output definition of this model.
But in property_fitting, the output definition is not known until the model is loaded.
So we need to rewrite the output definition after the model is loaded.
See detail in change_output_def.
"""
pass

def change_output_def(self) -> None:
"""
Change the output definition of this model.
In property_fitting, the output definition is known after the model is loaded.
We need to rewrite the output definition and related information.
"""
self.output_def = ModelOutputDef(
FittingOutputDef(
[
OutputVariableDef(
"property",
shape=[-1],
self.get_var_name(),
shape=[self.get_task_dim()],
reducible=True,
atomic=True,
intensive=self.get_intensive(),
),
]
)
)

def change_output_def(self) -> None:
self.output_def["property"].shape = self.task_dim
self.output_def["property"].intensive = self.get_intensive()
self.deep_eval.output_def = self.output_def
self.deep_eval._OUTDEF_DP2BACKEND[self.get_var_name()] = (
f"atom_{self.get_var_name()}"
)
self.deep_eval._OUTDEF_DP2BACKEND[f"{self.get_var_name()}_redu"] = (
self.get_var_name()
)

@property
def task_dim(self) -> int:
Expand Down Expand Up @@ -120,10 +136,12 @@ def eval(
aparam=aparam,
**kwargs,
)
atomic_property = results["property"].reshape(
atomic_property = results[self.get_var_name()].reshape(
nframes, natoms, self.get_task_dim()
)
property = results["property_redu"].reshape(nframes, self.get_task_dim())
property = results[f"{self.get_var_name()}_redu"].reshape(
nframes, self.get_task_dim()
)

if atomic:
return (
Expand All @@ -141,5 +159,9 @@ def get_intensive(self) -> bool:
"""Get whether the property is intensive."""
return self.deep_eval.get_intensive()

def get_var_name(self) -> str:
    """Get the name of the fitting property.

    Delegates to the backend evaluator, which knows the property name
    stored in the loaded model.
    """
    backend = self.deep_eval
    return backend.get_var_name()


__all__ = ["DeepProperty"]
11 changes: 10 additions & 1 deletion deepmd/pt/infer/deep_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,15 @@ def get_dim_aparam(self) -> int:
def get_intensive(self) -> bool:
return self.dp.model["Default"].get_intensive()

def get_var_name(self) -> str:
    """Get the name of the property.

    Raises
    ------
    NotImplementedError
        If the wrapped model does not expose a callable ``get_var_name``.
    """
    model = self.dp.model["Default"]
    name_getter = getattr(model, "get_var_name", None)
    if callable(name_getter):
        return name_getter()
    raise NotImplementedError

@property
def model_type(self) -> type["DeepEvalWrapper"]:
"""The the evaluator of the model type."""
Expand All @@ -200,7 +209,7 @@ def model_type(self) -> type["DeepEvalWrapper"]:
return DeepGlobalPolar
elif "wfc" in model_output_type:
return DeepWFC
elif "property" in model_output_type:
elif self.get_var_name() in model_output_type:
return DeepProperty
else:
raise RuntimeError("Unknown model type")
Expand Down
Loading

0 comments on commit f9f1759

Please sign in to comment.