From 20677762198a57f653d2bdc34c77464ef760303b Mon Sep 17 00:00:00 2001 From: YuanbinLiu Date: Fri, 8 Nov 2024 07:11:59 +0000 Subject: [PATCH] fix minor issues --- autoplex/auto/rss/flow.py | 373 ++++++++++++++++--------------- autoplex/data/common/flows.py | 12 +- autoplex/data/common/utils.py | 7 +- autoplex/data/rss/flows.py | 1 - autoplex/fitting/common/utils.py | 17 +- 5 files changed, 210 insertions(+), 200 deletions(-) diff --git a/autoplex/auto/rss/flow.py b/autoplex/auto/rss/flow.py index 7075e751..40fec7db 100644 --- a/autoplex/auto/rss/flow.py +++ b/autoplex/auto/rss/flow.py @@ -41,191 +41,194 @@ def make(self, config_file: str | None = None, **kwargs): Path to the configuration file that defines the setup parameters for the whole RSS workflow. If not provided, the default file 'rss_default_configuration.yaml' will be used. **kwargs: dict, optional - Additional optional keyword arguments to customize the job execution. The allowed keys and values are: - - tag: str - Tag of systems. It can also be used for setting up elements and stoichiometry. - For example, the tag of 'SiO2' will be recognized as a 1:2 ratio of Si to O and - passed into the parameters of buildcell. However, note that this will be overwritten - if the stoichiometric ratio of elements is defined in the 'buildcell_options'. - - train_from_scratch : bool - If True, it starts the workflow from scratch. - If False, it resumes from a previous state. - - resume_from_previous_state: dict | None - A dictionary containing the state information required to resume a previously interrupted - or saved RSS workflow. When 'train_from_scratch' is set to False, this parameter is mandatory - for the workflow to pick up from a saved state. - Expected keys within this dictionary: - - test_error: float - The test error from the last completed training step. - - pre_database_dir: str - Path to the directory containing the pre-existing database for resuming. 
- - mlip_path: str - Path to the file of a previous MLIP model. - - isolated_atom_energies: dict - A dictionary of isolated atom energy values, with atomic numbers as keys - and their energies as valuables. - - generated_struct_numbers: list[int] - Expected number of generated randomized unit cells by buildcell. - - buildcell_options: list[dict] | None - Customized parameters for buildcell. Default is None. - - fragment: Atoms | list[Atoms] | None - Fragment(s) for random structures, e.g., molecules, to be placed indivudally intact. - atoms.arrays should have a 'fragment_id' key with unique identifiers for each fragment if in same Atoms. - atoms.cell must be defined (e.g., Atoms.cell = np.eye(3)*20). - - fragment_numbers: list[str] | None - Numbers of each fragment to be included in the random structures. Defaults to 1 for all specified. - - num_processes_buildcell: int - Number of processes to use for parallel computation during buildcell generation. - - num_of_initial_selected_structs: list[int] | None - Number of structures to be sampled directly from the buildcell-generated randomized cells. - - num_of_rss_selected_structs: int - Number of structures to be selected from each RSS iteration. - - initial_selection_enabled: bool - If true, sample structures from initially generated randomized cells using CUR. - - rss_selection_method: str - Method for selecting samples from the RSS trajectories: - Boltzmann flat histogram in enthalpy first, then CUR. - Options include: - - 'bcur1s': Execute bcur with one shot (1s) - - 'bcur2i': Execute bcur with two iterations (2i) - - bcur_params: dict | None - Parameters for Boltzmann CUR selection. The default dictionary includes: - - soap_paras: dict - SOAP descriptor parameters: - - l_max: int - Maximum degree of spherical harmonics (default 12). - - n_max: int - Maximum number of radial basis functions (default 12). - - atom_sigma: float - Width of Gaussian smearing (default 0.0875). 
- - cutoff: float - Radial cutoff distance (default 10.5). - - cutoff_transition_width: float - Width of the transition region (default 1.0). - - zeta: float - Exponent for dot-product SOAP kernel (default 4.0). - - average: bool - Whether to average the SOAP vectors (default True). - - species: bool - Whether to consider species information (default True). - - kt: float - Temperature in eV for Boltzmann weighting (default 0.3). - - frac_of_bcur: float - Fraction of Boltzmann CUR selections (default 0.8). - - bolt_max_num: int - Maximum number of Boltzmann selections (default 3000). - - kernel_exp: float - Exponent for the kernel (default 4.0). - - energy_label: str - Label for the energy data (default 'energy'). - - random_seed: int | None - A seed to ensure reproducibility of CUR selection. Default is None. - - include_isolated_atom: bool - If true, perform single-point calculations for isolated atoms. - - isolatedatom_box: list[float] - List of the lattice constants for an isolated atom configuration. - - e0_spin: bool - If true, include spin polarization in isolated atom and dimer calculations. Default is False. - - include_dimer: bool - If true, perform single-point calculations for dimers only once. Default is False. - - dimer_box: list[float] - The lattice constants of a dimer box. - - dimer_range: list[float] - Range of distances for dimer calculations. - - dimer_num: int - Number of different distances to consider for dimer calculations. Default is 21. - - custom_incar: dict | None - Dictionary of custom VASP input parameters. If provided, will update the - default parameters. Default is None. - - custom_potcar: dict | None - Dictionary of POTCAR settings to update. Keys are element symbols, values are the desired POTCAR labels. - Default is None. - - vasp_ref_file: str - Reference file for VASP data. Default is 'vasp_ref.extxyz'. - - config_types: list[str] - Configuration types for the VASP calculations. Default is None. 
- - rss_group: list[str] - Group name for RSS to setting up regularization. - - test_ratio: float - The proportion of the test set after splitting the data. The value is allowed to be set to 0; - in this case, the testing error would not be meaningful anymore. - - regularization: bool - If True, apply regularization. This only works for GAP to date. Default is False. - - scheme: str - Method to use for regularization. Options are: - - 'linear_hull': for single-composition system, use 2D convex hull (E, V) - - 'volume-stoichiometry': for multi-composition system, use 3D convex hull of (E, V, mole-fraction) - - reg_minmax: list[tuple] - list of tuples of (min, max) values for energy, force, virial sigmas for regularization. - - distillation: bool - If true, apply data distillation. Default is True. - - force_max: float | None - Maximum force value to exclude structures. Default is 50. - - force_label: str | None - The label of force values to use for distillation. Default is 'REF_forces'. - - pre_database_dir: str | None - Directory where the previous database was saved. - - mlip_type: str - Choose one specific MLIP type to be fitted: 'GAP' | 'J-ACE' | 'P-ACE' | 'NEQUIP' | 'M3GNET' | 'MACE'. - Default is 'GAP'. - - ref_energy_name: str - Reference energy name. Default is 'REF_energy'. - - ref_force_name: str - Reference force name. Default is 'REF_forces'. - - ref_virial_name: str - Reference virial name. Default is 'REF_virial'. - - auto_delta: bool - If true, apply automatic determination of delta for GAP terms. Default is False. - - num_processes_fit: int - Number of processes used for fitting. Default is 1. - - device_for_fitting: str - Device to be used for model fitting, either "cpu" or "cuda". - - **fit_kwargs: - Additional keyword arguments for the MLIP fitting process. - - scalar_pressure_method: str - Method for adding external pressures. - Acceptable options are: - - 'exp': Applies pressure using an exponential distribution. 
- - 'uniform': Applies pressure using a uniform distribution. - - scalar_exp_pressure: float - Scalar exponential pressure. Default is 100. - - scalar_pressure_exponential_width: float - Width for scalar pressure exponential. Default is 0.2. - - scalar_pressure_low: float - Low limit for scalar pressure. Default is 0. - - scalar_pressure_high: float - High limit for scalar pressure. Default is 50. - - max_steps: int - Maximum number of steps for relaxation. Default is 200. - - force_tol: float - Force residual tolerance for relaxation. Default is 0.05. - - stress_tol: float - Stress residual tolerance for relaxation. Default is 0.05. - - hookean_repul: bool - If true, apply Hookean repulsion. Default is False. - - hookean_paras: dict[tuple[int, int], tuple[float, float]] | None - Parameters for Hookean repulsion as a dictionary of tuples. Default is None. - - keep_symmetry: bool - If true, preserve symmetry during relaxation. Default is False. - - write_traj: bool - If true, write trajectory of RSS. Default is True. - - num_processes_rss: int - Number of processes used for running RSS. Default is 1. - - device_for_rss: str - Specify device to use "cuda" or "cpu" for running RSS. Default is "cpu". - - stop_criterion: float - Convergence criterion for stopping RSS iterations. Default is 0.01. - - max_iteration_number: int - Maximum number of RSS iterations to perform. Default is 25. - - num_groups: int - Number of structure groups, used for assigning tasks across multiple nodes. - For example, if there are 10,000 trajectories to relax and 'num_groups=10', - the trajectories will be divided into 10 groups and 10 independent jobs will be created, - with each job handling 1,000 trajectories. - - initial_kt: float - Initial temperature (in eV) for Boltzmann sampling. Default is 0.3. - - current_iter_index: int - Index for the current RSS iteration. Default is 1. + Additional optional keyword arguments to customize the job execution. 
+ + Keyword Arguments + ----------------- + - tag: str + Tag of systems. It can also be used for setting up elements and stoichiometry. + For example, the tag of 'SiO2' will be recognized as a 1:2 ratio of Si to O and + passed into the parameters of buildcell. However, note that this will be overwritten + if the stoichiometric ratio of elements is defined in the 'buildcell_options'. + - train_from_scratch : bool + If True, it starts the workflow from scratch. + If False, it resumes from a previous state. + - resume_from_previous_state: dict | None + A dictionary containing the state information required to resume a previously interrupted + or saved RSS workflow. When 'train_from_scratch' is set to False, this parameter is mandatory + for the workflow to pick up from a saved state. + Expected keys within this dictionary: + - test_error: float + The test error from the last completed training step. + - pre_database_dir: str + Path to the directory containing the pre-existing database for resuming. + - mlip_path: str + Path to the file of a previous MLIP model. + - isolated_atom_energies: dict + A dictionary of isolated atom energy values, with atomic numbers as keys + and their energies as values. + - generated_struct_numbers: list[int] + Expected number of generated randomized unit cells by buildcell. + - buildcell_options: list[dict] | None + Customized parameters for buildcell. Default is None. + - fragment: Atoms | list[Atoms] | None + Fragment(s) for random structures, e.g., molecules, to be placed individually intact. + atoms.arrays should have a 'fragment_id' key with unique identifiers for each fragment if in same Atoms. + atoms.cell must be defined (e.g., Atoms.cell = np.eye(3)*20). + - fragment_numbers: list[str] | None + Numbers of each fragment to be included in the random structures. Defaults to 1 for all specified. + - num_processes_buildcell: int + Number of processes to use for parallel computation during buildcell generation.
+ - num_of_initial_selected_structs: list[int] | None + Number of structures to be sampled directly from the buildcell-generated randomized cells. + - num_of_rss_selected_structs: int + Number of structures to be selected from each RSS iteration. + - initial_selection_enabled: bool + If true, sample structures from initially generated randomized cells using CUR. + - rss_selection_method: str + Method for selecting samples from the RSS trajectories: + Boltzmann flat histogram in enthalpy first, then CUR. + Options include: + - 'bcur1s': Execute bcur with one shot (1s) + - 'bcur2i': Execute bcur with two iterations (2i) + - bcur_params: dict | None + Parameters for Boltzmann CUR selection. The default dictionary includes: + - soap_paras: dict + SOAP descriptor parameters: + - l_max: int + Maximum degree of spherical harmonics (default 12). + - n_max: int + Maximum number of radial basis functions (default 12). + - atom_sigma: float + Width of Gaussian smearing (default 0.0875). + - cutoff: float + Radial cutoff distance (default 10.5). + - cutoff_transition_width: float + Width of the transition region (default 1.0). + - zeta: float + Exponent for dot-product SOAP kernel (default 4.0). + - average: bool + Whether to average the SOAP vectors (default True). + - species: bool + Whether to consider species information (default True). + - kt: float + Temperature in eV for Boltzmann weighting (default 0.3). + - frac_of_bcur: float + Fraction of Boltzmann CUR selections (default 0.8). + - bolt_max_num: int + Maximum number of Boltzmann selections (default 3000). + - kernel_exp: float + Exponent for the kernel (default 4.0). + - energy_label: str + Label for the energy data (default 'energy'). + - random_seed: int | None + A seed to ensure reproducibility of CUR selection. Default is None. + - include_isolated_atom: bool + If true, perform single-point calculations for isolated atoms. 
+ - isolatedatom_box: list[float] + List of the lattice constants for an isolated atom configuration. + - e0_spin: bool + If true, include spin polarization in isolated atom and dimer calculations. Default is False. + - include_dimer: bool + If true, perform single-point calculations for dimers only once. Default is False. + - dimer_box: list[float] + The lattice constants of a dimer box. + - dimer_range: list[float] + Range of distances for dimer calculations. + - dimer_num: int + Number of different distances to consider for dimer calculations. Default is 21. + - custom_incar: dict | None + Dictionary of custom VASP input parameters. If provided, will update the + default parameters. Default is None. + - custom_potcar: dict | None + Dictionary of POTCAR settings to update. Keys are element symbols, values are the desired POTCAR labels. + Default is None. + - vasp_ref_file: str + Reference file for VASP data. Default is 'vasp_ref.extxyz'. + - config_types: list[str] + Configuration types for the VASP calculations. Default is None. + - rss_group: list[str] + Group name for RSS, used for setting up regularization. + - test_ratio: float + The proportion of the test set after splitting the data. The value is allowed to be set to 0; + in this case, the testing error would not be meaningful anymore. + - regularization: bool + If True, apply regularization. This only works for GAP to date. Default is False. + - scheme: str + Method to use for regularization. Options are: + - 'linear_hull': for single-composition system, use 2D convex hull (E, V) + - 'volume-stoichiometry': for multi-composition system, use 3D convex hull of (E, V, mole-fraction) + - reg_minmax: list[tuple] + List of tuples of (min, max) values for energy, force, virial sigmas for regularization. + - distillation: bool + If true, apply data distillation. Default is True. + - force_max: float | None + Maximum force value to exclude structures. Default is 50.
+ - force_label: str | None + The label of force values to use for distillation. Default is 'REF_forces'. + - pre_database_dir: str | None + Directory where the previous database was saved. + - mlip_type: str + Choose one specific MLIP type to be fitted: 'GAP' | 'J-ACE' | 'P-ACE' | 'NEQUIP' | 'M3GNET' | 'MACE'. + Default is 'GAP'. + - ref_energy_name: str + Reference energy name. Default is 'REF_energy'. + - ref_force_name: str + Reference force name. Default is 'REF_forces'. + - ref_virial_name: str + Reference virial name. Default is 'REF_virial'. + - auto_delta: bool + If true, apply automatic determination of delta for GAP terms. Default is False. + - num_processes_fit: int + Number of processes used for fitting. Default is 1. + - device_for_fitting: str + Device to be used for model fitting, either "cpu" or "cuda". + - **fit_kwargs: + Additional keyword arguments for the MLIP fitting process. + - scalar_pressure_method: str + Method for adding external pressures. + Acceptable options are: + - 'exp': Applies pressure using an exponential distribution. + - 'uniform': Applies pressure using a uniform distribution. + - scalar_exp_pressure: float + Scalar exponential pressure. Default is 100. + - scalar_pressure_exponential_width: float + Width for scalar pressure exponential. Default is 0.2. + - scalar_pressure_low: float + Low limit for scalar pressure. Default is 0. + - scalar_pressure_high: float + High limit for scalar pressure. Default is 50. + - max_steps: int + Maximum number of steps for relaxation. Default is 200. + - force_tol: float + Force residual tolerance for relaxation. Default is 0.05. + - stress_tol: float + Stress residual tolerance for relaxation. Default is 0.05. + - hookean_repul: bool + If true, apply Hookean repulsion. Default is False. + - hookean_paras: dict[tuple[int, int], tuple[float, float]] | None + Parameters for Hookean repulsion as a dictionary of tuples. Default is None. 
+ - keep_symmetry: bool + If true, preserve symmetry during relaxation. Default is False. + - write_traj: bool + If true, write trajectory of RSS. Default is True. + - num_processes_rss: int + Number of processes used for running RSS. Default is 1. + - device_for_rss: str + Specify device to use "cuda" or "cpu" for running RSS. Default is "cpu". + - stop_criterion: float + Convergence criterion for stopping RSS iterations. Default is 0.01. + - max_iteration_number: int + Maximum number of RSS iterations to perform. Default is 25. + - num_groups: int + Number of structure groups, used for assigning tasks across multiple nodes. + For example, if there are 10,000 trajectories to relax and 'num_groups=10', + the trajectories will be divided into 10 groups and 10 independent jobs will be created, + with each job handling 1,000 trajectories. + - initial_kt: float + Initial temperature (in eV) for Boltzmann sampling. Default is 0.3. + - current_iter_index: int + Index for the current RSS iteration. Default is 1. 
Output ------ diff --git a/autoplex/data/common/flows.py b/autoplex/data/common/flows.py index 48212169..e5550cc1 100644 --- a/autoplex/data/common/flows.py +++ b/autoplex/data/common/flows.py @@ -17,7 +17,10 @@ GAPRelaxMaker, ) from atomate2.vasp.jobs.core import StaticMaker -from atomate2.vasp.powerups import update_user_incar_settings +from atomate2.vasp.powerups import ( + update_user_incar_settings, + update_user_potcar_settings, +) from atomate2.vasp.sets.core import StaticSetGenerator from custodian.vasp.handlers import ( FrozenJobErrorHandler, @@ -41,9 +44,12 @@ get_supercell_job, plot_force_distribution, ) -from autoplex.data.common.utils import ElementCollection +from autoplex.data.common.utils import ( + ElementCollection, + flatten, +) -__all__ = ["GenerateTrainingDataForTesting", "DFTStaticMaker"] +__all__ = ["GenerateTrainingDataForTesting", "DFTStaticLabelling"] logging.basicConfig(level=logging.DEBUG, format="[%(levelname)s] %(message)s") diff --git a/autoplex/data/common/utils.py b/autoplex/data/common/utils.py index 887e9530..f3b51964 100644 --- a/autoplex/data/common/utils.py +++ b/autoplex/data/common/utils.py @@ -79,15 +79,16 @@ def flatten(atoms_object: Atoms | Iterable, recursive: bool = False) -> list[Ato def rms_dict(x_ref: np.ndarray | list, x_pred: np.ndarray | list) -> dict: """Compute RMSE and standard deviation of predictions with reference data. - x_ref and x_pred should be of same shape. + Adapted and adjusted from libatoms GAP tutorial page + https://libatoms.github.io/GAP/gap_fitting_tutorial.html#make-simple-plots-of-the-energies-and-forces-on-the-EMT-and-GAP-datas Parameters ---------- - ----------1ยท - x_ref : np.ndarray. + x_ref: np.ndarray. list of reference data. x_pred: np.ndarray. list of prediction. + Note that x_ref and x_pred should be of same shape. 
Returns ------- diff --git a/autoplex/data/rss/flows.py b/autoplex/data/rss/flows.py index 1ea06f8d..93c53909 100644 --- a/autoplex/data/rss/flows.py +++ b/autoplex/data/rss/flows.py @@ -108,4 +108,3 @@ def make(self): replace=Flow(job_list), output=final_structures, ) - \ No newline at end of file diff --git a/autoplex/fitting/common/utils.py b/autoplex/fitting/common/utils.py index a115b584..c5ffb089 100644 --- a/autoplex/fitting/common/utils.py +++ b/autoplex/fitting/common/utils.py @@ -53,6 +53,7 @@ current_dir = Path(__file__).absolute().parent GAP_DEFAULTS_FILE_PATH = current_dir / "gap-defaults.json" +MLIP_PHONON_DEFAULTS_FILE_PATH = current_dir / "gap-defaults.json" MLIP_DEFAULTS_FILE_PATH = current_dir / "mlip-defaults.json" @@ -276,8 +277,8 @@ def jace_fitting( Additional keyword arguments for ACE fitting with keys same as those in mlip-defaults.json. - Tunable hyperparameters - ----------------------- + Keyword Arguments + ----------------- order: int Order of ACE. totaldegree: int @@ -459,8 +460,8 @@ def nequip_fitting( Additional keyword arguments for NequIP fitting with keys same as those in mlip-defaults.json. - Tunable hyperparameters - ----------------------- + Keyword Arguments + ----------------- r_max: float Cutoff radius in length units num_layers: int @@ -734,8 +735,8 @@ def m3gnet_fitting( Additional keyword arguments for M3GNet fitting with keys same as those in mlip-defaults.json. - Tunable hyperparameters - ----------------------- + Keyword Arguments + ----------------- exp_name: str Name of the experiment, used for saving model checkpoints and logs. results_dir: str @@ -1096,8 +1097,8 @@ def mace_fitting( Additional keyword arguments for MACE fitting with keys same as those in mlip-defaults.json. - Tunable hyperparameters - ----------------------- + Keyword Arguments + ----------------- model: str type of model to be trained. config_type_weights: str