From b95718a90d654295ee984991dc0bf0532bce75c0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 19 Dec 2024 03:06:28 -0500 Subject: [PATCH 1/8] fix: fix seed with multiple ranks Fix #4440. --- deepmd/pd/entrypoints/main.py | 2 +- deepmd/pt/entrypoints/main.py | 2 +- deepmd/pt/utils/dataloader.py | 6 ++++-- deepmd/tf/entrypoints/train.py | 5 +++-- deepmd/utils/random.py | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/deepmd/pd/entrypoints/main.py b/deepmd/pd/entrypoints/main.py index 3fa66312e7..bcc93432af 100644 --- a/deepmd/pd/entrypoints/main.py +++ b/deepmd/pd/entrypoints/main.py @@ -123,7 +123,7 @@ def prepare_trainer_input_single( # validation and training data # avoid the same batch sequence among devices - rank_seed = (seed + rank) % (2**32) if seed is not None else None + rank_seed = [rank, seed % (2**32)] if seed is not None else None validation_data_single = ( DpLoaderSet( validation_systems, diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index 1e5314a821..fd4be73e84 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -138,7 +138,7 @@ def prepare_trainer_input_single( # validation and training data # avoid the same batch sequence among devices - rank_seed = (seed + rank) % (2**32) if seed is not None else None + rank_seed = [rank, seed % (2**32)] if seed is not None else None validation_data_single = ( DpLoaderSet( validation_systems, diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 67e5195f6d..907fee7487 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -50,8 +50,10 @@ def setup_seed(seed) -> None: - torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) + if isinstance(seed, (list, tuple)): + mixed_seed = mix_entropy(seed) + torch.manual_seed(mixed_seed) + torch.cuda.manual_seed_all(mixed_seed) torch.backends.cudnn.deterministic = True dp_random.seed(seed) diff --git a/deepmd/tf/entrypoints/train.py b/deepmd/tf/entrypoints/train.py index 590fa04635..1762f1049a 100755 --- a/deepmd/tf/entrypoints/train.py +++ b/deepmd/tf/entrypoints/train.py @@ -220,9 +220,10 @@ def _do_work( seed = jdata["training"].get("seed", None) if seed is not None: # avoid the same batch sequence among workers - seed += run_opt.my_rank seed = seed % (2**32) - dp_random.seed(seed) + dp_random.seed([run_opt.my_rank, seed]) + else: + dp_random.seed(seed) # setup data modifier modifier = get_modifier(jdata["model"].get("modifier", None)) diff --git a/deepmd/utils/random.py b/deepmd/utils/random.py index b0c75600fb..10ebdf0790 100644 --- a/deepmd/utils/random.py +++ b/deepmd/utils/random.py @@ -56,7 +56,7 @@ def random(size=None): return _RANDOM_GENERATOR.random_sample(size) -def seed(val: Optional[int] = None) -> None: +def seed(val: Optional[Union[int, list[int]]] = None) -> None: """Seed the generator. Parameters From 124f4326d21bd5a8b0df17d9bf73d91461cc5d24 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 19 Dec 2024 03:08:05 -0500 Subject: [PATCH 2/8] Fix setup_seed function to handle non-list seed --- deepmd/pt/utils/dataloader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 907fee7487..6daa1a3435 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -52,6 +52,8 @@ def setup_seed(seed) -> None: if isinstance(seed, (list, tuple)): mixed_seed = mix_entropy(seed) + else: + mixed_seed = seed torch.manual_seed(mixed_seed) torch.cuda.manual_seed_all(mixed_seed) torch.backends.cudnn.deterministic = True From 15af91470dcf7af0c25be5031e5f9bd51d417a9f Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 19 Dec 2024 03:12:38 -0500 Subject: [PATCH 3/8] import mix_entropy --- deepmd/pt/utils/dataloader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 6daa1a3435..508deb218e 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -36,6 +36,7 @@ from deepmd.pt.utils.dataset import ( DeepmdDataSetForLoader, ) +from deepmd.pt.utils.utils import mix_entropy from deepmd.utils.data import ( DataRequirementItem, ) From cf6e6e2d47aa914eac823bf3746f0e65014ffa93 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Dec 2024 08:13:56 +0000 Subject: [PATCH 4/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/dataloader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 508deb218e..12681a304d 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -36,7 +36,9 @@ from deepmd.pt.utils.dataset import ( DeepmdDataSetForLoader, ) -from deepmd.pt.utils.utils import mix_entropy +from deepmd.pt.utils.utils import ( + mix_entropy, +) from deepmd.utils.data import ( DataRequirementItem, ) From 63dc3d31258777c11f4c63b175683f023d5358d0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 19 Dec 2024 04:08:15 -0500 Subject: [PATCH 5/8] fix pd --- deepmd/pd/utils/dataloader.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/deepmd/pd/utils/dataloader.py b/deepmd/pd/utils/dataloader.py index 9d59ea0da7..a05d5bc14b 100644 --- a/deepmd/pd/utils/dataloader.py +++ b/deepmd/pd/utils/dataloader.py @@ -30,12 +30,16 @@ default_collate_fn, ) +from deepmd.utils import dp_random from deepmd.pd.utils import ( env, ) from deepmd.pd.utils.dataset import ( DeepmdDataSetForLoader, ) +from deepmd.pt.utils.utils import ( + mix_entropy, +) from deepmd.utils.data import ( DataRequirementItem, ) @@ -50,8 +54,13 @@ def setup_seed(seed): - paddle.seed(seed) + if isinstance(seed, (list, tuple)): + mixed_seed = mix_entropy(seed) + else: + mixed_seed = seed + paddle.seed(mixed_seed) os.environ["FLAGS_cudnn_deterministic"] = "True" + dp_random.seed(seed) class DpLoaderSet(Dataset): From c2884d094e72ff14170e386d5d9aadff1a19452a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Dec 2024 09:10:18 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pd/utils/dataloader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepmd/pd/utils/dataloader.py b/deepmd/pd/utils/dataloader.py index a05d5bc14b..c3663ff679 100644 --- a/deepmd/pd/utils/dataloader.py +++ b/deepmd/pd/utils/dataloader.py @@ -30,7 +30,6 @@ default_collate_fn, ) -from deepmd.utils import dp_random from deepmd.pd.utils import ( env, ) @@ -40,6 +39,9 @@ from deepmd.pt.utils.utils import ( mix_entropy, ) +from deepmd.utils import ( + dp_random, +) from deepmd.utils.data import ( DataRequirementItem, ) From de6d22b4922e05a1aa17d43b43b25415fdf8c915 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 19 Dec 2024 04:24:23 -0500 Subject: [PATCH 7/8] fix import --- deepmd/pd/utils/dataloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/pd/utils/dataloader.py b/deepmd/pd/utils/dataloader.py index c3663ff679..0d1d6af337 100644 --- a/deepmd/pd/utils/dataloader.py +++ b/deepmd/pd/utils/dataloader.py @@ -40,7 +40,7 @@ mix_entropy, ) from deepmd.utils import ( - dp_random, + random as dp_random, ) from deepmd.utils.data import ( DataRequirementItem, From 2d4a8e980af4c416c14e320655b33938bfecc5cb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Dec 2024 09:25:44 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pd/utils/dataloader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/deepmd/pd/utils/dataloader.py b/deepmd/pd/utils/dataloader.py index 0d1d6af337..80b3e7cb8b 100644 --- a/deepmd/pd/utils/dataloader.py +++ b/deepmd/pd/utils/dataloader.py @@ -39,9 +39,7 @@ from deepmd.pt.utils.utils import ( mix_entropy, ) -from deepmd.utils import ( - random as dp_random, -) +from deepmd.utils import random as dp_random from deepmd.utils.data import ( DataRequirementItem, )