From 2683d061f4e365bb5d2692d726963cc8f369a409 Mon Sep 17 00:00:00 2001 From: Rehan Durrani Date: Tue, 10 May 2022 15:43:40 -0700 Subject: [PATCH 1/5] FIX-#4450: Ensure Modin successfully initializes when Ray cluster has no resources. Signed-off-by: Rehan Durrani --- modin/core/execution/ray/common/utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modin/core/execution/ray/common/utils.py b/modin/core/execution/ray/common/utils.py index 0543dd1b3da..29cf65818e0 100644 --- a/modin/core/execution/ray/common/utils.py +++ b/modin/core/execution/ray/common/utils.py @@ -217,7 +217,13 @@ def initialize_ray( _move_stdlib_ahead_of_site_packages ) ray.worker.global_worker.run_function_on_all_workers(_import_pandas) - num_cpus = int(ray.cluster_resources()["CPU"]) + num_cpus = ray.cluster_resources().get("CPU", None) + if num_cpus is None: + import warnings + warnings.warn("The current Ray cluster does not have any CPU Resources.\nModin uses the number of CPUs to determine how many partitions to create.\nNumber of partitions defaulting to 4. To update, run the following python code:\n\tfrom modin.config import NPartitions\n\tNPartitions.put(desired_num_cpus)") + num_cpus = 4 + else: + num_cpus = int(num_cpus) num_gpus = int(ray.cluster_resources().get("GPU", 0)) if StorageFormat.get() == "Cudf": NPartitions._put(num_gpus) From f62f847786857bb252357694b445c70412474ae0 Mon Sep 17 00:00:00 2001 From: Rehan Durrani Date: Tue, 10 May 2022 16:58:04 -0700 Subject: [PATCH 2/5] Fix formatting, add pr to release notes Signed-off-by: Rehan Durrani --- docs/release_notes/release_notes-0.15.0.rst | 2 ++ modin/core/execution/ray/common/utils.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/release_notes/release_notes-0.15.0.rst b/docs/release_notes/release_notes-0.15.0.rst index 6ebcedb4d1b..e69718c04cb 100644 --- a/docs/release_notes/release_notes-0.15.0.rst +++ b/docs/release_notes/release_notes-0.15.0.rst @@ -17,6 +17,7 @@ Key Features and Updates * FIX-#4373: Fix invalid file path when trying `read_csv_glob` with `usecols` parameter (#4405) * FIX-#4394: Fix issue with multiindex metadata desync (#4395) * FIX-#4425: Add parameters to groupby pct_change (#4429) + * FIX-#4450: Ensure Modin successfully initializes when Ray cluster has no resources (#4451) * Performance enhancements * FEAT-#4320: Add connectorx as an alternative engine for read_sql (#4346) * Benchmarking enhancements @@ -60,3 +61,4 @@ Contributors @anmyachev @dchigarev @devin-petersohn +@RehanSD diff --git a/modin/core/execution/ray/common/utils.py b/modin/core/execution/ray/common/utils.py index 29cf65818e0..bec85536713 100644 --- a/modin/core/execution/ray/common/utils.py +++ b/modin/core/execution/ray/common/utils.py @@ -220,7 +220,13 @@ def initialize_ray( num_cpus = ray.cluster_resources().get("CPU", None) if num_cpus is None: import warnings - warnings.warn("The current Ray cluster does not have any CPU Resources.\nModin uses the number of CPUs to determine how many partitions to create.\nNumber of partitions defaulting to 4. To update, run the following python code:\n\tfrom modin.config import NPartitions\n\tNPartitions.put(desired_num_cpus)") + + warnings.warn( + "The current Ray cluster does not have any CPU Resources.\nModin uses the number of " + + "CPUs to determine how many partitions to create.\nNumber of partitions defaulting to" + + " 4. To update, run the following python code:\n\tfrom modin.config import " + + "NPartitions\n\tNPartitions.put(desired_num_cpus)" + ) num_cpus = 4 else: num_cpus = int(num_cpus) From a5437e81338d14dad09da88c2d39f41e853b08b1 Mon Sep 17 00:00:00 2001 From: Rehan Durrani Date: Tue, 10 May 2022 17:02:02 -0700 Subject: [PATCH 3/5] Remove duplicate import warnings Signed-off-by: Rehan Durrani --- modin/core/execution/ray/common/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/modin/core/execution/ray/common/utils.py b/modin/core/execution/ray/common/utils.py index bec85536713..7c16de64294 100644 --- a/modin/core/execution/ray/common/utils.py +++ b/modin/core/execution/ray/common/utils.py @@ -219,8 +219,6 @@ def initialize_ray( ray.worker.global_worker.run_function_on_all_workers(_import_pandas) num_cpus = ray.cluster_resources().get("CPU", None) if num_cpus is None: - import warnings - warnings.warn( "The current Ray cluster does not have any CPU Resources.\nModin uses the number of " + "CPUs to determine how many partitions to create.\nNumber of partitions defaulting to" From ad4b86872ef2caa057417ec392f8a2358cc0446d Mon Sep 17 00:00:00 2001 From: Rehan Durrani Date: Fri, 27 May 2022 14:03:17 -0700 Subject: [PATCH 4/5] Address review comments Signed-off-by: Rehan Durrani --- modin/core/execution/ray/common/utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modin/core/execution/ray/common/utils.py b/modin/core/execution/ray/common/utils.py index 7c16de64294..007c8076a4c 100644 --- a/modin/core/execution/ray/common/utils.py +++ b/modin/core/execution/ray/common/utils.py @@ -222,10 +222,12 @@ def initialize_ray( warnings.warn( "The current Ray cluster does not have any CPU Resources.\nModin uses the number of " + "CPUs to determine how many partitions to create.\nNumber of partitions defaulting to" - + " 4. To update, run the following python code:\n\tfrom modin.config import " - + "NPartitions\n\tNPartitions.put(desired_num_cpus)" + + " number of CPUs on head node. To update, run the following python code:\n\tfrom " + + "modin.config import NPartitions\n\tNPartitions.put(desired_num_cpus)" ) - num_cpus = 4 + from modin.config import CpuCount + + num_cpus = CpuCount.get() else: num_cpus = int(num_cpus) num_gpus = int(ray.cluster_resources().get("GPU", 0)) From 5035ad3c9ea3596743c41ef108b25a98dee2a5d5 Mon Sep 17 00:00:00 2001 From: Rehan Durrani Date: Fri, 27 May 2022 14:10:18 -0700 Subject: [PATCH 5/5] remove redundant import Signed-off-by: Rehan Durrani --- modin/core/execution/ray/common/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/modin/core/execution/ray/common/utils.py b/modin/core/execution/ray/common/utils.py index 007c8076a4c..1b975d1990c 100644 --- a/modin/core/execution/ray/common/utils.py +++ b/modin/core/execution/ray/common/utils.py @@ -225,8 +225,6 @@ def initialize_ray( + " number of CPUs on head node. To update, run the following python code:\n\tfrom " + "modin.config import NPartitions\n\tNPartitions.put(desired_num_cpus)" ) - from modin.config import CpuCount - num_cpus = CpuCount.get() else: num_cpus = int(num_cpus)