From 0e6ba5c5dea23b3643e609fbbcb6486b28f3a498 Mon Sep 17 00:00:00 2001
From: notoraptor
Date: Wed, 18 Sep 2024 14:47:27 -0400
Subject: [PATCH 1/5] [SARC-328] Implement alerts: number of CPU/GPU jobs
 (active or inactive) on a cluster over a period X (#128)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [SARC-328] Implement alerts: number of CPU/GPU jobs (active or inactive)
  on a cluster over a period X

* Rebase and update comments.

* Fix a column name

* Select sub-dataframe with given cluster names to compute stats, then use
  full dataframe to check warnings

  Add supplementary tests

* - Rename files
  - Remove `exclude` parameter and use only `cluster_names` for both adding
    and excluding clusters from checking.

* Compute statistics for each cluster separately.

* Use file_regression for tests.

---
 pyproject.toml                                |   3 +-
 sarc/alerts/usage_alerts/cluster_scraping.py  | 129 ++++++++++++++++++
 .../test_alert_cluster_scraping.py            |  58 ++++++++
 ..._nb_jobs_per_cluster_per_time_params0_.txt |   9 ++
 ..._nb_jobs_per_cluster_per_time_params1_.txt |  58 ++++++++
 ..._nb_jobs_per_cluster_per_time_params2_.txt |  72 ++++++++++
 ..._nb_jobs_per_cluster_per_time_params3_.txt |   5 +
 7 files changed, 333 insertions(+), 1 deletion(-)
 create mode 100644 sarc/alerts/usage_alerts/cluster_scraping.py
 create mode 100644 tests/functional/usage_alerts/test_alert_cluster_scraping.py
 create mode 100644 tests/functional/usage_alerts/test_alert_cluster_scraping/test_check_nb_jobs_per_cluster_per_time_params0_.txt
 create mode 100644 tests/functional/usage_alerts/test_alert_cluster_scraping/test_check_nb_jobs_per_cluster_per_time_params1_.txt
 create mode 100644 tests/functional/usage_alerts/test_alert_cluster_scraping/test_check_nb_jobs_per_cluster_per_time_params2_.txt
 create mode 100644 tests/functional/usage_alerts/test_alert_cluster_scraping/test_check_nb_jobs_per_cluster_per_time_params3_.txt

diff --git a/pyproject.toml b/pyproject.toml
index fb5a4e77..1b84eb67 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -76,7 +76,8 @@ disable = [
     "invalid-name",
     "no-else-return",  # Bad rule IMO (- OB)
     "line-too-long",  # Black takes care of line length.
-    "logging-fstring-interpolation"
+    "logging-fstring-interpolation",
+    "duplicate-code",
 ]
 extension-pkg-whitelist = "pydantic"

diff --git a/sarc/alerts/usage_alerts/cluster_scraping.py b/sarc/alerts/usage_alerts/cluster_scraping.py
new file mode 100644
index 00000000..06d4a955
--- /dev/null
+++ b/sarc/alerts/usage_alerts/cluster_scraping.py
@@ -0,0 +1,129 @@
+import logging
+import sys
+from datetime import datetime, timedelta
+from typing import List, Optional
+
+import pandas
+
+from sarc.config import MTL
+from sarc.jobs.series import compute_time_frames, load_job_series
+
+logger = logging.getLogger(__name__)
+
+
+def check_nb_jobs_per_cluster_per_time(
+    time_interval: Optional[timedelta] = timedelta(days=7),
+    time_unit=timedelta(days=1),
+    cluster_names: Optional[List[str]] = None,
+    nb_stddev=2,
+    verbose=False,
+):
+    """
+    Check if we have scraped enough jobs per time unit per cluster over a given time interval.
+    Log a warning for each cluster where the number of jobs per time unit is lower than a
+    threshold computed using mean and standard deviation statistics from this cluster.
+
+    Parameters
+    ----------
+    time_interval: timedelta
+        If given, only jobs which ran in [now - time_interval, now] will be used for checking.
+        Default is last 7 days.
+        If None, all jobs are used.
+    time_unit: timedelta
+        Time unit in which we must check cluster usage through time_interval. Default is 1 day.
+    cluster_names: list
+        Optional list of clusters to check.
+        If empty (or not specified), use all clusters available among jobs retrieved with time_interval.
+    nb_stddev: int
+        Number of standard deviations to subtract from the average to compute the checking threshold.
+        For each cluster, the threshold is computed as:
+        max(0, average - nb_stddev * stddev)
+    verbose: bool
+        If True, print supplementary info about cluster statistics.
+    """
+
+    # Parse time_interval
+    start, end, clip_time = None, None, False
+    if time_interval is not None:
+        end = datetime.now(tz=MTL)
+        start = end - time_interval
+        clip_time = True
+
+    # Get data frame
+    df = load_job_series(start=start, end=end, clip_time=clip_time)
+
+    # Split data frame into time frames using `time_unit`
+    tf = compute_time_frames(df, frame_size=time_unit)
+
+    # List all available timestamps.
+    # We will check each timestamp for each cluster.
+    timestamps = sorted(tf["timestamp"].unique())
+
+    # List clusters
+    if cluster_names:
+        cluster_names = sorted(cluster_names)
+    else:
+        cluster_names = sorted(df["cluster_name"].unique())
+
+    # Iterate over each cluster.
+    for cluster_name in cluster_names:
+        # Select only jobs for the current cluster,
+        # group jobs by timestamp, and count jobs for each timestamp.
+        f_stats = (
+            tf[tf["cluster_name"] == cluster_name]
+            .groupby(["timestamp"])[["job_id"]]
+            .count()
+        )
+
+        # Create a dataframe with all available timestamps
+        # and associate each timestamp to 0 jobs by default.
+        c = (
+            pandas.DataFrame({"timestamp": timestamps, "count": [0] * len(timestamps)})
+            .groupby(["timestamp"])[["count"]]
+            .sum()
+        )
+        # Set each timestamp valid for this cluster with the real number of jobs scraped in this timestamp.
+        c.loc[f_stats.index, "count"] = f_stats["job_id"]
+
+        # We now have the number of jobs for each timestamp for this cluster,
+        # with count 0 for timestamps where no jobs ran on the cluster.
+
+        # Compute average number of jobs per timestamp for this cluster
+        avg = c["count"].mean()
+        # Compute standard deviation of job count per timestamp for this cluster
+        stddev = c["count"].std()
+        # Compute threshold to use for warnings: max(0, avg - nb_stddev * stddev)
+        threshold = max(0, avg - nb_stddev * stddev)
+
+        if verbose:
+            print(f"[{cluster_name}]", file=sys.stderr)
+            print(c, file=sys.stderr)
+            print(f"avg {avg}, stddev {stddev}, threshold {threshold}", file=sys.stderr)
+            print(file=sys.stderr)
+
+        if threshold == 0:
+            # If threshold is zero, no check can be done, as job count will always be >= 0.
+            # Instead, we log a general warning.
+            msg = f"[{cluster_name}] threshold 0 ({avg} - {nb_stddev} * {stddev})."
+            if len(timestamps) == 1:
+                msg += (
+                    f" Only 1 timestamp found. Either time_interval ({time_interval}) is too short, "
+                    f"or this cluster should not be currently checked"
+                )
+            else:
+                msg += (
+                    f" Either nb_stddev is too high, time_interval ({time_interval}) is too short, "
+                    f"or this cluster should not be currently checked"
+                )
+            logger.warning(msg)
+        else:
+            # With a non-null threshold, we can check each timestamp.
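+            # Worked example (hypothetical numbers): with daily counts
+            # [10, 12, 0, 11, 9, 10, 11] and nb_stddev=2, we get
+            # avg = 9.0 and stddev ~= 4.08, so threshold = max(0, 9.0 - 2 * 4.08) ~= 0.84;
+            # the day with 0 jobs is then below the threshold and triggers a warning.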
+            for timestamp in timestamps:
+                nb_jobs = c.loc[timestamp]["count"]
+                if nb_jobs < threshold:
+                    logger.warning(
+                        f"[{cluster_name}][{timestamp}] "
+                        f"insufficient cluster scraping: {nb_jobs} jobs / cluster / time unit; "
+                        f"minimum required for this cluster: {threshold} ({avg} - {nb_stddev} * {stddev}); "
+                        f"time unit: {time_unit}"
+                    )
diff --git a/tests/functional/usage_alerts/test_alert_cluster_scraping.py b/tests/functional/usage_alerts/test_alert_cluster_scraping.py
new file mode 100644
index 00000000..2d1313c0
--- /dev/null
+++ b/tests/functional/usage_alerts/test_alert_cluster_scraping.py
@@ -0,0 +1,58 @@
+import functools
+import re
+
+import pytest
+
+from sarc.alerts.usage_alerts.cluster_scraping import check_nb_jobs_per_cluster_per_time
+
+from ..jobs.test_func_load_job_series import MOCK_TIME
+from .common import _get_warnings
+
+get_warnings = functools.partial(
+    _get_warnings,
+    module="sarc.alerts.usage_alerts.cluster_scraping:cluster_scraping.py",
+)
+
+
+@pytest.mark.freeze_time(MOCK_TIME)
+@pytest.mark.usefixtures("read_only_db", "tzlocal_is_mtl")
+@pytest.mark.parametrize(
+    "params",
+    [
+        # Check with default params. In the last 7 days from now (mock time: 2023-11-22),
+        # there are only 2 jobs from 1 cluster in 1 timestamp. So, threshold will be 0.
+        dict(verbose=True),
+        # Check with no time interval (i.e. all jobs).
+        dict(time_interval=None, verbose=True),
+        # Check with a supplementary cluster `another_cluster` which is not in the data frame.
+        dict(
+            time_interval=None,
+            cluster_names=[
+                "fromage",
+                "mila",
+                "patate",
+                "raisin",
+                "another_cluster",
+            ],
+            verbose=True,
+        ),
+        # Check the above case with 2 clusters ignored.
+        dict(
+            time_interval=None,
+            cluster_names=[
+                "mila",
+                "raisin",
+                "another_cluster",
+            ],
+        ),
+    ],
+)
+def test_check_nb_jobs_per_cluster_per_time(params, capsys, caplog, file_regression):
+    check_nb_jobs_per_cluster_per_time(**params)
+    file_regression.check(
+        re.sub(
+            r"WARNING +sarc\.alerts\.usage_alerts\.cluster_scraping:cluster_scraping.py:[0-9]+ +",
+            "",
+            f"{capsys.readouterr().err}\n{caplog.text}",
+        )
+    )
diff --git a/tests/functional/usage_alerts/test_alert_cluster_scraping/test_check_nb_jobs_per_cluster_per_time_params0_.txt b/tests/functional/usage_alerts/test_alert_cluster_scraping/test_check_nb_jobs_per_cluster_per_time_params0_.txt
new file mode 100644
index 00000000..966b36e6
--- /dev/null
+++ b/tests/functional/usage_alerts/test_alert_cluster_scraping/test_check_nb_jobs_per_cluster_per_time_params0_.txt
@@ -0,0 +1,9 @@
+ load job series:   0%|          | 0/2 [00:00
Date: Fri, 20 Sep 2024 18:35:29 -0400
Subject: [PATCH 2/5] [SARC-329] Implement alerts: proportion of CPU jobs with
 Prometheus stats on a given node lower than a threshold X (#132)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [SARC-329] Implement alerts: proportion of CPU jobs with Prometheus stats
  on a given node lower than a threshold X

* Use file_regression for tests.
* (cleanup) Remove unused code in tests for SARC-328

---
 .../prometheus_stats_occurrences.py           | 177 ++++++++++++++++++
 tests/functional/conftest.py                  |  27 ++-
 tests/functional/jobs/factory.py              |  12 +-
 .../test_alert_cluster_scraping.py            |   6 -
 .../test_prometheus_scraping_stats.py         |  82 ++++++++
 ...eck_prometheus_scraping_stats_params0_.txt |   0
 ...eck_prometheus_scraping_stats_params1_.txt |   1 +
 ...eck_prometheus_scraping_stats_params2_.txt |  12 ++
 ...eck_prometheus_scraping_stats_params3_.txt |  13 ++
 ...eck_prometheus_scraping_stats_params4_.txt |   9 +
 ...eck_prometheus_scraping_stats_params5_.txt |  12 ++
 ...eck_prometheus_scraping_stats_params6_.txt |  18 ++
 ...eck_prometheus_scraping_stats_params7_.txt |   7 +
 ...eck_prometheus_scraping_stats_params8_.txt |   8 +
 14 files changed, 371 insertions(+), 13 deletions(-)
 create mode 100644 sarc/alerts/usage_alerts/prometheus_stats_occurrences.py
 create mode 100644 tests/functional/usage_alerts/test_prometheus_scraping_stats.py
 create mode 100644 tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params0_.txt
 create mode 100644 tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params1_.txt
 create mode 100644 tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params2_.txt
 create mode 100644 tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params3_.txt
 create mode 100644 tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params4_.txt
 create mode 100644 tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params5_.txt
 create mode 100644 tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params6_.txt
 create mode 100644 tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params7_.txt
 create mode 100644 tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params8_.txt

diff --git a/sarc/alerts/usage_alerts/prometheus_stats_occurrences.py b/sarc/alerts/usage_alerts/prometheus_stats_occurrences.py
new file mode 100644
index 00000000..5ebf4c41
--- /dev/null
+++ b/sarc/alerts/usage_alerts/prometheus_stats_occurrences.py
@@ -0,0 +1,177 @@
+import logging
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Sequence, Union
+
+from sarc.config import MTL
+from sarc.jobs.series import compute_time_frames, load_job_series
+
+logger = logging.getLogger(__name__)
+
+
+class PrometheusStatInfo:
+    """Prometheus stat context, used in the checking below."""
+
+    def __init__(self, name):
+        self.name = name
+        self.col_has = f"has_{name}"
+        self.col_ratio = f"ratio_{name}"
+        self.avg = None
+        self.stddev = None
+        self.threshold = None
+
+
+def check_prometheus_stats_occurrences(
+    time_interval: Optional[timedelta] = timedelta(days=7),
+    time_unit=timedelta(days=1),
+    minimum_runtime: Optional[timedelta] = timedelta(minutes=5),
+    cluster_names: Optional[List[str]] = None,
+    group_by_node: Optional[Sequence[str]] = ("mila",),
+    min_jobs_per_group: Optional[Union[int, Dict[str, int]]] = None,
+    nb_stddev=2,
+):
+    """
+    Check if we have scraped Prometheus stats for enough jobs per node per cluster per time unit.
+    Log a warning for each node / cluster where the ratio of jobs with Prometheus stats is lower
+    than a threshold computed using mean and standard deviation statistics from all clusters.
+
+    Parameters
+    ----------
+    time_interval: timedelta
+        If given, only jobs which ran in [now - time_interval, now] will be used for checking.
+        Default is last 7 days.
+        If None, all jobs are used.
+    time_unit: timedelta
+        Time unit in which we must check cluster usage through time_interval. Default is 1 day.
+    minimum_runtime: timedelta
+        If given, only jobs which ran for at least this minimum runtime will be used for checking.
+        Default is 5 minutes.
+        If None, set to 0.
+    cluster_names: list
+        Optional list of clusters to check.
+
+        There may be clusters we don't want to check among retrieved jobs (e.g. clusters in maintenance).
+        Conversely, we may expect to see jobs in a cluster while there are actually no jobs in this cluster.
+        To cover such cases, one can specify the complete list of expected clusters with `cluster_names`.
+        Jobs from clusters not in this list will be ignored both to compute statistics and in the checking phase.
+        If a cluster in this list does not appear in jobs, a warning will be logged.
+
+        If empty (or not specified), use all clusters available among jobs retrieved with time_interval.
+    group_by_node: Sequence
+        Optional sequence of clusters to group by node.
+        For clusters in this list, we will check each node separately (i.e. a "group" is a cluster node).
+        By default, we check the entire cluster (i.e. the "group" is the cluster itself).
+    min_jobs_per_group: int | dict
+        Minimum number of jobs required for checking in each group.
+        Either an integer, as the minimum number for any group,
+        or a dictionary mapping a cluster name to the minimum number in each group of this cluster.
+        A group is either a cluster node, if the cluster name is in `group_by_node`,
+        or the entire cluster otherwise.
+        Default is 1 job per group.
+    nb_stddev: float
+        Number of standard deviations to subtract from the average to compute the checking threshold.
+        Threshold is computed as:
+        max(0, average - nb_stddev * stddev)
+    """

+    # Parse time_interval and get data frame
+    start, end, clip_time = None, None, False
+    if time_interval is not None:
+        end = datetime.now(tz=MTL)
+        start = end - time_interval
+        clip_time = True
+    df = load_job_series(start=start, end=end, clip_time=clip_time)
+
+    # Parse minimum_runtime, and select only jobs where
+    # elapsed time >= minimum runtime and allocated.gres_gpu == 0
+    if minimum_runtime is None:
+        minimum_runtime = timedelta(seconds=0)
+    df = df[
+        (df["elapsed_time"] >= minimum_runtime.total_seconds())
+        & (df["allocated.gres_gpu"] == 0)
+    ]
+
+    # List clusters
+    cluster_names = cluster_names or sorted(df["cluster_name"].unique())
+
+    # Split data frame into time frames using `time_unit`
+    df = compute_time_frames(df, frame_size=time_unit)
+
+    # Duplicate rows per node to count each job on each node where it ran
+    df = df.explode("nodes")
+
+    # If a cluster is not in group_by_node,
+    # then we must count jobs for the entire cluster, not per node.
+    # To simplify the code, let's just define 1 common node for all of this cluster's jobs
+    cluster_node_name = "(all)"
+    df.loc[~df["cluster_name"].isin(group_by_node), "nodes"] = cluster_node_name
+
+    # Add a column to ease job counting
+    df.loc[:, "task_"] = 1
+
+    # Generate a Prometheus context for each Prometheus stat we want to check.
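+    # (Each PrometheusStatInfo built here tracks, for a stat such as "cpu_utilization",
+    #  the helper columns has_cpu_utilization / ratio_cpu_utilization, plus the
+    #  avg / stddev / threshold values filled in further below.)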
+    prom_contexts = [
+        PrometheusStatInfo(name=prom_col)
+        for prom_col in ["cpu_utilization", "system_memory"]
+    ]
+
+    # Add columns to check if a job has Prometheus stats
+    for prom in prom_contexts:
+        df.loc[:, prom.col_has] = ~df[prom.name].isnull()
+
+    # Group per timestamp per cluster per node, and count jobs and Prometheus stats.
+    # If "cluster_names" are given, use only jobs in these clusters.
+    f_stats = (
+        df[df["cluster_name"].isin(cluster_names)]
+        .groupby(["timestamp", "cluster_name", "nodes"])[
+            [prom_info.col_has for prom_info in prom_contexts] + ["task_"]
+        ]
+        .sum()
+    )
+
+    # Compute the ratio of jobs with each Prometheus stat for each group,
+    # then compute the threshold for each Prometheus stat.
+    for prom in prom_contexts:
+        f_stats[prom.col_ratio] = f_stats[prom.col_has] / f_stats["task_"]
+        prom.avg = f_stats[prom.col_ratio].mean()
+        prom.stddev = f_stats[prom.col_ratio].std()
+        prom.threshold = max(0, prom.avg - nb_stddev * prom.stddev)
+
+    # Parse min_jobs_per_group
+    default_min_jobs = 1
+    if min_jobs_per_group is None:
+        min_jobs_per_group = {}
+    elif isinstance(min_jobs_per_group, int):
+        default_min_jobs = min_jobs_per_group
+        min_jobs_per_group = {}
+    assert isinstance(min_jobs_per_group, dict)
+
+    # Now we can check each group.
+    clusters_seen = set()
+    for row in f_stats.itertuples():
+        timestamp, cluster_name, node = row.Index
+        clusters_seen.add(cluster_name)
+        nb_jobs = row.task_
+        if nb_jobs >= min_jobs_per_group.get(cluster_name, default_min_jobs):
+            grouping_type = "cluster" if node == cluster_node_name else "node / cluster"
+            grouping_name = (
+                f"[{cluster_name}]"
+                if node == cluster_node_name
+                else f"[{cluster_name}][{node}]"
+            )
+            for prom in prom_contexts:
+                local_stat = getattr(row, prom.col_has) / nb_jobs
+                if local_stat < prom.threshold:
+                    logger.warning(
+                        f"[{timestamp}]{grouping_name} insufficient Prometheus data for {prom.name}: "
+                        f"{round(local_stat * 100, 2)} % of CPU jobs / {grouping_type} / time unit; "
+                        f"minimum required: {prom.threshold} ({prom.avg} - {nb_stddev} * {prom.stddev}); "
+                        f"time unit: {time_unit}"
+                    )
+
+    # Check clusters listed in `cluster_names` but not found in jobs.
+    for cluster_name in cluster_names:
+        if cluster_name not in clusters_seen:
+            # No stats found for this cluster: log a warning.
+            logger.warning(
+                f"[{cluster_name}] no Prometheus data available: no job found"
+            )
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
index 73d96083..6caef495 100644
--- a/tests/functional/conftest.py
+++ b/tests/functional/conftest.py
@@ -41,9 +41,9 @@ def clear_db(db):
     db.clusters.drop()
 
 
-def fill_db(db, with_users=False, with_clusters=False):
+def fill_db(db, with_users=False, with_clusters=False, job_patch=None):
     db.allocations.insert_many(create_allocations())
-    db.jobs.insert_many(create_jobs())
+    db.jobs.insert_many(create_jobs(job_patch=job_patch))
     db.diskusage.insert_many(create_diskusages())
     if with_users:
         db.users.insert_many(create_users())
@@ -58,7 +58,7 @@ def fill_db(db, with_users=False, with_clusters=False):
 
 
 def create_db_configuration_fixture(
-    db_name, empty=False, with_users=False, scope="function"
+    db_name, empty=False, with_users=False, job_patch=None, scope="function"
 ):
     @pytest.fixture(scope=scope)
     def fixture(standard_config_object):
@@ -66,7 +66,7 @@ def fixture(standard_config_object):
         db = cfg.mongo.database_instance
         clear_db(db)
         if not empty:
-            fill_db(db, with_users=with_users)
+            fill_db(db, with_users=with_users, job_patch=job_patch)
         yield
 
     return fixture
@@ -106,6 +106,16 @@ def fixture(client_config_object):
 )
 
 
+read_only_db_with_many_cpu_jobs_config_object = create_db_configuration_fixture(
+    db_name="sarc-read-only-with-many-cpu-jobs-test",
+    scope="session",
+    job_patch={
+        "allocated": {"billing": 0, "cpu": 0, "gres_gpu": 0, "mem": 0, "node": 0},
+        "requested": {"billing": 0, "cpu": 0, "gres_gpu": 0, "mem": 0, "node": 0},
+    },
+)
+
+
 read_only_db_with_users_config_object = create_db_configuration_fixture(
     db_name="sarc-read-only-with-users-test",
     with_users=True,
@@ -141,6 +151,15 @@ def read_only_db(standard_config, read_only_db_config_object):
         yield cfg.mongo.database_instance
 
 
+@pytest.fixture
+def read_only_db_with_many_cpu_jobs(
+    standard_config, read_only_db_with_many_cpu_jobs_config_object
+):
+    cfg = custom_db_config(standard_config, "sarc-read-only-with-many-cpu-jobs-test")
+    with using_config(cfg) as cfg:
+        yield cfg.mongo.database_instance
+
+
 @pytest.fixture
 def read_only_db_with_users(standard_config, read_only_db_with_users_config_object):
     cfg = custom_db_config(standard_config, "sarc-read-only-with-users-test")
diff --git a/tests/functional/jobs/factory.py b/tests/functional/jobs/factory.py
index 81b4385b..04464f3a 100644
--- a/tests/functional/jobs/factory.py
+++ b/tests/functional/jobs/factory.py
@@ -45,13 +45,17 @@ class JobFactory:
 
     def __init__(
-        self, first_submit_time: None | datetime = None, first_job_id: int = 1
+        self,
+        first_submit_time: None | datetime = None,
+        first_job_id: int = 1,
+        job_patch: dict | None = None,
     ):
         self.jobs = []
         self._first_submit_time = first_submit_time or datetime(
             2023, 2, 14, tzinfo=MTL
         ).astimezone(UTC)
         self._first_job_id = first_job_id
+        self.job_patch = job_patch or {}
 
     @property
     def next_job_id(self):
@@ -96,6 +100,8 @@ def format_kwargs(self, kwargs):
 
     def create_job(self, **kwargs):
         job = copy.deepcopy(base_job)
+        if self.job_patch:
+            job.update(self.job_patch)
         job.update(self.format_kwargs(kwargs))
         return job
@@ -189,9 +195,9 @@ def _create_user(username: str, with_drac=True):
     }
 
 
-def create_jobs(job_factory: JobFactory | None = None):
+def create_jobs(job_factory: JobFactory | None = None, job_patch: dict | None = None):
     if job_factory is None:
-        job_factory = JobFactory()
+        job_factory = JobFactory(job_patch=job_patch)
 
     for status in [
         "CANCELLED",
diff --git a/tests/functional/usage_alerts/test_alert_cluster_scraping.py b/tests/functional/usage_alerts/test_alert_cluster_scraping.py
index 2d1313c0..e4ddf903 100644
--- a/tests/functional/usage_alerts/test_alert_cluster_scraping.py
+++ b/tests/functional/usage_alerts/test_alert_cluster_scraping.py
@@ -6,12 +6,6 @@
 from sarc.alerts.usage_alerts.cluster_scraping import check_nb_jobs_per_cluster_per_time
 
 from ..jobs.test_func_load_job_series import MOCK_TIME
-from .common import _get_warnings
-
-get_warnings = functools.partial(
-    _get_warnings,
-    module="sarc.alerts.usage_alerts.cluster_scraping:cluster_scraping.py",
-)
 
 
 @pytest.mark.freeze_time(MOCK_TIME)
diff --git a/tests/functional/usage_alerts/test_prometheus_scraping_stats.py b/tests/functional/usage_alerts/test_prometheus_scraping_stats.py
new file mode 100644
index 00000000..f9c476b1
--- /dev/null
+++ b/tests/functional/usage_alerts/test_prometheus_scraping_stats.py
@@ -0,0 +1,82 @@
+import functools
+import re
+
+import pytest
+
+from sarc.alerts.usage_alerts.prometheus_stats_occurrences import (
+    check_prometheus_stats_occurrences,
+)
+from sarc.client import get_jobs
+from tests.functional.jobs.test_func_load_job_series import MOCK_TIME
+
+from ..jobs.test_func_job_statistics import generate_fake_timeseries
+
+
+@pytest.mark.freeze_time(MOCK_TIME)
+@pytest.mark.usefixtures("read_only_db_with_many_cpu_jobs", "tzlocal_is_mtl")
+@pytest.mark.parametrize(
+    "params",
+    [
+        # Check with default params. In the last 7 days from now (mock time: 2023-11-22),
+        # there are only 2 jobs from 1 cluster in 1 timestamp, both with no cpu_utilization
+        # and no system_memory. So the threshold will be 0 everywhere, and no warning will be printed.
+        dict(),
+        # Check with no time_interval.
+        dict(time_interval=None),
+        # Check with no time_interval and a low stddev multiplier (0.25), to get more warnings.
+        dict(time_interval=None, nb_stddev=0.25),
+        # Check with no time_interval, 0.25 stddev, and 1 extra cluster.
+        # Expect 1 more warning, no other changes.
+        dict(
+            time_interval=None,
+            nb_stddev=0.25,
+            cluster_names=[
+                "raisin",
+                "patate",
+                "fromage",
+                "mila",
+                "invisible-cluster",
+            ],
+        ),
+        # Check with no time_interval, 0.25 stddev, with only 2 clusters. Thresholds will change.
+        dict(time_interval=None, nb_stddev=0.25, cluster_names=["raisin", "mila"]),
+        # Check with no time_interval, 0.25 stddev, and no group_by_node.
+        dict(time_interval=None, nb_stddev=0.25, group_by_node=()),
+        # Check with no time_interval, 0.25 stddev, and group_by_node for all clusters.
+        # Many changes.
+        dict(
+            time_interval=None,
+            nb_stddev=0.25,
+            group_by_node=["raisin", "patate", "fromage", "mila"],
+        ),
+        # Check with no time_interval, 0.25 stddev, group_by_node for all clusters, and min jobs set to 2.
+        dict(
+            time_interval=None,
+            nb_stddev=0.25,
+            group_by_node=["raisin", "patate", "fromage", "mila"],
+            min_jobs_per_group=2,
+        ),
+        # Check with no time_interval, 0.25 stddev, group_by_node for all clusters, and min jobs set for one cluster.
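+        # (Here min_jobs_per_group={"raisin": 3} means groups on cluster "raisin" are
+        #  only checked when they contain at least 3 jobs; all other clusters keep the
+        #  default minimum of 1 job per group.)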
+ dict( + time_interval=None, + nb_stddev=0.25, + group_by_node=["raisin", "patate", "fromage", "mila"], + min_jobs_per_group={"raisin": 3}, + ), + ], +) +def test_check_prometheus_scraping_stats(params, monkeypatch, caplog, file_regression): + monkeypatch.setattr( + "sarc.jobs.series.get_job_time_series", generate_fake_timeseries + ) + + for job in get_jobs(): + job.statistics(save=True) + check_prometheus_stats_occurrences(**params) + file_regression.check( + re.sub( + r"WARNING +sarc\.alerts\.usage_alerts\.prometheus_stats_occurrences:prometheus_stats_occurrences.py:[0-9]+ +", + "", + caplog.text, + ) + ) diff --git a/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params0_.txt b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params0_.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params1_.txt b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params1_.txt new file mode 100644 index 00000000..fbcf0bb9 --- /dev/null +++ b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params1_.txt @@ -0,0 +1 @@ +[2023-11-21 00:01:00-05:00][raisin] insufficient Prometheus data for cpu_utilization: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.368376726851694 (0.9230769230769231 - 2 * 0.2773500981126146); time unit: 1 day, 0:00:00 diff --git a/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params2_.txt b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params2_.txt new file mode 100644 index 00000000..1557ab3c --- /dev/null +++ b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params2_.txt @@ -0,0 +1,12 @@ +[2023-02-14 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-15 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-16 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][fromage] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][patate] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][mila][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU 
jobs / node / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][patate] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-19 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin] insufficient Prometheus data for cpu_utilization: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.8537393985487695 (0.9230769230769231 - 0.25 * 0.2773500981126146); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 diff --git a/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params3_.txt b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params3_.txt new file mode 100644 index 00000000..546c1aea --- /dev/null +++ b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params3_.txt @@ -0,0 +1,13 @@ +[2023-02-14 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-15 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-16 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][fromage] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][patate] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][mila][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][patate] insufficient 
Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-19 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin] insufficient Prometheus data for cpu_utilization: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.8537393985487695 (0.9230769230769231 - 0.25 * 0.2773500981126146); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[invisible-cluster] no Prometheus data available: no job found diff --git a/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params4_.txt b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params4_.txt new file mode 100644 index 00000000..c454361a --- /dev/null +++ b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params4_.txt @@ -0,0 +1,9 @@ +[2023-02-14 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.09459074466105402 (0.2 - 0.25 * 0.42163702135578396); time unit: 1 day, 0:00:00 +[2023-02-15 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.09459074466105402 (0.2 - 0.25 * 0.42163702135578396); time unit: 1 day, 0:00:00 +[2023-02-16 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.09459074466105402 (0.2 - 0.25 * 0.42163702135578396); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.09459074466105402 (0.2 - 0.25 * 0.42163702135578396); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][mila][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.09459074466105402 (0.2 - 0.25 * 0.42163702135578396); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.09459074466105402 (0.2 - 0.25 * 0.42163702135578396); time unit: 1 day, 0:00:00 +[2023-02-19 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.09459074466105402 (0.2 - 0.25 * 0.42163702135578396); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin] insufficient Prometheus data for cpu_utilization: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.8209430584957905 (0.9 - 0.25 * 0.31622776601683794); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin] insufficient Prometheus data for 
system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.09459074466105402 (0.2 - 0.25 * 0.42163702135578396); time unit: 1 day, 0:00:00 diff --git a/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params5_.txt b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params5_.txt new file mode 100644 index 00000000..8789fc64 --- /dev/null +++ b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params5_.txt @@ -0,0 +1,12 @@ +[2023-02-14 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-15 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-16 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][fromage] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][patate] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][mila] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][patate] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-02-19 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin] insufficient Prometheus data for cpu_utilization: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.8537393985487695 (0.9230769230769231 - 0.25 * 0.2773500981126146); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / cluster / time unit; minimum required: 0.059962701821302505 (0.15384615384615385 - 0.25 * 0.3755338080994054); time unit: 1 day, 0:00:00 diff --git 
a/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params6_.txt b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params6_.txt new file mode 100644 index 00000000..65c8a3d9 --- /dev/null +++ b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params6_.txt @@ -0,0 +1,18 @@ +[2023-02-14 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-15 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-16 00:01:00-05:00][raisin][bart] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-16 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][fromage][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][patate][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][raisin][bart] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][raisin][cn-c022] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][raisin][cn-d001] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][mila][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][patate][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][raisin][cn-c021] 
insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-19 00:01:00-05:00][raisin][cn-b099] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-19 00:01:00-05:00][raisin][cn-c017] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-19 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for cpu_utilization: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.8900144875849911 (0.9473684210526315 - 0.25 * 0.22941573387056177); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 diff --git a/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params7_.txt b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params7_.txt new file mode 100644 index 00000000..039d2ed4 --- /dev/null +++ b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params7_.txt @@ -0,0 +1,7 @@ +[2023-02-14 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-15 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-16 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for cpu_utilization: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.8900144875849911 (0.9473684210526315 - 0.25 * 0.22941573387056177); time unit: 1 day, 0:00:00 +[2023-11-21 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data 
for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 diff --git a/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params8_.txt b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params8_.txt new file mode 100644 index 00000000..f591c487 --- /dev/null +++ b/tests/functional/usage_alerts/test_prometheus_scraping_stats/test_check_prometheus_scraping_stats_params8_.txt @@ -0,0 +1,8 @@ +[2023-02-14 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-15 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-16 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][fromage][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-17 00:01:00-05:00][patate][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][mila][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][patate][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 +[2023-02-18 00:01:00-05:00][raisin][cn-c021] insufficient Prometheus data for system_memory: 0.0 % of CPU jobs / node / cluster / time unit; minimum required: 0.026437715984160393 (0.10526315789473684 - 0.25 * 0.3153017676423058); time unit: 1 day, 0:00:00 From ee50c58b61b8d7e9f352de377f82b3cb9c41b3bd Mon Sep 17 00:00:00 2001 From: Bruno Carrez Date: Mon, 30 Sep 2024 20:44:09 -0400 Subject: [PATCH 3/5] SARC-292 SARC-293 users exceptions (#103) * fix conftest to comply to exception implementation in sarc/ldap/supervisor.py * conftest exception for delegations and supervisors overrides * test for supervisors overrides * supervisors override implementation * add teacher_delegations in User API * add test for teacher_delegations + lint * remove print * add teachers delegations implementation * fix tests made without exceptions file * lint * fix test_query_to_ldap_server_and_writing_to_output_json (ignore co_supervisor too, there was no case with co_supervisor before exceptions handling was implemented, so the test had to be updated) * updated test_acquire_users_supervisors and test_acquire_users_co_supervisors to use a 
user not affected by supervisors overrides mock data * lint/black/whatever * lint * Move the supervisors override and prof delegation exceptions to sarc.ldap.users_exceptions.py * fix test `test_load_job_series_with_users` to include the new `teacher_delegations` field in the user collection * lint * fix tests: `test_get_user` and `test_get_users` * update lint exceptions ( disable too-many-positional-arguments ) * fix test `test_acquire_ldap_revision_change` --- pyproject.toml | 1 + sarc/alerts/cache.py | 1 - sarc/alerts/common.py | 1 - sarc/client/users/api.py | 2 + sarc/users/acquire.py | 10 ++ sarc/users/revision.py | 1 + sarc/users/supervisor.py | 1 + sarc/users/users_exceptions.py | 56 +++++++++++ tests/conftest.py | 20 +++- .../cli/acquire/test_acquire_users.py | 93 ++++++++++++++++--- .../test_func_get_user/test_get_user.txt | 1 + .../test_func_get_user/test_get_users.txt | 3 + .../jobs/test_func_load_job_series.py | 1 + .../test_load_job_series_with_users.txt | 52 +++++------ tests/functional/users/test_acquire_ldap.py | 4 +- tests/functional/users/test_read_mila_ldap.py | 1 + 16 files changed, 203 insertions(+), 45 deletions(-) create mode 100644 sarc/users/users_exceptions.py diff --git a/pyproject.toml b/pyproject.toml index 1b84eb67..19afb8e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ disable = [ "import-outside-toplevel", # These imports are useful to reduce loading times "too-many-arguments", "too-many-locals", + "too-many-positional-arguments", "missing-module-docstring", "missing-class-docstring", "missing-function-docstring", diff --git a/sarc/alerts/cache.py b/sarc/alerts/cache.py index 9f7c9363..a0b11458 100644 --- a/sarc/alerts/cache.py +++ b/sarc/alerts/cache.py @@ -22,7 +22,6 @@ class CachedResult: @dataclass(unsafe_hash=True) class Timespan: - # Time duration duration: timedelta diff --git a/sarc/alerts/common.py b/sarc/alerts/common.py index fe117bb9..97435269 100644 --- a/sarc/alerts/common.py +++ b/sarc/alerts/common.py @@ -315,7 +315,6 @@ class HealthMonitorConfig: checks: dict[str, TaggedSubclass[HealthCheck]] = field(default_factory=dict) def __post_init__(self): - all_checks = {} # Parameterize the checks diff --git a/sarc/client/users/api.py b/sarc/client/users/api.py index 390c550e..269d73bd 100644 --- a/sarc/client/users/api.py +++ b/sarc/client/users/api.py @@ -29,6 +29,8 @@ class User(BaseModel): mila: Credentials drac: Optional[Credentials] + teacher_delegations: Optional[list[str]] = None + mila_ldap: dict drac_members: Optional[dict] drac_roles: Optional[dict] diff --git a/sarc/users/acquire.py b/sarc/users/acquire.py index 7c3f8851..c8d0eecf 100644 --- a/sarc/users/acquire.py +++ b/sarc/users/acquire.py @@ -20,6 +20,10 @@ from sarc.users.mymila import fetch_mymila from sarc.users.read_mila_ldap import fetch_ldap from sarc.users.revision import commit_matches_to_database +from sarc.users.users_exceptions import ( + apply_users_delegation_exceptions, + apply_users_supervisor_exceptions, +) def run( @@ -135,6 +139,12 @@ def run( for _, user in DD_persons_matched.items(): fill_computed_fields(user) + # apply delegation exceptions + apply_users_delegation_exceptions(DD_persons_matched, cfg.ldap, span) + + # apply supervisor exceptions + apply_users_supervisor_exceptions(DD_persons_matched, cfg.ldap, span) + # These associations can now be propagated to the database. 
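+    # (For reference: the exceptions file consumed by the two apply_* calls above
+    #  has this shape, matching the test fixture updated later in this patch:
+    #  {"delegations": {prof_email: [delegate_emails, ...]},
+    #   "supervisors_overrides": {student_email: [supervisor, optional co_supervisor]}})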
span.add_event("Committing matches to database ...") commit_matches_to_database( diff --git a/sarc/users/revision.py b/sarc/users/revision.py index 849552e2..26642b34 100644 --- a/sarc/users/revision.py +++ b/sarc/users/revision.py @@ -114,6 +114,7 @@ def user_insert(newuser: dict) -> list: "mila", "drac_roles", "drac_members", + "teacher_delegations", ) update = { diff --git a/sarc/users/supervisor.py b/sarc/users/supervisor.py index d3b35679..5e3fe635 100644 --- a/sarc/users/supervisor.py +++ b/sarc/users/supervisor.py @@ -177,6 +177,7 @@ def sortkey(x): return sorted(supervisors, key=sortkey, reverse=True) +# pylint: disable=too-many-branches def resolve_supervisors( ldap_people: list[dict], group_to_prof: dict, exceptions: dict ) -> SupervisorMatchingErrors: diff --git a/sarc/users/users_exceptions.py b/sarc/users/users_exceptions.py new file mode 100644 index 00000000..cef0fdf3 --- /dev/null +++ b/sarc/users/users_exceptions.py @@ -0,0 +1,56 @@ +from sarc.config import LDAPConfig +from sarc.users.read_mila_ldap import load_ldap_exceptions + + +def apply_users_delegation_exceptions(DD_persons, ldap_config: LDAPConfig, span): + """ + Apply manual exceptions to users; + these exceptions are defined in the exceptions.json file refered in the LDAPConfig. + """ + span.add_event("Applying users delegation exceptions ...") + # Load exceptions + exceptions = load_ldap_exceptions(ldap_config) + + for _, user in DD_persons.items(): + if ( + exceptions + and user["mila_ldap"]["mila_email_username"] in exceptions["delegations"] + ): + span.add_event( + f"Applying delegation exception for {user['mila_ldap']['mila_email_username']} ..." + ) + user["teacher_delegations"] = exceptions["delegations"][ + user["mila_ldap"]["mila_email_username"] + ] + + +def apply_users_supervisor_exceptions(DD_persons, ldap_config: LDAPConfig, span): + """ + Apply manual exceptions to users; + these exceptions are defined in the exceptions.json file refered in the LDAPConfig. + """ + span.add_event("Applying users supervisor exceptions ...") + # Load exceptions + exceptions = load_ldap_exceptions(ldap_config) + + for _, user in DD_persons.items(): + # if there is a supervisors override, use it whatever the student status may be + if exceptions and user["mila_ldap"]["mila_email_username"] in exceptions.get( + "supervisors_overrides", [] + ): + span.add_event( + f"Applying supervisor exception for {user['mila_ldap']['mila_email_username']} ..." 
+ ) + supervisors = exceptions["supervisors_overrides"][ + user["mila_ldap"]["mila_email_username"] + ] + user["mila_ldap"]["supervisor"] = None + user["mila_ldap"]["co_supervisor"] = None + if len(supervisors) >= 1: + user["mila_ldap"]["supervisor"] = supervisors[0] + else: + user["mila_ldap"]["supervisor"] = None + if len(supervisors) >= 2: + user["mila_ldap"]["co_supervisor"] = supervisors[1] + else: + user["mila_ldap"]["co_supervisor"] = None diff --git a/tests/conftest.py b/tests/conftest.py index 7f259610..073b2e1e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -216,8 +216,24 @@ def file_contents(): """ exceptions_json_path = """ { - "not_prof": [], - "not_student": [] + "not_teacher": [], + "not_student": [], + "delegations": { + "john.smith003@mila.quebec": [ + "john.smith004@mila.quebec", + "john.smith005@mila.quebec" + ] + }, + "supervisors_overrides": { + "john.smith001@mila.quebec": [ + "john.smith003@mila.quebec" + ], + "john.smith002@mila.quebec": [ + "john.smith003@mila.quebec", + "john.smith004@mila.quebec" + ] + } + } """ diff --git a/tests/functional/cli/acquire/test_acquire_users.py b/tests/functional/cli/acquire/test_acquire_users.py index 04f7a235..2ad7cf36 100644 --- a/tests/functional/cli/acquire/test_acquire_users.py +++ b/tests/functional/cli/acquire/test_acquire_users.py @@ -118,6 +118,37 @@ def test_acquire_users(cli_main, patch_return_values, mock_file, captrace): js_user = get_user(drac_account_username="stranger.person") assert js_user is None + # test supervisor overrides + js_user = get_user(mila_email_username="john.smith001@mila.quebec") + assert js_user is not None + assert js_user.mila_ldap["supervisor"] == "john.smith003@mila.quebec" + assert js_user.mila_ldap["co_supervisor"] == None + + js_user = get_user(mila_email_username="john.smith002@mila.quebec") + assert js_user is not None + assert js_user.mila_ldap["supervisor"] == "john.smith003@mila.quebec" + assert js_user.mila_ldap["co_supervisor"] == "john.smith004@mila.quebec" + + # test delegations + # john.smith003 should have delegations for john.smith004 and john.smith005 + # john.smith004 should have no delegations + # john.smith005 should have no delegations + + js_user = get_user(mila_email_username="john.smith003@mila.quebec") + assert js_user is not None + assert js_user.teacher_delegations is not None + assert "john.smith004@mila.quebec" in js_user.teacher_delegations + assert "john.smith005@mila.quebec" in js_user.teacher_delegations + assert "john.smith006@mila.quebec" not in js_user.teacher_delegations + + js_user = get_user(mila_email_username="john.smith004@mila.quebec") + assert js_user is not None + assert js_user.teacher_delegations == None + + js_user = get_user(mila_email_username="john.smith005@mila.quebec") + assert js_user is not None + assert js_user.teacher_delegations == None + # Check traces # NB: We don't check logging here, because # this execution won't display "acquire users" logs, @@ -132,14 +163,28 @@ def test_acquire_users(cli_main, patch_return_values, mock_file, captrace): assert spans[1].name == "match_drac_to_mila_accounts" assert spans[1].status.status_code == StatusCode.OK - assert len(spans[1].events) == 4 + assert len(spans[1].events) == 9 assert ( spans[1].events[0].name == "Loading mila_ldap, drac_roles and drac_members from files ..." ) assert spans[1].events[1].name == "Loading matching config from file ..." assert spans[1].events[2].name == "Matching DRAC/CC to mila accounts ..." 
-    assert spans[1].events[3].name == "Committing matches to database ..."
+    assert spans[1].events[3].name == "Applying users delegation exceptions ..."
+    assert (
+        spans[1].events[4].name
+        == "Applying delegation exception for john.smith003@mila.quebec ..."
+    )
+    assert spans[1].events[5].name == "Applying users supervisor exceptions ..."
+    assert (
+        spans[1].events[6].name
+        == "Applying supervisor exception for john.smith001@mila.quebec ..."
+    )
+    assert (
+        spans[1].events[7].name
+        == "Applying supervisor exception for john.smith002@mila.quebec ..."
+    )
+    assert spans[1].events[8].name == "Committing matches to database ..."
 
 
 @pytest.mark.parametrize(
@@ -184,12 +229,16 @@ def test_acquire_users_supervisors(
     nbr_users = 4
     nbr_profs = 2
 
+    # For this test we use the user with index 3: the first user with no
+    # supervisor override in the mock data, so the result is not affected
+    # by the supervisor overrides in the test exceptions file.
+
     patch_return_values(
         {
             "sarc.users.read_mila_ldap.query_ldap": fake_raw_ldap_data(
                 nbr_users,
                 hardcoded_values_by_user={
-                    2: {  # The first user who is not a prof is the one with index 2
+                    3: {  # The first non-prof user without an override is the one with index 3
                         "supervisor": ldap_supervisor
                     }
                 },
@@ -198,7 +247,7 @@ def test_acquire_users_supervisors(
                 nbr_users=nbr_users,
                 nbr_profs=nbr_profs,
                 hardcoded_values_by_user={
-                    2: {  # The first user who is not a prof is the one with index 2
+                    3: {  # The first non-prof user without an override is the one with index 3
                         "Supervisor Principal": mymila_supervisor
                     }
                 },
@@ -221,8 +270,8 @@
 
     # Validate the results of all of this by inspecting the database.
     js_user = get_user(
-        mila_email_username=f"john.smith002@mila.quebec"
-    )  # We modified the user with index 2; thus this is the one we retrieve
+        mila_email_username=f"john.smith003@mila.quebec"
+    )  # We modified the user with index 3; thus this is the one we retrieve
 
     assert js_user.mila_ldap["supervisor"] == expected_supervisor
 
@@ -268,12 +317,16 @@ def test_acquire_users_co_supervisors(
     nbr_users = 4
     nbr_profs = 2
 
+    # For this test we use the user with index 3: the first user with no
+    # supervisor override in the mock data, so the result is not affected
+    # by the supervisor overrides in the test exceptions file.
+
     patch_return_values(
         {
             "sarc.users.read_mila_ldap.query_ldap": fake_raw_ldap_data(
                 nbr_users,
                 hardcoded_values_by_user={
-                    2: {  # The first user who is not a prof is the one with index 2
+                    3: {  # The first non-prof user without an override is the one with index 3
                         "co_supervisor": ldap_co_supervisor
                     }
                 },
@@ -282,7 +335,7 @@ def test_acquire_users_co_supervisors(
                 nbr_users=nbr_users,
                 nbr_profs=nbr_profs,
                 hardcoded_values_by_user={
-                    2: {  # The first user who is not a prof is the one with index 2
+                    3: {  # The first non-prof user without an override is the one with index 3
                         "Co-Supervisor": mymila_co_supervisor
                     }
                 },
@@ -305,8 +358,8 @@
 
     # Validate the results of all of this by inspecting the database.
js_user = get_user( - mila_email_username=f"john.smith002@mila.quebec" - ) # We modified the user with index 2; thus this is the one we retrieve + mila_email_username=f"john.smith003@mila.quebec" + ) # We modified the user with index 3; thus this is the one we retrieve assert js_user.mila_ldap["co_supervisor"] == expected_co_supervisor @@ -411,12 +464,26 @@ def test_acquire_users_prompt( assert spans[1].name == "match_drac_to_mila_accounts" assert spans[1].status.status_code == StatusCode.OK - assert len(spans[1].events) == 5 + assert len(spans[1].events) == 10 assert ( spans[1].events[0].name == "Loading mila_ldap, drac_roles and drac_members from files ..." ) assert spans[1].events[1].name == "Loading matching config from file ..." assert spans[1].events[2].name == "Matching DRAC/CC to mila accounts ..." - assert spans[1].events[3].name == "Committing matches to database ..." - assert spans[1].events[4].name == "Saving 1 manual matches ..." + assert spans[1].events[3].name == "Applying users delegation exceptions ..." + assert ( + spans[1].events[4].name + == "Applying delegation exception for john.smith003@mila.quebec ..." + ) + assert spans[1].events[5].name == "Applying users supervisor exceptions ..." + assert ( + spans[1].events[6].name + == "Applying supervisor exception for john.smith001@mila.quebec ..." + ) + assert ( + spans[1].events[7].name + == "Applying supervisor exception for john.smith002@mila.quebec ..." + ) + assert spans[1].events[8].name == "Committing matches to database ..." + assert spans[1].events[9].name == "Saving 1 manual matches ..." diff --git a/tests/functional/client/test_func_get_user/test_get_user.txt b/tests/functional/client/test_func_get_user/test_get_user.txt index 15f758f1..36270706 100644 --- a/tests/functional/client/test_func_get_user/test_get_user.txt +++ b/tests/functional/client/test_func_get_user/test_get_user.txt @@ -7,6 +7,7 @@ Found user: "active": true }, "drac": null, + "teacher_delegations": null, "mila_ldap": { "co_supervisor": null, "display_name": "M/Ms Bonhomme", diff --git a/tests/functional/client/test_func_get_user/test_get_users.txt b/tests/functional/client/test_func_get_user/test_get_users.txt index 5152c905..9180c913 100644 --- a/tests/functional/client/test_func_get_user/test_get_users.txt +++ b/tests/functional/client/test_func_get_user/test_get_users.txt @@ -7,6 +7,7 @@ Found 3 users(s): "active": true }, "drac": null, + "teacher_delegations": null, "mila_ldap": { "co_supervisor": null, "display_name": "M/Ms Bonhomme", @@ -34,6 +35,7 @@ Found 3 users(s): "email": "petitbonhomme@example.com", "active": true }, + "teacher_delegations": null, "mila_ldap": { "co_supervisor": null, "display_name": "M/Ms Petitbonhomme", @@ -74,6 +76,7 @@ Found 3 users(s): "email": "beaubonhomme@example.com", "active": true }, + "teacher_delegations": null, "mila_ldap": { "co_supervisor": null, "display_name": "M/Ms Beaubonhomme", diff --git a/tests/functional/jobs/test_func_load_job_series.py b/tests/functional/jobs/test_func_load_job_series.py index 847e7173..a3fbc76a 100644 --- a/tests/functional/jobs/test_func_load_job_series.py +++ b/tests/functional/jobs/test_func_load_job_series.py @@ -93,6 +93,7 @@ "user.name", "user.record_start", "user.record_end", + "user.teacher_delegations", "user.mila.username", "user.mila.email", "user.mila.active", diff --git a/tests/functional/jobs/test_func_load_job_series/test_load_job_series_with_users.txt b/tests/functional/jobs/test_func_load_job_series/test_load_job_series_with_users.txt index 
1451c56f..1d4505df 100644 --- a/tests/functional/jobs/test_func_load_job_series/test_load_job_series_with_users.txt +++ b/tests/functional/jobs/test_func_load_job_series/test_load_job_series_with_users.txt @@ -1,28 +1,28 @@ Found 4 users and 24 job(s): -| | job_id | cluster_name | user | user.primary_email | user.name | user.record_start | user.record_end | user.mila.username | user.mila.email | user.mila.active | user.drac.username | user.drac.email | user.drac.active | user.mila_ldap.co_supervisor | user.mila_ldap.display_name | user.mila_ldap.mila_cluster_gid | user.mila_ldap.mila_cluster_uid | user.mila_ldap.mila_cluster_username | user.mila_ldap.mila_email_username | user.mila_ldap.status | user.mila_ldap.supervisor | user.drac_members.activation_status | user.drac_members.email | user.drac_members.name | user.drac_members.permission | user.drac_members.sponsor | user.drac_members.username | user.drac_roles.email | user.drac_roles.nom | user.drac_roles.status | user.drac_roles.username | user.drac_roles.état du compte | -|---:|----------:|:---------------|:-------------------|:--------------------------|:-------------------|:--------------------|------------------:|:---------------------|:--------------------------|-------------------:|:---------------------|:--------------------------|-------------------:|-------------------------------:|:------------------------------|----------------------------------:|----------------------------------:|:---------------------------------------|:-------------------------------------|:------------------------|----------------------------:|:--------------------------------------|:--------------------------|:-------------------------|:-------------------------------|:----------------------------|:-----------------------------|:--------------------------|:----------------------|:-------------------------|:---------------------------|:---------------------------------| -| 0 | 1 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 1 | 2 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 2 | 3 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 3 | 4 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | 
petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 4 | 5 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 5 | 6 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 6 | 7 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 7 | 8 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 8 | 9 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 9 | 10 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 10 | 11 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 
1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 11 | 12 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 12 | 13 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 13 | 14 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 14 | 15 | fromage | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 15 | 16 | patate | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 16 | 17 | raisin | bonhomme | bonhomme | nan | NaT | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | -| 17 | 18 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 18 | 19 | mila | grosbonhomme | grosbonhomme | nan | NaT | nan | nan | nan | nan | nan | nan | nan | nan | nan | 
nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | -| 19 | 20 | raisin | beaubonhomme | beaubonhomme@mila.quebec | M/Ms Beaubonhomme | 2024-04-11 00:00:00 | | beaubonhomme_mila | beaubonhomme@mila.quebec | 1 | beaubonhomme | beaubonhomme@example.com | 1 | | M/Ms Beaubonhomme | 1.5e+09 | 1.5e+09 | beaubonhomme | beaubonhomme@mila.quebec | enabled | | activated | beaubonhomme@example.com | M/Ms Beaubonhomme | Manager | BigProf | beaubonhomme | beaubonhomme@example.com | M/Ms Beaubonhomme | Activated | beaubonhomme | activé | -| 20 | 1000000 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 21 | 1000000 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 22 | 23 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | -| 23 | 999999999 | mila | petitbonhomme_mila | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | \ No newline at end of file +| | job_id | cluster_name | user | user.primary_email | user.name | user.record_start | user.record_end | user.teacher_delegations | user.mila.username | user.mila.email | user.mila.active | user.drac.username | user.drac.email | user.drac.active | user.mila_ldap.co_supervisor | user.mila_ldap.display_name | user.mila_ldap.mila_cluster_gid | user.mila_ldap.mila_cluster_uid | user.mila_ldap.mila_cluster_username | user.mila_ldap.mila_email_username | user.mila_ldap.status | user.mila_ldap.supervisor | user.drac_members.activation_status | user.drac_members.email | user.drac_members.name | user.drac_members.permission | user.drac_members.sponsor | user.drac_members.username | user.drac_roles.email | user.drac_roles.nom | user.drac_roles.status | user.drac_roles.username | user.drac_roles.état du compte | 
+|---:|----------:|:---------------|:-------------------|:--------------------------|:-------------------|:--------------------|------------------:|---------------------------:|:---------------------|:--------------------------|-------------------:|:---------------------|:--------------------------|-------------------:|-------------------------------:|:------------------------------|----------------------------------:|----------------------------------:|:---------------------------------------|:-------------------------------------|:------------------------|----------------------------:|:--------------------------------------|:--------------------------|:-------------------------|:-------------------------------|:----------------------------|:-----------------------------|:--------------------------|:----------------------|:-------------------------|:---------------------------|:---------------------------------| +| 0 | 1 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 1 | 2 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 2 | 3 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 3 | 4 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 4 | 5 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 5 | 6 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec 
| 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 6 | 7 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 7 | 8 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 8 | 9 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 9 | 10 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 10 | 11 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 11 | 12 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 12 | 13 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 
1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 13 | 14 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 14 | 15 | fromage | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 15 | 16 | patate | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 16 | 17 | raisin | bonhomme | bonhomme | nan | NaT | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | +| 17 | 18 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 18 | 19 | mila | grosbonhomme | grosbonhomme | nan | NaT | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | +| 19 | 20 | raisin | beaubonhomme | beaubonhomme@mila.quebec | M/Ms Beaubonhomme | 2024-04-11 00:00:00 | | | beaubonhomme_mila | beaubonhomme@mila.quebec | 1 | beaubonhomme | beaubonhomme@example.com | 1 | | M/Ms Beaubonhomme | 1.5e+09 | 1.5e+09 | beaubonhomme | beaubonhomme@mila.quebec | enabled | | activated | beaubonhomme@example.com | M/Ms Beaubonhomme | Manager | BigProf | beaubonhomme | beaubonhomme@example.com | M/Ms Beaubonhomme | Activated | beaubonhomme | activé | +| 20 | 1000000 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | 
petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 21 | 1000000 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 22 | 23 | raisin | petitbonhomme | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | +| 23 | 999999999 | mila | petitbonhomme_mila | petitbonhomme@mila.quebec | M/Ms Petitbonhomme | 2024-04-11 00:00:00 | | | petitbonhomme_mila | petitbonhomme@mila.quebec | 1 | petitbonhomme | petitbonhomme@example.com | 1 | | M/Ms Petitbonhomme | 1.5e+09 | 1.5e+09 | petitbonhomme | petitbonhomme@mila.quebec | enabled | | activated | petitbonhomme@example.com | M/Ms Petitbonhomme | Manager | BigProf | petitbonhomme | petitbonhomme@example.com | M/Ms Petitbonhomme | Activated | petitbonhomme | activé | \ No newline at end of file diff --git a/tests/functional/users/test_acquire_ldap.py b/tests/functional/users/test_acquire_ldap.py index 8184de63..876fe8a1 100644 --- a/tests/functional/users/test_acquire_ldap.py +++ b/tests/functional/users/test_acquire_ldap.py @@ -76,7 +76,7 @@ def test_acquire_ldap_revision_change(patch_return_values, mock_file): Then, one third acquisition, with no change in the LDAP data. This should result in no change in the database. 
""" - nbr_users = 3 + nbr_users = 4 patch_return_values( { @@ -110,7 +110,7 @@ def test_acquire_ldap_revision_change(patch_return_values, mock_file): "sarc.users.read_mila_ldap.query_ldap": fake_raw_ldap_data( nbr_users, hardcoded_values_by_user={ - 2: { # The first user who is not a prof is the one with index 2 + 3: { # The first user who is not a prof and not overrided is the one with index 3 "supervisor": "new_supervisor@mila.quebec" } }, diff --git a/tests/functional/users/test_read_mila_ldap.py b/tests/functional/users/test_read_mila_ldap.py index 81d36ca9..30dfe633 100644 --- a/tests/functional/users/test_read_mila_ldap.py +++ b/tests/functional/users/test_read_mila_ldap.py @@ -46,6 +46,7 @@ def test_query_to_ldap_server_and_writing_to_output_json( # resolve_supervisors is not called here e["supervisor"] = None + e["co_supervisor"] = None assert e == processed_user From 8a496251edde2affa435af0306aee3a56ab30b44 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Sun, 6 Oct 2024 00:55:18 -0400 Subject: [PATCH 4/5] =?UTF-8?q?[SARC-331]=20Impl=C3=A9menter=20les=20alert?= =?UTF-8?q?es=20:=20GPU-util=20moyen=20d=E2=80=99un=20user=20sur=20une=20p?= =?UTF-8?q?=C3=A9riode=20X=20plus=20bas=20qu=E2=80=99un=20threshold=20X=20?= =?UTF-8?q?(#133)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [SARC-331] Implémenter les alertes : GPU-util moyen d’un user sur une période X plus bas qu’un threshold X * Use file_regression for tests. * pylint: disable too-many-positional-arguments --------- Co-authored-by: Bruno Carrez --- pyproject.toml | 1 + sarc/alerts/usage_alerts/gpu_util_per_user.py | 77 +++++++++++++++++++ .../test_alert_gpu_util_per_user.py | 51 ++++++++++++ .../test_alert_gpu_util_per_user_params0_.txt | 0 .../test_alert_gpu_util_per_user_params1_.txt | 4 + .../test_alert_gpu_util_per_user_params2_.txt | 3 + .../test_alert_gpu_util_per_user_params3_.txt | 3 + .../test_alert_gpu_util_per_user_params4_.txt | 2 + 8 files changed, 141 insertions(+) create mode 100644 sarc/alerts/usage_alerts/gpu_util_per_user.py create mode 100644 tests/functional/usage_alerts/test_alert_gpu_util_per_user.py create mode 100644 tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params0_.txt create mode 100644 tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params1_.txt create mode 100644 tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params2_.txt create mode 100644 tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params3_.txt create mode 100644 tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params4_.txt diff --git a/pyproject.toml b/pyproject.toml index 19afb8e3..9eb03c8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,6 +79,7 @@ disable = [ "line-too-long", # Black takes care of line length. 
"logging-fstring-interpolation", "duplicate-code", + "too-many-positional-arguments", ] extension-pkg-whitelist = "pydantic" diff --git a/sarc/alerts/usage_alerts/gpu_util_per_user.py b/sarc/alerts/usage_alerts/gpu_util_per_user.py new file mode 100644 index 00000000..652c2674 --- /dev/null +++ b/sarc/alerts/usage_alerts/gpu_util_per_user.py @@ -0,0 +1,77 @@ +import logging +from datetime import datetime, timedelta +from typing import Optional + +from sarc.config import MTL +from sarc.jobs.series import compute_cost_and_waste, load_job_series + +logger = logging.getLogger(__name__) + + +def check_gpu_util_per_user( + threshold: timedelta, + time_interval: Optional[timedelta] = timedelta(days=7), + minimum_runtime: Optional[timedelta] = timedelta(minutes=5), +): + """ + Check if users have enough utilization of GPUs. + Log a warning for each user if average GPU-util of user jobs + in time interval is lower than a given threshold. + + For a given user job, GPU-util is computed as + gpu_utilization * gpu_equivalent_cost + (with gpu_equivalent_cost as elapsed_time * allocated.gres_gpu). + + Parameters + ---------- + threshold: timedelta + Minimum value for average GPU-util expected per user. + We assume GPU-util is expressed in GPU-seconds, + thus threshold can be expressed with a timedelta. + time_interval + If given, only jobs which ran in [now - time_interval, time_interval] will be used for checking. + Default is last 7 days. + If None, all jobs are used. + minimum_runtime + If given, only jobs which ran at least for this minimum runtime will be used for checking. + Default is 5 minutes. + If None, set to 0. + """ + # Parse time_interval + start, end, clip_time = None, None, False + if time_interval is not None: + end = datetime.now(tz=MTL) + start = end - time_interval + clip_time = True + + # Get data frame. We clip time if start and end are available, + # so that minimum_runtime is compared to job running time in given interval. 
+    df = load_job_series(start=start, end=end, clip_time=clip_time)
+
+    # Parse minimum_runtime, and select only jobs where
+    # elapsed time >= minimum runtime and allocated.gres_gpu > 0
+    if minimum_runtime is None:
+        minimum_runtime = timedelta(seconds=0)
+    df = df[
+        (df["elapsed_time"] >= minimum_runtime.total_seconds())
+        & (df["allocated.gres_gpu"] > 0)
+    ]
+
+    # Compute cost
+    df = compute_cost_and_waste(df)
+
+    # Compute GPU-util for each job
+    df["gpu_util"] = df["gpu_utilization"] * df["gpu_equivalent_cost"]
+
+    # Compute average GPU-util per user
+    f_stats = df.groupby(["user"])[["gpu_util"]].mean()
+
+    # Now we can check
+    for row in f_stats.itertuples():
+        user = row.Index
+        gpu_util = row.gpu_util
+        if gpu_util < threshold.total_seconds():
+            logger.warning(
+                f"[{user}] insufficient average gpu_util: {gpu_util} GPU-seconds; "
+                f"minimum required: {threshold} ({threshold.total_seconds()} GPU-seconds)"
+            )
diff --git a/tests/functional/usage_alerts/test_alert_gpu_util_per_user.py b/tests/functional/usage_alerts/test_alert_gpu_util_per_user.py
new file mode 100644
index 00000000..199ae37a
--- /dev/null
+++ b/tests/functional/usage_alerts/test_alert_gpu_util_per_user.py
@@ -0,0 +1,51 @@
+import functools
+import re
+from datetime import timedelta
+
+import pytest
+
+from sarc.alerts.usage_alerts.gpu_util_per_user import check_gpu_util_per_user
+from sarc.client import get_jobs
+from tests.functional.jobs.test_func_load_job_series import MOCK_TIME
+
+from ..jobs.test_func_job_statistics import generate_fake_timeseries
+
+
+@pytest.mark.freeze_time(MOCK_TIME)
+@pytest.mark.usefixtures("read_only_db", "tzlocal_is_mtl")
+@pytest.mark.parametrize(
+    "params",
+    [
+        # Check with default params. In the last 7 days from now (mock time: 2023-11-22),
+        # there are only 2 jobs, both with no gpu_utilization, so no warnings.
+        dict(threshold=timedelta()),
+        # Check with no time_interval and a threshold of 7 hours
+        dict(threshold=timedelta(hours=7), time_interval=None),
+        # Check with no time_interval and a threshold of 6 hours
+        dict(threshold=timedelta(hours=6), time_interval=None),
+        # Check with a valid time_interval
+        dict(threshold=timedelta(hours=8), time_interval=timedelta(days=276)),
+        # Check with all params, including minimum_runtime
+        dict(
+            threshold=timedelta(hours=8),
+            time_interval=timedelta(days=276),
+            minimum_runtime=timedelta(seconds=39000),
+        ),
+    ],
+)
+def test_alert_gpu_util_per_user(params, caplog, monkeypatch, file_regression):
+    monkeypatch.setattr(
+        "sarc.jobs.series.get_job_time_series", generate_fake_timeseries
+    )
+
+    for job in get_jobs():
+        job.statistics(save=True)
+
+    check_gpu_util_per_user(**params)
+    file_regression.check(
+        re.sub(
+            r"WARNING +sarc\.alerts\.usage_alerts\.gpu_util_per_user:gpu_util_per_user.py:[0-9]+ +",
+            "",
+            caplog.text,
+        )
+    )
diff --git a/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params0_.txt b/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params0_.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params1_.txt b/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params1_.txt
new file mode 100644
index 00000000..f947035b
--- /dev/null
+++ b/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params1_.txt
@@ -0,0 +1,4 @@
+[beaubonhomme] insufficient average gpu_util: 21585.0 GPU-seconds; minimum required: 7:00:00 (25200.0 GPU-seconds)
+[bonhomme] insufficient average gpu_util: 21585.0 GPU-seconds; minimum required: 7:00:00 (25200.0 GPU-seconds)
+[grosbonhomme] insufficient average gpu_util: 21585.0 GPU-seconds; minimum required: 7:00:00 (25200.0 GPU-seconds)
+[petitbonhomme] insufficient average gpu_util: 22784.166666666668 GPU-seconds; minimum required: 7:00:00 (25200.0 GPU-seconds)
diff --git a/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params2_.txt b/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params2_.txt
new file mode 100644
index 00000000..8870360a
--- /dev/null
+++ b/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params2_.txt
@@ -0,0 +1,3 @@
+[beaubonhomme] insufficient average gpu_util: 21585.0 GPU-seconds; minimum required: 6:00:00 (21600.0 GPU-seconds)
+[bonhomme] insufficient average gpu_util: 21585.0 GPU-seconds; minimum required: 6:00:00 (21600.0 GPU-seconds)
+[grosbonhomme] insufficient average gpu_util: 21585.0 GPU-seconds; minimum required: 6:00:00 (21600.0 GPU-seconds)
diff --git a/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params3_.txt b/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params3_.txt
new file mode 100644
index 00000000..0045c641
--- /dev/null
+++ b/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params3_.txt
@@ -0,0 +1,3 @@
+[beaubonhomme] insufficient average gpu_util: 19816.229166666668 GPU-seconds; minimum required: 8:00:00 (28800.0 GPU-seconds)
+[grosbonhomme] insufficient average gpu_util: 9023.729166666666 GPU-seconds; minimum required: 8:00:00 (28800.0 GPU-seconds)
+[petitbonhomme] insufficient average
gpu_util: 28780.0 GPU-seconds; minimum required: 8:00:00 (28800.0 GPU-seconds) diff --git a/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params4_.txt b/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params4_.txt new file mode 100644 index 00000000..42a8b9d1 --- /dev/null +++ b/tests/functional/usage_alerts/test_alert_gpu_util_per_user/test_alert_gpu_util_per_user_params4_.txt @@ -0,0 +1,2 @@ +[beaubonhomme] insufficient average gpu_util: 19816.229166666668 GPU-seconds; minimum required: 8:00:00 (28800.0 GPU-seconds) +[petitbonhomme] insufficient average gpu_util: 28780.0 GPU-seconds; minimum required: 8:00:00 (28800.0 GPU-seconds) From ec4528013c6242ea3326aac5caba0670d59877b6 Mon Sep 17 00:00:00 2001 From: notoraptor Date: Sun, 6 Oct 2024 01:40:21 -0400 Subject: [PATCH 5/5] =?UTF-8?q?[SARC-332]=20Impl=C3=A9menter=20les=20alert?= =?UTF-8?q?es=20:=20un=20cluster=20ne=20r=C3=A9pond=20pas=20depuis=20X=20t?= =?UTF-8?q?emps=20(#134)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [SARC-332] Implémenter les alertes : un cluster ne répond pas depuis X temps * Change date parsing * Set parsed date timezone to MTL * Parse date only with MTL timezone --------- Co-authored-by: Bruno Carrez --- sarc/alerts/usage_alerts/cluster_response.py | 46 +++++++++++++++++++ tests/functional/conftest.py | 22 ++++++--- tests/functional/jobs/factory.py | 20 ++++++++ .../test_alert_cluster_response.py | 29 ++++++++++++ .../test_check_cluster_response_279_days_.txt | 4 ++ .../test_check_cluster_response_280_days_.txt | 3 ++ .../test_check_cluster_response_281_days_.txt | 2 + .../test_check_cluster_response_282_days_.txt | 1 + .../test_check_cluster_response_283_days_.txt | 0 .../test_check_cluster_response_365_days_.txt | 0 .../test_check_cluster_response_default_.txt | 4 ++ 11 files changed, 124 insertions(+), 7 deletions(-) create mode 100644 sarc/alerts/usage_alerts/cluster_response.py create mode 100644 tests/functional/usage_alerts/test_alert_cluster_response.py create mode 100644 tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_279_days_.txt create mode 100644 tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_280_days_.txt create mode 100644 tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_281_days_.txt create mode 100644 tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_282_days_.txt create mode 100644 tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_283_days_.txt create mode 100644 tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_365_days_.txt create mode 100644 tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_default_.txt diff --git a/sarc/alerts/usage_alerts/cluster_response.py b/sarc/alerts/usage_alerts/cluster_response.py new file mode 100644 index 00000000..17d57cee --- /dev/null +++ b/sarc/alerts/usage_alerts/cluster_response.py @@ -0,0 +1,46 @@ +import logging +from datetime import datetime, time, timedelta + +from sarc.client.job import get_available_clusters +from sarc.config import MTL + +logger = logging.getLogger(__name__) + + +def check_cluster_response(time_interval: timedelta = timedelta(days=7)): + """ + Check if we scraped clusters recently. 
+    Log a warning for each cluster that has not been scraped within the last `time_interval`.
+
+    Parameters
+    ----------
+    time_interval: timedelta
+        Interval of time (ending at the current time) in which we want to see cluster scrapings.
+        For each cluster, if the latest scraping occurred before this period, a warning will be logged.
+        Default is 7 days.
+    """
+    # Get current date
+    current_date = datetime.now(tz=MTL)
+    # Get the oldest date allowed from now
+    oldest_allowed_date = current_date - time_interval
+    # Check each available cluster
+    for cluster in get_available_clusters():
+        if cluster.end_date is None:
+            logger.warning(
+                f"[{cluster.cluster_name}] no end_date available, cannot check last scraping"
+            )
+        else:
+            # Cluster's latest scraping date should be in `cluster.end_date`.
+            # NB: We assume cluster's `end_date` is stored as a date string,
+            # so we must first convert it to a datetime object.
+            # `end_date` is parsed the same way as start/end parameters in `get_jobs()`,
+            # e.g. "2023-02-12" becomes 2023-02-12 00:00:00 in the MTL timezone.
+            cluster_end_date = datetime.combine(
+                datetime.strptime(cluster.end_date, "%Y-%m-%d"), time.min
+            ).replace(tzinfo=MTL)
+            # Now we can check.
+            if cluster_end_date < oldest_allowed_date:
+                logger.warning(
+                    f"[{cluster.cluster_name}] no scraping since {cluster_end_date}, "
+                    f"oldest required: {oldest_allowed_date}, "
+                    f"current time: {current_date}"
+                )
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
index 6caef495..95188fc6 100644
--- a/tests/functional/conftest.py
+++ b/tests/functional/conftest.py
@@ -11,7 +11,7 @@
 
 from .allocations.factory import create_allocations
 from .diskusage.factory import create_diskusages
-from .jobs.factory import create_jobs, create_users
+from .jobs.factory import create_cluster_entries, create_jobs, create_users
 
 
 @pytest.fixture
@@ -51,14 +51,16 @@ def fill_db(db, with_users=False, with_clusters=False, job_patch=None):
 
     if with_clusters:
         # Fill collection `clusters`.
cluster_names = {job["cluster_name"] for job in db.jobs.find({})} - db.clusters.insert_many( - {"cluster_name": cluster_name, "start_date": None, "end_date": None} - for cluster_name in cluster_names - ) + db.clusters.insert_many(create_cluster_entries(db)) def create_db_configuration_fixture( - db_name, empty=False, with_users=False, job_patch=None, scope="function" + db_name, + empty=False, + with_users=False, + with_clusters=False, + job_patch=None, + scope="function", ): @pytest.fixture(scope=scope) def fixture(standard_config_object): @@ -66,7 +68,12 @@ def fixture(standard_config_object): db = cfg.mongo.database_instance clear_db(db) if not empty: - fill_db(db, with_users=with_users, job_patch=job_patch) + fill_db( + db, + with_users=with_users, + with_clusters=with_clusters, + job_patch=job_patch, + ) yield return fixture @@ -119,6 +126,7 @@ def fixture(client_config_object): read_only_db_with_users_config_object = create_db_configuration_fixture( db_name="sarc-read-only-with-users-test", with_users=True, + with_clusters=True, scope="session", ) diff --git a/tests/functional/jobs/factory.py b/tests/functional/jobs/factory.py index 04464f3a..7cf83065 100644 --- a/tests/functional/jobs/factory.py +++ b/tests/functional/jobs/factory.py @@ -279,6 +279,26 @@ def create_jobs(job_factory: JobFactory | None = None, job_patch: dict | None = return job_factory.jobs +def create_cluster_entries(db): + """Generate cluster entries to fill collection `clusters`.""" + cluster_names = sorted({job["cluster_name"] for job in db.jobs.find({})}) + cluster_entries = [] + + date_format = "%Y-%m-%d" + + for i, cluster_name in enumerate(cluster_names): + cluster_end_time = end_time - timedelta(days=i) + cluster_start_time = cluster_end_time - timedelta(days=1) + cluster_entries.append( + { + "cluster_name": cluster_name, + "start_date": cluster_start_time.strftime(date_format), + "end_date": cluster_end_time.strftime(date_format), + } + ) + return cluster_entries + + json_raw = { "metadata": { "plugin": {"type": "openapi/dbv0.0.37", "name": "Slurm OpenAPI DB v0.0.37"}, diff --git a/tests/functional/usage_alerts/test_alert_cluster_response.py b/tests/functional/usage_alerts/test_alert_cluster_response.py new file mode 100644 index 00000000..7c676787 --- /dev/null +++ b/tests/functional/usage_alerts/test_alert_cluster_response.py @@ -0,0 +1,29 @@ +import re +from datetime import timedelta + +import pytest + +from sarc.alerts.usage_alerts.cluster_response import check_cluster_response +from tests.functional.jobs.test_func_load_job_series import MOCK_TIME + +PARAMETERS = { + "default": dict(), # default is 7 days + **{ + f"{days}-days": dict(time_interval=timedelta(days=days)) + for days in [365, 283, 282, 281, 280, 279] + }, +} + + +@pytest.mark.freeze_time(MOCK_TIME) +@pytest.mark.usefixtures("read_only_db_with_users", "tzlocal_is_mtl") +@pytest.mark.parametrize("params", PARAMETERS.values(), ids=PARAMETERS.keys()) +def test_check_cluster_response(params, caplog, file_regression): + check_cluster_response(**params) + file_regression.check( + re.sub( + r"WARNING +sarc\.alerts\.usage_alerts\.cluster_response:cluster_response.py:[0-9]+ +", + "", + caplog.text, + ) + ) diff --git a/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_279_days_.txt b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_279_days_.txt new file mode 100644 index 00000000..a9efb68f --- /dev/null +++ 
b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_279_days_.txt @@ -0,0 +1,4 @@ +[fromage] no scraping since 2023-02-15 00:00:00-05:00, oldest required: 2023-02-15 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 +[mila] no scraping since 2023-02-14 00:00:00-05:00, oldest required: 2023-02-15 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 +[patate] no scraping since 2023-02-13 00:00:00-05:00, oldest required: 2023-02-15 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 +[raisin] no scraping since 2023-02-12 00:00:00-05:00, oldest required: 2023-02-15 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 diff --git a/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_280_days_.txt b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_280_days_.txt new file mode 100644 index 00000000..ae0db00f --- /dev/null +++ b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_280_days_.txt @@ -0,0 +1,3 @@ +[mila] no scraping since 2023-02-14 00:00:00-05:00, oldest required: 2023-02-14 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 +[patate] no scraping since 2023-02-13 00:00:00-05:00, oldest required: 2023-02-14 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 +[raisin] no scraping since 2023-02-12 00:00:00-05:00, oldest required: 2023-02-14 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 diff --git a/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_281_days_.txt b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_281_days_.txt new file mode 100644 index 00000000..87dd8986 --- /dev/null +++ b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_281_days_.txt @@ -0,0 +1,2 @@ +[patate] no scraping since 2023-02-13 00:00:00-05:00, oldest required: 2023-02-13 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 +[raisin] no scraping since 2023-02-12 00:00:00-05:00, oldest required: 2023-02-13 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 diff --git a/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_282_days_.txt b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_282_days_.txt new file mode 100644 index 00000000..53ab1822 --- /dev/null +++ b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_282_days_.txt @@ -0,0 +1 @@ +[raisin] no scraping since 2023-02-12 00:00:00-05:00, oldest required: 2023-02-12 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 diff --git a/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_283_days_.txt b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_283_days_.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_365_days_.txt b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_365_days_.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_default_.txt b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_default_.txt new file mode 100644 index 00000000..6ab94bee --- /dev/null +++ 
b/tests/functional/usage_alerts/test_alert_cluster_response/test_check_cluster_response_default_.txt @@ -0,0 +1,4 @@ +[fromage] no scraping since 2023-02-15 00:00:00-05:00, oldest required: 2023-11-14 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 +[mila] no scraping since 2023-02-14 00:00:00-05:00, oldest required: 2023-11-14 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 +[patate] no scraping since 2023-02-13 00:00:00-05:00, oldest required: 2023-11-14 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00 +[raisin] no scraping since 2023-02-12 00:00:00-05:00, oldest required: 2023-11-14 19:00:00-05:00, current time: 2023-11-21 19:00:00-05:00
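Usage note: the two checks introduced in patches 4 and 5 can be exercised together from a small script. The snippet below is a minimal sketch, assuming a configured SARC environment with a reachable database; the threshold value is illustrative only.

import logging
from datetime import timedelta

from sarc.alerts.usage_alerts.cluster_response import check_cluster_response
from sarc.alerts.usage_alerts.gpu_util_per_user import check_gpu_util_per_user

# Both checks report through the logging module.
logging.basicConfig(level=logging.WARNING)

# Warn about users whose average GPU-util over the last 7 days (the default
# time_interval) is below 6 GPU-hours (illustrative threshold).
check_gpu_util_per_user(threshold=timedelta(hours=6))

# Warn about clusters with no scraping during the last 7 days (the default).
check_cluster_response()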