From b4b19b7e54b8e102a693c44191e9d7cd2e70c028 Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Wed, 29 May 2024 21:03:38 +0200 Subject: [PATCH 1/3] ignore extracting metrics for online openstack instances --- src/openstack_cloud/openstack_manager.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py index bc9a5617c..f7d13212d 100644 --- a/src/openstack_cloud/openstack_manager.py +++ b/src/openstack_cloud/openstack_manager.py @@ -1497,7 +1497,7 @@ def _issue_reconciliation_metrics( """ runner_states = self._get_openstack_runner_status(ssh_connection) - metric_stats = self._issue_runner_metrics(runner_states) + metric_stats = self._issue_runner_metrics(ssh_connection) self._issue_reconciliation_metric( metric_stats=metric_stats, reconciliation_start_ts=reconciliation_start_ts, @@ -1505,7 +1505,7 @@ def _issue_reconciliation_metrics( runner_states=runner_states, ) - def _issue_runner_metrics(self, runner_states: RunnerByHealth) -> IssuedMetricEventsStats: + def _issue_runner_metrics(self, ssh_conn: SshConnection) -> IssuedMetricEventsStats: """Issue runner metrics. Args: @@ -1515,21 +1515,21 @@ def _issue_runner_metrics(self, runner_states: RunnerByHealth) -> IssuedMetricEv The stats of issued metric events. """ total_stats: IssuedMetricEventsStats = {} + try: - online_runners = { - runner_info.runner_name - for runner_info in self.get_github_runner_info() - if runner_info.online - } - except GithubApiError: - logger.exception( - "Failed to retrieve set of online runners. Will not issue runner metrics" - ) + openstack_instances = self._get_openstack_instances(ssh_conn) + except openstack.exceptions.SDKException: + logger.exception("Failed to get openstack instances to ignore when extracting metrics." + " Will not issue runner metrics") return total_stats + # Don't extract metrics for instances which are still there, as it might be + # the case that the metrics have not yet been pulled (they get pulled right before server termination). + os_online_runners = {instance.name for instance in openstack_instances if instance.status == _INSTANCE_STATUS_ACTIVE} + for extracted_metrics in runner_metrics.extract( metrics_storage_manager=metrics_storage, - ignore_runners=set(runner_states.healthy) | online_runners, + ignore_runners=os_online_runners, ): try: job_metrics = github_metrics.job( From 29e3cf13e57f47a3652e985f8dad5c57691ffaa4 Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Thu, 30 May 2024 15:46:01 +0200 Subject: [PATCH 2/3] fix/add unit tests --- src/openstack_cloud/openstack_manager.py | 17 +++-- tests/unit/test_openstack_manager.py | 88 +++++++++++++++++------- 2 files changed, 76 insertions(+), 29 deletions(-) diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py index f7d13212d..099660ffd 100644 --- a/src/openstack_cloud/openstack_manager.py +++ b/src/openstack_cloud/openstack_manager.py @@ -1509,7 +1509,7 @@ def _issue_runner_metrics(self, ssh_conn: SshConnection) -> IssuedMetricEventsSt """Issue runner metrics. Args: - runner_states: The states of the runners. + ssh_conn: SSH connection to the runner instance. Returns: The stats of issued metric events. @@ -1519,13 +1519,20 @@ def _issue_runner_metrics(self, ssh_conn: SshConnection) -> IssuedMetricEventsSt try: openstack_instances = self._get_openstack_instances(ssh_conn) except openstack.exceptions.SDKException: - logger.exception("Failed to get openstack instances to ignore when extracting metrics." - " Will not issue runner metrics") + logger.exception( + "Failed to get openstack instances to ignore when extracting metrics." + " Will not issue runner metrics" + ) return total_stats # Don't extract metrics for instances which are still there, as it might be - # the case that the metrics have not yet been pulled (they get pulled right before server termination). - os_online_runners = {instance.name for instance in openstack_instances if instance.status == _INSTANCE_STATUS_ACTIVE} + # the case that the metrics have not yet been pulled + # (they get pulled right before server termination). + os_online_runners = { + instance.name + for instance in openstack_instances + if instance.status == _INSTANCE_STATUS_ACTIVE + } for extracted_metrics in runner_metrics.extract( metrics_storage_manager=metrics_storage, diff --git a/tests/unit/test_openstack_manager.py b/tests/unit/test_openstack_manager.py index a10942407..5628df68b 100644 --- a/tests/unit/test_openstack_manager.py +++ b/tests/unit/test_openstack_manager.py @@ -19,7 +19,7 @@ from metrics.runner import RUNNER_INSTALLED_TS_FILE_NAME from metrics.storage import MetricsStorage from openstack_cloud import openstack_manager -from openstack_cloud.openstack_manager import MAX_METRICS_FILE_SIZE +from openstack_cloud.openstack_manager import MAX_METRICS_FILE_SIZE, METRICS_EXCHANGE_PATH from runner_type import RunnerByHealth, RunnerGithubInfo CLOUD_NAME = "microstack" @@ -639,19 +639,19 @@ def test_reconcile_pulls_metric_files( monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms)) monkeypatch.setattr(openstack_manager.metrics_storage, "get", MagicMock(return_value=ms)) openstack_manager_for_reconcile._get_openstack_runner_status = MagicMock( - return_value=RunnerByHealth(healthy=("test_runner",), unhealthy=()) - ) - test_file_content = secrets.token_hex(16) - ssh_connection_mock.get.side_effect = lambda remote, local: Path(local).write_text( - test_file_content + return_value=RunnerByHealth(healthy=(), unhealthy=("test_runner",)) ) - + ssh_connection_mock.get.side_effect = MagicMock() openstack_manager_for_reconcile.reconcile(quantity=0) - assert (ms.path / "pre-job-metrics.json").exists() - assert (ms.path / "pre-job-metrics.json").read_text() == test_file_content - assert (ms.path / "post-job-metrics.json").exists() - assert (ms.path / "post-job-metrics.json").read_text() == test_file_content + ssh_connection_mock.get.assert_any_call( + remote=str(METRICS_EXCHANGE_PATH / "pre-job-metrics.json"), + local=str(ms.path / "pre-job-metrics.json"), + ) + ssh_connection_mock.get.assert_any_call( + remote=str(METRICS_EXCHANGE_PATH / "post-job-metrics.json"), + local=str(ms.path / "post-job-metrics.json"), + ) def test_reconcile_does_not_pull_too_large_files( @@ -735,47 +735,87 @@ def test_reconcile_issue_reconciliation_metrics( ) -def test_reconcile_ignores_metrics_for_healthy_and_online_runners( - openstack_manager_for_reconcile, monkeypatch, tmp_path +def test_reconcile_ignores_metrics_for_openstack_online_runners( + openstack_manager_for_reconcile, + monkeypatch, + tmp_path, + patched_create_connection_context: MagicMock, ): """ - arrange: Combination of runner status and github online status. + arrange: Combination of runner status/github status and openstack status. act: Call reconcile. - assert: The healthy and online runners are ignored. + assert: The runners returned with status ACTIVE are ignored for metrics extraction. """ runner_metrics_path = tmp_path / "runner_fs" runner_metrics_path.mkdir() ms = MetricsStorage(path=runner_metrics_path, runner_name="test_runner") monkeypatch.setattr(openstack_manager.metrics_storage, "create", MagicMock(return_value=ms)) monkeypatch.setattr(openstack_manager.metrics_storage, "get", MagicMock(return_value=ms)) + instance_name = openstack_manager_for_reconcile.instance_name + runner_names = { + k: f"{instance_name}-{k}" + for k in [ + "healthy_online", + "healthy_offline", + "unhealthy_online", + "unhealthy_offline", + "openstack_online_no_github_status", + ] + } openstack_manager_for_reconcile._get_openstack_runner_status = MagicMock( return_value=RunnerByHealth( - healthy=("healthy_online", "healthy_offline"), - unhealthy=("unhealthy_online", "unhealthy_offline"), + healthy=(runner_names["healthy_online"], runner_names["healthy_offline"]), + unhealthy=(runner_names["unhealthy_online"], runner_names["unhealthy_offline"]), ) ) openstack_manager_for_reconcile.get_github_runner_info = MagicMock( return_value=( - RunnerGithubInfo(runner_name="healthy_online", runner_id=0, online=True, busy=True), - RunnerGithubInfo(runner_name="unhealthy_online", runner_id=1, online=True, busy=False), - RunnerGithubInfo(runner_name="healthy_offline", runner_id=2, online=False, busy=False), RunnerGithubInfo( - runner_name="unhealthy_offline", runner_id=3, online=False, busy=False + runner_name=runner_names["healthy_online"], runner_id=0, online=True, busy=True + ), + RunnerGithubInfo( + runner_name=runner_names["unhealthy_online"], runner_id=1, online=True, busy=False + ), + RunnerGithubInfo( + runner_name=runner_names["healthy_offline"], runner_id=2, online=False, busy=False + ), + RunnerGithubInfo( + runner_name=runner_names["unhealthy_offline"], + runner_id=3, + online=False, + busy=False, ), ) ) - openstack_manager.runner_metrics.extract.return_value = (MagicMock() for _ in range(2)) + openstack_online_runner_names = [ + runner_names["healthy_online"], + runner_names["healthy_offline"], + runner_names["unhealthy_online"], + runner_names["openstack_online_no_github_status"], + ] + openstack_online_runners = [ + openstack_manager.openstack.compute.v2.server.Server(name=runner_name, status="ACTIVE") + for runner_name in openstack_online_runner_names + ] + openstack_offline_runners = [ + openstack_manager.openstack.compute.v2.server.Server(name=runner_name, status="SHUTOFF") + for runner_name in [runner_names["unhealthy_offline"]] + ] + patched_create_connection_context.list_servers.return_value = ( + openstack_online_runners + openstack_offline_runners + ) + + openstack_manager.runner_metrics.extract.return_value = (MagicMock() for _ in range(1)) openstack_manager.runner_metrics.issue_events.side_effect = [ {metric_events.RunnerStart, metric_events.RunnerStop}, - {metric_events.RunnerStart}, ] openstack_manager_for_reconcile.reconcile(quantity=0) openstack_manager.runner_metrics.extract.assert_called_once_with( metrics_storage_manager=metrics.storage, - ignore_runners={"healthy_online", "healthy_offline", "unhealthy_online"}, + ignore_runners=set(openstack_online_runner_names), ) From c4ad5d630e18c940242d5aad2e0912a8603d30b9 Mon Sep 17 00:00:00 2001 From: Christopher Bartz Date: Thu, 30 May 2024 16:23:43 +0200 Subject: [PATCH 3/3] fix: ssh_connection -> conn --- src/openstack_cloud/openstack_manager.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/openstack_cloud/openstack_manager.py b/src/openstack_cloud/openstack_manager.py index 099660ffd..51247c5c4 100644 --- a/src/openstack_cloud/openstack_manager.py +++ b/src/openstack_cloud/openstack_manager.py @@ -1318,7 +1318,7 @@ def reconcile(self, quantity: int) -> int: end_ts = time.time() self._issue_reconciliation_metrics( - ssh_connection=conn, reconciliation_start_ts=start_ts, reconciliation_end_ts=end_ts + conn=conn, reconciliation_start_ts=start_ts, reconciliation_end_ts=end_ts ) return delta @@ -1482,7 +1482,7 @@ def _issue_runner_installed_metric( def _issue_reconciliation_metrics( self, - ssh_connection: SshConnection, + conn: OpenstackConnection, reconciliation_start_ts: float, reconciliation_end_ts: float, ) -> None: @@ -1491,13 +1491,13 @@ def _issue_reconciliation_metrics( This includes the metrics for the runners and the reconciliation metric itself. Args: - ssh_connection: The SSH connection to the runner instance. + conn: The connection object to access OpenStack cloud. reconciliation_start_ts: The timestamp of when reconciliation started. reconciliation_end_ts: The timestamp of when reconciliation ended. """ - runner_states = self._get_openstack_runner_status(ssh_connection) + runner_states = self._get_openstack_runner_status(conn) - metric_stats = self._issue_runner_metrics(ssh_connection) + metric_stats = self._issue_runner_metrics(conn) self._issue_reconciliation_metric( metric_stats=metric_stats, reconciliation_start_ts=reconciliation_start_ts, @@ -1505,11 +1505,11 @@ def _issue_reconciliation_metrics( runner_states=runner_states, ) - def _issue_runner_metrics(self, ssh_conn: SshConnection) -> IssuedMetricEventsStats: + def _issue_runner_metrics(self, conn: OpenstackConnection) -> IssuedMetricEventsStats: """Issue runner metrics. Args: - ssh_conn: SSH connection to the runner instance. + conn: The connection object to access OpenStack cloud. Returns: The stats of issued metric events. @@ -1517,7 +1517,7 @@ def _issue_runner_metrics(self, ssh_conn: SshConnection) -> IssuedMetricEventsSt total_stats: IssuedMetricEventsStats = {} try: - openstack_instances = self._get_openstack_instances(ssh_conn) + openstack_instances = self._get_openstack_instances(conn) except openstack.exceptions.SDKException: logger.exception( "Failed to get openstack instances to ignore when extracting metrics."