diff --git a/package/features/environment.py b/package/features/environment.py
index 1966c27da1..ad507fb5cb 100644
--- a/package/features/environment.py
+++ b/package/features/environment.py
@@ -109,7 +109,7 @@ def _setup_context_with_venv(context, venv_dir):
             "botocore",
             "dynaconf==3.1.5",
             "PyYAML>=4.2, <6.0",
-            "click<8.0"
+            "click<8.0",
         ],
         env=context.env,
     )
diff --git a/package/kedro_viz/models/graph.py b/package/kedro_viz/models/graph.py
index a6e4cd07a5..1b8c2678b9 100644
--- a/package/kedro_viz/models/graph.py
+++ b/package/kedro_viz/models/graph.py
@@ -38,7 +38,7 @@
 from enum import Enum
 from pathlib import Path
 from types import FunctionType
-from typing import Any, Dict, List, Optional, Set, Union, cast
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Union, cast
 
 import pandas as pd
 import plotly.express as px
@@ -49,6 +49,12 @@
 from kedro.pipeline.pipeline import TRANSCODING_SEPARATOR, _strip_transcoding
 from pandas.core.frame import DataFrame
 
+# Only import MetricsDataSet at top level for type checking,
+# so it doesn't blow up if the user doesn't have the dataset dependencies installed.
+if TYPE_CHECKING:  # pragma: no cover
+    from kedro.extras.datasets.tracking.metrics_dataset import MetricsDataSet
+
+
 logger = logging.getLogger(__name__)
 
 
@@ -514,9 +520,10 @@ def __post_init__(self, data_node: DataNode):
         dataset = cast(MetricsDataSet, dataset)
         if not dataset._exists() or self.filepath is None:
             return
-        load_path = get_filepath_str(dataset._get_load_path(), dataset._protocol)
-        with dataset._fs.open(load_path, **dataset._fs_open_args_load) as fs_file:
-            self.metrics = json.load(fs_file)
+        metrics = self.load_latest_metrics_data(dataset)
+        if not metrics:
+            return
+        self.metrics = metrics
         metrics_data = self.load_metrics_versioned_data(self.filepath)
         if not metrics_data:
             return
@@ -528,6 +535,28 @@ def __post_init__(self, data_node: DataNode):
         if not data_node.is_free_input:
             self.run_command = f'kedro run --to-outputs="{data_node.full_name}"'
 
+    @staticmethod
+    def load_latest_metrics_data(
+        dataset: "MetricsDataSet",
+    ) -> Optional[Dict[str, float]]:
+        """Load data for the latest version of the metrics dataset.
+        A similar operation exists in kedro.io.core (_fetch_latest_load_version),
+        but that function is cached and hence cannot be relied upon here.
+        Args:
+            dataset: the metrics dataset to load the latest version of
+        Returns:
+            A dictionary containing the JSON data for the latest version
+        """
+        pattern = str(dataset._get_versioned_path("*"))
+        version_paths = sorted(dataset._glob_function(pattern), reverse=True)
+        most_recent = next(
+            (path for path in version_paths if dataset._exists_function(path)), None
+        )
+        if not most_recent:
+            return None
+        with dataset._fs.open(most_recent, **dataset._fs_open_args_load) as fs_file:
+            return json.load(fs_file)
+
     @staticmethod
     def load_metrics_versioned_data(
         filepath: str, num_versions: int = 10
diff --git a/package/tests/test_models/test_graph/test_graph_nodes.py b/package/tests/test_models/test_graph/test_graph_nodes.py
index b6922d920c..db59848583 100644
--- a/package/tests/test_models/test_graph/test_graph_nodes.py
+++ b/package/tests/test_models/test_graph/test_graph_nodes.py
@@ -25,8 +25,10 @@
 #
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# pylint: disable=too-many-public-methods
 import datetime
 import json
+import shutil
 from pathlib import Path
 from textwrap import dedent
 from unittest.mock import MagicMock, call, patch
@@ -35,6 +37,7 @@
 import pytest
 from kedro.extras.datasets.pandas import CSVDataSet, ParquetDataSet
 from kedro.extras.datasets.spark import SparkDataSet
+from kedro.extras.datasets.tracking.metrics_dataset import MetricsDataSet
 from kedro.io import MemoryDataSet, PartitionedDataSet
 from kedro.pipeline.node import node
 
@@ -397,14 +400,14 @@ def test_plotly_data_node_dataset_not_exist(self, patched_import):
         plotly_node_metadata = DataNodeMetadata(data_node=plotly_data_node)
         assert not hasattr(plotly_node_metadata, "plot")
 
-    @patch("json.load")
     @patch("kedro_viz.models.graph.DataNodeMetadata.load_metrics_versioned_data")
+    @patch("kedro_viz.models.graph.DataNodeMetadata.load_latest_metrics_data")
     @patch("kedro_viz.models.graph.DataNodeMetadata.create_metrics_plot")
     def test_metrics_data_node_metadata(
         self,
         patched_metrics_plot,
+        patched_latest_metrics,
         patched_data_loader,
-        patched_json_load,
     ):
         mock_metrics_data = {
             "recommendations": 0.0009277445547700936,
@@ -432,8 +435,8 @@
                 }
             ]
         }
-        patched_json_load.return_value = mock_metrics_data
         patched_data_loader.return_value = mock_version_data
+        patched_latest_metrics.return_value = mock_metrics_data
         patched_metrics_plot.return_value = mock_plot_data
         metrics_data_node = MagicMock()
         metrics_data_node.is_plot_node.return_value = False
@@ -448,22 +451,35 @@ def test_metrics_data_node_metadata_dataset_not_exist(self):
         metrics_data_node.is_metric_node.return_value = True
         metrics_data_node.kedro_obj._exists.return_value = False
         metrics_node_metadata = DataNodeMetadata(data_node=metrics_data_node)
-        assert not hasattr(metrics_node_metadata, "metric")
+        assert not hasattr(metrics_node_metadata, "metrics")
         assert not hasattr(metrics_node_metadata, "plot")
 
-    @patch("json.load")
+    @patch("kedro_viz.models.graph.DataNodeMetadata.load_latest_metrics_data")
+    def test_metrics_data_node_metadata_latest_metrics_not_exist(
+        self,
+        patched_latest_metrics,
+    ):
+        patched_latest_metrics.return_value = None
+        metrics_data_node = MagicMock()
+        metrics_data_node.is_plot_node.return_value = False
+        metrics_data_node.is_metric_node.return_value = True
+        metrics_node_metadata = DataNodeMetadata(data_node=metrics_data_node)
+        assert not hasattr(metrics_node_metadata, "metrics")
+        assert not hasattr(metrics_node_metadata, "plot")
+
+    @patch("kedro_viz.models.graph.DataNodeMetadata.load_latest_metrics_data")
     @patch("kedro_viz.models.graph.DataNodeMetadata.load_metrics_versioned_data")
     def test_metrics_data_node_metadata_versioned_dataset_not_exist(
         self,
         patched_data_loader,
-        patched_json_load,
+        patched_latest_metrics,
     ):
         mock_metrics_data = {
             "recommendations": 0.0009277445547700936,
             "recommended_controls": 0.001159680693462617,
             "projected_optimization": 0.0013916168321551402,
         }
-        patched_json_load.return_value = mock_metrics_data
+        patched_latest_metrics.return_value = mock_metrics_data
         patched_data_loader.return_value = {}
         metrics_data_node = MagicMock()
         metrics_data_node.is_plot_node.return_value = False
@@ -488,7 +504,7 @@ def test_data_node_metadata_create_metrics_plot(self):
         assert "layout" in test_plot
 
     @pytest.fixture
-    def metrics_filepath(self, tmpdir):
+    def metrics_filepath(self, tmp_path):
         dir_name = ["2021-09-10T09.02.44.245Z", "2021-09-10T09.03.23.733Z"]
         filename = "metrics.json"
         json_content = [
@@ -503,7 +519,7 @@ def metrics_filepath(self, tmpdir):
                 "projected_optimization": 0.30057499608184196,
             },
         ]
-        source_dir = Path(tmpdir / filename)
+        source_dir = Path(tmp_path / filename)
         for index, directory in enumerate(dir_name):
             filepath = Path(source_dir / directory / filename)
             filepath.parent.mkdir(parents=True, exist_ok=True)
@@ -511,7 +527,30 @@ def metrics_filepath(self, tmpdir):
         return source_dir
 
     @pytest.fixture
-    def metrics_filepath_invalid_timestamp(self, tmpdir):
+    def metrics_filepath_reload(self, tmp_path):
+        dir_name = ["2021-09-10T09.03.55.245Z", "2021-09-10T09.03.56.733Z"]
+        filename = "metrics.json"
+        json_content = [
+            {
+                "recommendations": 0.4,
+                "recommended_controls": 0.5,
+                "projected_optimization": 0.6,
+            },
+            {
+                "recommendations": 0.7,
+                "recommended_controls": 0.8,
+                "projected_optimization": 0.9,
+            },
+        ]
+        source_dir = Path(tmp_path / filename)
+        for index, directory in enumerate(dir_name):
+            filepath = Path(source_dir / directory / filename)
+            filepath.parent.mkdir(parents=True, exist_ok=True)
+            filepath.write_text(json.dumps(json_content[index]))
+        return source_dir
+
+    @pytest.fixture
+    def metrics_filepath_invalid_timestamp(self, tmp_path):
         dir_name = ["2021", "2021"]
         filename = "metrics.json"
         json_content = [
@@ -526,13 +565,31 @@ def metrics_filepath_invalid_timestamp(self, tmpdir):
                 "projected_optimization": 0.30057499608184196,
             },
         ]
-        source_dir = Path(tmpdir / filename)
+        source_dir = Path(tmp_path / filename)
         for index, directory in enumerate(dir_name):
             filepath = Path(source_dir / directory / filename)
             filepath.parent.mkdir(parents=True, exist_ok=True)
             filepath.write_text(json.dumps(json_content[index]))
         return source_dir
 
+    def test_load_latest_metrics(self, tmp_path):
+        # Note: filepath is set to temp.json as a temporary workaround instead of using
+        # metrics_filepath, as that fails on the Windows build. This will be cleaned up in the future.
+        filename = "temp.json"
+        dataset = MetricsDataSet(filepath=filename)
+        data = {"col1": 1, "col2": 0.23, "col3": 0.002}
+        dataset.save(data)
+        assert DataNodeMetadata.load_latest_metrics_data(dataset) == data
+        new_data = {"col1": 3, "col2": 3.23, "col3": 3.002}
+        dataset.save(new_data)
+        assert DataNodeMetadata.load_latest_metrics_data(dataset) == new_data
+        shutil.rmtree(filename)
+
+    def test_load_latest_metrics_fail(self, mocker, metrics_filepath):
+        dataset = MetricsDataSet(filepath=f"{metrics_filepath}")
+        mocker.patch.object(dataset, "_exists_function", return_value=False)
+        assert DataNodeMetadata.load_latest_metrics_data(dataset) is None
+
     def test_load_metrics_versioned_data(self, metrics_filepath):
         mock_metrics_json = {
             datetime.datetime(2021, 9, 10, 9, 2, 44, 245000): {