Skip to content

Commit

Permalink
Merge branch 'main' into preview-FE
Browse files Browse the repository at this point in the history
  • Loading branch information
SajidAlamQB authored Sep 4, 2024
2 parents 5b7b0b1 + 023a05b commit 826f957
Show file tree
Hide file tree
Showing 13 changed files with 1,072 additions and 47 deletions.
33 changes: 33 additions & 0 deletions package/features/steps/cli_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,16 @@ def exec_viz_command(context):
)


@when("I execute the kedro viz run command with lite option")
def exec_viz_lite_command(context):
    """Start ``kedro viz run --lite --no-browser`` as a child process.

    The command runs from the project root directory with the environment
    held on ``context``; the resulting process handle is stored on
    ``context.result`` for later steps.
    """
    lite_command = [context.kedro, "viz", "run", "--lite", "--no-browser"]
    popen_options = {
        "env": context.env,
        "cwd": str(context.root_project_dir),
    }
    context.result = ChildTerminatingPopen(lite_command, **popen_options)


@then("kedro-viz should start successfully")
def check_kedroviz_up(context):
"""Check that Kedro-Viz is up and responding to requests."""
Expand All @@ -169,3 +179,26 @@ def check_kedroviz_up(context):
)
finally:
context.result.terminate()


@then("I store the response from main endpoint")
def get_main_api_response(context):
    """Poll the Kedro-Viz main endpoint and store its JSON payload.

    The endpoint is retried every 2 seconds for up to 30 seconds. The parsed
    JSON body of the first HTTP 200 response is saved on ``context.response``
    so that a later step can compare against it.

    Raises:
        AssertionError: if no HTTP 200 response with a JSON body is received
            before the deadline.
    """
    # NOTE(review): this step does not terminate the process held in
    # ``context.result`` - confirm that leaving the server running is intended.
    main_endpoint = "http://localhost:4141/api/main"
    max_duration = 30  # seconds to keep polling before giving up
    request_timeout = 5  # seconds; a hung connection must not outlive the deadline
    end_by = time() + max_duration

    while time() < end_by:
        try:
            response = requests.get(main_endpoint, timeout=request_timeout)
            # Parse the body only for a successful response so that an error
            # payload is never stored as the reference response.
            payload = response.json() if response.status_code == 200 else None
        except (requests.RequestException, ValueError):
            # Server not reachable yet, or the body is not valid JSON.
            payload = None
            response = None

        if response is not None and response.status_code == 200:
            context.response = payload
            return

        sleep(2.0)

    # Fail here, with a clear message, instead of letting a later step trip
    # over a missing ``context.response``.
    raise AssertionError(
        f"No successful response from {main_endpoint} within {max_duration} seconds"
    )


@then("I compare the responses in regular and lite mode")
def compare_main_api_responses(context):
    """Check that the main endpoint now returns the previously stored payload.

    Fetches ``/api/main`` once and compares its JSON body with
    ``context.response``, which an earlier step is expected to have set.

    Raises:
        AssertionError: if the request does not return HTTP 200 or the two
            payloads differ.
    """
    # A timeout keeps a hung server from blocking the whole test run.
    response = requests.get("http://localhost:4141/api/main", timeout=10)
    assert (
        response.status_code == 200
    ), f"Main endpoint returned HTTP {response.status_code} in regular mode"

    regular_mode_response = response.json()
    assert (
        context.response == regular_mode_response
    ), "Main endpoint responses differ between lite mode and regular mode"
14 changes: 14 additions & 0 deletions package/features/viz.feature
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,17 @@ Feature: Viz plugin in new project
When I execute the kedro viz run command
Then kedro-viz should start successfully

# Smoke test for lite mode: no step in this scenario installs the project's
# requirements before Kedro-Viz is started with the lite option.
Scenario: Execute viz lite with latest Kedro
Given I have installed kedro version "latest"
And I have run a non-interactive kedro new with spaceflights-pandas starter
When I execute the kedro viz run command with lite option
Then kedro-viz should start successfully

# Step order matters here: the lite-mode response is captured first, before
# the project's requirements are installed; only then is the regular command
# run and its response compared with the stored lite-mode one.
Scenario: Compare viz responses in regular and lite mode
Given I have installed kedro version "latest"
And I have run a non-interactive kedro new with spaceflights-pandas starter
When I execute the kedro viz run command with lite option
Then I store the response from main endpoint
Given I have installed the project's requirements
When I execute the kedro viz run command
Then I compare the responses in regular and lite mode
14 changes: 13 additions & 1 deletion package/kedro_viz/data_access/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import networkx as nx
from kedro.io import DataCatalog
from kedro.io.core import DatasetError
from kedro.io.memory_dataset import MemoryDataset
from kedro.pipeline import Pipeline as KedroPipeline
from kedro.pipeline.node import Node as KedroNode
from sqlalchemy.orm import sessionmaker
Expand Down Expand Up @@ -316,7 +318,17 @@ def add_dataset(
Returns:
The GraphNode instance representing the dataset that was added to the NodesRepository.
"""
obj = self.catalog.get_dataset(dataset_name)
try:
obj = self.catalog.get_dataset(dataset_name)
except DatasetError:
# This is to handle dataset factory patterns when running
# Kedro Viz in lite mode. The `get_dataset` function
# of DataCatalog calls AbstractDataset.from_config
# which tries to create a Dataset instance from the pattern

# pylint: disable=abstract-class-instantiated
obj = MemoryDataset() # type: ignore[abstract]

layer = self.catalog.get_layer_for_dataset(dataset_name)
graph_node: Union[DataNode, TranscodedDataNode, ParametersNode]
(
Expand Down
76 changes: 76 additions & 0 deletions package/kedro_viz/integrations/kedro/data_catalog_lite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""``DataCatalogLite`` is a custom implementation of Kedro's ``DataCatalog``
to provide a MemoryDataset instance when running Kedro-Viz in lite mode.
"""

import copy
from typing import Any, Optional

from kedro.io.core import AbstractDataset, DatasetError, generate_timestamp
from kedro.io.data_catalog import DataCatalog, _resolve_credentials
from kedro.io.memory_dataset import MemoryDataset


class DataCatalogLite(DataCatalog):
    """A ``DataCatalog`` whose ``from_config`` tolerates datasets that cannot
    be built, as needed when Kedro-Viz runs in lite mode.

    Any concrete catalog entry whose dataset cannot be instantiated is
    replaced with a ``MemoryDataset`` instead of aborting catalog creation.
    """

    @classmethod
    def from_config(
        cls,
        catalog: Optional[dict[str, dict[str, Any]]],
        credentials: Optional[dict[str, dict[str, Any]]] = None,
        load_versions: Optional[dict[str, str]] = None,
        save_version: Optional[str] = None,
    ) -> DataCatalog:
        """Build a catalog from configuration, falling back to ``MemoryDataset``.

        Args:
            catalog: Mapping of dataset names (or dataset factory patterns)
                to their configuration. ``None`` is treated as empty.
            credentials: Credentials used to resolve each dataset
                configuration. ``None`` is treated as empty.
            load_versions: Mapping of dataset names to the load version
                handed to ``AbstractDataset.from_config``.
            save_version: Save version for the datasets; a generated
                timestamp is used when not provided.

        Returns:
            An instance of this class holding the created datasets, the
            sorted dataset patterns and, if present, the catch-all pattern
            as the default pattern.

        Raises:
            DatasetError: If a catalog entry is not a dictionary.
        """
        # Work on copies so the caller's configuration is never mutated.
        catalog = copy.deepcopy(catalog) or {}
        credentials = copy.deepcopy(credentials) or {}
        load_versions = copy.deepcopy(load_versions) or {}
        save_version = save_version or generate_timestamp()

        datasets = {}
        dataset_patterns = {}

        for ds_name, raw_config in catalog.items():
            if not isinstance(raw_config, dict):
                raise DatasetError(
                    f"Catalog entry '{ds_name}' is not a valid dataset configuration. "
                    "\nHint: If this catalog entry is intended for variable interpolation, "
                    "make sure that the key is preceded by an underscore."
                )

            # The KeyError handler deliberately spans the whole entry
            # (credential resolution, pattern registration and dataset
            # creation), not just the credential lookup.
            try:
                resolved_config = _resolve_credentials(raw_config, credentials)

                if cls._is_pattern(ds_name):
                    # Dataset factories are collected and sorted after the loop.
                    dataset_patterns[ds_name] = resolved_config
                    continue

                try:
                    datasets[ds_name] = AbstractDataset.from_config(
                        ds_name,
                        resolved_config,
                        load_versions.get(ds_name),
                        save_version,
                    )
                except DatasetError:
                    # pylint: disable=abstract-class-instantiated
                    datasets[ds_name] = MemoryDataset()  # type: ignore[abstract]
            except KeyError:
                # pylint: disable=abstract-class-instantiated
                datasets[ds_name] = MemoryDataset()  # type: ignore[abstract]

        sorted_patterns = cls._sort_patterns(dataset_patterns)
        user_default = {}

        if sorted_patterns:
            # A last pattern with specificity 0 is a catch-all: take it out
            # of the regular patterns and use it as the default pattern.
            final_pattern = list(sorted_patterns)[-1]
            if cls._specificity(final_pattern) == 0:
                default_name, default_config = sorted_patterns.popitem()
                user_default = {default_name: default_config}

        return cls(
            datasets=datasets,
            dataset_patterns=sorted_patterns,
            load_versions=load_versions,
            save_version=save_version,
            default_pattern=user_default,
        )
99 changes: 84 additions & 15 deletions package/kedro_viz/integrations/kedro/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,22 @@

import json
import logging
import sys
from pathlib import Path
from typing import Any, Dict, Optional, Tuple
from typing import Any, Dict, Optional, Set, Tuple
from unittest.mock import patch

from kedro import __version__
from kedro.framework.project import configure_project, pipelines
from kedro.framework.project import configure_project, pipelines, settings
from kedro.framework.session import KedroSession
from kedro.framework.session.store import BaseSessionStore
from kedro.framework.startup import bootstrap_project
from kedro.io import DataCatalog
from kedro.pipeline import Pipeline

from kedro_viz.constants import VIZ_METADATA_ARGS
from kedro_viz.integrations.kedro.data_catalog_lite import DataCatalogLite
from kedro_viz.integrations.kedro.lite_parser import LiteParser

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -69,33 +73,29 @@ def _get_dataset_stats(project_path: Path) -> Dict:
return {}


def load_data(
def _load_data_helper(
project_path: Path,
env: Optional[str] = None,
include_hooks: bool = False,
package_name: Optional[str] = None,
extra_params: Optional[Dict[str, Any]] = None,
) -> Tuple[DataCatalog, Dict[str, Pipeline], BaseSessionStore, Dict]:
"""Load data from a Kedro project.
is_lite: bool = False,
):
"""Helper to load data from a Kedro project.
Args:
project_path: the path where the Kedro project is located.
env: the Kedro environment to load the data. If not provided.
it will use Kedro default, which is local.
include_hooks: A flag to include all registered hooks in your Kedro Project.
package_name: The name of the current package
extra_params: Optional dictionary containing extra project parameters
for underlying KedroContext. If specified, will update (and therefore
take precedence over) the parameters retrieved from the project
configuration.
is_lite: A flag to run Kedro-Viz in lite mode.
Returns:
A tuple containing the data catalog and the pipeline dictionary
and the session store.
A tuple containing the data catalog, pipeline dictionary, session store
and dataset stats dictionary.
"""
if package_name:
configure_project(package_name)
else:
# bootstrap project when viz is run in dev mode
bootstrap_project(project_path)

with KedroSession.create(
project_path=project_path,
Expand All @@ -109,12 +109,81 @@ def load_data(

context = session.load_context()
session_store = session._store

# Update the DataCatalog class for a custom implementation
# to handle kedro.io.core.DatasetError from
# `settings.DATA_CATALOG_CLASS.from_config`
if is_lite:
settings.DATA_CATALOG_CLASS = DataCatalogLite

catalog = context.catalog

# Pipelines is a lazy dict-like object, so we force it to populate here
# in case user doesn't have an active session down the line when it's first accessed.
# Useful for users who have `get_current_session` in their `register_pipelines()`.
pipelines_dict = dict(pipelines)
stats_dict = _get_dataset_stats(project_path)

return catalog, pipelines_dict, session_store, stats_dict


def load_data(
    project_path: Path,
    env: Optional[str] = None,
    include_hooks: bool = False,
    package_name: Optional[str] = None,
    extra_params: Optional[Dict[str, Any]] = None,
    is_lite: bool = False,
) -> Tuple[DataCatalog, Dict[str, Pipeline], BaseSessionStore, Dict]:
    """Load data from a Kedro project.

    Args:
        project_path: the path where the Kedro project is located.
        env: the Kedro environment to load the data. If not provided.
            it will use Kedro default, which is local.
        include_hooks: A flag to include all registered hooks in your Kedro Project.
        package_name: The name of the current package
        extra_params: Optional dictionary containing extra project parameters
            for underlying KedroContext. If specified, will update (and therefore
            take precedence over) the parameters retrieved from the project
            configuration.
        is_lite: A flag to run Kedro-Viz in lite mode.

    Returns:
        A tuple containing the data catalog, pipeline dictionary, session store
        and dataset stats dictionary.
    """
    if package_name:
        configure_project(package_name)
    else:
        # bootstrap project when viz is run in dev mode
        bootstrap_project(project_path)

    if not is_lite:
        # Keyword arguments bind each value to the intended helper parameter
        # regardless of the helper's positional order.
        return _load_data_helper(
            project_path=project_path,
            env=env,
            include_hooks=include_hooks,
            extra_params=extra_params,
            is_lite=is_lite,
        )

    lite_parser = LiteParser(package_name)
    unresolved_imports = lite_parser.parse(project_path)
    sys_modules_patch = sys.modules.copy()

    # A falsy result (``None`` or empty) means there is nothing to mock.
    if unresolved_imports:
        # Merge the module sets of every parsed entry into one set.
        modules_to_mock: Set[str] = set().union(*unresolved_imports.values())

        mocked_modules = lite_parser.create_mock_modules(modules_to_mock)
        sys_modules_patch.update(mocked_modules)

        logger.warning(
            "Kedro-Viz has mocked the following dependencies for lite-mode.\n"
            "%s \n"
            "In order to get a complete experience of Viz, "
            "please install the missing Kedro project dependencies\n",
            list(mocked_modules.keys()),
        )

    # Patch actual sys modules for the duration of the load only;
    # ``patch.dict`` restores ``sys.modules`` when the block exits.
    with patch.dict("sys.modules", sys_modules_patch):
        return _load_data_helper(
            project_path=project_path,
            env=env,
            include_hooks=include_hooks,
            extra_params=extra_params,
            is_lite=is_lite,
        )
Loading

0 comments on commit 826f957

Please sign in to comment.