Skip to content

Commit

Permalink
Ss/add_omegaconfig_support (#73)
Browse files Browse the repository at this point in the history
* remove deprecated kedro.extras.datasets package

* Replaced `AbstractDataSet` with `AbstractDataset`

* Bumped kedro dependency to 0.18.5

* Replaced `kedro.extras.datasets` with `kedro_datasets`

* Updated example to use OmegaConfig

* Updated example project settings

* Updated e2e

* Replaced `CSVDataSet` with `CSVDataset`; expanded e2e tests to Python 3.11

* Added staged files

* Updated import sequence in example

* Updated changelog

* Update CHANGELOG.md

---------

Co-authored-by: Markus Sagen <[email protected]>
  • Loading branch information
SiddhantSadangi and MarkusSagen authored Nov 10, 2023
1 parent 423ef49 commit 978564b
Show file tree
Hide file tree
Showing 11 changed files with 60 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
max-parallel: 4
matrix:
os: [ubuntu-latest, macos-latest]
python-version: [3.7, 3.8, 3.9, "3.10"]
python-version: [3.8, 3.9, "3.10", "3.11"]
steps:
- uses: actions/checkout@v3

Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
## 0.3.0

### Features
- Added `OmegaConfigLoader` support ([#73](https://github.com/neptune-ai/kedro-neptune/pull/73))

### Fixes
- Replaced `AbstractDataSet`, `TextDataSet` and `CSVDataSet` with `AbstractDataset`, `TextDataset` and `CSVDataset` respectively ([#73](https://github.com/neptune-ai/kedro-neptune/pull/73))
- Replaced `kedro.extras.datasets` with `kedro_datasets` ([#73](https://github.com/neptune-ai/kedro-neptune/pull/73))

### Changes
- Added `kedro_datasets` to requirements, and bumped `kedro` to `>=0.18.5` ([#73](https://github.com/neptune-ai/kedro-neptune/pull/73))

## 0.2.0

### Fixes
Expand Down
2 changes: 1 addition & 1 deletion examples/planets/conf/base/catalog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
# Documentation for this file format can be found in "The Data Catalog"
# Link: https://kedro.readthedocs.io/en/stable/data/data_catalog.html
planets:
type: pandas.CSVDataSet
type: pandas.CSVDataset
filepath: data/planets/planets.csv
2 changes: 1 addition & 1 deletion examples/planets/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[tool.kedro]
package_name = "planets"
project_name = "planets"
project_version = "0.18.2"
kedro_init_version = "0.18.5"
21 changes: 16 additions & 5 deletions examples/planets/src/planets/pipelines/furthest_planet.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
Dict,
)

import neptune
import pandas as pd
from kedro.pipeline import (
Pipeline,
node,
)
from neptune import new as neptune


# ------ Looking for furthest planet -------
Expand All @@ -22,15 +22,19 @@ def distances(planets: pd.DataFrame) -> Any:
def furthest(distances_to_planets: pd.DataFrame) -> Dict[str, Any]:
furthest_planet = distances_to_planets.iloc[distances_to_planets["Distance from Sun"].argmax()]
return dict(
furthest_planet_name=furthest_planet.Planet, furthest_planet_distance=furthest_planet["Distance from Sun"]
furthest_planet_name=furthest_planet.Planet,
furthest_planet_distance=furthest_planet["Distance from Sun"],
)


def travel_time(furthest_planet_distance: float, furthest_planet_name: str, travel_speed: float) -> float:
travel_hours = furthest_planet_distance / travel_speed

neptune_run = neptune.init_run(
capture_stdout=False, capture_stderr=False, capture_hardware_metrics=False, source_files=[]
capture_stdout=False,
capture_stderr=False,
capture_hardware_metrics=False,
source_files=[],
)

neptune_run["furthest_planet/name"] = furthest_planet_name
Expand All @@ -56,12 +60,19 @@ def create_pipeline(**kwargs):
node(
furthest,
["distances_to_planets"],
dict(furthest_planet_name="furthest_planet_name", furthest_planet_distance="furthest_planet_distance"),
dict(
furthest_planet_name="furthest_planet_name",
furthest_planet_distance="furthest_planet_distance",
),
name="furthest",
),
node(
travel_time,
["furthest_planet_distance", "furthest_planet_name", "params:travel_speed"],
[
"furthest_planet_distance",
"furthest_planet_name",
"params:travel_speed",
],
"travel_hours",
name="travel_time",
),
Expand Down
6 changes: 3 additions & 3 deletions examples/planets/src/planets/pipelines/moons_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
)

try:
from neptune.new.handler import Handler
from neptune.new.utils import stringify_unsupported
except ImportError:
from neptune.handler import Handler
from neptune.utils import stringify_unsupported
except ImportError:
from neptune.new.handler import Handler
from neptune.new.utils import stringify_unsupported


# ------- Number of moons predictor --------
Expand Down
14 changes: 11 additions & 3 deletions examples/planets/src/planets/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,19 @@

# Class that manages how configuration is loaded.
# from kedro.config import TemplatedConfigLoader
from kedro.config import OmegaConfigLoader # noqa: E402

# CONFIG_LOADER_CLASS = TemplatedConfigLoader
CONFIG_LOADER_CLASS = OmegaConfigLoader
# Keyword arguments to pass to the `CONFIG_LOADER_CLASS` constructor.
# CONFIG_LOADER_ARGS = {
# "globals_pattern": "*globals.yml",
# }
CONFIG_LOADER_ARGS = {
"config_patterns": {
# "spark" : ["spark*/"],
# "parameters": ["parameters*", "parameters*/**", "**/parameters*"],
"credentials_neptune": ["credentials_neptune*"],
"neptune": ["neptune*"],
}
}

# Class that manages the Data Catalog.
# from kedro.io import DataCatalog
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ python = "^3.7"
importlib-metadata = { version = "*", python = "<3.8" }

# Base requirements
kedro = ">=0.18.0"
kedro = ">=0.18.5"
kedro-datasets = ">=1.8.0"
"ruamel.yaml" = "^0.17.0"

# dev
Expand Down
18 changes: 9 additions & 9 deletions src/kedro_neptune/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
)

import click
from kedro.extras.datasets.text import TextDataSet
from kedro.framework.hooks import hook_impl
from kedro.framework.project import settings
from kedro.framework.session import KedroSession
Expand All @@ -39,12 +38,13 @@
MemoryDataSet,
)
from kedro.io.core import (
AbstractDataSet,
AbstractDataset,
Version,
get_filepath_str,
)
from kedro.pipeline import Pipeline
from kedro.pipeline.node import Node
from kedro_datasets.text import TextDataset
from ruamel.yaml import YAML

from kedro_neptune.config import get_neptune_config
Expand Down Expand Up @@ -213,7 +213,7 @@ def _connection_mode(enabled: bool) -> str:
return "async" if enabled else "debug"


class NeptuneRunDataSet(AbstractDataSet):
class NeptuneRunDataSet(AbstractDataset):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._run: Optional[neptune.Run] = None
Expand Down Expand Up @@ -273,7 +273,7 @@ def _release(self) -> None:
self._loaded = False


class BinaryFileDataSet(TextDataSet):
class BinaryFileDataSet(TextDataset):
def __init__(
self,
filepath: str,
Expand Down Expand Up @@ -312,11 +312,11 @@ class NeptuneFileDataSet(BinaryFileDataSet):
Args:
filepath: Filepath in POSIX format to a text file prefixed with a protocol like s3://.
Same as for Kedro TextDataSet.
Same as for Kedro TextDataset.
credentials: Credentials required to get access to the underlying filesystem.
Same as for Kedro TextDataSet.
Same as for Kedro TextDataset.
fs_args: Extra arguments to pass into underlying filesystem class constructor.
Same as for Kedro TextDataSet.
Same as for Kedro TextDataset.
Examples:
Log a file to Neptune from any Kedro catalog YML file:
Expand All @@ -328,7 +328,7 @@ class NeptuneFileDataSet(BinaryFileDataSet):
Log a file to Neptune that has already been defined as a Kedro DataSet in any catalog YML file:
example_iris_data:
type: pandas.CSVDataSet
type: pandas.CSVDataset
filepath: data/01_raw/iris.csv
example_iris_data@neptune:
Expand Down Expand Up @@ -373,7 +373,7 @@ def log_parameters(namespace: Handler, catalog: DataCatalog):
namespace[f"parameters/{param_name}"] = value


def log_dataset_metadata(namespace: Handler, name: str, dataset: AbstractDataSet):
def log_dataset_metadata(namespace: Handler, name: str, dataset: AbstractDataset):
additional_parameters = {}
try:
additional_parameters = dataset._describe()
Expand Down
4 changes: 2 additions & 2 deletions src/kedro_neptune/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ class NeptuneConfig:

def get_neptune_config(settings) -> NeptuneConfig:
config_loader = settings.CONFIG_LOADER_CLASS(settings.CONF_SOURCE, **settings.CONFIG_LOADER_ARGS)
credentials = config_loader.get("credentials_neptune*")
config = config_loader.get("neptune*")
credentials = config_loader["credentials_neptune"]
config = config_loader["neptune"]

api_token = parse_config_value(credentials["neptune"]["api_token"])
project = parse_config_value(config["neptune"]["project"])
Expand Down
3 changes: 2 additions & 1 deletion tests/kedro_neptune/utils/run_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ def assert_structure(travel_speed: int = 10000):
assert run["kedro/catalog/datasets/planets"].fetch() == {
"filepath": f"{os.getcwd()}/data/planets/planets.csv",
"name": "planets",
"protocol": "file",
"save_args": {"index": False},
"type": "CSVDataSet",
"type": "CSVDataset",
"version": "None",
}

Expand Down

0 comments on commit 978564b

Please sign in to comment.