Release preparation v0.15.0 #121

Merged · 15 commits · May 15, 2024 · Changes from all commits
2 changes: 1 addition & 1 deletion .github/workflows/docs.yaml
@@ -19,7 +19,7 @@ jobs:
- name: 🔨 Setup poetry
uses: abatilo/[email protected]
with:
poetry-version: "1.4.0"
poetry-version: "1.7.1"
- name: 🔨Install dependencies
run: |
poetry config virtualenvs.create false
8 changes: 4 additions & 4 deletions .github/workflows/pytest.yaml
@@ -3,8 +3,6 @@ name: 🧪 pytest
on:
pull_request:
types: [ opened, reopened, synchronize ]
pull_request_target:
types: [ opened, reopened, synchronize ]
workflow_call:

jobs:
@@ -13,7 +11,7 @@ jobs:
fail-fast: false
matrix:
python-version: [ "3.9","3.10","3.11"]
os: [ ubuntu-latest, macos-latest]
os: [ ubuntu-latest, macos-12]
runs-on: ${{ matrix.os }}
timeout-minutes: 30
name: 🧪 Run pytests
@@ -27,11 +25,13 @@
- name: 🔨 Setup poetry
uses: abatilo/[email protected]
with:
poetry-version: "1.4.0"
poetry-version: "1.7.1"
- name: 🔨Install dependencies
run: |
poetry config virtualenvs.create false
pip install --no-build-isolation pendulum==2.1.2
poetry install --no-interaction -E visu -E tensorflow --with dev --no-ansi

- run: mkdir results && touch results/test-results-${{ matrix.python-version }}-${{matrix.os}}-summary.md
- name: 🧪 Run tests
uses: nuhrberg/pytest-summary@main
2 changes: 1 addition & 1 deletion .github/workflows/release.yaml
@@ -29,7 +29,7 @@ jobs:
- name: 🔨 Setup poetry
uses: abatilo/[email protected]
with:
poetry-version: "1.4.0"
poetry-version: "1.7.1"
- name: 🔨Install dependencies
run: |
poetry config virtualenvs.create false
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,15 @@

<!--next-version-placeholder-->

## v0.14.1 (2024-04-16)

### Fix

* Test pipeline pendulum dependency ([`299426a`](https://github.com/codecentric-oss/niceml/commit/299426a27cfaf4f46e79958f600cdc1a8ad05466))
* Update poetry in Github pipelines to 1.7.1 ([`2d0dd2e`](https://github.com/codecentric-oss/niceml/commit/2d0dd2e2f250fe7517c12b68f7d36802ddd73964))
* Load non-parq files with DFLoader load_df ([`b693710`](https://github.com/codecentric-oss/niceml/commit/b693710fdf03962a543579342ba95b359b634974))
* Add original model id to eval experiment info ([`494c101`](https://github.com/codecentric-oss/niceml/commit/494c101fcd5993971cdb1b39895494cf6de82293))

## v0.14.0 (2024-02-08)

### Feature
2 changes: 1 addition & 1 deletion README.md
@@ -23,7 +23,7 @@ It provides pipelines for a variety of ML tasks, like
All you have to do is configure your pipeline, and you're ready to go!

You can also add your own components to the build-in dashboard,
where you can compair the results and performance of your ML models.
where you can compare the results and performance of your ML models.

Further documentation is available at [niceML.io](https://niceml.io).

@@ -12,7 +12,7 @@ defaults:
- ops/[email protected]: exptests_default.yaml
# experiment locations
- shared/locations@globals: exp_locations.yaml
# ressources
# resources
- resources/[email protected]: res_mlflow_base.yaml
- _self_

@@ -12,7 +12,7 @@ defaults:
- ops/[email protected]: exptests_default.yaml
# experiment locations
- shared/locations@globals: exp_locations.yaml
# ressources
# resources
- resources/[email protected]: res_mlflow_base.yaml
- _self_

2 changes: 1 addition & 1 deletion docs/index.md
@@ -12,7 +12,7 @@ It provides pipelines for a variety of ML tasks, like
All you have to do is configure your pipeline, and you're ready to go!

You can also add your own components to the built-in dashboard,
where you can compair the results and performance of your ML models.
where you can compare the results and performance of your ML models.


The documentation of **niceML** is separated into four paths:
2 changes: 1 addition & 1 deletion niceml/__init__.py
@@ -1 +1 @@
__version__ = "0.14.0"
__version__ = "0.14.1"
1 change: 1 addition & 0 deletions niceml/config/envconfig.py
@@ -13,6 +13,7 @@
DESCRIPTION_KEY = "DESCRIPTION"
LOCAL_EXP_CACHE_PATH_KEY = "LOCAL_EXP_CACHE_PATH"
LAST_MODIFIED_KEY = "LAST_MODIFIED"
PRETRAINED_MODEL_KEY = "PRETRAINED_MODEL"


def replace_id_keys(input_str: str, short_id: str, run_id: str) -> str:
9 changes: 8 additions & 1 deletion niceml/dagster/ops/evalcopyexp.py
@@ -7,7 +7,13 @@
from fsspec import AbstractFileSystem
from tqdm import tqdm

from niceml.config.envconfig import DESCRIPTION_KEY, RUN_ID_KEY, SHORT_ID_KEY
from niceml.config.envconfig import (
DESCRIPTION_KEY,
RUN_ID_KEY,
SHORT_ID_KEY,
PRETRAINED_MODEL_KEY,
ENVIRONMENT_KEY,
)
from niceml.dagster.ops.experiment import create_exp_settings
from niceml.experiments.experimentcontext import ExperimentContext
from niceml.experiments.expfilenames import ExperimentFilenames, ExpEvalCopyNames
@@ -84,6 +90,7 @@ def change_ids_from_expinfo(
with file_system.open(exp_info_path, "r") as cur_file:
data = yaml.load(cur_file, Loader=yaml.SafeLoader)

data[ENVIRONMENT_KEY][PRETRAINED_MODEL_KEY] = data[SHORT_ID_KEY]
data[RUN_ID_KEY] = run_id
data[SHORT_ID_KEY] = short_id
data[DESCRIPTION_KEY] = (
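The new `PRETRAINED_MODEL_KEY` entry keeps a pointer back to the evaluated model after the experiment ids are rewritten. A minimal sketch of the effect on the loaded yaml dict (the ids are made up, and the literal `"RUN_ID"`/`"SHORT_ID"` strings are assumptions based on the `*_KEY` naming in envconfig.py):

```python
# Sketch of the id rewrite in change_ids_from_expinfo (illustrative values).
data = {
    "ENVIRONMENT": {},
    "RUN_ID": "old-run-id",
    "SHORT_ID": "abc123",
    "DESCRIPTION": "source experiment",
}

data["ENVIRONMENT"]["PRETRAINED_MODEL"] = data["SHORT_ID"]  # preserve original model id
data["RUN_ID"] = "new-run-id"      # new eval run id
data["SHORT_ID"] = "def456"        # new eval short id

assert data["ENVIRONMENT"]["PRETRAINED_MODEL"] == "abc123"
```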
2 changes: 2 additions & 0 deletions niceml/dagster/ops/filelockops.py
@@ -24,6 +24,7 @@ def release_locks(_: OpExecutionContext, filelock_dict: dict):
"""op for releasing locks"""
for filelock in filelock_dict.values():
filelock.release()
return filelock_dict


@op(config_schema=dict(filelock_dict=HydraMapField(FileLock)))
@@ -34,3 +35,4 @@ def clear_locks(context: OpExecutionContext):
filelock_dict = instantiated_op_config["filelock_dict"]
for filelock in filelock_dict.values():
filelock.force_delete()
return filelock_dict
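Both ops now return `filelock_dict` instead of ending in `None`; in Dagster, an op's return value becomes an output that downstream ops can depend on, so the release/clear step can be wired explicitly into a job. A rough, self-contained sketch of that pattern — plain `threading.Lock` objects stand in for niceml's `FileLock`, and all op/job names here are made up:

```python
import threading

from dagster import job, op


@op
def acquire_locks_sketch() -> dict:
    # stand-in for niceml's lock acquisition op
    locks = {"experiment": threading.Lock()}
    for lock in locks.values():
        lock.acquire()
    return locks


@op
def do_work_sketch(filelock_dict: dict) -> dict:
    # ... work that must run while the locks are held ...
    return filelock_dict


@op
def release_locks_sketch(filelock_dict: dict) -> dict:
    for lock in filelock_dict.values():
        lock.release()
    # returning the dict (as in the change above) gives the op an output
    # that later ops can consume, instead of a dead end in the graph
    return filelock_dict


@job
def locked_job_sketch():
    release_locks_sketch(do_work_sketch(acquire_locks_sketch()))
```

(Plain threading locks only survive the in-process executor; this is purely to illustrate the data dependency.)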
2 changes: 1 addition & 1 deletion niceml/dashboard/components/expviscomponent.py
@@ -25,8 +25,8 @@ def __init__(
target_value_list: Optional[List[Any]] = None,
assert_on_error: bool = False,
):
# Create empty list for chart images
self.component_name: Optional[str] = component_name
# Create empty list for chart images
self.chart_images_list: List[Image.Image] = []
self.meta_function = meta_function
self.target_value_list = [] if target_value_list is None else target_value_list
4 changes: 4 additions & 0 deletions niceml/dashboard/components/prefixviscomponent.py
@@ -47,6 +47,7 @@ def _render(
subset_name: Optional[str]: Render the experiment data to a subset

"""
# select components for prefix
exp_data_list: List[ExperimentData] = [
exp_manager.get_exp_by_id(exp_id) for exp_id in exp_ids
]
@@ -74,11 +75,14 @@ def _render(
comp.get_component_name() or f"Component {comp_index}"
)

# arrange tabs
comp_index = 0
if self.use_tabs:
st_comp_list = list(st.tabs(comp_names))
else:
st_comp_list = [st.expander(label) for label in comp_names]

# render components
for comp_key, cur_comps in self.components.items():
if comp_key in exp_dict:
for comp in cur_comps:
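For reference, the tabs-vs-expanders arrangement that the new comments annotate boils down to this Streamlit pattern (a standalone sketch; the flag and component names are illustrative):

```python
import streamlit as st

use_tabs = True  # corresponds to the component's use_tabs flag
comp_names = ["Metrics", "Confusion matrix"]  # illustrative component names

if use_tabs:
    st_comp_list = list(st.tabs(comp_names))  # one tab per component
else:
    st_comp_list = [st.expander(label) for label in comp_names]

for st_comp in st_comp_list:
    with st_comp:
        st.write("component renders here")
```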
39 changes: 26 additions & 13 deletions niceml/data/dataloaders/dfloaders.py
@@ -8,12 +8,12 @@
from niceml.data.dataloaders.interfaces.dfloader import DfLoader
from niceml.data.storages.localstorage import LocalStorage
from niceml.data.storages.storageinterface import StorageInterface
from niceml.experiments.loaddatafunctions import LoadParquetFile
from niceml.utilities.ioutils import read_parquet, write_parquet
from niceml.experiments.loaddatafunctions import LoadParquetFile, LoadCsvFile
from niceml.utilities.ioutils import read_parquet, write_parquet, read_csv, write_csv


class SimpleDfLoader(DfLoader): # pylint: disable=too-few-public-methods
"""SimpleLoader for parquet files"""
"""SimpleLoader for parquet or csv files"""

def __init__(
self,
@@ -25,22 +25,29 @@ def __init__(
self.storage = storage or LocalStorage()
self.working_dir = working_dir

def load_df(self, df_path: str) -> pd.DataFrame:
"""Loads and returns a dataframe from a given parquet file path"""
target_path = join(self.working_dir, df_path) if self.working_dir else df_path
return LoadParquetFile().load_data(target_path, self.storage)
def load_df(self, df_path: str, **kwargs) -> pd.DataFrame:
"""Loads and returns a dataframe from a given parquet or csv file path"""
target_path = (
self.storage.join_paths(self.working_dir, df_path)
if self.working_dir
else df_path
)
if ".parq" in target_path:
return LoadParquetFile().load_data(target_path, self.storage)
else:
return LoadCsvFile().load_data(target_path, self.storage, **kwargs)


class SimpleDfLoaderFactory(DfLoaderFactory): # pylint: disable=too-few-public-methods
"""SimpleLoader for parquet files"""
"""SimpleLoader for parquet or csv files"""

def create_df_loader(self, storage: StorageInterface, working_dir: str) -> DfLoader:
"""Returns SimpleDfLoader"""
return SimpleDfLoader(storage, working_dir)


class RemoteDiskCachedDfLoader(DfLoader): # pylint: disable=too-few-public-methods
"""SimpleLoader for parquet files from cache or remote storage"""
"""SimpleLoader for parquet or csv files from cache or remote storage"""

def __init__(
self,
@@ -53,7 +60,7 @@ def __init__(
self.cache_path = cache_dir
self.working_dir = working_dir

def load_df(self, df_path: str) -> pd.DataFrame:
def load_df(self, df_path: str, **kwargs) -> pd.DataFrame:
"""Loads and returns dataframe from cache"""
target_path = (
self.storage.join_paths(self.working_dir, df_path)
@@ -62,14 +69,20 @@
)
cached_filepath = join(self.cache_path, target_path)
if isfile(cached_filepath):
dataframe = read_parquet(cached_filepath)
else:
if ".parq" in target_path:
dataframe = read_parquet(cached_filepath)
else:
dataframe = read_csv(cached_filepath, **kwargs)
elif ".parq" in target_path:
dataframe = LoadParquetFile().load_data(target_path, self.storage)
write_parquet(dataframe, cached_filepath)
else:
dataframe = LoadCsvFile().load_data(target_path, self.storage, **kwargs)
write_csv(dataframe, cached_filepath, **kwargs)
return dataframe


class RemoteDiskCachedDfLoaderFactory( # QUEST: still used?
class RemoteDiskCachedDfLoaderFactory(
DfLoaderFactory
): # pylint: disable=too-few-public-methods
"""Factory of RemoteDiskCachedDfLoader"""
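With this change `load_df` dispatches on the file name: paths containing `.parq` (which also matches `.parquet`) take the parquet branch, everything else falls through to CSV, with `**kwargs` handed on to the CSV reader. A hypothetical usage sketch — the file names, working dir, and the `sep` option are illustrative:

```python
from niceml.data.dataloaders.dfloaders import SimpleDfLoader

loader = SimpleDfLoader(working_dir="experiments/data")  # storage defaults to LocalStorage

features = loader.load_df("features.parq")      # ".parq" -> parquet branch
labels = loader.load_df("labels.csv", sep=";")  # CSV branch; kwargs reach the reader
```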
2 changes: 1 addition & 1 deletion niceml/data/dataloaders/interfaces/dfloader.py
@@ -8,5 +8,5 @@ class DfLoader(ABC): # pylint: disable=too-few-public-methods
"""Abstract class DfLoader (Dataframe Loader)"""

@abstractmethod
def load_df(self, df_path: str) -> pd.DataFrame:
def load_df(self, df_path: str, **kwargs) -> pd.DataFrame:
"""Loads and returns the dataframe"""
17 changes: 15 additions & 2 deletions niceml/dlframeworks/keras/models/clsmodelfactory.py
@@ -1,3 +1,4 @@
"""Module for ClsModelFactory"""
from typing import List, Optional

from tensorflow.keras import layers
@@ -15,7 +16,10 @@


class ClsModelFactory(ModelFactory):
def __init__(
"""Model factory for classification models. Used to create the model
before training"""

def __init__( # noqa:PLR0913
self,
model: Model,
dense_layer_list: List[int],
@@ -25,7 +29,9 @@ def __init__(
allow_preconvolution: bool = False,
dropout_prob_list: Optional[List[float]] = None,
additional_conv_layers: Optional[List[int]] = None,
trainable_base_model: Optional[bool] = True,
):
"""Initialize the CLSModelFactory"""
self.model_params = model
self.dense_layer_list = dense_layer_list
self.use_scale_lambda = use_scale_lambda
@@ -36,15 +42,20 @@
)
self.dense_activation = dense_activation
self.additional_conv_layers = additional_conv_layers
self.trainable_base_model = trainable_base_model

def create_model(self, data_desc: DataDescription):
"""Creates a model for training according to the data_description"""
input_dd: InputImageDataDescription = check_instance(
data_desc, InputImageDataDescription
)
output_dd: OutputVectorDataDescription = check_instance(
data_desc, OutputVectorDataDescription
)
if not self.allow_preconvolution and input_dd.get_input_channel_count() != 3:
if (
not self.allow_preconvolution
and input_dd.get_input_channel_count() != 3 # noqa:PLR2004
):
raise Exception(
f"Input channels must have the size of 3! Instead "
f"{input_dd.get_input_channel_count()}"
@@ -53,6 +64,7 @@ def create_model(self, data_desc: DataDescription):
input_shape = input_size.to_numpy_shape() + (3,)
in_layer = layers.Input(shape=input_shape, name="image")
actual_layer = in_layer
self.model_params.trainable = self.trainable_base_model
model: Model = self.model_params
actual_layer = model(actual_layer)

@@ -87,6 +99,7 @@ def create_dense_layers(
dense_activation,
dropout_prob_list: Optional[List[float]] = None,
):
"""Creates dense layers for model"""
if dropout_prob_list is None:
dropout_prob_list = []
actual_layer = layers.Flatten()(actual_layer)
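The new `trainable_base_model` flag is applied as `self.model_params.trainable = self.trainable_base_model` before the base model is called, i.e. it freezes or unfreezes the injected backbone for transfer learning. In plain Keras the same switch looks like this (MobileNetV2 is only a stand-in for whatever base model gets passed to the factory):

```python
from tensorflow.keras import applications

base_model = applications.MobileNetV2(
    input_shape=(224, 224, 3), include_top=False, weights="imagenet"
)
base_model.trainable = False  # what trainable_base_model=False does to the backbone

# The factory would then stack its dense head on top of the frozen base, e.g.
# ClsModelFactory(model=base_model, dense_layer_list=[128, 64],
#                 dense_activation="relu", trainable_base_model=False)
```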
11 changes: 9 additions & 2 deletions niceml/experiments/loaddatafunctions.py
@@ -25,23 +25,28 @@ class LoadYamlFile(LoadDataFunc): # pylint: disable=too-few-public-methods
"""Loads yaml data from a cloud storage"""

def load_data(self, file_path: str, storage: StorageInterface):
"""Loads yaml file from cloud storage"""
data = storage.download_as_str(file_path)
return yaml.load(data, Loader=yaml.SafeLoader)


class LoadCsvFile(LoadDataFunc): # pylint: disable=too-few-public-methods
"""Loads csv data from a cloud storage"""

def load_data(self, file_path: str, storage: StorageInterface):
def load_data(
self, file_path: str, storage: StorageInterface, **kwargs
) -> pd.DataFrame:
"""Loads csv file from cloud storage"""
data = storage.download_as_str(file_path)
data_frame = pd.read_csv(io.BytesIO(data))
data_frame = pd.read_csv(io.BytesIO(data), **kwargs)
return data_frame


class LoadParquetFile(LoadDataFunc): # pylint: disable=too-few-public-methods
"""Loads parquet data from a cloud storage"""

def load_data(self, file_path: str, storage: StorageInterface):
"""Loads parquet file from cloud storage"""
data = storage.download_as_str(file_path)
if data == b"":
raise FileNotFoundError("File empty")
@@ -54,10 +59,12 @@ class LoadImageFile(LoadDataFunc): # pylint: disable=too-few-public-methods
"""Loads image data from a cloud storage"""

def __init__(self, target_size: ImageSize, output_dtype=np.uint8):
"""Initialize LoadImageFile object"""
self.target_size = target_size
self.output_dtype = output_dtype

def load_data(self, file_path: str, storage: StorageInterface):
"""Loads image file from cloud storage"""
data = storage.download_as_str(file_path)
image: Image.Image = Image.open(io.BytesIO(data))
if self.target_size is not None:
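`LoadCsvFile.load_data` now forwards `**kwargs` to `pandas.read_csv`, so reader options (separator, encoding, dtypes, …) can be set at the call site. The forwarding reduces to this sketch, where the byte string stands in for `storage.download_as_str(file_path)`:

```python
import io

import pandas as pd

raw = b"a;b\n1;2\n3;4\n"  # stand-in for storage.download_as_str(file_path)
data_frame = pd.read_csv(io.BytesIO(raw), sep=";")  # kwargs like sep pass through
assert list(data_frame.columns) == ["a", "b"]
```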
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "niceml"
version = "0.14.0"
version = "0.14.1"
description = "Welcome to niceML 🍦, a Python-based MLOps framework that uses TensorFlow and Dagster. This framework streamlines the development, and maintenance of machine learning models, providing an end-to-end solution for building efficient and scalable pipelines."
authors = [
"Denis Stalz-John <[email protected]>",