From e1840d68c07b46707ecd9c1e0dd5c1b8490c12d1 Mon Sep 17 00:00:00 2001 From: sanyhe Date: Tue, 23 Jul 2024 13:40:26 +0800 Subject: [PATCH] perf: format the code of mean normalization method. ---
NOTE(review): reviewer annotations only; kept below the three-dash separator so they stay out of the commit message. No diff content below has been altered.
NOTE(review): purpose of the patch as shown by the hunks: (1) rewrite the MeanNormalScaler docstrings into "Parameters / ---------- / name : type" sections and add type annotations, (2) add a docstring to inverse_transform(), (3) import Optional, (4) rename the strategy label "MeanNormalization" to "Mean Normalization".
NOTE(review): item (4) is a runtime string change, not formatting: the label changes both in FEATURE_SCALING_STRATEGY (constants.py) and in the feature_scaler() comparison (preprocessing.py). Presumably `method` is that list, so the two hunks must be applied together - confirm against callers and any stored configs that still use the old label.
NOTE(review): fit() docstring (unchanged context) says "mean and range (max - min)", but the unchanged context line assigns self.scale_ = np.std(X, axis=0). Docstring and code disagree - TODO confirm which one is intended.
NOTE(review): transform() computes a local `copy` (argument, falling back to self.copy) and the next line tests self.copy instead, so the `copy` argument has no effect on whether X is copied.
NOTE(review): new annotations to double-check: `self: object` on every method; `copy: bool = None` although Optional is imported by this same patch; `-> np.ndarray` on transform()/inverse_transform() while X is annotated pd.DataFrame and the return value is plain arithmetic on X - the actual return type presumably follows X; verify.
geochemistrypi/data_mining/constants.py | 2 +- .../data_mining/data/preprocessing.py | 55 ++++++++++++++----- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/geochemistrypi/data_mining/constants.py b/geochemistrypi/data_mining/constants.py index 147d6b9..7d41ab5 100644 --- a/geochemistrypi/data_mining/constants.py +++ b/geochemistrypi/data_mining/constants.py @@ -97,7 +97,7 @@ IMPUTING_STRATEGY = ["Mean Value", "Median Value", "Most Frequent Value", "Constant(Specified Value)"] -FEATURE_SCALING_STRATEGY = ["Min-max Scaling", "Standardization", "MeanNormalization"] +FEATURE_SCALING_STRATEGY = ["Min-max Scaling", "Standardization", "Mean Normalization"] SAMPLE_BALANCE_STRATEGY = ["Over Sampling", "Under Sampling", "Oversampling and Undersampling"] diff --git a/geochemistrypi/data_mining/data/preprocessing.py b/geochemistrypi/data_mining/data/preprocessing.py index f4ff0d3..efdad72 100644 --- a/geochemistrypi/data_mining/data/preprocessing.py +++ b/geochemistrypi/data_mining/data/preprocessing.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from typing import List +from typing import List, Optional import numpy as np import pandas as pd @@ -23,45 +23,70 @@ class MeanNormalScaler(BaseEstimator, TransformerMixin): """ - def __init__(self, copy=True): + def __init__(self: object, copy: bool = True): self.copy = copy self.mean_ = None self.scale_ = None - def fit(self, X: pd.DataFrame, y=None): + def fit(self: object, X: pd.DataFrame, y: Optional[pd.DataFrame] = None) -> object: """ Compute the mean and range (max - min) for each feature. - Parameters: + Parameters ---------- - X (pd.DataFrame): The input dataframe where each column represents a feature. - y : (ignored). + X : pd.DataFrame + The input dataframe where each column represents a feature. 
- Returns: - self: Fitted transformer. + y : pd.DataFrame, optional (default: None) + Ignored. + + Returns + ------- + self : object + Fitted transformer. """ self.mean_ = np.mean(X, axis=0) self.scale_ = np.std(X, axis=0) return self - def transform(self, X, y=None, copy=None): + def transform(self: object, X: pd.DataFrame, y: Optional[pd.DataFrame] = None, copy: bool = None) -> np.ndarray: """ Apply mean normalization to the data. - Parameters: + Parameters ---------- - X (pd.DataFrame): The input dataframe where each column represents a feature. + X : pd.DataFrame + The input dataframe where each column represents a feature. + + y : pd.DataFrame, optional (default: None) + Ignored. + copy : bool, optional (default: None) Copy the input X or not. - Returns: - np.ndarray: The normalized data. + Returns + ------- + X_tr : np.ndarray + The normalized data. """ copy = copy if copy is not None else self.copy X = X if not self.copy else X.copy() return (X - self.mean_) / self.scale_ - def inverse_transform(self, X): + def inverse_transform(self: object, X: pd.DataFrame) -> np.ndarray: + """ + Reverse the mean normalization transformation. + + Parameters + ---------- + X : pd.DataFrame + The input dataframe where each column represents a feature. + + Returns + ------- + X_tr : np.ndarray + The original data. + """ X = X if not self.copy else X.copy() return X * self.scale_ + self.mean_ @@ -92,7 +117,7 @@ def feature_scaler(X: pd.DataFrame, method: List[str], method_idx: int) -> tuple scaler = MinMaxScaler() elif method[method_idx] == "Standardization": scaler = StandardScaler() - elif method[method_idx] == "MeanNormalization": + elif method[method_idx] == "Mean Normalization": scaler = MeanNormalScaler() try: X_scaled = scaler.fit_transform(X)