From 588c7df31a7a7038ca066b45d078c163f66eab61 Mon Sep 17 00:00:00 2001 From: Quentin Suire Date: Sun, 15 Oct 2023 17:56:59 +0200 Subject: [PATCH] Rename OneHotEncoder option `sparse` to `sparse_output` The option `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4 of scikit-learn. --- learntools/ml_intermediate/ex3.py | 2 +- learntools/time_series/ex3.py | 4 ++-- notebooks/deep_learning_intro/raw/ex3.ipynb | 2 +- notebooks/ml_intermediate/raw/ex3.ipynb | 2 +- notebooks/ml_intermediate/raw/tut3.ipynb | 4 ++-- notebooks/time_series/raw/ex3.ipynb | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/learntools/ml_intermediate/ex3.py b/learntools/ml_intermediate/ex3.py index bf3f55bde..e21c2efec 100644 --- a/learntools/ml_intermediate/ex3.py +++ b/learntools/ml_intermediate/ex3.py @@ -125,7 +125,7 @@ class OneHot(CodingProblem): "`X_valid[low_cardinality_cols]`, respectively.") _solution = CS( """# Apply one-hot encoder to each column with categorical data -OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False) +OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False) OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(X_train[low_cardinality_cols])) OH_cols_valid = pd.DataFrame(OH_encoder.transform(X_valid[low_cardinality_cols])) diff --git a/learntools/time_series/ex3.py b/learntools/time_series/ex3.py index 5e2e31c01..5d1355dc7 100644 --- a/learntools/time_series/ex3.py +++ b/learntools/time_series/ex3.py @@ -118,7 +118,7 @@ class Q4(EqualityCheckProblem): # Create holiday features ```python from sklearn.preprocessing import OneHotEncoder -ohe = OneHotEncoder(sparse=False) +ohe = OneHotEncoder(sparse_output=False) X_holidays = pd.DataFrame( ____, @@ -134,7 +134,7 @@ class Q4(EqualityCheckProblem): # Create holiday features # Scikit-learn solution from sklearn.preprocessing import OneHotEncoder -ohe = OneHotEncoder(sparse=False) +ohe = OneHotEncoder(sparse_output=False) X_holidays = pd.DataFrame( ohe.fit_transform(holidays), diff --git a/notebooks/deep_learning_intro/raw/ex3.ipynb b/notebooks/deep_learning_intro/raw/ex3.ipynb index ae08d502b..afcd08fb6 100644 --- a/notebooks/deep_learning_intro/raw/ex3.ipynb +++ b/notebooks/deep_learning_intro/raw/ex3.ipynb @@ -63,7 +63,7 @@ "preprocessor = make_column_transformer(\n", " (StandardScaler(),\n", " make_column_selector(dtype_include=np.number)),\n", - " (OneHotEncoder(sparse=False),\n", + " (OneHotEncoder(sparse_output=False),\n", " make_column_selector(dtype_include=object)),\n", ")\n", "\n", diff --git a/notebooks/ml_intermediate/raw/ex3.ipynb b/notebooks/ml_intermediate/raw/ex3.ipynb index d9337f225..7dff4d572 100644 --- a/notebooks/ml_intermediate/raw/ex3.ipynb +++ b/notebooks/ml_intermediate/raw/ex3.ipynb @@ -538,7 +538,7 @@ "source": [ "#%%RM_IF(PROD)%%\n", "# Apply one-hot encoder to each column with categorical data\n", - "OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)\n", + "OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)\n", "OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(X_train[low_cardinality_cols]))\n", "OH_cols_valid = pd.DataFrame(OH_encoder.transform(X_valid[low_cardinality_cols]))\n", "\n", diff --git a/notebooks/ml_intermediate/raw/tut3.ipynb b/notebooks/ml_intermediate/raw/tut3.ipynb index 2f50a36f0..75faaf260 100644 --- a/notebooks/ml_intermediate/raw/tut3.ipynb +++ b/notebooks/ml_intermediate/raw/tut3.ipynb @@ -224,7 +224,7 @@ "\n", "We use the 
[`OneHotEncoder`](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html) class from scikit-learn to get one-hot encodings. There are a number of parameters that can be used to customize its behavior. \n", "- We set `handle_unknown='ignore'` to avoid errors when the validation data contains classes that aren't represented in the training data, and\n", - "- setting `sparse=False` ensures that the encoded columns are returned as a numpy array (instead of a sparse matrix).\n", + "- setting `sparse_output=False` ensures that the encoded columns are returned as a numpy array (instead of a sparse matrix).\n", "\n", "To use the encoder, we supply only the categorical columns that we want to be one-hot encoded. For instance, to encode the training data, we supply `X_train[object_cols]`. (`object_cols` in the code cell below is a list of the column names with categorical data, and so `X_train[object_cols]` contains all of the categorical data in the training set.)" ] @@ -238,7 +238,7 @@ "from sklearn.preprocessing import OneHotEncoder\n", "\n", "# Apply one-hot encoder to each column with categorical data\n", - "OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)\n", + "OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)\n", "OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(X_train[object_cols]))\n", "OH_cols_valid = pd.DataFrame(OH_encoder.transform(X_valid[object_cols]))\n", "\n", diff --git a/notebooks/time_series/raw/ex3.ipynb b/notebooks/time_series/raw/ex3.ipynb index a65d907fb..12d621cef 100644 --- a/notebooks/time_series/raw/ex3.ipynb +++ b/notebooks/time_series/raw/ex3.ipynb @@ -513,7 +513,7 @@ "# Scikit-learn solution\n", "from sklearn.preprocessing import OneHotEncoder\n", "\n", - "ohe = OneHotEncoder(sparse=False)\n", + "ohe = OneHotEncoder(sparse_output=False)\n", "\n", "X_holidays = pd.DataFrame(\n", " ohe.fit_transform(holidays),\n",
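For reference, a minimal sketch of the before/after usage this patch applies throughout. It is not part of the diff itself: the `X_train`/`X_valid` frames and the `color` column below are made-up illustrations, and the snippet assumes scikit-learn >= 1.2, where `sparse_output` is available.

```python
# Minimal sketch (not part of this patch): the renamed parameter in use,
# assuming scikit-learn >= 1.2 where `sparse_output` replaces `sparse`.
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Toy categorical frames, purely illustrative.
X_train = pd.DataFrame({"color": ["red", "green", "blue"]})
X_valid = pd.DataFrame({"color": ["green", "purple"]})  # "purple" is unseen

# Before (deprecated in scikit-learn 1.2, removed in 1.4):
#   OneHotEncoder(handle_unknown='ignore', sparse=False)
# After:
encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

# With sparse_output=False the transform returns a dense numpy array,
# so it can be wrapped directly in a DataFrame, as the notebooks do.
encoded_train = pd.DataFrame(encoder.fit_transform(X_train[["color"]]))
encoded_valid = pd.DataFrame(encoder.transform(X_valid[["color"]]))

# Unseen categories ("purple") become all-zero rows thanks to handle_unknown='ignore'.
print(encoded_valid)
```

The rename does not change behavior: `sparse_output=False` still yields a dense numpy array, which is why the surrounding `pd.DataFrame(OH_encoder.fit_transform(...))` calls in the notebooks continue to work unchanged.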