From fc95e6b9166a59f0ceef30fd5960aeb54c138b46 Mon Sep 17 00:00:00 2001
From: ngow210 <nidhign11@gmail.com>
Date: Wed, 17 Jul 2024 11:36:35 +1200
Subject: [PATCH 1/8] Ensemble updated

---
 _episodes/04-ensemble-methods.md | 90 ++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/_episodes/04-ensemble-methods.md b/_episodes/04-ensemble-methods.md
index f4dc28a..cdd24c1 100644
--- a/_episodes/04-ensemble-methods.md
+++ b/_episodes/04-ensemble-methods.md
@@ -168,6 +168,96 @@ plt.show()
 
 There is still some overfitting indicated by the regions that contain only single points but using the same hyper-parameter settings used to fit the decision tree classifier, we can see that overfitting is reduced. 
 
+## Stacking: classification
+import seaborn as sns
+penguins = sns.load_dataset('penguins')
+
+feature_names = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
+penguins.dropna(subset=feature_names, inplace=True)
+
+species_names = penguins['species'].unique()
+
+# Define data and targets
+X = penguins[feature_names]
+
+y = penguins.species
+
+# Split data in training and test set
+from sklearn.model_selection import train_test_split
+
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)
+
+print(f'train size: {X_train.shape}')
+print(f'test size: {X_test.shape}')
+
+from sklearn.ensemble import (
+    GradientBoostingClassifier,
+    RandomForestClassifier,
+    VotingClassifier,
+)
+from sklearn.gaussian_process import GaussianProcessClassifier
+from sklearn.gaussian_process.kernels import RBF
+from sklearn.tree import DecisionTreeClassifier
+
+# training estimators 
+rf_clf = RandomForestClassifier(n_estimators=100, max_depth=7, min_samples_leaf=1, random_state=5)
+gb_clf = GradientBoostingClassifier(random_state=5)
+gp_clf = GaussianProcessClassifier(1.0 * RBF(1.0), random_state=5)
+dt_clf = DecisionTreeClassifier(max_depth=5, random_state=5)
+
+voting_reg = VotingClassifier([("rf", rf_clf), ("gb", gb_clf), ("gp", gp_clf), ("dt", dt_clf)])
+
+# fit voting estimator
+voting_reg.fit(X_train, y_train)
+
+# lets also train the individual models for comparison
+rf_clf.fit(X_train, y_train)
+gb_clf.fit(X_train, y_train)
+gp_clf.fit(X_train, y_train)
+dt_clf.fit(X_train, y_train)
+
+import matplotlib.pyplot as plt
+
+# make predictions
+X_test_20 = X_test[:20] # first 20 for visualisation
+
+rf_pred = rf_clf.predict(X_test_20)
+gb_pred = gb_clf.predict(X_test_20)
+gp_pred = gp_clf.predict(X_test_20)
+dt_pred = dt_clf.predict(X_test_20)
+voting_pred = voting_reg.predict(X_test_20)
+
+print(rf_pred)
+print(gb_pred)
+print(gp_pred)
+print(dt_pred)
+print(voting_pred)
+
+plt.figure()
+plt.plot(gb_pred,  "o", color="green", label="GradientBoostingClassifier")
+plt.plot(rf_pred,  "o", color="blue", label="RandomForestClassifier")
+plt.plot(gp_pred,  "o", color="darkblue", label="GuassianProcessClassifier")
+plt.plot(dt_pred,  "o", color="lightblue", label="DecisionTreeClassifier")
+plt.plot(voting_pred,  "x", color="red", ms=10, label="VotingRegressor")
+
+plt.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False)
+plt.ylabel("predicted")
+plt.xlabel("training samples")
+plt.legend(loc="best")
+plt.title("Regressor predictions and their average")
+
+plt.show()
+
+print(f'random forest: {rf_clf.score(X_test, y_test)}')
+
+print(f'gradient boost: {gb_clf.score(X_test, y_test)}')
+
+print(f'guassian process: {gp_clf.score(X_test, y_test)}')
+
+print(f'decision tree: {dt_clf.score(X_test, y_test)}')
+
+print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
+
 ## Stacking a regression problem
 
 We've had a look at a bagging approach but we'll now take a look at a stacking approach and apply it to a regression problem. We'll also introduce a new dataset to play around with. 

From a481ff51d95e1391898cb9ab1988954aab1cb8cf Mon Sep 17 00:00:00 2001
From: ngow210 <nidhign11@gmail.com>
Date: Wed, 17 Jul 2024 11:42:32 +1200
Subject: [PATCH 2/8] Ensemble updated regression house price

---
 _episodes/04-ensemble-methods.md | 75 ++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/_episodes/04-ensemble-methods.md b/_episodes/04-ensemble-methods.md
index cdd24c1..118ce23 100644
--- a/_episodes/04-ensemble-methods.md
+++ b/_episodes/04-ensemble-methods.md
@@ -262,6 +262,81 @@ print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
 
 We've had a look at a bagging approach but we'll now take a look at a stacking approach and apply it to a regression problem. We'll also introduce a new dataset to play around with. 
 
+### California house price prediction
+
+import sklearn
+from sklearn.datasets import fetch_california_housing
+from sklearn.model_selection import train_test_split
+X, y = fetch_california_housing(return_X_y=True, as_frame=True)
+
+print(X.shape)
+print(y.shape)
+
+print(X.head())
+print("======================================")
+# target is in units of 100,000
+print(y.head())
+
+# split into train and test sets (20% held out, fixed seed)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)
+
+print(f'train size: {X_train.shape}')
+print(f'test size: {X_test.shape}')
+
+from sklearn.ensemble import (
+    GradientBoostingRegressor,
+    RandomForestRegressor,
+    VotingRegressor,
+)
+from sklearn.linear_model import LinearRegression
+
+# define the base regressors and the voting ensemble that combines them
+rf_reg = RandomForestRegressor(random_state=5)
+gb_reg = GradientBoostingRegressor(random_state=5)
+linear_reg = LinearRegression()
+voting_reg = VotingRegressor([("rf", rf_reg), ("gb", gb_reg), ("lr", linear_reg)])
+
+# fit voting estimator
+voting_reg.fit(X_train, y_train)
+
+# let's also fit the individual models for comparison
+rf_reg.fit(X_train, y_train)
+gb_reg.fit(X_train, y_train)
+linear_reg.fit(X_train, y_train)
+
+import matplotlib.pyplot as plt
+
+# make predictions
+X_test_20 = X_test[:20] # first 20 test rows, for visualisation
+
+rf_pred = rf_reg.predict(X_test_20)
+gb_pred = gb_reg.predict(X_test_20)
+linear_pred = linear_reg.predict(X_test_20)
+voting_pred = voting_reg.predict(X_test_20)
+
+plt.figure()
+plt.plot(gb_pred,  "o", color="navy", label="GradientBoostingRegressor")
+plt.plot(rf_pred,  "o", color="blue", label="RandomForestRegressor")
+plt.plot(linear_pred,  "o", color="skyblue", label="LinearRegression")
+plt.plot(voting_pred,  "x", color="red", ms=10, label="VotingRegressor")
+
+plt.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False)
+plt.ylabel("predicted")
+plt.xlabel("training samples")  # NOTE(review): rows plotted are X_test[:20], i.e. test samples; fix label and re-render the figure together
+plt.legend(loc="best")
+plt.title("Regressor predictions and their average")
+
+plt.show()
+
+print(f'random forest: {rf_reg.score(X_test, y_test)}')
+
+print(f'gradient boost: {gb_reg.score(X_test, y_test)}')
+
+print(f'linear regression: {linear_reg.score(X_test, y_test)}')
+
+print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
+
+
 ### The diabetes dataset 
 The diabetes dataset, contains 10 baseline variables for 442 diabetes patients where the target attribute is quantitative measure of disease progression one year after baseline. For more information see [Efron et al., (2004)](https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf). The useful thing about this data it is available as part of the [sci-kit learn library](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset). We'll start by loading the dataset to very briefly inspect the attributes by printing them out.
 

From bf329fbfbecabdd92b3daf8ecc34ab460a7c2196 Mon Sep 17 00:00:00 2001
From: ngow210 <nidhign11@gmail.com>
Date: Wed, 17 Jul 2024 15:58:10 +1200
Subject: [PATCH 3/8] voting regressor house price

---
 fig/house_price_voting_regressor.svg | 1194 ++++++++++++++++++++++++++
 1 file changed, 1194 insertions(+)
 create mode 100644 fig/house_price_voting_regressor.svg

diff --git a/fig/house_price_voting_regressor.svg b/fig/house_price_voting_regressor.svg
new file mode 100644
index 0000000..c6936b8
--- /dev/null
+++ b/fig/house_price_voting_regressor.svg
@@ -0,0 +1,1194 @@
+<?xml version="1.0" encoding="utf-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns:xlink="http://www.w3.org/1999/xlink" width="460.8pt" height="345.6pt" viewBox="0 0 460.8 345.6" xmlns="http://www.w3.org/2000/svg" version="1.1">
+ <metadata>
+  <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+   <cc:Work>
+    <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
+    <dc:date>2024-07-17T15:55:56.397301</dc:date>
+    <dc:format>image/svg+xml</dc:format>
+    <dc:creator>
+     <cc:Agent>
+      <dc:title>Matplotlib v3.7.1, https://matplotlib.org/</dc:title>
+     </cc:Agent>
+    </dc:creator>
+   </cc:Work>
+  </rdf:RDF>
+ </metadata>
+ <defs>
+  <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
+ </defs>
+ <g id="figure_1">
+  <g id="patch_1">
+   <path d="M 0 345.6 
+L 460.8 345.6 
+L 460.8 0 
+L 0 0 
+z
+" style="fill: #ffffff"/>
+  </g>
+  <g id="axes_1">
+   <g id="patch_2">
+    <path d="M 57.6 307.584 
+L 414.72 307.584 
+L 414.72 41.472 
+L 57.6 41.472 
+z
+" style="fill: #ffffff"/>
+   </g>
+   <g id="matplotlib.axis_1">
+    <g id="xtick_1"/>
+    <g id="xtick_2"/>
+    <g id="xtick_3"/>
+    <g id="xtick_4"/>
+    <g id="xtick_5"/>
+    <g id="xtick_6"/>
+    <g id="xtick_7"/>
+    <g id="xtick_8"/>
+    <g id="text_1">
+     <!-- training samples -->
+     <g transform="translate(194.417812 319.182437) scale(0.1 -0.1)">
+      <defs>
+       <path id="DejaVuSans-74" d="M 1172 4494 
+L 1172 3500 
+L 2356 3500 
+L 2356 3053 
+L 1172 3053 
+L 1172 1153 
+Q 1172 725 1289 603 
+Q 1406 481 1766 481 
+L 2356 481 
+L 2356 0 
+L 1766 0 
+Q 1100 0 847 248 
+Q 594 497 594 1153 
+L 594 3053 
+L 172 3053 
+L 172 3500 
+L 594 3500 
+L 594 4494 
+L 1172 4494 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-72" d="M 2631 2963 
+Q 2534 3019 2420 3045 
+Q 2306 3072 2169 3072 
+Q 1681 3072 1420 2755 
+Q 1159 2438 1159 1844 
+L 1159 0 
+L 581 0 
+L 581 3500 
+L 1159 3500 
+L 1159 2956 
+Q 1341 3275 1631 3429 
+Q 1922 3584 2338 3584 
+Q 2397 3584 2469 3576 
+Q 2541 3569 2628 3553 
+L 2631 2963 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-61" d="M 2194 1759 
+Q 1497 1759 1228 1600 
+Q 959 1441 959 1056 
+Q 959 750 1161 570 
+Q 1363 391 1709 391 
+Q 2188 391 2477 730 
+Q 2766 1069 2766 1631 
+L 2766 1759 
+L 2194 1759 
+z
+M 3341 1997 
+L 3341 0 
+L 2766 0 
+L 2766 531 
+Q 2569 213 2275 61 
+Q 1981 -91 1556 -91 
+Q 1019 -91 701 211 
+Q 384 513 384 1019 
+Q 384 1609 779 1909 
+Q 1175 2209 1959 2209 
+L 2766 2209 
+L 2766 2266 
+Q 2766 2663 2505 2880 
+Q 2244 3097 1772 3097 
+Q 1472 3097 1187 3025 
+Q 903 2953 641 2809 
+L 641 3341 
+Q 956 3463 1253 3523 
+Q 1550 3584 1831 3584 
+Q 2591 3584 2966 3190 
+Q 3341 2797 3341 1997 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-69" d="M 603 3500 
+L 1178 3500 
+L 1178 0 
+L 603 0 
+L 603 3500 
+z
+M 603 4863 
+L 1178 4863 
+L 1178 4134 
+L 603 4134 
+L 603 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6e" d="M 3513 2113 
+L 3513 0 
+L 2938 0 
+L 2938 2094 
+Q 2938 2591 2744 2837 
+Q 2550 3084 2163 3084 
+Q 1697 3084 1428 2787 
+Q 1159 2491 1159 1978 
+L 1159 0 
+L 581 0 
+L 581 3500 
+L 1159 3500 
+L 1159 2956 
+Q 1366 3272 1645 3428 
+Q 1925 3584 2291 3584 
+Q 2894 3584 3203 3211 
+Q 3513 2838 3513 2113 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-67" d="M 2906 1791 
+Q 2906 2416 2648 2759 
+Q 2391 3103 1925 3103 
+Q 1463 3103 1205 2759 
+Q 947 2416 947 1791 
+Q 947 1169 1205 825 
+Q 1463 481 1925 481 
+Q 2391 481 2648 825 
+Q 2906 1169 2906 1791 
+z
+M 3481 434 
+Q 3481 -459 3084 -895 
+Q 2688 -1331 1869 -1331 
+Q 1566 -1331 1297 -1286 
+Q 1028 -1241 775 -1147 
+L 775 -588 
+Q 1028 -725 1275 -790 
+Q 1522 -856 1778 -856 
+Q 2344 -856 2625 -561 
+Q 2906 -266 2906 331 
+L 2906 616 
+Q 2728 306 2450 153 
+Q 2172 0 1784 0 
+Q 1141 0 747 490 
+Q 353 981 353 1791 
+Q 353 2603 747 3093 
+Q 1141 3584 1784 3584 
+Q 2172 3584 2450 3431 
+Q 2728 3278 2906 2969 
+L 2906 3500 
+L 3481 3500 
+L 3481 434 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-20" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-73" d="M 2834 3397 
+L 2834 2853 
+Q 2591 2978 2328 3040 
+Q 2066 3103 1784 3103 
+Q 1356 3103 1142 2972 
+Q 928 2841 928 2578 
+Q 928 2378 1081 2264 
+Q 1234 2150 1697 2047 
+L 1894 2003 
+Q 2506 1872 2764 1633 
+Q 3022 1394 3022 966 
+Q 3022 478 2636 193 
+Q 2250 -91 1575 -91 
+Q 1294 -91 989 -36 
+Q 684 19 347 128 
+L 347 722 
+Q 666 556 975 473 
+Q 1284 391 1588 391 
+Q 1994 391 2212 530 
+Q 2431 669 2431 922 
+Q 2431 1156 2273 1281 
+Q 2116 1406 1581 1522 
+L 1381 1569 
+Q 847 1681 609 1914 
+Q 372 2147 372 2553 
+Q 372 3047 722 3315 
+Q 1072 3584 1716 3584 
+Q 2034 3584 2315 3537 
+Q 2597 3491 2834 3397 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6d" d="M 3328 2828 
+Q 3544 3216 3844 3400 
+Q 4144 3584 4550 3584 
+Q 5097 3584 5394 3201 
+Q 5691 2819 5691 2113 
+L 5691 0 
+L 5113 0 
+L 5113 2094 
+Q 5113 2597 4934 2840 
+Q 4756 3084 4391 3084 
+Q 3944 3084 3684 2787 
+Q 3425 2491 3425 1978 
+L 3425 0 
+L 2847 0 
+L 2847 2094 
+Q 2847 2600 2669 2842 
+Q 2491 3084 2119 3084 
+Q 1678 3084 1418 2786 
+Q 1159 2488 1159 1978 
+L 1159 0 
+L 581 0 
+L 581 3500 
+L 1159 3500 
+L 1159 2956 
+Q 1356 3278 1631 3431 
+Q 1906 3584 2284 3584 
+Q 2666 3584 2933 3390 
+Q 3200 3197 3328 2828 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-70" d="M 1159 525 
+L 1159 -1331 
+L 581 -1331 
+L 581 3500 
+L 1159 3500 
+L 1159 2969 
+Q 1341 3281 1617 3432 
+Q 1894 3584 2278 3584 
+Q 2916 3584 3314 3078 
+Q 3713 2572 3713 1747 
+Q 3713 922 3314 415 
+Q 2916 -91 2278 -91 
+Q 1894 -91 1617 61 
+Q 1341 213 1159 525 
+z
+M 3116 1747 
+Q 3116 2381 2855 2742 
+Q 2594 3103 2138 3103 
+Q 1681 3103 1420 2742 
+Q 1159 2381 1159 1747 
+Q 1159 1113 1420 752 
+Q 1681 391 2138 391 
+Q 2594 391 2855 752 
+Q 3116 1113 3116 1747 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-6c" d="M 603 4863 
+L 1178 4863 
+L 1178 0 
+L 603 0 
+L 603 4863 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-65" d="M 3597 1894 
+L 3597 1613 
+L 953 1613 
+Q 991 1019 1311 708 
+Q 1631 397 2203 397 
+Q 2534 397 2845 478 
+Q 3156 559 3463 722 
+L 3463 178 
+Q 3153 47 2828 -22 
+Q 2503 -91 2169 -91 
+Q 1331 -91 842 396 
+Q 353 884 353 1716 
+Q 353 2575 817 3079 
+Q 1281 3584 2069 3584 
+Q 2775 3584 3186 3129 
+Q 3597 2675 3597 1894 
+z
+M 3022 2063 
+Q 3016 2534 2758 2815 
+Q 2500 3097 2075 3097 
+Q 1594 3097 1305 2825 
+Q 1016 2553 972 2059 
+L 3022 2063 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-74"/>
+      <use xlink:href="#DejaVuSans-72" x="39.208984"/>
+      <use xlink:href="#DejaVuSans-61" x="80.322266"/>
+      <use xlink:href="#DejaVuSans-69" x="141.601562"/>
+      <use xlink:href="#DejaVuSans-6e" x="169.384766"/>
+      <use xlink:href="#DejaVuSans-69" x="232.763672"/>
+      <use xlink:href="#DejaVuSans-6e" x="260.546875"/>
+      <use xlink:href="#DejaVuSans-67" x="323.925781"/>
+      <use xlink:href="#DejaVuSans-20" x="387.402344"/>
+      <use xlink:href="#DejaVuSans-73" x="419.189453"/>
+      <use xlink:href="#DejaVuSans-61" x="471.289062"/>
+      <use xlink:href="#DejaVuSans-6d" x="532.568359"/>
+      <use xlink:href="#DejaVuSans-70" x="629.980469"/>
+      <use xlink:href="#DejaVuSans-6c" x="693.457031"/>
+      <use xlink:href="#DejaVuSans-65" x="721.240234"/>
+      <use xlink:href="#DejaVuSans-73" x="782.763672"/>
+     </g>
+    </g>
+   </g>
+   <g id="matplotlib.axis_2">
+    <g id="ytick_1">
+     <g id="line2d_1">
+      <defs>
+       <path id="mfc8c73d474" d="M 0 0 
+L -3.5 0 
+" style="stroke: #000000; stroke-width: 0.8"/>
+      </defs>
+      <g>
+       <use xlink:href="#mfc8c73d474" x="57.6" y="289.211098" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_2">
+      <!-- 1.0 -->
+      <g transform="translate(34.696875 293.010317) scale(0.1 -0.1)">
+       <defs>
+        <path id="DejaVuSans-31" d="M 794 531 
+L 1825 531 
+L 1825 4091 
+L 703 3866 
+L 703 4441 
+L 1819 4666 
+L 2450 4666 
+L 2450 531 
+L 3481 531 
+L 3481 0 
+L 794 0 
+L 794 531 
+z
+" transform="scale(0.015625)"/>
+        <path id="DejaVuSans-2e" d="M 684 794 
+L 1344 794 
+L 1344 0 
+L 684 0 
+L 684 794 
+z
+" transform="scale(0.015625)"/>
+        <path id="DejaVuSans-30" d="M 2034 4250 
+Q 1547 4250 1301 3770 
+Q 1056 3291 1056 2328 
+Q 1056 1369 1301 889 
+Q 1547 409 2034 409 
+Q 2525 409 2770 889 
+Q 3016 1369 3016 2328 
+Q 3016 3291 2770 3770 
+Q 2525 4250 2034 4250 
+z
+M 2034 4750 
+Q 2819 4750 3233 4129 
+Q 3647 3509 3647 2328 
+Q 3647 1150 3233 529 
+Q 2819 -91 2034 -91 
+Q 1250 -91 836 529 
+Q 422 1150 422 2328 
+Q 422 3509 836 4129 
+Q 1250 4750 2034 4750 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_2">
+     <g id="line2d_2">
+      <g>
+       <use xlink:href="#mfc8c73d474" x="57.6" y="258.053137" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_3">
+      <!-- 1.5 -->
+      <g transform="translate(34.696875 261.852356) scale(0.1 -0.1)">
+       <defs>
+        <path id="DejaVuSans-35" d="M 691 4666 
+L 3169 4666 
+L 3169 4134 
+L 1269 4134 
+L 1269 2991 
+Q 1406 3038 1543 3061 
+Q 1681 3084 1819 3084 
+Q 2600 3084 3056 2656 
+Q 3513 2228 3513 1497 
+Q 3513 744 3044 326 
+Q 2575 -91 1722 -91 
+Q 1428 -91 1123 -41 
+Q 819 9 494 109 
+L 494 744 
+Q 775 591 1075 516 
+Q 1375 441 1709 441 
+Q 2250 441 2565 725 
+Q 2881 1009 2881 1497 
+Q 2881 1984 2565 2268 
+Q 2250 2553 1709 2553 
+Q 1456 2553 1204 2497 
+Q 953 2441 691 2322 
+L 691 4666 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-31"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-35" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_3">
+     <g id="line2d_3">
+      <g>
+       <use xlink:href="#mfc8c73d474" x="57.6" y="226.895176" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_4">
+      <!-- 2.0 -->
+      <g transform="translate(34.696875 230.694395) scale(0.1 -0.1)">
+       <defs>
+        <path id="DejaVuSans-32" d="M 1228 531 
+L 3431 531 
+L 3431 0 
+L 469 0 
+L 469 531 
+Q 828 903 1448 1529 
+Q 2069 2156 2228 2338 
+Q 2531 2678 2651 2914 
+Q 2772 3150 2772 3378 
+Q 2772 3750 2511 3984 
+Q 2250 4219 1831 4219 
+Q 1534 4219 1204 4116 
+Q 875 4013 500 3803 
+L 500 4441 
+Q 881 4594 1212 4672 
+Q 1544 4750 1819 4750 
+Q 2544 4750 2975 4387 
+Q 3406 4025 3406 3419 
+Q 3406 3131 3298 2873 
+Q 3191 2616 2906 2266 
+Q 2828 2175 2409 1742 
+Q 1991 1309 1228 531 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-32"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_4">
+     <g id="line2d_4">
+      <g>
+       <use xlink:href="#mfc8c73d474" x="57.6" y="195.737215" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_5">
+      <!-- 2.5 -->
+      <g transform="translate(34.696875 199.536433) scale(0.1 -0.1)">
+       <use xlink:href="#DejaVuSans-32"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-35" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_5">
+     <g id="line2d_5">
+      <g>
+       <use xlink:href="#mfc8c73d474" x="57.6" y="164.579254" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_6">
+      <!-- 3.0 -->
+      <g transform="translate(34.696875 168.378472) scale(0.1 -0.1)">
+       <defs>
+        <path id="DejaVuSans-33" d="M 2597 2516 
+Q 3050 2419 3304 2112 
+Q 3559 1806 3559 1356 
+Q 3559 666 3084 287 
+Q 2609 -91 1734 -91 
+Q 1441 -91 1130 -33 
+Q 819 25 488 141 
+L 488 750 
+Q 750 597 1062 519 
+Q 1375 441 1716 441 
+Q 2309 441 2620 675 
+Q 2931 909 2931 1356 
+Q 2931 1769 2642 2001 
+Q 2353 2234 1838 2234 
+L 1294 2234 
+L 1294 2753 
+L 1863 2753 
+Q 2328 2753 2575 2939 
+Q 2822 3125 2822 3475 
+Q 2822 3834 2567 4026 
+Q 2313 4219 1838 4219 
+Q 1578 4219 1281 4162 
+Q 984 4106 628 3988 
+L 628 4550 
+Q 988 4650 1302 4700 
+Q 1616 4750 1894 4750 
+Q 2613 4750 3031 4423 
+Q 3450 4097 3450 3541 
+Q 3450 3153 3228 2886 
+Q 3006 2619 2597 2516 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-33"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_6">
+     <g id="line2d_6">
+      <g>
+       <use xlink:href="#mfc8c73d474" x="57.6" y="133.421293" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_7">
+      <!-- 3.5 -->
+      <g transform="translate(34.696875 137.220511) scale(0.1 -0.1)">
+       <use xlink:href="#DejaVuSans-33"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-35" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_7">
+     <g id="line2d_7">
+      <g>
+       <use xlink:href="#mfc8c73d474" x="57.6" y="102.263332" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_8">
+      <!-- 4.0 -->
+      <g transform="translate(34.696875 106.06255) scale(0.1 -0.1)">
+       <defs>
+        <path id="DejaVuSans-34" d="M 2419 4116 
+L 825 1625 
+L 2419 1625 
+L 2419 4116 
+z
+M 2253 4666 
+L 3047 4666 
+L 3047 1625 
+L 3713 1625 
+L 3713 1100 
+L 3047 1100 
+L 3047 0 
+L 2419 0 
+L 2419 1100 
+L 313 1100 
+L 313 1709 
+L 2253 4666 
+z
+" transform="scale(0.015625)"/>
+       </defs>
+       <use xlink:href="#DejaVuSans-34"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-30" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="ytick_8">
+     <g id="line2d_8">
+      <g>
+       <use xlink:href="#mfc8c73d474" x="57.6" y="71.105371" style="stroke: #000000; stroke-width: 0.8"/>
+      </g>
+     </g>
+     <g id="text_9">
+      <!-- 4.5 -->
+      <g transform="translate(34.696875 74.904589) scale(0.1 -0.1)">
+       <use xlink:href="#DejaVuSans-34"/>
+       <use xlink:href="#DejaVuSans-2e" x="63.623047"/>
+       <use xlink:href="#DejaVuSans-35" x="95.410156"/>
+      </g>
+     </g>
+    </g>
+    <g id="text_10">
+     <!-- predicted -->
+     <g transform="translate(28.617187 198.245187) rotate(-90) scale(0.1 -0.1)">
+      <defs>
+       <path id="DejaVuSans-64" d="M 2906 2969 
+L 2906 4863 
+L 3481 4863 
+L 3481 0 
+L 2906 0 
+L 2906 525 
+Q 2725 213 2448 61 
+Q 2172 -91 1784 -91 
+Q 1150 -91 751 415 
+Q 353 922 353 1747 
+Q 353 2572 751 3078 
+Q 1150 3584 1784 3584 
+Q 2172 3584 2448 3432 
+Q 2725 3281 2906 2969 
+z
+M 947 1747 
+Q 947 1113 1208 752 
+Q 1469 391 1925 391 
+Q 2381 391 2643 752 
+Q 2906 1113 2906 1747 
+Q 2906 2381 2643 2742 
+Q 2381 3103 1925 3103 
+Q 1469 3103 1208 2742 
+Q 947 2381 947 1747 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-63" d="M 3122 3366 
+L 3122 2828 
+Q 2878 2963 2633 3030 
+Q 2388 3097 2138 3097 
+Q 1578 3097 1268 2742 
+Q 959 2388 959 1747 
+Q 959 1106 1268 751 
+Q 1578 397 2138 397 
+Q 2388 397 2633 464 
+Q 2878 531 3122 666 
+L 3122 134 
+Q 2881 22 2623 -34 
+Q 2366 -91 2075 -91 
+Q 1284 -91 818 406 
+Q 353 903 353 1747 
+Q 353 2603 823 3093 
+Q 1294 3584 2113 3584 
+Q 2378 3584 2631 3529 
+Q 2884 3475 3122 3366 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-70"/>
+      <use xlink:href="#DejaVuSans-72" x="63.476562"/>
+      <use xlink:href="#DejaVuSans-65" x="102.339844"/>
+      <use xlink:href="#DejaVuSans-64" x="163.863281"/>
+      <use xlink:href="#DejaVuSans-69" x="227.339844"/>
+      <use xlink:href="#DejaVuSans-63" x="255.123047"/>
+      <use xlink:href="#DejaVuSans-74" x="310.103516"/>
+      <use xlink:href="#DejaVuSans-65" x="349.3125"/>
+      <use xlink:href="#DejaVuSans-64" x="410.835938"/>
+     </g>
+    </g>
+   </g>
+   <g id="line2d_9">
+    <defs>
+     <path id="m5c856ca772" d="M 0 3 
+C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
+C 2.683901 1.55874 3 0.795609 3 0 
+C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
+C 1.55874 -2.683901 0.795609 -3 0 -3 
+C -0.795609 -3 -1.55874 -2.683901 -2.12132 -2.12132 
+C -2.683901 -1.55874 -3 -0.795609 -3 0 
+C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
+C -1.55874 2.683901 -0.795609 3 0 3 
+z
+" style="stroke: #000080"/>
+    </defs>
+    <g clip-path="url(#p3f62924c94)">
+     <use xlink:href="#m5c856ca772" x="73.832727" y="273.630468" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="90.919809" y="229.717444" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="108.00689" y="295.488" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="125.093971" y="257.041512" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="142.181053" y="271.039495" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="159.268134" y="242.201555" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="176.355215" y="224.918113" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="193.442297" y="218.081114" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="210.529378" y="164.396859" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="227.616459" y="62.668117" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="244.703541" y="244.652989" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="261.790622" y="232.747558" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="278.877703" y="216.854426" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="295.964785" y="288.521497" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="313.051866" y="262.514434" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="330.138947" y="200.963581" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="347.226029" y="140.388116" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="364.31311" y="140.217196" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="381.400191" y="222.23332" style="fill: #000080; stroke: #000080"/>
+     <use xlink:href="#m5c856ca772" x="398.487273" y="223.366839" style="fill: #000080; stroke: #000080"/>
+    </g>
+   </g>
+   <g id="line2d_10">
+    <defs>
+     <path id="m87a24366d7" d="M 0 3 
+C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
+C 2.683901 1.55874 3 0.795609 3 0 
+C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
+C 1.55874 -2.683901 0.795609 -3 0 -3 
+C -0.795609 -3 -1.55874 -2.683901 -2.12132 -2.12132 
+C -2.683901 -1.55874 -3 -0.795609 -3 0 
+C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
+C -1.55874 2.683901 -0.795609 3 0 3 
+z
+" style="stroke: #0000ff"/>
+    </defs>
+    <g clip-path="url(#p3f62924c94)">
+     <use xlink:href="#m87a24366d7" x="73.832727" y="272.610136" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="90.919809" y="233.301876" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="108.00689" y="257.373881" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="125.093971" y="253.204335" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="142.181053" y="274.606115" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="159.268134" y="257.425615" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="176.355215" y="238.199284" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="193.442297" y="212.689638" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="210.529378" y="174.964825" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="227.616459" y="53.568" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="244.703541" y="258.500565" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="261.790622" y="210.737885" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="278.877703" y="229.035728" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="295.964785" y="272.628208" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="313.051866" y="271.578808" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="330.138947" y="219.629139" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="347.226029" y="139.67095" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="364.31311" y="109.488819" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="381.400191" y="228.167667" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m87a24366d7" x="398.487273" y="241.383004" style="fill: #0000ff; stroke: #0000ff"/>
+    </g>
+   </g>
+   <g id="line2d_11">
+    <defs>
+     <path id="m7c90bf28bf" d="M 0 3 
+C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
+C 2.683901 1.55874 3 0.795609 3 0 
+C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
+C 1.55874 -2.683901 0.795609 -3 0 -3 
+C -0.795609 -3 -1.55874 -2.683901 -2.12132 -2.12132 
+C -2.683901 -1.55874 -3 -0.795609 -3 0 
+C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
+C -1.55874 2.683901 -0.795609 3 0 3 
+z
+" style="stroke: #87ceeb"/>
+    </defs>
+    <g clip-path="url(#p3f62924c94)">
+     <use xlink:href="#m7c90bf28bf" x="73.832727" y="245.949966" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="90.919809" y="230.620161" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="108.00689" y="288.671051" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="125.093971" y="244.095747" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="142.181053" y="242.105403" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="159.268134" y="235.632116" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="176.355215" y="259.870854" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="193.442297" y="229.109921" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="210.529378" y="152.52502" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="227.616459" y="105.071573" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="244.703541" y="234.812694" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="261.790622" y="243.163077" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="278.877703" y="202.548732" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="295.964785" y="224.571454" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="313.051866" y="226.059343" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="330.138947" y="191.843464" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="347.226029" y="142.732369" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="364.31311" y="131.894447" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="381.400191" y="192.647204" style="fill: #87ceeb; stroke: #87ceeb"/>
+     <use xlink:href="#m7c90bf28bf" x="398.487273" y="200.961616" style="fill: #87ceeb; stroke: #87ceeb"/>
+    </g>
+   </g>
+   <g id="line2d_12">
+    <defs>
+     <path id="m536028c284" d="M -5 5 
+L 5 -5 
+M -5 -5 
+L 5 5 
+" style="stroke: #ff0000"/>
+    </defs>
+    <g clip-path="url(#p3f62924c94)">
+     <use xlink:href="#m536028c284" x="73.832727" y="264.063524" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="90.919809" y="231.21316" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="108.00689" y="280.510977" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="125.093971" y="251.447198" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="142.181053" y="262.583671" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="159.268134" y="245.086429" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="176.355215" y="240.996084" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="193.442297" y="219.960224" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="210.529378" y="163.962235" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="227.616459" y="73.76923" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="244.703541" y="245.988749" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="261.790622" y="228.88284" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="278.877703" y="216.146295" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="295.964785" y="261.907053" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="313.051866" y="253.384195" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="330.138947" y="204.145395" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="347.226029" y="140.930478" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="364.31311" y="127.200154" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="381.400191" y="214.349397" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m536028c284" x="398.487273" y="221.90382" style="fill: #ff0000; stroke: #ff0000"/>
+    </g>
+   </g>
+   <g id="patch_3">
+    <path d="M 57.6 307.584 
+L 57.6 41.472 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_4">
+    <path d="M 414.72 307.584 
+L 414.72 41.472 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_5">
+    <path d="M 57.6 307.584 
+L 414.72 307.584 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="patch_6">
+    <path d="M 57.6 41.472 
+L 414.72 41.472 
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+   </g>
+   <g id="text_11">
+    <!-- Regressor predictions and their average -->
+    <g transform="translate(115.66125 35.472) scale(0.12 -0.12)">
+     <defs>
+      <path id="DejaVuSans-52" d="M 2841 2188 
+Q 3044 2119 3236 1894 
+Q 3428 1669 3622 1275 
+L 4263 0 
+L 3584 0 
+L 2988 1197 
+Q 2756 1666 2539 1819 
+Q 2322 1972 1947 1972 
+L 1259 1972 
+L 1259 0 
+L 628 0 
+L 628 4666 
+L 2053 4666 
+Q 2853 4666 3247 4331 
+Q 3641 3997 3641 3322 
+Q 3641 2881 3436 2590 
+Q 3231 2300 2841 2188 
+z
+M 1259 4147 
+L 1259 2491 
+L 2053 2491 
+Q 2509 2491 2742 2702 
+Q 2975 2913 2975 3322 
+Q 2975 3731 2742 3939 
+Q 2509 4147 2053 4147 
+L 1259 4147 
+z
+" transform="scale(0.015625)"/>
+      <path id="DejaVuSans-6f" d="M 1959 3097 
+Q 1497 3097 1228 2736 
+Q 959 2375 959 1747 
+Q 959 1119 1226 758 
+Q 1494 397 1959 397 
+Q 2419 397 2687 759 
+Q 2956 1122 2956 1747 
+Q 2956 2369 2687 2733 
+Q 2419 3097 1959 3097 
+z
+M 1959 3584 
+Q 2709 3584 3137 3096 
+Q 3566 2609 3566 1747 
+Q 3566 888 3137 398 
+Q 2709 -91 1959 -91 
+Q 1206 -91 779 398 
+Q 353 888 353 1747 
+Q 353 2609 779 3096 
+Q 1206 3584 1959 3584 
+z
+" transform="scale(0.015625)"/>
+      <path id="DejaVuSans-68" d="M 3513 2113 
+L 3513 0 
+L 2938 0 
+L 2938 2094 
+Q 2938 2591 2744 2837 
+Q 2550 3084 2163 3084 
+Q 1697 3084 1428 2787 
+Q 1159 2491 1159 1978 
+L 1159 0 
+L 581 0 
+L 581 4863 
+L 1159 4863 
+L 1159 2956 
+Q 1366 3272 1645 3428 
+Q 1925 3584 2291 3584 
+Q 2894 3584 3203 3211 
+Q 3513 2838 3513 2113 
+z
+" transform="scale(0.015625)"/>
+      <path id="DejaVuSans-76" d="M 191 3500 
+L 800 3500 
+L 1894 563 
+L 2988 3500 
+L 3597 3500 
+L 2284 0 
+L 1503 0 
+L 191 3500 
+z
+" transform="scale(0.015625)"/>
+     </defs>
+     <use xlink:href="#DejaVuSans-52"/>
+     <use xlink:href="#DejaVuSans-65" x="64.982422"/>
+     <use xlink:href="#DejaVuSans-67" x="126.505859"/>
+     <use xlink:href="#DejaVuSans-72" x="189.982422"/>
+     <use xlink:href="#DejaVuSans-65" x="228.845703"/>
+     <use xlink:href="#DejaVuSans-73" x="290.369141"/>
+     <use xlink:href="#DejaVuSans-73" x="342.46875"/>
+     <use xlink:href="#DejaVuSans-6f" x="394.568359"/>
+     <use xlink:href="#DejaVuSans-72" x="455.75"/>
+     <use xlink:href="#DejaVuSans-20" x="496.863281"/>
+     <use xlink:href="#DejaVuSans-70" x="528.650391"/>
+     <use xlink:href="#DejaVuSans-72" x="592.126953"/>
+     <use xlink:href="#DejaVuSans-65" x="630.990234"/>
+     <use xlink:href="#DejaVuSans-64" x="692.513672"/>
+     <use xlink:href="#DejaVuSans-69" x="755.990234"/>
+     <use xlink:href="#DejaVuSans-63" x="783.773438"/>
+     <use xlink:href="#DejaVuSans-74" x="838.753906"/>
+     <use xlink:href="#DejaVuSans-69" x="877.962891"/>
+     <use xlink:href="#DejaVuSans-6f" x="905.746094"/>
+     <use xlink:href="#DejaVuSans-6e" x="966.927734"/>
+     <use xlink:href="#DejaVuSans-73" x="1030.306641"/>
+     <use xlink:href="#DejaVuSans-20" x="1082.40625"/>
+     <use xlink:href="#DejaVuSans-61" x="1114.193359"/>
+     <use xlink:href="#DejaVuSans-6e" x="1175.472656"/>
+     <use xlink:href="#DejaVuSans-64" x="1238.851562"/>
+     <use xlink:href="#DejaVuSans-20" x="1302.328125"/>
+     <use xlink:href="#DejaVuSans-74" x="1334.115234"/>
+     <use xlink:href="#DejaVuSans-68" x="1373.324219"/>
+     <use xlink:href="#DejaVuSans-65" x="1436.703125"/>
+     <use xlink:href="#DejaVuSans-69" x="1498.226562"/>
+     <use xlink:href="#DejaVuSans-72" x="1526.009766"/>
+     <use xlink:href="#DejaVuSans-20" x="1567.123047"/>
+     <use xlink:href="#DejaVuSans-61" x="1598.910156"/>
+     <use xlink:href="#DejaVuSans-76" x="1660.189453"/>
+     <use xlink:href="#DejaVuSans-65" x="1719.369141"/>
+     <use xlink:href="#DejaVuSans-72" x="1780.892578"/>
+     <use xlink:href="#DejaVuSans-61" x="1822.005859"/>
+     <use xlink:href="#DejaVuSans-67" x="1883.285156"/>
+     <use xlink:href="#DejaVuSans-65" x="1946.761719"/>
+    </g>
+   </g>
+   <g id="legend_1">
+    <g id="patch_7">
+     <path d="M 238.815313 108.1845 
+L 407.72 108.1845 
+Q 409.72 108.1845 409.72 106.1845 
+L 409.72 48.472 
+Q 409.72 46.472 407.72 46.472 
+L 238.815313 46.472 
+Q 236.815313 46.472 236.815313 48.472 
+L 236.815313 106.1845 
+Q 236.815313 108.1845 238.815313 108.1845 
+z
+" style="fill: #ffffff; opacity: 0.8; stroke: #cccccc; stroke-linejoin: miter"/>
+    </g>
+    <g id="line2d_13">
+     <g>
+      <use xlink:href="#m5c856ca772" x="250.815313" y="54.570438" style="fill: #000080; stroke: #000080"/>
+     </g>
+    </g>
+    <g id="text_12">
+     <!-- GradientBoostingRegressor -->
+     <g transform="translate(268.815313 58.070438) scale(0.1 -0.1)">
+      <defs>
+       <path id="DejaVuSans-47" d="M 3809 666 
+L 3809 1919 
+L 2778 1919 
+L 2778 2438 
+L 4434 2438 
+L 4434 434 
+Q 4069 175 3628 42 
+Q 3188 -91 2688 -91 
+Q 1594 -91 976 548 
+Q 359 1188 359 2328 
+Q 359 3472 976 4111 
+Q 1594 4750 2688 4750 
+Q 3144 4750 3555 4637 
+Q 3966 4525 4313 4306 
+L 4313 3634 
+Q 3963 3931 3569 4081 
+Q 3175 4231 2741 4231 
+Q 1884 4231 1454 3753 
+Q 1025 3275 1025 2328 
+Q 1025 1384 1454 906 
+Q 1884 428 2741 428 
+Q 3075 428 3337 486 
+Q 3600 544 3809 666 
+z
+" transform="scale(0.015625)"/>
+       <path id="DejaVuSans-42" d="M 1259 2228 
+L 1259 519 
+L 2272 519 
+Q 2781 519 3026 730 
+Q 3272 941 3272 1375 
+Q 3272 1813 3026 2020 
+Q 2781 2228 2272 2228 
+L 1259 2228 
+z
+M 1259 4147 
+L 1259 2741 
+L 2194 2741 
+Q 2656 2741 2882 2914 
+Q 3109 3088 3109 3444 
+Q 3109 3797 2882 3972 
+Q 2656 4147 2194 4147 
+L 1259 4147 
+z
+M 628 4666 
+L 2241 4666 
+Q 2963 4666 3353 4366 
+Q 3744 4066 3744 3513 
+Q 3744 3084 3544 2831 
+Q 3344 2578 2956 2516 
+Q 3422 2416 3680 2098 
+Q 3938 1781 3938 1306 
+Q 3938 681 3513 340 
+Q 3088 0 2303 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-47"/>
+      <use xlink:href="#DejaVuSans-72" x="77.490234"/>
+      <use xlink:href="#DejaVuSans-61" x="118.603516"/>
+      <use xlink:href="#DejaVuSans-64" x="179.882812"/>
+      <use xlink:href="#DejaVuSans-69" x="243.359375"/>
+      <use xlink:href="#DejaVuSans-65" x="271.142578"/>
+      <use xlink:href="#DejaVuSans-6e" x="332.666016"/>
+      <use xlink:href="#DejaVuSans-74" x="396.044922"/>
+      <use xlink:href="#DejaVuSans-42" x="435.253906"/>
+      <use xlink:href="#DejaVuSans-6f" x="503.857422"/>
+      <use xlink:href="#DejaVuSans-6f" x="565.039062"/>
+      <use xlink:href="#DejaVuSans-73" x="626.220703"/>
+      <use xlink:href="#DejaVuSans-74" x="678.320312"/>
+      <use xlink:href="#DejaVuSans-69" x="717.529297"/>
+      <use xlink:href="#DejaVuSans-6e" x="745.3125"/>
+      <use xlink:href="#DejaVuSans-67" x="808.691406"/>
+      <use xlink:href="#DejaVuSans-52" x="872.167969"/>
+      <use xlink:href="#DejaVuSans-65" x="937.150391"/>
+      <use xlink:href="#DejaVuSans-67" x="998.673828"/>
+      <use xlink:href="#DejaVuSans-72" x="1062.150391"/>
+      <use xlink:href="#DejaVuSans-65" x="1101.013672"/>
+      <use xlink:href="#DejaVuSans-73" x="1162.537109"/>
+      <use xlink:href="#DejaVuSans-73" x="1214.636719"/>
+      <use xlink:href="#DejaVuSans-6f" x="1266.736328"/>
+      <use xlink:href="#DejaVuSans-72" x="1327.917969"/>
+     </g>
+    </g>
+    <g id="line2d_14">
+     <g>
+      <use xlink:href="#m87a24366d7" x="250.815313" y="69.248563" style="fill: #0000ff; stroke: #0000ff"/>
+     </g>
+    </g>
+    <g id="text_13">
+     <!-- RandomForestRegressor -->
+     <g transform="translate(268.815313 72.748563) scale(0.1 -0.1)">
+      <defs>
+       <path id="DejaVuSans-46" d="M 628 4666 
+L 3309 4666 
+L 3309 4134 
+L 1259 4134 
+L 1259 2759 
+L 3109 2759 
+L 3109 2228 
+L 1259 2228 
+L 1259 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-52"/>
+      <use xlink:href="#DejaVuSans-61" x="67.232422"/>
+      <use xlink:href="#DejaVuSans-6e" x="128.511719"/>
+      <use xlink:href="#DejaVuSans-64" x="191.890625"/>
+      <use xlink:href="#DejaVuSans-6f" x="255.367188"/>
+      <use xlink:href="#DejaVuSans-6d" x="316.548828"/>
+      <use xlink:href="#DejaVuSans-46" x="413.960938"/>
+      <use xlink:href="#DejaVuSans-6f" x="467.855469"/>
+      <use xlink:href="#DejaVuSans-72" x="529.037109"/>
+      <use xlink:href="#DejaVuSans-65" x="567.900391"/>
+      <use xlink:href="#DejaVuSans-73" x="629.423828"/>
+      <use xlink:href="#DejaVuSans-74" x="681.523438"/>
+      <use xlink:href="#DejaVuSans-52" x="720.732422"/>
+      <use xlink:href="#DejaVuSans-65" x="785.714844"/>
+      <use xlink:href="#DejaVuSans-67" x="847.238281"/>
+      <use xlink:href="#DejaVuSans-72" x="910.714844"/>
+      <use xlink:href="#DejaVuSans-65" x="949.578125"/>
+      <use xlink:href="#DejaVuSans-73" x="1011.101562"/>
+      <use xlink:href="#DejaVuSans-73" x="1063.201172"/>
+      <use xlink:href="#DejaVuSans-6f" x="1115.300781"/>
+      <use xlink:href="#DejaVuSans-72" x="1176.482422"/>
+     </g>
+    </g>
+    <g id="line2d_15">
+     <g>
+      <use xlink:href="#m7c90bf28bf" x="250.815313" y="83.926688" style="fill: #87ceeb; stroke: #87ceeb"/>
+     </g>
+    </g>
+    <g id="text_14">
+     <!-- LinearRegression -->
+     <g transform="translate(268.815313 87.426688) scale(0.1 -0.1)">
+      <defs>
+       <path id="DejaVuSans-4c" d="M 628 4666 
+L 1259 4666 
+L 1259 531 
+L 3531 531 
+L 3531 0 
+L 628 0 
+L 628 4666 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-4c"/>
+      <use xlink:href="#DejaVuSans-69" x="55.712891"/>
+      <use xlink:href="#DejaVuSans-6e" x="83.496094"/>
+      <use xlink:href="#DejaVuSans-65" x="146.875"/>
+      <use xlink:href="#DejaVuSans-61" x="208.398438"/>
+      <use xlink:href="#DejaVuSans-72" x="269.677734"/>
+      <use xlink:href="#DejaVuSans-52" x="310.791016"/>
+      <use xlink:href="#DejaVuSans-65" x="375.773438"/>
+      <use xlink:href="#DejaVuSans-67" x="437.296875"/>
+      <use xlink:href="#DejaVuSans-72" x="500.773438"/>
+      <use xlink:href="#DejaVuSans-65" x="539.636719"/>
+      <use xlink:href="#DejaVuSans-73" x="601.160156"/>
+      <use xlink:href="#DejaVuSans-73" x="653.259766"/>
+      <use xlink:href="#DejaVuSans-69" x="705.359375"/>
+      <use xlink:href="#DejaVuSans-6f" x="733.142578"/>
+      <use xlink:href="#DejaVuSans-6e" x="794.324219"/>
+     </g>
+    </g>
+    <g id="line2d_16">
+     <g>
+      <use xlink:href="#m536028c284" x="250.815313" y="98.604813" style="fill: #ff0000; stroke: #ff0000"/>
+     </g>
+    </g>
+    <g id="text_15">
+     <!-- VotingRegressor -->
+     <g transform="translate(268.815313 102.104813) scale(0.1 -0.1)">
+      <defs>
+       <path id="DejaVuSans-56" d="M 1831 0 
+L 50 4666 
+L 709 4666 
+L 2188 738 
+L 3669 4666 
+L 4325 4666 
+L 2547 0 
+L 1831 0 
+z
+" transform="scale(0.015625)"/>
+      </defs>
+      <use xlink:href="#DejaVuSans-56"/>
+      <use xlink:href="#DejaVuSans-6f" x="60.658203"/>
+      <use xlink:href="#DejaVuSans-74" x="121.839844"/>
+      <use xlink:href="#DejaVuSans-69" x="161.048828"/>
+      <use xlink:href="#DejaVuSans-6e" x="188.832031"/>
+      <use xlink:href="#DejaVuSans-67" x="252.210938"/>
+      <use xlink:href="#DejaVuSans-52" x="315.6875"/>
+      <use xlink:href="#DejaVuSans-65" x="380.669922"/>
+      <use xlink:href="#DejaVuSans-67" x="442.193359"/>
+      <use xlink:href="#DejaVuSans-72" x="505.669922"/>
+      <use xlink:href="#DejaVuSans-65" x="544.533203"/>
+      <use xlink:href="#DejaVuSans-73" x="606.056641"/>
+      <use xlink:href="#DejaVuSans-73" x="658.15625"/>
+      <use xlink:href="#DejaVuSans-6f" x="710.255859"/>
+      <use xlink:href="#DejaVuSans-72" x="771.4375"/>
+     </g>
+    </g>
+   </g>
+  </g>
+ </g>
+ <defs>
+  <clipPath id="p3f62924c94">
+   <rect x="57.6" y="41.472" width="357.12" height="266.112"/>
+  </clipPath>
+ </defs>
+</svg>

From 14bf8b6d9497ea8d16e2cf258debd1238f8df8cb Mon Sep 17 00:00:00 2001
From: ngow210 <nidhign11@gmail.com>
Date: Tue, 23 Jul 2024 11:19:35 +1200
Subject: [PATCH 4/8] voting regressor house price content change

---
 _episodes/04-ensemble-methods.md | 101 ++-----------------------------
 1 file changed, 6 insertions(+), 95 deletions(-)

diff --git a/_episodes/04-ensemble-methods.md b/_episodes/04-ensemble-methods.md
index 118ce23..d195cbb 100644
--- a/_episodes/04-ensemble-methods.md
+++ b/_episodes/04-ensemble-methods.md
@@ -166,104 +166,14 @@ plt.show()
 
 ![random forest clf space](../fig/EM_rf_clf_space.png)
 
-There is still some overfitting indicated by the regions that contain only single points but using the same hyper-parameter settings used to fit the decision tree classifier, we can see that overfitting is reduced. 
-
-## Stacking: classification
-import seaborn as sns
-penguins = sns.load_dataset('penguins')
-
-feature_names = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
-penguins.dropna(subset=feature_names, inplace=True)
-
-species_names = penguins['species'].unique()
-
-# Define data and targets
-X = penguins[feature_names]
-
-y = penguins.species
-
-# Split data in training and test set
-from sklearn.model_selection import train_test_split
-
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)
-
-print(f'train size: {X_train.shape}')
-print(f'test size: {X_test.shape}')
-
-from sklearn.ensemble import (
-    GradientBoostingClassifier,
-    RandomForestClassifier,
-    VotingClassifier,
-)
-from sklearn.gaussian_process import GaussianProcessClassifier
-from sklearn.gaussian_process.kernels import RBF
-from sklearn.tree import DecisionTreeClassifier
-
-# training estimators 
-rf_clf = RandomForestClassifier(n_estimators=100, max_depth=7, min_samples_leaf=1, random_state=5)
-gb_clf = GradientBoostingClassifier(random_state=5)
-gp_clf = GaussianProcessClassifier(1.0 * RBF(1.0), random_state=5)
-dt_clf = DecisionTreeClassifier(max_depth=5, random_state=5)
-
-voting_reg = VotingClassifier([("rf", rf_clf), ("gb", gb_clf), ("gp", gp_clf), ("dt", dt_clf)])
-
-# fit voting estimator
-voting_reg.fit(X_train, y_train)
-
-# lets also train the individual models for comparison
-rf_clf.fit(X_train, y_train)
-gb_clf.fit(X_train, y_train)
-gp_clf.fit(X_train, y_train)
-dt_clf.fit(X_train, y_train)
-
-import matplotlib.pyplot as plt
-
-# make predictions
-X_test_20 = X_test[:20] # first 20 for visualisation
-
-rf_pred = rf_clf.predict(X_test_20)
-gb_pred = gb_clf.predict(X_test_20)
-gp_pred = gp_clf.predict(X_test_20)
-dt_pred = dt_clf.predict(X_test_20)
-voting_pred = voting_reg.predict(X_test_20)
-
-print(rf_pred)
-print(gb_pred)
-print(gp_pred)
-print(dt_pred)
-print(voting_pred)
-
-plt.figure()
-plt.plot(gb_pred,  "o", color="green", label="GradientBoostingClassifier")
-plt.plot(rf_pred,  "o", color="blue", label="RandomForestClassifier")
-plt.plot(gp_pred,  "o", color="darkblue", label="GuassianProcessClassifier")
-plt.plot(dt_pred,  "o", color="lightblue", label="DecisionTreeClassifier")
-plt.plot(voting_pred,  "x", color="red", ms=10, label="VotingRegressor")
-
-plt.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False)
-plt.ylabel("predicted")
-plt.xlabel("training samples")
-plt.legend(loc="best")
-plt.title("Regressor predictions and their average")
-
-plt.show()
-
-print(f'random forest: {rf_clf.score(X_test, y_test)}')
-
-print(f'gradient boost: {gb_clf.score(X_test, y_test)}')
-
-print(f'guassian process: {gp_clf.score(X_test, y_test)}')
-
-print(f'decision tree: {dt_clf.score(X_test, y_test)}')
-
-print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
+There is still some overfitting indicated by the regions that contain only single points but using the same hyper-parameter settings used to fit the decision tree classifier, we can see that overfitting is reduced.
 
 ## Stacking a regression problem
 
 We've had a look at a bagging approach but we'll now take a look at a stacking approach and apply it to a regression problem. We'll also introduce a new dataset to play around with. 
 
 ### California house price prediction
-
+~~~
 import sklearn
 from sklearn.datasets import fetch_california_housing
 from sklearn.model_selection import train_test_split
@@ -335,7 +245,8 @@ print(f'gradient boost: {gb_reg.score(X_test, y_test)}')
 print(f'linear regression: {linear_reg.score(X_test, y_test)}')
 
 print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
-
+~~~
+{: .language-python}
 
 ### The diabetes dataset 
 The diabetes dataset, contains 10 baseline variables for 442 diabetes patients where the target attribute is quantitative measure of disease progression one year after baseline. For more information see [Efron et al., (2004)](https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf). The useful thing about this data it is available as part of the [sci-kit learn library](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset). We'll start by loading the dataset to very briefly inspect the attributes by printing them out.
@@ -459,8 +370,8 @@ print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
 ~~~
 {: .language-python}
 
-Each of our models score a pretty poor 0.52-0.53, which is barely better than a coin flip. However what we can see is that the stacked result generated by the voting regressor produces a slightly improved score of 0.55, which is better than any of the three models/estimators taken individually. The whole model is greater than the sum of the individual parts. And of course, we could try and improve our accuracy score by tweaking with our indivdual model hyperparameters, or adjusting our training data features and train-test-split data.
-
+## Review this
+Each of our models score 0.61-0.82, which is a good accuracy score, do note that the toy datasets are not representative of real world data. However what we can see is that the stacked result generated by the voting regressor fits different sub-models and then averages the individual predictions to form a final prediction. The benefit of this approach is that it reduces overfitting and increases generalizability. Of course, we could try and improve our accuracy score by tweaking with our indivdual model hyperparameters, using more advaced boosted models or adjusting our training data features and train-test-split data.
 
 > ## Exercise: Stacking a classification problem.
 > Sci-kit learn also has method for stacking ensemble classifiers ```sklearn.ensemble.VotingClassifier``` do you think you could apply a stack to the penguins dataset using a random forest, SVM and decision tree classifier, or a selection of any other classifier estimators available in sci-kit learn? 

From 15762c20bbf0d00e2cd9acf1a122364cb626d27b Mon Sep 17 00:00:00 2001
From: ngow210 <nidhign11@gmail.com>
Date: Tue, 23 Jul 2024 13:42:19 +1200
Subject: [PATCH 5/8] voting regressor house price page changed

---
 _episodes/04-ensemble-methods.md | 99 ++++++++++++++++----------------
 1 file changed, 48 insertions(+), 51 deletions(-)

diff --git a/_episodes/04-ensemble-methods.md b/_episodes/04-ensemble-methods.md
index d195cbb..2e0b164 100644
--- a/_episodes/04-ensemble-methods.md
+++ b/_episodes/04-ensemble-methods.md
@@ -170,29 +170,54 @@ There is still some overfitting indicated by the regions that contain only singl
 
 ## Stacking a regression problem
 
-We've had a look at a bagging approach but we'll now take a look at a stacking approach and apply it to a regression problem. We'll also introduce a new dataset to play around with. 
+We've had a look at a bagging approach, but we'll now take a look at a stacking approach and apply it to a regression problem. We'll also introduce a new dataset to play around with. 
 
 ### California house price prediction
+The California housing dataset for regression problems contains 8 features, such as median income, house age, average rooms and average bedrooms, for 20,640 census block groups (districts). The target variable is the median house value for each of those districts; note that all prices are in units of $100,000. This toy dataset is available as part of the [scikit-learn library](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html). We'll start by loading the dataset to very briefly inspect the attributes by printing them out.
+
 ~~~
 import sklearn
 from sklearn.datasets import fetch_california_housing
-from sklearn.model_selection import train_test_split
+
+# load the dataset
 X, y = fetch_california_housing(return_X_y=True, as_frame=True)
 
+## All price variables are in units of $100,000
 print(X.shape)
-print(y.shape)
-
 print(X.head())
-print("======================================")
-## Target is in units of 100,000
+
+print("Housing price as the target: ")
+
+## Target is in units of $100,000
 print(y.head())
+print(y.shape)
+~~~
+{: .language-python}
+
+For the purposes of learning how to create and use ensemble methods, and since it is a toy dataset, we will blindly use this dataset without inspecting, cleaning or pre-processing it further.
+
+> ## Exercise: Investigate and visualise the dataset
+> For this episode we simply want to learn how to build and use an Ensemble rather than actually solve a regression problem. To build up your skills as an ML practitioner, investigate and visualise this dataset. What can you say about the dataset itself, and what can you summarise about any potential relationships or prediction problems?
+{: .challenge}
+
+Let's start by splitting the dataset into training and testing subsets:
+
+~~~
+# split into train and test sets, We are selecting an 80%-20% train-test split.
+from sklearn.model_selection import train_test_split
 
-# split into train and test sets
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)
 
 print(f'train size: {X_train.shape}')
 print(f'test size: {X_test.shape}')
+~~~
+{: .language-python}
+
+Let's stack a series of regression models. In the same way the RandomForest classifier derives a result from a series of trees, we will combine the results from a series of different models in our stack. This is done using an ensemble meta-estimator called a VotingRegressor.
+
+We'll apply a Voting regressor to a random forest, gradient boosting and linear regressor.
 
+~~~
 from sklearn.ensemble import (
     GradientBoostingRegressor,
     RandomForestRegressor,
@@ -200,20 +225,23 @@ from sklearn.ensemble import (
 )
 from sklearn.linear_model import LinearRegression
 
-# training estimators 
+# Initialize estimators 
 rf_reg = RandomForestRegressor(random_state=5)
 gb_reg = GradientBoostingRegressor(random_state=5)
 linear_reg = LinearRegression()
 voting_reg = VotingRegressor([("rf", rf_reg), ("gb", gb_reg), ("lr", linear_reg)])
 
-# fit voting estimator
+# fit/train voting estimator
 voting_reg.fit(X_train, y_train)
 
-# lets also train the individual models for comparison
+# lets also fit/train the individual models for comparison
 rf_reg.fit(X_train, y_train)
 gb_reg.fit(X_train, y_train)
 linear_reg.fit(X_train, y_train)
+~~~
+{: .language-python}
 
+~~~
 import matplotlib.pyplot as plt
 
 # make predictions
@@ -237,48 +265,18 @@ plt.legend(loc="best")
 plt.title("Regressor predictions and their average")
 
 plt.show()
-
-print(f'random forest: {rf_reg.score(X_test, y_test)}')
-
-print(f'gradient boost: {gb_reg.score(X_test, y_test)}')
-
-print(f'linear regression: {linear_reg.score(X_test, y_test)}')
-
-print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
 ~~~
 {: .language-python}
 
-### The diabetes dataset 
-The diabetes dataset, contains 10 baseline variables for 442 diabetes patients where the target attribute is quantitative measure of disease progression one year after baseline. For more information see [Efron et al., (2004)](https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf). The useful thing about this data it is available as part of the [sci-kit learn library](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset). We'll start by loading the dataset to very briefly inspect the attributes by printing them out.
 
 ~~~
-from sklearn.datasets import load_diabetes
-
-print(load_diabetes())
-~~~
-{: .language-python}
-
-For more details on this SKLearn dataset see [this link for details.](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset)
-
-For the the purposes of learning how to create and use ensemble methods we are about to commit a cardinal sin of machine learning and blindly use this dataset without inspecting it any further.
-
-> ## Exercise: Investigate and visualise the dataset
-> For this episode we simply want to learn how to build and use an Ensemble rather than actually solve a regression problem. To build up your skills as an ML practitioner, investigate and visualise this dataset. What can you say about the dataset itself, and what can you summarise about about any potential relationships or prediction problems?
-{: .challenge}
-
-Lets start by splitting the dataset into training and testing subsets:
-
-~~~
-from sklearn.model_selection import train_test_split
+print(f'random forest: {rf_reg.score(X_test, y_test)}')
 
-# load in data
-X, y = load_diabetes(return_X_y=True)
+print(f'gradient boost: {gb_reg.score(X_test, y_test)}')
 
-# split into train and test sets
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)
+print(f'linear regression: {linear_reg.score(X_test, y_test)}')
 
-print(f'train size: {X_train.shape}')
-print(f'test size: {X_test.shape}')
+print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
 ~~~
 {: .language-python}
 
@@ -321,7 +319,6 @@ voting_reg.fit(X_train, y_train)
 rf_reg.fit(X_train, y_train)
 gb_reg.fit(X_train, y_train)
 linear_reg.fit(X_train, y_train)
-
 ~~~
 {: .language-python}
 
@@ -339,9 +336,9 @@ linear_pred = linear_reg.predict(X_test_20)
 voting_pred = voting_reg.predict(X_test_20)
 
 plt.figure()
-plt.plot(rf_pred,  "o", color="navy", label="GradientBoostingRegressor")
-plt.plot(gb_pred,  "o", color="blue", label="RandomForestRegressor")
-plt.plot(linear_pred,  "o", color="skyblue", label="LinearRegression")
+plt.plot(gb_pred,  "o", color="black", label="GradientBoostingRegressor")
+plt.plot(rf_pred,  "o", color="blue", label="RandomForestRegressor")
+plt.plot(linear_pred,  "o", color="green", label="LinearRegression")
 plt.plot(voting_pred,  "x", color="red", ms=10, label="VotingRegressor")
 
 plt.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False)
@@ -350,11 +347,12 @@ plt.xlabel("training samples")
 plt.legend(loc="best")
 plt.title("Regressor predictions and their average")
 
+
 plt.show()
 ~~~
 {: .language-python}
 
-![Regressor predictions and average from stack](../fig/EM_stacked_plot.png)
+![Regressor predictions and average from stack](../fig/house_price_voting_regressor.svg)
 
 
 FInally, lets see how the average compares against each single estimator in the stack? 
@@ -370,11 +368,10 @@ print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
 ~~~
 {: .language-python}
 
-## Review this
 Each of our models score 0.61-0.82, which is a good accuracy score, do note that the toy datasets are not representative of real world data. However what we can see is that the stacked result generated by the voting regressor fits different sub-models and then averages the individual predictions to form a final prediction. The benefit of this approach is that it reduces overfitting and increases generalizability. Of course, we could try and improve our accuracy score by tweaking with our indivdual model hyperparameters, using more advaced boosted models or adjusting our training data features and train-test-split data.
 
 > ## Exercise: Stacking a classification problem.
-> Sci-kit learn also has method for stacking ensemble classifiers ```sklearn.ensemble.VotingClassifier``` do you think you could apply a stack to the penguins dataset using a random forest, SVM and decision tree classifier, or a selection of any other classifier estimators available in sci-kit learn? 
+> Scikit learn also has method for stacking ensemble classifiers ```sklearn.ensemble.VotingClassifier``` do you think you could apply a stack to the penguins dataset using a random forest, SVM and decision tree classifier, or a selection of any other classifier estimators available in sci-kit learn? 
 > 
 > ~~~
 > penguins = sns.load_dataset('penguins')

From efc80c36fa1ac229c8b0ed0822cbaa7580a40f03 Mon Sep 17 00:00:00 2001
From: ngow210 <nidhign11@gmail.com>
Date: Wed, 24 Jul 2024 10:04:59 +1200
Subject: [PATCH 6/8] dimensionality reduction refactor and ensemble clean up

---
 _episodes/04-ensemble-methods.md         | 81 +++------------------
 _episodes/06-dimensionality-reduction.md | 90 +++++++++++-------------
 2 files changed, 51 insertions(+), 120 deletions(-)

diff --git a/_episodes/04-ensemble-methods.md b/_episodes/04-ensemble-methods.md
index 2e0b164..f261bbd 100644
--- a/_episodes/04-ensemble-methods.md
+++ b/_episodes/04-ensemble-methods.md
@@ -173,7 +173,7 @@ There is still some overfitting indicated by the regions that contain only singl
 We've had a look at a bagging approach, but we'll now take a look at a stacking approach and apply it to a regression problem. We'll also introduce a new dataset to play around with. 
 
 ### California house price prediction
-The California housing dataset for regression problems contains 8 features such as, Median Income, House Age, Average Rooms, Average Bedrooms etc. for 20,640 properties. The target variable is the median house value for those 20,640 properties, note that all prices are in units of $100,000. This toy dataset is available as part of the [scikit learn library](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html). We'll start by loading the dataset to very briefly inspect the attributes by printing them out.
+The California housing dataset for regression problems contains 8 training features, such as Median Income, House Age, Average Rooms, Average Bedrooms, etc., for 20,640 properties. The target variable is the median house value for those 20,640 properties; note that all prices are in units of $100,000. This toy dataset is available as part of the [scikit learn library](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html). We'll start by loading the dataset to very briefly inspect the attributes by printing them out.
 
 ~~~
 import sklearn
@@ -217,69 +217,6 @@ Lets stack a series of regression models. In the same way the RandomForest class
 
 We'll apply a Voting regressor to a random forest, gradient boosting and linear regressor.
 
-~~~
-from sklearn.ensemble import (
-    GradientBoostingRegressor,
-    RandomForestRegressor,
-    VotingRegressor,
-)
-from sklearn.linear_model import LinearRegression
-
-# Initialize estimators 
-rf_reg = RandomForestRegressor(random_state=5)
-gb_reg = GradientBoostingRegressor(random_state=5)
-linear_reg = LinearRegression()
-voting_reg = VotingRegressor([("rf", rf_reg), ("gb", gb_reg), ("lr", linear_reg)])
-
-# fit/train voting estimator
-voting_reg.fit(X_train, y_train)
-
-# lets also fit/train the individual models for comparison
-rf_reg.fit(X_train, y_train)
-gb_reg.fit(X_train, y_train)
-linear_reg.fit(X_train, y_train)
-~~~
-{: .language-python}
-
-~~~
-import matplotlib.pyplot as plt
-
-# make predictions
-X_test_20 = X_test[:20] # first 20 for visualisation
-
-rf_pred = rf_reg.predict(X_test_20)
-gb_pred = gb_reg.predict(X_test_20)
-linear_pred = linear_reg.predict(X_test_20)
-voting_pred = voting_reg.predict(X_test_20)
-
-plt.figure()
-plt.plot(gb_pred,  "o", color="navy", label="GradientBoostingRegressor")
-plt.plot(rf_pred,  "o", color="blue", label="RandomForestRegressor")
-plt.plot(linear_pred,  "o", color="skyblue", label="LinearRegression")
-plt.plot(voting_pred,  "x", color="red", ms=10, label="VotingRegressor")
-
-plt.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False)
-plt.ylabel("predicted")
-plt.xlabel("training samples")
-plt.legend(loc="best")
-plt.title("Regressor predictions and their average")
-
-plt.show()
-~~~
-{: .language-python}
-
-
-~~~
-print(f'random forest: {rf_reg.score(X_test, y_test)}')
-
-print(f'gradient boost: {gb_reg.score(X_test, y_test)}')
-
-print(f'linear regression: {linear_reg.score(X_test, y_test)}')
-
-print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
-~~~
-{: .language-python}
-
 Lets stack a series of regression models. In the same way the RandomForest classifier derives a results from a series of trees, we will combine the results from a series of different models in our stack. This is done using what's called an ensemble meta-estimator called a VotingRegressor. 
 
 We'll apply a Voting regressor to a random forest, gradient boosting and linear regressor.
@@ -306,23 +243,23 @@ from sklearn.ensemble import (
 )
 from sklearn.linear_model import LinearRegression
 
-# training estimators 
+# Initialize estimators 
 rf_reg = RandomForestRegressor(random_state=5)
 gb_reg = GradientBoostingRegressor(random_state=5)
 linear_reg = LinearRegression()
-voting_reg = VotingRegressor([("gb", rf_reg), ("rf", gb_reg), ("lr", linear_reg)])
+voting_reg = VotingRegressor([("rf", rf_reg), ("gb", gb_reg), ("lr", linear_reg)])
 
-# fit voting estimator
+# fit/train voting estimator
 voting_reg.fit(X_train, y_train)
 
-# lets also train the individual models for comparison
+# lets also fit/train the individual models for comparison
 rf_reg.fit(X_train, y_train)
 gb_reg.fit(X_train, y_train)
 linear_reg.fit(X_train, y_train)
 ~~~
 {: .language-python}
 
-We fit the voting regressor in the same way we would fit a single model. When the voting regressor is instantiated we pass it a parameter containing a list of tuples that contain the estimators we wish to stack: in this case the random forest, gradient boosting and linear regressors. To get a sense of what this is doing lets predict the first 20 samples in the test portion of the data and plot the results. 
+We fit the voting regressor in the same way we would fit a single model. When the voting regressor is instantiated we pass it a parameter containing a list of tuples that contain the estimators we wish to stack: in this case the random forest, gradient boosting and linear regressors. To get a sense of what this is doing lets predict the first 20 samples in the test portion of the data and plot the results.
 
 ~~~
 import matplotlib.pyplot as plt
@@ -347,15 +284,13 @@ plt.xlabel("training samples")
 plt.legend(loc="best")
 plt.title("Regressor predictions and their average")
 
-
 plt.show()
 ~~~
 {: .language-python}
 
 ![Regressor predictions and average from stack](../fig/house_price_voting_regressor.svg)
 
-
-FInally, lets see how the average compares against each single estimator in the stack? 
+Finally, let's see how the average compares against each single estimator in the stack.
 
 ~~~
 print(f'random forest: {rf_reg.score(X_test, y_test)}')
@@ -368,7 +303,7 @@ print(f'voting regressor: {voting_reg.score(X_test, y_test)}')
 ~~~
 {: .language-python}
 
-Each of our models score 0.61-0.82, which is a good accuracy score, do note that the toy datasets are not representative of real world data. However what we can see is that the stacked result generated by the voting regressor fits different sub-models and then averages the individual predictions to form a final prediction. The benefit of this approach is that it reduces overfitting and increases generalizability. Of course, we could try and improve our accuracy score by tweaking with our indivdual model hyperparameters, using more advaced boosted models or adjusting our training data features and train-test-split data.
+Each of our models scores between 0.61 and 0.82, which at the high end is good, but at the low end is a pretty poor prediction accuracy score. Do note that the toy datasets are not representative of real world data. However, what we can see is that the voting regressor fits different sub-models and then averages their individual predictions to form the final, stacked prediction. The benefit of this approach is that it reduces overfitting and increases generalizability. Of course, we could try to improve our accuracy score by tweaking our individual model hyperparameters, using more advanced boosted models, or adjusting our training data features and train-test-split data.
 
 > ## Exercise: Stacking a classification problem.
 > Scikit learn also has method for stacking ensemble classifiers ```sklearn.ensemble.VotingClassifier``` do you think you could apply a stack to the penguins dataset using a random forest, SVM and decision tree classifier, or a selection of any other classifier estimators available in sci-kit learn? 
diff --git a/_episodes/06-dimensionality-reduction.md b/_episodes/06-dimensionality-reduction.md
index 05b4b7b..bbe48e8 100644
--- a/_episodes/06-dimensionality-reduction.md
+++ b/_episodes/06-dimensionality-reduction.md
@@ -28,7 +28,40 @@ The MNIST dataset contains 70,000 images of handwritten numbers, and are labelle
 To make this episode a bit less computationally intensive, the Scikit-Learn example that we will work with is a smaller sample of 1797 images. Each image is 8x8 in size for a total of 64 pixels per image, resulting in 64 features for us to work with. The pixels can take a value between 0-15 (4bits). Let's retrieve and inspect the Scikit-Learn dataset with the following code:
 
 ~~~
-from sklearn import datasets
+import numpy as np
+import matplotlib.pyplot as plt
+import sklearn.cluster as skl_cluster
+from sklearn import manifold, decomposition, datasets
+
+# Let's define these here to avoid repetitive code
+def plots(x_manifold):
+    tx = x_manifold[:, 0]
+    ty = x_manifold[:, 1]
+
+    # without labels
+    fig = plt.figure(1, figsize=(4, 4))
+    plt.scatter(tx, ty, edgecolor='k',label=labels)
+    plt.show()
+
+def plot_clusters(x_manifold, clusters):
+    tx = x_manifold[:, 0]
+    ty = x_manifold[:, 1]
+    fig = plt.figure(1, figsize=(4, 4))
+    plt.scatter(tx, ty, s=5, linewidth=0, c=clusters)
+    for cluster_x, cluster_y in Kmean.cluster_centers_:
+        plt.scatter(cluster_x, cluster_y, s=100, c='r', marker='x')
+    plt.show()
+
+def plot_clusters_labels(x_manifold, labels):
+    tx = x_manifold[:, 0]
+    ty = x_manifold[:, 1]
+
+    # with labels
+    fig = plt.figure(1, figsize=(5, 4))
+    plt.scatter(tx, ty, c=labels, cmap="nipy_spectral", 
+            edgecolor='k', label=labels)
+    plt.colorbar(boundaries=np.arange(11)-0.5).set_ticks(np.arange(10))
+    plt.show()
 
 # load in dataset as a Pandas Dataframe, return X and Y
 features, labels = datasets.load_digits(return_X_y=True, as_frame=True)
@@ -49,8 +82,6 @@ As humans we are pretty good at object and pattern recognition. We can look at t
 > 
 > > ## Solution
 > > ~~~
-> > import matplotlib.pyplot as plt
-> > import numpy as np
 > > 
 > > print(features.iloc[0])
 > > image_1D = features.iloc[0]
@@ -107,12 +138,9 @@ For more in depth explanations of PCA please see the following links:
 Let's apply PCA to the MNIST dataset and retain the two most-major components: 
 
 ~~~
-from sklearn import decomposition
-
 # PCA with 2 components
 pca = decomposition.PCA(n_components=2)
-pca.fit(features)
-x_pca = pca.transform(features)
+x_pca = pca.fit_transform(features)
 
 print(x_pca.shape)
 ~~~
@@ -121,16 +149,7 @@ print(x_pca.shape)
 This returns us an array of 1797x2 where the 2 remaining columns(our new "features" or "dimensions") contain vector representations of the first principle components (column 0) and second principle components (column 1) for each of the images. We can plot these two new features against each other:
 
 ~~~
-import numpy as np
-import matplotlib.pyplot as plt
-
-tx = x_pca[:, 0]
-ty = x_pca[:, 1]
-
-# without labels
-fig = plt.figure(1, figsize=(4, 4))
-plt.scatter(tx, ty, edgecolor='k',label=labels)
-plt.show()
+plots(x_pca)
 ~~~
 {: .language-python}
 
@@ -139,18 +158,10 @@ plt.show()
 We now have a 2D representation of our 64D dataset that we can work with instead. Let's try some quick K-means clustering on our 2D representation of the data. Because we already have some knowledge about our data we can set `k=10` for the 10 digits present in the dataset.
 
 ~~~
-import sklearn.cluster as skl_cluster
-
 Kmean = skl_cluster.KMeans(n_clusters=10)
-
 Kmean.fit(x_pca)
 clusters = Kmean.predict(x_pca,labels)
-
-fig = plt.figure(1, figsize=(4, 4))
-plt.scatter(tx, ty, s=5, linewidth=0, c=clusters)
-for cluster_x, cluster_y in Kmean.cluster_centers_:
-    plt.scatter(cluster_x, cluster_y, s=100, c='r', marker='x')
-plt.show()
+plot_clusters(x_pca, clusters)
 ~~~
 {: .language-python}
 
@@ -159,6 +170,9 @@ plt.show()
 And now we can compare how these clusters look against our actual image labels by colour coding our first scatter plot:
 
 ~~~
+tx = x_pca[:, 0]
+ty = x_pca[:, 1]
+
 fig = plt.figure(1, figsize=(5, 4))
 plt.scatter(tx, ty, c=labels, cmap="nipy_spectral", 
         edgecolor='k',label=labels)
@@ -186,45 +200,27 @@ For more in depth explanations of t-SNE and manifold learning please see the fol
 Scikit-Learn allows us to apply t-SNE in a relatively simple way. Lets code and apply t-SNE to the MNIST dataset in the same manner that we did for the PCA example, and reduce the data down from 64D to 2D again:
 
 ~~~
-from sklearn import manifold
-
 # t-SNE embedding
 # initialising with "pca" explicitly preserves global structure
 tsne = manifold.TSNE(n_components=2, init='pca', random_state = 0)
 x_tsne = tsne.fit_transform(features)
 
-
-fig = plt.figure(1, figsize=(4, 4))
-plt.scatter(x_tsne[:, 0], x_tsne[:, 1], edgecolor='k')
-plt.show()
+plots(x_tsne)
 ~~~
 {: .language-python}
 
 ![Reduction using PCA](../fig/tsne_unlabelled.png)
 
-
 It looks like t-SNE has done a much better job of splitting our data up into clusters using only a 2D representation of the data. Once again, let's run a simple k-means clustering on this new 2D representation, and compare with the actual color-labelled data:
 
 ~~~
-import sklearn.cluster as skl_cluster
-
 Kmean = skl_cluster.KMeans(n_clusters=10)
 
 Kmean.fit(x_tsne)
 clusters = Kmean.predict(x_tsne,labels)
 
-fig = plt.figure(1, figsize=(4, 4))
-plt.scatter(x_tsne[:,0], x_tsne[:,1], s=5, linewidth=0, c=clusters)
-for cluster_x, cluster_y in Kmean.cluster_centers_:
-    plt.scatter(cluster_x, cluster_y, s=100, c='r', marker='x')
-plt.show()
-
-# with labels
-fig = plt.figure(1, figsize=(5, 4))
-plt.scatter(x_tsne[:, 0], x_tsne[:, 1], c=labels, cmap="nipy_spectral", 
-        edgecolor='k',label=labels)
-plt.colorbar(boundaries=np.arange(11)-0.5).set_ticks(np.arange(10))
-plt.show()
+plot_clusters(x_tsne, clusters)
+plot_clusters_labels(x_tsne, labels)
 ~~~
 {: .language-python}
 

From 3c2ad8a7e4e60e547d252a294c3f548564313705 Mon Sep 17 00:00:00 2001
From: ngow210 <nidhign11@gmail.com>
Date: Wed, 24 Jul 2024 13:32:24 +1200
Subject: [PATCH 7/8] clean up ep 04,06 and fig

---
 _episodes/06-dimensionality-reduction.md |   9 +-
 fig/house_price_voting_regressor.svg     | 210 +++++++++++------------
 2 files changed, 106 insertions(+), 113 deletions(-)

diff --git a/_episodes/06-dimensionality-reduction.md b/_episodes/06-dimensionality-reduction.md
index bbe48e8..2070ae3 100644
--- a/_episodes/06-dimensionality-reduction.md
+++ b/_episodes/06-dimensionality-reduction.md
@@ -170,14 +170,7 @@ plot_clusters(x_pca, clusters)
 And now we can compare how these clusters look against our actual image labels by colour coding our first scatter plot:
 
 ~~~
-tx = x_pca[:, 0]
-ty = x_pca[:, 1]
-
-fig = plt.figure(1, figsize=(5, 4))
-plt.scatter(tx, ty, c=labels, cmap="nipy_spectral", 
-        edgecolor='k',label=labels)
-plt.colorbar(boundaries=np.arange(11)-0.5).set_ticks(np.arange(10))
-plt.show()
+plot_clusters_labels(x_pca, labels)
 ~~~
 {: .language-python}
 
diff --git a/fig/house_price_voting_regressor.svg b/fig/house_price_voting_regressor.svg
index c6936b8..753cde5 100644
--- a/fig/house_price_voting_regressor.svg
+++ b/fig/house_price_voting_regressor.svg
@@ -6,7 +6,7 @@
   <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <cc:Work>
     <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
-    <dc:date>2024-07-17T15:55:56.397301</dc:date>
+    <dc:date>2024-07-24T13:20:26.580306</dc:date>
     <dc:format>image/svg+xml</dc:format>
     <dc:creator>
      <cc:Agent>
@@ -331,12 +331,12 @@ z
     <g id="ytick_1">
      <g id="line2d_1">
       <defs>
-       <path id="mfc8c73d474" d="M 0 0 
+       <path id="m63c3647be7" d="M 0 0 
 L -3.5 0 
 " style="stroke: #000000; stroke-width: 0.8"/>
       </defs>
       <g>
-       <use xlink:href="#mfc8c73d474" x="57.6" y="289.211098" style="stroke: #000000; stroke-width: 0.8"/>
+       <use xlink:href="#m63c3647be7" x="57.6" y="289.211098" style="stroke: #000000; stroke-width: 0.8"/>
       </g>
      </g>
      <g id="text_2">
@@ -395,7 +395,7 @@ z
     <g id="ytick_2">
      <g id="line2d_2">
       <g>
-       <use xlink:href="#mfc8c73d474" x="57.6" y="258.053137" style="stroke: #000000; stroke-width: 0.8"/>
+       <use xlink:href="#m63c3647be7" x="57.6" y="258.053137" style="stroke: #000000; stroke-width: 0.8"/>
       </g>
      </g>
      <g id="text_3">
@@ -437,7 +437,7 @@ z
     <g id="ytick_3">
      <g id="line2d_3">
       <g>
-       <use xlink:href="#mfc8c73d474" x="57.6" y="226.895176" style="stroke: #000000; stroke-width: 0.8"/>
+       <use xlink:href="#m63c3647be7" x="57.6" y="226.895176" style="stroke: #000000; stroke-width: 0.8"/>
       </g>
      </g>
      <g id="text_4">
@@ -478,7 +478,7 @@ z
     <g id="ytick_4">
      <g id="line2d_4">
       <g>
-       <use xlink:href="#mfc8c73d474" x="57.6" y="195.737215" style="stroke: #000000; stroke-width: 0.8"/>
+       <use xlink:href="#m63c3647be7" x="57.6" y="195.737215" style="stroke: #000000; stroke-width: 0.8"/>
       </g>
      </g>
      <g id="text_5">
@@ -493,7 +493,7 @@ z
     <g id="ytick_5">
      <g id="line2d_5">
       <g>
-       <use xlink:href="#mfc8c73d474" x="57.6" y="164.579254" style="stroke: #000000; stroke-width: 0.8"/>
+       <use xlink:href="#m63c3647be7" x="57.6" y="164.579254" style="stroke: #000000; stroke-width: 0.8"/>
       </g>
      </g>
      <g id="text_6">
@@ -542,7 +542,7 @@ z
     <g id="ytick_6">
      <g id="line2d_6">
       <g>
-       <use xlink:href="#mfc8c73d474" x="57.6" y="133.421293" style="stroke: #000000; stroke-width: 0.8"/>
+       <use xlink:href="#m63c3647be7" x="57.6" y="133.421293" style="stroke: #000000; stroke-width: 0.8"/>
       </g>
      </g>
      <g id="text_7">
@@ -557,7 +557,7 @@ z
     <g id="ytick_7">
      <g id="line2d_7">
       <g>
-       <use xlink:href="#mfc8c73d474" x="57.6" y="102.263332" style="stroke: #000000; stroke-width: 0.8"/>
+       <use xlink:href="#m63c3647be7" x="57.6" y="102.263332" style="stroke: #000000; stroke-width: 0.8"/>
       </g>
      </g>
      <g id="text_8">
@@ -593,7 +593,7 @@ z
     <g id="ytick_8">
      <g id="line2d_8">
       <g>
-       <use xlink:href="#mfc8c73d474" x="57.6" y="71.105371" style="stroke: #000000; stroke-width: 0.8"/>
+       <use xlink:href="#m63c3647be7" x="57.6" y="71.105371" style="stroke: #000000; stroke-width: 0.8"/>
       </g>
      </g>
      <g id="text_9">
@@ -671,7 +671,7 @@ z
    </g>
    <g id="line2d_9">
     <defs>
-     <path id="m5c856ca772" d="M 0 3 
+     <path id="ma76a034d2b" d="M 0 3 
 C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
 C 2.683901 1.55874 3 0.795609 3 0 
 C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
@@ -681,34 +681,34 @@ C -2.683901 -1.55874 -3 -0.795609 -3 0
 C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
 C -1.55874 2.683901 -0.795609 3 0 3 
 z
-" style="stroke: #000080"/>
+" style="stroke: #000000"/>
     </defs>
-    <g clip-path="url(#p3f62924c94)">
-     <use xlink:href="#m5c856ca772" x="73.832727" y="273.630468" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="90.919809" y="229.717444" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="108.00689" y="295.488" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="125.093971" y="257.041512" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="142.181053" y="271.039495" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="159.268134" y="242.201555" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="176.355215" y="224.918113" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="193.442297" y="218.081114" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="210.529378" y="164.396859" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="227.616459" y="62.668117" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="244.703541" y="244.652989" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="261.790622" y="232.747558" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="278.877703" y="216.854426" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="295.964785" y="288.521497" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="313.051866" y="262.514434" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="330.138947" y="200.963581" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="347.226029" y="140.388116" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="364.31311" y="140.217196" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="381.400191" y="222.23332" style="fill: #000080; stroke: #000080"/>
-     <use xlink:href="#m5c856ca772" x="398.487273" y="223.366839" style="fill: #000080; stroke: #000080"/>
+    <g clip-path="url(#p4259d74935)">
+     <use xlink:href="#ma76a034d2b" x="73.832727" y="273.630468" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="90.919809" y="229.717444" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="108.00689" y="295.488" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="125.093971" y="257.041512" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="142.181053" y="271.039495" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="159.268134" y="242.201555" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="176.355215" y="224.918113" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="193.442297" y="218.081114" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="210.529378" y="164.396859" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="227.616459" y="62.668117" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="244.703541" y="244.652989" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="261.790622" y="232.747558" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="278.877703" y="216.854426" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="295.964785" y="288.521497" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="313.051866" y="262.514434" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="330.138947" y="200.963581" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="347.226029" y="140.388116" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="364.31311" y="140.217196" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="381.400191" y="222.23332" style="stroke: #000000"/>
+     <use xlink:href="#ma76a034d2b" x="398.487273" y="223.366839" style="stroke: #000000"/>
     </g>
    </g>
    <g id="line2d_10">
     <defs>
-     <path id="m87a24366d7" d="M 0 3 
+     <path id="m5573d2c2e0" d="M 0 3 
 C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
 C 2.683901 1.55874 3 0.795609 3 0 
 C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
@@ -720,32 +720,32 @@ C -1.55874 2.683901 -0.795609 3 0 3
 z
 " style="stroke: #0000ff"/>
     </defs>
-    <g clip-path="url(#p3f62924c94)">
-     <use xlink:href="#m87a24366d7" x="73.832727" y="272.610136" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="90.919809" y="233.301876" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="108.00689" y="257.373881" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="125.093971" y="253.204335" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="142.181053" y="274.606115" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="159.268134" y="257.425615" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="176.355215" y="238.199284" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="193.442297" y="212.689638" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="210.529378" y="174.964825" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="227.616459" y="53.568" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="244.703541" y="258.500565" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="261.790622" y="210.737885" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="278.877703" y="229.035728" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="295.964785" y="272.628208" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="313.051866" y="271.578808" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="330.138947" y="219.629139" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="347.226029" y="139.67095" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="364.31311" y="109.488819" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="381.400191" y="228.167667" style="fill: #0000ff; stroke: #0000ff"/>
-     <use xlink:href="#m87a24366d7" x="398.487273" y="241.383004" style="fill: #0000ff; stroke: #0000ff"/>
+    <g clip-path="url(#p4259d74935)">
+     <use xlink:href="#m5573d2c2e0" x="73.832727" y="272.610136" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="90.919809" y="233.301876" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="108.00689" y="257.373881" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="125.093971" y="253.204335" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="142.181053" y="274.606115" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="159.268134" y="257.425615" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="176.355215" y="238.199284" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="193.442297" y="212.689638" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="210.529378" y="174.964825" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="227.616459" y="53.568" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="244.703541" y="258.500565" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="261.790622" y="210.737885" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="278.877703" y="229.035728" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="295.964785" y="272.628208" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="313.051866" y="271.578808" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="330.138947" y="219.629139" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="347.226029" y="139.67095" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="364.31311" y="109.488819" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="381.400191" y="228.167667" style="fill: #0000ff; stroke: #0000ff"/>
+     <use xlink:href="#m5573d2c2e0" x="398.487273" y="241.383004" style="fill: #0000ff; stroke: #0000ff"/>
     </g>
    </g>
    <g id="line2d_11">
     <defs>
-     <path id="m7c90bf28bf" d="M 0 3 
+     <path id="m1fd8759477" d="M 0 3 
 C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
 C 2.683901 1.55874 3 0.795609 3 0 
 C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
@@ -755,60 +755,60 @@ C -2.683901 -1.55874 -3 -0.795609 -3 0
 C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
 C -1.55874 2.683901 -0.795609 3 0 3 
 z
-" style="stroke: #87ceeb"/>
+" style="stroke: #008000"/>
     </defs>
-    <g clip-path="url(#p3f62924c94)">
-     <use xlink:href="#m7c90bf28bf" x="73.832727" y="245.949966" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="90.919809" y="230.620161" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="108.00689" y="288.671051" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="125.093971" y="244.095747" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="142.181053" y="242.105403" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="159.268134" y="235.632116" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="176.355215" y="259.870854" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="193.442297" y="229.109921" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="210.529378" y="152.52502" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="227.616459" y="105.071573" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="244.703541" y="234.812694" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="261.790622" y="243.163077" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="278.877703" y="202.548732" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="295.964785" y="224.571454" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="313.051866" y="226.059343" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="330.138947" y="191.843464" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="347.226029" y="142.732369" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="364.31311" y="131.894447" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="381.400191" y="192.647204" style="fill: #87ceeb; stroke: #87ceeb"/>
-     <use xlink:href="#m7c90bf28bf" x="398.487273" y="200.961616" style="fill: #87ceeb; stroke: #87ceeb"/>
+    <g clip-path="url(#p4259d74935)">
+     <use xlink:href="#m1fd8759477" x="73.832727" y="245.949966" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="90.919809" y="230.620161" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="108.00689" y="288.671051" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="125.093971" y="244.095747" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="142.181053" y="242.105403" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="159.268134" y="235.632116" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="176.355215" y="259.870854" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="193.442297" y="229.109921" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="210.529378" y="152.52502" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="227.616459" y="105.071573" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="244.703541" y="234.812694" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="261.790622" y="243.163077" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="278.877703" y="202.548732" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="295.964785" y="224.571454" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="313.051866" y="226.059343" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="330.138947" y="191.843464" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="347.226029" y="142.732369" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="364.31311" y="131.894447" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="381.400191" y="192.647204" style="fill: #008000; stroke: #008000"/>
+     <use xlink:href="#m1fd8759477" x="398.487273" y="200.961616" style="fill: #008000; stroke: #008000"/>
     </g>
    </g>
    <g id="line2d_12">
     <defs>
-     <path id="m536028c284" d="M -5 5 
+     <path id="m47de6d2c5a" d="M -5 5 
 L 5 -5 
 M -5 -5 
 L 5 5 
 " style="stroke: #ff0000"/>
     </defs>
-    <g clip-path="url(#p3f62924c94)">
-     <use xlink:href="#m536028c284" x="73.832727" y="264.063524" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="90.919809" y="231.21316" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="108.00689" y="280.510977" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="125.093971" y="251.447198" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="142.181053" y="262.583671" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="159.268134" y="245.086429" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="176.355215" y="240.996084" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="193.442297" y="219.960224" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="210.529378" y="163.962235" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="227.616459" y="73.76923" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="244.703541" y="245.988749" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="261.790622" y="228.88284" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="278.877703" y="216.146295" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="295.964785" y="261.907053" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="313.051866" y="253.384195" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="330.138947" y="204.145395" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="347.226029" y="140.930478" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="364.31311" y="127.200154" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="381.400191" y="214.349397" style="fill: #ff0000; stroke: #ff0000"/>
-     <use xlink:href="#m536028c284" x="398.487273" y="221.90382" style="fill: #ff0000; stroke: #ff0000"/>
+    <g clip-path="url(#p4259d74935)">
+     <use xlink:href="#m47de6d2c5a" x="73.832727" y="264.063524" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="90.919809" y="231.21316" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="108.00689" y="280.510977" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="125.093971" y="251.447198" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="142.181053" y="262.583671" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="159.268134" y="245.086429" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="176.355215" y="240.996084" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="193.442297" y="219.960224" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="210.529378" y="163.962235" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="227.616459" y="73.76923" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="244.703541" y="245.988749" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="261.790622" y="228.88284" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="278.877703" y="216.146295" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="295.964785" y="261.907053" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="313.051866" y="253.384195" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="330.138947" y="204.145395" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="347.226029" y="140.930478" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="364.31311" y="127.200154" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="381.400191" y="214.349397" style="fill: #ff0000; stroke: #ff0000"/>
+     <use xlink:href="#m47de6d2c5a" x="398.487273" y="221.90382" style="fill: #ff0000; stroke: #ff0000"/>
     </g>
    </g>
    <g id="patch_3">
@@ -971,7 +971,7 @@ z
     </g>
     <g id="line2d_13">
      <g>
-      <use xlink:href="#m5c856ca772" x="250.815313" y="54.570438" style="fill: #000080; stroke: #000080"/>
+      <use xlink:href="#ma76a034d2b" x="250.815313" y="54.570438" style="stroke: #000000"/>
      </g>
     </g>
     <g id="text_12">
@@ -1065,7 +1065,7 @@ z
     </g>
     <g id="line2d_14">
      <g>
-      <use xlink:href="#m87a24366d7" x="250.815313" y="69.248563" style="fill: #0000ff; stroke: #0000ff"/>
+      <use xlink:href="#m5573d2c2e0" x="250.815313" y="69.248563" style="fill: #0000ff; stroke: #0000ff"/>
      </g>
     </g>
     <g id="text_13">
@@ -1111,7 +1111,7 @@ z
     </g>
     <g id="line2d_15">
      <g>
-      <use xlink:href="#m7c90bf28bf" x="250.815313" y="83.926688" style="fill: #87ceeb; stroke: #87ceeb"/>
+      <use xlink:href="#m1fd8759477" x="250.815313" y="83.926688" style="fill: #008000; stroke: #008000"/>
      </g>
     </g>
     <g id="text_14">
@@ -1148,7 +1148,7 @@ z
     </g>
     <g id="line2d_16">
      <g>
-      <use xlink:href="#m536028c284" x="250.815313" y="98.604813" style="fill: #ff0000; stroke: #ff0000"/>
+      <use xlink:href="#m47de6d2c5a" x="250.815313" y="98.604813" style="fill: #ff0000; stroke: #ff0000"/>
      </g>
     </g>
     <g id="text_15">
@@ -1187,7 +1187,7 @@ z
   </g>
  </g>
  <defs>
-  <clipPath id="p3f62924c94">
+  <clipPath id="p4259d74935">
    <rect x="57.6" y="41.472" width="357.12" height="266.112"/>
   </clipPath>
  </defs>

From db8362f10a80eaba4966fc0f2c10c1d957649f60 Mon Sep 17 00:00:00 2001
From: ngow210 <nidhign11@gmail.com>
Date: Wed, 24 Jul 2024 14:08:18 +1200
Subject: [PATCH 8/8] ensemble penguins classification added

---
 _episodes/ensemble_classification.md | 89 ++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 _episodes/ensemble_classification.md

diff --git a/_episodes/ensemble_classification.md b/_episodes/ensemble_classification.md
new file mode 100644
index 0000000..1499a4d
--- /dev/null
+++ b/_episodes/ensemble_classification.md
@@ -0,0 +1,89 @@
+## Stacking: classification
+import seaborn as sns
+penguins = sns.load_dataset('penguins')
+
+feature_names = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
+penguins.dropna(subset=feature_names, inplace=True)  # drop rows missing any feature
+
+species_names = penguins['species'].unique()
+
+# Define data and targets
+X = penguins[feature_names]
+
+y = penguins.species
+
+# Split data in training and test set
+from sklearn.model_selection import train_test_split
+
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)
+
+print(f'train size: {X_train.shape}')
+print(f'test size: {X_test.shape}')
+
+from sklearn.ensemble import (
+    GradientBoostingClassifier,
+    RandomForestClassifier,
+    VotingClassifier,
+)
+from sklearn.gaussian_process import GaussianProcessClassifier
+from sklearn.gaussian_process.kernels import RBF
+from sklearn.tree import DecisionTreeClassifier
+
+# define the base estimators (they are fitted further down)
+rf_clf = RandomForestClassifier(n_estimators=100, max_depth=7, min_samples_leaf=1, random_state=5)
+gb_clf = GradientBoostingClassifier(random_state=5)
+gp_clf = GaussianProcessClassifier(1.0 * RBF(1.0), random_state=5)
+dt_clf = DecisionTreeClassifier(max_depth=5, random_state=5)
+
+voting_clf = VotingClassifier([("rf", rf_clf), ("gb", gb_clf), ("gp", gp_clf), ("dt", dt_clf)])
+
+# fit voting estimator
+voting_clf.fit(X_train, y_train)
+
+# the voting estimator fits clones, so also fit the individual models for comparison
+rf_clf.fit(X_train, y_train)
+gb_clf.fit(X_train, y_train)
+gp_clf.fit(X_train, y_train)
+dt_clf.fit(X_train, y_train)
+
+import matplotlib.pyplot as plt
+
+# make predictions
+X_test_20 = X_test[:20]  # first 20 test samples for visualisation
+
+rf_pred = rf_clf.predict(X_test_20)
+gb_pred = gb_clf.predict(X_test_20)
+gp_pred = gp_clf.predict(X_test_20)
+dt_pred = dt_clf.predict(X_test_20)
+voting_pred = voting_clf.predict(X_test_20)
+
+print(rf_pred)
+print(gb_pred)
+print(gp_pred)
+print(dt_pred)
+print(voting_pred)
+
+plt.figure()
+plt.plot(gb_pred, "o", color="green", label="GradientBoostingClassifier")
+plt.plot(rf_pred, "o", color="blue", label="RandomForestClassifier")
+plt.plot(gp_pred, "o", color="darkblue", label="GaussianProcessClassifier")
+plt.plot(dt_pred, "o", color="lightblue", label="DecisionTreeClassifier")
+plt.plot(voting_pred, "x", color="red", ms=10, label="VotingClassifier")
+
+plt.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False)
+plt.ylabel("predicted")
+plt.xlabel("test samples")
+plt.legend(loc="best")
+plt.title("Classifier predictions and their majority vote")
+
+plt.show()
+
+print(f'random forest: {rf_clf.score(X_test, y_test)}')
+
+print(f'gradient boost: {gb_clf.score(X_test, y_test)}')
+
+print(f'gaussian process: {gp_clf.score(X_test, y_test)}')
+
+print(f'decision tree: {dt_clf.score(X_test, y_test)}')
+
+print(f'voting classifier: {voting_clf.score(X_test, y_test)}')
\ No newline at end of file