-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #374 from parthgvora/staging
Implement SPORF
- Loading branch information
Showing
9 changed files
with
1,810 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import sys | ||
import numpy as np | ||
import pandas as pd | ||
import csv | ||
from numpy import genfromtxt | ||
|
||
from proglearn.progressive_learner import ProgressiveLearner | ||
from proglearn.voters import TreeClassificationVoter | ||
from proglearn.transformers import TreeClassificationTransformer | ||
from proglearn.transformers import ObliqueTreeClassificationTransformer | ||
from proglearn.deciders import SimpleArgmaxAverage | ||
|
||
from sklearn.model_selection import train_test_split, cross_val_score | ||
|
||
def load_simulated_data(file):
    """Read a comma-delimited numeric table and split it into features and labels.

    Parameters
    ----------
    file
        Anything ``numpy.genfromtxt`` accepts as its first argument.

    Returns
    -------
    X : numpy.ndarray
        Every column of the table except the last one.
    y : numpy.ndarray
        The last column of the table.
    """
    table = genfromtxt(file, delimiter=',')
    # The final column is the target; everything before it is a feature.
    features, labels = table[:, :-1], table[:, -1]
    return features, labels
|
||
def load_data(data_file, task_num):
    """Load a benchmark dataset and split it into train and test parts.

    The dataset is chosen by a substring of ``data_file``: ``"acute"`` or
    ``"Hill_Valley"``. If both substrings occur, the "acute" loader is used,
    which matches the earlier behaviour where the later branch overwrote the
    result of the earlier one.

    Parameters
    ----------
    data_file : str
        Path of the dataset file; its name selects the loader.
    task_num : int
        Only used for the "acute" dataset: ``1`` selects column 6 as the
        label, any other value selects column 7.

    Returns
    -------
    X_train, X_test, y_train, y_test
        Output of a shuffled, stratified ``train_test_split`` with
        ``test_size=0.1``.
    n_classes : int
        Number of distinct values in the full label vector.

    Raises
    ------
    ValueError
        If ``data_file`` contains neither ``"acute"`` nor ``"Hill_Valley"``.
    """
    if "acute" in data_file:
        df = pd.read_table(data_file, encoding='utf-16')
        # Encode the yes/no answers numerically so the table can be cast
        # to float below.
        df[df == "no"] = 0
        df[df == "yes"] = 1

        data = df.to_numpy()

        # The first column is converted via text: any "," is replaced by "."
        # before calling float(). str() keeps this working if a cell was
        # already parsed as a number.
        data[:, 0] = np.array(
            [float(str(temp).replace(",", ".")) for temp in data[:, 0]]
        )

        # NOTE(review): only columns 0-4 are used as features while the
        # labels come from columns 6 and 7, so column 5 is never read.
        # Confirm that dropping it is intentional.
        X = np.array(data[:, :5], dtype=float)

        # 6 for task 1, 7 for task 2
        label_column = 6 if task_num == 1 else 7
        y = np.array(data[:, label_column], dtype=float)
    elif "Hill_Valley" in data_file:
        df = pd.read_csv(data_file)
        # Last column is the label, all preceding columns are features.
        X = df[df.columns[:-1]].to_numpy()
        y = df[df.columns[-1]].to_numpy()
    else:
        # Previously this fell through to the return statement and failed
        # with an unhelpful UnboundLocalError.
        raise ValueError(
            "Unsupported data file %r: expected a path containing "
            "'Hill_Valley' or 'acute'." % (data_file,)
        )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, shuffle=True, stratify=y
    )

    return X_train, X_test, y_train, y_test, len(np.unique(y))
|
||
|
||
def test(data_file, reps, n_trees, task_num,
         default_transformer_class, default_transformer_kwargs):
    """Repeatedly train a ProgressiveLearner and summarise its chance-corrected accuracy.

    For each of ``reps`` repetitions the data is reloaded (and re-split) with
    ``load_data``, a single task is added with ``n_trees`` transformers, and
    the test accuracy is rescaled as ``(acc - chance) / (1 - chance)`` where
    ``chance = 1 / n_classes``.

    Parameters
    ----------
    data_file : str
        Forwarded to ``load_data``.
    reps : int
        Number of repetitions.
    n_trees : int
        Passed to ``add_task`` as ``num_transformers``.
    task_num : int
        Forwarded to ``load_data``.
    default_transformer_class
        Transformer class handed to ``ProgressiveLearner``.
    default_transformer_kwargs : dict
        Keyword arguments handed to ``ProgressiveLearner`` for the transformer.

    Returns
    -------
    tuple
        ``(mean * 100, std * 100 / sqrt(reps))`` of the per-repetition scores.
    """
    voter_class, voter_kwargs = TreeClassificationVoter, {}
    decider_class = SimpleArgmaxAverage

    scores = np.zeros(reps)
    for rep in range(reps):
        X_train, X_test, y_train, y_test, n_classes = load_data(data_file, task_num)

        learner = ProgressiveLearner(
            default_transformer_class=default_transformer_class,
            default_transformer_kwargs=default_transformer_kwargs,
            default_voter_class=voter_class,
            default_voter_kwargs=voter_kwargs,
            default_decider_class=decider_class,
            default_decider_kwargs={"classes": np.arange(n_classes)},
        )
        learner.add_task(X_train, y_train, num_transformers=n_trees)
        predictions = learner.predict(X_test, task_id=0)

        # Fraction of test samples predicted correctly.
        accuracy = np.sum(y_test == predictions) / len(y_test)
        print("Accuracy after iteration ", rep, ": ", accuracy)

        # Rescale so that chance-level accuracy maps to 0 and perfect to 1.
        chance_level = 1 / n_classes
        scores[rep] = (accuracy - chance_level) / (1 - chance_level)

    mean_pct = np.mean(scores) * 100
    stderr_pct = (np.std(scores) * 100) / np.sqrt(reps)
    return mean_pct, stderr_pct
96 changes: 96 additions & 0 deletions
96
docs/tutorials/functions/sporf_decision_boundaries_functions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
from rerf.rerfClassifier import rerfClassifier | ||
|
||
import numpy as np | ||
np.random.seed(42) | ||
|
||
import matplotlib.pyplot as plt | ||
from matplotlib.colors import ListedColormap | ||
from sklearn.model_selection import train_test_split | ||
from sklearn.preprocessing import StandardScaler | ||
from sklearn.datasets import make_moons, make_circles, make_classification | ||
from sklearn.ensemble import RandomForestClassifier | ||
|
||
from proglearn.forest import LifelongClassificationForest | ||
from proglearn.voters import TreeClassificationVoter | ||
from proglearn.transformers import TreeClassificationTransformer | ||
from proglearn.transformers import ObliqueTreeClassificationTransformer | ||
from proglearn.deciders import SimpleArgmaxAverage | ||
|
||
def test(NT, h, names, classifiers, datasets):
    """Draw a grid of decision-boundary plots, one row per dataset.

    The first panel of each row shows the standardised input data; each
    following panel shows one classifier's decision surface over a mesh of
    step ``h``, overlaid with the train/test points and the test score.

    Parameters
    ----------
    NT : int
        Passed as ``n_estimators`` when a ``LifelongClassificationForest`` is
        built for an entry whose name contains "Proglearn".
    h : float
        Step size of the mesh grid.
    names : sequence of str
        Panel titles, paired with ``classifiers`` via ``zip``.
    classifiers : sequence
        Estimators to fit. An entry whose name contains "Proglearn" is
        replaced by a freshly constructed ``LifelongClassificationForest``.
    datasets : sequence of (X, y)
        Datasets to plot; columns 0 and 1 of ``X`` are used as plot axes.
    """
    n_rows, n_cols = len(datasets), len(classifiers) + 1
    surface_cmap = plt.cm.RdBu
    point_cmap = ListedColormap(['#FF0000', '#0000FF'])

    def draw_points(axis, train_xy, train_lbl, test_xy, test_lbl, grid_x, grid_y):
        # Training points opaque, testing points semi-transparent.
        axis.scatter(train_xy[:, 0], train_xy[:, 1], c=train_lbl,
                     cmap=point_cmap, edgecolors='k')
        axis.scatter(test_xy[:, 0], test_xy[:, 1], c=test_lbl,
                     cmap=point_cmap, alpha=0.6, edgecolors='k')
        axis.set_xlim(grid_x.min(), grid_x.max())
        axis.set_ylim(grid_y.min(), grid_y.max())
        axis.set_xticks(())
        axis.set_yticks(())

    panel = 1
    for row, (X, y) in enumerate(datasets):
        # Standardise, then hold out 40% of the samples for testing.
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=.4, random_state=42)

        # Mesh covering the data with a margin of 0.5 on every side.
        x_lo, x_hi = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_lo, y_hi = X[:, 1].min() - .5, X[:, 1].max() + .5
        xx, yy = np.meshgrid(np.arange(x_lo, x_hi, h),
                             np.arange(y_lo, y_hi, h))

        # First panel of the row: the raw (standardised) data only.
        ax = plt.subplot(n_rows, n_cols, panel)
        if row == 0:
            ax.set_title("Input data")
        draw_points(ax, X_train, y_train, X_test, y_test, xx, yy)
        panel += 1

        # Remaining panels: one per classifier.
        for name, clf in zip(names, classifiers):
            ax = plt.subplot(n_rows, n_cols, panel)
            is_proglearn = "Proglearn" in name

            if is_proglearn:
                clf = LifelongClassificationForest(
                    oblique=True,
                    default_feature_combinations=1,
                    default_density=0.5)
                clf.add_task(X_train, y_train, n_estimators=NT)
                y_hat = clf.predict(X_test, task_id=0)
                score = np.sum(y_hat == y_test) / len(y_test)
            else:
                clf.fit(X_train, y_train)
                score = clf.score(X_test, y_test)

            # Evaluate the classifier on every mesh point to colour the
            # region [x_min, x_max] x [y_min, y_max].
            mesh_points = np.c_[xx.ravel(), yy.ravel()]
            if hasattr(clf, "decision_function"):
                Z = clf.decision_function(mesh_points)
            elif is_proglearn:
                Z = clf.predict_proba(mesh_points, task_id=0)[:, 1]
            else:
                Z = clf.predict_proba(mesh_points)[:, 1]

            # Filled contour of the decision surface, then the points on top.
            ax.contourf(xx, yy, Z.reshape(xx.shape), cmap=surface_cmap, alpha=.8)
            draw_points(ax, X_train, y_train, X_test, y_test, xx, yy)

            if row == 0:
                ax.set_title(name)
            # Test score in the lower-right corner, without a leading zero.
            ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                    size=15, horizontalalignment='right')
            panel += 1
Oops, something went wrong.