Commit: code refactoring

AkiRusProd committed Aug 17, 2023
1 parent 22e2103 commit 98a9d86

Showing 13 changed files with 212 additions and 170 deletions.
8 changes: 4 additions & 4 deletions decision_tree_classifier.py

@@ -138,13 +138,13 @@ def print_tree(self, tree = None, tree_depth = 0):


 if __name__ == "__main__":
-    generated_data, generated_labels = generate_clusterization_data(n_clusters = 2, n_samples = 300)
-    x_train, x_test, y_train, y_test = split_data(generated_data, generated_labels, ratio = 0.25)
+    X_train, y_train = generate_clusterization_data(n_clusters = 2, n_samples = 300)
+    X_train, X_test, y_train, y_test = split_data(X_train, y_train, ratio = 0.25)

     dtc = DecisionTreeClassifier()

-    dtc.fit(x_train, y_train)
-    y_pred = dtc.predict(x_test)
+    dtc.fit(X_train, y_train)
+    y_pred = dtc.predict(X_test)

     dtc.print_tree()
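Note: across the refactored examples, split_data is now called as split_data(X, y, ratio) and returns four arrays, and generate_clusterization_data returns an (X, y) pair. The matching utils.py changes are not expanded in this view, so the sketch below reflects only what the call sites imply; the internals are assumptions, not the repository's actual code:

import numpy as np

def split_data(X, y, ratio = 0.25):
    # Assumed behavior: shuffle X and y together, hold out `ratio` of samples for testing.
    indices = np.random.permutation(len(X))
    n_test = int(len(X) * ratio)
    test_idx, train_idx = indices[:n_test], indices[n_test:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]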
15 changes: 7 additions & 8 deletions decision_tree_regressor.py

@@ -116,23 +116,22 @@ def print_tree(self, tree = None, tree_depth = 0):


 if __name__ == "__main__":
-    data = generate_regression_data(100)
-    splited_data = split_data(data, ratio = 0.25)
-    x_train, x_test, y_train, y_test = splited_data[0][:, :1], splited_data[1][:, :1], splited_data[0][:, 1], splited_data[1][:, 1]
+    X_train, y_train = generate_regression_data(100)
+    X_train, X_test, y_train, y_test = split_data(X_train, y_train, ratio = 0.25)

     dtr = DecisionTreeRegressor()
-    dtr.fit(x_train, y_train)
-    y_pred = dtr.predict(x_test)
+    dtr.fit(X_train, y_train[:, 0])
+    y_pred = dtr.predict(X_test)

-    indices = np.argsort(x_test[:, 0])
+    indices = np.argsort(X_test[:, 0])

-    xs = np.array(x_test)[indices]
+    xs = np.array(X_test)[indices]
     ys = np.array(y_pred)[indices]

     f = plt.figure(figsize = (16 * 0.5, 9 * 0.5))
     ax = f.add_subplot(1, 1, 1)

-    ax.plot(x_test, y_test, 'o')
+    ax.plot(X_test, y_test, 'o')
     ax.plot(xs, ys, 'r')
     ax.set_title('Decision Tree Regressor')
     ax.set_xlabel('X')
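The regressor demos index y_train[:, 0], which implies generate_regression_data returns its targets as a 2-D column alongside the features. A hypothetical sketch consistent with those call sites (the slope and noise scale are invented for illustration):

import numpy as np

def generate_regression_data(n_samples):
    # Hypothetical helper: one feature and a noisy linear target, both returned as columns.
    X = np.random.uniform(-5, 5, (n_samples, 1))
    y = 2.0 * X + np.random.normal(0, 1, (n_samples, 1))
    return X, y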
10 changes: 5 additions & 5 deletions gradient_boosting_classifier.py

@@ -87,12 +87,12 @@ def predict(self, X):


 if __name__ == "__main__":
-    generated_data, generated_labels = generate_clusterization_data(n_clusters = 2, n_samples=300)
-    x_train, x_test, y_train, y_test = split_data(generated_data, generated_labels, ratio = 0.25)
+    X_train, y_train = generate_clusterization_data(n_clusters = 2, n_samples = 300)
+    X_train, X_test, y_train, y_test = split_data(X_train, y_train, ratio = 0.25)

     gbc = GradientBoostingClassifier(n_estimators=30, learning_rate=0.1, max_depth=2)

-    gbc.fit(x_train, y_train)
-    y_pred = gbc.predict(x_train)
+    gbc.fit(X_train, y_train)
+    y_pred = gbc.predict(X_test)

-    print(f"accuracy: {accuracy(y_train, y_pred) * 100}%")
+    print(f"accuracy: {accuracy(y_test, y_pred) * 100}%")
15 changes: 7 additions & 8 deletions gradient_boosting_regressor.py

@@ -37,23 +37,22 @@ def predict(self, X):


 if __name__ == "__main__":
-    data = generate_regression_data(100)
-    splited_data = split_data(data, ratio = 0.25)
-    x_train, x_test, y_train, y_test = splited_data[0][:, :1], splited_data[1][:, :1], splited_data[0][:, 1], splited_data[1][:, 1]
+    X_train, y_train = generate_regression_data(100)
+    X_train, X_test, y_train, y_test = split_data(X_train, y_train, ratio = 0.25)

     gbr = GradientBoostingRegressor(n_estimators=30)
-    gbr.fit(x_train, y_train)
-    y_pred = gbr.predict(x_test)
+    gbr.fit(X_train, y_train[:, 0])
+    y_pred = gbr.predict(X_test)

-    indices = np.argsort(x_test[:, 0])
+    indices = np.argsort(X_test[:, 0])

-    xs = np.array(x_test)[indices]
+    xs = np.array(X_test)[indices]
     ys = np.array(y_pred)[indices]

     f = plt.figure(figsize = (16 * 0.5, 9 * 0.5))
     ax = f.add_subplot(1, 1, 1)

-    ax.plot(x_test, y_test, 'o')
+    ax.plot(X_test, y_test, 'o')
     ax.plot(xs, ys, 'r')
     ax.set_title('Gradient Boosting Regressor')
     ax.set_xlabel('X')
10 changes: 5 additions & 5 deletions k_means.py

@@ -6,7 +6,7 @@

 """K-means"""

-class K_Means():
+class KMeans():

     def __init__(self, k):
         self.k = k
@@ -62,13 +62,13 @@ def predict(self, sample):


 if __name__ == '__main__':
-    data, labels = generate_clusterization_data(n_clusters = 3)
+    X_train, y_train = generate_clusterization_data(n_clusters = 3)

-    k_means = K_Means(k = 3)
+    k_means = KMeans(k = 3)

-    centroids = k_means.fit(data)
+    centroids = k_means.fit(X_train)

-    plt.scatter(data[:,0], data[:,1], s = 40, c = labels, cmap = plt.cm.spring, edgecolors = 'k')
+    plt.scatter(X_train[:,0], X_train[:,1], s = 40, c = y_train, cmap = plt.cm.spring, edgecolors = 'k')
     plt.scatter(centroids[:,0], centroids[:,1], s = 200, color = 'red' , marker = '*', edgecolors = 'k', label = 'centroids')

     plt.legend(loc=2)
14 changes: 7 additions & 7 deletions knn.py

@@ -7,7 +7,7 @@

 """K-nearest neighbors"""

-class KNN_Classifier():
+class KNNClassifier():

     def __init__(self, k = 5) -> None:
         self.k = k
@@ -42,14 +42,14 @@ def predict(self, new_data):


 if __name__ == "__main__":
-    data, labels = generate_clusterization_data(n_clusters = 3, n_samples = 30)
+    X_train, y_test = generate_clusterization_data(n_clusters = 3, n_samples = 30)

-    knn = KNN_Classifier(k = 5)
-    knn.fit(data, labels)
+    knn = KNNClassifier(k = 5)
+    knn.fit(X_train, y_test)


-    x_min, x_max = data[:,0].min()-1, data[:,0].max() + 1
-    y_min, y_max = data[:,1].min()-1, data[:,1].max() + 1
+    x_min, x_max = X_train[:,0].min()-1, X_train[:,0].max() + 1
+    y_min, y_max = X_train[:,1].min()-1, X_train[:,1].max() + 1

     x_grid, y_grid = np.meshgrid(np.arange(x_min,x_max,.1),np.arange(y_min,y_max,.1))

@@ -58,7 +58,7 @@ def predict(self, new_data):
     predictions = predictions.reshape(x_grid.shape)

     plt.pcolormesh(x_grid, y_grid, predictions, cmap = plt.cm.Pastel2)
-    plt.scatter(data[:,0], data[:,1], s = 80, c = labels, cmap = plt.cm.spring, edgecolors = 'k')
+    plt.scatter(X_train[:,0], X_train[:,1], s = 80, c = y_test, cmap = plt.cm.spring, edgecolors = 'k')
     plt.xlim(x_grid.min(), x_grid.max())
     plt.ylim(y_grid.min(), y_grid.max())
118 changes: 118 additions & 0 deletions linear_regression.py

@@ -0,0 +1,118 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+from utils import generate_linear_regression_data, split_data
+
+
+class MSE:
+    def __call__(self, y_true, y_pred):
+        return np.sum((y_true - y_pred) ** 2) / y_true.size
+
+    def grad(self, y_true, y_pred):
+        return -2 * (y_true - y_pred) / y_true.size
+
+
+class SGDRegressor:
+    def __init__(self, n_iterations = 100, lr = 0.0001):
+        self.n_iterations = n_iterations
+        self.lr = lr
+
+        self.weight = None
+        self.bias = None
+
+        self.loss = MSE()
+
+    def init_weights(self, n_features):
+        if self.weight is None or self.bias is None:
+            # self.weight = np.random.uniform(-1, 1, (1, n_features)) if self.weight is None else self.weight
+            # self.weight = np.random.normal(0, pow(n_features, -0.5), (1, n_features)) if self.weight is None else self.weight
+            # self.weight = np.random.normal(0, 1, (1, n_features)) if self.weight is None else self.weight
+
+            # Xavier initialization
+            stdv = 1 / np.sqrt(n_features)
+            self.weight = np.random.uniform(-stdv, stdv, (1, n_features)) if self.weight is None else self.weight
+
+            self.bias = np.zeros((1, 1)) if self.bias is None else self.bias
+
+    def fit(self, X, y):
+        n_samples, n_features = X.shape
+        self.init_weights(n_features)
+
+        losses = []
+        tqdm_range = tqdm(range(self.n_iterations), total = self.n_iterations)
+        for i in range(self.n_iterations):
+            tqdm_range.update(1)
+            for x_true, y_true in zip(X, y):
+                y_true = y_true[:, np.newaxis]
+
+                y_pred = np.matmul(x_true, self.weight) + self.bias
+
+                loss = self.loss(y_true, y_pred)
+
+                grad = self.loss.grad(y_true, y_pred)
+
+                self.weight -= self.lr * np.matmul(grad.T, x_true)
+                self.bias -= self.lr * np.sum(grad)
+                losses.append(loss)
+
+            tqdm_range.set_description(f'epoch: {i + 1}/{self.n_iterations}, loss: {loss:.7f}')
+
+        return losses
+
+    def predict(self, X):
+        y_pred = np.matmul(X, self.weight) + self.bias
+        return y_pred
+
+
+class OrdinaryLeastSquares:
+    def __init__(self) -> None:
+        self.b = None
+
+    def add_bias(self, X):
+        return np.concatenate((X, np.ones((X.shape[0], 1))), axis = 1)
+
+    def fit(self, X, y):
+        X = self.add_bias(X)
+        # b* = (X^T * X)^-1 * X^T * y
+        self.b = (np.linalg.matrix_power(X.transpose().dot(X), -1)).dot(X.transpose()).dot(y)
+        return self.b
+
+    def predict(self, X):
+        X = self.add_bias(X)
+        return X.dot(self.b)
+
+
+if __name__ == '__main__':
+    X_train, y_train, true_coefs = generate_linear_regression_data(300)
+    X_train, X_test, y_train, y_test = split_data(X_train, y_train, ratio = 0.25)
+
+    plt.title("Linear Regression")
+    plt.xlabel("X")
+    plt.ylabel("Y")
+
+    plt.scatter(X_test, y_test, color ='g', s=10, label='Ground truth')
+
+    model = SGDRegressor(n_iterations=1000)
+    losses = model.fit(X_train, y_train)
+    y_pred = model.predict(X_test)
+
+    plt.plot(X_test, y_pred, 'red', label='Gradient descent')
+
+    model = OrdinaryLeastSquares()
+    model.fit(X_train, y_train)
+    y_pred = model.predict(X_test)
+
+    plt.plot(X_test, y_pred, 'orange', label='Ordinary least squares')
+
+    y_true = np.dot(X_test, true_coefs)
+    plt.plot(X_test, y_true, 'blue', label='True coefficients')
+
+    plt.legend(loc=2)
+
+    plt.grid(True, linestyle='-', color='0.75')
+    plt.show()
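The new file implements two standard least-squares estimators. SGDRegressor minimizes the mean squared error with its analytic gradient, and OrdinaryLeastSquares.fit applies the normal equation from the inline comment; written out in LaTeX:

\mathcal{L}(y, \hat{y}) = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2, \qquad
\frac{\partial \mathcal{L}}{\partial \hat{y}_i} = -\frac{2}{n} (y_i - \hat{y}_i), \qquad
b^* = (X^\top X)^{-1} X^\top y

Note that np.linalg.matrix_power(A, -1) computes the matrix inverse, so the fit line is equivalent to np.linalg.inv(X.T @ X) @ X.T @ y.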
10 changes: 5 additions & 5 deletions naive_bayes_classifier.py

@@ -44,11 +44,11 @@ def predict(self, data):


 if __name__ == "__main__":
-    generated_data, generated_labels = generate_clusterization_data(n_clusters = 2)
-    train_data, test_data, train_labels, test_labels = split_data(generated_data, generated_labels, ratio = 0.25)
+    X_train, y_train = generate_clusterization_data(n_clusters = 2, n_samples = 300)
+    X_train, X_test, y_train, y_test = split_data(X_train, y_train, ratio = 0.25)

     nb = NaiveBayesClassifier()
-    nb.fit(train_data, train_labels)
-    predicted_labels = nb.predict(test_data)
+    nb.fit(X_train, y_train)
+    y_pred = nb.predict(X_test)

-    print(f"accuracy: {accuracy(test_labels, predicted_labels) * 100}%")
+    print(f"accuracy: {accuracy(y_test, y_pred) * 100}%")
87 changes: 0 additions & 87 deletions ols.py

This file was deleted.
