import numpy as np
import matplotlib.pyplot as plt


def sigmoid(X):
    """Applies the logistic function to X, element-wise."""
    return 1 / (1 + np.exp(-X))


def x_strich(X):
    """Prepends a column of ones (bias term) to X."""
    return np.column_stack((np.ones(len(X)), X))


def feature_scaling(X):
    """Standardizes X column-wise and returns the scaled data, the means and the stds."""
    x_mean = np.mean(X, axis=0)
    x_std = np.std(X, axis=0)
    return (X - x_mean) / x_std, x_mean, x_std


def rescale_model(thetas, mean, std):
    """Maps thetas learned on standardized features back to the original feature scale.

    thetas[0] is the bias; mean and std are the values used for feature scaling.
    """
    thetas_rescaled = np.zeros(thetas.shape[0])
    # the bias absorbs the shift, the remaining thetas are divided by the stds
    thetas_rescaled[0] = thetas[0] - np.sum(thetas[1:] * (mean / std))
    thetas_rescaled[1:] = thetas[1:] / std
    return thetas_rescaled


def logistic_hypothesis(theta):
    """Builds a logistic hypothesis function from the given coefficients.

    Args:
        theta: array of coefficients (theta[0] is the bias)
    Returns:
        lambda that models a logistic function based on theta and X
    """
    return lambda X: sigmoid(np.dot(x_strich(X), theta))
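

# Illustrative example (hypothetical numbers, not part of the original module):
# with theta = [0, 1] the hypothesis reduces to sigmoid(x), so
#   >>> h = logistic_hypothesis(np.array([0.0, 1.0]))
#   >>> h(np.array([[2.0]]))
#   array([0.88079708])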


# def regulated_cost(X, y, theta, lambda_reg):
#     return cross_entropy(X, y)(theta) + L2_regularization_cost(X, theta, lambda_reg)


# def cross_entropy(X, y):
#     """Numerically stable cross-entropy (negative log-likelihood).
#
#     Parameters
#     ----------
#     X : 2D array of features
#     y : array of target classes
#
#     Returns
#     -------
#     cost(theta) : the cross-entropy value for each example
#     """
#
#     def cost(theta):
#         z = x_strich(X).dot(theta)
#         mu = np.max([np.zeros(X.shape[0]), -z], axis=0)
#         r1 = y * (mu + np.log(np.exp(-mu) + np.exp(-z - mu)))
#         mu = np.max([np.zeros(X.shape[0]), z], axis=0)
#         r2 = (1 - y) * (mu + np.log(np.exp(-mu) + np.exp(z - mu)))
#         return r1 + r2
#
#     return cost


def cross_entropy(X, y):
    """Implements cross-entropy as a function costs(theta) on the given training data.

    Args:
        X: features as 2D array with shape (m_examples, n_features)
        y: ground truth labels for the given features with shape (m_examples,)
    Returns:
        lambda costs(theta) that models the cross-entropy for each x^i
    """
    return lambda theta: -y * np.log(logistic_hypothesis(theta)(X) + 1e-9) - (
        1 - y
    ) * np.log(1 - logistic_hypothesis(theta)(X) + 1e-9)


def compute_new_theta(X, y, theta, learning_rate, lambda_reg):
    """Updates the learnable parameters theta.

    The update is done by calculating the partial derivatives of the cost
    function including the logistic hypothesis. The gradients, scaled by the
    learning rate, are subtracted from the given theta values; the L2 term
    additionally shrinks the thetas.

    Args:
        X: 2D numpy array of x values
        y: array of y values corresponding to x
        theta: current theta values
        learning_rate: value to scale the negative gradient
        lambda_reg: regularization strength
    Returns:
        thetas: updated theta values
    """
    thetas = theta * (1 - learning_rate * (lambda_reg / len(X))) - (
        learning_rate / len(X)
    ) * np.sum((logistic_hypothesis(theta)(X) - y) * x_strich(X).T, axis=1)
    return thetas


def L2_regularization_cost(X, theta, lambda_reg):
    """L2 penalty lambda / (2m) * sum(theta^2); note that the bias theta[0] is included here."""
    return np.sum(theta ** 2) * (lambda_reg / (2 * len(X)))


def gradient_descent(X, y, theta, learning_rate, num_iters, lambda_reg):
    """Finds theta values of a logistic model by minimizing the cross-entropy cost.

    Args:
        X: 2D numpy array of x values
        y: array of y values corresponding to x
        theta: initial theta values
        learning_rate: value to scale the negative gradient
        num_iters: number of iterations updating thetas
        lambda_reg: regularization strength
    Returns:
        history_cost: cost after each iteration
        history_theta: theta values after each iteration
    """
    thetas = [theta]
    cost = np.zeros(num_iters)
    J = mean_cross_entropy_costs(X, y, lambda_reg)
    cost[0] = J(thetas[0])
    for i in range(1, num_iters):
        thetas.append(compute_new_theta(X, y, thetas[i - 1], learning_rate, lambda_reg))
        cost[i] = J(thetas[i])
    return cost, thetas


def mean_cross_entropy_costs(X, y, lambda_reg=0.0):
    """Implements the mean cross-entropy as a function J(theta) on the given
    training data, including the L2 regularization term.

    Args:
        X: features as 2D array with shape (m_examples, n_features)
        y: ground truth labels for the given features with shape (m_examples,)
        lambda_reg: regularization strength
    Returns:
        lambda J(theta) that models the mean cross-entropy
    """
    return lambda theta: np.mean(cross_entropy(X, y)(theta)) + L2_regularization_cost(
        X, theta, lambda_reg
    )


def plot_progress(fig, costs, learning_rate, lambda_reg):
    """Plots the costs over the iterations.

    Args:
        fig: matplotlib figure to draw into
        costs: history of costs
        learning_rate: learning rate used (shown in the legend)
        lambda_reg: regularization strength used (shown in the legend)
    """
    ax = fig.add_subplot(111)
    ax.plot(
        np.arange(len(costs)),
        costs,
        alpha=0.8,
        label="LR: " + str(learning_rate) + " __ Lambda: " + str(lambda_reg),
    )
    ax.legend(
        bbox_to_anchor=(0.0, 1.02, 1.0, 0.102),
        loc="best",
        ncol=4,
        mode="expand",
        borderaxespad=0.0,
    )
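

# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal end-to-end example on synthetic data, assuming the functions above
# are combined as follows: scale the features, run gradient_descent, map the
# learned thetas back to the raw feature scale with rescale_model, and plot the
# cost history. The data and hyperparameter values are arbitrary illustrations,
# not recommendations from the original module.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    # two Gaussian blobs as a toy binary classification problem
    X_raw = np.vstack(
        (rng.normal(0.0, 1.0, size=(50, 2)), rng.normal(3.0, 1.0, size=(50, 2)))
    )
    y = np.concatenate((np.zeros(50), np.ones(50)))

    X_scaled, x_mean, x_std = feature_scaling(X_raw)
    theta_start = np.zeros(X_scaled.shape[1] + 1)

    costs, thetas = gradient_descent(
        X_scaled, y, theta_start, learning_rate=0.5, num_iters=200, lambda_reg=0.1
    )
    print("final cost:", costs[-1])
    print("thetas on original scale:", rescale_model(thetas[-1], x_mean, x_std))

    fig = plt.figure()
    plot_progress(fig, costs, 0.5, 0.1)
    plt.show()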