-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtest_logistic.py
107 lines (84 loc) · 4 KB
/
test_logistic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python
from functools import partial
import scipy
import numpy as np
import logistic
def test_manual_standard_logistic_regression():
    """Check that all gradient-descent variants in `logistic` agree.

    A tiny hand-written 3x3 problem is solved for every iteration budget
    from 0 to 39 with:

    * the plain and the "fast" logistic gradient descent,
    * the modified variants (which also return a scalar ``b``),
    * the fast variants fed a sparse (CSR) copy of the same matrix.

    First pass (``b=0.0``, ``eta0=0.1``): the modified variants must leave
    ``b`` at exactly 0.0 and produce the same ``theta`` as the standard
    ones.  Second pass (``b=1.0``, ``eta0=0.01``): the pure-Python, fast
    and sparse modified variants must agree with each other.
    """
    # `import scipy` alone does not guarantee that the `sparse` subpackage
    # is loaded on every SciPy release; import it explicitly so the test
    # does not depend on some other module having imported it first.
    import scipy.sparse

    X = np.array([[0, 4, 1],
                  [3, 0, 0],
                  [0, 0, 2]])
    y = np.array([0, 1, 0])
    Xsparse = scipy.sparse.csr_matrix(X)

    # `range` instead of the Python-2-only `xrange`: identical iteration
    # on both interpreters, and 40 items is too small for it to matter.
    for i in range(40):
        theta = logistic.logistic_gradient_descent(
            X, y, max_iter=i, eta0=0.1)
        thetaFast = logistic.fast_logistic_gradient_descent(
            X, y, max_iter=i, eta0=0.1)

        thetaM, b = logistic.modified_logistic_gradient_descent(
            X, y, max_iter=i, b=0.0, eta0=0.1)
        assert b == 0.0
        thetaMFast, b = logistic.fast_modified_logistic_gradient_descent(
            X, y, max_iter=i, b=0.0, eta0=0.1)
        assert b == 0.0

        thetaSparse = logistic.fast_logistic_gradient_descent(
            Xsparse, y, max_iter=i, eta0=0.1)
        thetaMSparse, b = logistic.fast_modified_logistic_gradient_descent(
            Xsparse, y, max_iter=i, b=0.0, eta0=0.1)
        assert b == 0.0

        # NOTE: the hand-computed expectations below were already disabled
        # in the original file (they lived inside a bare string literal).
        # They are preserved here as comments; see the TODO at the end of
        # this function.
        #
        # close = partial(np.allclose, rtol=0.01, atol=0.0001)
        # if i == 0:
        #     assert np.allclose(theta, [0.01, 0.01, 0.01, 0.01])
        # elif i == 1:
        #     answer = [0.0095, 0.0105, 0.0095, 0.009]
        #     assert close(theta, answer)
        #     theta[2] += 0.0003
        #     assert not close(theta, answer)
        #     theta[2] -= 0.0003
        # elif i == 2:
        #     answer = [0.009, 0.011, 0.009, 0.008]
        #     assert close(theta, answer)
        #     assert not close(theta * 0.8, answer)
        # elif i == 3:
        #     answer = [0.0085, 0.0115, 0.0085, 0.007]
        #     assert close(theta, answer)
        #     assert not close(theta + 0.0003, answer)

        assert np.allclose(theta, thetaFast)

        # and make sure that the modified version is a superset of normal one
        assert np.allclose(theta, thetaM)
        assert np.allclose(theta, thetaMFast)
        assert np.allclose(theta, thetaSparse)
        assert np.allclose(theta, thetaMSparse)

    for i in range(40):
        # make sure C version is same as other version
        thetaM, bM = logistic.modified_logistic_gradient_descent(
            X, y, max_iter=i, b=1.0, eta0=0.01)
        thetaMFast, bMFast = logistic.fast_modified_logistic_gradient_descent(
            X, y, max_iter=i, b=1.0, eta0=0.01)
        thetaMSparse, bMSparse = logistic.fast_modified_logistic_gradient_descent(
            Xsparse, y, max_iter=i, b=1.0, eta0=0.01)

        assert np.allclose(thetaM, thetaMFast)
        assert np.allclose(thetaM, thetaMSparse)
        # Sanity check on the comparison itself: a 0.001 shift must be
        # large enough for np.allclose to report a mismatch.
        assert not np.allclose(thetaM + 0.001, thetaMFast)

        assert bM == bMFast == bMSparse

    # TODO: jperla: hardcode some values in
def test_end_to_end_logistic_regression():
    """Train on generated data, then label new points with the result.

    Positive and negative samples from ``logistic.generate_well_separable``
    are stacked into a training set, fitted with the fast gradient descent,
    and the resulting weights are used to label a batch of random points and
    two hand-picked points.  Each check runs once for ``theta`` and once for
    ``thetaC``; they are currently the same object because the pure-Python
    fit is disabled.
    """
    positives, negatives = logistic.generate_well_separable(100, 0.50)
    # graph_pos_neg(positives, negatives)

    X = logistic.vstack([positives, negatives])
    y = logistic.hstack([
        np.array([1] * len(positives)),
        np.array([0] * len(negatives)),
    ])
    data = logistic.generate_random_points(
        100,
        center=np.array([2, 2]),
        scale=np.array([5, 5]),
    )

    # The pure-Python fit (logistic.logistic_gradient_descent) and its
    # comparison against the fast result are switched off; reuse the fast
    # result for both names.
    thetaC = logistic.fast_logistic_gradient_descent(X, y)
    theta = thetaC

    # Both classes must show up more than 10 times among the random points.
    for weights in (theta, thetaC):
        labels = logistic.label_data(data, weights, binarize=True)
        for wanted in (0, 1):
            matching = [label for label in labels if label == wanted]
            assert len(matching) > 10

    # Two hand-picked points must come out labelled 0 and 1 respectively.
    small_data = np.array([[-1, -1], [11, 11]])
    for weights in (theta, thetaC):
        labels2 = logistic.label_data(small_data, weights, binarize=True)
        assert np.allclose([0, 1], labels2)
        assert not np.allclose([1, 1], labels2)

    # TODO: jperla: test split_labeled_data()
    # graph_labeled_data(data, labels)