-
Notifications
You must be signed in to change notification settings - Fork 69
/
Copy pathkNearestNeighbors.jl
140 lines (98 loc) · 2.67 KB
/
kNearestNeighbors.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import StatsBase: countmap
# Distance metrics.
# `Dist` is the abstract parent type that `dist` methods dispatch on.
abstract Dist

# Field-less tag type that selects the Euclidean metric.
type Euclidean <: Dist end
# Common parent of the k-nearest-neighbour models in this file.
abstract KNN

"""
    KnnClassifier(k, dis_func, X, y)

k-nearest-neighbour classifier state.

# Fields
- `k`: how many of the closest training rows take part in a prediction.
- `dis_func`: metric object handed to `dist` (note the field is spelled `dis_func`,
  while the keyword constructor's keyword is `dist_func`).
- `X`: training inputs; `predict` walks over its rows.
- `y`: training labels, indexed by the row number of `X`.
"""
type KnnClassifier <: KNN
    k::Integer
    dis_func::Dist
    X::Matrix
    y::Vector
end
"""
    KnnRegression(k, dis_func, X, y)

k-nearest-neighbour regressor state. It has the same layout as `KnnClassifier`;
`predict(::KnnRegression, ::Vector)` returns the mean of the selected targets.

# Fields
- `k`: how many of the closest training rows are averaged.
- `dis_func`: metric object handed to `dist`.
- `X`: training inputs; `predict` walks over its rows.
- `y`: training targets, indexed by the row number of `X`.
"""
type KnnRegression <: KNN
    k::Integer
    dis_func::Dist
    X::Matrix
    y::Vector
end
"""
    KnnClassifier(; k = 5, dist_func = Euclidean(), X = zeros(2, 2), y = zeros(2))

Keyword constructor: build a `KnnClassifier` from optional settings. The keywords are
forwarded, in field order, to the positional constructor. The default `X` and `y` are
zero-filled placeholders.
"""
KnnClassifier(; k::Integer = 5,
              dist_func::Dist = Euclidean(),
              X::Matrix = zeros(2, 2),
              y::Vector = zeros(2)) = KnnClassifier(k, dist_func, X, y)
"""
    KnnRegression(; k = 5, dist_func = Euclidean(), X = zeros(2, 2), y = zeros(2))

Keyword constructor: build a `KnnRegression` from optional settings. The keywords are
forwarded, in field order, to the positional constructor. The default `X` and `y` are
zero-filled placeholders.
"""
function KnnRegression(;
                       k::Integer = 5,
                       dist_func::Dist = Euclidean(),
                       X::Matrix = zeros(2, 2),
                       y::Vector = zeros(2))
    # Must construct a KnnRegression (not a KnnClassifier) so that `predict`
    # dispatches to the regression method, which averages the neighbours' targets.
    return KnnRegression(k, dist_func, X, y)
end

"""
    train!(model::KNN, X::Matrix, y::Vector)

Store the training inputs `X` and targets `y` on `model`. No other computation is done
here; the neighbour search happens in `predict`. Works for every `KNN` subtype, since
only the shared `X` and `y` fields are touched. Returns `y`.
"""
function train!(model::KNN, X::Matrix, y::Vector)
    model.X = X
    model.y = y
    return y
end

"""
    predict(model::KNN, x::Matrix)

Predict one value per row of `x` by calling the single-sample `predict(model, ::Vector)`
method of the concrete model type on each row.

The results are collected in a `Float64` vector created with `zeros`, so every per-row
prediction must be convertible to `Float64`.
"""
function predict(model::KNN,
                 x::Matrix)
    n = size(x, 1)
    res = zeros(n)
    for i = 1:n
        # Each row slice is passed to the `x::Vector` method for this model type.
        res[i] = predict(model, x[i, :])
    end
    return res
end
"""
    predict(model::KnnClassifier, x::Vector)

Classify the single sample `x` by majority vote.

The distance from `x` to every row of `model.X` is computed with `dist` and
`model.dis_func`. The labels of the `model.k` rows with the smallest distances are
counted with `countmap`, and the label with the highest count is returned. On a tie, the
first label that reaches the highest count in `countmap`'s iteration order wins. The
result is `0` when there are no votes.

`model.k` must not exceed the number of rows in `model.X`, otherwise selecting the first
`k` indices is out of bounds.
"""
function predict(model::KnnClassifier,
                 x::Vector)
    n_train = size(model.X, 1)
    # One Float64 distance per training row.
    distances = Float64[dist(model.X[row, :], x, model.dis_func) for row in 1:n_train]
    # Indices of the k closest rows (ascending distance).
    nearest = sortperm(distances)[1:model.k]
    votes = countmap(model.y[nearest])

    best_label = 0
    best_tally = 0
    for (candidate, tally) in votes
        # Only a strictly larger count replaces the current winner.
        tally > best_tally || continue
        best_tally = tally
        best_label = candidate
    end
    return best_label
end
"""
    predict(model::KnnRegression, x::Vector)

Predict a target for the single sample `x` as the mean of the targets of its `model.k`
nearest training rows.

The distance from `x` to every row of `model.X` is computed with `dist` and
`model.dis_func`. `model.k` must not exceed the number of rows in `model.X`, otherwise
selecting the first `k` indices is out of bounds.
"""
function predict(model::KnnRegression,
                 x::Vector)
    n_train = size(model.X, 1)
    # One Float64 distance per training row.
    distances = Float64[dist(model.X[row, :], x, model.dis_func) for row in 1:n_train]
    # Indices of the k closest rows (ascending distance).
    nearest = sortperm(distances)[1:model.k]
    return mean(model.y[nearest])
end
"""
    dist(x::Vector, y::Vector, dist_func::Euclidean)

Return the Euclidean distance between `x` and `y`, computed as `norm(x - y)`.
`dist_func` is not read; it only selects this method by dispatch.
"""
dist(x::Vector, y::Vector, dist_func::Euclidean) = norm(x - y)
# Demo / smoke test for the regression path.
# Takes the four arrays returned by `make_reg(n_features = 1)`, trains the model built
# by `KnnRegression()` with default settings, prints the value of `mean_squared_error`
# on the test split, and scatter-plots the test targets against the predictions.
function test_kneast_regression()
    X_train, X_test, y_train, y_test = make_reg(n_features = 1)

    knn = KnnRegression()
    train!(knn, X_train, y_train)
    y_hat = predict(knn, X_test)

    print("regression msea", mean_squared_error(y_test, y_hat))

    # Test targets in black, model output in green.
    PyPlot.scatter(X_test, y_test, color = "black")
    PyPlot.scatter(X_test, y_hat, color = "green")
    return legend(loc="upper right",fancybox="true")
end
# Demo / smoke test for the classification path.
# Takes the four arrays returned by `make_digits()`, trains a default `KnnClassifier`,
# prints the value of `accuracy` on the test split, and passes the predictions to
# `plot_in_2d` together with a PCA model trained on the test inputs.
function test_kneast_classification()
    X_train, X_test, y_train, y_test = make_digits()

    knn = KnnClassifier()
    train!(knn, X_train, y_train)
    y_hat = predict(knn, X_test)

    print("classification accuracy", accuracy(y_test, y_hat))

    # PCA model for the plot (presumably used by `plot_in_2d` to project to 2-D;
    # confirm against its definition).
    projector = PCA()
    train!(projector, X_test)
    return plot_in_2d(projector, X_test, y_hat, "kneast_classification")
end