Commit
Showing 5 changed files with 123 additions and 20 deletions.
@@ -0,0 +1,63 @@
function [model, L] = mlpClass(X,y,k,lambda)
% Train a multilayer perceptron neural network for classification with backpropagation
% logistic activation function is used.
% Input:
% X: d x n data matrix
% y: 1 x n label vector
% k: T x 1 vector to specify number of hidden nodes in each layer
% lambda: regularization parameter
% Output:
% model: model structure
% L: (regularized cross entropy) loss
% Written by Mo Chen ([email protected]).
if nargin < 4
    lambda = 1e-2;
end
eta = 1e-3;                                  % learning rate
tol = 1e-4;                                  % relative tolerance for convergence
maxiter = 50000;
L = inf(1,maxiter);

Y = sparse(y,1:numel(y),1);                  % 1-of-k (one-hot) encoding of the labels
k = [size(X,1);k(:);size(Y,1)];              % layer sizes: input, hidden ..., output
T = numel(k)-1;
W = cell(T,1);
b = cell(T,1);
for t = 1:T
    W{t} = randn(k(t),k(t+1));
    b{t} = randn(k(t+1),1);
end
R = cell(T,1);
Z = cell(T+1,1);
Z{1} = X;
for iter = 2:maxiter
    % forward
    for t = 1:T-1
        Z{t+1} = sigmoid(W{t}'*Z{t}+b{t});   % 5.10 5.113
    end
    Z{T+1} = softmax(W{T}'*Z{T}+b{T});

    % loss
    E = Z{T+1};
    Wn = cellfun(@(x) dot(x(:),x(:)),W);     % |W|^2
    L(iter) = -dot(Y(:),log(E(:)))+0.5*lambda*sum(Wn);
    if abs(L(iter)-L(iter-1)) < tol*L(iter-1); break; end

    % backward
    R{T} = Z{T+1}-Y;
    for t = T-1:-1:1
        df = Z{t+1}.*(1-Z{t+1});             % h'(a)
        R{t} = df.*(W{t+1}*R{t+1});          % 5.66
    end

    % gradient descent
    for t = 1:T
        dW = Z{t}*R{t}'+lambda*W{t};         % 5.67
        db = sum(R{t},2);
        W{t} = W{t}-eta*dW;                  % 5.43
        b{t} = b{t}-eta*db;
    end
end
L = L(2:iter);
model.W = W;
model.b = b;
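For reference, this is what the training loop above computes, written in the code's column-vector convention. The superscript layer index and the one-hot matrix Y below are my notation; the numbered comments in the code refer to the corresponding equations in Bishop's PRML.

\[
L = -\sum_{n}\sum_{c} Y_{cn}\,\ln Z^{(T+1)}_{cn} \;+\; \frac{\lambda}{2}\sum_{t=1}^{T}\lVert W^{(t)}\rVert_F^2
\]
\[
R^{(T)} = Z^{(T+1)} - Y, \qquad
R^{(t)} = Z^{(t+1)} \odot \bigl(1 - Z^{(t+1)}\bigr) \odot \bigl(W^{(t+1)} R^{(t+1)}\bigr), \quad t = T-1,\dots,1
\]
\[
W^{(t)} \leftarrow W^{(t)} - \eta\,\bigl(Z^{(t)} R^{(t)\top} + \lambda W^{(t)}\bigr), \qquad
b^{(t)} \leftarrow b^{(t)} - \eta \sum_{n} R^{(t)}_{:,n}
\]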
@@ -0,0 +1,19 @@
function [y, P] = mlpClassPred(model, X)
% Multilayer perceptron classification prediction
% logistic activation function is used.
% Input:
% model: model structure
% X: d x n data matrix
% Output:
% y: 1 x n label vector
% P: k x n probability matrix
% Written by Mo Chen ([email protected]).
W = model.W;
b = model.b;
T = length(W);
Z = X;
for t = 1:T-1
    Z = sigmoid(W{t}'*Z+b{t});
end
P = softmax(W{T}'*Z+b{T});
[~,y] = max(P,[],1);    % predicted label = most probable class
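A minimal usage sketch for the pair of routines above (my example, not part of the commit; it assumes mlpClass, mlpClassPred, sigmoid and softmax from this toolbox are on the path, and the two-blob data is invented for illustration):

% two Gaussian blobs in 2-D, labelled 1 and 2
n = 100;
X = [randn(2,n)-2, randn(2,n)+2];          % d x 2n data matrix
y = [ones(1,n), 2*ones(1,n)];              % 1 x 2n label vector
[model, L] = mlpClass(X, y, [4,4], 1e-2);  % two hidden layers with 4 nodes each
[t, P] = mlpClassPred(model, X);
fprintf('training accuracy: %.2f\n', mean(t == y));
plot(L);                                   % regularized cross-entropy per iteration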
@@ -1,22 +1,24 @@
-function [model, L] = mlpReg(X,Y,k,lambda)
-% Train a multilayer perceptron neural network
+function [model, L] = mlpReg(X,y,k,lambda)
+% Train a multilayer perceptron neural network for regression with backpropagation
 % tanh activation function is used
 % Input:
 % X: d x n data matrix
-% Y: p x n response matrix
+% y: p x n response matrix
 % k: T x 1 vector to specify number of hidden nodes in each layer
 % lambda: regularization parameter
 % Output:
 % model: model structure
-% L: loss
+% L: (regularized least square) loss
 % Written by Mo Chen ([email protected]).
 if nargin < 4
     lambda = 1e-2;
 end
-eta = 1e-3;
+eta = 1e-5;
 tol = 1e-5;
 maxiter = 50000;
 L = inf(1,maxiter);

-k = [size(X,1);k(:);size(Y,1)];
+k = [size(X,1);k(:);size(y,1)];
 T = numel(k)-1;
 W = cell(T,1);
 b = cell(T,1);
@@ -30,30 +32,31 @@
 for iter = 2:maxiter
     % forward
     for t = 1:T-1
-        Z{t+1} = tanh(W{t}'*Z{t}+b{t});
+        Z{t+1} = tanh(W{t}'*Z{t}+b{t});      % 5.10 5.113
     end
-    Z{T+1} = W{T}'*Z{T}+b{T};
+    Z{T+1} = W{T}'*Z{T}+b{T};                % 5.114

     % loss
-    E = Z{T+1}-Y;
+    E = Z{T+1}-y;
     Wn = cellfun(@(x) dot(x(:),x(:)),W);     % |W|^2
     L(iter) = dot(E(:),E(:))+lambda*sum(Wn);

     if abs(L(iter)-L(iter-1)) < tol*L(iter-1); break; end

     % backward
-    R{T} = E; % delta
+    R{T} = E;
     for t = T-1:-1:1
         df = 1-Z{t+1}.^2;                    % h'(a)
-        R{t} = df.*(W{t+1}*R{t+1}); % delta
+        R{t} = df.*(W{t+1}*R{t+1});          % 5.66
     end

     % gradient descent
     for t = 1:T
-        dW = Z{t}*R{t}'+lambda*W{t};
+        dW = Z{t}*R{t}'+lambda*W{t};         % 5.67
         db = sum(R{t},2);
-        W{t} = W{t}-eta*dW;
+        W{t} = W{t}-eta*dW;                  % 5.43
         b{t} = b{t}-eta*db;
     end
 end
-L = L(1,2:iter);
+L = L(2:iter);
 model.W = W;
 model.b = b;
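The corresponding quantities for the regression trainer, in the same (my) notation as above, are listed below; note that this routine puts a plain lambda on the penalty rather than lambda/2, matching the code.

\[
L = \sum_{n}\lVert Z^{(T+1)}_{:,n} - y_{:,n}\rVert^2 + \lambda \sum_{t=1}^{T}\lVert W^{(t)}\rVert_F^2
\]
\[
R^{(T)} = Z^{(T+1)} - y, \qquad
R^{(t)} = \bigl(1 - Z^{(t+1)} \odot Z^{(t+1)}\bigr) \odot \bigl(W^{(t+1)} R^{(t+1)}\bigr), \quad t = T-1,\dots,1
\]

with the same gradient-descent updates for W^(t) and b^(t) as in the classification case.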
@@ -1,15 +1,32 @@
-clear; close all;
+clear; close all
 %% Regression
 n = 200;
 x = linspace(0,2*pi,n);
 y = sin(x);

-k = [3,4]; % two hidden layers with 3 and 4 hidden nodes
+h = [10,6]; % two hidden layers with 10 and 6 neurons
+lambda = 1e-2;
-[model, L] = mlpReg(x,y,k);
+[model, L] = mlpReg(x,y,h,lambda);
 t = mlpRegPred(model,x);
 plot(L);
 figure;
 hold on
 plot(x,y,'.');
 plot(x,t);
-hold off
+hold off
+%% Classification
+clear;
+k = 2;
+n = 200;
+[X,y] = kmeansRnd(2,k,n);
+figure;
+plotClass(X,y);
+
+h = 3;
+lambda = 1e-2;
+[model, llh] = mlpClass(X,y,h,lambda);
+[t,p] = mlpClassPred(model,X);
+figure;
+plotClass(X,t);
+figure;
+plot(llh);
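As a quick numeric sanity check after running the classification block of the demo (my addition, not part of the commit):

trainErr = mean(t ~= y);                 % t, y as set by the classification section
fprintf('training error: %.3f\n', trainErr);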