diff --git a/chapter05/mlpClass.m b/chapter05/mlpClass.m
new file mode 100644
index 0000000..0a5d645
--- /dev/null
+++ b/chapter05/mlpClass.m
@@ -0,0 +1,63 @@
+function [model, L] = mlpClass(X,y,k,lambda)
+% Train a multilayer perceptron neural network for classification with backpropagation
+% logistic activation is used for hidden layers; softmax for the output layer.
+% Input:
+%   X: d x n data matrix
+%   y: 1 x n label vector
+%   k: T x 1 vector to specify number of hidden nodes in each layer
+%   lambda: regularization parameter
+% Output:
+%   model: model structure
+%   L: (regularized cross entropy) loss
+% Written by Mo Chen (sth4nth@gmail.com).
+if nargin < 4
+    lambda = 1e-2;
+end
+eta = 1e-3;
+tol = 1e-4;
+maxiter = 50000;
+L = inf(1,maxiter);
+
+Y = sparse(y,1:numel(y),1);
+k = [size(X,1);k(:);size(Y,1)];
+T = numel(k)-1;
+W = cell(T,1);
+b = cell(T,1);
+for t = 1:T
+    W{t} = randn(k(t),k(t+1));
+    b{t} = randn(k(t+1),1);
+end
+R = cell(T,1);
+Z = cell(T+1,1);
+Z{1} = X;
+for iter = 2:maxiter
+%     forward
+    for t = 1:T-1
+        Z{t+1} = sigmoid(W{t}'*Z{t}+b{t});   % 5.10 5.113
+    end
+    Z{T+1} = softmax(W{T}'*Z{T}+b{T});
+
+%     loss
+    E = Z{T+1};
+    Wn = cellfun(@(x) dot(x(:),x(:)),W);     % |W|^2
+    L(iter) = -dot(Y(:),log(E(:)))+0.5*lambda*sum(Wn);
+    if abs(L(iter)-L(iter-1)) < tol*L(iter-1); break; end
+
+%     backward
+    R{T} = Z{T+1}-Y;
+    for t = T-1:-1:1
+        df = Z{t+1}.*(1-Z{t+1});             % h'(a)
+        R{t} = df.*(W{t+1}*R{t+1});          % 5.66
+    end
+
+%     gradient descent
+    for t=1:T
+        dW = Z{t}*R{t}'+lambda*W{t};         % 5.67
+        db = sum(R{t},2);
+        W{t} = W{t}-eta*dW;                  % 5.43
+        b{t} = b{t}-eta*db;
+    end
+end
+L = L(2:iter);
+model.W = W;
+model.b = b;
diff --git a/chapter05/mlpClassPred.m b/chapter05/mlpClassPred.m
new file mode 100644
index 0000000..0c94742
--- /dev/null
+++ b/chapter05/mlpClassPred.m
@@ -0,0 +1,19 @@
+function [y, P] = mlpClassPred(model, X)
+% Multilayer perceptron classification prediction
+% logistic activation is used for hidden layers; softmax for the output layer.
+% Input:
+%   model: model structure
+%   X: d x n data matrix
+% Output:
+%   y: 1 x n label vector
+%   P: k x n probability matrix
+% Written by Mo Chen (sth4nth@gmail.com).
+W = model.W;
+b = model.b;
+T = length(W);
+Z = X;
+for t = 1:T-1
+    Z = sigmoid(W{t}'*Z+b{t});
+end
+P = softmax(W{T}'*Z+b{T});
+[~,y] = max(P,[],1);
\ No newline at end of file
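Note that mlpClass.m and mlpClassPred.m call sigmoid and softmax, which are expected to be helper functions already on the MATLAB path (neither is a base MATLAB built-in for plain numeric matrices); the % 5.xx comments appear to reference equation numbers in PRML chapter 5. For readers running these two files standalone, a minimal sketch of what the two calls need to compute is given below, each saved in its own file. This is an assumption about the helpers' behavior, not the toolbox's own implementation.

function s = sigmoid(a)
% Logistic sigmoid, applied elementwise (assumed helper, not part of this patch).
s = 1./(1+exp(-a));

function p = softmax(a)
% Column-wise softmax (assumed helper, not part of this patch);
% subtract the column max before exponentiating for numerical stability.
a = bsxfun(@minus, a, max(a,[],1));
p = exp(a);
p = bsxfun(@rdivide, p, sum(p,1));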
diff --git a/chapter05/mlpReg.m b/chapter05/mlpReg.m
index caf42d1..d3759eb 100644
--- a/chapter05/mlpReg.m
+++ b/chapter05/mlpReg.m
@@ -1,22 +1,24 @@
-function [model, L] = mlpReg(X,Y,k,lambda)
-% Train a multilayer perceptron neural network
+function [model, L] = mlpReg(X,y,k,lambda)
+% Train a multilayer perceptron neural network for regression with backpropagation
+% tanh activation is used for hidden layers; the output layer is linear.
 % Input:
 %   X: d x n data matrix
-%   Y: p x n response matrix
+%   y: p x n response matrix
 %   k: T x 1 vector to specify number of hidden nodes in each layer
 %   lambda: regularization parameter
 % Ouput:
 %   model: model structure
-%   L: loss
+%   L: (regularized least squares) loss
 % Written by Mo Chen (sth4nth@gmail.com).
 if nargin < 4
     lambda = 1e-2;
 end
-eta = 1e-3;
+eta = 1e-5;
+tol = 1e-5;
 maxiter = 50000;
 L = inf(1,maxiter);
 
-k = [size(X,1);k(:);size(Y,1)];
+k = [size(X,1);k(:);size(y,1)];
 T = numel(k)-1;
 W = cell(T,1);
 b = cell(T,1);
@@ -30,30 +32,31 @@
 for iter = 2:maxiter
 %     forward
     for t = 1:T-1
-        Z{t+1} = tanh(W{t}'*Z{t}+b{t});
+        Z{t+1} = tanh(W{t}'*Z{t}+b{t});      % 5.10 5.113
     end
-    Z{T+1} = W{T}'*Z{T}+b{T};
+    Z{T+1} = W{T}'*Z{T}+b{T};                % 5.114
 
 %     loss
-    E = Z{T+1}-Y;
+    E = Z{T+1}-y;
     Wn = cellfun(@(x) dot(x(:),x(:)),W);     % |W|^2
     L(iter) = dot(E(:),E(:))+lambda*sum(Wn);
-
+    if abs(L(iter)-L(iter-1)) < tol*L(iter-1); break; end
+
 %     backward
-    R{T} = E;    % delta
+    R{T} = E;
     for t = T-1:-1:1
         df = 1-Z{t+1}.^2;                    % h'(a)
-        R{t} = df.*(W{t+1}*R{t+1});          % delta
+        R{t} = df.*(W{t+1}*R{t+1});          % 5.66
     end
 
 %     gradient descent
     for t=1:T
-        dW = Z{t}*R{t}'+lambda*W{t};
+        dW = Z{t}*R{t}'+lambda*W{t};         % 5.67
         db = sum(R{t},2);
-        W{t} = W{t}-eta*dW;
+        W{t} = W{t}-eta*dW;                  % 5.43
         b{t} = b{t}-eta*db;
     end
 end
-L = L(1,2:iter);
+L = L(2:iter);
 model.W = W;
 model.b = b;
diff --git a/chapter05/mlpRegPred.m b/chapter05/mlpRegPred.m
index e3bba3f..d2e67f9 100644
--- a/chapter05/mlpRegPred.m
+++ b/chapter05/mlpRegPred.m
@@ -1,5 +1,6 @@
 function Y = mlpRegPred(model, X)
-% Multilayer perceptron prediction
+% Multilayer perceptron regression prediction
+% tanh activation is used for hidden layers; the output layer is linear.
 % Input:
 %   model: model structure
 %   X: d x n data matrix
diff --git a/demo/ch05/mlp_demo.m b/demo/ch05/mlp_demo.m
index 75c170a..70b57b3 100644
--- a/demo/ch05/mlp_demo.m
+++ b/demo/ch05/mlp_demo.m
@@ -1,15 +1,32 @@
-clear; close all;
+clear; close all
+%% Regression
 n = 200;
 x = linspace(0,2*pi,n);
 y = sin(x);
-k = [3,4]; % two hidden layers with 3 and 4 hidden nodes
+h = [10,6]; % two hidden layers with 10 and 6 neurons
 lambda = 1e-2;
-[model, L] = mlpReg(x,y,k);
+[model, L] = mlpReg(x,y,h,lambda);
 t = mlpRegPred(model,x);
 plot(L);
 
 figure;
 hold on
 plot(x,y,'.');
 plot(x,t);
-hold off
\ No newline at end of file
+hold off
+%% Classification
+clear;
+k = 2;
+n = 200;
+[X,y] = kmeansRnd(2,k,n);
+figure;
+plotClass(X,y);
+
+h = 3;
+lambda = 1e-2;
+[model, L] = mlpClass(X,y,h,lambda);
+[t,p] = mlpClassPred(model,X);
+figure;
+plotClass(X,t);
+figure;
+plot(L);
\ No newline at end of file
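The classification half of mlp_demo.m relies on the toolbox helpers kmeansRnd and plotClass. A quick smoke test of the new classification code that avoids them might look like the sketch below, assuming mlpClass.m, mlpClassPred.m, and the activation helpers above are on the path; the data and layer size here are made up for illustration.

% Hypothetical check: two well-separated Gaussian blobs with labels 1 and 2.
n = 100;
X = [randn(2,n)-2, randn(2,n)+2];       % d x n data matrix (d = 2)
y = [ones(1,n), 2*ones(1,n)];           % 1 x n label vector
[model, L] = mlpClass(X, y, 3, 1e-2);   % one hidden layer with 3 nodes
t = mlpClassPred(model, X);
fprintf('training accuracy: %.2f\n', mean(t == y));
plot(L);                                % regularized cross-entropy per iteration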