Merge pull request #28 from sth4nth/master
refined all docs, all test passed
sth4nth committed Feb 21, 2016
2 parents e8a46c9 + 3ab4747 commit 6d57671
Showing 124 changed files with 1,063 additions and 613 deletions.
10 changes: 4 additions & 6 deletions TODO.txt
@@ -1,9 +1,7 @@
TODO:
chapter10: compute bound terms (entropy) inside each factor
chapter10/12: prediction functions for VB
chapter05: MLP
chapter08: BP, EP
chapter07: rvm seq bug
extract demos

Help:
standardize help description: add input/output description
chapter04: plot multiclass data boundary
chapter05: MLP
chapter08: BP, EP
7 changes: 5 additions & 2 deletions chapter01/condEntropy.m
@@ -1,6 +1,9 @@
function z = condEntropy (x, y)
% Compute conditional entropy H(x|y) of two discrete variables x and y.
% x, y: two vectors of integers of the same length
% Compute conditional entropy z=H(x|y) of two discrete variables x and y.
% Input:
% x, y: two integer vectors of the same length
% Output:
% z: conditional entropy z=H(x|y)
% Written by Mo Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
18 changes: 9 additions & 9 deletions chapter01/demo.m
@@ -1,5 +1,5 @@
% Done
% demo for information theory toolbox

% demos for ch01
clear;
k = 10; % variable range
n = 100; % number of variables
@@ -10,20 +10,20 @@
% x = randi(k,1,n); % need statistics toolbox
% y = randi(k,1,n);

%% entropy H(x), H(y)
%% Entropy H(x), H(y)
Hx = entropy(x);
Hy = entropy(y);
%% joint entropy H(x,y)
%% Joint entropy H(x,y)
Hxy = jointEntropy(x,y);
%% conditional entropy H(x|y)
%% Conditional entropy H(x|y)
Hx_y = condEntropy(x,y);
%% mutual information I(x,y)
%% Mutual information I(x,y)
Ixy = mutInfo(x,y);
%% relative entropy (KL divergence) KL(p(x)|p(y))
%% Relative entropy (KL divergence) KL(p(x)|p(y))
Dxy = relatEntropy(x,y);
%% normalized mutual information I_n(x,y)
%% Normalized mutual information I_n(x,y)
nIxy = nmi(x,y);
%% nomalized variation information I_v(x,y)
%% Normalized variation information I_v(x,y)
vIxy = nvi(x,y);
%% H(x|y) = H(x,y)-H(y)
isequalf(Hx_y,Hxy-Hy)
19 changes: 12 additions & 7 deletions chapter01/entropy.m
@@ -1,10 +1,15 @@
function z = entropy(x)
% Compute entropy H(x) of a discrete variable x.
% x: a vectors of integers
% Compute entropy z=H(x) of a discrete variable x.
% Input:
% x: an integer vector
% Output:
% z: entropy z=H(x)
% Written by Mo Chen ([email protected]).
n = numel(x);
x = reshape(x,1,n);
[u,~,label] = unique(x);
p = full(mean(sparse(1:n,label,1,n,numel(u),n),1));
z = -dot(p,log2(p+eps));
z = max(0,z);
[u,~,x] = unique(x);
k = numel(u);
idx = 1:n;
Mx = sparse(idx,x,1,n,k,n);
Px = nonzeros(mean(Mx,1));
Hx = -dot(Px,log2(Px));
z = max(0,Hx);
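
A minimal usage sketch for the revised entropy.m (hypothetical data; assumes the toolbox is on the MATLAB path):

% Sketch: entropy of a small integer sample
x = [1 1 2 3 3 3];   % symbol frequencies 2/6, 1/6, 3/6
Hx = entropy(x);     % about 1.46 bits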
7 changes: 5 additions & 2 deletions chapter01/jointEntropy.m
@@ -1,6 +1,9 @@
function z = jointEntropy(x, y)
% Compute joint entropy H(x,y) of two discrete variables x and y.
% x, y: two vectors of integers of the same length
% Compute joint entropy z=H(x,y) of two discrete variables x and y.
% Input:
% x, y: two integer vectors of the same length
% Output:
% z: joint entropy z=H(x,y)
% Written by Mo Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
5 changes: 4 additions & 1 deletion chapter01/mutInfo.m
@@ -1,6 +1,9 @@
function z = mutInfo(x, y)
% Compute mutual information I(x,y) of two discrete variables x and y.
% x, y: two vectors of integers of the same length
% Input:
% x, y: two integer vectors of the same length
% Output:
% z: mutual information z=I(x,y)
% Written by Mo Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
7 changes: 5 additions & 2 deletions chapter01/nmi.m
@@ -1,6 +1,9 @@
function z = nmi(x, y)
% Compute normalized mutual information I(x,y)/sqrt(H(x)*H(y)).
% x, y: two vectors of integers of the same length
% Compute normalized mutual information I(x,y)/sqrt(H(x)*H(y)) of two discrete variables x and y.
% Input:
% x, y: two integer vectors of the same length
% Output:
% z: normalized mutual information z=I(x,y)/sqrt(H(x)*H(y))
% Written by Mo Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
7 changes: 5 additions & 2 deletions chapter01/nvi.m
@@ -1,6 +1,9 @@
function z = nvi(x, y)
% Compute normalized variation information (1-I(x,y)/H(x,y)).
% x, y: two vectors of integers of the same length
% Compute normalized variation information z=(1-I(x,y)/H(x,y)) of two discrete variables x and y.
% Input:
% x, y: two integer vectors of the same length
% Output:
% z: normalized variation information z=(1-I(x,y)/H(x,y))
% Written by Mo Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
7 changes: 5 additions & 2 deletions chapter01/relatEntropy.m
@@ -1,6 +1,9 @@
function z = relatEntropy (x, y)
% Compute relative entropy (a.k.a KL divergence) KL(p(x)||p(y)) of two discrete variables x and y.
% x, y: two vectors of integers of the same length
% Compute relative entropy (a.k.a KL divergence) z=KL(p(x)||p(y)) of two discrete variables x and y.
% Input:
% x, y: two integer vectors of the same length
% Output:
% z: relative entropy (a.k.a KL divergence) z=KL(p(x)||p(y))
% Written by Mo Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
7 changes: 5 additions & 2 deletions chapter02/logDirichlet.m
@@ -1,8 +1,11 @@
function y = logDirichlet(X, a)
% Compute log pdf of a Dirichlet distribution.
% X: d x n data matrix satifying (sum(X,1)==ones(1,n) && X>=0)
% a: d x k parameters
% Input:
% X: d x n data matrix, each column sums to one (sum(X,1)==ones(1,n) && X>=0)
% a: d x k parameter of the Dirichlet distribution
% y: k x n probability density
% Output:
% y: k x n probability density in logarithm scale y=log p(x)
% Written by Mo Chen ([email protected]).
X = bsxfun(@times,X,1./sum(X,1));
if size(a,1) == 1
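
A usage sketch under the documented shapes for logDirichlet (hypothetical data; one parameter column, k = 1):

% Sketch: log Dirichlet density at points on the simplex
X = rand(3,5);
X = bsxfun(@times, X, 1./sum(X,1));  % normalize columns to sum to one
a = [1; 2; 3];                       % d x 1 parameter
y = logDirichlet(X, a);              % 1 x 5 log densities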
6 changes: 6 additions & 0 deletions chapter02/logGauss.m
@@ -1,5 +1,11 @@
function y = logGauss(X, mu, sigma)
% Compute log pdf of a Gaussian distribution.
% Input:
% X: d x n data matrix
% mu: mean of Gaussian
% sigma: variance of Gaussian
% Output:
% y: probability density in logarithm scale y=log p(x)
% Written by Mo Chen ([email protected]).

[d,n] = size(X);
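
A usage sketch for logGauss (hypothetical data; assumes sigma may be passed as a d x d covariance, as the chapter02 code elsewhere suggests):

% Sketch: log density of a standard 2-d Gaussian
X = randn(2,100);            % 2 x 100 data
mu = zeros(2,1);             % mean
sigma = eye(2);              % covariance
y = logGauss(X, mu, sigma);  % 1 x 100 log densities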
7 changes: 6 additions & 1 deletion chapter02/logKde.m
@@ -1,5 +1,10 @@
function z = logKde (X, Y, sigma)
% Compute log pdf of kernel density estimator.
% Input:
% X: d x n data matrix to be evaluated
% Y: d x k data matrix serving as the database
% sigma: kernel bandwidth
% Output:
% z: probability density in logarithm scale z=log p(x|y)
% Written by Mo Chen ([email protected]).
D = bsxfun(@plus,full(dot(X,X,1)),full(dot(Y,Y,1))')-full(2*(Y'*X));
z = logSumExp(D/(-2*sigma^2),1)-0.5*log(2*pi)-log(sigma*size(Y,2));
z = logsumexp(D/(-2*sigma^2),1)-0.5*log(2*pi)-log(sigma*size(Y,2));
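
A usage sketch for logKde, where sigma acts as the kernel bandwidth (hypothetical data):

% Sketch: Gaussian KDE built on Y, evaluated at the columns of X
Y = randn(1,500);         % database sample defining the estimator
X = linspace(-3,3,61);    % 1 x 61 evaluation points
z = logKde(X, Y, 0.2);    % 1 x 61 log densities, bandwidth 0.2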
16 changes: 7 additions & 9 deletions chapter02/logMn.m
@@ -1,11 +1,9 @@
function z = logMn (x, p)
function z = logMn(x, p)
% Compute log pdf of a multinomial distribution.
% Input:
% x: d x 1 integer vector
% p: d x 1 probability
% Output:
% z: probability density in logarithm scale z=log p(x)
% Written by Mo Chen ([email protected]).
if numel(x) ~= numel(p)
n = numel(x);
x = reshape(x,1,n);
[u,~,label] = unique(x);
x = full(sum(sparse(label,1:n,1,n,numel(u),n),2));
end
z = gammaln(sum(x)+1)-sum(gammaln(x+1))+dot(x,log(p));
endfunction
z = gammaln(sum(x)+1)-sum(gammaln(x+1))+dot(x,log(p));
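
With the sample-to-counts conversion removed, logMn now expects x to already be a count vector aligned with p. A sketch (hypothetical numbers):

% Sketch: log multinomial pmf of observed counts
x = [3; 2; 5];        % counts over d = 3 categories
p = [0.3; 0.2; 0.5];  % category probabilities
z = logMn(x, p);      % log p(x)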
12 changes: 9 additions & 3 deletions chapter02/logMvGamma.m
@@ -1,7 +1,13 @@
function y = logMvGamma(x,d)
% Compute logarithm multivariate Gamma function.
% Gamma_p(x) = pi^(d(d-1)/4) \prod_(j=1)^d Gamma(x+(1-j)/2)
% log(Gamma_p(x)) = d(d-1)/4 log(pi) + \sum_(j=1)^d log(Gamma(x+(1-j)/2))
% Compute the logarithm of the multivariate Gamma function, which is used in
% the probability density functions of the Wishart and inverse Wishart distributions.
% Gamma_d(x) = pi^(d(d-1)/4) \prod_(j=1)^d Gamma(x+(1-j)/2)
% log(Gamma_d(x)) = d(d-1)/4 log(pi) + \sum_(j=1)^d log(Gamma(x+(1-j)/2))
% Input:
% x: m x n data matrix
% d: dimension
% Output:
% y: m x n log multivariate Gamma values
% Written by Michael Chen ([email protected]).
s = size(x);
x = reshape(x,1,prod(s));
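
Since logMvGamma applies elementwise and Gamma_1(x) = Gamma(x), a quick sanity check against the built-in gammaln is possible (sketch, illustrative values):

% Sketch: logMvGamma at d = 1 should match gammaln
y1 = logMvGamma(3.5, 1);         % log multivariate Gamma, d = 1
y2 = gammaln(3.5);               % built-in log Gamma; y1 and y2 agree up to rounding
Y  = logMvGamma([2 3; 4 5], 3);  % elementwise on a 2 x 2 argument, d = 3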
7 changes: 7 additions & 0 deletions chapter02/logSt.m
@@ -1,5 +1,12 @@
function y = logSt(X, mu, sigma, v)
% Compute log pdf of a Student's t distribution.
% Input:
% X: d x n data matrix
% mu: mean
% sigma: variance
% v: degrees of freedom
% Output:
% y: probability density in logarithm scale y=log p(x)
% Written by Mo Chen ([email protected]).
[d,k] = size(mu);

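
A usage sketch for logSt with scalar parameters (hypothetical data):

% Sketch: log density of a Student's t with 3 degrees of freedom
X = randn(1,100);       % 1 x 100 data
y = logSt(X, 0, 1, 3);  % mu = 0, sigma = 1, v = 3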
6 changes: 6 additions & 0 deletions chapter02/logVmf.m
@@ -1,5 +1,11 @@
function y = logVmf(X, mu, kappa)
% Compute log pdf of a von Mises-Fisher distribution.
% Input:
% X: d x n data matrix
% mu: d x k mean
% kappa: 1 x k concentration parameter
% Output:
% y: k x n probability density in logarithm scale y=log p(x)
% Written by Mo Chen ([email protected]).
d = size(X,1);
c = (d/2-1)*log(kappa)-(d/2)*log(2*pi)-logbesseli(d/2-1,kappa);
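
A usage sketch for logVmf; von Mises-Fisher data lie on the unit sphere, so the columns of X are normalized first (hypothetical data):

% Sketch: von Mises-Fisher log density on the 2-sphere
X = randn(3,50);
X = bsxfun(@times, X, 1./sqrt(dot(X,X,1)));  % normalize columns to unit length
mu = [0; 0; 1];                              % d x 1 mean direction
y = logVmf(X, mu, 5);                        % 1 x 50 log densities, kappa = 5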
8 changes: 7 additions & 1 deletion chapter02/logWishart.m
@@ -1,5 +1,11 @@
function y = logWishart(Sigma, v, W)
function y = logWishart(Sigma, W, v)
% Compute log pdf of a Wishart distribution.
% Input:
% Sigma: d x d covariance matrix
% W: d x d covariance parameter
% v: degree of freedom
% Output:
% y: probability density in logarithm scale y=log p(Sigma)
% Written by Mo Chen ([email protected]).
d = length(Sigma);
B = -0.5*v*logdet(W)-0.5*v*d*log(2)-logmvgamma(0.5*v,d);
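
A sketch under the new argument order logWishart(Sigma, W, v) (hypothetical matrices; v must exceed d-1):

% Sketch: log Wishart density at the identity
Sigma = eye(3);               % d x d random-matrix argument
W = eye(3);                   % d x d scale parameter
y = logWishart(Sigma, W, 5);  % v = 5 degrees of freedom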
21 changes: 10 additions & 11 deletions chapter03/demo.m
@@ -1,18 +1,17 @@
% Done
% demo for chapter 03
% demos for ch03
clear; close all;
d = 1;
n = 200;
[x,t] = linRnd(d,n);
%%
% model = linReg(x,t);
% linPlot(model,x,t);
%%
% [model1,llh1] = linRegEm(x,t);
% plot(llh);
% linPlot(model,x,t);
%%
%% Linear regression
model = linReg(x,t);
plotBar(model,x,t);
%% Empirical Bayesian linear regression via EM
[model1,llh] = linRegEm(x,t);
plot(llh);
plotBar(model,x,t);
%% Empirical Bayesian linear regression via Mackay fix point iteration method
[model,llh] = linRegFp(x,t);
[y, sigma] = linPred(model,x,t);
plot(llh);
linPlot(model,x,t);
plotBar(model,x,t);
8 changes: 5 additions & 3 deletions chapter03/linPlot.m
@@ -1,7 +1,9 @@
function linPlot(model, X, t)
% Plot linear function and data
% X: 1xn data
% t: 1xn response
% Plot linear function for 1d data
% Input:
% model: trained model structure
% X: 1 x n data
% t: 1 x n response
% Written by Mo Chen ([email protected]).
color = [255,228,225]/255; %pink
% [x,idx] = sort(x);
7 changes: 6 additions & 1 deletion chapter03/linPred.m
@@ -1,8 +1,13 @@
function [y, sigma, p] = linPred(model, X, t)
% Compute linear model reponse y = w'*X+w0 and likelihood
% Compute linear regression model response y = w'*X+w0 and likelihood
% Input:
% model: trained model structure
% X: d x n testing data
% t (optional): 1 x n testing response
% Output:
% y: 1 x n prediction
% sigma: variance
% p: 1 x n likelihood of t
% Written by Mo Chen ([email protected]).
w = model.w;
w0 = model.w0;
4 changes: 4 additions & 0 deletions chapter03/linReg.m
@@ -1,7 +1,11 @@
function model = linReg(X, t, lambda)
% Fit linear regression model y=w'x+w0
% Input:
% X: d x n data
% t: 1 x n response
% lambda: regularization parameter
% Output:
% model: trained model structure
% Written by Mo Chen ([email protected]).
if nargin < 3
lambda = 0;
9 changes: 7 additions & 2 deletions chapter03/linRegFp.m
@@ -1,8 +1,13 @@
function [model, llh] = linRegFp(X, t, alpha, beta)
% Fit empirical Bayesian linear model with Mackay fixed point method
% (p.168)
% Fit empirical Bayesian linear model with Mackay fixed point method (p.168)
% Input:
% X: d x n data
% t: 1 x n response
% alpha: prior parameter
% beta: prior parameter
% Output:
% model: trained model structure
% llh: loglikelihood
% Written by Mo Chen ([email protected]).
if nargin < 3
alpha = 0.02;
10 changes: 7 additions & 3 deletions chapter03/linRnd.m
@@ -1,10 +1,14 @@
function [X, t] = linRnd(d, n)
% Generate data from a linear model p(t|w,x)=G(w'x+w0,sigma), sigma=sqrt(1/beta)
% where w and w0 are generated from Gauss(0,1),
% beta is generated from Gamma(1,1),
% X is generated form [0,1]
% where w and w0 are generated from Gauss(0,1), beta is generated from
% Gamma(1,1), X is generated from [0,1].
% Input:
% d: dimension of data
% n: number of data
% Output:
% X: d x n data matrix
% t: 1 x n response variable
% Written by Mo Chen ([email protected]).
beta = randg; % need statistics toolbox
X = rand(d,n);
w = randn(d,1);
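
Taken together, the chapter03 pieces form a short pipeline; a sketch mirroring the demo (assumes the toolbox is on the MATLAB path):

% Sketch: generate, fit, predict
[X, t] = linRnd(1, 200);         % random 1-d data from a linear model
model = linReg(X, t);            % least-squares fit (lambda defaults to 0)
[y, sigma] = linPred(model, X);  % predictive mean and variance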
7 changes: 5 additions & 2 deletions chapter04/binPlot.m
@@ -1,7 +1,10 @@
function binPlot(model, X, t)
% Plot binary classification result for 2d data
% X: 2xn data matrix
% t: 1xn label
% Input:
% model: trained model structure
% X: 2 x n data matrix
% t: 1 x n label
% Written by Mo Chen ([email protected]).
assert(size(X,1) == 2);
w = model.w;
xi = min(X,[],2);