Handle #226 #229

Open · wants to merge 4 commits into base: master

lxmls/deep_learning/numpy_models/log_linear.py (16 changes: 8 additions & 8 deletions)
@@ -18,36 +18,36 @@ def __init__(self, **config):
self.bias = np.zeros((1, config['num_classes']))
self.learning_rate = config['learning_rate']

- def log_forward(self, input=None):
+ def log_forward(self, X):
"""Forward pass of the computation graph"""

# Linear transformation
- z = np.dot(input, self.weight.T) + self.bias
+ z = np.dot(X, self.weight.T) + self.bias

# Softmax implemented in log domain
log_tilde_z = z - logsumexp(z, axis=1, keepdims=True)

return log_tilde_z

- def predict(self, input=None):
+ def predict(self, X):
"""Most probable class index"""
- return np.argmax(np.exp(self.log_forward(input)), axis=1)
+ return np.argmax(self.log_forward(X), axis=1)

- def update(self, input=None, output=None):
+ def update(self, X, y):
"""Stochastic Gradient Descent update"""

# Probabilities of each class
- class_probabilities = np.exp(self.log_forward(input))
+ class_probabilities = np.exp(self.log_forward(X))
batch_size, num_classes = class_probabilities.shape

# Error derivative at softmax layer
- I = index2onehot(output, num_classes)
+ I = index2onehot(y, num_classes)
error = - (I - class_probabilities) / batch_size

# Weight gradient
gradient_weight = np.zeros(self.weight.shape)
for l in np.arange(batch_size):
- gradient_weight += np.outer(error[l, :], input[l, :])
+ gradient_weight += np.outer(error[l, :], X[l, :])

# Bias gradient
gradient_bias = np.sum(error, axis=0, keepdims=True)
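
Aside from the renaming to X/y, the only behavioural tweak in this file is dropping the np.exp call in predict. A minimal standalone check (toy values, not part of this PR) of why that is safe: exp is monotonically increasing, so the argmax over log-probabilities picks the same class as the argmax over probabilities.

    # Toy check, not part of the PR: argmax is invariant under exp, so
    # predict() can take the argmax of log-probabilities directly.
    import numpy as np

    log_p = np.log(np.array([[0.1, 0.7, 0.2],
                             [0.5, 0.3, 0.2]]))
    assert np.array_equal(np.argmax(np.exp(log_p), axis=1),
                          np.argmax(log_p, axis=1))
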
lxmls/deep_learning/numpy_models/mlp.py (32 changes: 16 additions & 16 deletions)
@@ -15,19 +15,19 @@ def __init__(self, **config):
# self.parameters
MLP.__init__(self, **config)

- def predict(self, input=None):
+ def predict(self, X):
"""
Predict model outputs given input
"""
- log_class_probabilities, _ = self.log_forward(input)
- return np.argmax(np.exp(log_class_probabilities), axis=1)
+ log_class_probabilities, _ = self.log_forward(X)
+ return np.argmax(log_class_probabilities, axis=1)

- def update(self, input=None, output=None):
+ def update(self, X, y):
"""
Update model parameters given batch of data
"""

- gradients = self.backpropagation(input, output)
+ gradients = self.backpropagation(X, y)

learning_rate = self.config['learning_rate']
num_parameters = len(self.parameters)
@@ -39,11 +39,11 @@ def update(self, input=None, output=None):
# Update bias
self.parameters[m][1] -= learning_rate * gradients[m][1]

- def log_forward(self, input):
+ def log_forward(self, X):
"""Forward pass for sigmoid hidden layers and output softmax"""

# Input
- tilde_z = input
+ tilde_z = X
layer_inputs = []

# Hidden layers
@@ -72,17 +72,17 @@ def log_forward(self, input):

return log_tilde_z, layer_inputs

- def cross_entropy_loss(self, input, output):
+ def cross_entropy_loss(self, X, y):
"""Cross entropy loss"""
- num_examples = input.shape[0]
- log_probability, _ = self.log_forward(input)
- return -log_probability[range(num_examples), output].mean()
+ num_examples = X.shape[0]
+ log_probability, _ = self.log_forward(X)
+ return -log_probability[range(num_examples), y].mean()

- def backpropagation(self, input, output):
+ def backpropagation(self, X, y):
"""Gradients for sigmoid hidden layers and output softmax"""

# Run forward and store activations for each layer
- log_prob_y, layer_inputs = self.log_forward(input)
+ log_prob_y, layer_inputs = self.log_forward(X)
prob_y = np.exp(log_prob_y)

num_examples, num_clases = prob_y.shape
Expand All @@ -97,18 +97,18 @@ def backpropagation(self, input, output):

# Initial error is the cost derivative at the last layer (for cross
# entropy cost)
- I = index2onehot(output, num_clases)
+ I = index2onehot(y, num_clases)
error = - (I - prob_y) / num_examples
errors.append(error)

# Backpropagate through each layer
for n in reversed(range(num_hidden_layers)):

# Backpropagate through linear layer
- error = np.dot(error, self.parameters[n+1][0])
+ error = np.dot(error, self.parameters[n + 1][0])

# Backpropagate through sigmoid layer
- error *= layer_inputs[n+1] * (1-layer_inputs[n+1])
+ error *= layer_inputs[n + 1] * (1 - layer_inputs[n + 1])

# Collect error
errors.append(error)
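
As a side note, cross_entropy_loss above relies on NumPy fancy indexing: log_probability[range(num_examples), y] picks, for each row, the log-probability of that row's gold label. A self-contained illustration with assumed toy values (not taken from this PR):

    # Standalone illustration of the indexing pattern in cross_entropy_loss;
    # the probabilities below are made-up toy values.
    import numpy as np

    log_probability = np.log(np.array([[0.7, 0.2, 0.1],
                                       [0.1, 0.8, 0.1]]))
    y = np.array([0, 1])                            # gold class per example
    loss = -log_probability[range(2), y].mean()     # -(log 0.7 + log 0.8) / 2
    print(round(loss, 3))                           # ~0.29
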
lxmls/deep_learning/numpy_models/rnn.py (40 changes: 20 additions & 20 deletions)
@@ -11,34 +11,34 @@ def __init__(self, **config):
# self.parameters
RNN.__init__(self, **config)

- def predict(self, input=None):
+ def predict(self, X):
"""
Predict model outputs given input
"""
- p_y = np.exp(self.log_forward(input)[0])
- return np.argmax(p_y, axis=1)
+ log_p_y = self.log_forward(X)[0]
+ return np.argmax(log_p_y, axis=1)

- def update(self, input=None, output=None):
+ def update(self, X, y):
"""
Update model parameters given batch of data
"""
- gradients = self.backpropagation(input, output)
+ gradients = self.backpropagation(X, y)
learning_rate = self.config['learning_rate']
# Update each parameter with SGD rule
num_parameters = len(self.parameters)
for m in range(num_parameters):
# Update weight
self.parameters[m] -= learning_rate * gradients[m]

- def log_forward(self, input):
+ def log_forward(self, X):

# Get parameters and sizes
W_e, W_x, W_h, W_y = self.parameters
hidden_size = W_h.shape[0]
- nr_steps = input.shape[0]
+ nr_steps = X.shape[0]

# Embedding layer
- z_e = W_e[input, :]
+ z_e = W_e[X, :]

# Recurrent layer
h = np.zeros((nr_steps + 1, hidden_size))
@@ -48,33 +48,33 @@ def log_forward(self, input):
z_t = W_x.dot(z_e[t, :]) + W_h.dot(h[t, :])

# Non-linear
- h[t+1, :] = 1.0 / (1 + np.exp(-z_t))
+ h[t + 1, :] = 1.0 / (1 + np.exp(-z_t))

# Output layer
y = h[1:, :].dot(W_y.T)

# Softmax
log_p_y = y - logsumexp(y, axis=1, keepdims=True)

- return log_p_y, y, h, z_e, input
+ return log_p_y, y, h, z_e, X # why does this return its own input?

- def backpropagation(self, input, output):
+ def backpropagation(self, X, y):

'''
Compute gradientes, with the back-propagation method
inputs:
- x: vector with the (embedding) indicies of the words of a
+ X: matrix with the (embedding) indicies of the words of a
sentence
- outputs: vector with the indicies of the tags for each word of
+ y: vector with the indicies of the tags for each word of
the sentence outputs:
gradient_parameters: vector with parameters gradientes
'''

# Get parameters and sizes
W_e, W_x, W_h, W_y = self.parameters
- nr_steps = input.shape[0]
+ nr_steps = X.shape[0]

- log_p_y, y, h, z_e, x = self.log_forward(input)
+ log_p_y, y, h, z_e, x = self.log_forward(X)
p_y = np.exp(log_p_y)

# Initialize gradients with zero entrances
@@ -87,7 +87,7 @@ def backpropagation(self, input, output):
# Solution to Exercise 6.1

# Gradient of the cost with respect to the last linear model
- I = index2onehot(output, W_y.shape[0])
+ I = index2onehot(y, W_y.shape[0])
error = - (I - p_y) / nr_steps

# backward pass, with gradient computation
@@ -119,8 +119,8 @@ def backpropagation(self, input, output):

return gradient_parameters

- def cross_entropy_loss(self, input, output):
+ def cross_entropy_loss(self, X, y):
"""Cross entropy loss"""
- nr_steps = input.shape[0]
- log_probability = self.log_forward(input)[0]
- return -log_probability[range(nr_steps), output].mean()
+ nr_steps = X.shape[0]
+ log_probability = self.log_forward(X)[0]
+ return -log_probability[range(nr_steps), y].mean()
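
One detail worth spelling out in log_forward above: the embedding layer z_e = W_e[X, :] is plain NumPy integer-array indexing, so a sentence of word indices selects the corresponding rows of the embedding matrix. A short sketch with assumed toy shapes (not from this PR):

    # Toy shapes, for illustration only: integer-array indexing maps a sentence
    # of word ids to a (nr_steps, embedding_size) matrix of embeddings.
    import numpy as np

    W_e = np.arange(12.0).reshape(6, 2)   # vocabulary of 6 words, embedding size 2
    X = np.array([4, 0, 5])               # word indices of a 3-token sentence
    z_e = W_e[X, :]                       # shape (3, 2): rows 4, 0 and 5 of W_e
    print(z_e.shape)                      # (3, 2)
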
lxmls/deep_learning/pytorch_models/log_linear.py (16 changes: 8 additions & 8 deletions)
@@ -20,34 +20,34 @@ def __init__(self, **config):
self.log_softmax = torch.nn.LogSoftmax(dim=1)
self.loss_function = torch.nn.NLLLoss()

- def _log_forward(self, input=None):
+ def _log_forward(self, X):
"""Forward pass of the computation graph in logarithm domain (pytorch)"""

# IMPORTANT: Cast to pytorch format
- input = torch.from_numpy(input).float()
+ X = torch.from_numpy(X).float()

# Linear transformation
- z = torch.matmul(input, torch.t(self.weight)) + self.bias
+ z = torch.matmul(X, torch.t(self.weight)) + self.bias

# Softmax implemented in log domain
log_tilde_z = self.log_softmax(z)

# NOTE that this is a pytorch class!
return log_tilde_z

- def predict(self, input=None):
+ def predict(self, X):
"""Most probable class index"""
- log_forward = self._log_forward(input).data.numpy()
+ log_forward = self._log_forward(X).data.numpy()
return np.argmax(log_forward, axis=1)

- def update(self, input=None, output=None):
+ def update(self, X, y):
"""Stochastic Gradient Descent update"""

# IMPORTANT: Class indices need to be casted to LONG
- true_class = torch.from_numpy(output).long()
+ true_class = torch.from_numpy(y).long()

# Compute negative log-likelihood loss
- loss = self.loss_function(self._log_forward(input), true_class)
+ loss = self.loss_function(self._log_forward(X), true_class)

# Use autograd to compute the backward pass.
loss.backward()
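
For orientation, the model above pairs torch.nn.LogSoftmax with torch.nn.NLLLoss, which together give the cross-entropy of the gold classes. A self-contained toy check (values and shapes are assumptions, not taken from this PR):

    # Toy check: LogSoftmax followed by NLLLoss equals the negative mean
    # log-probability of the gold classes. Values below are made up.
    import torch

    z = torch.tensor([[2.0, 0.5, -1.0],
                      [0.1, 0.2, 0.3]])          # unnormalised scores, batch of 2
    y = torch.tensor([0, 2])                     # gold class indices (LONG, as in update)
    log_p = torch.nn.LogSoftmax(dim=1)(z)
    loss = torch.nn.NLLLoss()(log_p, y)
    assert torch.isclose(loss, -log_p[torch.arange(2), y].mean())
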
lxmls/deep_learning/pytorch_models/mlp.py (21 changes: 10 additions & 11 deletions)
@@ -33,16 +33,15 @@ def __init__(self, **config):
self.loss_function = torch.nn.NLLLoss()

# TODO: Move these outside fo the class as in the numpy case
- def _log_forward(self, input):
+ def _log_forward(self, X):
"""
Forward pass
"""

# Ensure the type matches torch type
- input = cast_float(input)
+ X = cast_float(X)

# Input
- tilde_z = input
+ tilde_z = X

# ----------
# Solution to Exercise 6.4
@@ -71,15 +70,15 @@ def _log_forward(self, input):

return log_tilde_z

- def gradients(self, input, output):
+ def gradients(self, X, y):
"""
Computes the gradients of the network with respect to cross entropy
error cost
"""
- true_class = torch.from_numpy(output).long()
+ true_class = torch.from_numpy(y).long()

# Compute negative log-likelihood loss
- _log_forward = self._log_forward(input)
+ _log_forward = self._log_forward(X)
loss = self.loss_function(_log_forward, true_class)
# Use autograd to compute the backward pass.
loss.backward()
@@ -90,18 +89,18 @@ def gradients(self, input, output):
nabla_parameters.append([weight.grad.data, bias.grad.data])
return nabla_parameters

- def predict(self, input=None):
+ def predict(self, X):
"""
Predict model outputs given input
"""
- log_forward = self._log_forward(input).data.numpy()
+ log_forward = self._log_forward(X).data.numpy()
return np.argmax(log_forward, axis=1)

- def update(self, input=None, output=None):
+ def update(self, X, y):
"""
Update model parameters given batch of data
"""
- gradients = self.gradients(input, output)
+ gradients = self.gradients(X, y)
learning_rate = self.config['learning_rate']
# Update each parameter with SGD rule
for m in range(self.num_layers):
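
Finally, gradients above follows the standard autograd recipe: forward pass, NLL loss, backward(), then read each parameter's .grad buffer. A self-contained sketch under assumed names and sizes (none of this is lxmls code):

    # Standalone sketch of the autograd pattern used in gradients(); all names
    # and sizes here are illustrative assumptions, not lxmls code.
    import torch

    weight = torch.randn(3, 5, requires_grad=True)   # 5 features -> 3 classes
    bias = torch.zeros(1, 3, requires_grad=True)
    X = torch.randn(4, 5)                            # batch of 4 examples
    y = torch.tensor([0, 2, 1, 0])                   # gold classes, LONG

    log_p = torch.nn.LogSoftmax(dim=1)(X @ weight.t() + bias)
    loss = torch.nn.NLLLoss()(log_p, y)
    loss.backward()

    nabla_weight, nabla_bias = weight.grad.data, bias.grad.data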