#!/usr/bin/env python
import numpy as np
#----------------------------------------------------------------------
# taken from the Lasagne MNIST example and modified
def iterate_minibatches(targets, batchsize, shuffle=False, selectedIndices=None):
    """Generate (batch indices, batch targets) pairs of size batchsize.

    Trailing samples that do not fill a complete batch are dropped. If
    selectedIndices is given, batches are drawn only from that subset.
    """
    if selectedIndices is not None:
        # a subset of indices was specified; copy it so that shuffling
        # does not modify the caller's list
        indices = list(selectedIndices)
    else:
        indices = np.arange(len(targets))

    if shuffle:
        np.random.shuffle(indices)

    for start_idx in range(0, len(indices) - batchsize + 1, batchsize):
        excerpt = indices[start_idx:start_idx + batchsize]
        yield excerpt, targets[excerpt]
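
# usage sketch (illustrative addition, not part of the original module;
# the variable names below are made up): with ten targets and batchsize 4,
# the generator yields two batches of four and drops the last two samples
#
#   exampleTargets = np.arange(10) * 10
#   for batchIndices, batchTargets in iterate_minibatches(exampleTargets, 4):
#       print(batchIndices, batchTargets)
#   # -> [0 1 2 3] [ 0 10 20 30]
#   # -> [4 5 6 7] [40 50 60 70]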
#----------------------------------------------------------------------
# my own gradient update supporting learning rate decay
# (like we use in Torch)
# Torch's learning rate decay is implemented here:
# https://github.com/torch/optim/blob/b812d2a381162bed9f0df26cab8abb4015f47471/sgd.lua#L27
#
# see other Lasagne update code e.g. here: https://github.com/Lasagne/Lasagne/blob/996bf64c0aec6d481044495800b461cc62040041/lasagne/updates.py#L584
def sgdWithLearningRateDecay(loss_or_grads, params, learningRate, learningRateDecay):
    # import lazily so the module stays usable without Theano/Lasagne installed
    from lasagne.updates import get_or_compute_grads
    import theano.tensor as T
    import theano
    from collections import OrderedDict
    from lasagne import utils

    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()

    # shared counter of gradient updates performed so far
    t_prev = theano.shared(utils.floatX(0.))
    # symbolic constant, used below to prevent upcasting of Python floats
    one = T.constant(1)

    t = t_prev + 1
    clr = learningRate / (one + t * learningRateDecay)
    # http://leon.bottou.org/publications/pdf/tricks-2012.pdf
    # for example suggests (section 5.2):
    # "use learning rates of the form
    #  gamma_t = gamma_0 / (1 + gamma_0 * lambda * t)
    #  determine the best gamma_0 using a small training
    #  data sample"
    # (lambda / 2 is the coefficient of the squared weight norm
    # in the L2 regularization term)
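    # numeric illustration (added, not in the original): with
    # learningRate = 0.01 and learningRateDecay = 1e-4, the effective
    # rate after t = 10000 updates is 0.01 / (1 + 10000 * 1e-4) = 0.005,
    # i.e. half the initial value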

    for param, grad in zip(params, grads):
        updates[param] = param - clr * grad

    updates[t_prev] = t
    return updates
#----------------------------------------------------------------------
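
#----------------------------------------------------------------------
# usage sketch (illustrative addition, not part of the original module):
# compiling a Theano training function from the returned updates
# dictionary; `network`, `inputVar` and `targetVar` are hypothetical
# placeholders for a Lasagne network and its input/target variables
#
#   import theano
#   import lasagne
#
#   prediction = lasagne.layers.get_output(network)
#   loss = lasagne.objectives.categorical_crossentropy(prediction, targetVar).mean()
#   params = lasagne.layers.get_all_params(network, trainable=True)
#   updates = sgdWithLearningRateDecay(loss, params,
#                                      learningRate=0.01, learningRateDecay=1e-4)
#   trainFn = theano.function([inputVar, targetVar], loss, updates=updates)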