# ex07_save_load_model.nim

import ../src/arraymancer
import std/[strformat, os]

#[
A fully-connected ReLU network with one hidden layer, trained to predict y
from x by minimizing the squared Euclidean distance.

*** This example is adapted from the simple two-layer example to show how to
save and load models. We do this by defining our own model type and forward
procedure, then writing procedures that save and load the model's weights. ***
]#

# ##################################################################
# Hyperparameters
# BATCHSIZE is the batch size; INPUTD is the input dimension;
# HIDDEND is the hidden dimension; OUTPUTD is the output dimension.
let (BATCHSIZE, INPUTD, HIDDEND, OUTPUTD) = (32, 1000, 100, 10)

# Create the autograd context that will hold the computational graph
let ctx = newContext Tensor[float32]

# Create random Tensors to hold inputs and outputs, and wrap them in Variables.
let
  x = ctx.variable(randomTensor[float32](BATCHSIZE, INPUTD, 1'f32))
  y = randomTensor[float32](BATCHSIZE, OUTPUTD, 1'f32)
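# Note: only x is wrapped in a Variable so it can take part in the autograd
# graph; the target y is consumed by the loss as a plain Tensor and needs no
# gradient tracking.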

# ##################################################################
# Define the model.
type
  LinearLayer = object
    weight: Variable[Tensor[float32]]
    bias: Variable[Tensor[float32]]
  ExampleNetwork = object
    hidden: LinearLayer
    output: LinearLayer

template weightInit(shape: varargs[int], init_kind: untyped): Variable =
  ## Initialize a weight Variable that requires gradients, using the
  ## initialization scheme passed as `init_kind`.
  ctx.variable(
    init_kind(shape, float32),
    requires_grad = true)

proc newExampleNetwork(ctx: Context[Tensor[float32]]): ExampleNetwork =
  result.hidden.weight = weightInit(HIDDEND, INPUTD, kaiming_normal)
  result.hidden.bias   = ctx.variable(zeros[float32](1, HIDDEND), requires_grad = true)
  result.output.weight = weightInit(OUTPUTD, HIDDEND, yann_normal)
  result.output.bias   = ctx.variable(zeros[float32](1, OUTPUTD), requires_grad = true)
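# For context on the initializers used above: kaiming_normal is He
# initialization, a common default for layers followed by relu, and
# yann_normal is LeCun initialization; both are provided by Arraymancer.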

proc forward(network: ExampleNetwork, x: Variable): Variable =
  result = x.linear(
    network.hidden.weight, network.hidden.bias).relu.linear(
    network.output.weight, network.output.bias)
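# Shape walk-through: x is [BATCHSIZE, INPUTD]; the hidden linear layer maps
# it to [BATCHSIZE, HIDDEND]; relu is applied element-wise; the output linear
# layer maps that to [BATCHSIZE, OUTPUTD], matching the shape of y.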

proc save(network: ExampleNetwork) =
  ## Save the model by writing each weight and bias tensor to its own
  ## .npy file. This is a quick prototype; a better approach would be to
  ## save all weights and biases of the model into a single file.
  network.hidden.weight.value.write_npy("hiddenweight.npy")
  network.hidden.bias.value.write_npy("hiddenbias.npy")
  network.output.weight.value.write_npy("outputweight.npy")
  network.output.bias.value.write_npy("outputbias.npy")
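# A sketch of the "single file" idea mentioned above: Arraymancer has an
# optional HDF5 backend (read_hdf5/write_hdf5), whose availability and exact
# signatures depend on your build. The block below is therefore only
# illustrative and disabled with `when false:`; the proc name saveHdf5 and
# the dataset names are hypothetical.
when false:
  proc saveHdf5(network: ExampleNetwork, path: string) =
    # One named dataset per parameter, all inside a single .h5 file.
    network.hidden.weight.value.write_hdf5(path, name = "hidden_weight")
    network.hidden.bias.value.write_hdf5(path, name = "hidden_bias")
    network.output.weight.value.write_hdf5(path, name = "output_weight")
    network.output.bias.value.write_hdf5(path, name = "output_bias")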

proc load(ctx: Context[Tensor[float32]]): ExampleNetwork =
  ## Rebuild the model from the .npy files written by `save`.
  result.hidden.weight = ctx.variable(read_npy[float32]("hiddenweight.npy"), requires_grad = true)
  result.hidden.bias   = ctx.variable(read_npy[float32]("hiddenbias.npy"), requires_grad = true)
  result.output.weight = ctx.variable(read_npy[float32]("outputweight.npy"), requires_grad = true)
  result.output.bias   = ctx.variable(read_npy[float32]("outputbias.npy"), requires_grad = true)
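# Note that load mirrors save file for file, and each tensor is re-wrapped in
# a Variable with requires_grad = true, so the restored parameters are
# registered with the autograd context and the loaded model can keep training.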

var
  # Quick prototype for choosing how to build the model: if the .npy files
  # exist in the current directory, load the saved model; otherwise create
  # a new one.
  model = if fileExists("hiddenweight.npy"): ctx.load() else: ctx.newExampleNetwork()
  optim = model.optimizer(SGD, learning_rate = 1e-4'f32)

# ##################################################################
# Training
for t in 0 ..< 250:
  let
    y_pred = model.forward(x)
    loss = y_pred.mse_loss(y)

  echo &"Epoch {t}: loss {loss.value[0]}"

  loss.backprop()
  optim.update()

# Save the model
model.save()

# Simple sanity check for loading the model (validates that it was saved
# correctly).
let hidden_weights = model.hidden.weight.value
let newModel = ctx.load()
doAssert newModel.hidden.weight.value == hidden_weights,
  "loaded model weights do not match the original model"
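
# To try it out (assuming the repo layout implied by the `../src/arraymancer`
# import, i.e. running from the examples/ directory):
#   nim c -r ex07_save_load_model.nim
# Running it a second time exercises the other branch of the model assignment
# above: the weights saved by the first run are loaded instead of a fresh
# network being initialized.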