FeedForward.hs
{-# LANGUAGE BangPatterns #-}
module FeedForward ( NeuralNetwork
, WeightMatrix
, ActivationFunction
, LearningRateStrategy (..)
, BackPropStrategy (..)
, sigmoid
, sigmoid'
, getStructure
, getWeights
, getActivationFunction
, mkNeuralNetwork
, test
) where
import Data.List (foldl')
import Data.Vector (Vector(..))
import qualified Data.Vector as V (singleton, scanl', scanr', fromList, toList, zip, last, head, tail, init, zipWith3, cons)
import Debug.Trace (trace)
import Numeric.LinearAlgebra.HMatrix hiding (Vector)
import System.Random (randomRs, newStdGen)
import System.Random.MWC
import Graphics.Rendering.Plot
-- | Activation function for neurons in a layer
type ActivationFunction = Double -> Double
-- | Sigmoid function, has a nice derivative
sigmoid :: Double -> Double
sigmoid x = 1 / (1 + exp (-x))
-- | Sigmoid derivative, expressed nicely in terms of the sigmoid
sigmoid' :: Double -> Double
sigmoid' x = sigmoid x * (1 - sigmoid x)
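-- Illustrative sanity check added for this write-up (not part of the original API):
-- the closed form above should agree with a central finite difference of 'sigmoid'
-- up to numerical error.
_sigmoidDerivMatchesFiniteDiff :: Double -> Bool
_sigmoidDerivMatchesFiniteDiff x =
  let eps     = 1e-6
      numeric = (sigmoid (x + eps) - sigmoid (x - eps)) / (2 * eps)
  in abs (sigmoid' x - numeric) < 1e-6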
-- Matrix of weights between two layers
type WeightMatrix = Matrix Double
-- List of number of units in each layer, first layer is input, last is output
type Structure = [Int]
-- Learning rate strategy for training
data LearningRateStrategy = FixedRate Double -- Learning rate alpha will remain constant
deriving (Show, Eq)
-- | The Gradient Descent Strategy to use with BackPropagation
data BackPropStrategy = BatchGradientDescent -- Accumulate error on all cases before performing a weights update
| MiniBatchGradientDescent Int -- Update weights after N computations
                      | OnlineGradientDescent -- a.k.a. "stochastic": mini-batch with N=1, i.e. update weights after each training case
deriving (Show, Eq)
-- | Type that defines a training example: a pair of the input vector and the target vector
type TrainingExample = (Matrix Double, Matrix Double) -- Both the input and the target must be column vectors
-- | The training data set is the list of all training examples
type TrainingSet = [TrainingExample]
-- | Data type representing a Feed-Forward Neural Network
data NeuralNetwork = NeuralNetwork
{ structure :: [Int] -- ^ [3,4,2,1] for a 3-layered network, with 3 input units, 1 output unit, 4 units in first hidden layer and 2 units in second hidden layer
, weights :: [WeightMatrix] -- ^ Weights matrices to apply from each layer to the following
, activationFunction :: Double -> Double
}
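-- Illustrative helper added for this write-up (not part of the original API): the
-- expected shape of each weight matrix for a given structure, accounting for the
-- bias row that mkNeuralNetwork prepends to every layer. For a [2,2,1] network this
-- yields [(3,2),(3,1)].
_expectedWeightShapes :: Structure -> [(Int, Int)]
_expectedWeightShapes xs = [ (n1 + 1, n2) | (n1, n2) <- zip xs (tail xs) ]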
-- | Returns a Neural Network's architecture (since internal structure is not exposed)
getStructure :: NeuralNetwork -> [Int]
getStructure (NeuralNetwork {structure = s}) = s
-- | Returns a Neural Network's weight matrices (since internal structure is not exposed)
getWeights :: NeuralNetwork -> [WeightMatrix]
getWeights (NeuralNetwork {weights = w}) = w
-- | Returns a Neural Network's activation function (since internal structure is not exposed)
getActivationFunction :: NeuralNetwork -> (Double -> Double)
getActivationFunction (NeuralNetwork {activationFunction = h}) = h
{- | Create a Feed-Forward Neural Network whose weights are randomly initialized following this procedure:
- first, random weights are generated in the range (-1/√i, +1/√i), where i is the number of input neurons
- then the Nguyen-Widrow method is used to readjust the weight distribution
_Note_: bias neurons are automatically added to each layer, so do not include them in the structure
_Note_: for now, the method is not perfect because it only computes h, i and n for the first layer; it should be done for all layers to be accurate. **TO FIX**
-}
mkNeuralNetwork :: ActivationFunction -> Structure -> IO NeuralNetwork
mkNeuralNetwork _ xs | length xs < 2 = error "A Neural Network must have at least an input layer and an output layer, so your structure must contain at least 2 numbers"
| any (<= 0) xs = error "You can't have zero or a negative number of units in a layer"
mkNeuralNetwork h xs = do
!initRandomWeights <- genInitWeights (zip xs (tail xs))
return $ NeuralNetwork
{ structure = xs
, weights = initRandomWeights
, activationFunction = h
}
where genInitWeights = mapM $ \(n1,n2) -> do
--let bound = 0.5 * sqrt (6 / (fromIntegral (n1+n2))) :: Double -- bound value taken from Andrew Ng's class
let bound = 1.0 / sqrt (fromIntegral n1)
n1' = n1 + 1
vs <- withSystemRandom . asGenST $ \gen -> uniformVector gen (n1'*n2)
return $ cmap (\w -> (w * 2 * bound) - bound) $ (n1'><n2) $ V.toList vs -- zero-mean the weights
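-- Example usage in GHCi (the shapes follow from the bias row added per layer; the
-- weight values themselves are random):
--
-- >>> nn <- mkNeuralNetwork sigmoid [2,3,1]
-- >>> map size (getWeights nn)
-- [(3,3),(4,1)]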
-- Run the Neural Network on the input matrix to get the output matrix (a bias neuron with value 1 is automatically added to each layer)
runNN :: NeuralNetwork -> Matrix Double -> Matrix Double
runNN nn input =
let ws = weights nn
in foldl' addOnesAndMultiply input ws
where addOnesAndMultiply :: Matrix Double -> Matrix Double -> Matrix Double
addOnesAndMultiply input weights =
let (nbInput, _) = size input
input' = konst 1 (nbInput, 1) ||| input
in cmap h (input' <> weights)
h = getActivationFunction nn
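-- Example (untrained network, so the actual values are arbitrary): each row of the
-- input matrix is one example, and runNN produces one output row per input row.
-- runNN is not exported, but it is in scope when this module itself is loaded in GHCi:
--
-- >>> nn <- mkNeuralNetwork sigmoid [2,2,1]
-- >>> size (runNN nn ((4><2) [0,0, 0,1, 1,0, 1,1]))
-- (4,1)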
-- Train the Neural Network with the backpropagation algorithm, making N passes over the training set
trainNTimes :: (NeuralNetwork, [Double]) -> TrainingSet -> Int -> LearningRateStrategy -> BackPropStrategy -> (NeuralNetwork, [Double])
trainNTimes (nn, c) _ 0 _ _ = (nn, c)
trainNTimes (nn, c) trainingSet nTimes (FixedRate alpha) backpropStrat =
    let newNN = trainOnce nn trainingSet alpha backpropStrat
        -- Cost of the current network over the whole training set; the examples are
        -- stored as column vectors, hence the transpositions before calling runNN.
        newC  = 0.5 * sum [ sumElements (cmap (^2) (runNN nn (tr x) - tr t)) | (x, t) <- trainingSet ]
        -- Should shuffle the training set after each pass, to avoid cycling
    in trainNTimes (newNN, newC:c) trainingSet (nTimes - 1) (FixedRate alpha) backpropStrat
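-- For reference, the quantity tracked in newC above is half the summed squared
-- error over all training examples:
--
--   E = (1/2) * sum_k || y_k - t_k ||^2
--
-- where y_k is the network output for example k and t_k is its target.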
-- Perform one training pass over the whole set. Note that only the
-- BatchGradientDescent strategy is handled here; the other constructors of
-- BackPropStrategy are not yet implemented.
trainOnce :: NeuralNetwork -> TrainingSet -> Double -> BackPropStrategy -> NeuralNetwork
trainOnce nn trainingSet alpha BatchGradientDescent =
let !zeroDeltas = initEmptyDeltas (getStructure nn)
!accDeltas = foldl' (updateNetwork nn) zeroDeltas trainingSet
-- !partialDerivatives = map (/ m) accDeltas :: [Matrix Double]
!rescaledDeltas = map (/ m) accDeltas :: [Matrix Double]
currWeights = getWeights nn
!partialDerivatives = zipWith (regularize 0) rescaledDeltas currWeights
updatedWeights = zipWith updateWeights currWeights partialDerivatives :: [WeightMatrix]
in nn {weights = updatedWeights}
where initEmptyDeltas :: Structure -> [Matrix Double]
initEmptyDeltas [_] = []
initEmptyDeltas (l1:l2:xs) =
let l1' = l1 + 1
in (l1'><l2) (repeat 0) : initEmptyDeltas (l2:xs)
m = fromIntegral . length $ trainingSet
updateWeights :: WeightMatrix -> Matrix Double -> WeightMatrix
updateWeights w deriv = w - scale alpha deriv
regularize :: Double -> Matrix Double -> WeightMatrix -> Matrix Double
regularize lambda rDelta weight = let rDelta' = dropRows 1 rDelta
weight' = dropRows 1 weight
regu = rDelta' + (scale lambda weight')
in takeRows 1 rDelta === regu
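-- For reference, the batch update performed above is plain gradient descent on the
-- averaged, accumulated deltas:
--
--   W <- W - alpha * D     with   D = (1/m) * (sum of the per-example deltas)
--
-- where 'regularize' adds lambda * W to every row of D except the bias row
-- (lambda is currently fixed to 0 at the call site).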
-- Run one training example through the network, backpropagate its error, and add
-- its gradient contribution to the accumulated deltas.
updateNetwork :: NeuralNetwork -> [Matrix Double] -> TrainingExample -> [Matrix Double]
updateNetwork nn deltas (input, target) =
    let vectorWeights = V.fromList (getWeights nn)
        {- First, compute all the units' logits.
           Because forwardPass's first action is to apply the activation function,
           we can't simply call 'scanl forwardPass input vectorWeights', so we
           compute the first pass manually.
           The input also won't appear in the vector returned by scanl, so we prepend it.
        -}
        input' = konst 1 (1,1) === input
        w1 = V.head vectorWeights
        zs = input `V.cons` V.scanl' forwardPass (tr w1 <> input') (V.tail vectorWeights)
        -- Second, compute the error (delta) vectors for each layer
        ds = V.scanr' backprop (cmap h (V.last zs) - target) $ V.zip (V.tail . V.init $ zs) (V.tail vectorWeights)
        -- Third, accumulate the Deltas from the activations feeding each weight
        -- matrix: the raw input for the first one (matching the forward pass above),
        -- h(z) for the subsequent layers
        as = input : map (cmap h) (V.toList (V.tail zs))
    in zipWith3 accumDeltas deltas as (V.toList ds)
    where forwardPass :: Matrix Double -> WeightMatrix -> Matrix Double
          forwardPass lastZ w =
            let lastA  = cmap h lastZ             -- element-wise activation
                lastA' = konst 1 (1,1) === lastA  -- add the bias unit
            in tr w <> lastA'                     -- compute the next layer's logits
          backprop :: (Matrix Double, WeightMatrix) -> Matrix Double -> Matrix Double
          backprop (z, w) d =
            let prod  = dropRows 1 w <> d  -- the bias row receives no error signal
                deriv = cmap sigmoid' z    -- note: the derivative is hard-coded to the sigmoid, regardless of 'h'
            in prod * deriv                -- element-wise product
          accumDeltas :: Matrix Double -> Matrix Double -> Matrix Double -> Matrix Double
          accumDeltas delta a d =
            let a' = konst 1 (1,1) === a  -- prepend the bias unit
            in delta + (a' <> tr d)       -- this example's gradient contribution
          h = getActivationFunction nn
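-- The error vectors computed in 'ds' follow the usual backpropagation recurrence
-- (with an element-wise product against the activation derivative):
--
--   delta_output = h(z_output) - target
--   delta_layer  = (W . delta_next) .* sigmoid'(z_layer)
--
-- where W is the weight matrix connecting the layer to the next one, with its bias
-- row dropped. accumDeltas then adds each example's outer product
--   [1; a_layer] . delta_next^T
-- into the running Deltas, which trainOnce averages into the gradient.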
-- | Train a small network on the XOR data set, print the outputs before and after training, and plot the training cost. Usage: test "plot.png"
test :: FilePath -> IO ()
test fp = do
nn <- mkNeuralNetwork sigmoid [2,2,1]
let raw = [[0,0],[0,1],[1,0],[1,1]] :: [[Double]]
rawSet = map (2><1) raw :: [Matrix Double]
m = length raw
n = length (head raw)
input = (m><n) (concat raw) :: Matrix Double
nbPasses = 1000
alpha = 3.5
putStrLn $ "Initial Weights:\n"
mapM_ (putStrLn . show) (getWeights nn)
putStrLn "Initial run:\n"
putStrLn $ show $ runNN nn input
let target = map (1><1) [[0],[1],[1],[0]] :: [Matrix Double]
trainingSet = zip rawSet target
output = runNN nn input
let (newNN, costs) = trainNTimes (nn, []) trainingSet nbPasses (FixedRate alpha) BatchGradientDescent
--putStrLn "Final Weights:\n"
--mapM_ (putStrLn . show) (getWeights newNN)
putStrLn "Final run:\n"
putStrLn $ show $ runNN newNN input
-- putStrLn "\nCosts:\n"
-- putStrLn $ show $ head $ costs
-- putStrLn $ show $ reverse costs
-- Displaying the cost in the file
let ts = vector [1.. fromIntegral nbPasses]
let graph = do
plot (ts, [linepoint (vector (reverse costs)) (1.0::LineWidth, blue) (Cross, blue)])
title "Cost Function"
subtitle $ "alpha = " ++ show alpha ++ "\n" ++ show nbPasses ++ " passes"
xlabel "# Pass"
ylabel "Cost"
grid True
writeFigure PNG fp (1024, 768) graph
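-- A minimal entry point, added as an example (not part of the original module), for
-- compiling this file into an executable instead of loading it in GHCi. If training
-- converges, the final run should approach the XOR targets [0,1,1,0], though this is
-- not guaranteed for every random initialization.
--
-- main :: IO ()
-- main = test "plot.png"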