Merge pull request #5 from Az-r-ow/callbacks
Callbacks: Base Class, EarlyStopping and CSVLogger
Az-r-ow authored Mar 6, 2024
2 parents 21ad506 + c34ae2e commit 1baf5a5
Showing 22 changed files with 765 additions and 176 deletions.
7 changes: 5 additions & 2 deletions TODO.md
@@ -1,19 +1,22 @@
## TODOS :

- [ ] Setup `clang-format`
- [ ] CI versioning
- [ ] Find out why the predictions are not accurate on my_samples
- [ ] Implement batch norm
- [ ] Multithreading operations
- [ ] Parallelize operations
- [ ] Read : https://arxiv.org/pdf/1412.6980.pdf
- [ ] Implement a dropout to avoid over-fitting the model

## IN PROGRESS :

- [ ] Implement early stopping
- [ ] Python tests
- [ ] Optimize `Catch2`'s build
- [ ] Add gradient clipping

## DONE :

- [x] Implement early stopping
- [x] Update README to include more information about the project
- [x] Add CI / CD
- [x] Document the example in python
10 changes: 9 additions & 1 deletion docs/NeuralNetPy.rst
@@ -1,7 +1,15 @@
 NeuralNetPy
 ================
 
+.. toctree::
+   :maxdepth: 2
+
+   submodules/callbacks
+   submodules/optimizers
+   submodules/layers
+   submodules/models
+
 .. automodule:: NeuralNetPy
    :members:
    :undoc-members:
-   :show-inheritance:
+   :show-inheritance:
3 changes: 3 additions & 0 deletions docs/submodules/callbacks.rst
@@ -0,0 +1,3 @@
+.. automodule:: NeuralNetPy.callbacks
+   :members:
+   :show-inheritance:
3 changes: 3 additions & 0 deletions docs/submodules/layers.rst
@@ -0,0 +1,3 @@
+.. automodule:: NeuralNetPy.layers
+   :members:
+   :show-inheritance:
3 changes: 3 additions & 0 deletions docs/submodules/models.rst
@@ -0,0 +1,3 @@
+.. automodule:: NeuralNetPy.models
+   :members:
+   :show-inheritance:
3 changes: 3 additions & 0 deletions docs/submodules/optimizers.rst
@@ -0,0 +1,3 @@
+.. automodule:: NeuralNetPy.optimizers
+   :members:
+   :show-inheritance:
2 changes: 1 addition & 1 deletion docs/usage.rst
@@ -18,7 +18,7 @@ Then you can import the module just like any other :
 
 .. code-block:: python
 
-   import NeuralNetPy as nnp
+   import NeuralNetPy as NNP
 
 .. Attention::
    The path to the build folder must be set before importing the module.
4 changes: 2 additions & 2 deletions examples/train-predict-MNIST/guess_my_samples.py
@@ -12,10 +12,10 @@
 
 MY_SAMPLES_FOLDER = "./dataset/my_samples"
 
-network = NNP.Network()
+network = NNP.models.Network()
 
 # Loading the model from the file into the network created
-NNP.Model.load_from_file("model.bin", network)
+NNP.models.Model.load_from_file("model.bin", network)
 
 inputs = list()
 
20 changes: 11 additions & 9 deletions examples/train-predict-MNIST/main.py
@@ -28,14 +28,14 @@
 # Otherwise load data from file
 (x_train, y_train), (x_test, y_test) = load_data(MNIST_DATASET_FILE)
 
-network = NNP.Network()
+network = NNP.models.Network()
 
-network.addLayer(NNP.Flatten((28, 28)))
-network.addLayer(NNP.Dense(128, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE))
-network.addLayer(NNP.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN))
+network.addLayer(NNP.layers.Flatten((28, 28)))
+network.addLayer(NNP.layers.Dense(128, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE))
+network.addLayer(NNP.layers.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN))
 
 # Setting up the networks parameters
-network.setup(optimizer=NNP.Adam(0.02), loss=NNP.LOSS.MCE)
+network.setup(optimizer=NNP.optimizers.Adam(0.02), loss=NNP.LOSS.MCE)
 
 # # combining the data with the labels for later shuffling
 # combined = list(zip(x_train, y_train))
@@ -53,7 +53,9 @@
 
 trainingData.batch(128)
 
-network.train(trainingData, 3)
+callbacks = [NNP.callbacks.EarlyStopping("LOSS", 0.1, 1), NNP.callbacks.CSVLogger("training.csv")]
+
+network.train(trainingData, 3, callbacks)
 
 f_x_test = [normalize_img(x) for x in x_test]
 
@@ -68,11 +70,11 @@
 print(f"Num correct predictions : {correct}/{n} - accuracy {accuracy}")
 
 # Saving the trained model in a bin file
-NNP.Model.save_to_file('./model.bin', network)
+NNP.models.Model.save_to_file('./model.bin', network)
 
-saved_model = NNP.Network()
+saved_model = NNP.models.Network()
 
-NNP.Model.load_from_file('./model.bin', saved_model)
+NNP.models.Model.load_from_file('./model.bin', saved_model)
 
 # preparing the testing data
 predictions = saved_model.predict(f_x_test[:NUM_PREDICTIONS])
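On the C++ side, the new `train` overloads in `Network.cpp` (further down) accept the callbacks as a `std::vector<std::shared_ptr<Callback>>`. As a rough C++ counterpart of the Python snippet above, a minimal sketch, assuming the C++ `EarlyStopping` and `CSVLogger` constructors mirror the Python bindings (only the bindings are shown in this diff view):

// Sketch only: assumes C++-side EarlyStopping("LOSS", 0.1, 1) and
// CSVLogger("training.csv") constructors matching the Python usage above.
#include <memory>
#include <vector>

std::vector<std::shared_ptr<Callback>> callbacks = {
    std::make_shared<EarlyStopping>("LOSS", 0.1, 1),
    std::make_shared<CSVLogger>("training.csv")};

network.train(trainingData, 3, callbacks);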
10 changes: 5 additions & 5 deletions examples/train-predict-MNIST/test.py
@@ -17,14 +17,14 @@
 
 import NeuralNetPy as NNP
 
-network = NNP.Network()
+network = NNP.models.Network()
 
-network.addLayer(NNP.Dense(15, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.GLOROT))
-network.addLayer(NNP.Dense(20, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.GLOROT))
-network.addLayer(NNP.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN))
+network.addLayer(NNP.layers.Dense(15, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.GLOROT))
+network.addLayer(NNP.layers.Dense(20, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.GLOROT))
+network.addLayer(NNP.layers.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN))
 
 # Setting up the networks parameters
-network.setup(optimizer=NNP.SGD(1), epochs=1, loss=NNP.LOSS.MCE)
+network.setup(optimizer=NNP.optimizers.SGD(1), epochs=1, loss=NNP.LOSS.MCE)
 
 inputs = list()
 
4 changes: 4 additions & 0 deletions examples/train-predict-MNIST/training.csv
@@ -0,0 +1,4 @@
+LOSS,ACCURACY,EPOCH,
+0.628645,1.000000,0.000000,
+0.217920,1.000000,1.000000,
+0.130259,1.000000,2.000000,
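This file is the output of the new CSVLogger over the example's three training epochs: a header row of metric names matching the keys in `Network::getLogs()`, then one six-decimal row per epoch, each line carrying a trailing comma. The logger's implementation is not part of this diff view; a hypothetical sketch that reproduces this exact shape might look like:

#include <fstream>
#include <iomanip>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical epoch logger producing the format above: a header row,
// then one row per epoch, every field followed by a comma.
class CsvEpochLogger
{
public:
  explicit CsvEpochLogger(const std::string &filename) : file(filename) {}

  // Called once per epoch with the metric map (cf. Network::getLogs()).
  void onEpochEnd(const std::unordered_map<std::string, double> &logs)
  {
    if (columns.empty())
    {
      // Freeze a column order up front: unordered_map iteration order is
      // unspecified, so it must not be relied on between calls.
      for (const auto &kv : logs)
        columns.push_back(kv.first);
      for (const auto &name : columns)
        file << name << ",";
      file << "\n";
    }
    file << std::fixed << std::setprecision(6);
    for (const auto &name : columns)
      file << logs.at(name) << ",";
    file << "\n";
  }

private:
  std::ofstream file;
  std::vector<std::string> columns;
};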
128 changes: 102 additions & 26 deletions src/NeuralNet/Network.cpp
@@ -61,108 +61,160 @@ std::shared_ptr<Layer> Network::getOutputLayer() const
   return this->layers[this->layers.size() - 1];
 }
 
-double Network::train(std::vector<std::vector<double>> inputs, std::vector<double> labels, int epochs)
+double Network::train(std::vector<std::vector<double>> inputs, std::vector<double> labels, int epochs, std::vector<std::shared_ptr<Callback>> callbacks)
 {
-  return onlineTraining(inputs, labels, epochs);
+  try
+  {
+    return onlineTraining(inputs, labels, epochs, callbacks);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Training Interrupted : " << e.what() << '\n';
+    return loss;
+  }
 }
 
-double Network::train(std::vector<std::vector<std::vector<double>>> inputs, std::vector<double> labels, int epochs)
+double Network::train(std::vector<std::vector<std::vector<double>>> inputs, std::vector<double> labels, int epochs, std::vector<std::shared_ptr<Callback>> callbacks)
 {
-  return onlineTraining(inputs, labels, epochs);
+  try
+  {
+    return onlineTraining(inputs, labels, epochs, callbacks);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Training Interrupted : " << e.what() << '\n';
+    return loss;
+  }
 }
 
 // Specific implementation of train that takes TrainingData class as input
-double Network::train(TrainingData<std::vector<std::vector<double>>, std::vector<double>> trainingData, int epochs)
+double Network::train(TrainingData<std::vector<std::vector<double>>, std::vector<double>> trainingData, int epochs, std::vector<std::shared_ptr<Callback>> callbacks)
 {
-  return this->trainer(trainingData, epochs);
+  try
+  {
+    return this->trainer(trainingData, epochs, callbacks);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Training Interrupted : " << e.what() << '\n';
+    return loss;
+  }
 }
 
-double Network::train(TrainingData<std::vector<std::vector<std::vector<double>>>, std::vector<double>> trainingData, int epochs)
+double Network::train(TrainingData<std::vector<std::vector<std::vector<double>>>, std::vector<double>> trainingData, int epochs, std::vector<std::shared_ptr<Callback>> callbacks)
 {
-  return this->trainer(trainingData, epochs);
+  try
+  {
+    return this->trainer(trainingData, epochs, callbacks);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Training Interrupted : " << e.what() << '\n';
+    return loss;
+  }
 }
 
 template <typename D1, typename D2>
-double Network::trainer(TrainingData<D1, D2> trainingData, int epochs)
+double Network::trainer(TrainingData<D1, D2> trainingData, int epochs, std::vector<std::shared_ptr<Callback>> callbacks)
 {
   if (trainingData.batched)
-    return this->miniBatchTraining(trainingData, epochs);
-  return this->batchTraining(trainingData, epochs);
+    return this->miniBatchTraining(trainingData, epochs, callbacks);
+  return this->batchTraining(trainingData, epochs, callbacks);
 }
 
 template <typename D1, typename D2>
-double Network::miniBatchTraining(TrainingData<D1, D2> trainingData, int epochs)
+double Network::miniBatchTraining(TrainingData<D1, D2> trainingData, int epochs, std::vector<std::shared_ptr<Callback>> callbacks)
 {
   double loss;
   double sumLoss = 0;
+  trainingCheckpoint("onTrainBegin", callbacks);
 
-  for (int e = 0; e < epochs; e++)
+  for (cEpoch = 0; cEpoch < epochs; cEpoch++)
   {
-    TrainingGauge g(trainingData.inputs.size(), 0, epochs, (e + 1));
+    trainingCheckpoint("onEpochBegin", callbacks);
+    TrainingGauge g(trainingData.inputs.size(), 0, epochs, (cEpoch + 1));
     for (int b = 0; b < trainingData.inputs.size(); b++)
     {
+      trainingCheckpoint("onBatchBegin", callbacks);
      const int numOutputs = this->getOutputLayer()->getNumNeurons();
       const int inputsSize = trainingData.inputs.batches[b].size();
       Eigen::MatrixXd y = formatLabels(trainingData.labels.batches[b], {inputsSize, numOutputs});
 
       // computing outputs from forward propagation
       Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.batches[b]);
       loss = this->cmpLoss(o, y) / inputsSize;
+      accuracy = computeAccuracy(o, y);
       sumLoss += loss;
       this->backProp(o, y);
-      g.printWithLAndA(loss, computeAccuracy(o, y));
+      g.printWithLAndA(loss, accuracy);
+      trainingCheckpoint("onBatchEnd", callbacks);
     }
+    trainingCheckpoint("onEpochEnd", callbacks);
   }
 
+  trainingCheckpoint("onTrainEnd", callbacks);
   return sumLoss / trainingData.inputs.size();
 }
 
 template <typename D1, typename D2>
-double Network::batchTraining(TrainingData<D1, D2> trainingData, int epochs)
+double Network::batchTraining(TrainingData<D1, D2> trainingData, int epochs, std::vector<std::shared_ptr<Callback>> callbacks)
 {
   double loss;
   double sumLoss = 0;
   const int numOutputs = this->getOutputLayer()->getNumNeurons();
   const int numInputs = trainingData.inputs.data.size();
   Eigen::MatrixXd y = formatLabels(trainingData.labels.data, {numInputs, numOutputs});
+  trainingCheckpoint("onTrainBegin", callbacks);
 
-  for (int e = 0; e < epochs; e++)
+  for (cEpoch = 0; cEpoch < epochs; cEpoch++)
   {
-    TrainingGauge g(1, 0, epochs, (e + 1));
+    trainingCheckpoint("onEpochBegin", callbacks);
+    TrainingGauge g(1, 0, epochs, (cEpoch + 1));
     Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.data);
 
     loss = this->cmpLoss(o, y);
+    accuracy = computeAccuracy(o, y);
     sumLoss += loss;
 
    this->backProp(o, y);
-    g.printWithLoss(loss);
+    g.printWithLAndA(loss, accuracy);
+    trainingCheckpoint("onEpochEnd", callbacks);
   }
 
+  trainingCheckpoint("onTrainEnd", callbacks);
   return sumLoss / numInputs;
 }
 
 template <typename D1, typename D2>
-double Network::onlineTraining(std::vector<D1> inputs, std::vector<D2> labels, int epochs)
+double Network::onlineTraining(std::vector<D1> inputs, std::vector<D2> labels, int epochs, std::vector<std::shared_ptr<Callback>> callbacks)
 {
   double loss;
-  double sumLoss;
+  double sumLoss = 0;
+  int tCorrect = 0;
   const int numOutputs = this->getOutputLayer()->getNumNeurons();
   const int numInputs = inputs.size();
   Eigen::MatrixXd y = formatLabels(labels, {numInputs, numOutputs});
 
-  for (int e = 0; e < epochs; e++)
+  // Injecting callbacks
+  trainingCheckpoint("onTrainBegin", callbacks);
+
+  for (cEpoch = 0; cEpoch < epochs; cEpoch++)
   {
-    TrainingGauge tg(inputs.size(), 0, epochs, (e + 1));
+    trainingCheckpoint("onEpochBegin", callbacks);
+    TrainingGauge tg(inputs.size(), 0, epochs, (cEpoch + 1));
     for (auto &input : inputs)
     {
       Eigen::MatrixXd o = this->forwardProp(inputs);
       loss = this->cmpLoss(o, y);
       sumLoss += loss;
+      tCorrect += computeAccuracy(o, y);
       this->backProp(o, y);
       tg.printWithLoss(loss);
     }
+    // Computing metrics for the logs
+    accuracy = tCorrect / numInputs;
+    loss = sumLoss / numInputs;
+    trainingCheckpoint("onEpochEnd", callbacks);
  }
 
+  trainingCheckpoint("onTrainEnd", callbacks);
   return sumLoss / numInputs;
 }
 
@@ -259,6 +311,30 @@ void Network::updateOptimizerSetup(size_t numLayers)
   this->optimizer->insiderInit(numLayers);
 }
 
+void Network::trainingCheckpoint(std::string checkpointName, std::vector<std::shared_ptr<Callback>> callbacks)
+{
+  if (callbacks.size() == 0)
+    return;
+
+  std::unordered_map<std::string, double> logs = getLogs();
+
+  for (std::shared_ptr<Callback> callback : callbacks)
+  {
+    Callback::callMethod(callback, checkpointName, logs);
+  }
+}
+
+std::unordered_map<std::string, double> Network::getLogs()
+{
+  std::unordered_map<std::string, double> logs;
+
+  logs["LOSS"] = loss;
+  logs["ACCURACY"] = accuracy;
+  logs["EPOCH"] = cEpoch;
+
+  return logs;
+}
+
 /**
  * @note This function will return the accuracy of the given outputs compared to the labels.
  *
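The `trainingCheckpoint` helper above hands a checkpoint name such as "onEpochEnd" plus the current logs to every registered callback through `Callback::callMethod`, and the try/catch wrappers in the `train` overloads let a callback abort training simply by throwing. The base class and EarlyStopping live in files not shown in this view, so the following is only a sketch of the mechanism the diff implies; reading the EarlyStopping arguments as (metric, min delta, patience) is an assumption based on `EarlyStopping("LOSS", 0.1, 1)` in the example:

#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>

using Logs = std::unordered_map<std::string, double>;

// Sketch of a callback base class consistent with the calls above.
class Callback
{
public:
  virtual ~Callback() = default;
  virtual void onTrainBegin(Logs) {}
  virtual void onEpochBegin(Logs) {}
  virtual void onBatchBegin(Logs) {}
  virtual void onBatchEnd(Logs) {}
  virtual void onEpochEnd(Logs) {}
  virtual void onTrainEnd(Logs) {}

  // Maps a checkpoint name like "onEpochEnd" to the matching virtual method.
  static void callMethod(std::shared_ptr<Callback> cb, const std::string &name, Logs logs)
  {
    if (name == "onTrainBegin") cb->onTrainBegin(logs);
    else if (name == "onEpochBegin") cb->onEpochBegin(logs);
    else if (name == "onBatchBegin") cb->onBatchBegin(logs);
    else if (name == "onBatchEnd") cb->onBatchEnd(logs);
    else if (name == "onEpochEnd") cb->onEpochEnd(logs);
    else if (name == "onTrainEnd") cb->onTrainEnd(logs);
  }
};

// Hypothetical EarlyStopping: stops training by throwing, which the
// try/catch in Network::train reports as "Training Interrupted : ...".
class EarlyStopping : public Callback
{
public:
  EarlyStopping(std::string metric, double minDelta, int patience)
      : metric(std::move(metric)), minDelta(minDelta), patience(patience) {}

  void onEpochEnd(Logs logs) override
  {
    double value = logs.at(metric); // e.g. logs["LOSS"]
    if (best - value > minDelta)
    {
      best = value; // improved enough; assumes lower is better, as for LOSS
      wait = 0;
      return;
    }
    if (++wait > patience)
      throw std::runtime_error("EarlyStopping: " + metric + " stopped improving");
  }

private:
  std::string metric;
  double minDelta;
  int patience;
  double best = std::numeric_limits<double>::infinity();
  int wait = 0;
};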