Commit

Merge pull request #34 from Az-r-ow/callbacks
ModelCheckpoint Callback
Az-r-ow authored May 14, 2024
2 parents 509606b + ddd02ab commit de66034
Showing 17 changed files with 410 additions and 232 deletions.
46 changes: 25 additions & 21 deletions docs/quick-start.rst
@@ -42,46 +42,50 @@ If you look at the `main.py` file, you'll notice :

    get_MNIST_dataset(MNIST_DATASET_FILE)

- Initiating and composing a Neural Network

  .. code-block:: python

    network = NNP.models.Network()

    network.addLayer(NNP.layers.Flatten((28, 28)))
    network.addLayer(NNP.layers.Dense(128, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE))
    network.addLayer(NNP.layers.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN))

    # Setting up the network's parameters
    network.setup(optimizer=NNP.optimizers.Adam(0.01), loss=NNP.LOSS.MCE)

- Formatting and normalizing the data, then storing it in a `TrainingData3dI` object, which simplifies batching.

  .. code-block:: python

    # Preparing the training data
    f_x_train = [normalize_img(x) for x in x_train]

    trainingData = NNP.TrainingData3dI(f_x_train[:NUM_TRAININGS], y_train[:NUM_TRAININGS])

    trainingData.batch(128)  # Creating batches of 128 inputs

- Training the network
- Computing the accuracy
- Serializing the trained model and saving it in a binary format

  .. code-block:: python

    # Saving the trained model in a bin file
    NNP.models.Model.save_to_file('./model.bin', network)

- Loading the model into a new instance of `Network` from the file (simply for showcase)

  .. code-block:: python

    saved_model = NNP.models.Network()

    NNP.models.Model.load_from_file('./model.bin', saved_model)

- Testing the model with the `test_data`
2 changes: 1 addition & 1 deletion examples/train-predict-MNIST/main.py
@@ -54,7 +54,7 @@
 
 trainingData.batch(128)
 
-callbacks = [NNP.callbacks.EarlyStopping("LOSS", 0.01), NNP.callbacks.CSVLogger("training.csv")]
+callbacks = [NNP.callbacks.EarlyStopping("LOSS", 0.01), NNP.callbacks.CSVLogger("training.csv"), NNP.callbacks.ModelCheckpoint("checkpoints", False, 2, verbose=True)]
 
 network.train(trainingData, 10, callbacks)
 
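Note: the `NNP.callbacks.ModelCheckpoint("checkpoints", False, 2, verbose=True)` entry added above is the feature this PR ships. Below is a minimal sketch of wiring the same three callbacks up from C++; the callback header paths and the `ModelCheckpoint` parameter meanings (checkpoint folder, save-best-only flag, save interval in epochs, verbose) are inferred from the positional Python call above, so treat the exact signatures as assumptions rather than documented API.

// Sketch only: a C++ mirror of the Python example above. Header locations and
// constructor parameters are assumptions inferred from the bindings, not
// verified library API.
#include <memory>
#include <vector>

#include "src/NeuralNet/Network.hpp"
#include "src/NeuralNet/callbacks/CSVLogger.hpp"        // assumed path
#include "src/NeuralNet/callbacks/Callback.hpp"         // assumed path
#include "src/NeuralNet/callbacks/EarlyStopping.hpp"    // assumed path
#include "src/NeuralNet/callbacks/ModelCheckpoint.hpp"  // assumed path

using namespace NeuralNet;

int main() {
  Network network;
  // ... network.addLayer(...) and network.setup(...) as in docs/quick-start.rst ...

  std::vector<std::shared_ptr<Callback>> callbacks = {
      std::make_shared<EarlyStopping>("LOSS", 0.01),
      std::make_shared<CSVLogger>("training.csv"),
      // Assumed parameter meaning: folder, saveBestOnly, interval (epochs), verbose.
      std::make_shared<ModelCheckpoint>("checkpoints", false, 2, true)};

  // network.train(trainingData, 10, callbacks);  // as in main.py above
}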
95 changes: 12 additions & 83 deletions main.cpp
@@ -1,94 +1,23 @@
 #include "main.hpp"
 
-using namespace NeuralNet;
-
-int main(int argc, char *argv[])
-{
-
-  Eigen::MatrixXd m(2, 2);
-
-  m << -4, 0,
-      -1, -4;
-
-  Eigen::MatrixXd m2 = m.cwiseAbs();
-
-  std::cout << m2 << "\n";
-  // Network network;
-  // std::shared_ptr<Optimizer> AdamOptimizer = std::make_shared<Adam>(1);
-
-  // std::shared_ptr<Layer> layer1 = std::make_shared<Dense>(3, ACTIVATION::SIGMOID, WEIGHT_INIT::GLOROT);
-  // std::shared_ptr<Layer> layer2 = std::make_shared<Dense>(2, ACTIVATION::SIGMOID, WEIGHT_INIT::HE);
-  // std::shared_ptr<Layer> layerOuput = std::make_shared<Dense>(2, ACTIVATION::SIGMOID, WEIGHT_INIT::GLOROT);
-
-  // network.addLayer(layer1);
-  // network.addLayer(layer2);
-  // network.addLayer(layerOuput);
-
-  // std::shared_ptr<Layer> l = network.getLayer(1);
-  // std::cout << "fetched layer from network : " << l->getNumNeurons() << "\n";
-  // network.setup(AdamOptimizer);
-
-  // network.setup(AdamOptimizer, LOSS::QUADRATIC);
-
-  // std::cout << "num of layers : " << network.getNumLayers() << "\n";
+#include <iostream>
+#include <string>
 
-  // std::cout
-  //     << "Input Dense before training : "
-  //     << "\n";
-  // layer1->printWeights();
-  // layer1->printOutputs();
-
-  // std::cout << "Dense 2 before training : "
-  //           << "\n";
-  // layer2->printWeights();
-  // layer2->printOutputs();
-
-  // std::cout << "Output Dense before training : "
-  //           << "\n";
-  // layerOuput->printWeights();
-  // layerOuput->printOutputs();
-
-  // // training the network
-  // std::vector<std::vector<double>> inputs;
-  // inputs.push_back(randDVector(layer1->getNumNeurons(), -1, 1));
-  // inputs.push_back(randDVector(layer1->getNumNeurons(), -1, 1));
-  // std::vector<double> labels = {1, 1};
-
-  // TrainingData tr_data(inputs, labels);
-  // tr_data.batch(1);
-
-  // network.train(tr_data);
-
-  // std::shared_ptr<Layer> input = network.getLayer(0);
-  // std::shared_ptr<Layer> test = network.getLayer(1);
-  // std::shared_ptr<Layer> test2 = network.getLayer(2);
-
-  // std::cout << "Input Dense after training : "
-  //           << "\n";
-  // input->printWeights();
-  // input->printOutputs();
-
-  // std::cout << "Dense 2 after training : "
-  //           << "\n";
-  // test->printWeights();
-  // test->printOutputs();
-
-  // std::cout << "Output Dense after training : "
-  //           << "\n";
-  // test2->printWeights();
-  // test2->printOutputs();
+using namespace NeuralNet;
 
-  // std::vector<double> data = {1, 2, 3, 4, 5, 6, 7, 8};
+int main(int argc, char *argv[]) {
+  std::string fileName = "test.cpp";
+  std::string folderPath = "build/";
 
-  // Tensor t(data);
+  std::string filepath = constructFilePath(folderPath, fileName);
 
-  // t.batch(2);
+  std::cout << "Constructed file path : " << filepath << "\n";
 
-  // std::vector<std::vector<double>> batch = t.getBatchedData();
+  Network test;
 
-  // std::cout << "Num batches : " << batch.size() << "\n";
+  std::unique_ptr<Network> ptr = std::make_unique<Network>();
 
-  // TrainingData td(data, data);
+  decltype(*ptr) obj = *ptr;
 
-  // td.batch(2);
+  std::cout << "Type of obj: " << typeid(obj).name() << std::endl;
 }
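Note on the `decltype` line in the new `main.cpp`: dereferencing a pointer is an lvalue expression, so `decltype(*ptr)` deduces `Network&`, meaning `obj` is a reference aliasing the pointee rather than a copy. A self-contained sketch (with a stand-in `Network` struct, not the library's class) demonstrating this:

#include <memory>
#include <type_traits>

struct Network {};  // stand-in for NeuralNet::Network

int main() {
  std::unique_ptr<Network> ptr = std::make_unique<Network>();

  // *ptr is an lvalue expression, so decltype(*ptr) is Network&:
  // obj binds to the pointee instead of copying it.
  decltype(*ptr) obj = *ptr;
  static_assert(std::is_same_v<decltype(obj), Network&>);
}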
9 changes: 5 additions & 4 deletions main.hpp
@@ -1,11 +1,12 @@
#pragma once

#include <cereal/archives/binary.hpp>
#include <fstream>
#include <iostream>

#include "src/NeuralNet/Network.hpp"
#include "src/NeuralNet/data/Tensor.hpp"
#include "src/NeuralNet/layers/Dense.hpp"
#include "src/NeuralNet/layers/Layer.hpp"
#include "src/NeuralNet/optimizers/optimizers.hpp"
#include "src/NeuralNet/utils/Functions.hpp"
34 changes: 32 additions & 2 deletions src/NeuralNet/Model.hpp
@@ -1,7 +1,11 @@
#pragma once

#include <cereal/access.hpp>
#include <cereal/archives/binary.hpp>
#include <cereal/archives/portable_binary.hpp>
#include <cereal/cereal.hpp>
#include <cereal/types/base_class.hpp>
#include <cereal/types/common.hpp>
#include <cereal/types/polymorphic.hpp>
#include <csignal>
#include <fstream>
#include <string>
@@ -53,11 +57,37 @@ class Model {
    archive(model);
  };

  virtual void to_file(const std::string &filename) = 0;
  virtual void from_file(const std::string &filename) = 0;

  // Declare at least one virtual function
  virtual ~Model() = default;

 private:
  friend class cereal::access;

  // Serialization function
  template <class Archive>
  void save(Archive &archive) const {
    archive(loss, accuracy);
  };

  template <class Archive>
  void load(Archive &archive) {
    archive(loss, accuracy);
  };

 protected:
  friend class Callback;
  int cEpoch = 0;  // Current epoch
  double loss = 0, accuracy = 0;

  void registerSignals() const {
    // Registering signals
    signal(SIGINT, signalHandler);
    signal(SIGTERM, signalHandler);
  }
};
} // namespace NeuralNet

CEREAL_REGISTER_TYPE(NeuralNet::Model);
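Note: the `save`/`load` pair added above is cereal's split member serialization: output archives call `save`, input archives call `load`, and `friend class cereal::access` lets cereal reach private members. A minimal standalone sketch of the same pattern, using an illustrative `Metrics` class and file name rather than the library's own types:

#include <cereal/access.hpp>
#include <cereal/archives/portable_binary.hpp>
#include <fstream>

class Metrics {  // illustrative stand-in, not NeuralNet::Model
  friend class cereal::access;

  double loss = 0, accuracy = 0;

  template <class Archive>
  void save(Archive &archive) const {  // used when writing
    archive(loss, accuracy);
  }

  template <class Archive>
  void load(Archive &archive) {  // used when reading
    archive(loss, accuracy);
  }
};

int main() {
  Metrics m;
  {
    std::ofstream os("metrics.bin", std::ios::binary);
    cereal::PortableBinaryOutputArchive out(os);
    out(m);  // invokes Metrics::save
  }
  {
    std::ifstream is("metrics.bin", std::ios::binary);
    cereal::PortableBinaryInputArchive in(is);
    in(m);  // invokes Metrics::load
  }
}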
26 changes: 7 additions & 19 deletions src/NeuralNet/Network.cpp
@@ -51,27 +51,27 @@ std::shared_ptr<Layer> Network::getOutputLayer() const {
   return this->layers[this->layers.size() - 1];
 }
 
-double Network::train(std::vector<std::vector<double>> inputs,
-                      std::vector<double> labels, int epochs,
+double Network::train(std::vector<std::vector<double>> X, std::vector<double> y,
+                      int epochs,
                       std::vector<std::shared_ptr<Callback>> callbacks,
                       bool progBar) {
   this->progBar = progBar;
   try {
-    return onlineTraining(inputs, labels, epochs, callbacks);
+    return onlineTraining(X, y, epochs, callbacks);
   } catch (const std::exception &e) {
     trainingCheckpoint("onTrainEnd", callbacks);
     std::cerr << "Training Interrupted : " << e.what() << '\n';
     return loss;
   }
 }
 
-double Network::train(std::vector<std::vector<std::vector<double>>> inputs,
-                      std::vector<double> labels, int epochs,
+double Network::train(std::vector<std::vector<std::vector<double>>> X,
+                      std::vector<double> y, int epochs,
                       std::vector<std::shared_ptr<Callback>> callbacks,
                       bool progBar) {
   this->progBar = progBar;
   try {
-    return onlineTraining(inputs, labels, epochs, callbacks);
+    return onlineTraining(X, y, epochs, callbacks);
   } catch (const std::exception &e) {
     trainingCheckpoint("onTrainEnd", callbacks);  // wrap up callbacks
     std::cerr << "Training Interrupted : " << e.what() << '\n';
@@ -330,23 +330,11 @@ void Network::trainingCheckpoint(
     std::vector<std::shared_ptr<Callback>> callbacks) {
   if (callbacks.size() == 0) return;
 
-  std::unordered_map<std::string, double> logs = getLogs();
-
   for (std::shared_ptr<Callback> callback : callbacks) {
-    Callback::callMethod(callback, checkpointName, logs);
+    Callback::callMethod(callback, checkpointName, *this);
   }
 }
 
-std::unordered_map<std::string, double> Network::getLogs() {
-  std::unordered_map<std::string, double> logs;
-
-  logs["LOSS"] = loss;
-  logs["ACCURACY"] = accuracy;
-  logs["EPOCH"] = cEpoch;
-
-  return logs;
-}
-
 /**
  * @note This function will return the accuracy of the given outputs compared to
  * the labels.
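Note: the change above hands callbacks the model itself instead of a string-keyed logs map (which only carried LOSS, ACCURACY, and EPOCH); that is what lets `ModelCheckpoint` serialize the network directly. A rough standalone sketch of the shape of that flow follows; the hook and getter names are illustrative assumptions, not the library's verified API.

#include <iostream>

// Illustrative stand-ins for the real Model/Callback types.
class Model {
 public:
  double getLoss() const { return loss; }
  double getAccuracy() const { return accuracy; }

 protected:
  double loss = 0.42, accuracy = 0.9;
};

class Callback {
 public:
  virtual ~Callback() = default;
  // Hypothetical hook: receives the model instead of an unordered_map of logs.
  virtual void onEpochEnd(Model &model) = 0;
};

class ConsoleLogger : public Callback {
 public:
  void onEpochEnd(Model &model) override {
    std::cout << "loss=" << model.getLoss()
              << " accuracy=" << model.getAccuracy() << "\n";
  }
};

int main() {
  Model model;
  ConsoleLogger logger;
  logger.onEpochEnd(model);  // prints loss=0.42 accuracy=0.9
}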