From eb5b7dcbe5d86be5e1c5b2aa0e657e82696d8c8a Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Sat, 2 Mar 2024 19:14:51 +0100 Subject: [PATCH 01/12] feat(callbacks): created base class Callback and EarlyStopping callback --- TODO.md | 4 +- src/NeuralNet/Network.cpp | 20 ++--- src/NeuralNet/Network.hpp | 28 ++++--- src/NeuralNet/callbacks/Callback.hpp | 61 ++++++++++++++++ src/NeuralNet/callbacks/EarlyStopping.hpp | 89 +++++++++++++++++++++++ src/NeuralNet/utils/Functions.hpp | 19 +++++ src/bindings/NeuralNetPy.cpp | 44 ++++++++++- 7 files changed, 239 insertions(+), 26 deletions(-) create mode 100644 src/NeuralNet/callbacks/Callback.hpp create mode 100644 src/NeuralNet/callbacks/EarlyStopping.hpp diff --git a/TODO.md b/TODO.md index c524619..5f1144e 100644 --- a/TODO.md +++ b/TODO.md @@ -2,13 +2,15 @@ - [ ] Find out why the predictions are not accurate on my_samples - [ ] Implement batch norm -- [ ] Multithreading operations +- [ ] Parallelize operations - [ ] Read : https://arxiv.org/pdf/1412.6980.pdf - [ ] Implement a dropout to avoid over-fitting the model ## IN PROGRESS : - [ ] Implement early stopping + - [ ] Create a CallBack base class + - [ ] Create an EarlyStopping class - [ ] Optimize `Catch2`'s build - [ ] Add gradient clipping diff --git a/src/NeuralNet/Network.cpp b/src/NeuralNet/Network.cpp index 8e21d4f..7ebaa7b 100644 --- a/src/NeuralNet/Network.cpp +++ b/src/NeuralNet/Network.cpp @@ -61,37 +61,37 @@ std::shared_ptr Network::getOutputLayer() const return this->layers[this->layers.size() - 1]; } -double Network::train(std::vector> inputs, std::vector labels, int epochs) +double Network::train(std::vector> inputs, std::vector labels, int epochs, std::vector> callbacks) { return onlineTraining(inputs, labels, epochs); } -double Network::train(std::vector>> inputs, std::vector labels, int epochs) +double Network::train(std::vector>> inputs, std::vector labels, int epochs, std::vector> callbacks) { return onlineTraining(inputs, labels, epochs); } // Specific implementation of train that takes TrainingData class as input -double Network::train(TrainingData>, std::vector> trainingData, int epochs) +double Network::train(TrainingData>, std::vector> trainingData, int epochs, std::vector> callbacks) { return this->trainer(trainingData, epochs); } -double Network::train(TrainingData>>, std::vector> trainingData, int epochs) +double Network::train(TrainingData>>, std::vector> trainingData, int epochs, std::vector> callbacks) { return this->trainer(trainingData, epochs); } template -double Network::trainer(TrainingData trainingData, int epochs) +double Network::trainer(TrainingData trainingData, int epochs, std::vector> callbacks) { if (trainingData.batched) - return this->miniBatchTraining(trainingData, epochs); - return this->batchTraining(trainingData, epochs); + return this->miniBatchTraining(trainingData, epochs, callbacks); + return this->batchTraining(trainingData, epochs, callbacks); } template -double Network::miniBatchTraining(TrainingData trainingData, int epochs) +double Network::miniBatchTraining(TrainingData trainingData, int epochs, std::vector> callbacks) { double loss; double sumLoss = 0; @@ -118,7 +118,7 @@ double Network::miniBatchTraining(TrainingData trainingData, int epochs) } template -double Network::batchTraining(TrainingData trainingData, int epochs) +double Network::batchTraining(TrainingData trainingData, int epochs, std::vector> callbacks) { double loss; double sumLoss = 0; @@ -142,7 +142,7 @@ double Network::batchTraining(TrainingData trainingData, int epochs) 
} template -double Network::onlineTraining(std::vector inputs, std::vector labels, int epochs) +double Network::onlineTraining(std::vector inputs, std::vector labels, int epochs, std::vector> callbacks) { double loss; double sumLoss; diff --git a/src/NeuralNet/Network.hpp b/src/NeuralNet/Network.hpp index 3471fd6..5fa82f7 100644 --- a/src/NeuralNet/Network.hpp +++ b/src/NeuralNet/Network.hpp @@ -18,6 +18,7 @@ #include "losses/losses.hpp" #include "data/Tensor.hpp" #include "data/TrainingData.hpp" +#include "callbacks/Callback.hpp" namespace NeuralNet { @@ -80,10 +81,11 @@ namespace NeuralNet * @param inputs The inputs that will be passed to the model * @param labels The labels that represent the expected outputs of the model * @param epochs + * @param callbacks A vector of `Callback` that will be called during training stages * * @return The last training's loss */ - double train(std::vector> inputs, std::vector labels, int epochs = 1); + double train(std::vector> inputs, std::vector labels, int epochs = 1, std::vector> callbacks = {}); /** * @brief This method will Train the model with the given inputs and labels @@ -91,30 +93,33 @@ namespace NeuralNet * @param inputs The inputs that will be passed to the model * @param labels The labels that represent the expected outputs of the model * @param epochs + * @param callbacks A vector of `Callback` that will be called during training stages * * @return The last training's loss */ - double train(std::vector>> inputs, std::vector labels, int epochs = 1); + double train(std::vector>> inputs, std::vector labels, int epochs = 1, std::vector> callbacks = {}); /** * @brief This method will train the model with the given TrainingData * * @param trainingData the data passed through the TrainingData class * @param epochs + * @param callbacks A vector of `Callback` that will be called during training stages * * @return The last training's loss */ - double train(TrainingData>, std::vector> trainingData, int epochs = 1); + double train(TrainingData>, std::vector> trainingData, int epochs = 1, std::vector> callbacks = {}); /** * @brief This method will train the model with the given TrainingData * * @param trainingData the data passed through the TrainingData class * @param epochs + * @param callbacks A vector of `Callback` that will be called during training stages * * @return The last training's loss */ - double train(TrainingData>>, std::vector> trainingData, int epochs = 1); + double train(TrainingData>>, std::vector> trainingData, int epochs = 1, std::vector> callbacks = {}); /** * @brief This model will try to make predictions based off the inputs passed @@ -171,13 +176,14 @@ namespace NeuralNet * @param labels A vector of labels (targets) of type D2. Each element in this vector corresponds to the * label of the training example at the same index in the inputs vector. * @param epochs An integer specifying the number of times the training algorithm should iterate over the dataset. + * @param callbacks A vector of `Callback` that will be called during training stages * * @return A double value that represents the average loss of the training process. This can be used to gauge the effectiveness of the process. * * @note The functions assumes that the inputs and labels will be of the same length. 
*/ template - double onlineTraining(std::vector inputs, std::vector labels, int epochs); + double onlineTraining(std::vector inputs, std::vector labels, int epochs, std::vector> callbacks = {}); /** * @brief mini-batch training with given training data @@ -188,13 +194,14 @@ namespace NeuralNet * @param labels A vector of labels (targets) of type D2. Each element in this vector corresponds to the * label of the training example at the same index in the inputs vector. * @param epochs An integer specifying the number of times the training algorithm should iterate over the dataset. + * @param callbacks A vector of `Callback` that will be called during training stages * * @return A double value that represents the average loss of the training process. This can be used to gauge the effectiveness of the process. * * @note The functions assumes that the inputs and labels will be of the same length. */ template - double trainer(TrainingData trainingData, int epochs); + double trainer(TrainingData trainingData, int epochs, std::vector> callbacks = {}); /** * @brief mini-batch training with given training data @@ -205,13 +212,12 @@ namespace NeuralNet * @param labels A vector of labels (targets) of type D2. Each element in this vector corresponds to the * label of the training example at the same index in the inputs vector. * @param epochs An integer specifying the number of times the training algorithm should iterate over the dataset. - * - * @return A double value that represents the average loss of the training process. This can be used to gauge the effectiveness of the process. + * @param callbacks A vector of `Callback` that will be called during training stages * @return A double value that represents the average loss of the training process. This can be used to gauge the effectiveness of the process. * * @note The functions assumes that the inputs and labels will be of the same length. */ template - double miniBatchTraining(TrainingData trainingData, int epochs); + double miniBatchTraining(TrainingData trainingData, int epochs, std::vector> callbacks = {}); /** * @brief batch training with given training data @@ -222,13 +228,13 @@ namespace NeuralNet * @param labels A vector of labels (targets) of type D2. Each element in this vector corresponds to the * label of the training example at the same index in the inputs vector. * @param epochs An integer specifying the number of times the training algorithm should iterate over the dataset. - * + * @param callbacks A vector of `Callback` that will be called during training stages * @return A double value that represents the average loss of the training process. This can be used to gauge the effectiveness of the process. * * @note The functions assumes that the inputs and labels will be of the same length. 
*/ template - double batchTraining(TrainingData trainingData, int epochs); + double batchTraining(TrainingData trainingData, int epochs, std::vector> callbacks = {}); /** * @brief This method will pass the inputs through the network and return an output diff --git a/src/NeuralNet/callbacks/Callback.hpp b/src/NeuralNet/callbacks/Callback.hpp new file mode 100644 index 0000000..06210d0 --- /dev/null +++ b/src/NeuralNet/callbacks/Callback.hpp @@ -0,0 +1,61 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace NeuralNet +{ + using Logs = std::unordered_map; + + class Callback + { + public: + virtual bool onTrainBegin(Logs logs) = 0; + virtual bool onTrainEnd(Logs logs) = 0; + virtual bool onEpochBegin(Logs logs) = 0; + virtual bool onEpochEnd(Logs logs) = 0; + virtual bool onBatchBegin(Logs logs) = 0; + virtual bool onBatchEnd(Logs logs) = 0; + + virtual ~Callback() = default; + + template + using MethodPointer = bool (T::*)(Logs logs); + + template + static bool callMethod(std::shared_ptr callback, const std::string &methodName, Logs logs) + { + static const std::unordered_map> methods = { + {"onTrainBegin", [](T *callback, Logs logs) + { return callback->onTrainBegin(logs); }}, + {"onTrainEnd", [](T *callback, Logs logs) + { return callback->onTrainEnd(logs); }}, + {"onEpochBegin", [](T *callback, Logs logs) + { return callback->onEpochBegin(logs); }}, + {"onEpochEnd", [](T *callback, Logs logs) + { return callback->onEpochEnd(logs); }}, + {"onBatchBegin", [](T *callback, Logs logs) + { return callback->onBatchBegin(logs); }}, + {"onBatchEnd", [](T *callback, Logs logs) + { return callback->onBatchEnd(logs); }}}; + + auto it = methods.find(methodName); + + if (it != methods.end()) + return it->second(callback.get(), logs); + + return true; + } + + protected: + static void checkMetric(const std::string &metric, const std::vector &metrics) + { + if (std::find(metrics.begin(), metrics.end(), metric) == metrics.end()) + throw std::invalid_argument("Metric not found"); + }; + }; +} \ No newline at end of file diff --git a/src/NeuralNet/callbacks/EarlyStopping.hpp b/src/NeuralNet/callbacks/EarlyStopping.hpp new file mode 100644 index 0000000..565053e --- /dev/null +++ b/src/NeuralNet/callbacks/EarlyStopping.hpp @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include "Callback.hpp" +#include "utils/Functions.hpp" + +namespace NeuralNet +{ + class EarlyStopping : public Callback + { + public: + /** + * @brief EarlyStopping is a `Callback` that stops training when a monitored metric has stopped improving. + * + * @param metric The metric to monitor default is `LOSS` + * @param minDelta Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than minDelta, will count as no improvement. + * @param patience Number of epochs with no improvement after which training will be stopped. + */ + EarlyStopping(const std::string &metric, double minDelta = 0, int patience = 0) + { + checkMetric(metric, metrics); + this->metric = metric; + this->minDelta = minDelta; + this->patience = patience; + }; + + /** + * @brief This method will be called at the beginning of each epoch + * + * @param epoch The current epoch + * @param logs The logs of the current epoch + * @return Returns true if the training should continue otherwise returns false + * + * @warning The order of the logs should be the same as the order of the metrics. 
+ */ + bool onEpochBegin(Logs logs) override { return true; }; + + /** + * @brief This method will be called at the end of each epoch + * + * @param epoch The current epoch + * @param logs The logs of the current epoch + * @return Returns true if the training should continue otherwise returns false + * + * @warning The order of the logs should be the same as the order of the metrics. + */ + bool onEpochEnd(Logs logs) override + { + auto it = logs.find(metric); + + if (it == logs.end()) + throw std::invalid_argument("Metric not found"); + + double currentMetric = it->second; + + if (previousMetric == 0) + { + previousMetric = currentMetric; + return true; + } + + double absCurrentDelta = std::abs(currentMetric - previousMetric); + + patience = absCurrentDelta <= minDelta ? patience - 1 : patience; + previousMetric = currentMetric; + + if (patience < 0) + return false; + + return true; + }; + + bool onTrainBegin(Logs logs) override { return true; }; + bool onTrainEnd(Logs logs) override { return true; }; + bool onBatchBegin(Logs logs) override { return true; }; + bool onBatchEnd(Logs logs) override { return true; }; + + ~EarlyStopping() override = default; + + private: + std::string metric; + double minDelta, previousMetric = 0; + int patience; + std::vector metrics = {"LOSS", "ACCURACY"}; // Available metrics for this Callback + }; + +} // namespace NeuralNet \ No newline at end of file diff --git a/src/NeuralNet/utils/Functions.hpp b/src/NeuralNet/utils/Functions.hpp index 29629ec..3190c22 100644 --- a/src/NeuralNet/utils/Functions.hpp +++ b/src/NeuralNet/utils/Functions.hpp @@ -171,6 +171,25 @@ namespace NeuralNet return result; } + /** + * @brief This function takes a vector and a value and returns the index of the value in that vector. + * + * @param v The vector + * @param el The element to look for in the vector + * + * @return The index of the element in the vector `-1` if not found. + */ + template + inline int findIndexOf(const std::vector &v, const T &el) + { + auto it = std::find(v.begin(), v.end(), el); + + if (it == v.end()) + return -1; + + return it - v.begin(); + } + /* MATRIX OPERATIONS */ inline Eigen::MatrixXd zeroMatrix(const std::tuple size) { diff --git a/src/bindings/NeuralNetPy.cpp b/src/bindings/NeuralNetPy.cpp index 2fad357..4187cec 100644 --- a/src/bindings/NeuralNetPy.cpp +++ b/src/bindings/NeuralNetPy.cpp @@ -17,6 +17,8 @@ #include "layers/Dense.hpp" #include "optimizers/Optimizer.hpp" #include "optimizers/optimizers.hpp" +#include "callbacks/Callback.hpp" +#include "callbacks/EarlyStopping.hpp" #include "utils/Enums.hpp" #include "TemplateBindings.hpp" // Template classes binding functions @@ -133,6 +135,40 @@ PYBIND11_MODULE(NeuralNetPy, m) layer = NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE) )pbdoc"); + py::class_>(m, "Callback"); + + py::class_>(m, "EarlyStopping") + .def(py::init(), + py::arg("metric"), + py::arg("minDelta") = 0.01, + py::arg("patience") = 0, + R"pbdoc( + Initializes an ``EarlyStopping`` callback. This callback will stop the training if the given metric doesn't improve more than the given delta over a certain number of epochs (patience). + + .. highlight: python + .. 
code-block:: python + :caption: Example + + import NeuralNetPy as NNP + + network = NNP.Network() + network.setup(optimizer=NNP.SGD(0.01), loss=NNP.LOSS.MCQ) + network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) + network.addLayer(NNP.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) + + inputs = [ + [0.4, 0.5, 0.67], + [0.3, 0.2, 0.1], + [0.1, 0.2, 0.3] + ] + + labels = [1, 0, 1] + + earlyStopping = NNP.EarlyStopping("loss", 0.01, 10) + + network.train(inputs, labels, 100, [earlyStopping]) + )pbdoc"); + py::class_>(m, "Flatten") .def(py::init>(), py::arg("inputShape"), @@ -313,7 +349,7 @@ PYBIND11_MODULE(NeuralNetPy, m) layer = network.getLayer(1) # Return Dense layer with 2 neurons )pbdoc") .def("getNumLayers", &Network::getNumLayers, "Return the number of layers in the network.") - .def("train", static_cast>, std::vector, int)>(&Network::train), R"pbdoc( + .def("train", static_cast>, std::vector, int, std::vector>)>(&Network::train), R"pbdoc( Train the network by passing it 2 dimensional inputs (vectors). :param inputs: A list of vectors representing the inputs @@ -344,7 +380,7 @@ PYBIND11_MODULE(NeuralNetPy, m) loss = network.train(inputs, labels, 10) )pbdoc") - .def("train", static_cast>>, std::vector, int)>(&Network::train), R"pbdoc( + .def("train", static_cast>>, std::vector, int, std::vector>)>(&Network::train), R"pbdoc( Train the network by passing it a list of 3 dimensional inputs (matrices). :param inputs: A list of matrices representing the inputs @@ -380,7 +416,7 @@ PYBIND11_MODULE(NeuralNetPy, m) loss = network.train(inputs, labels, 10) )pbdoc") - .def("train", static_cast>, std::vector>, int)>(&Network::train), R"pbdoc( + .def("train", static_cast>, std::vector>, int, std::vector>)>(&Network::train), R"pbdoc( Train the network by passing it a ``TrainingData2dI`` object. :param trainingData: A ``TrainingData2dI`` object @@ -413,7 +449,7 @@ PYBIND11_MODULE(NeuralNetPy, m) loss = network.train(trainingData, 10) )pbdoc") - .def("train", static_cast>>, std::vector>, int)>(&Network::train), R"pbdoc( + .def("train", static_cast>>, std::vector>, int, std::vector>)>(&Network::train), R"pbdoc( Train the network by passing it a ``TrainingData3dI`` object. 
:param trainingData: A ``TrainingData3dI`` object From 5a78520d7b6e14aa886e153a71f16c64dde99791 Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Sat, 2 Mar 2024 22:24:53 +0100 Subject: [PATCH 02/12] feat(callbacks): training interruption to use exception handling instead of boolean checks --- src/NeuralNet/Network.cpp | 33 +++++++++++++++++++++++ src/NeuralNet/Network.hpp | 19 +++++++++++-- src/NeuralNet/callbacks/Callback.hpp | 25 ++++++++--------- src/NeuralNet/callbacks/EarlyStopping.hpp | 18 ++++++------- 4 files changed, 69 insertions(+), 26 deletions(-) diff --git a/src/NeuralNet/Network.cpp b/src/NeuralNet/Network.cpp index 7ebaa7b..2d1a74e 100644 --- a/src/NeuralNet/Network.cpp +++ b/src/NeuralNet/Network.cpp @@ -63,22 +63,26 @@ std::shared_ptr Network::getOutputLayer() const double Network::train(std::vector> inputs, std::vector labels, int epochs, std::vector> callbacks) { + // todo: error handling return onlineTraining(inputs, labels, epochs); } double Network::train(std::vector>> inputs, std::vector labels, int epochs, std::vector> callbacks) { + // todo: error handling return onlineTraining(inputs, labels, epochs); } // Specific implementation of train that takes TrainingData class as input double Network::train(TrainingData>, std::vector> trainingData, int epochs, std::vector> callbacks) { + // todo: error handling return this->trainer(trainingData, epochs); } double Network::train(TrainingData>>, std::vector> trainingData, int epochs, std::vector> callbacks) { + // todo: error handling return this->trainer(trainingData, epochs); } @@ -95,12 +99,15 @@ double Network::miniBatchTraining(TrainingData trainingData, int epochs, { double loss; double sumLoss = 0; + trainingCheckpoint("onTrainBegin", callbacks); for (int e = 0; e < epochs; e++) { + trainingCheckpoint("onEpochBegin", callbacks); TrainingGauge g(trainingData.inputs.size(), 0, epochs, (e + 1)); for (int b = 0; b < trainingData.inputs.size(); b++) { + trainingCheckpoint("onBatchBegin", callbacks); const int numOutputs = this->getOutputLayer()->getNumNeurons(); const int inputsSize = trainingData.inputs.batches[b].size(); Eigen::MatrixXd y = formatLabels(trainingData.labels.batches[b], {inputsSize, numOutputs}); @@ -111,9 +118,12 @@ double Network::miniBatchTraining(TrainingData trainingData, int epochs, sumLoss += loss; this->backProp(o, y); g.printWithLAndA(loss, computeAccuracy(o, y)); + trainingCheckpoint("onBatchEnd", callbacks); } + trainingCheckpoint("onEpochEnd", callbacks); } + trainingCheckpoint("onTrainEnd", callbacks); return sumLoss / trainingData.inputs.size(); } @@ -125,9 +135,11 @@ double Network::batchTraining(TrainingData trainingData, int epochs, std const int numOutputs = this->getOutputLayer()->getNumNeurons(); const int numInputs = trainingData.inputs.data.size(); Eigen::MatrixXd y = formatLabels(trainingData.labels.data, {numInputs, numOutputs}); + trainingCheckpoint("onTrainBegin", callbacks); for (int e = 0; e < epochs; e++) { + trainingCheckpoint("onEpochBegin", callbacks); TrainingGauge g(1, 0, epochs, (e + 1)); Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.data); @@ -136,8 +148,10 @@ double Network::batchTraining(TrainingData trainingData, int epochs, std this->backProp(o, y); g.printWithLoss(loss); + trainingCheckpoint("onEpochEnd", callbacks); } + trainingCheckpoint("onTrainEnd", callbacks); return sumLoss / numInputs; } @@ -150,8 +164,12 @@ double Network::onlineTraining(std::vector inputs, std::vector labels, i const int numInputs = inputs.size(); Eigen::MatrixXd y = 
formatLabels(labels, {numInputs, numOutputs}); + // Injecting callbacks + trainingCheckpoint("onTrainBegin", callbacks); + for (int e = 0; e < epochs; e++) { + trainingCheckpoint("onEpochBegin", callbacks); TrainingGauge tg(inputs.size(), 0, epochs, (e + 1)); for (auto &input : inputs) { @@ -161,8 +179,10 @@ double Network::onlineTraining(std::vector inputs, std::vector labels, i this->backProp(o, y); tg.printWithLoss(loss); } + trainingCheckpoint("onEpochEnd", callbacks); } + trainingCheckpoint("onTrainEnd", callbacks); return sumLoss / numInputs; } @@ -259,6 +279,19 @@ void Network::updateOptimizerSetup(size_t numLayers) this->optimizer->insiderInit(numLayers); } +void Network::trainingCheckpoint(std::string checkpointName, std::vector> callbacks) +{ + if (callbacks.size() == 0) + return; + + // todo: get logs + + for (std::shared_ptr callback : callbacks) + { + Callback::callMethod(callback, checkpointName); + } +} + /** * @note This function will return the accuracy of the given outputs compared to the labels. * diff --git a/src/NeuralNet/Network.hpp b/src/NeuralNet/Network.hpp index 5fa82f7..214779d 100644 --- a/src/NeuralNet/Network.hpp +++ b/src/NeuralNet/Network.hpp @@ -273,7 +273,22 @@ namespace NeuralNet */ void backProp(Eigen::MatrixXd &outputs, Eigen::MatrixXd &y); - // todo: implement the following method + /** + * @brief This method will go over the provided callbacks and trigger the appropriate methods whilst passing the necessary logs. + * + * @param checkpointName The name of the checkpoint (e.g. onTrainBegin, onEpochEnd, etc.) + * @param callbacks A vector of `Callback` that will be called during training stages + */ + void trainingCheckpoint(std::string checkpointName, std::vector> callbacks); + + /** + * @brief This method will compute the accuracy of the model based on the outputs of the model and the expected values. 
+ * + * @param outputs The outputs from the forward propagation + * @param y The expected outputs (targets) + * + * @return The accuracy of the model (percentage of correct predictions) + */ double computeAccuracy(Eigen::MatrixXd &outputs, Eigen::MatrixXd &y); /** @@ -283,4 +298,4 @@ namespace NeuralNet */ void updateOptimizerSetup(size_t numLayers); }; -} +} // namespace NeuralNet diff --git a/src/NeuralNet/callbacks/Callback.hpp b/src/NeuralNet/callbacks/Callback.hpp index 06210d0..dc11dce 100644 --- a/src/NeuralNet/callbacks/Callback.hpp +++ b/src/NeuralNet/callbacks/Callback.hpp @@ -14,22 +14,19 @@ namespace NeuralNet class Callback { public: - virtual bool onTrainBegin(Logs logs) = 0; - virtual bool onTrainEnd(Logs logs) = 0; - virtual bool onEpochBegin(Logs logs) = 0; - virtual bool onEpochEnd(Logs logs) = 0; - virtual bool onBatchBegin(Logs logs) = 0; - virtual bool onBatchEnd(Logs logs) = 0; + virtual void onTrainBegin(Logs logs) = 0; + virtual void onTrainEnd(Logs logs) = 0; + virtual void onEpochBegin(Logs logs) = 0; + virtual void onEpochEnd(Logs logs) = 0; + virtual void onBatchBegin(Logs logs) = 0; + virtual void onBatchEnd(Logs logs) = 0; virtual ~Callback() = default; - template - using MethodPointer = bool (T::*)(Logs logs); - template - static bool callMethod(std::shared_ptr callback, const std::string &methodName, Logs logs) + static void callMethod(std::shared_ptr callback, const std::string &methodName, Logs logs) { - static const std::unordered_map> methods = { + static const std::unordered_map> methods = { {"onTrainBegin", [](T *callback, Logs logs) { return callback->onTrainBegin(logs); }}, {"onTrainEnd", [](T *callback, Logs logs) @@ -45,10 +42,10 @@ namespace NeuralNet auto it = methods.find(methodName); - if (it != methods.end()) - return it->second(callback.get(), logs); + if (it == methods.end()) + return; - return true; + it->second(callback.get(), logs); } protected: diff --git a/src/NeuralNet/callbacks/EarlyStopping.hpp b/src/NeuralNet/callbacks/EarlyStopping.hpp index 565053e..31dc093 100644 --- a/src/NeuralNet/callbacks/EarlyStopping.hpp +++ b/src/NeuralNet/callbacks/EarlyStopping.hpp @@ -35,7 +35,7 @@ namespace NeuralNet * * @warning The order of the logs should be the same as the order of the metrics. */ - bool onEpochBegin(Logs logs) override { return true; }; + void onEpochBegin(Logs logs) override{}; /** * @brief This method will be called at the end of each epoch @@ -46,7 +46,7 @@ namespace NeuralNet * * @warning The order of the logs should be the same as the order of the metrics. 
*/ - bool onEpochEnd(Logs logs) override + void onEpochEnd(Logs logs) override { auto it = logs.find(metric); @@ -58,7 +58,7 @@ namespace NeuralNet if (previousMetric == 0) { previousMetric = currentMetric; - return true; + return; } double absCurrentDelta = std::abs(currentMetric - previousMetric); @@ -67,15 +67,13 @@ namespace NeuralNet previousMetric = currentMetric; if (patience < 0) - return false; - - return true; + throw std::runtime_error("Early stopping"); }; - bool onTrainBegin(Logs logs) override { return true; }; - bool onTrainEnd(Logs logs) override { return true; }; - bool onBatchBegin(Logs logs) override { return true; }; - bool onBatchEnd(Logs logs) override { return true; }; + void onTrainBegin(Logs logs) override{}; + void onTrainEnd(Logs logs) override{}; + void onBatchBegin(Logs logs) override{}; + void onBatchEnd(Logs logs) override{}; ~EarlyStopping() override = default; From 73e40ef6258359648db2fdf9d9dea9af2b7ab623 Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Sat, 2 Mar 2024 23:50:41 +0100 Subject: [PATCH 03/12] feat(callbacks): callbacks handling and logs generation --- TODO.md | 7 +++- src/NeuralNet/Network.cpp | 70 ++++++++++++++++++++++++++-------- src/NeuralNet/Network.hpp | 8 ++++ src/NeuralNet/layers/Layer.hpp | 2 +- 4 files changed, 68 insertions(+), 19 deletions(-) diff --git a/TODO.md b/TODO.md index 5f1144e..86039a7 100644 --- a/TODO.md +++ b/TODO.md @@ -9,8 +9,11 @@ ## IN PROGRESS : - [ ] Implement early stopping - - [ ] Create a CallBack base class - - [ ] Create an EarlyStopping class + - [x] Create a CallBack base class + - [x] Create an EarlyStopping class + - [ ] Add more helpful logs + - [ ] Find a more convenient way of generating and tracking logs + - [ ] Test callbacks - [ ] Optimize `Catch2`'s build - [ ] Add gradient clipping diff --git a/src/NeuralNet/Network.cpp b/src/NeuralNet/Network.cpp index 2d1a74e..381b09d 100644 --- a/src/NeuralNet/Network.cpp +++ b/src/NeuralNet/Network.cpp @@ -63,27 +63,51 @@ std::shared_ptr Network::getOutputLayer() const double Network::train(std::vector> inputs, std::vector labels, int epochs, std::vector> callbacks) { - // todo: error handling - return onlineTraining(inputs, labels, epochs); + try + { + return onlineTraining(inputs, labels, epochs, callbacks); + } + catch (const std::exception &e) + { + std::cerr << "Training Interrupted : " e.what() << '\n'; + } } double Network::train(std::vector>> inputs, std::vector labels, int epochs, std::vector> callbacks) { - // todo: error handling - return onlineTraining(inputs, labels, epochs); + try + { + return onlineTraining(inputs, labels, epochs); + } + catch (const std::exception &e) + { + std::cerr << "Training Interrupted : " e.what() << '\n'; + } } // Specific implementation of train that takes TrainingData class as input double Network::train(TrainingData>, std::vector> trainingData, int epochs, std::vector> callbacks) { - // todo: error handling - return this->trainer(trainingData, epochs); + try + { + return this->trainer(trainingData, epochs); + } + catch (const std::exception &e) + { + std::cerr << "Training Interrupted : " e.what() << '\n'; + } } double Network::train(TrainingData>>, std::vector> trainingData, int epochs, std::vector> callbacks) { - // todo: error handling - return this->trainer(trainingData, epochs); + try + { + return this->trainer(trainingData, epochs); + } + catch (const std::exception &e) + { + std::cerr << "Training Interrupted : " e.what() << '\n'; + } } template @@ -97,7 +121,6 @@ double Network::trainer(TrainingData 
trainingData, int epochs, std::vect template double Network::miniBatchTraining(TrainingData trainingData, int epochs, std::vector> callbacks) { - double loss; double sumLoss = 0; trainingCheckpoint("onTrainBegin", callbacks); @@ -115,9 +138,10 @@ double Network::miniBatchTraining(TrainingData trainingData, int epochs, // computing outputs from forward propagation Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.batches[b]); loss = this->cmpLoss(o, y) / inputsSize; + accuracy = computeAccuracy(o, y); sumLoss += loss; this->backProp(o, y); - g.printWithLAndA(loss, computeAccuracy(o, y)); + g.printWithLAndA(loss, accuracy); trainingCheckpoint("onBatchEnd", callbacks); } trainingCheckpoint("onEpochEnd", callbacks); @@ -130,7 +154,6 @@ double Network::miniBatchTraining(TrainingData trainingData, int epochs, template double Network::batchTraining(TrainingData trainingData, int epochs, std::vector> callbacks) { - double loss; double sumLoss = 0; const int numOutputs = this->getOutputLayer()->getNumNeurons(); const int numInputs = trainingData.inputs.data.size(); @@ -144,10 +167,11 @@ double Network::batchTraining(TrainingData trainingData, int epochs, std Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.data); loss = this->cmpLoss(o, y); + accuracy = computeAccuracy(o, y); sumLoss += loss; this->backProp(o, y); - g.printWithLoss(loss); + g.printWithLAndA(loss, accuracy); trainingCheckpoint("onEpochEnd", callbacks); } @@ -158,8 +182,8 @@ double Network::batchTraining(TrainingData trainingData, int epochs, std template double Network::onlineTraining(std::vector inputs, std::vector labels, int epochs, std::vector> callbacks) { - double loss; - double sumLoss; + double sumLoss = 0; + int tCorrect = 0; const int numOutputs = this->getOutputLayer()->getNumNeurons(); const int numInputs = inputs.size(); Eigen::MatrixXd y = formatLabels(labels, {numInputs, numOutputs}); @@ -176,9 +200,13 @@ double Network::onlineTraining(std::vector inputs, std::vector labels, i Eigen::MatrixXd o = this->forwardProp(inputs); loss = this->cmpLoss(o, y); sumLoss += loss; + tCorrect += computeAccuracy(o, y); this->backProp(o, y); tg.printWithLoss(loss); } + // Computing metrics for the logs + average = tCorrect / numInputs; + loss = sumLoss / numInputs; trainingCheckpoint("onEpochEnd", callbacks); } @@ -284,14 +312,24 @@ void Network::trainingCheckpoint(std::string checkpointName, std::vector logs = getLogs(); for (std::shared_ptr callback : callbacks) { - Callback::callMethod(callback, checkpointName); + Callback::callMethod(callback, checkpointName, logs); } } +std::unordered_map Network::getLogs() +{ + std::unordered_map logs; + + logs["LOSS"] = loss; + logs["ACCURACY"] = accuracy; + + return logs; +} + /** * @note This function will return the accuracy of the given outputs compared to the labels. 
* diff --git a/src/NeuralNet/Network.hpp b/src/NeuralNet/Network.hpp index 214779d..bea103e 100644 --- a/src/NeuralNet/Network.hpp +++ b/src/NeuralNet/Network.hpp @@ -159,6 +159,7 @@ namespace NeuralNet setLoss(lossFunc); } + double loss = 0, accuracy = 0; std::vector> layers; LOSS lossFunc; // Storing the loss function for serialization int cp = 0, tp = 0; // Correct Predictions, Total Predictions @@ -291,6 +292,13 @@ namespace NeuralNet */ double computeAccuracy(Eigen::MatrixXd &outputs, Eigen::MatrixXd &y); + /** + * @brief This method will fetch the logs and return them + * + * @return A map of useful logs + */ + std::unordered_map getLogs(); + /** * @brief This method will update the optimizer's setup * diff --git a/src/NeuralNet/layers/Layer.hpp b/src/NeuralNet/layers/Layer.hpp index ed6e737..974495f 100644 --- a/src/NeuralNet/layers/Layer.hpp +++ b/src/NeuralNet/layers/Layer.hpp @@ -308,6 +308,6 @@ namespace NeuralNet this->outputs = outputs; }; }; -} +} // namespace NeuralNet CEREAL_REGISTER_TYPE(NeuralNet::Layer); \ No newline at end of file From a23f9c9056ffca01f6875689cfe99101235157dc Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Sun, 3 Mar 2024 13:43:27 +0100 Subject: [PATCH 04/12] test(callbacks): test cases for EarlyStopping exceptions throwing --- TODO.md | 2 ++ src/NeuralNet/Network.cpp | 14 +++++++++----- src/NeuralNet/callbacks/Callback.hpp | 14 ++++++++++++-- tests/CMakeLists.txt | 3 ++- tests/test-callbacks.cpp | 24 ++++++++++++++++++++++++ tests/test-functions.cpp | 4 +--- 6 files changed, 50 insertions(+), 11 deletions(-) create mode 100644 tests/test-callbacks.cpp diff --git a/TODO.md b/TODO.md index 86039a7..bd08e5d 100644 --- a/TODO.md +++ b/TODO.md @@ -1,5 +1,6 @@ ## TODOS : +- [ ] Setup `clang-format` - [ ] Find out why the predictions are not accurate on my_samples - [ ] Implement batch norm - [ ] Parallelize operations @@ -14,6 +15,7 @@ - [ ] Add more helpful logs - [ ] Find a more convenient way of generating and tracking logs - [ ] Test callbacks +- [ ] Python tests - [ ] Optimize `Catch2`'s build - [ ] Add gradient clipping diff --git a/src/NeuralNet/Network.cpp b/src/NeuralNet/Network.cpp index 381b09d..2b3471c 100644 --- a/src/NeuralNet/Network.cpp +++ b/src/NeuralNet/Network.cpp @@ -69,7 +69,8 @@ double Network::train(std::vector> inputs, std::vector>> inputs, std: } catch (const std::exception &e) { - std::cerr << "Training Interrupted : " e.what() << '\n'; + std::cerr << "Training Interrupted : " << e.what() << '\n'; + return loss; } } @@ -94,7 +96,8 @@ double Network::train(TrainingData>, std::vector } catch (const std::exception &e) { - std::cerr << "Training Interrupted : " e.what() << '\n'; + std::cerr << "Training Interrupted : " << e.what() << '\n'; + return loss; } } @@ -106,7 +109,8 @@ double Network::train(TrainingData>> } catch (const std::exception &e) { - std::cerr << "Training Interrupted : " e.what() << '\n'; + std::cerr << "Training Interrupted : " << e.what() << '\n'; + return loss; } } @@ -205,7 +209,7 @@ double Network::onlineTraining(std::vector inputs, std::vector labels, i tg.printWithLoss(loss); } // Computing metrics for the logs - average = tCorrect / numInputs; + accuracy = tCorrect / numInputs; loss = sumLoss / numInputs; trainingCheckpoint("onEpochEnd", callbacks); } diff --git a/src/NeuralNet/callbacks/Callback.hpp b/src/NeuralNet/callbacks/Callback.hpp index dc11dce..f67f6a7 100644 --- a/src/NeuralNet/callbacks/Callback.hpp +++ b/src/NeuralNet/callbacks/Callback.hpp @@ -23,7 +23,17 @@ namespace NeuralNet virtual ~Callback() = 
default; - template + /** + * @brief Calls the method of the callback with the given logs + * + * @tparam T The type of the callback + * @param callback A shared_ptr to the callback + * @param methodName The name of the method to call (onTrainBegin, onTrainEnd, onEpochBegin, onEpochEnd, onBatchBegin, onBatchEnd) + * @param logs The logs to pass to the method + * + * @warning There should be consistency between the names of the logs and the metrics of the callbacks + */ + template static void callMethod(std::shared_ptr callback, const std::string &methodName, Logs logs) { static const std::unordered_map> methods = { @@ -55,4 +65,4 @@ namespace NeuralNet throw std::invalid_argument("Metric not found"); }; }; -} \ No newline at end of file +} // namespace NeuralNet \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ca12e50..c7bb687 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -12,4 +12,5 @@ neural_net_add_test(test-functions.cpp) neural_net_add_test(test-layer.cpp) neural_net_add_test(test-network.cpp) neural_net_add_test(test-tensor.cpp) -neural_net_add_test(test-optimizers.cpp) \ No newline at end of file +neural_net_add_test(test-optimizers.cpp) +neural_net_add_test(test-callbacks.cpp) \ No newline at end of file diff --git a/tests/test-callbacks.cpp b/tests/test-callbacks.cpp new file mode 100644 index 0000000..6d56391 --- /dev/null +++ b/tests/test-callbacks.cpp @@ -0,0 +1,24 @@ +#include +#include +#include +#include + +using namespace NeuralNet; + +TEST_CASE("EarlyStopping callback throws exception when the metric is not found", "[callback]") +{ + std::shared_ptr earlyStopping = std::make_shared("LOSS", 0.1); + std::unordered_map logs = {{"TEST", 0.2}}; + + REQUIRE_THROWS(Callback::callMethod(earlyStopping, "onEpochEnd", logs)); +} + +TEST_CASE("EarlyStopping callback throws exception when metric does not more than the given delta", "[callback]") +{ + std::shared_ptr earlyStopping = std::make_shared("LOSS", 0.1); + std::unordered_map logs = {{"LOSS", 0.2}}; + std::unordered_map logs2 = {{"LOSS", 0.2}}; + + Callback::callMethod(earlyStopping, "onEpochEnd", logs); + REQUIRE_THROWS(Callback::callMethod(earlyStopping, "onEpochEnd", logs2)); +} \ No newline at end of file diff --git a/tests/test-functions.cpp b/tests/test-functions.cpp index b8e16b6..bd8de91 100644 --- a/tests/test-functions.cpp +++ b/tests/test-functions.cpp @@ -7,14 +7,12 @@ using namespace Catch::Matchers; using namespace NeuralNet; -const double ERR_MARGIN = 0.001; - TEST_CASE("Sqr function returns the right square", "[helper_function]") { CHECK(sqr(0) == 0); CHECK(sqr(2) == 4); CHECK(sqr(10) == 100); - REQUIRE_THAT(sqr(7.8877), WithinAbs(62.215, ERR_MARGIN)); + REQUIRE_THAT(sqr(7.8877), WithinAbs(62.215, EPSILON)); CHECK(sqr(657666) == 432524567556); } From 9b3e4d52fd1dbc3e92471820aac172e68d4eac2f Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Sun, 3 Mar 2024 21:29:13 +0100 Subject: [PATCH 05/12] refactor(network): added current epoch as a metric and reordered Network's attributes --- TODO.md | 7 +------ examples/train-predict-MNIST/main.py | 4 +++- src/NeuralNet/Network.cpp | 19 ++++++++++--------- src/NeuralNet/Network.hpp | 26 +++++++++++++------------- src/bindings/NeuralNetPy.cpp | 11 +++++++---- 5 files changed, 34 insertions(+), 33 deletions(-) diff --git a/TODO.md b/TODO.md index bd08e5d..ccc34c3 100644 --- a/TODO.md +++ b/TODO.md @@ -9,18 +9,13 @@ ## IN PROGRESS : -- [ ] Implement early stopping - - [x] Create a CallBack base class - - [x] Create an 
EarlyStopping class - - [ ] Add more helpful logs - - [ ] Find a more convenient way of generating and tracking logs - - [ ] Test callbacks - [ ] Python tests - [ ] Optimize `Catch2`'s build - [ ] Add gradient clipping ## DONE : +- [x] Implement early stopping - [x] Update README to include more information about the project - [x] Add CI / CD - [x] Document the example in python diff --git a/examples/train-predict-MNIST/main.py b/examples/train-predict-MNIST/main.py index d6773a7..11a8e59 100644 --- a/examples/train-predict-MNIST/main.py +++ b/examples/train-predict-MNIST/main.py @@ -53,7 +53,9 @@ trainingData.batch(128) -network.train(trainingData, 3) +callbacks = [NNP.EarlyStopping("LOSS", 0.1, 1)] + +network.train(trainingData, 10, callbacks) f_x_test = [normalize_img(x) for x in x_test] diff --git a/src/NeuralNet/Network.cpp b/src/NeuralNet/Network.cpp index 2b3471c..dc4df06 100644 --- a/src/NeuralNet/Network.cpp +++ b/src/NeuralNet/Network.cpp @@ -78,7 +78,7 @@ double Network::train(std::vector>> inputs, std: { try { - return onlineTraining(inputs, labels, epochs); + return onlineTraining(inputs, labels, epochs, callbacks); } catch (const std::exception &e) { @@ -92,7 +92,7 @@ double Network::train(TrainingData>, std::vector { try { - return this->trainer(trainingData, epochs); + return this->trainer(trainingData, epochs, callbacks); } catch (const std::exception &e) { @@ -105,7 +105,7 @@ double Network::train(TrainingData>> { try { - return this->trainer(trainingData, epochs); + return this->trainer(trainingData, epochs, callbacks); } catch (const std::exception &e) { @@ -128,10 +128,10 @@ double Network::miniBatchTraining(TrainingData trainingData, int epochs, double sumLoss = 0; trainingCheckpoint("onTrainBegin", callbacks); - for (int e = 0; e < epochs; e++) + for (cEpoch = 0; cEpoch < epochs; cEpoch++) { trainingCheckpoint("onEpochBegin", callbacks); - TrainingGauge g(trainingData.inputs.size(), 0, epochs, (e + 1)); + TrainingGauge g(trainingData.inputs.size(), 0, epochs, (cEpoch + 1)); for (int b = 0; b < trainingData.inputs.size(); b++) { trainingCheckpoint("onBatchBegin", callbacks); @@ -164,10 +164,10 @@ double Network::batchTraining(TrainingData trainingData, int epochs, std Eigen::MatrixXd y = formatLabels(trainingData.labels.data, {numInputs, numOutputs}); trainingCheckpoint("onTrainBegin", callbacks); - for (int e = 0; e < epochs; e++) + for (cEpoch = 0; cEpoch < epochs; cEpoch++) { trainingCheckpoint("onEpochBegin", callbacks); - TrainingGauge g(1, 0, epochs, (e + 1)); + TrainingGauge g(1, 0, epochs, (cEpoch + 1)); Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.data); loss = this->cmpLoss(o, y); @@ -195,10 +195,10 @@ double Network::onlineTraining(std::vector inputs, std::vector labels, i // Injecting callbacks trainingCheckpoint("onTrainBegin", callbacks); - for (int e = 0; e < epochs; e++) + for (cEpoch = 0; cEpoch < epochs; cEpoch++) { trainingCheckpoint("onEpochBegin", callbacks); - TrainingGauge tg(inputs.size(), 0, epochs, (e + 1)); + TrainingGauge tg(inputs.size(), 0, epochs, (cEpoch + 1)); for (auto &input : inputs) { Eigen::MatrixXd o = this->forwardProp(inputs); @@ -330,6 +330,7 @@ std::unordered_map Network::getLogs() logs["LOSS"] = loss; logs["ACCURACY"] = accuracy; + logs["EPOCH"] = cEpoch; return logs; } diff --git a/src/NeuralNet/Network.hpp b/src/NeuralNet/Network.hpp index bea103e..58efba3 100644 --- a/src/NeuralNet/Network.hpp +++ b/src/NeuralNet/Network.hpp @@ -85,7 +85,7 @@ namespace NeuralNet * * @return The last training's loss */ - 
double train(std::vector> inputs, std::vector labels, int epochs = 1, std::vector> callbacks = {}); + double train(std::vector> inputs, std::vector labels, int epochs = 1, const std::vector> callbacks = {}); /** * @brief This method will Train the model with the given inputs and labels @@ -97,7 +97,7 @@ namespace NeuralNet * * @return The last training's loss */ - double train(std::vector>> inputs, std::vector labels, int epochs = 1, std::vector> callbacks = {}); + double train(std::vector>> inputs, std::vector labels, int epochs = 1, const std::vector> callbacks = {}); /** * @brief This method will train the model with the given TrainingData @@ -108,7 +108,7 @@ namespace NeuralNet * * @return The last training's loss */ - double train(TrainingData>, std::vector> trainingData, int epochs = 1, std::vector> callbacks = {}); + double train(TrainingData>, std::vector> trainingData, int epochs = 1, const std::vector> callbacks = {}); /** * @brief This method will train the model with the given TrainingData @@ -119,7 +119,7 @@ namespace NeuralNet * * @return The last training's loss */ - double train(TrainingData>>, std::vector> trainingData, int epochs = 1, std::vector> callbacks = {}); + double train(TrainingData>>, std::vector> trainingData, int epochs = 1, const std::vector> callbacks = {}); /** * @brief This model will try to make predictions based off the inputs passed @@ -145,6 +145,15 @@ namespace NeuralNet // non-public serialization friend class cereal::access; + int cEpoch = 0; // Current epoch + double loss = 0, accuracy = 0; + std::vector> layers; + LOSS lossFunc; // Storing the loss function for serialization + bool debugMode = false; + double (*cmpLoss)(const Eigen::MatrixXd &, const Eigen::MatrixXd &); + Eigen::MatrixXd (*cmpLossGrad)(const Eigen::MatrixXd &, const Eigen::MatrixXd &); + std::shared_ptr optimizer; + template void save(Archive &archive) const { @@ -159,15 +168,6 @@ namespace NeuralNet setLoss(lossFunc); } - double loss = 0, accuracy = 0; - std::vector> layers; - LOSS lossFunc; // Storing the loss function for serialization - int cp = 0, tp = 0; // Correct Predictions, Total Predictions - bool debugMode = false; - double (*cmpLoss)(const Eigen::MatrixXd &, const Eigen::MatrixXd &); - Eigen::MatrixXd (*cmpLossGrad)(const Eigen::MatrixXd &, const Eigen::MatrixXd &); - std::shared_ptr optimizer; - /** * @brief online training with given training data * diff --git a/src/bindings/NeuralNetPy.cpp b/src/bindings/NeuralNetPy.cpp index 4187cec..383cd7b 100644 --- a/src/bindings/NeuralNetPy.cpp +++ b/src/bindings/NeuralNetPy.cpp @@ -169,6 +169,9 @@ PYBIND11_MODULE(NeuralNetPy, m) network.train(inputs, labels, 100, [earlyStopping]) )pbdoc"); + py::bind_vector>>(m, "VectorCallback"); + py::bind_vector>>(m, "VectorEarlyStopping"); + py::class_>(m, "Flatten") .def(py::init>(), py::arg("inputShape"), @@ -349,7 +352,7 @@ PYBIND11_MODULE(NeuralNetPy, m) layer = network.getLayer(1) # Return Dense layer with 2 neurons )pbdoc") .def("getNumLayers", &Network::getNumLayers, "Return the number of layers in the network.") - .def("train", static_cast>, std::vector, int, std::vector>)>(&Network::train), R"pbdoc( + .def("train", static_cast>, std::vector, int, const std::vector>)>(&Network::train), R"pbdoc( Train the network by passing it 2 dimensional inputs (vectors). 
:param inputs: A list of vectors representing the inputs @@ -380,7 +383,7 @@ PYBIND11_MODULE(NeuralNetPy, m) loss = network.train(inputs, labels, 10) )pbdoc") - .def("train", static_cast>>, std::vector, int, std::vector>)>(&Network::train), R"pbdoc( + .def("train", static_cast>>, std::vector, int, const std::vector>)>(&Network::train), R"pbdoc( Train the network by passing it a list of 3 dimensional inputs (matrices). :param inputs: A list of matrices representing the inputs @@ -416,7 +419,7 @@ PYBIND11_MODULE(NeuralNetPy, m) loss = network.train(inputs, labels, 10) )pbdoc") - .def("train", static_cast>, std::vector>, int, std::vector>)>(&Network::train), R"pbdoc( + .def("train", static_cast>, std::vector>, int, const std::vector>)>(&Network::train), R"pbdoc( Train the network by passing it a ``TrainingData2dI`` object. :param trainingData: A ``TrainingData2dI`` object @@ -449,7 +452,7 @@ PYBIND11_MODULE(NeuralNetPy, m) loss = network.train(trainingData, 10) )pbdoc") - .def("train", static_cast>>, std::vector>, int, std::vector>)>(&Network::train), R"pbdoc( + .def("train", static_cast>>, std::vector>, int, const std::vector>)>(&Network::train), R"pbdoc( Train the network by passing it a ``TrainingData3dI`` object. :param trainingData: A ``TrainingData3dI`` object From 61c5437fcda40dbac9691620562cd4b6f73484c4 Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Mon, 4 Mar 2024 17:26:17 +0100 Subject: [PATCH 06/12] wip(callbacks): CSVLogger callback --- docs/usage.rst | 2 +- examples/train-predict-MNIST/main.py | 4 +- examples/train-predict-MNIST/training.csv | 4 + src/NeuralNet/callbacks/CSVLogger.hpp | 96 +++++++++++++++++++++++ src/NeuralNet/callbacks/EarlyStopping.hpp | 9 --- src/bindings/NeuralNetPy.cpp | 32 ++++++++ 6 files changed, 135 insertions(+), 12 deletions(-) create mode 100644 examples/train-predict-MNIST/training.csv create mode 100644 src/NeuralNet/callbacks/CSVLogger.hpp diff --git a/docs/usage.rst b/docs/usage.rst index e073439..29f33f9 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -18,7 +18,7 @@ Then you can import the module just like any other : .. code-block:: python - import NeuralNetPy as nnp + import NeuralNetPy as NNP .. Attention:: The path to the build folder must be set before importing the module. 
\ No newline at end of file diff --git a/examples/train-predict-MNIST/main.py b/examples/train-predict-MNIST/main.py index 11a8e59..e2bc3dd 100644 --- a/examples/train-predict-MNIST/main.py +++ b/examples/train-predict-MNIST/main.py @@ -53,9 +53,9 @@ trainingData.batch(128) -callbacks = [NNP.EarlyStopping("LOSS", 0.1, 1)] +callbacks = [NNP.EarlyStopping("LOSS", 0.1, 1), NNP.CSVLogger("training.csv")] -network.train(trainingData, 10, callbacks) +network.train(trainingData, 3, callbacks) f_x_test = [normalize_img(x) for x in x_test] diff --git a/examples/train-predict-MNIST/training.csv b/examples/train-predict-MNIST/training.csv new file mode 100644 index 0000000..5f5965c --- /dev/null +++ b/examples/train-predict-MNIST/training.csv @@ -0,0 +1,4 @@ +LOSS,ACCURACY,EPOCH, +0.628645,1.000000,0.000000, +0.217920,1.000000,1.000000, +0.130259,1.000000,2.000000, diff --git a/src/NeuralNet/callbacks/CSVLogger.hpp b/src/NeuralNet/callbacks/CSVLogger.hpp new file mode 100644 index 0000000..8f1304b --- /dev/null +++ b/src/NeuralNet/callbacks/CSVLogger.hpp @@ -0,0 +1,96 @@ +#pragma once + +#include +#include +#include +#include "Callback.hpp" +#include "utils/Functions.hpp" // fileExistsWithExtension + +namespace NeuralNet +{ + class CSVLogger : public Callback + { + public: + CSVLogger(const std::string &filename, const std::string &separator = ",") + { + assert(fileExistsWithExtension(filename, ".csv") && "The file doesn't exists or is not a CSV file '.csv'"); + this->filename = filename; + this->separator = separator; + }; + + void onEpochBegin(Logs logs) override{}; + + void onEpochEnd(Logs logs) override + { + std::vector row; + + row.reserve(logs.size()); + + std::transform(logs.begin(), logs.end(), std::back_inserter(row), + [](const auto &log) + { return static_cast(log.second); }); + + data.push_back(row); + }; + + void onTrainBegin(Logs logs) override + { + // Initializing the headers with the logs keys + for (const auto &log : logs) + { + headers.push_back(log.first); + }; + }; + + void onTrainEnd(Logs logs) override + { + std::ofstream file(filename); + + if (!file.is_open()) + { + throw std::runtime_error("Couldn't open csv file"); + } + + file << formatRow(headers); + + std::for_each(data.begin(), data.end(), [&file, this](auto &row) + { file << this->formatRow(row); }); + + file.close(); + }; + + void onBatchBegin(Logs logs) override{}; + void onBatchEnd(Logs logs) override{}; + + private: + std::string filename; + std::string separator; + std::vector headers; + std::vector> data; + + template + std::string formatRow(const std::vector &v) + { + std::string csvRow; + + for (T el : v) + { + csvRow += std::to_string(el) + separator; + } + + return csvRow + "\n"; + }; + + std::string formatRow(const std::vector &v) + { + std::string csvRow; + + for (const std::string &el : v) + { + csvRow += el + separator; + } + + return csvRow + "\n"; + }; + }; +} // namespace NeuralNet \ No newline at end of file diff --git a/src/NeuralNet/callbacks/EarlyStopping.hpp b/src/NeuralNet/callbacks/EarlyStopping.hpp index 31dc093..89ca02c 100644 --- a/src/NeuralNet/callbacks/EarlyStopping.hpp +++ b/src/NeuralNet/callbacks/EarlyStopping.hpp @@ -26,15 +26,6 @@ namespace NeuralNet this->patience = patience; }; - /** - * @brief This method will be called at the beginning of each epoch - * - * @param epoch The current epoch - * @param logs The logs of the current epoch - * @return Returns true if the training should continue otherwise returns false - * - * @warning The order of the logs should be the same as the 
order of the metrics. - */ void onEpochBegin(Logs logs) override{}; /** diff --git a/src/bindings/NeuralNetPy.cpp b/src/bindings/NeuralNetPy.cpp index 383cd7b..0f3f202 100644 --- a/src/bindings/NeuralNetPy.cpp +++ b/src/bindings/NeuralNetPy.cpp @@ -19,6 +19,7 @@ #include "optimizers/optimizers.hpp" #include "callbacks/Callback.hpp" #include "callbacks/EarlyStopping.hpp" +#include "callbacks/CSVLogger.hpp" #include "utils/Enums.hpp" #include "TemplateBindings.hpp" // Template classes binding functions @@ -169,6 +170,37 @@ PYBIND11_MODULE(NeuralNetPy, m) network.train(inputs, labels, 100, [earlyStopping]) )pbdoc"); + py::class_>(m, "CSVLogger") + .def(py::init(), + py::arg("filename"), + py::arg("separator") = ",", + R"pbdoc( + Initializes a ``CSVLogger`` callback. This callback will log the training process in a CSV file. + + .. highlight: python + .. code-block:: python + :caption: Example + + import NeuralNetPy as NNP + + network = NNP.Network() + network.setup(optimizer=NNP.SGD(0.01), loss=NNP.LOSS.MCQ) + network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) + network.addLayer(NNP.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) + + inputs = [ + [0.4, 0.5, 0.67], + [0.3, 0.2, 0.1], + [0.1, 0.2, 0.3] + ] + + labels = [1, 0, 1] + + csvLogger = NNP.CSVLogger("logs.csv") + + network.train(inputs, labels, 100, [csvLogger]) + )pbdoc"); + py::bind_vector>>(m, "VectorCallback"); py::bind_vector>>(m, "VectorEarlyStopping"); From c4275e798adb364dc6880c99ff63260c0bb60bc8 Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Mon, 4 Mar 2024 17:26:17 +0100 Subject: [PATCH 07/12] feat(callbacks): CSVLogger callback --- docs/usage.rst | 2 +- examples/train-predict-MNIST/main.py | 4 +- examples/train-predict-MNIST/training.csv | 4 + src/NeuralNet/callbacks/CSVLogger.hpp | 96 +++++++++++++++++++++++ src/NeuralNet/callbacks/EarlyStopping.hpp | 9 --- src/NeuralNet/utils/Functions.hpp | 14 ++++ src/bindings/NeuralNetPy.cpp | 32 ++++++++ 7 files changed, 149 insertions(+), 12 deletions(-) create mode 100644 examples/train-predict-MNIST/training.csv create mode 100644 src/NeuralNet/callbacks/CSVLogger.hpp diff --git a/docs/usage.rst b/docs/usage.rst index e073439..29f33f9 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -18,7 +18,7 @@ Then you can import the module just like any other : .. code-block:: python - import NeuralNetPy as nnp + import NeuralNetPy as NNP .. Attention:: The path to the build folder must be set before importing the module. 
\ No newline at end of file diff --git a/examples/train-predict-MNIST/main.py b/examples/train-predict-MNIST/main.py index 11a8e59..e2bc3dd 100644 --- a/examples/train-predict-MNIST/main.py +++ b/examples/train-predict-MNIST/main.py @@ -53,9 +53,9 @@ trainingData.batch(128) -callbacks = [NNP.EarlyStopping("LOSS", 0.1, 1)] +callbacks = [NNP.EarlyStopping("LOSS", 0.1, 1), NNP.CSVLogger("training.csv")] -network.train(trainingData, 10, callbacks) +network.train(trainingData, 3, callbacks) f_x_test = [normalize_img(x) for x in x_test] diff --git a/examples/train-predict-MNIST/training.csv b/examples/train-predict-MNIST/training.csv new file mode 100644 index 0000000..5f5965c --- /dev/null +++ b/examples/train-predict-MNIST/training.csv @@ -0,0 +1,4 @@ +LOSS,ACCURACY,EPOCH, +0.628645,1.000000,0.000000, +0.217920,1.000000,1.000000, +0.130259,1.000000,2.000000, diff --git a/src/NeuralNet/callbacks/CSVLogger.hpp b/src/NeuralNet/callbacks/CSVLogger.hpp new file mode 100644 index 0000000..4681599 --- /dev/null +++ b/src/NeuralNet/callbacks/CSVLogger.hpp @@ -0,0 +1,96 @@ +#pragma once + +#include +#include +#include +#include "Callback.hpp" +#include "utils/Functions.hpp" // fileExistsWithExtension + +namespace NeuralNet +{ + class CSVLogger : public Callback + { + public: + CSVLogger(const std::string &filename, const std::string &separator = ",") + { + assert(fileHasExtension(filename, ".csv") && "Filename must have .csv extension"); + this->filename = filename; + this->separator = separator; + }; + + void onEpochBegin(Logs logs) override{}; + + void onEpochEnd(Logs logs) override + { + std::vector row; + + row.reserve(logs.size()); + + std::transform(logs.begin(), logs.end(), std::back_inserter(row), + [](const auto &log) + { return static_cast(log.second); }); + + data.push_back(row); + }; + + void onTrainBegin(Logs logs) override + { + // Initializing the headers with the logs keys + for (const auto &log : logs) + { + headers.push_back(log.first); + }; + }; + + void onTrainEnd(Logs logs) override + { + std::ofstream file(filename); + + if (!file.is_open()) + { + throw std::runtime_error("Couldn't open csv file"); + } + + file << formatRow(headers); + + std::for_each(data.begin(), data.end(), [&file, this](auto &row) + { file << this->formatRow(row); }); + + file.close(); + }; + + void onBatchBegin(Logs logs) override{}; + void onBatchEnd(Logs logs) override{}; + + private: + std::string filename; + std::string separator; + std::vector headers; + std::vector> data; + + template + std::string formatRow(const std::vector &v) + { + std::string csvRow; + + for (T el : v) + { + csvRow += std::to_string(el) + separator; + } + + return csvRow + "\n"; + }; + + std::string formatRow(const std::vector &v) + { + std::string csvRow; + + for (const std::string &el : v) + { + csvRow += el + separator; + } + + return csvRow + "\n"; + }; + }; +} // namespace NeuralNet \ No newline at end of file diff --git a/src/NeuralNet/callbacks/EarlyStopping.hpp b/src/NeuralNet/callbacks/EarlyStopping.hpp index 31dc093..89ca02c 100644 --- a/src/NeuralNet/callbacks/EarlyStopping.hpp +++ b/src/NeuralNet/callbacks/EarlyStopping.hpp @@ -26,15 +26,6 @@ namespace NeuralNet this->patience = patience; }; - /** - * @brief This method will be called at the beginning of each epoch - * - * @param epoch The current epoch - * @param logs The logs of the current epoch - * @return Returns true if the training should continue otherwise returns false - * - * @warning The order of the logs should be the same as the order of the metrics. 
- */ void onEpochBegin(Logs logs) override{}; /** diff --git a/src/NeuralNet/utils/Functions.hpp b/src/NeuralNet/utils/Functions.hpp index 3190c22..b7816a5 100644 --- a/src/NeuralNet/utils/Functions.hpp +++ b/src/NeuralNet/utils/Functions.hpp @@ -73,6 +73,20 @@ namespace NeuralNet return false; } + /** + * @brief This function checks if a file has a specific extension + * + * @param filePath The path of the file + * @param extension The extension that's checked + * + * @return Returns true if the file has the specified extension otherwise returns false + */ + inline bool fileHasExtension(const std::string &filePath, const std::string &extension) + { + fs::path file(filePath); + return file.has_extension() && file.extension() == extension; + } + /* MATHEMATICAL FUNCTIONS */ /** diff --git a/src/bindings/NeuralNetPy.cpp b/src/bindings/NeuralNetPy.cpp index 383cd7b..0f3f202 100644 --- a/src/bindings/NeuralNetPy.cpp +++ b/src/bindings/NeuralNetPy.cpp @@ -19,6 +19,7 @@ #include "optimizers/optimizers.hpp" #include "callbacks/Callback.hpp" #include "callbacks/EarlyStopping.hpp" +#include "callbacks/CSVLogger.hpp" #include "utils/Enums.hpp" #include "TemplateBindings.hpp" // Template classes binding functions @@ -169,6 +170,37 @@ PYBIND11_MODULE(NeuralNetPy, m) network.train(inputs, labels, 100, [earlyStopping]) )pbdoc"); + py::class_>(m, "CSVLogger") + .def(py::init(), + py::arg("filename"), + py::arg("separator") = ",", + R"pbdoc( + Initializes a ``CSVLogger`` callback. This callback will log the training process in a CSV file. + + .. highlight: python + .. code-block:: python + :caption: Example + + import NeuralNetPy as NNP + + network = NNP.Network() + network.setup(optimizer=NNP.SGD(0.01), loss=NNP.LOSS.MCQ) + network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) + network.addLayer(NNP.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) + + inputs = [ + [0.4, 0.5, 0.67], + [0.3, 0.2, 0.1], + [0.1, 0.2, 0.3] + ] + + labels = [1, 0, 1] + + csvLogger = NNP.CSVLogger("logs.csv") + + network.train(inputs, labels, 100, [csvLogger]) + )pbdoc"); + py::bind_vector>>(m, "VectorCallback"); py::bind_vector>>(m, "VectorEarlyStopping"); From 3d6d54ff4ea7e4be872992c9b34fb7bceb4fb196 Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Mon, 4 Mar 2024 22:13:38 +0100 Subject: [PATCH 08/12] docs(callbacks): documented CSVLogger methods --- TODO.md | 1 + src/NeuralNet/callbacks/CSVLogger.hpp | 29 +++++++++++++++++++++ src/bindings/NeuralNetPy.cpp | 36 +++++++++++++-------------- 3 files changed, 48 insertions(+), 18 deletions(-) diff --git a/TODO.md b/TODO.md index ccc34c3..8244a03 100644 --- a/TODO.md +++ b/TODO.md @@ -1,6 +1,7 @@ ## TODOS : - [ ] Setup `clang-format` +- [ ] CI versioning - [ ] Find out why the predictions are not accurate on my_samples - [ ] Implement batch norm - [ ] Parallelize operations diff --git a/src/NeuralNet/callbacks/CSVLogger.hpp b/src/NeuralNet/callbacks/CSVLogger.hpp index 4681599..839b0bc 100644 --- a/src/NeuralNet/callbacks/CSVLogger.hpp +++ b/src/NeuralNet/callbacks/CSVLogger.hpp @@ -20,6 +20,13 @@ namespace NeuralNet void onEpochBegin(Logs logs) override{}; + /** + * @brief This method will be called at the end of each epoch + * + * In the case of CSVLogger, it will append the logs of the current epoch to data which `onTrainEnd` will be written to the file. 
+ * + * @param logs The logs of the current epoch + */ void onEpochEnd(Logs logs) override { std::vector row; @@ -33,6 +40,13 @@ namespace NeuralNet data.push_back(row); }; + /** + * @brief This method will be called at the beginning of the training. + * + * It will initialize the headers with the logs keys. + * + * @param logs The logs of the current epoch + */ void onTrainBegin(Logs logs) override { // Initializing the headers with the logs keys @@ -42,6 +56,13 @@ namespace NeuralNet }; }; + /** + * @brief This method will be called at the end of the training. + * + * It will write the data in the given csv file. + * + * @param logs The logs of the current epoch + */ void onTrainEnd(Logs logs) override { std::ofstream file(filename); @@ -68,6 +89,14 @@ namespace NeuralNet std::vector headers; std::vector> data; + /** + * @brief This method will format a row of the csv file + * + * @tparam T The type of the elements in the row + * @param v The row to format + * + * @return The row in a csv format + */ template std::string formatRow(const std::vector &v) { diff --git a/src/bindings/NeuralNetPy.cpp b/src/bindings/NeuralNetPy.cpp index 0f3f202..e7a4d4f 100644 --- a/src/bindings/NeuralNetPy.cpp +++ b/src/bindings/NeuralNetPy.cpp @@ -136,6 +136,24 @@ PYBIND11_MODULE(NeuralNetPy, m) layer = NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE) )pbdoc"); + py::class_>(m, "Flatten") + .def(py::init>(), + py::arg("inputShape"), + R"pbdoc( + Initializes a ``Flatten`` layer. The sole purpose of this layer is to vectorize matrix inputs like images. + + .. code-block:: python + :caption: Example + + import NeuralNetPy as NNP + + layer = NNP.Flatten((3, 3)) + )pbdoc"); + + py::bind_vector>>(m, "VectorLayer"); + py::bind_vector>>(m, "VectorFlatten"); + py::bind_vector>>(m, "VectorDense"); + py::class_>(m, "Callback"); py::class_>(m, "EarlyStopping") @@ -204,24 +222,6 @@ PYBIND11_MODULE(NeuralNetPy, m) py::bind_vector>>(m, "VectorCallback"); py::bind_vector>>(m, "VectorEarlyStopping"); - py::class_>(m, "Flatten") - .def(py::init>(), - py::arg("inputShape"), - R"pbdoc( - Initializes a ``Flatten`` layer. The sole purpose of this layer is to vectorize matrix inputs like images. - - .. code-block:: python - :caption: Example - - import NeuralNetPy as NNP - - layer = NNP.Flatten((3, 3)) - )pbdoc"); - - py::bind_vector>>(m, "VectorLayer"); - py::bind_vector>>(m, "VectorFlatten"); - py::bind_vector>>(m, "VectorDense"); - // TrainingData with 2 dimensional inputs bindTrainingData>, std::vector>(m, "TrainingData2dI", R"pbdoc( Represents training data with 2 dimensional inputs (vectors). This class is supposed to bring the table some methods to easily manipulate the data and prepare it for training. 
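The ``CSVLogger`` documented above writes one row per epoch using the keys it receives in the logs (``LOSS``, ``ACCURACY``, ``EPOCH`` in the sample ``training.csv`` added by this patch), and each row ends with a trailing separator. A minimal sketch of reading that file back with Python's standard ``csv`` module — the filename ``training.csv`` is taken from the MNIST example above, everything else is illustrative only:

.. code-block:: python

    import csv

    # training.csv is the file produced by NNP.CSVLogger("training.csv") during network.train(...)
    with open("training.csv", newline="") as f:
        reader = csv.reader(f)
        # First row: ['LOSS', 'ACCURACY', 'EPOCH', ''] -- the trailing separator
        # after the last column yields an empty trailing field, so drop it.
        headers = [h for h in next(reader) if h]
        for row in reader:
            values = dict(zip(headers, map(float, row[: len(headers)])))
            print(f"epoch {int(values['EPOCH'])}: "
                  f"loss={values['LOSS']:.6f} accuracy={values['ACCURACY']:.6f}")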
From 9a02574ce0c699cf5a81d68aaab1714b5a38ae02 Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Tue, 5 Mar 2024 09:43:34 +0100 Subject: [PATCH 09/12] fix(callbacks): Missing include functional --- src/NeuralNet/callbacks/Callback.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/NeuralNet/callbacks/Callback.hpp b/src/NeuralNet/callbacks/Callback.hpp index f67f6a7..6545e06 100644 --- a/src/NeuralNet/callbacks/Callback.hpp +++ b/src/NeuralNet/callbacks/Callback.hpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace NeuralNet { From 1263df359be80897ae33c74db2c28a4da90eb0ab Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Tue, 5 Mar 2024 09:51:02 +0100 Subject: [PATCH 10/12] fix(callbacks): duplicate CSVLogger definition --- src/NeuralNet/callbacks/CSVLogger.hpp | 103 ++-------------------- src/NeuralNet/callbacks/EarlyStopping.hpp | 8 +- 2 files changed, 11 insertions(+), 100 deletions(-) diff --git a/src/NeuralNet/callbacks/CSVLogger.hpp b/src/NeuralNet/callbacks/CSVLogger.hpp index db8e871..d380c86 100644 --- a/src/NeuralNet/callbacks/CSVLogger.hpp +++ b/src/NeuralNet/callbacks/CSVLogger.hpp @@ -11,6 +11,12 @@ namespace NeuralNet class CSVLogger : public Callback { public: + /** + * @brief CSVLogger is a `Callback` that streams epoch results to a csv file + * + * @param filename The name of the csv file + * @param separator The separator used in the csv file (default: ",") + */ CSVLogger(const std::string &filename, const std::string &separator = ",") { assert(fileHasExtension(filename, ".csv") && "Filename must have .csv extension"); @@ -122,99 +128,4 @@ namespace NeuralNet return csvRow + "\n"; }; }; -} // namespa#pragma once - -#include -#include -#include -#include "Callback.hpp" -#include "utils/Functions.hpp" // fileExistsWithExtension - -namespace NeuralNet -{ - class CSVLogger : public Callback - { - public: - CSVLogger(const std::string &filename, const std::string &separator = ",") - { - assert(fileExistsWithExtension(filename, ".csv") && "The file doesn't exists or is not a CSV file '.csv'"); - this->filename = filename; - this->separator = separator; - }; - - void onEpochBegin(Logs logs) override{}; - - void onEpochEnd(Logs logs) override - { - std::vector row; - - row.reserve(logs.size()); - - std::transform(logs.begin(), logs.end(), std::back_inserter(row), - [](const auto &log) - { return static_cast(log.second); }); - - data.push_back(row); - }; - - void onTrainBegin(Logs logs) override - { - // Initializing the headers with the logs keys - for (const auto &log : logs) - { - headers.push_back(log.first); - }; - }; - - void onTrainEnd(Logs logs) override - { - std::ofstream file(filename); - - if (!file.is_open()) - { - throw std::runtime_error("Couldn't open csv file"); - } - - file << formatRow(headers); - - std::for_each(data.begin(), data.end(), [&file, this](auto &row) - { file << this->formatRow(row); }); - - file.close(); - }; - - void onBatchBegin(Logs logs) override{}; - void onBatchEnd(Logs logs) override{}; - - private: - std::string filename; - std::string separator; - std::vector headers; - std::vector> data; - - template - std::string formatRow(const std::vector &v) - { - std::string csvRow; - - for (T el : v) - { - csvRow += std::to_string(el) + separator; - } - - return csvRow + "\n"; - }; - - std::string formatRow(const std::vector &v) - { - std::string csvRow; - - for (const std::string &el : v) - { - csvRow += el + separator; - } - - return csvRow + "\n"; - }; - }; -} // namespa \ No newline at end of file +} // namespace NeuralNet diff --git 
a/src/NeuralNet/callbacks/EarlyStopping.hpp b/src/NeuralNet/callbacks/EarlyStopping.hpp index 89ca02c..c63bc9f 100644 --- a/src/NeuralNet/callbacks/EarlyStopping.hpp +++ b/src/NeuralNet/callbacks/EarlyStopping.hpp @@ -14,11 +14,11 @@ namespace NeuralNet /** * @brief EarlyStopping is a `Callback` that stops training when a monitored metric has stopped improving. * - * @param metric The metric to monitor default is `LOSS` - * @param minDelta Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than minDelta, will count as no improvement. - * @param patience Number of epochs with no improvement after which training will be stopped. + * @param metric The metric to monitor (default: `LOSS`) + * @param minDelta Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than minDelta, will count as no improvement. (default: 0) + * @param patience Number of epochs with no improvement after which training will be stopped. (default: 0) */ - EarlyStopping(const std::string &metric, double minDelta = 0, int patience = 0) + EarlyStopping(const std::string &metric = "LOSS", double minDelta = 0, int patience = 0) { checkMetric(metric, metrics); this->metric = metric; From 69336d011f6cd7ff4b321011577777043e32b0c3 Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Tue, 5 Mar 2024 21:45:25 +0100 Subject: [PATCH 11/12] docs(py-docs): updated and improved pydocs --- docs/NeuralNetPy.rst | 10 +- docs/submodules/callbacks.rst | 3 + docs/submodules/layers.rst | 3 + docs/submodules/models.rst | 3 + docs/submodules/optimizers.rst | 3 + src/bindings/NeuralNetPy.cpp | 363 +++++++++++++++++++-------------- 6 files changed, 227 insertions(+), 158 deletions(-) create mode 100644 docs/submodules/callbacks.rst create mode 100644 docs/submodules/layers.rst create mode 100644 docs/submodules/models.rst create mode 100644 docs/submodules/optimizers.rst diff --git a/docs/NeuralNetPy.rst b/docs/NeuralNetPy.rst index b22ab8e..c96c453 100644 --- a/docs/NeuralNetPy.rst +++ b/docs/NeuralNetPy.rst @@ -1,7 +1,15 @@ NeuralNetPy ================ +.. toctree:: + :maxdepth: 2 + + submodules/callbacks + submodules/optimizers + submodules/layers + submodules/models + .. automodule:: NeuralNetPy :members: :undoc-members: - :show-inheritance: + :show-inheritance: \ No newline at end of file diff --git a/docs/submodules/callbacks.rst b/docs/submodules/callbacks.rst new file mode 100644 index 0000000..063a082 --- /dev/null +++ b/docs/submodules/callbacks.rst @@ -0,0 +1,3 @@ +.. automodule:: NeuralNetPy.callbacks + :members: + :show-inheritance: \ No newline at end of file diff --git a/docs/submodules/layers.rst b/docs/submodules/layers.rst new file mode 100644 index 0000000..9423dd6 --- /dev/null +++ b/docs/submodules/layers.rst @@ -0,0 +1,3 @@ +.. automodule:: NeuralNetPy.layers + :members: + :show-inheritance: \ No newline at end of file diff --git a/docs/submodules/models.rst b/docs/submodules/models.rst new file mode 100644 index 0000000..c02a60f --- /dev/null +++ b/docs/submodules/models.rst @@ -0,0 +1,3 @@ +.. automodule:: NeuralNetPy.models + :members: + :show-inheritance: \ No newline at end of file diff --git a/docs/submodules/optimizers.rst b/docs/submodules/optimizers.rst new file mode 100644 index 0000000..32ac2ad --- /dev/null +++ b/docs/submodules/optimizers.rst @@ -0,0 +1,3 @@ +.. 
automodule:: NeuralNetPy.optimizers + :members: + :show-inheritance: \ No newline at end of file diff --git a/src/bindings/NeuralNetPy.cpp b/src/bindings/NeuralNetPy.cpp index 87591f8..3b1a4d9 100644 --- a/src/bindings/NeuralNetPy.cpp +++ b/src/bindings/NeuralNetPy.cpp @@ -68,160 +68,163 @@ PYBIND11_MODULE(NeuralNetPy, m) .value("QUADRATIC", LOSS::QUADRATIC) .value("MCE", LOSS::MCE); - py::class_>(m, "Optimizer"); + py::module optimizers_m = m.def_submodule("optimizers", R"pbdoc( + Optimizers + ---------- - py::class_>(m, "SGD") - .def(py::init(), - py::arg("alpha") = 0.001, - R"pbdoc( - For more information on `Stochastic Gradient Descent ` + Optimizers are algorithms or methods used to change the attributes of the Neural Network such as weights and learning rate in order to reduce the losses. They are used to solve the optimization problem of minimizing the loss function. - .. highlight: python - .. code-block:: python - :caption: Example + .. currentmodule:: NeuralNetPy.optimizers + .. autosummary:: + :toctree: _generate + :recursive: + )pbdoc"); - import NeuralNetPy as NNP + py::class_>(optimizers_m, "Optimizer"); - network = NNP.Network() - network.setup(optimizer=NNP.SGD(0.01)) - )pbdoc"); + py::class_>(optimizers_m, "SGD", R"pbdoc( + For more information on `Stochastic Gradient Descent ` - py::class_>(m, "Adam") + :param alpha: The learning rate, defaults to 0.001 + :type alpha: float + )pbdoc") + .def(py::init(), + py::arg("alpha") = 0.001); + + py::class_>(optimizers_m, "Adam", R"pbdoc( + For more information on `Adam optimizer ` + + :param alpha: The learning rate, defaults to 0.001 + :type alpha: float + :param beta1: The exponential decay rate for the first moment estimates, defaults to 0.9 + :type beta1: float + :param beta2: The exponential decay rate for the second-moment estimates, defaults to 0.999 + :type beta2: float + :param epsilon: A small constant for numerical stability, defaults to 10E-8 + :type epsilon: float + )pbdoc") .def(py::init(), py::arg("alpha") = 0.001, py::arg("beta1") = 0.9, py::arg("beta2") = 0.999, - py::arg("epsilon") = 10E-8, - R"pbdoc( - For more information on `Adam optimizer ` - - .. highlight: python - .. code-block:: python - :caption: Example + py::arg("epsilon") = 10E-8); - import NeuralNetPy as NNP + py::module layers_m = m.def_submodule("layers", R"pbdoc( + Layers + ------ - network = NNP.Network() - network.setup(optimizer=NNP.Adam(0.001)) - )pbdoc"); + Layers are the building blocks of a Neural Network. They are the individual neurons that are connected to each other to form the network. Each layer has a specific number of neurons and an activation function. - py::class_>(m, "Layer") - .def(py::init(), - py::arg("nNeurons"), - py::arg("activationFunc") = ACTIVATION::SIGMOID, - py::arg("weightInit") = WEIGHT_INIT::RANDOM, - py::arg("bias") = 0, - R"pbdoc( - Base class of all layers. + .. currentmodule:: NeuralNetPy.layers + .. autosummary:: + :toctree: _generate + :recursive: + )pbdoc"); - .. tip:: - It is recommended that you use it's derivatives instead. Like `Dense` or `Flatten` since they're more specific. - )pbdoc") + py::class_>(layers_m, "Layer") + .def(py::init(), py::arg("nNeurons"), py::arg("activationFunc") = ACTIVATION::SIGMOID, py::arg("weightInit") = WEIGHT_INIT::RANDOM, py::arg("bias") = 0, "This is a simple test") .def("getNumNeurons", &Layer::getNumNeurons); - py::class_>(m, "Dense") + py::class_>(layers_m, "Dense", R"pbdoc( + Initializes a ``Dense`` layer, which is the backbone of a Neural Network. 
+ + :param nNeurons: The number of neurons in the layer + :type nNeurons: int + :param activationFunc: The activation function to be used, defaults to ``SIGMOID`` + :type activationFunc: ACTIVATION + :param weightInit: The weight initialization method to be used, defaults to ``RANDOM`` + :type weightInit: WEIGHT_INIT + :param bias: The bias to be used, defaults to 0 + :type bias: int + + .. highlight: python + .. code-block:: python + :caption: Example + + import NeuralNetPy as NNP + + layer = NNP.layers.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE) + )pbdoc") .def(py::init(), py::arg("nNeurons"), py::arg("activationFunc") = ACTIVATION::SIGMOID, py::arg("weightInit") = WEIGHT_INIT::RANDOM, - py::arg("bias") = 0, - R"pbdoc( - Initializes a ``Dense`` layer, which is the backbone of a Neural Network. - - .. highlight: python - .. code-block:: python - :caption: Example + py::arg("bias") = 0); - import NeuralNetPy as NNP + py::class_>(layers_m, "Flatten", R"pbdoc( + Initializes a ``Flatten`` layer. The sole purpose of this layer is to vectorize matrix inputs like images. - layer = NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE) - )pbdoc"); + :param inputShape: The shape of the input matrix (rows, cols or number of pixels per row and column in the case of images) + :type inputShape: tuple - py::class_>(m, "Flatten") - .def(py::init>(), - py::arg("inputShape"), - R"pbdoc( - Initializes a ``Flatten`` layer. The sole purpose of this layer is to vectorize matrix inputs like images. + .. code-block:: python + :caption: Example - .. code-block:: python - :caption: Example + import NeuralNetPy as NNP - import NeuralNetPy as NNP + layer = NNP.layers.Flatten((3, 3)) + )pbdoc") + .def(py::init>(), + py::arg("inputShape")); - layer = NNP.Flatten((3, 3)) - )pbdoc"); + py::bind_vector>>(layers_m, "VectorLayer"); + py::bind_vector>>(layers_m, "VectorFlatten"); + py::bind_vector>>(layers_m, "VectorDense"); - py::bind_vector>>(m, "VectorLayer"); - py::bind_vector>>(m, "VectorFlatten"); - py::bind_vector>>(m, "VectorDense"); + py::module callbacks_m = m.def_submodule("callbacks", R"pbdoc( + Callbacks + ---------- - py::class_>(m, "Callback"); + Callbacks are a set of functions that can be applied at given stages of the training procedure. They can be used to get a view on internal states and statistics of the model during training. You can pass a list of callbacks to the ``train`` method of the ``Network`` class. + Each callback has it's own purpose make sure the read their documentation carefully. - py::class_>(m, "EarlyStopping") - .def(py::init(), - py::arg("metric"), - py::arg("minDelta") = 0.01, - py::arg("patience") = 0, - R"pbdoc( - Initializes an ``EarlyStopping`` callback. This callback will stop the training if the given metric doesn't improve more than the given delta over a certain number of epochs (patience). + .. currentmodule:: NeuralNetPy.callbacks + .. autosummary:: + :toctree: _generate + :recursive: + )pbdoc"); - .. highlight: python - .. code-block:: python - :caption: Example + py::class_>(callbacks_m, "Callback", R"pbdoc( + This is the base class for all callbacks. + )pbdoc"); - import NeuralNetPy as NNP + py::class_>(callbacks_m, "EarlyStopping", R"pbdoc( + Initializes an ``EarlyStopping`` callback. This callback will stop the training if the given metric doesn't improve more than the given delta over a certain number of epochs (patience). 
- network = NNP.Network() - network.setup(optimizer=NNP.SGD(0.01), loss=NNP.LOSS.MCQ) - network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) - network.addLayer(NNP.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) + :param metric: The metric to be monitored (Either ``LOSS`` or ``ACCURACY``), defaults to ``LOSS`` + :type metric: str + :param minDelta: The minimum change in the monitored metric to be considered an improvement, defaults to 0.01 + :type minDelta: float + :param patience: The number of epochs with no improvement after which training will be stopped, defaults to 0 + :type patience: int - inputs = [ - [0.4, 0.5, 0.67], - [0.3, 0.2, 0.1], - [0.1, 0.2, 0.3] - ] + .. highlight: python + .. code-block:: python + :caption: Example - labels = [1, 0, 1] + network.train(inputs, labels, 100, [NNP.callbacks.EarlyStopping("loss", 0.01, 10)]) + )pbdoc") + .def(py::init(), + py::arg("metric") = "LOSS", + py::arg("minDelta") = 0.01, + py::arg("patience") = 0); - earlyStopping = NNP.EarlyStopping("loss", 0.01, 10) + py::class_>(callbacks_m, "CSVLogger", R"pbdoc( + Initializes a ``CSVLogger`` callback. This callback will log the training process in a CSV file. - network.train(inputs, labels, 100, [earlyStopping]) - )pbdoc"); + .. highlight: python + .. code-block:: python + :caption: Example - py::class_>(m, "CSVLogger") + network.train(inputs, labels, 100, [NNP.callbacks.CSVLogger("logs.csv")]) + )pbdoc") .def(py::init(), py::arg("filename"), - py::arg("separator") = ",", - R"pbdoc( - Initializes a ``CSVLogger`` callback. This callback will log the training process in a CSV file. - - .. highlight: python - .. code-block:: python - :caption: Example - - import NeuralNetPy as NNP - - network = NNP.Network() - network.setup(optimizer=NNP.SGD(0.01), loss=NNP.LOSS.MCQ) - network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) - network.addLayer(NNP.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) - - inputs = [ - [0.4, 0.5, 0.67], - [0.3, 0.2, 0.1], - [0.1, 0.2, 0.3] - ] - - labels = [1, 0, 1] + py::arg("separator") = ","); - csvLogger = NNP.CSVLogger("logs.csv") - - network.train(inputs, labels, 100, [csvLogger]) - )pbdoc"); - - py::bind_vector>>(m, "VectorCallback"); - py::bind_vector>>(m, "VectorEarlyStopping"); + py::bind_vector>>(callbacks_m, "VectorCallback"); + py::bind_vector>>(callbacks_m, "VectorEarlyStopping"); + py::bind_vector>>(callbacks_m, "VectorCSVLogger"); // TrainingData with 2 dimensional inputs bindTrainingData>, std::vector>(m, "TrainingData2dI", R"pbdoc( @@ -289,7 +292,20 @@ PYBIND11_MODULE(NeuralNetPy, m) * * This is why I had to specify the type "Network", I'll have to do so for every type added */ - py::class_(m, "Model") + + py::module models_m = m.def_submodule("models", R"pbdoc( + Models + ------ + + Models are used in machine learning to make predictions or decisions without being explicitly programmed to do so. + + .. currentmodule:: NeuralNetPy.models + .. autosummary:: + :toctree: _generate + :recursive: + )pbdoc"); + + py::class_(models_m, "Model", "Base class for all models") .def_static("save_to_file", &Model::save_to_file, R"pbdoc( This function will save the given ``Model``'s parameters in a binary file. 
@@ -299,14 +315,14 @@ PYBIND11_MODULE(NeuralNetPy, m) import NeuralNetPy as NNP - network = NNP.Network() - network.setup(optimizer=NNP.SGD(0.01)) - network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) - network.addLayer(NNP.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) + network = NNP.models.Network() + network.setup(optimizer=NNP.optimizers.SGD(0.01)) + network.addLayer(NNP.layers.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) + network.addLayer(NNP.layers.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) # ... after training - NNP.Model.save_to_file(network, "network.bin") + NNP.models.Model.save_to_file(network, "network.bin") .. warning:: The file content will be overwritten if it already exists. @@ -321,35 +337,39 @@ PYBIND11_MODULE(NeuralNetPy, m) import NeuralNetPy as NNP # Initialize an empty network - network = NNP.Network() + network = NNP.models.Network() # Populate it with previously saved parameters - NNP.Model.load_from_file("network.bin", network) + NNP.models.Model.load_from_file("network.bin", network) )pbdoc"); - py::class_(m, "Network") - .def(py::init<>(), R"pbdoc( - Initializes a Neural Network - )pbdoc") - .def("setup", - &Network::setup, - py::arg("optimizer"), - py::arg("loss") = LOSS::QUADRATIC, - R"pbdoc( - Setup the network with the given optimizer and loss function + py::class_(models_m, "Network", R"pbdoc( + This is the base of a Neural Network. You can setup the network with the given optimizer and loss function. - .. highlight: python - .. code-block:: python - :caption: Example + :param optimizer: The optimizer to be used from the ``optimizers`` module + :type optimizer: Optimizer + :param loss: The loss function to be used from the ``LOSS`` enum, defaults to ``QUADRATIC`` + :type loss: LOSS - import NeuralNetPy as NNP + .. highlight: python + .. code-block:: python + :caption: Example - network = NNP.Network() - network.setup(optimizer=NNP.SGD(0.01), loss=NNP.LOSS.MCQ) - )pbdoc") + import NeuralNetPy as NNP + + network = NNP.models.Network() + network.setup(optimizer=NNP.SGD(0.01), loss=NNP.LOSS.MCQ) + )pbdoc") + .def(py::init<>()) + .def("setup", + &Network::setup, + py::arg("optimizer"), + py::arg("loss") = LOSS::QUADRATIC) .def("addLayer", &Network::addLayer, R"pbdoc( Add a layer to the network. + :param layer: The layer to be added + :type layer: Layer .. highlight: python .. code-block:: python @@ -357,8 +377,8 @@ PYBIND11_MODULE(NeuralNetPy, m) import NeuralNetPy as NNP - network = NNP.Network() - network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) + network = NNP.models.Network() + network.addLayer(NNP.layers.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) .. warning:: The order of the layers added is important, it will reflect the overall structure of the network. @@ -372,15 +392,20 @@ PYBIND11_MODULE(NeuralNetPy, m) R"pbdoc( Get a layer from the network by it's index. They're 0-indexed. + :param index: The index of the layer + :type index: int + :return: The layer at the given index + :rtype: Layer + .. highlight: python .. 
code-block:: python :caption: Example import NeuralNetPy as NNP - network = NNP.Network() - network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) - network.addLayer(NNP.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) + network = NNP.models.Network() + network.addLayer(NNP.layers.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) + network.addLayer(NNP.layers.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) layer = network.getLayer(1) # Return Dense layer with 2 neurons )pbdoc") @@ -389,8 +414,13 @@ PYBIND11_MODULE(NeuralNetPy, m) Train the network by passing it 2 dimensional inputs (vectors). :param inputs: A list of vectors representing the inputs + :type inputs: list[list[float]] :param labels: A list of labels + :type labels: list[float] :param epochs: The number of epochs to train the network + :type epochs: int + :param callbacks: A list of callbacks to be used during the training + :type callbacks: list[Callback] :return: The average loss throughout the training :rtype: float @@ -400,10 +430,10 @@ PYBIND11_MODULE(NeuralNetPy, m) import NeuralNetPy as NNP - network = NNP.Network() - network.setup(optimizer=NNP.SGD(0.01), loss=NNP.LOSS.MCQ) - network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) - network.addLayer(NNP.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) + network = NNP.models.Network() + network.setup(optimizer=NNP.optimizers.SGD(0.01), loss=NNP.LOSS.MCQ) + network.addLayer(NNP.layers.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) + network.addLayer(NNP.layers.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) inputs = [ [0.4, 0.5, 0.67], @@ -420,9 +450,15 @@ PYBIND11_MODULE(NeuralNetPy, m) Train the network by passing it a list of 3 dimensional inputs (matrices). :param inputs: A list of matrices representing the inputs + :type inputs: list[list[list[float]]] :param labels: A list of labels + :type labels: list[float] :param epochs: The number of epochs to train the network + :type epochs: int + :param callbacks: A list of callbacks to be used during the training + :type callbacks: list[Callback] :return: The average loss throughout the training + :rtype: float .. highlight: python .. code-block: python @@ -430,10 +466,10 @@ PYBIND11_MODULE(NeuralNetPy, m) import NeuralNetPy as NNP - network = NNP.Network() - network.setup(optimizer=NNP.SGD(0.01), loss=NNP.LOSS.MCQ) - network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) - network.addLayer(NNP.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) + network = NNP.models.Network() + network.setup(optimizer=NNP.optimizers.SGD(0.01), loss=NNP.LOSS.MCQ) + network.addLayer(NNP.layers.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) + network.addLayer(NNP.layers.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) inputs = [ [ @@ -456,8 +492,13 @@ PYBIND11_MODULE(NeuralNetPy, m) Train the network by passing it a ``TrainingData2dI`` object. :param trainingData: A ``TrainingData2dI`` object + :type trainingData: TrainingData2dI :param epochs: The number of epochs to train the network + :type epochs: int + :param callbacks: A list of callbacks to be used during the training + :type callbacks: list[Callback] :return: The average loss throughout the training + :rtype: float .. highlight: python .. 
code-block: python @@ -465,11 +506,12 @@ PYBIND11_MODULE(NeuralNetPy, m) import NeuralNetPy as NNP - network = NNP.Network() - network.setup(optimizer=NNP.SGD(0.01), loss=NNP.LOSS.MCQ) - network.addLayer(NNP.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) - network.addLayer(NNP.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) + network = NNP.models.Network() + network.setup(optimizer=NNP.optimizers.SGD(0.01), loss=NNP.LOSS.MCQ) + network.addLayer(NNP.layers.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) + network.addLayer(NNP.layers.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE)) + # Meaningless values just for the sake of the example inputs = [ [0.4, 0.5, 0.67], [0.3, 0.2, 0.1], @@ -489,8 +531,13 @@ PYBIND11_MODULE(NeuralNetPy, m) Train the network by passing it a ``TrainingData3dI`` object. :param trainingData: A ``TrainingData3dI`` object + :type trainingData: TrainingData3dI :param epochs: The number of epochs to train the network + :type epochs: int + :param callbacks: A list of callbacks to be used during the training + :type callbacks: list[Callback] :return: The average loss throughout the training + :rtype: float .. highlight: python .. code-block: python @@ -535,6 +582,7 @@ PYBIND11_MODULE(NeuralNetPy, m) Feed forward the given inputs through the network and return the predictions/outputs. :param inputs: A list of vectors representing the inputs + :type inputs: list[list[float]] :return: A matrix representing the outputs of the network for the given inputs :rtype: numpy.ndarray )pbdoc") @@ -542,6 +590,7 @@ PYBIND11_MODULE(NeuralNetPy, m) Feed forward the given inputs through the network and return the predictions/outputs. :param inputs: A list of vectors representing the inputs + :type inputs: list[list[list[float]]] :return: A matrix representing the outputs of the network for the given inputs :rtype: numpy.ndarray )pbdoc"); From c34ae2ed534d265bc6414a31fa912eaf96c3ee48 Mon Sep 17 00:00:00 2001 From: Az-r-ow Date: Tue, 5 Mar 2024 21:50:48 +0100 Subject: [PATCH 12/12] update(example): adapted example to recent bindings changes --- .../train-predict-MNIST/guess_my_samples.py | 4 ++-- examples/train-predict-MNIST/main.py | 18 +++++++++--------- examples/train-predict-MNIST/test.py | 10 +++++----- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/examples/train-predict-MNIST/guess_my_samples.py b/examples/train-predict-MNIST/guess_my_samples.py index 241056f..ec0990b 100644 --- a/examples/train-predict-MNIST/guess_my_samples.py +++ b/examples/train-predict-MNIST/guess_my_samples.py @@ -12,10 +12,10 @@ MY_SAMPLES_FOLDER = "./dataset/my_samples" -network = NNP.Network() +network = NNP.models.Network() # Loading the model from the file into the network created -NNP.Model.load_from_file("model.bin", network) +NNP.models.Model.load_from_file("model.bin", network) inputs = list() diff --git a/examples/train-predict-MNIST/main.py b/examples/train-predict-MNIST/main.py index e2bc3dd..e7c39f0 100644 --- a/examples/train-predict-MNIST/main.py +++ b/examples/train-predict-MNIST/main.py @@ -28,14 +28,14 @@ # Otherwise load data from file (x_train, y_train), (x_test, y_test) = load_data(MNIST_DATASET_FILE) -network = NNP.Network() +network = NNP.models.Network() -network.addLayer(NNP.Flatten((28, 28))) -network.addLayer(NNP.Dense(128, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE)) -network.addLayer(NNP.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN)) +network.addLayer(NNP.layers.Flatten((28, 28))) +network.addLayer(NNP.layers.Dense(128, NNP.ACTIVATION.RELU, 
NNP.WEIGHT_INIT.HE)) +network.addLayer(NNP.layers.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN)) # Setting up the networks parameters -network.setup(optimizer=NNP.Adam(0.02), loss=NNP.LOSS.MCE) +network.setup(optimizer=NNP.optimizers.Adam(0.02), loss=NNP.LOSS.MCE) # # combining the data with the labels for later shuffling # combined = list(zip(x_train, y_train)) @@ -53,7 +53,7 @@ trainingData.batch(128) -callbacks = [NNP.EarlyStopping("LOSS", 0.1, 1), NNP.CSVLogger("training.csv")] +callbacks = [NNP.callbacks.EarlyStopping("LOSS", 0.1, 1), NNP.callbacks.CSVLogger("training.csv")] network.train(trainingData, 3, callbacks) @@ -70,11 +70,11 @@ print(f"Num correct predictions : {correct}/{n} - accuracy {accuracy}") # Saving the trained model in a bin file -NNP.Model.save_to_file('./model.bin', network) +NNP.models.Model.save_to_file('./model.bin', network) -saved_model = NNP.Network() +saved_model = NNP.models.Network() -NNP.Model.load_from_file('./model.bin', saved_model) +NNP.models.Model.load_from_file('./model.bin', saved_model) # preparing the testing data predictions = saved_model.predict(f_x_test[:NUM_PREDICTIONS]) diff --git a/examples/train-predict-MNIST/test.py b/examples/train-predict-MNIST/test.py index 5352393..e458f92 100644 --- a/examples/train-predict-MNIST/test.py +++ b/examples/train-predict-MNIST/test.py @@ -17,14 +17,14 @@ import NeuralNetPy as NNP -network = NNP.Network() +network = NNP.models.Network() -network.addLayer(NNP.Dense(15, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.GLOROT)) -network.addLayer(NNP.Dense(20, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.GLOROT)) -network.addLayer(NNP.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN)) +network.addLayer(NNP.layers.Dense(15, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.GLOROT)) +network.addLayer(NNP.layers.Dense(20, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.GLOROT)) +network.addLayer(NNP.layers.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN)) # Setting up the networks parameters -network.setup(optimizer=NNP.SGD(1), epochs=1, loss=NNP.LOSS.MCE) +network.setup(optimizer=NNP.optimizers.SGD(1), epochs=1, loss=NNP.LOSS.MCE) inputs = list()
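Putting the submodule reorganisation of PATCH 11 together with the example updates of PATCH 12, a small end-to-end script now reads roughly like the sketch below. It is assembled from the calls shown in the docstrings and in ``main.py`` above: the data values are placeholders, ``NNP.LOSS.MCE`` follows ``main.py``, and the argument order of ``save_to_file`` (path first, then network) also follows ``main.py`` — note that the ``Model`` docstring example shows the two arguments in the opposite order, so one of the two will need correcting.

.. code-block:: python

    import NeuralNetPy as NNP

    network = NNP.models.Network()
    network.setup(optimizer=NNP.optimizers.SGD(0.01), loss=NNP.LOSS.MCE)
    network.addLayer(NNP.layers.Dense(3, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE))
    network.addLayer(NNP.layers.Dense(2, NNP.ACTIVATION.SIGMOID, NNP.WEIGHT_INIT.HE))

    # Placeholder data mirroring the docstring examples
    inputs = [
        [0.4, 0.5, 0.67],
        [0.3, 0.2, 0.1],
        [0.1, 0.2, 0.3],
    ]
    labels = [1, 0, 1]

    callbacks = [
        NNP.callbacks.EarlyStopping("LOSS", 0.01, 2),
        NNP.callbacks.CSVLogger("training.csv"),
    ]

    network.train(inputs, labels, 10, callbacks)

    # Persist the trained parameters, then restore them into a fresh network
    NNP.models.Model.save_to_file("./model.bin", network)

    restored = NNP.models.Network()
    NNP.models.Model.load_from_file("./model.bin", restored)

    predictions = restored.predict(inputs)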