From 1af5dea9392022508ca3c0451a42911aef702947 Mon Sep 17 00:00:00 2001
From: Az-r-ow
Date: Thu, 16 May 2024 21:12:32 +0200
Subject: [PATCH 1/2] test: BCE test case with softmax

---
 src/NeuralNet/losses/BCE.hpp      | 13 ++++++++---
 src/NeuralNet/utils/Functions.hpp | 13 +++++++++++
 tests/test-losses.cpp             | 38 +++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/src/NeuralNet/losses/BCE.hpp b/src/NeuralNet/losses/BCE.hpp
index e0e1d05..411a8b7 100644
--- a/src/NeuralNet/losses/BCE.hpp
+++ b/src/NeuralNet/losses/BCE.hpp
@@ -9,14 +9,21 @@ namespace NeuralNet {
 class BCE : public Loss {
  public:
   static double cmpLoss(const Eigen::MatrixXd &o, const Eigen::MatrixXd &y) {
-    Eigen::MatrixXd loss = -(y.array() * o.array().log() +
-                             (1.0 - y.array()) * (1.0 - o.array()).log());
+    double threshold = 1.0e-5;
+    Eigen::MatrixXd oTrim = trim(o, threshold);
+    Eigen::MatrixXd yTrim = trim(y, threshold);
+
+    Eigen::MatrixXd loss =
+        -(yTrim.array() * oTrim.array().log() +
+          (1.0 - yTrim.array()) * (1.0 - oTrim.array()).log());
+
     return loss.sum();
   }
 
   static Eigen::MatrixXd cmpLossGrad(const Eigen::MatrixXd &yHat,
                                      const Eigen::MatrixXd &y) {
-    return (yHat.array() - y.array()) / (yHat.array() * (1.0 - y.array()));
+    return (yHat.array() - y.array()) /
+           ((yHat.array() * (1.0 - yHat.array())) + 1e-9);
   }
 };
 
diff --git a/src/NeuralNet/utils/Functions.hpp b/src/NeuralNet/utils/Functions.hpp
index b4cae1f..c56ac0d 100644
--- a/src/NeuralNet/utils/Functions.hpp
+++ b/src/NeuralNet/utils/Functions.hpp
@@ -323,6 +323,19 @@ static Eigen::MatrixXd hardmax(const Eigen::MatrixXd &mat) {
   return hardmaxMatrix;
 }
 
+/**
+ * @brief Rounds values below the given threshold down to 0
+ *
+ * @param logits Matrix of doubles
+ * @param threshold a double (default: 0.01)
+ *
+ * @return the same matrix with values below the threshold set to 0
+ */
+static Eigen::MatrixXd trim(const Eigen::MatrixXd &logits,
+                            double threshold = 0.01) {
+  return (logits.array() < threshold).select(0, logits);
+}
+
 /* SIGNAL HANDLING */
 static void signalHandler(int signum) {
   std::cout << "Interrupt signal (" << signum << ") received.\n";
diff --git a/tests/test-losses.cpp b/tests/test-losses.cpp
index 72c4d39..3f3169c 100644
--- a/tests/test-losses.cpp
+++ b/tests/test-losses.cpp
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -14,4 +15,41 @@
   double loss = BCE::cmpLoss(o, y);
 
   REQUIRE(loss >= 0);
+}
+
+TEST_CASE("Testing Binary Cross-Entropy derivation with pre-calculated values",
+          "[losses]") {
+  Eigen::MatrixXd o(2, 2);
+  Eigen::MatrixXd y(2, 2);
+
+  o << 0.5, 0.5, 0.2, 0.8;
+  y << 0, 1, 0, 1;
+
+  Eigen::MatrixXd grad = BCE::cmpLossGrad(o, y);
+
+  Eigen::MatrixXd exp(2, 2);
+
+  exp << 2.0, -2.0, 1.25, -1.25;
+
+  CHECK_MATRIX_APPROX(grad, exp, EPSILON);
+}
+
+TEST_CASE("Testing Binary Cross-Entropy with softmax activation", "[losses]") {
+  Eigen::MatrixXd i = Eigen::MatrixXd::Random(2, 2);
+  Eigen::MatrixXd y = Eigen::MatrixXd::Zero(2, 2);
+
+  y(0, 0) = 1;
+  y(1, 1) = 1;
+
+  Eigen::MatrixXd prob = Softmax::activate(i);
+
+  double loss = BCE::cmpLoss(prob, y);
+
+  CHECK(loss >= 0);
+
+  Eigen::MatrixXd grad = BCE::cmpLossGrad(prob, y);
+
+  bool hasNaN = grad.array().isNaN().any();
+
+  CHECK_FALSE(hasNaN);
 }
\ No newline at end of file

From c9eff7cdb55b6b75b71b098ccc00579fb9cec4b4 Mon Sep 17 00:00:00 2001
From: Az-r-ow
Date: Fri, 17 May 2024 22:00:27 +0200
Subject: [PATCH 2/2] fix: handling NaN propagation in BCE

---
 src/NeuralNet/losses/BCE.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/NeuralNet/losses/BCE.hpp b/src/NeuralNet/losses/BCE.hpp
index 411a8b7..6788195 100644
--- a/src/NeuralNet/losses/BCE.hpp
+++ b/src/NeuralNet/losses/BCE.hpp
@@ -17,6 +17,10 @@ class BCE : public Loss {
         -(yTrim.array() * oTrim.array().log() +
           (1.0 - yTrim.array()) * (1.0 - oTrim.array()).log());
 
+    if (loss.array().isNaN().any())
+      throw std::runtime_error(
+          "NaN value encountered. Inputs might be too big");
+
     return loss.sum();
   }
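
For reference, the pre-calculated gradient values in the new test follow directly from the patched formula grad = (yHat - y) / (yHat * (1 - yHat)): at yHat = 0.5 the denominator is 0.25, giving (0.5 - 0)/0.25 = 2.0 and (0.5 - 1)/0.25 = -2.0, and at yHat = 0.2 or 0.8 it is 0.16, giving 1.25 and -1.25 (the +1e-9 term only shifts these within EPSILON). The standalone sketch below, which is not part of the patches, mirrors the patched formulas outside the Loss class so they can be checked in isolation; it assumes only Eigen, and the names trimSketch, bceLossSketch and bceGradSketch are illustrative.

// Standalone sketch (illustrative, not part of the patches) mirroring the
// patched BCE formulas, assuming only Eigen is available.
#include <Eigen/Dense>
#include <iostream>

// Mirrors trim(): values below the threshold are zeroed.
static Eigen::MatrixXd trimSketch(const Eigen::MatrixXd &m, double threshold) {
  return (m.array() < threshold).select(0, m);
}

// Mirrors the patched BCE::cmpLoss() with its 1.0e-5 trim threshold.
static double bceLossSketch(const Eigen::MatrixXd &o,
                            const Eigen::MatrixXd &y) {
  Eigen::MatrixXd oTrim = trimSketch(o, 1.0e-5);
  Eigen::MatrixXd yTrim = trimSketch(y, 1.0e-5);
  Eigen::MatrixXd loss =
      -(yTrim.array() * oTrim.array().log() +
        (1.0 - yTrim.array()) * (1.0 - oTrim.array()).log());
  return loss.sum();
}

// Mirrors the patched BCE::cmpLossGrad(); the +1e-9 keeps the denominator
// nonzero when a predicted probability saturates at exactly 0 or 1.
static Eigen::MatrixXd bceGradSketch(const Eigen::MatrixXd &yHat,
                                     const Eigen::MatrixXd &y) {
  return (yHat.array() - y.array()) /
         ((yHat.array() * (1.0 - yHat.array())) + 1e-9);
}

int main() {
  Eigen::MatrixXd o(2, 2), y(2, 2);
  o << 0.5, 0.5, 0.2, 0.8;
  y << 0, 1, 0, 1;
  std::cout << bceGradSketch(o, y) << "\n";  // ~[2 -2; 1.25 -1.25]
  std::cout << bceLossSketch(o, y) << "\n";  // finite, non-negative sum
  return 0;
}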