From 1af5dea9392022508ca3c0451a42911aef702947 Mon Sep 17 00:00:00 2001
From: Az-r-ow
Date: Thu, 16 May 2024 21:12:32 +0200
Subject: [PATCH 1/2] test: BCE test case with softmax

---
 src/NeuralNet/losses/BCE.hpp      | 13 ++++++++---
 src/NeuralNet/utils/Functions.hpp | 13 +++++++++++
 tests/test-losses.cpp             | 38 +++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/src/NeuralNet/losses/BCE.hpp b/src/NeuralNet/losses/BCE.hpp
index e0e1d05..411a8b7 100644
--- a/src/NeuralNet/losses/BCE.hpp
+++ b/src/NeuralNet/losses/BCE.hpp
@@ -9,14 +9,21 @@ namespace NeuralNet {
 class BCE : public Loss {
  public:
   static double cmpLoss(const Eigen::MatrixXd &o, const Eigen::MatrixXd &y) {
-    Eigen::MatrixXd loss = -(y.array() * o.array().log() +
-                             (1.0 - y.array()) * (1.0 - o.array()).log());
+    double threshold = 1.0e-5;
+    Eigen::MatrixXd oTrim = trim(o, threshold);
+    Eigen::MatrixXd yTrim = trim(y, threshold);
+
+    Eigen::MatrixXd loss =
+        -(yTrim.array() * oTrim.array().log() +
+          (1.0 - yTrim.array()) * (1.0 - oTrim.array()).log());
+
     return loss.sum();
   }
 
   static Eigen::MatrixXd cmpLossGrad(const Eigen::MatrixXd &yHat,
                                      const Eigen::MatrixXd &y) {
-    return (yHat.array() - y.array()) / (yHat.array() * (1.0 - y.array()));
+    return (yHat.array() - y.array()) /
+           ((yHat.array() * (1.0 - yHat.array())) + 1e-9);
   }
 };
 
diff --git a/src/NeuralNet/utils/Functions.hpp b/src/NeuralNet/utils/Functions.hpp
index b4cae1f..c56ac0d 100644
--- a/src/NeuralNet/utils/Functions.hpp
+++ b/src/NeuralNet/utils/Functions.hpp
@@ -323,6 +323,19 @@ static Eigen::MatrixXd hardmax(const Eigen::MatrixXd &mat) {
   return hardmaxMatrix;
 }
 
+/**
+ * @brief Rounds values below the given threshold down to 0
+ *
+ * @param logits Matrix of doubles
+ * @param threshold a double (default: 0.01)
+ *
+ * @return the same matrix with values below the threshold set to 0
+ */
+static Eigen::MatrixXd trim(const Eigen::MatrixXd &logits,
+                            double threshold = 0.01) {
+  return (logits.array() < threshold).select(0, logits);
+}
+
 /* SIGNAL HANDLING */
 static void signalHandler(int signum) {
   std::cout << "Interrupt signal (" << signum << ") received.\n";
diff --git a/tests/test-losses.cpp b/tests/test-losses.cpp
index 72c4d39..3f3169c 100644
--- a/tests/test-losses.cpp
+++ b/tests/test-losses.cpp
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -14,4 +15,41 @@
   double loss = BCE::cmpLoss(o, y);
 
   REQUIRE(loss >= 0);
+}
+
+TEST_CASE("Testing Binary Cross-Entropy derivation with pre-calculated values",
+          "[losses]") {
+  Eigen::MatrixXd o(2, 2);
+  Eigen::MatrixXd y(2, 2);
+
+  o << 0.5, 0.5, 0.2, 0.8;
+  y << 0, 1, 0, 1;
+
+  Eigen::MatrixXd grad = BCE::cmpLossGrad(o, y);
+
+  Eigen::MatrixXd exp(2, 2);
+
+  exp << 2.0, -2.0, 1.25, -1.25;
+
+  CHECK_MATRIX_APPROX(grad, exp, EPSILON);
+}
+
+TEST_CASE("Testing Binary Cross-Entropy with softmax activation", "[losses]") {
+  Eigen::MatrixXd i = Eigen::MatrixXd::Random(2, 2);
+  Eigen::MatrixXd y = Eigen::MatrixXd::Zero(2, 2);
+
+  y(0, 0) = 1;
+  y(1, 1) = 1;
+
+  Eigen::MatrixXd prob = Softmax::activate(i);
+
+  double loss = BCE::cmpLoss(prob, y);
+
+  CHECK(loss >= 0);
+
+  Eigen::MatrixXd grad = BCE::cmpLossGrad(prob, y);
+
+  bool hasNaN = grad.array().isNaN().any();
+
+  CHECK_FALSE(hasNaN);
 }
\ No newline at end of file

From c9eff7cdb55b6b75b71b098ccc00579fb9cec4b4 Mon Sep 17 00:00:00 2001
From: Az-r-ow
Date: Fri, 17 May 2024 22:00:27 +0200
Subject: [PATCH 2/2] fix: handling NaN propagation in BCE

---
 src/NeuralNet/losses/BCE.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/NeuralNet/losses/BCE.hpp b/src/NeuralNet/losses/BCE.hpp
index 411a8b7..6788195 100644
--- a/src/NeuralNet/losses/BCE.hpp
+++ b/src/NeuralNet/losses/BCE.hpp
@@ -17,6 +17,10 @@ class BCE : public Loss {
         -(yTrim.array() * oTrim.array().log() +
           (1.0 - yTrim.array()) * (1.0 - oTrim.array()).log());
 
+    if (loss.array().isNaN().any())
+      throw std::runtime_error(
+          "NaN value encountered. Inputs might be too big");
+
     return loss.sum();
   }
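
For reference, the pre-calculated gradient values in the new test follow directly from the patched formula grad = (yHat - y) / (yHat * (1 - yHat)): at yHat = 0.5 the denominator is 0.25, giving (0.5 - 0)/0.25 = 2.0 and (0.5 - 1)/0.25 = -2.0, and at yHat = 0.2 or 0.8 it is 0.16, giving 1.25 and -1.25 (the +1e-9 term only shifts these within EPSILON). The standalone sketch below, which is not part of the patches, mirrors the patched formulas outside the Loss class so they can be checked in isolation; it assumes only Eigen, and the names trimSketch, bceLossSketch and bceGradSketch are illustrative.

// Standalone sketch (illustrative, not part of the patches) mirroring the
// patched BCE formulas, assuming only Eigen is available.
#include <Eigen/Dense>
#include <iostream>

// Mirrors trim(): values below the threshold are zeroed.
static Eigen::MatrixXd trimSketch(const Eigen::MatrixXd &m, double threshold) {
  return (m.array() < threshold).select(0, m);
}

// Mirrors the patched BCE::cmpLoss() with its 1.0e-5 trim threshold.
static double bceLossSketch(const Eigen::MatrixXd &o,
                            const Eigen::MatrixXd &y) {
  Eigen::MatrixXd oTrim = trimSketch(o, 1.0e-5);
  Eigen::MatrixXd yTrim = trimSketch(y, 1.0e-5);
  Eigen::MatrixXd loss =
      -(yTrim.array() * oTrim.array().log() +
        (1.0 - yTrim.array()) * (1.0 - oTrim.array()).log());
  return loss.sum();
}

// Mirrors the patched BCE::cmpLossGrad(); the +1e-9 keeps the denominator
// nonzero when a predicted probability saturates at exactly 0 or 1.
static Eigen::MatrixXd bceGradSketch(const Eigen::MatrixXd &yHat,
                                     const Eigen::MatrixXd &y) {
  return (yHat.array() - y.array()) /
         ((yHat.array() * (1.0 - yHat.array())) + 1e-9);
}

int main() {
  Eigen::MatrixXd o(2, 2), y(2, 2);
  o << 0.5, 0.5, 0.2, 0.8;
  y << 0, 1, 0, 1;
  std::cout << bceGradSketch(o, y) << "\n";  // ~[2 -2; 1.25 -1.25]
  std::cout << bceLossSketch(o, y) << "\n";  // finite, non-negative sum
  return 0;
}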