From c753d15e6ad96867b10e266a62db8f4ff9d58df2 Mon Sep 17 00:00:00 2001
From: Ayush Joshi
Date: Wed, 20 Dec 2023 11:37:23 +0530
Subject: [PATCH] Updated classifier name from `GaussianNaiveBayes` to
 `GaussianNB`

Signed-off-by: Ayush Joshi
---
 ai/naive_bayes/__init__.py    | 42 +------------------------------
 ai/naive_bayes/naive_bayes.py | 47 ++++++++++++++++++++++-------------
 2 files changed, 31 insertions(+), 58 deletions(-)

diff --git a/ai/naive_bayes/__init__.py b/ai/naive_bayes/__init__.py
index b9c187b..3be14fb 100644
--- a/ai/naive_bayes/__init__.py
+++ b/ai/naive_bayes/__init__.py
@@ -15,47 +15,7 @@
 """Naive Bayes methods are a set of `supervised` learning algorithms based
 on applying `Bayes’` theorem with the “naive” assumption of conditional
 independence between every pair of features given the value of the class
-variable. Bayes theorem states the following relationship, given class variable
-`y` and dependent feature vector :math:`x_{1}` through :math:`x_{n}`:
-
-.. math::
-
-  P(y | x_{1}, ..., x_{n}) = \\dfrac{
-    P(y) \\cdot P(x_{1} ..., x_{n} | y)
-  }{
-    P(x_{1} ..., x_{n})
-  }
-
-Using the naive conditional independence assumption that:
-
-.. math::
-
-  P(x_{i} | y, x_{1}, ..., x_{i-1}, x_{i+1}, ..., x_{n}) = P(x_{i} | y)
-
-for all :math:`i`, this relationship is simplified to:
-
-.. math::
-
-  P(y | x_{1}, ..., x_{n}) = \\dfrac{
-    P(y) \\cdot \\prod_{i=1}^{n}P(x_{i} | y)
-  }{
-    P(x_{1} ..., x_{n})
-  }
-
-Since :math:`P(x_{1}, ..., x_{n})` is constant given the input, we can use the
-following classification rule:
-
-.. math::
-
-  P(y | x_{1}, ..., x_{n}) \\propto P(y) \\cdot \\prod_{i=1}^{n}P(x_{i} | y)
-
-.. math::
-
-  ⇒ \\hat y = arg \\max_{y} P(y) \\cdot \\prod_{i=1}^{n} P(x_{i} | y)
-
-and we can use **Maximum A Posteriori (MAP)** estimation to estimate
-:math:`P(y)` and :math:`P(x_{i} | y)`; the former is then the relative
-frequency of class :math:`y` in the training set.
+variable.
 
 `ai.naive_bayes` implements the following `naive bayes` algorithms:
 
diff --git a/ai/naive_bayes/naive_bayes.py b/ai/naive_bayes/naive_bayes.py
index feb992b..cd1a47b 100644
--- a/ai/naive_bayes/naive_bayes.py
+++ b/ai/naive_bayes/naive_bayes.py
@@ -19,9 +19,9 @@
 import numpy as np
 
 
-class GaussianNaiveBayes:
+class GaussianNB:
   """Gaussian Naive Bayes (GaussianNB).
-  
+
   Naive Bayes methods are a set of supervised learning algorithms based on
   applying Bayes’ theorem with the “naive” assumption of conditional
   independence between every pair of features given the value of the class
@@ -31,9 +31,9 @@ class GaussianNaiveBayes:
   .. math::
 
     P(y | x_{1}, ..., x_{n}) = \\dfrac{
-      P(y) \\cdot P(x_{1} ..., x_{n} | y)
+      P(y) \\cdot P(x_{1}, ..., x_{n} | y)
     }{
-      P(x_{1} ..., x_{n})
+      P(x_{1}, ..., x_{n})
     }
 
   Using the naive conditional independence assumption that:
@@ -49,7 +49,7 @@ class GaussianNaiveBayes:
     P(y | x_{1}, ..., x_{n}) = \\dfrac{
       P(y) \\cdot \\prod_{i=1}^{n}P(x_{i} | y)
     }{
-      P(x_{1} ..., x_{n})
+      P(x_{1}, ..., x_{n})
     }
 
   Since :math:`P(x_{1}, ..., x_{n})` is constant given the input, we can use the
@@ -63,6 +63,19 @@ class GaussianNaiveBayes:
 
     ⇒ \\hat y = arg \\max_{y} P(y) \\cdot \\prod_{i=1}^{n} P(x_{i} | y)
 
+  .. note::
+
+    In practice we never multiply probabilities directly, since a long
+    product of values in :math:`[0, 1]` can underflow to :math:`0` at
+    machine precision. It is better to apply the monotonic function
+    :math:`\\mathrm{log}` and sum the logs of the probabilities:
+
+  .. math::
+
+    ⇒ \\hat y = arg \\max_{y} \\sum_{i=1}^{n} \\mathrm{log}(
+      P(x_{i} | y)
+    ) + \\mathrm{log}(P(y))
+
   and we can use Maximum A Posteriori (MAP) estimation to estimate
   :math:`P(y)` and :math:`P(x_{i} | y)`; the former is then the relative
   frequency of class :math:`y` in the training set.
@@ -92,7 +105,7 @@ class GaussianNaiveBayes:
   @staticmethod
   def _check_if_parameters_comply_to_constraints(**kwargs: dict) -> None:
     """Private static method to ensure the compatibility of the hyperparameters
-    passed to the `GaussianNaiveBayes`.
+    passed to `GaussianNB`.
 
     Args:
       kwargs: Passed hyperparameters.
@@ -100,9 +113,9 @@ def _check_if_parameters_comply_to_constraints(**kwargs: dict) -> None:
     Raises:
       ValueError: If any hyperparameter is not compatible.
     """
-    if kwargs[
+    if kwargs['priors'] is not None and kwargs[
         'priors'
-    ].__class__.__name__ not in GaussianNaiveBayes._parameter_constraints[
+    ].__class__.__name__ not in GaussianNB._parameter_constraints[
         'priors']:
       raise ValueError(
           (
@@ -119,11 +132,12 @@ def __init__(self, *, priors: Union[list, np.ndarray] = None):
           not adjusted according to the data.
     """
     self._priors = priors
-    self._is_fitted = False
+    self._mean = None
+    self._var = None
 
     self._check_if_parameters_comply_to_constraints(priors=self._priors)
 
-  def fit(self, X: np.ndarray, y: np.ndarray) -> 'GaussianNaiveBayes':
+  def fit(self, X: np.ndarray, y: np.ndarray) -> 'GaussianNB':
     """Fit Gaussian Naive Bayes according to X, y.
 
     Args:
@@ -136,7 +150,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> 'GaussianNaiveBayes':
     """
     self._classes = np.unique(y)
 
-    n_features = X.shape[1]
+    n_samples, n_features = X.shape
     n_classes = len(self._classes)
     self._var = np.zeros((n_classes, n_features))
     self._mean = np.zeros((n_classes, n_features))
@@ -164,14 +178,13 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> 'GaussianNaiveBayes':
 
       # Update only if no priors are provided
       if self._priors is None:
-        self._class_priors[idx] = X_c.shape[0] / float(X.shape[0])
+        self._class_priors[idx] = X_c.shape[0] / float(n_samples)
 
-    self._is_fitted = True
     return self
 
   def predict(self, X: np.ndarray) -> np.ndarray:
     """Predict for `X` using the previously calculated priors.
-    
+
     Args:
       X: Testing vectors, where `n_samples` is the number of samples and
         `n_features` is the number of features.
@@ -179,7 +192,7 @@ def predict(self, X: np.ndarray) -> np.ndarray:
     Returns:
       Predictions made for the given testing vector `X`.
     """
-    if self._is_fitted is False:
+    if self._mean is None or self._var is None:
       raise RuntimeError(
           f'{self.__class__.__name__}: predict called before fitting data'
       )
@@ -202,9 +215,9 @@ def predict(self, X: np.ndarray) -> np.ndarray:
         # `y` and also add the prior rather than multiplying it
         posterior = np.sum(np.log(self._pdf(idx, x))
                            ) + np.log(self._class_priors[idx])
-        posteriors = [*posteriors, posterior]
+        posteriors.append(posterior)
       # Only add the class with the highest posterior
-      preds = [*preds, self._classes[np.argsort(posteriors)]]
+      preds.append(self._classes[np.argmax(posteriors)])
     return np.array(preds)
 
   def _pdf(self, c_idx: int, x: np.ndarray) -> np.float64:
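
For reviewers, a quick usage sketch of the renamed class. This is a minimal,
illustrative example, not part of the patch: it assumes `GaussianNB` is
re-exported from `ai.naive_bayes` (as `GaussianNaiveBayes` was) and that a
plain `list` of priors satisfies `_parameter_constraints['priors']`:

    import numpy as np

    from ai.naive_bayes import GaussianNB  # assumed re-export

    rng = np.random.default_rng(seed=0)

    # Two well-separated Gaussian blobs, one per class.
    X = np.vstack([
        rng.normal(loc=0.0, scale=1.0, size=(50, 2)),
        rng.normal(loc=5.0, scale=1.0, size=(50, 2)),
    ])
    y = np.array([0] * 50 + [1] * 50)

    # `fit` returns `self`, so construction and fitting can be chained.
    clf = GaussianNB().fit(X, y)
    print('training accuracy:', np.mean(clf.predict(X) == y))

    # Fixed priors skip the relative-frequency estimate computed in `fit`;
    # the `list` type for `priors` is assumed to pass the constraint check.
    clf_fixed = GaussianNB(priors=[0.5, 0.5]).fit(X, y)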