Skip to content

Commit

Permalink
Updated classifier name from GaussianNaiveBayes to GaussianNB
Browse files Browse the repository at this point in the history
Signed-off-by: Ayush Joshi <[email protected]>
  • Loading branch information
joshiayush committed Dec 20, 2023
1 parent 55fd5b4 commit c753d15
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 58 deletions.
42 changes: 1 addition & 41 deletions ai/naive_bayes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,47 +15,7 @@
"""Naive Bayes methods are a set of `supervised` learning algorithms based on
applying `Bayes’` theorem with the “naive” assumption of conditional
independence between every pair of features given the value of the class
variable. Bayes theorem states the following relationship, given class variable
`y` and dependent feature vector :math:`x_{1}` through :math:`x_{n}`:
.. math::
P(y | x_{1}, ..., x_{n}) = \\dfrac{
P(y) \\cdot P(x_{1} ..., x_{n} | y)
}{
P(x_{1} ..., x_{n})
}
Using the naive conditional independence assumption that:
.. math::
P(x_{i} | y, x_{1}, ..., x_{i-1}, x_{i+1}, ..., x_{n}) = P(x_{i} | y)
for all :math:`i`, this relationship is simplified to:
.. math::
P(y | x_{1}, ..., x_{n}) = \\dfrac{
P(y) \\cdot \\prod_{i=1}^{n}P(x_{i} | y)
}{
P(x_{1} ..., x_{n})
}
Since :math:`P(x_{1}, ..., x_{n})` is constant given the input, we can use the
following classification rule:
.. math::
P(y | x_{1}, ..., x_{n}) \\propto P(y) \\cdot \\prod_{i=1}^{n}P(x_{i} | y)
.. math::
\\hat y = arg \\max_{y} P(y) \\cdot \\prod_{i=1}^{n} P(x_{i} | y)
and we can use **Maximum A Posteriori (MAP)** estimation to estimate
:math:`P(y)` and :math:`P(x_{i} | y)`; the former is then the relative
frequency of class :math:`y` in the training set.
variable.
`ai.naive_bayes` implements the following `naive bayes` algorithms:
Expand Down
47 changes: 30 additions & 17 deletions ai/naive_bayes/naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
import numpy as np


class GaussianNaiveBayes:
class GaussianNB:
"""Gaussian Naive Bayes (GaussianNB).
Naive Bayes methods are a set of supervised learning algorithms based on
applying Bayes’ theorem with the “naive” assumption of conditional
independence between every pair of features given the value of the class
Expand All @@ -31,9 +31,9 @@ class GaussianNaiveBayes:
.. math::
P(y | x_{1}, ..., x_{n}) = \\dfrac{
P(y) \\cdot P(x_{1} ..., x_{n} | y)
P(y) \\cdot P(x_{1}, ..., x_{n} | y)
}{
P(x_{1} ..., x_{n})
P(x_{1}, ..., x_{n})
}
Using the naive conditional independence assumption that:
Expand All @@ -49,7 +49,7 @@ class GaussianNaiveBayes:
P(y | x_{1}, ..., x_{n}) = \\dfrac{
P(y) \\cdot \\prod_{i=1}^{n}P(x_{i} | y)
}{
P(x_{1} ..., x_{n})
P(x_{1}, ..., x_{n})
}
Since :math:`P(x_{1}, ..., x_{n})` is constant given the input, we can use the
Expand All @@ -63,6 +63,19 @@ class GaussianNaiveBayes:
\\hat y = arg \\max_{y} P(y) \\cdot \\prod_{i=1}^{n} P(x_{i} | y)
.. note::
In practice we avoid multiplying many probabilities together, since the
product can underflow to :math:`0` at machine precision. It is better to apply
the monotonic function :math:`\\mathrm{log}` and sum the logs of the
probabilities instead.
.. math::
\\hat y = arg \\max_{y} \\sum_{i=1}^{n} \\mathrm{log}(
P(x_{i} | y)
) + \\mathrm{log}(P(y))
and we can use Maximum A Posteriori (MAP) estimation to estimate :math:`P(y)`
and :math:`P(x_{i} | y)`; the former is then the relative frequency of class
:math:`y` in the training set.
Expand Down Expand Up @@ -92,17 +105,17 @@ class GaussianNaiveBayes:
@staticmethod
def _check_if_parameters_comply_to_constraints(**kwargs: dict) -> None:
"""Private static method to ensure the compatibility of the hyperparameters
passed to the `GaussianNaiveBayes`.
passed to the `GaussianNB`.
Args:
kwargs: Passed hyperparameters.
Raises:
ValueError: If any hyperparameter is not compatible.
"""
if kwargs[
if kwargs['priors'] is not None and kwargs[
'priors'
].__class__.__name__ not in GaussianNaiveBayes._parameter_constraints[
].__class__.__name__ not in GaussianNB._parameter_constraints[
'priors']:
raise ValueError(
(
Expand All @@ -119,11 +132,12 @@ def __init__(self, *, priors: Union[list, np.ndarray] = None):
not adjusted according to the data.
"""
self._priors = priors
self._is_fitted = False
self._mean = None
self._var = None

self._check_if_parameters_comply_to_constraints(priors=self._priors)

def fit(self, X: np.ndarray, y: np.ndarray) -> 'GaussianNaiveBayes':
def fit(self, X: np.ndarray, y: np.ndarray) -> 'GaussianNB':
"""Fit Gaussian Naive Bayes according to X, y.
Args:
Expand All @@ -136,7 +150,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> 'GaussianNaiveBayes':
"""
self._classes = np.unique(y)

n_features = X.shape[1]
n_samples, n_features = X.shape
n_classes = len(self._classes)
self._var = np.zeros((n_classes, n_features))
self._mean = np.zeros((n_classes, n_features))
Expand Down Expand Up @@ -164,22 +178,21 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> 'GaussianNaiveBayes':

# Update only if no priors are provided
if self._priors is None:
self._class_priors[idx] = X_c.shape[0] / float(X.shape[0])
self._class_priors[idx] = X_c.shape[0] / float(n_samples)

self._is_fitted = True
return self

def predict(self, X: np.ndarray) -> np.ndarray:
"""Predict for `X` using the previously calculated priors.
Args:
X: Testing vectors, where `n_samples` is the number of samples and
`n_features` is the number of features.
Returns:
Predictions made for the given testing vector `X`.
"""
if self._is_fitted is False:
if self._mean is None or self._var is None:
raise RuntimeError(
f'{self.__class__.__name__}: predict called before fitting data'
)
Expand All @@ -202,9 +215,9 @@ def predict(self, X: np.ndarray) -> np.ndarray:
# `y` and also add the prior rather than multiplying it
posterior = np.sum(np.log(self._pdf(idx, x))
) + np.log(self._class_priors[idx])
posteriors = [*posteriors, posterior]
posteriors.append(posterior)
# Only add classes with the highest posterior
preds = [*preds, self._classes[np.argsort(posteriors)]]
preds.append(self._classes[np.argsort(posteriors)])
return np.array(preds)

def _pdf(self, c_idx: int, x: np.ndarray) -> np.float64:
Expand Down

0 comments on commit c753d15

Please sign in to comment.