Skip to content

Commit

Permalink
Closes Bears-R-Us#3167: Add normal to random number generators (Bea…
Browse files Browse the repository at this point in the history
…rs-R-Us#3180)

* Closes Bears-R-Us#3167: Add `normal` to random number generators

This PR (closes Bears-R-Us#3167) adds `normal` to our generators by transforming the results of standard normal. It also upddates `standard_normal` to use generators ot make sure we have the same multiple calls of the same function give different output as the other rand funcs. Adds goodness of fit testing against scipy to ensure we are actually drawing from the correct distribution

* add docs for random functions and deps to yaml files

* leftover latex preventing display in usage

---------

Co-authored-by: Tess Hayes <[email protected]>
  • Loading branch information
stress-tess and stress-tess authored May 21, 2024
1 parent 01bfe3f commit af41bb8
Show file tree
Hide file tree
Showing 7 changed files with 255 additions and 53 deletions.
41 changes: 41 additions & 0 deletions PROTO_tests/tests/random_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pytest
from scipy import stats as sp_stats

import arkouda as ak
from arkouda.scipy import chisquare as akchisquare
Expand Down Expand Up @@ -205,6 +206,46 @@ def test_choice_flags(self):
current = rng.choice(a, size, replace, p)
assert np.allclose(previous.to_list(), current.to_list())

def test_normal(self):
rng = ak.random.default_rng(17)
both_scalar = rng.normal(loc=10, scale=2, size=10).to_list()
scale_scalar = rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list()
loc_scalar = rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list()
both_array = rng.normal(loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3).to_list()

# redeclare rng with same seed to test reproducibility
rng = ak.random.default_rng(17)
assert rng.normal(loc=10, scale=2, size=10).to_list() == both_scalar
assert rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list() == scale_scalar
assert rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list() == loc_scalar
assert (
rng.normal(loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3).to_list()
== both_array
)

def test_normal_hypothesis_testing(self):
# I tested this many times without a set seed, but with no seed
# it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
rng = ak.random.default_rng(43)
num_samples = 10**4

mean = rng.uniform(-10, 10)
deviation = rng.uniform(0, 10)
sample = rng.normal(loc=mean, scale=deviation, size=num_samples)
sample_list = sample.to_list()

# first test if samples are normal at all
_, pval = sp_stats.normaltest(sample_list)

# if pval <= 0.05, the difference from the expected distribution is significant
assert pval > 0.05

# second goodness of fit test against the distribution with proper mean and std
good_fit_res = sp_stats.goodness_of_fit(
sp_stats.norm, sample_list, known_params={"loc": mean, "scale": deviation}
)
assert good_fit_res.pvalue > 0.05

def test_legacy_randint(self):
testArray = ak.random.randint(0, 10, 5)
assert isinstance(testArray, ak.pdarray)
Expand Down
96 changes: 50 additions & 46 deletions arkouda-env-dev.yml
Original file line number Diff line number Diff line change
@@ -1,46 +1,50 @@
name: arkouda-dev
channels:
- conda-forge
- defaults
dependencies:
- python>=3.8 # minimum 3.8
- numpy>=1.24.1
- pandas>=1.4.0,!=2.2.0
- pyzmq>=20.0.0
- tabulate
- pyfiglet
- versioneer
- matplotlib>=3.3.2
- h5py>=3.7.0
- hdf5==1.12.2
- pip
- types-tabulate
- pytables>=3.7.0
- pyarrow
- libiconv
- libidn2
- jupyter
- scipy

# Developer dependencies
- pexpect
- pytest>=6.0
- pytest-env
- Sphinx>=5.1.1
- sphinx-argparse
- sphinx-autoapi
- typed-ast
- flake8
- mypy>=0.931,<0.990
- black
- isort
- pytest-json-report
- pytest-benchmark

- pip:
# Developer dependencies
- typeguard==2.10.0
- furo # sphinx theme
- myst-parser
- linkify-it-py

name: arkouda-dev
channels:
- conda-forge
- defaults
dependencies:
- python>=3.8 # minimum 3.8
- numpy>=1.24.1
- pandas>=1.4.0,!=2.2.0
- pyzmq>=20.0.0
- tabulate
- pyfiglet
- versioneer
- matplotlib>=3.3.2
- h5py>=3.7.0
- hdf5==1.12.2
- pip
- types-tabulate
- pytables>=3.7.0
- pyarrow
- libiconv
- libidn2
- jupyter
- scipy

# Developer dependencies
- pexpect
- pytest>=6.0
- pytest-env
- Sphinx>=5.1.1
- sphinx-argparse
- sphinx-autoapi
- sphinx-design
- sphinx-autodoc-typehints
- typed-ast
- flake8
- mypy>=0.931,<0.990
- black
- isort
- pytest-json-report
- pytest-benchmark
- mathjax

- pip:
# Developer dependencies
- typeguard==2.10.0
- furo # sphinx theme
- myst-parser
- linkify-it-py
- sphinx-autopackagesummary

73 changes: 67 additions & 6 deletions arkouda/random/_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,55 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
self._state += size
return create_pdarray(rep_msg)

def normal(self, loc=0.0, scale=1.0, size=None):
r"""
Draw samples from a normal (Gaussian) distribution
Parameters
----------
loc: float or pdarray of floats, optional
Mean of the distribution. Default of 0.
scale: float or pdarray of floats, optional
Standard deviation of the distribution. Must be non-negative. Default of 1.
size: numeric_scalars, optional
Output shape. Default is None, in which case a single value is returned.
Notes
-----
The probability density for the Gaussian distribution is:
.. math::
p(x) = \frac{1}{\sqrt{2\pi \sigma^2}} e^{-\frac{(x-\mu)^2}{2\sigma^2}}
where :math:`\mu` is the mean and :math:`\sigma` the standard deviation.
The square of the standard deviation, :math:`\sigma^2`, is called the variance.
Returns
-------
pdarray
Pdarray of floats (unless size=None, in which case a single float is returned).
See Also
--------
standard_normal
uniform
Examples
--------
>>> ak.random.default_rng(17).normal(3, 2.5, 10)
array([2.3673425816523577 4.0532529435624589 2.0598322696795694])
"""
if size is None:
# delegate to numpy when return size is 1
return self._np_generator.standard_normal(loc, scale)

if (scale < 0).any() if isinstance(scale, pdarray) else scale < 0:
raise TypeError("scale must be non-negative.")

return loc + scale * self.standard_normal(size=size)

def random(self, size=None):
"""
Return random floats in the half-open interval [0.0, 1.0).
Expand Down Expand Up @@ -261,7 +310,7 @@ def random(self, size=None):
return create_pdarray(rep_msg)

def standard_normal(self, size=None):
"""
r"""
Draw samples from a standard Normal distribution (mean=0, stdev=1).
Parameters
Expand All @@ -276,10 +325,14 @@ def standard_normal(self, size=None):
Notes
-----
For random samples from :math:`N(\\mu, \\sigma^2)`, use:
For random samples from :math:`N(\mu, \sigma^2)`, either call `normal` or do:
``(sigma * standard_normal(size)) + mu``
.. math::
(\sigma \cdot \texttt{standard_normal}(size)) + \mu
See Also
--------
normal
Examples
--------
Expand All @@ -289,12 +342,20 @@ def standard_normal(self, size=None):
>>> rng.standard_normal(3)
array([0.8797352989638163, -0.7085325853376141, 0.021728052940979934]) # random
"""
from arkouda.random._legacy import standard_normal

if size is None:
# delegate to numpy when return size is 1
return self._np_generator.standard_normal()
return standard_normal(size=size, seed=self._seed)
rep_msg = generic_msg(
cmd="standardNormalGenerator",
args={
"name": self._name_dict[akfloat64],
"size": size,
"state": self._state,
},
)
# since we generate 2*size uniform samples for box-muller transform
self._state += (size * 2)
return create_pdarray(rep_msg)

def shuffle(self, x):
"""
Expand Down
16 changes: 16 additions & 0 deletions pydoc/usage/random.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,26 @@ To create a ``Generator`` use the ``default_rng`` constructor
Features
==========

choice
---------
.. autofunction:: arkouda.random.Generator.choice

integers
---------
.. autofunction:: arkouda.random.Generator.integers

normal
---------
.. autofunction:: arkouda.random.Generator.normal

permutation
---------
.. autofunction:: arkouda.random.Generator.permutation

shuffle
---------
.. autofunction:: arkouda.random.Generator.shuffle

random
---------
.. autofunction:: arkouda.random.Generator.random
Expand Down
3 changes: 3 additions & 0 deletions src/RandArray.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ module RandArray {
}

proc fillNormal(ref a:[?D] real, const seedStr:string="None") throws {
// uses Box–Muller transform
// generates values drawn from the standard normal distribution using
// 2 uniformly distributed random numbers
var u1 = makeDistArray(D, real);
var u2 = makeDistArray(D, real);
if (seedStr.toLower() == "none") {
Expand Down
38 changes: 37 additions & 1 deletion src/RandMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module RandMsg
use ServerConfig;

use ArkoudaTimeCompat as Time;
use Math only;
use Math;
use Reflection;
use ServerErrors;
use ServerConfig;
Expand Down Expand Up @@ -297,6 +297,41 @@ module RandMsg
return new MsgTuple(repMsg, MsgType.NORMAL);
}

proc standardNormalGeneratorMsg(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws {
const pn = Reflection.getRoutineName(),
name = msgArgs.getValueOf("name"), // generator name
size = msgArgs.get("size").getIntValue(), // population size
state = msgArgs.get("state").getIntValue(), // rng state
rname = st.nextName();

randLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),
"name: %? size %i state %i".doFormat(name, size, state));

st.checkTable(name);

var generatorEntry: borrowed GeneratorSymEntry(real) = toGeneratorSymEntry(st.lookup(name), real);
ref rng = generatorEntry.generator;
if state != 1 {
// you have to skip to one before where you want to be
rng.skipTo(state-1);
}

// uses Box–Muller transform
// generates values drawn from the standard normal distribution using
// 2 uniformly distributed random numbers
var u1 = makeDistArray(size, real);
var u2 = makeDistArray(size, real);
rng.fill(u1);
rng.fill(u2);

var standNorm = sqrt(-2*log(u1))*cos(2*pi*u2);
st.addEntry(rname, createSymEntry(standNorm));

var repMsg = "created " + st.attrib(rname);
randLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg);
return new MsgTuple(repMsg, MsgType.NORMAL);
}

proc segmentedSampleMsg(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws {
const pn = Reflection.getRoutineName(),
genName = msgArgs.getValueOf("genName"), // generator name
Expand Down Expand Up @@ -567,6 +602,7 @@ module RandMsg
registerFunction("randomNormal", randomNormalMsg, getModuleName());
registerFunction("createGenerator", createGeneratorMsg, getModuleName());
registerFunction("uniformGenerator", uniformGeneratorMsg, getModuleName());
registerFunction("standardNormalGenerator", standardNormalGeneratorMsg, getModuleName());
registerFunction("segmentedSample", segmentedSampleMsg, getModuleName());
registerFunction("choice", choiceMsg, getModuleName());
registerFunction("permutation", permutationMsg, getModuleName());
Expand Down
Loading

0 comments on commit af41bb8

Please sign in to comment.