Skip to content

Commit

Permalink
Closes Bears-R-Us#3045 index.map (Bears-R-Us#3057)
Browse files Browse the repository at this point in the history
* Closes Bears-R-Us#3045 index.map

* Remove leftover print statement

---------

Co-authored-by: Amanda Potts <[email protected]>
  • Loading branch information
ajpotts and ajpotts authored Mar 26, 2024
1 parent caa4106 commit a84a35f
Show file tree
Hide file tree
Showing 7 changed files with 238 additions and 43 deletions.
6 changes: 6 additions & 0 deletions PROTO_tests/tests/index_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ def test_argsort(self):
assert i4.argsort(ascending=True).to_list() == [0, 1, 2]
assert i4.argsort(ascending=False).to_list() == [2, 1, 0]

def test_map(self):
    """Index.map with a dict yields an Index holding the mapped values."""
    index = ak.Index(ak.array([2, 3, 2, 3, 4]))
    lookup = {4: 25.0, 2: 30.0, 1: 7.0, 3: 5.0}

    # Key 1 never occurs in the index, so it contributes nothing to the output.
    mapped = index.map(lookup)
    assert mapped.values.to_list() == [30.0, 5.0, 30.0, 5.0, 25.0]

def test_concat(self):
idx_1 = ak.Index.factory(ak.arange(5))

Expand Down
44 changes: 43 additions & 1 deletion PROTO_tests/tests/util_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np

import arkouda as ak
from arkouda.util import is_float, is_int, is_numeric
from arkouda.util import is_float, is_int, is_numeric, map


class TestUtil:
Expand Down Expand Up @@ -44,3 +44,45 @@ def test_is_numeric(self):
assert ~is_float(b)
assert ~is_float(c)
assert is_float(d)

def test_map(self):
    """Exercise arkouda.util.map on Strings, int, float and Categorical inputs."""
    strs = ak.array(["1", "1", "4", "4", "4"])
    ints = ak.array([2, 3, 2, 3, 4])
    floats = ak.array([1.0, 1.0, 2.2, 2.2, 4.4])
    cats = ak.Categorical(strs)

    # --- Strings keys ---------------------------------------------------
    # Every value has a key; the unused key "5" does not affect the output.
    mapped = map(strs, {"4": 25, "5": 30, "1": 7})
    assert mapped.to_list() == [7, 7, 25, 25, 25]

    # "4" is unmapped and the mapped values are ints: the expected fill is
    # whatever NaN becomes when cast to int64.
    mapped = map(strs, {"1": 7})
    actual = mapped.to_list()
    expected = ak.cast(ak.array([7, 7, np.nan, np.nan, np.nan]), dt=ak.int64).to_list()
    assert actual == expected

    # Same lookup with float values: the unmapped entries come back as NaN.
    mapped = map(strs, {"1": 7.0})
    assert np.allclose(mapped.to_list(), [7.0, 7.0, np.nan, np.nan, np.nan], equal_nan=True)

    # --- int keys -------------------------------------------------------
    mapped = map(ints, {4: 25.0, 2: 30.0, 1: 7.0, 3: 5.0})
    assert mapped.to_list() == [30.0, 5.0, 30.0, 5.0, 25.0]

    # --- float keys, string values --------------------------------------
    mapped = map(floats, {1.0: "a", 2.2: "b", 4.4: "c", 5.0: "d"})
    assert mapped.to_list() == ["a", "a", "b", "b", "c"]

    # Unmapped entries are filled with the literal string "null".
    mapped = map(floats, {1.0: "a"})
    assert mapped.to_list() == ["a", "a", "null", "null", "null"]

    mapped = map(floats, {1.0: "a", 2.2: "b", 4.4: "c", 5.0: "d", 6.0: "e"})
    assert mapped.to_list() == ["a", "a", "b", "b", "c"]

    # --- Categorical keys -----------------------------------------------
    mapped = map(cats, {"4": 25, "5": 30, "1": 7})
    assert mapped.to_list() == [7, 7, 25, 25, 25]

    mapped = map(cats, {"1": 7})
    assert np.allclose(
        mapped.to_list(),
        ak.cast(ak.array([7, 7, np.nan, np.nan, np.nan]), dt=ak.int64).to_list(),
        equal_nan=True,
    )

    mapped = map(cats, {"1": 7.0})
    assert np.allclose(mapped.to_list(), [7.0, 7.0, np.nan, np.nan, np.nan], equal_nan=True)
46 changes: 45 additions & 1 deletion arkouda/index.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import json
from typing import List, Optional, Union
from typing import TYPE_CHECKING, List, Optional, Union

import pandas as pd # type: ignore
from numpy import array as ndarray
Expand All @@ -17,6 +19,9 @@
from arkouda.sorting import coargsort
from arkouda.util import convert_if_categorical, generic_concat, get_callback

if TYPE_CHECKING:
from arkouda.series import Series


class Index:
objType = "Index"
Expand Down Expand Up @@ -448,6 +453,45 @@ def argsort(self, ascending=True):
i = argsort(self.values)
return i

def map(self, arg: Union[dict, "Series"]) -> "Index":
    """
    Build a new Index by translating each value through a mapping.

    The work is delegated to ``arkouda.util.map``, applied to this
    index's ``values``; the result is wrapped in a new ``Index``.

    Parameters
    ----------
    arg : dict or Series
        The mapping correspondence.

    Returns
    -------
    arkouda.index.Index
        A new index with the values transformed by the mapping correspondence.

    Raises
    ------
    TypeError
        Raised if arg is not of type dict or arkouda.Series.
        Raised if index values not of type pdarray, Categorical, or Strings.

    Examples
    --------
    >>> import arkouda as ak
    >>> ak.connect()
    >>> idx = ak.Index(ak.array([2, 3, 2, 3, 4]))
    >>> display(idx)
    Index(array([2 3 2 3 4]), dtype='int64')
    >>> idx.map({4: 25.0, 2: 30.0, 1: 7.0, 3: 5.0})
    Index(array([30.00000000000000000 5.00000000000000000 30.00000000000000000
    5.00000000000000000 25.00000000000000000]), dtype='float64')
    >>> s2 = ak.Series(ak.array(["a","b","c","d"]), index = ak.array([4,2,1,3]))
    >>> idx.map(s2)
    Index(array(['b', 'b', 'd', 'd', 'a']), dtype='<U0')
    """
    # Imported locally, as in the rest of this method's history; note that
    # inside this scope ``map`` is arkouda.util.map, not the builtin.
    from arkouda.util import map

    mapped_values = map(self.values, arg)
    return Index(mapped_values)

def concat(self, other):
self._check_types(other)

Expand Down
42 changes: 4 additions & 38 deletions arkouda/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,7 @@ def map(self, arg: Union[dict, Series]) -> Series:
------
TypeError
Raised if arg is not of type dict or arkouda.Series.
Raised if arg values not of type pdarray or Strings.
Raised if series values not of type pdarray, Categorical, or Strings.
Examples
--------
>>> import arkouda as ak
Expand Down Expand Up @@ -1206,44 +1206,10 @@ def map(self, arg: Union[dict, Series]) -> Series:
+----+-----+
"""
from arkouda import Series, broadcast, full
from arkouda import Series
from arkouda.util import map

keys = self.values
gb = GroupBy(keys, dropna=False)
gb_keys = gb.unique_keys

mapping = arg
if isinstance(mapping, dict):
mapping = Series([array(list(mapping.keys())), array(list(mapping.values()))])

if isinstance(mapping, Series):
xtra_keys = gb_keys[in1d(gb_keys, mapping.index.values, invert=True)]

if xtra_keys.size > 0:
if not isinstance(mapping.values, (Strings, Categorical)):
nans = full(xtra_keys.size, np.nan, mapping.values.dtype)
else:
nans = full(xtra_keys.size, "null")

if isinstance(xtra_keys, Categorical):
xtra_keys = xtra_keys.to_strings()

xtra_series = Series(nans, index=xtra_keys)
mapping = Series.concat([mapping, xtra_series])

if isinstance(gb_keys, Categorical):
mapping = mapping[gb_keys.to_strings()]
else:
mapping = mapping[gb_keys]

if isinstance(mapping.values, (pdarray, Strings)):
return Series(
broadcast(gb.segments, mapping.values, permutation=gb.permutation), index=self.index
)
else:
raise TypeError("Map values must be castable to pdarray or Strings.")
else:
raise TypeError("Map must be dict or arkouda.Series.")
return Series(map(self.values, arg), index=self.index)

def isna(self) -> Series:
"""
Expand Down
86 changes: 85 additions & 1 deletion arkouda/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import builtins
import json
from typing import Sequence, Tuple, Union, cast
from typing import TYPE_CHECKING, Sequence, Tuple, Union, cast
from warnings import warn

from typeguard import typechecked
Expand All @@ -25,6 +25,9 @@
from arkouda.strings import Strings
from arkouda.timeclass import Datetime, Timedelta

if TYPE_CHECKING:
from arkouda.series import Series


def identity(x):
return x
Expand Down Expand Up @@ -510,3 +513,84 @@ def is_int(arry: Union[pdarray, Strings, Categorical]):
return _is_dtype_in_union(dtype(arry.dtype), int_scalars)
else:
return False


def map(
    values: Union[pdarray, Strings, Categorical], mapping: Union[dict, "Series"]
) -> Union[pdarray, Strings]:
    """
    Map values of an array according to an input mapping.

    Parameters
    ----------
    values : pdarray, Strings, or Categorical
        The values to be mapped.
    mapping : dict or Series
        The mapping correspondence. A dict is converted to a Series whose
        index holds the dict keys and whose values hold the dict values.

    Returns
    -------
    arkouda.pdarrayclass.pdarray or arkouda.strings.Strings
        A new array with the values mapped by the mapping correspondence.
        Entries of ``values`` that have no key in ``mapping`` are filled
        with ``np.nan`` cast to the dtype of the mapping's values, or with
        the string ``"null"`` when the mapping's values are Strings or
        Categorical.

    Raises
    ------
    TypeError
        Raised if mapping is not of type dict or arkouda.Series.
        Raised if values not of type pdarray, Categorical, or Strings.
        Raised if the looked-up mapping values are not a pdarray or Strings.

    Examples
    --------
    >>> import arkouda as ak
    >>> ak.connect()
    >>> from arkouda.util import map
    >>> a = ak.array([2, 3, 2, 3, 4])
    >>> a
    array([2 3 2 3 4])
    >>> map(a, {4: 25.0, 2: 30.0, 1: 7.0, 3: 5.0})
    array([30.00000000000000000 5.00000000000000000 30.00000000000000000
    5.00000000000000000 25.00000000000000000])
    >>> s = ak.Series(ak.array(["a","b","c","d"]), index = ak.array([4,2,1,3]))
    >>> map(a, s)
    array(['b', 'b', 'd', 'd', 'a'])
    """
    import numpy as np

    from arkouda import Series, array, broadcast, full
    from arkouda.pdarraysetops import in1d

    # Fail fast: reject an unusable mapping before any grouping work is done.
    if not isinstance(mapping, (dict, Series)):
        raise TypeError("Map must be dict or arkouda.Series.")

    # Group once so the lookup is performed per unique key, not per element.
    gb = GroupBy(values, dropna=False)
    gb_keys = gb.unique_keys

    if isinstance(mapping, dict):
        lookup = Series([array(list(mapping.keys())), array(list(mapping.values()))])
    else:
        lookup = mapping

    # Unique keys with no entry in the mapping get a placeholder entry so the
    # indexing below can resolve every key.
    missing = gb_keys[in1d(gb_keys, lookup.index.values, invert=True)]
    if missing.size > 0:
        if isinstance(lookup.values, (Strings, Categorical)):
            fill = full(missing.size, "null")
        else:
            fill = full(missing.size, np.nan, lookup.values.dtype)

        if isinstance(missing, Categorical):
            missing = missing.to_strings()

        lookup = Series.concat([lookup, Series(fill, index=missing)])

    # Categorical keys are looked up through their Strings representation.
    if isinstance(gb_keys, Categorical):
        lookup = lookup[gb_keys.to_strings()]
    else:
        lookup = lookup[gb_keys]

    if not isinstance(lookup.values, (pdarray, Strings)):
        raise TypeError("Map values must be castable to pdarray or Strings.")

    # Expand the per-unique-key results back out to one entry per input value.
    return broadcast(gb.segments, lookup.values, permutation=gb.permutation)
6 changes: 6 additions & 0 deletions tests/index_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,12 @@ def test_argsort(self):
self.assertListEqual(i4.argsort(ascending=True).to_list(), [0, 1, 2])
self.assertListEqual(i4.argsort(ascending=False).to_list(), [2, 1, 0])

def test_map(self):
    """Index.map with a dict yields an Index holding the mapped values."""
    index = ak.Index(ak.array([2, 3, 2, 3, 4]))
    lookup = {4: 25.0, 2: 30.0, 1: 7.0, 3: 5.0}

    # Key 1 never occurs in the index, so it contributes nothing to the output.
    mapped = index.map(lookup)
    self.assertListEqual(mapped.values.to_list(), [30.0, 5.0, 30.0, 5.0, 25.0])

def test_concat(self):
idx_1 = ak.Index.factory(ak.arange(5))
idx_2 = ak.Index(ak.array([2, 4, 1, 3, 0]))
Expand Down
51 changes: 49 additions & 2 deletions tests/util_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import numpy as np
from base_test import ArkoudaTest
from context import arkouda as ak
import numpy as np
from arkouda.util import is_numeric, is_int, is_float

from arkouda.util import is_float, is_int, is_numeric, map


class UtilTest(ArkoudaTest):
def test_sparse_sum_helper(self):
Expand Down Expand Up @@ -44,4 +46,49 @@ def test_is_numeric(self):
self.assertFalse(is_float(c))
self.assertTrue(is_float(d))

def test_map(self):
    """Exercise arkouda.util.map on Strings, int, float and Categorical inputs."""
    strs = ak.array(["1", "1", "4", "4", "4"])
    ints = ak.array([2, 3, 2, 3, 4])
    floats = ak.array([1.0, 1.0, 2.2, 2.2, 4.4])
    cats = ak.Categorical(strs)

    # --- Strings keys ---------------------------------------------------
    # Every value has a key; the unused key "5" does not affect the output.
    mapped = map(strs, {"4": 25, "5": 30, "1": 7})
    self.assertListEqual(mapped.to_list(), [7, 7, 25, 25, 25])

    # "4" is unmapped and the mapped values are ints: the expected fill is
    # whatever NaN becomes when cast to int64.
    mapped = map(strs, {"1": 7})
    actual = mapped.to_list()
    expected = ak.cast(ak.array([7, 7, np.nan, np.nan, np.nan]), dt=ak.int64).to_list()
    self.assertListEqual(actual, expected)

    # Same lookup with float values: the unmapped entries come back as NaN.
    mapped = map(strs, {"1": 7.0})
    is_close = np.allclose(mapped.to_list(), [7.0, 7.0, np.nan, np.nan, np.nan], equal_nan=True)
    self.assertTrue(is_close)

    # --- int keys -------------------------------------------------------
    mapped = map(ints, {4: 25.0, 2: 30.0, 1: 7.0, 3: 5.0})
    self.assertListEqual(mapped.to_list(), [30.0, 5.0, 30.0, 5.0, 25.0])

    # --- float keys, string values --------------------------------------
    mapped = map(floats, {1.0: "a", 2.2: "b", 4.4: "c", 5.0: "d"})
    self.assertListEqual(mapped.to_list(), ["a", "a", "b", "b", "c"])

    # Unmapped entries are filled with the literal string "null".
    mapped = map(floats, {1.0: "a"})
    self.assertListEqual(mapped.to_list(), ["a", "a", "null", "null", "null"])

    mapped = map(floats, {1.0: "a", 2.2: "b", 4.4: "c", 5.0: "d", 6.0: "e"})
    self.assertListEqual(mapped.to_list(), ["a", "a", "b", "b", "c"])

    # --- Categorical keys -----------------------------------------------
    mapped = map(cats, {"4": 25, "5": 30, "1": 7})
    self.assertListEqual(mapped.to_list(), [7, 7, 25, 25, 25])

    mapped = map(cats, {"1": 7})
    is_close = np.allclose(
        mapped.to_list(),
        ak.cast(ak.array([7, 7, np.nan, np.nan, np.nan]), dt=ak.int64).to_list(),
    )
    self.assertTrue(is_close)

    mapped = map(cats, {"1": 7.0})
    is_close = np.allclose(mapped.to_list(), [7.0, 7.0, np.nan, np.nan, np.nan], equal_nan=True)
    self.assertTrue(is_close)

0 comments on commit a84a35f

Please sign in to comment.