Skip to content

Commit

Permalink
Closes Bears-R-Us#2914-add-isdigit-for-pdarray (Bears-R-Us#2925)
Browse files Browse the repository at this point in the history
Co-authored-by: Amanda Potts <[email protected]>
  • Loading branch information
ajpotts and ajpotts authored Jan 25, 2024
1 parent 8a7e9c0 commit 2ef265c
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 5 deletions.
38 changes: 38 additions & 0 deletions PROTO_tests/tests/string_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,44 @@ def test_string_isalpha(self):

assert example2.isalpha().to_list() == expected

def test_string_isdigit(self):
    """Check Strings.isdigit on generated strings and on a table of edge cases."""
    # Three strings containing letters and a space, then three all-digit strings.
    words = ak.array([f"Strings {i}" for i in range(3)])
    numbers = ak.array([f"12{i}" for i in range(3)])
    mixed = ak.concatenate([words, numbers])
    assert mixed.isdigit().to_list() == [False] * 3 + [True] * 3

    # Each row pairs an input string with the isdigit result expected for it.
    cases = [
        ("", False),
        ("string1", False),
        ("stringA", False),
        ("String", False),
        ("12345", True),
        ("Hello\tWorld", False),
        (" ", False),
        ("\n", False),
        ("3.14", False),
        ("\u0030", True),  # Unicode for zero
        ("\u00B2", False),  # superscript two; expected to be rejected
    ]
    example2 = ak.array([text for text, _ in cases])
    expected = [flag for _, flag in cases]

    assert example2.isdigit().to_list() == expected

def test_where(self):
revs = ak.arange(10) % 2 == 0
s1 = ak.array([f"str {i}" for i in range(10)])
Expand Down
44 changes: 41 additions & 3 deletions arkouda/strings.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -716,9 +716,9 @@ def isalnum(self) -> pdarray:
See Also
--------
Strings.is_lower
Strings.is_upper
Strings.is_title
Strings.islower
Strings.isupper
Strings.istitle
Examples
--------
Expand Down Expand Up @@ -776,6 +776,44 @@ def isalpha(self) -> pdarray:
)
)

@typechecked
def isdigit(self) -> pdarray:
    """
    Report, string by string, whether the string is made up entirely of
    digit characters.

    Returns
    -------
    pdarray, bool
        Element i is True when string i of this Strings contains only
        digit characters, False otherwise

    Raises
    ------
    RuntimeError
        Raised if there is a server-side error thrown

    See Also
    --------
    Strings.islower
    Strings.isupper
    Strings.istitle

    Examples
    --------
    >>> not_digit = ak.array([f'Strings {i}' for i in range(3)])
    >>> digit = ak.array([f'12{i}' for i in range(3)])
    >>> strings = ak.concatenate([not_digit, digit])
    >>> strings
    array(['Strings 0', 'Strings 1', 'Strings 2', '120', '121', '122'])
    >>> strings.isdigit()
    array([False False False True True True])
    """
    # The check itself runs on the server: "checkChars" is sent with the
    # "isdigit" sub-command and the reply is wrapped as a pdarray.
    request_args = {"subcmd": "isdigit", "objType": self.objType, "obj": self.entry}
    reply = generic_msg(cmd="checkChars", args=request_args)
    return create_pdarray(reply)

@typechecked
def strip(self, chars: Optional[Union[bytes, str_scalars]] = "") -> Strings:
"""
Expand Down
4 changes: 4 additions & 0 deletions src/SegmentedComputation.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ module SegmentedComputation {
StringIsTitle,
StringIsAlphaNumeric,
StringIsAlphabetic,
StringIsDigit,
}

proc computeOnSegments(segments: [?D] int, ref values: [?vD] ?t, param function: SegFunction, type retType, const strArg: string = "") throws {
Expand Down Expand Up @@ -116,6 +117,9 @@ module SegmentedComputation {
when SegFunction.StringIsAlphabetic {
agg.copy(res[i], stringIsAlphabetic(values, start..#len));
}
when SegFunction.StringIsDigit {
agg.copy(res[i], stringIsDigit(values, start..#len));
}
otherwise {
compilerError("Unrecognized segmented function");
}
Expand Down
4 changes: 4 additions & 0 deletions src/SegmentedMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,10 @@ module SegmentedMsg {
truth.a = strings.isalpha();
repMsg = "created "+st.attrib(rname);
}
when "isdigit" {
truth.a = strings.isdigit();
repMsg = "created "+st.attrib(rname);
}
otherwise {
var errorMsg = notImplementedError(pn, "%s".doFormat(subcmd));
smLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg);
Expand Down
19 changes: 17 additions & 2 deletions src/SegmentedString.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -501,14 +501,22 @@ module SegmentedString {
return computeOnSegments(offsets.a, values.a, SegFunction.StringIsAlphaNumeric, bool);
}

/*
Returns list of bools where index i indicates whether the string i of the SegString is alphabetic.
The per-string check is performed by computeOnSegments with SegFunction.StringIsAlphabetic.
:returns: [domain] bool where index i indicates whether the string i of the SegString is alphabetic
*/
proc isalpha() throws {
return computeOnSegments(offsets.a, values.a, SegFunction.StringIsAlphabetic, bool);
}

/*
Returns list of bools where index i indicates whether the string i of the SegString
consists of digits.
The per-string check is performed by computeOnSegments with SegFunction.StringIsDigit.
:returns: [domain] bool where index i indicates whether the string i of the SegString
          consists of digits
*/
proc isdigit() throws {
return computeOnSegments(offsets.a, values.a, SegFunction.StringIsDigit, bool);
}

proc bytesToUintArr(const max_bytes:int, lens: [?D] ?t, st) throws {
// bytes contained in strings < 128 bits, so concatenating is better than the hash
ref off = offsets.a;
Expand Down Expand Up @@ -1458,12 +1466,19 @@ module SegmentedString {
}

/*
The SegFunction called by computeOnSegments for isalpha.
Interprets the elements of `values` selected by `rng` as a string and
returns the result of calling isAlpha() on it.
NOTE(review): borrow=true presumably avoids copying the underlying bytes; confirm
against interpretAsString.
*/
inline proc stringIsAlphabetic(ref values, rng) throws {
return interpretAsString(values, rng, borrow=true).isAlpha();
}

/*
The SegFunction called by computeOnSegments for isdigit.
Interprets the elements of `values` selected by `rng` as a string and
returns the result of calling isDigit() on it.
NOTE(review): borrow=true presumably avoids copying the underlying bytes; confirm
against interpretAsString.
*/
inline proc stringIsDigit(ref values, rng) throws {
return interpretAsString(values, rng, borrow=true).isDigit();
}

inline proc stringBytesToUintArr(ref values, rng) throws {
var localSlice = new lowLevelLocalizingSlice(values, rng);
return | reduce [i in 0..#rng.size] (localSlice.ptr(i):uint)<<(8*(rng.size-1-i));
Expand Down
38 changes: 38 additions & 0 deletions tests/string_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,44 @@ def test_string_isalpha(self):

self.assertListEqual(example2.isalpha().to_list(), expected)

def test_string_isdigit(self):
    """Check Strings.isdigit on generated strings and on a table of edge cases."""
    # Three strings containing letters and a space, then three all-digit strings.
    words = ak.array([f"Strings {i}" for i in range(3)])
    numbers = ak.array([f"12{i}" for i in range(3)])
    mixed = ak.concatenate([words, numbers])
    self.assertListEqual(mixed.isdigit().to_list(), [False] * 3 + [True] * 3)

    # Each row pairs an input string with the isdigit result expected for it.
    cases = [
        ("", False),
        ("string1", False),
        ("stringA", False),
        ("String", False),
        ("12345", True),
        ("Hello\tWorld", False),
        (" ", False),
        ("\n", False),
        ("3.14", False),
        ("\u0030", True),  # Unicode for zero
        ("\u00B2", False),  # superscript two; expected to be rejected
    ]
    example2 = ak.array([text for text, _ in cases])
    expected = [flag for _, flag in cases]

    self.assertListEqual(example2.isdigit().to_list(), expected)

def test_where(self):
revs = ak.arange(10) % 2 == 0
s1 = ak.array([f"str {i}" for i in range(10)])
Expand Down

0 comments on commit 2ef265c

Please sign in to comment.