From 421ea95f6de215bc99432ebefdbc72a28b35c97f Mon Sep 17 00:00:00 2001 From: Amanda Potts Date: Tue, 22 Oct 2024 10:39:03 -0400 Subject: [PATCH] Closes #3855: refactor boolReductionMsg --- ServerModules.cfg | 1 + arkouda/array_api/statistical_functions.py | 24 +- arkouda/pdarrayclass.py | 310 ++++++++------------- pydoc/preprocess/generate_import_stubs.py | 3 +- src/AryUtil.chpl | 4 +- src/ReductionMsg.chpl | 177 ++++-------- src/ReductionMsgFunctions.chpl | 109 ++++++++ src/registry/reduction_script.py | 75 +++++ tests/conftest.py | 11 + tests/pdarrayclass_test.py | 184 ++++++++---- 10 files changed, 505 insertions(+), 393 deletions(-) create mode 100644 src/ReductionMsgFunctions.chpl create mode 100644 src/registry/reduction_script.py diff --git a/ServerModules.cfg b/ServerModules.cfg index efec67b62..8c61c61ee 100644 --- a/ServerModules.cfg +++ b/ServerModules.cfg @@ -26,6 +26,7 @@ OperatorMsg ParquetMsg RandMsg ReductionMsg +ReductionMsgFunctions RegistrationMsg SegmentedMsg SequenceMsg diff --git a/arkouda/array_api/statistical_functions.py b/arkouda/array_api/statistical_functions.py index 81d5bb78c..4442a1c04 100644 --- a/arkouda/array_api/statistical_functions.py +++ b/arkouda/array_api/statistical_functions.py @@ -50,11 +50,7 @@ def max( from arkouda import max as ak_max - arr = Array._new(ak_max(x._array, axis=axis)) - if keepdims or axis is None or x.ndim == 1: - return arr - else: - return squeeze(arr, axis) + return Array._new(ak_max(x._array, axis=axis, keepdims=keepdims)) # this is a temporary fix to get mean working with XArray @@ -138,11 +134,7 @@ def min( from arkouda import min as ak_min - arr = Array._new(ak_min(x._array, axis=axis)) - if keepdims or axis is None or x.ndim == 1: - return arr - else: - return squeeze(arr, axis) + return Array._new(ak_min(x._array, axis=axis, keepdims=keepdims)) def prod( @@ -180,11 +172,7 @@ def prod( from arkouda import prod as ak_prod - arr = Array._new(ak_prod(x_op, axis=axis)) - if keepdims or axis is None or x.ndim == 1: - return arr - else: - return squeeze(arr, axis) + return Array._new(ak_prod(x_op, axis=axis, keepdims=keepdims)) # Not working with XArray yet, pending a fix for: @@ -277,11 +265,7 @@ def sum( from arkouda import sum as ak_sum - arr = Array._new(ak_sum(x_op, axis=axis)) - if keepdims or axis is None or x.ndim == 1: - return arr - else: - return squeeze(arr, axis) + return Array._new(ak_sum(x_op, axis=axis, keepdims=keepdims)) # Not working with XArray yet, pending a fix for: diff --git a/arkouda/pdarrayclass.py b/arkouda/pdarrayclass.py index ff189c62a..d5451c584 100644 --- a/arkouda/pdarrayclass.py +++ b/arkouda/pdarrayclass.py @@ -72,6 +72,8 @@ logger = getArkoudaLogger(name="pdarrayclass") +supported_reduction_ops = ["any", "all", "isSorted", "isSortedLocally", "max", "min", "sum", "prod"] + @typechecked def parse_single_value(msg: str) -> object: @@ -2638,19 +2640,27 @@ def clear() -> None: @typechecked -def any(pda: pdarray) -> np.bool_: +def _reduce_by_op( + op: str, + pda: pdarray, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + keepdims: bool = False, +) -> Union[numpy_scalars, pdarray]: """ - Return True iff any element of the array evaluates to True. + Return reduction of a pdarray by an operation along an axis. Parameters ---------- pda : pdarray The pdarray instance to be evaluated - + axis : int or Tuple[int, ...], optional + The axis or axes along which to compute the sum. If None, the sum of the entire array is + computed (returning a scalar). + keepdims : bool, optional + Whether to keep the singleton dimension(s) along `axis` in the result. Returns ------- bool - Indicates if 1..n pdarray elements evaluate to True Raises ------ @@ -2658,53 +2668,80 @@ def any(pda: pdarray) -> np.bool_: Raised if pda is not a pdarray instance RuntimeError Raised if there's a server-side error thrown + ValueError + Raised op is not a supported reduction operation. """ - return parse_single_value( - generic_msg(cmd=f"reduce->bool{pda.ndim}D", args={"op": "any", "x": pda, "nAxes": 0, "axis": []}) + from arkouda.numpy import squeeze + + if op not in supported_reduction_ops: + raise ValueError(f"value {op} not supported by _reduce_by_op.") + axis_ = ( + [] + if axis is None + else ( + [ + axis, + ] + if isinstance(axis, int) + else list(axis) + ) ) + if len(axis_) == 0: + return parse_single_value(cast(str, generic_msg( + cmd=f"{op}All<{pda.dtype.name},{pda.ndim}>", + args={"x": pda, "skipNan": False}, + ))) + else: + repMsg = generic_msg( + cmd=f"{op}<{pda.dtype.name},{pda.ndim}>", + args={"x": pda, "axis": axis_, "skipNan": False}, + ) -@typechecked -def all(pda: pdarray) -> np.bool_: - """ - Return True iff all elements of the array evaluate to True. - - Parameters - ---------- - pda : pdarray - The pdarray instance to be evaluated + result = create_pdarray(cast(str, repMsg)) + if result.size == 1: + # TODO: remove call to 'flatten' + return result.flatten()[0] + elif keepdims or axis is None or pda.ndim == 1: + return result + else: + return squeeze(result, axis) - Returns - ------- - bool - Indicates if all pdarray elements evaluate to True - Raises - ------ - TypeError - Raised if pda is not a pdarray instance - RuntimeError - Raised if there's a server-side error thrown - """ - return parse_single_value( - generic_msg(cmd=f"reduce->bool{pda.ndim}D", args={"op": "all", "x": pda, "nAxes": 0, "axis": []}) - ) +def _make_reduction_func( + op, + function_descriptor="Return reduction of a pdarray by an operation along an axis.", + return_descriptor="", + return_dtype="numpy_scalars", +): + if op not in supported_reduction_ops: + raise ValueError(f"value {op} not supported by _reduce_by_op.") + @typechecked + def op_func( + pda: pdarray, + axis: Optional[Union[int, Tuple[int, ...]]] = None, + keepdims: bool = False, + ) -> Union[numpy_scalars, pdarray]: + return _reduce_by_op(op, pda, axis, keepdims=keepdims) -@typechecked -def is_sorted(pda: pdarray) -> np.bool_: - """ - Return True iff the array is monotonically non-decreasing. + op_func.__doc__ = f""" + {function_descriptor} Parameters ---------- pda : pdarray The pdarray instance to be evaluated - + axis : int or Tuple[int, ...], optional + The axis or axes along which to compute the sum. If None, the sum of the entire array is + computed (returning a scalar). + keepdims : bool, optional + Whether to keep the singleton dimension(s) along `axis` in the result. + Returns ------- - bool - Indicates if the array is monotonically non-decreasing + pdarray or {return_dtype} + {return_descriptor} Raises ------ @@ -2712,82 +2749,46 @@ def is_sorted(pda: pdarray) -> np.bool_: Raised if pda is not a pdarray instance RuntimeError Raised if there's a server-side error thrown - """ - return parse_single_value( - generic_msg( - cmd=f"reduce->bool{pda.ndim}D", args={"op": "is_sorted", "x": pda, "nAxes": 0, "axis": []} - ) - ) - + """ -# check whether a reduction of the given axes on an 'ndim' dimensional array -# would result in a single scalar value -def _reduces_to_single_value(axis, ndim) -> bool: - if len(axis) == 0 or ndim == 1: - # if no axes are specified or the array is 1D, the result is a scalar - return True - elif len(axis) == ndim: - # if all axes are specified, the result is a scalar - axes = set(axis) - for i in range(ndim): - if i not in axes: - return False - return True - else: - return False - - -# helper function for sum, min, max, prod -def _comon_reduction( - pda: pdarray, axis: Optional[Union[int, Tuple[int, ...]]], kind: str -): - if kind not in ["sum", "min", "max", "prod"]: - raise ValueError(f"Unsupported reduction type: {kind}") + return op_func - axis_ = [] if axis is None else ([axis,] if isinstance(axis, int) else list(axis)) - if _reduces_to_single_value(axis_, pda.ndim): - return parse_single_value(cast(str, generic_msg( - cmd=f"{kind}All<{pda.dtype.name},{pda.ndim}>", - args={"x": pda, "skipNan": False}, - ))) - else: - return create_pdarray( - generic_msg( - cmd=f"{kind}<{pda.dtype.name},{pda.ndim}>", - args={"x": pda, "axis": axis_, "skipNan": False}, - ) - ) +globals()["any"] = _make_reduction_func( + "any", + function_descriptor="Return True iff any element of the array evaluates to True.", + return_descriptor="Indicates if any pdarray element evaluates to True.", + return_dtype="bool", +) +globals()["all"] = _make_reduction_func( + "all", + function_descriptor="Return True iff all elements of the array evaluate to True.", + return_descriptor="Indicates if all pdarray elements evaluate to True.", + return_dtype="bool", +) -@typechecked -def sum( - pda: pdarray, axis: Optional[Union[int, Tuple[int, ...]]] = None -) -> Union[numeric_and_bool_scalars, pdarray]: - """ - Return the sum of all elements in the array. +globals()["is_sorted"] = _make_reduction_func( + "isSorted", + function_descriptor="Return True iff the array is monotonically non-decreasing.", + return_descriptor="Indicates if the array is monotonically non-decreasing.", + return_dtype="bool", +) - Parameters - ---------- - pda : pdarray - Values for which to calculate the sum - axis : int or Tuple[int, ...], optional - The axis or axes along which to compute the sum. If None, the sum of the entire array is - computed (returning a scalar). - Returns - ------- - np.float64 - The sum of all elements in the array +globals()["is_locally_sorted"] = _make_reduction_func( + "isSortedLocally", + function_descriptor="Return True iff the array is monotonically non-decreasing on each locale where the data is stored.", + return_descriptor="Indicates if the array is monotonically non-decreasing on each locale.", + return_dtype="bool", +) - Raises - ------ - TypeError - Raised if pda is not a pdarray instance - RuntimeError - Raised if there's a server-side error thrown - """ - return _comon_reduction(pda, axis, "sum") +globals()["sum"] = _make_reduction_func( + "sum", + function_descriptor="Return the sum of all elements in the array.", + return_descriptor="The sum of all elements in the array.", + return_dtype="float64", +) @typechecked @@ -2841,92 +2842,27 @@ def dot( return pda1 * pda2 -@typechecked -def prod(pda: pdarray, axis: Optional[Union[int, Tuple[int, ...]]] = None) -> Union[np.float64, pdarray]: - """ - Return the product of all elements in the array. Return value is - always a np.float64 or np.int64 - - Parameters - ---------- - pda : pdarray - Values for which to calculate the product - axis : int or Tuple[int, ...], optional - The axis or axes along which to compute the sum. If None, the sum of the entire array is - computed (returning a scalar). - - Returns - ------- - numpy_scalars - The product calculated from the pda - - Raises - ------ - TypeError - Raised if pda is not a pdarray instance - RuntimeError - Raised if there's a server-side error thrown - """ - return _comon_reduction(pda, axis, "prod") - - -def min( - pda: pdarray, axis: Optional[Union[int, Tuple[int, ...]]] = None -) -> Union[numpy_scalars, pdarray]: - """ - Return the minimum value of the array. - - Parameters - ---------- - pda : pdarray - Values for which to calculate the min - axis : int or Tuple[int, ...], optional - The axis or axes along which to compute the sum. If None, the sum of the entire array is - computed (returning a scalar). - - Returns - ------- - numpy_scalars - The min calculated from the pda - - Raises - ------ - TypeError - Raised if pda is not a pdarray instance - RuntimeError - Raised if there's a server-side error thrown - """ - return _comon_reduction(pda, axis, "min") - - -@typechecked -def max( - pda: pdarray, axis: Optional[Union[int, Tuple[int, ...]]] = None -) -> Union[numpy_scalars, pdarray]: - """ - Return the maximum value of the array. +globals()["prod"] = _make_reduction_func( + "prod", + function_descriptor="Return the product of all elements in the array. Return value is always a np.float64 or np.int64", + return_descriptor="The product calculated from the pda.", + return_dtype="numpy_scalars", +) - Parameters - ---------- - pda : pdarray - Values for which to calculate the max - axis : int or Tuple[int, ...], optional - The axis or axes along which to compute the sum. If None, the sum of the entire array is - computed (returning a scalar). +globals()["min"] = _make_reduction_func( + "min", + function_descriptor="Return the minimum value of the array.", + return_descriptor="The min calculated from the pda.", + return_dtype="numpy_scalars", +) - Returns - ------- - numpy_scalars: - The max calculated from the pda - Raises - ------ - TypeError - Raised if pda is not a pdarray instance - RuntimeError - Raised if there's a server-side error thrown - """ - return _comon_reduction(pda, axis, "max") +globals()["max"] = _make_reduction_func( + "max", + function_descriptor="Return the maximum value of the array.", + return_descriptor="The max calculated from the pda.", + return_dtype="numpy_scalars", +) @typechecked diff --git a/pydoc/preprocess/generate_import_stubs.py b/pydoc/preprocess/generate_import_stubs.py index fd5d263ac..1718d2266 100644 --- a/pydoc/preprocess/generate_import_stubs.py +++ b/pydoc/preprocess/generate_import_stubs.py @@ -152,6 +152,7 @@ def main(): import arkouda.scipy.special as akscipySpecial import arkouda.scipy.stats as akscipyStats import arkouda.series as akSeries + import arkouda as ak write_stub(aknp, "arkouda/numpy.pyi", all_only=False, allow_arkouda=True) write_stub(aknp.dtypes, "arkouda/numpy/dtypes.pyi", all_only=False, allow_arkouda=True) @@ -162,7 +163,7 @@ def main(): write_stub(akDataframe, "arkouda/dataframe.pyi", all_only=True, allow_arkouda=True) write_stub(akGroupbyclass, "arkouda/groupbyclass.pyi", all_only=True, allow_arkouda=True) write_stub(akSeries, "arkouda/series.pyi", all_only=True, allow_arkouda=True) - + write_stub(ak.pdarrayclass, "arkouda/pdarrayclass.pyi", all_only=True, allow_arkouda=True) if __name__ == "__main__": main() diff --git a/src/AryUtil.chpl b/src/AryUtil.chpl index 5ef9642c9..e1b4c6b0d 100644 --- a/src/AryUtil.chpl +++ b/src/AryUtil.chpl @@ -96,7 +96,7 @@ module AryUtil :arg A: array to check */ - proc isSorted(A:[?D] ?t): bool { + proc isSorted(A:[?D] ?t): bool where D.rank == 1 { var sorted: bool; sorted = true; forall (a,i) in zip(A,D) with (&& reduce sorted) { @@ -115,7 +115,7 @@ module AryUtil :arg slice: a slice domain (only the indices in this domain are checked) :arg axisIdx: the axis to check */ - proc isSortedOver(A: [?D] ?t, slice, axisIdx: int) { + proc isSortedOver(const ref A: [?D] ?t, const ref slice, axisIdx: int) { var sorted = true; forall i in slice with (&& reduce sorted, var im1: D.rank*int) { if i[axisIdx] > slice.dim(axisIdx).low { diff --git a/src/ReductionMsg.chpl b/src/ReductionMsg.chpl index 6f790212f..7b3311eb4 100644 --- a/src/ReductionMsg.chpl +++ b/src/ReductionMsg.chpl @@ -34,7 +34,7 @@ module ReductionMsg const basicReductionOps = {"sum", "prod", "min", "max"}, boolReductionOps = {"any", "all", "is_sorted", "is_locally_sorted"}, idxReductionOps = {"argmin", "argmax"}; - + proc reductionReturnType(type t) type do return if t == bool then int else t; @@ -141,120 +141,6 @@ module ReductionMsg } } - /* - Compute an array reduction along one or more axes. - (where the result has a bool data type) - - Supports: 'any', 'all', is_sorted, is_locally_sorted - */ - @arkouda.registerND(cmd_prefix="reduce->bool") - proc boolReductionMsg(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab, param nd: int): MsgTuple throws { - use SliceReductionOps; - param pn = Reflection.getRoutineName(); - const x = msgArgs.getValueOf("x"), - op = msgArgs.getValueOf("op"), - nAxes = msgArgs.get("nAxes").getIntValue(), - axesRaw = msgArgs.get("axis").toScalarArray(int, nAxes), - rname = st.nextName(); - - var gEnt: borrowed GenSymEntry = getGenericTypedArrayEntry(x, st); - - if !boolReductionOps.contains(op) { - const errorMsg = notImplementedError(pn,op,gEnt.dtype); - rmLogger.error(getModuleName(),pn,getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - - if nd > 1 && (op == "is_sorted" || op == "is_locally_sorted") { - // TODO: support this for any case where nAxes == 1) - const errorMsg = "is_sorted checks are only supported for 1D arrays"; - rmLogger.error(getModuleName(),pn,getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - - proc computeReduction(type t): MsgTuple throws { - const eIn = toSymEntry(gEnt, t, nd); - - if nd == 1 || nAxes == 0 { - var s: bool; - select op { - when "any" { - s = if t == bool - then | reduce eIn.a - else (+ reduce (eIn.a != 0)) != 0; - } - when "all" { - s = if t == bool - then & reduce eIn.a - else (+ reduce (eIn.a != 0)) == eIn.a.size; - } - when "is_sorted" do s = isSorted(eIn.a); - when "is_locally_sorted" { - coforall loc in Locales with (&& reduce s) do on loc { - ref aLocal = eIn.a[eIn.a.localSubdomain()]; - s &&= isSorted(aLocal); - } - } - otherwise halt("unreachable"); - } - - const scalarValue = "bool " + bool2str(s); - rmLogger.debug(getModuleName(),pn,getLineNumber(),scalarValue); - return new MsgTuple(scalarValue, MsgType.NORMAL); - } else { - const (valid, axes) = validateNegativeAxes(axesRaw, nd); - if !valid { - var errorMsg = "Invalid axis value(s) '%?' in slicing reduction".format(axesRaw); - rmLogger.error(getModuleName(),pn,getLineNumber(),errorMsg); - return new MsgTuple(errorMsg,MsgType.ERROR); - } else { - const outShape = reducedShape(eIn.a.shape, axes); - var eOut = st.addEntry(rname, outShape, bool); - - forall sliceIdx in domOffAxis(eIn.a.domain, axes) { - const sliceDom = domOnAxis(eIn.a.domain, sliceIdx, axes); - var s: bool = true; - select op { - when "any" do s = any(eIn.a, sliceDom); - when "all" do s = all(eIn.a, sliceDom); - when "is_sorted" { - // TODO: maybe it's better to fold this loop outside of the - // domOffAxis loop (check one dimension at a time globally). - for axisIdx in axes do - s &&= isSortedOver(eIn.a, sliceDom, axisIdx); - } - when "is_locally_sorted" { - coforall loc in Locales with (&& reduce s) do on loc { - const localSliceDom = sliceDom[eIn.a.localSubdomain()]; - for axisIdx in axes do - s &&= isSortedOver(eIn.a, localSliceDom, axisIdx); - } - } - otherwise halt("unreachable"); - } - eOut.a[sliceIdx] = s; - } - - const repMsg = "created " + st.attrib(rname); - rmLogger.info(getModuleName(),pn,getLineNumber(),repMsg); - return new MsgTuple(repMsg, MsgType.NORMAL); - } - } - } - - select gEnt.dtype { - when DType.Int64 do return computeReduction(int); - when DType.UInt64 do return computeReduction(uint); - when DType.Float64 do return computeReduction(real); - when DType.Bool do return computeReduction(bool); - otherwise { - var errorMsg = notImplementedError(pn,dtype2str(gEnt.dtype)); - rmLogger.error(getModuleName(),pn,getLineNumber(),errorMsg); - return new MsgTuple(errorMsg,MsgType.ERROR); - } - } - } - /* Compute an array reduction along one or more axes. (where the result has an integer data type) @@ -429,32 +315,77 @@ module ReductionMsg return nnzIndices; } - private module SliceReductionOps { + module SliceReductionOps { private proc isArgandType(type t) param: bool do return isRealType(t) || isImagType(t) || isComplexType(t); - proc any(ref a: [] bool, slice): bool { + proc anySlice(const ref a: [] bool, slice): bool { var hasAny = false; forall i in slice with (|| reduce hasAny) do hasAny ||= a[i]; return hasAny; } - proc any(ref a: [] ?t, slice): bool { - var sum = 0:t; + proc anySlice(const ref a: [] ?t, slice): bool { + var sum = 0:int; forall i in slice with (+ reduce sum) do sum += (a[i] != 0):int; return sum != 0; } - proc all(ref a: [] bool, slice): bool { + proc allSlice(const ref a: [] bool, slice): bool { var hasAll = true; forall i in slice with (&& reduce hasAll) do hasAll &&= a[i]; return hasAll; } - proc all(ref a: [] ?t, slice): bool { - var sum = 0:t; + proc allSlice(const ref a: [] ?t, slice): bool { + var sum = 0:int; forall i in slice with (+ reduce sum) do sum += (a[i] != 0):int; - return sum == a.size; + return sum == slice.size; + } + + proc isSortedLocallySlice(const ref A: [?D] ?t, slice) + where D.rank == 1 { + var s = true; + coforall loc in Locales with (&& reduce s) do on loc { + + const ref localSliceDom = slice.localSubdomain(); + const ref aLocalSlice = A[localSliceDom]; + + s &&= isSortedSlice(aLocalSlice, localSliceDom); + + } + return s; + } + + proc isSortedLocallySlice(const ref A: [?D] ?t, slice) + where D.rank != 1 { + var s = true; + coforall loc in Locales with (&& reduce s) do on loc { + + const ref localSliceDom = slice.localSubdomain(); + const ref aLocalSlice = A[localSliceDom]; + + forall axisIdx in 0..#aLocalSlice.rank with (&& reduce s){ + s &&= isSortedOver(aLocalSlice, localSliceDom, axisIdx); + } + } + return s; + } + + proc isSortedSlice(const ref A: [?D] ?t, slice) + where D.rank != 1 { + const ref aSlice = A[slice]; + var s = true; + forall axisIdx in 0..#slice.rank with (&& reduce s){ + s &&= isSortedOver(aSlice, slice, axisIdx); + } + return s; + } + + proc isSortedSlice(const ref A: [?D] ?t, slice) + where D.rank == 1 { + const ref aSlice = A[slice]; + return isSorted(aSlice); } proc sumSlice(const ref a: [?d] ?t, slice, type opType, skipNan: bool): opType { diff --git a/src/ReductionMsgFunctions.chpl b/src/ReductionMsgFunctions.chpl new file mode 100644 index 000000000..416c72758 --- /dev/null +++ b/src/ReductionMsgFunctions.chpl @@ -0,0 +1,109 @@ +module ReductionMsgFunctions +{ + use BigInteger; + use List; + use AryUtil; + use ReductionMsg; + use SliceReductionOps; + + @arkouda.registerCommand + proc anyAll(const ref x:[?d] ?t): bool throws + { + use SliceReductionOps; + return anySlice(x, x.domain); + } + + @arkouda.registerCommand + proc any(const ref x:[?d] ?t, axis: list(int)): [] bool throws + { + use SliceReductionOps; + type opType = bool; + const (valid, axes) = validateNegativeAxes(axis, x.rank); + if !valid { + throw new Error("Invalid axis value(s) '%?' in slicing reduction".format(axis)); + } else { + const outShape = reducedShape(x.shape, axes); + var ret = makeDistArray((...outShape), opType); + forall (sliceDom, sliceIdx) in axisSlices(x.domain, axes) + do ret[sliceIdx] = anySlice(x, sliceDom); + return ret; + } + } + + + @arkouda.registerCommand + proc allAll(const ref x:[?d] ?t): bool throws + { + use SliceReductionOps; + return allSlice(x, x.domain); + } + + @arkouda.registerCommand + proc all(const ref x:[?d] ?t, axis: list(int)): [] bool throws + { + use SliceReductionOps; + type opType = bool; + const (valid, axes) = validateNegativeAxes(axis, x.rank); + if !valid { + throw new Error("Invalid axis value(s) '%?' in slicing reduction".format(axis)); + } else { + const outShape = reducedShape(x.shape, axes); + var ret = makeDistArray((...outShape), opType); + forall (sliceDom, sliceIdx) in axisSlices(x.domain, axes) + do ret[sliceIdx] = allSlice(x, sliceDom); + return ret; + } + } + + + @arkouda.registerCommand + proc isSortedAll(const ref x:[?d] ?t): bool throws + { + use SliceReductionOps; + return isSortedSlice(x, x.domain); + } + + @arkouda.registerCommand + proc isSorted(const ref x:[?d] ?t, axis: list(int)): [] bool throws + { + use SliceReductionOps; + type opType = bool; + const (valid, axes) = validateNegativeAxes(axis, x.rank); + if !valid { + throw new Error("Invalid axis value(s) '%?' in slicing reduction".format(axis)); + } else { + const outShape = reducedShape(x.shape, axes); + var ret = makeDistArray((...outShape), opType); + forall (sliceDom, sliceIdx) in axisSlices(x.domain, axes) + do ret[sliceIdx] = isSortedSlice(x, sliceDom); + return ret; + } + } + + + @arkouda.registerCommand + proc isSortedLocallyAll(const ref x:[?d] ?t): bool throws + { + use SliceReductionOps; + return isSortedLocallySlice(x, x.domain); + } + + @arkouda.registerCommand + proc isSortedLocally(const ref x:[?d] ?t, axis: list(int)): [] bool throws + { + use SliceReductionOps; + type opType = bool; + const (valid, axes) = validateNegativeAxes(axis, x.rank); + if !valid { + throw new Error("Invalid axis value(s) '%?' in slicing reduction".format(axis)); + } else { + const outShape = reducedShape(x.shape, axes); + var ret = makeDistArray((...outShape), opType); + forall (sliceDom, sliceIdx) in axisSlices(x.domain, axes) + do ret[sliceIdx] = isSortedLocallySlice(x, sliceDom); + return ret; + } + } + + +} \ No newline at end of file diff --git a/src/registry/reduction_script.py b/src/registry/reduction_script.py new file mode 100644 index 000000000..c5999f993 --- /dev/null +++ b/src/registry/reduction_script.py @@ -0,0 +1,75 @@ +FUNCS = [ + ["any", "bool", []], + ["all", "bool", []], + ["isSorted", "bool", []], + ["isSortedLocally", "bool", []], +] + + +def generate_function(): + + ret = """module ReductionMsgFunctions +{ + use BigInteger; + use List; + use AryUtil; + use ReductionMsg; + use SliceReductionOps; +""" + + for func, ret_type, allowed_types in FUNCS: + + where_statement = "" + + if len(allowed_types) > 0: + where_statement += "where " + where_statement += "||".join([f"(t=={tp})" for tp in allowed_types]) + + ret += f""" + @arkouda.registerCommand + proc {func}All(const ref x:[?d] ?t): {ret_type} throws + {where_statement}{{ + use SliceReductionOps; + return {func}Slice(x, x.domain); + }} + + @arkouda.registerCommand + proc {func}(const ref x:[?d] ?t, axis: list(int)): [] {ret_type} throws + {where_statement}{{ + use SliceReductionOps; + type opType = {ret_type}; + const (valid, axes) = validateNegativeAxes(axis, x.rank); + if !valid {{ + throw new Error("Invalid axis value(s) '%?' in slicing reduction".format(axis)); + }} else {{ + const outShape = reducedShape(x.shape, axes); + var ret = makeDistArray((...outShape), opType); + forall (sliceDom, sliceIdx) in axisSlices(x.domain, axes) + do ret[sliceIdx] = {func}Slice(x, sliceDom); + return ret; + }} + }}""" + ret += "\n\n" + + ret = ret.replace("\t", " ") + ret = ret + ret += "\n}" + + return ret + + +def main(): + + infile = "src/ReductionMsg.chpl" + + x = generate_function() + print(x) + + outfile = "src/ReductionMsgFunctions.chpl" + + with open(outfile, "w") as text_file: + text_file.write(x) + + +if __name__ == "__main__": + main() diff --git a/tests/conftest.py b/tests/conftest.py index 2e8e1230f..816cfebdd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -130,3 +130,14 @@ def skip_by_rank(request): if request.node.get_closest_marker("skip_if_max_rank_greater_than"): if request.node.get_closest_marker("skip_if_max_rank_greater_than").args[0] < pytest.max_rank: pytest.skip("this test requires server with max_array_dims =< {}".format(pytest.max_rank)) + + +@pytest.fixture(autouse=True) +def skip_by_num_locales(request): + if request.node.get_closest_marker("skip_if_nl_less_than"): + if request.node.get_closest_marker("skip_if_nl_less_than").args[0] > pytest.nl: + pytest.skip("this test requires server with nl >= {}".format(pytest.nl)) + + if request.node.get_closest_marker("skip_if_nl_greater_than"): + if request.node.get_closest_marker("skip_if_nl_greater_than").args[0] < pytest.nl: + pytest.skip("this test requires server with nl =< {}".format(pytest.nl)) diff --git a/tests/pdarrayclass_test.py b/tests/pdarrayclass_test.py index 3a2937045..fd2127632 100644 --- a/tests/pdarrayclass_test.py +++ b/tests/pdarrayclass_test.py @@ -1,3 +1,5 @@ +from typing import Optional, Tuple, Union + import numpy as np import pytest @@ -5,6 +7,14 @@ from arkouda.testing import assert_equal as ak_assert_equal SEED = 314159 +import numpy + +import arkouda.pdarrayclass + +REDUCTION_OPS = list(set(ak.pdarrayclass.supported_reduction_ops) - set(["isSorted", "isSortedLocally"])) +DTYPES = ["int64", "float64", "bool", "uint64"] + +# TODO: add unint8 to DTYPES class TestPdarrayClass: @@ -42,79 +52,133 @@ def test_flatten(self, size): b = a.reshape((2, 2, size / 4)) ak_assert_equal(b.flatten(), a) - def test_prod(self): - a = ak.arange(10) + 1 - - assert ak.prod(a) == 3628800 - - @pytest.mark.skip_if_max_rank_less_than(3) - def test_prod_multidim(self): - a = ak.ones((2, 3, 4)) - a = a + a - - assert ak.prod(a) == 2**24 - - aProd0 = ak.prod(a, axis=0) - assert aProd0.shape == (1, 3, 4) - assert aProd0[0, 0, 0] == 2**2 + @pytest.mark.parametrize("size", pytest.prob_size) + @pytest.mark.parametrize("dtype", DTYPES) + @pytest.mark.parametrize("axis", [0, (0,), None]) + def test_is_sorted(self, size, dtype, axis): - aProd02 = ak.prod(a, axis=(1, 2)) - assert aProd02.shape == (2, 1, 1) - assert aProd02[0, 0, 0] == 2**12 + a = ak.arange(size, dtype=dtype) + assert ak.is_sorted(a, axis=axis) - def test_sum(self): - a = ak.ones(10) + b = ak.flip(a) + assert not ak.is_sorted(b, axis=axis) - assert ak.sum(a) == 10 + c = ak.randint(0, size // 10, size, seed=SEED) + assert not ak.is_sorted(c, axis=axis) @pytest.mark.skip_if_max_rank_less_than(3) - def test_sum_multidim(self): - a = ak.ones((2, 3, 4)) - - assert ak.sum(a) == 24 - - aSum0 = ak.sum(a, axis=0) - assert aSum0.shape == (1, 3, 4) - assert aSum0[0, 0, 0] == 2 + @pytest.mark.parametrize("dtype", list(set(DTYPES) - set(["bool"]))) + @pytest.mark.parametrize("axis", [None, 0, 1, (0, 2), (0, 1, 2)]) + def test_is_sorted_multidim(self, dtype, axis): + + a = ak.array(ak.randint(0, 100, (5, 7, 4), dtype=dtype, seed=SEED)) + sorted = ak.is_sorted(a, axis=axis) + if isinstance(sorted, np.bool_): + assert not sorted + else: + assert ak.all(sorted == False) + + x = ak.arange(40).reshape((2, 10, 2)) + sorted = ak.is_sorted(x, axis=axis) + if isinstance(sorted, np.bool_): + assert sorted + else: + assert ak.all(sorted) - aSum02 = ak.sum(a, axis=(1, 2)) - assert aSum02.shape == (2, 1, 1) - assert aSum02[0, 0, 0] == 12 - - def test_max(self): - a = ak.arange(10) - assert ak.max(a) == 9 + @pytest.mark.parametrize("size", pytest.prob_size) + @pytest.mark.parametrize("dtype", DTYPES) + @pytest.mark.parametrize("axis", [0, (0,), None]) + def test_is_locally_sorted(self, size, dtype, axis): + from arkouda.pdarrayclass import is_locally_sorted - @pytest.mark.skip_if_max_rank_less_than(3) - def test_max(self): - a = ak.array(ak.randint(0, 100, (5, 7, 4), dtype=ak.int64, seed=SEED)) - a[3, 6, 2] = 101 + a = ak.arange(size) + assert is_locally_sorted(a, axis=axis) - assert ak.max(a) == 101 + assert not is_locally_sorted(ak.flip(a), axis=axis) - aMax0 = ak.max(a, axis=0) - assert aMax0.shape == (1, 7, 4) - assert aMax0[0, 6, 2] == 101 + b = ak.randint(0, size // 10, size) + assert not is_locally_sorted(b, axis=axis) - aMax02 = ak.max(a, axis=(0, 2)) - assert aMax02.shape == (1, 7, 1) - assert aMax02[0, 6, 0] == 101 + @pytest.mark.skip_if_nl_greater_than(2) + @pytest.mark.skip_if_nl_less_than(2) + @pytest.mark.parametrize("size", pytest.prob_size) + def test_is_locally_sorted_multi_locale(self, size): + from arkouda.pdarrayclass import is_locally_sorted, is_sorted - def test_min(self): - a = ak.arange(10) + 2 - assert ak.min(a) == 2 + size = size // 2 + a = ak.concatenate([ak.arange(size), ak.arange(size)]) + assert is_locally_sorted(a) + assert not is_sorted(a) @pytest.mark.skip_if_max_rank_less_than(3) - def test_min(self): - a = ak.array(ak.randint(0, 100, (5, 7, 4), dtype=ak.int64, seed=SEED)) - a[3, 6, 2] = -1 + @pytest.mark.parametrize("dtype", DTYPES) + @pytest.mark.parametrize("axis", [None, 0, 1, (0, 2), (0, 1, 2)]) + def test_is_locally_sorted_multidim(self, dtype, axis): + from arkouda.pdarrayclass import is_locally_sorted - assert ak.min(a) == -1 + a = ak.array(ak.randint(0, 100, (5, 7, 4), dtype=ak.int64, seed=SEED)) + sorted = is_locally_sorted(a, axis=axis) + if isinstance(sorted, np.bool_): + assert not sorted + else: + assert ak.all(sorted == False) + + x = ak.arange(40).reshape((2, 10, 2)) + sorted = is_locally_sorted(x, axis=axis) + if isinstance(sorted, np.bool_): + assert sorted + else: + assert ak.all(sorted) + + def assert_reduction_ops_match( + self, op: str, pda: ak.pdarray, axis: Optional[Union[int, Tuple[int, ...]]] = None + ): + from arkouda.testing import assert_equivalent as ak_assert_equivalent + + ak_op = getattr(arkouda.pdarrayclass, op) + np_op = getattr(numpy, op) + nda = pda.to_ndarray() + + # TODO: remove cast when #3864 is resolved. + ak_result = ak_op(pda, axis=axis) + if op in ["max", "min"] and pda.dtype == ak.bool_: + if isinstance(ak_result, ak.pdarray): + ak_result = ak.cast(ak_result, dt=ak.bool_) + else: + ak_result = np.bool_(ak_result) + + ak_assert_equivalent(ak_result, np_op(nda, axis=axis)) + + @pytest.mark.parametrize("op", REDUCTION_OPS) + @pytest.mark.parametrize("size", pytest.prob_size) + @pytest.mark.parametrize("dtype", DTYPES) + @pytest.mark.parametrize("arry_gen", [ak.zeros, ak.ones, ak.arange]) + @pytest.mark.parametrize("axis", [0, (0,), None]) + def test_reductions_match_numpy_1D(self, op, size, dtype, arry_gen, axis): + size = min(size, 1000) if op == "prod" else size + pda = arry_gen(size, dtype=dtype) + self.assert_reduction_ops_match(op, pda, axis=axis) - aMin0 = ak.min(a, axis=0) - assert aMin0.shape == (1, 7, 4) - assert aMin0[0, 6, 2] == -1 + @pytest.mark.skip_if_max_rank_less_than(3) + @pytest.mark.parametrize("op", REDUCTION_OPS) + @pytest.mark.parametrize("size", pytest.prob_size) + @pytest.mark.parametrize("dtype", DTYPES) + @pytest.mark.parametrize("arry_gen", [ak.zeros, ak.ones]) + @pytest.mark.parametrize("axis", [None, 0, 1, (0, 2), (0, 1, 2)]) + def test_reductions_match_numpy_3D_zeros(self, op, size, dtype, arry_gen, axis): + size = min(size // 3, 100) if op == "prod" else size // 3 + pda = arry_gen((size, size, size), dtype=dtype) + self.assert_reduction_ops_match(op, pda, axis=axis) + + @pytest.mark.parametrize("op", REDUCTION_OPS) + @pytest.mark.parametrize("axis", [0, (0,), None]) + def test_reductions_match_numpy_1D_TF(self, op, axis): + pda = ak.array([True, True, False, True, True, True, True, True]) + self.assert_reduction_ops_match(op, pda, axis=axis) - aMin02 = ak.min(a, axis=(0, 2)) - assert aMin02.shape == (1, 7, 1) - assert aMin02[0, 6, 0] == -1 + @pytest.mark.skip_if_max_rank_less_than(3) + @pytest.mark.parametrize("op", REDUCTION_OPS) + @pytest.mark.parametrize("axis", [None, 0, 1, (0, 2), (0, 1, 2)]) + def test_reductions_match_numpy_3D_TF(self, op, axis): + pda = ak.array([True, True, False, True, True, True, True, True]).reshape((2, 2, 2)) + self.assert_reduction_ops_match(op, pda, axis=axis)