Skip to content

Commit

Permalink
Creating Sparse Matrix from Pdarrays (Bears-R-Us#3840)
Browse files Browse the repository at this point in the history
* Add initial implementation for create_sparse_array

Signed-off-by: Shreyas Khandekar <[email protected]>

* Add testing for sparse array creation from pdarray

Signed-off-by: Shreyas Khandekar <[email protected]>

* Remove debug output

Signed-off-by: Shreyas Khandekar <[email protected]>

* Add type annotation to to_pdarray

Signed-off-by: Shreyas Khandekar <[email protected]>

---------

Signed-off-by: Shreyas Khandekar <[email protected]>
  • Loading branch information
ShreyasKhandekar authored Oct 17, 2024
1 parent dd8500c commit 1c0b933
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 10 deletions.
12 changes: 7 additions & 5 deletions arkouda/sparrayclass.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import builtins
from typing import Optional, Sequence, Union, cast
from typing import List, Optional, Sequence, Union, cast

import numpy as np
from typeguard import typechecked
Expand Down Expand Up @@ -36,6 +36,8 @@ class sparray:
The element type of the array
size : int_scalars
The size of any one dimension of the array (all dimensions are assumed to be equal sized for now)
nnz : int_scalars
The number of non-zero elements in the array
ndim : int_scalars
The rank of the array (currently only rank 2 arrays supported)
shape : Sequence[int]
Expand Down Expand Up @@ -71,7 +73,7 @@ def __init__(

def __del__(self):
try:
logger.debug(f"deleting pdarray with name {self.name}")
logger.debug(f"deleting sparray with name {self.name}")
generic_msg(cmd="delete", args={"name": self.name})
except (RuntimeError, AttributeError):
pass
Expand All @@ -90,7 +92,7 @@ def __len__(self):
def __getitem__(self, key):
    # Indexing is not implemented for sparray: whatever `key` is, this
    # unconditionally raises NotImplementedError.
    raise NotImplementedError("sparray does not support __getitem__")

def __str__(self): # This won't work out of the box for sparrays need to add this in later
def __str__(self):
from arkouda.client import sparrayIterThresh

return generic_msg(cmd="str", args={"array": self, "printThresh": sparrayIterThresh})
Expand All @@ -116,7 +118,7 @@ def __str__(self): # This won't work out of the box for sparrays need to add th
"""

@typechecked
def to_pdarray(self):
def to_pdarray(self) -> List[pdarray]:
dtype = self.dtype
dtype_name = cast(np.dtype, dtype).name
# check dtype for error
Expand All @@ -139,7 +141,7 @@ def fill_vals(self, a: pdarray):

generic_msg(
cmd=f"fill_sparse_vals<{self.dtype},2,{self.layout},{a.dtype},1>",
args={"matrix": self, "vals": a}
args={"matrix": self, "vals": a},
)


Expand Down
56 changes: 52 additions & 4 deletions arkouda/sparsematrix.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
from __future__ import annotations

from typing import Union, cast

import numpy as np
from typeguard import typechecked

from arkouda.client import generic_msg
from arkouda.dtypes import dtype as akdtype
from arkouda.dtypes import int64
from arkouda.logger import getArkoudaLogger
from arkouda.numpy.dtypes.dtypes import NumericDTypes
from arkouda.pdarrayclass import pdarray
from arkouda.sparrayclass import create_sparray, sparray
from typing import Union
from arkouda.dtypes import int64
from arkouda.dtypes import dtype as akdtype

__all__ = ["random_sparse_matrix", "sparse_matrix_matrix_mult"]
__all__ = ["random_sparse_matrix", "sparse_matrix_matrix_mult", "create_sparse_matrix"]

logger = getArkoudaLogger(name="sparsematrix")

Expand Down Expand Up @@ -88,3 +92,47 @@ def sparse_matrix_matrix_mult(A, B: sparray) -> sparray:
)

return create_sparray(repMsg)


def create_sparse_matrix(size: int, rows: pdarray, cols: pdarray, vals: pdarray, layout: str) -> sparray:
    """
    Build a square sparse matrix from coordinate-style pdarrays.

    Position ``i`` of the three input arrays describes one stored element:
    ``vals[i]`` located at row ``rows[i]`` and column ``cols[i]``.

    Parameters
    ----------
    size : int
        Extent of each dimension; the shape sent to the server is ``(size, size)``
    rows : pdarray
        The row indices of the non-zero elements (1D, dtype int64)
    cols : pdarray
        The column indices of the non-zero elements (1D, dtype int64)
    vals : pdarray
        The values of the non-zero elements (1D, numeric dtype)
    layout : str
        Storage layout of the resulting matrix, either "CSR" or "CSC"

    Returns
    -------
    sparray
        A sparse matrix with the specified row and column indices and values

    Raises
    ------
    TypeError
        If rows, cols, or vals is not a pdarray, or if the dtype of vals
        is not one of NumericDTypes
    ValueError
        If the inputs are not all 1D, do not all have the same size,
        rows/cols are not int64, or layout is neither "CSR" nor "CSC"
    """
    coords = (rows, cols, vals)

    # Argument validation; the order of the checks determines which error wins.
    if not all(isinstance(arr, pdarray) for arr in coords):
        raise TypeError("rows, cols, and vals must be pdarrays for create_sparse_matrix")
    if any(arr.ndim != 1 for arr in coords):
        raise ValueError("rows, cols, and vals must be 1D for create_sparse_matrix")
    if rows.size != cols.size or rows.size != vals.size:
        raise ValueError("rows, cols, and vals must have the same size for create_sparse_matrix")
    indices_are_int64 = rows.dtype == int64 and cols.dtype == int64
    if not indices_are_int64:
        raise ValueError("rows and cols must have dtype int64 for create_sparse_matrix")
    if layout not in ("CSR", "CSC"):
        raise ValueError("layout must be 'CSR' or 'CSC'")

    # The server command is instantiated per value dtype, so reject
    # non-numeric dtypes before sending anything.
    if cast(np.dtype, vals.dtype).name not in NumericDTypes:
        raise TypeError(f"unsupported dtype {vals.dtype}")

    command = f"sparse_matrix_from_pdarrays<{vals.dtype},{layout}>"
    payload = {
        "rows": rows.name,
        "cols": cols.name,
        "vals": vals.name,
        "shape": (size, size),
    }
    return create_sparray(generic_msg(cmd=command, args=payload))
34 changes: 33 additions & 1 deletion src/SparseMatrix.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ module SparseMatrix {
moduleName=getModuleName(),
errorClass="IllegalArgumentError"
);

// Note: this simplified loop cannot be used because iteration over spsMat.domain
// occurs one locale at a time (i.e., the first spsMat.domain.parDom.localSubdomain(Locales[0]).size
// values from 'A' are deposited on locale 0, and so on), rather than depositing
Expand Down Expand Up @@ -467,6 +467,38 @@ module SparseMatrix {
return A;
}

// Build a sparse matrix with element type `eltType` and the given `layout`
// from three parallel arrays: element `i` is `vals[i]` at index
// `(rows[i], cols[i])`.
//
// Indices are 1-based: each row index must lie in 1..shape[0] and each
// column index in 1..shape[1]. Throws if any index is out of bounds or if
// the same (row, col) pair is supplied more than once.
proc sparseMatFromArrays(rows, cols, vals, shape: 2*int, param layout, type eltType) throws {
  import SymArrayDmap.makeSparseDomain;
  var (SD, dense) = makeSparseDomain(shape, layout);

  // First pass: validate every coordinate and add it to the sparse domain.
  for i in 0..<rows.size {
    // Read each coordinate once instead of re-indexing the arrays for
    // every comparison and format argument.
    const r = rows[i],
          c = cols[i];

    // Bounds are checked before the duplicate test so that only in-range
    // indices are ever queried against (or added to) the sparse domain.
    if r < 1 || r > shape[0] || c < 1 || c > shape[1] {
      throw getErrorWithContext(
        msg="Index (%i, %i) out of bounds for sparse matrix of shape (%i, %i)".format(r, c, shape[0], shape[1]),
        lineNumber=getLineNumber(),
        routineName=getRoutineName(),
        moduleName=getModuleName(),
        // Same error class name the other argument checks in this module use.
        errorClass="IllegalArgumentError"
      );
    }

    if SD.contains((r, c)) {
      throw getErrorWithContext(
        msg="Duplicate index (%i, %i) in sparse matrix".format(r, c),
        lineNumber=getLineNumber(),
        routineName=getRoutineName(),
        moduleName=getModuleName(),
        errorClass="IllegalArgumentError"
      );
    }

    SD += (r, c);
  }

  // Second pass: the index set is now final, so declare the array over it
  // and deposit the values.
  var A: [SD] eltType;
  for i in 0..<rows.size {
    A[rows[i], cols[i]] = vals[i];
  }

  return A;
}

module SpsMatUtil {
// The following are routines that should arguably be supported directly
// by the LayoutCS and SparseBlockDist modules themselves
Expand Down
13 changes: 13 additions & 0 deletions src/SparseMatrixMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,17 @@ module SparseMatrixMsg {
where d.rank != 1 && t == matrix.etype
do throw new Error("fillSparseMatrixMsg: vals must be rank 1");

@arkouda.instantiateAndRegister("sparse_matrix_from_pdarrays")
proc sparseMatrixFromPdarrays(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab,
type SparseSymEntry_etype, param SparseSymEntry_matLayout: Layout
): MsgTuple throws {
  // Handler for the "sparse_matrix_from_pdarrays" command: looks up the
  // "rows", "cols" and "vals" entries named in the message, builds a sparse
  // matrix from them, and stores it in the symbol table.

  // Row and column indices are rank-1 int entries; the values are a rank-1
  // entry of the instantiated element type.
  const rowEntry = st[msgArgs["rows"]]: borrowed SymEntry(int, 1);
  const colEntry = st[msgArgs["cols"]]: borrowed SymEntry(int, 1);
  const valEntry = st[msgArgs["vals"]]: borrowed SymEntry(SparseSymEntry_etype, 1);

  // Hardcode 2D for now
  const shape = msgArgs["shape"].toScalarTuple(int, 2);

  const spsMat = sparseMatFromArrays(rowEntry.a, colEntry.a, valEntry.a, shape,
                                     SparseSymEntry_matLayout, SparseSymEntry_etype);

  return st.insert(new shared SparseSymEntry(spsMat, SparseSymEntry_matLayout));
}

}
46 changes: 46 additions & 0 deletions tests/sparse_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,49 @@ def matmatmult(rowsA, colsA, valsA, rowsB, colsB, valsB):
assert np.all(result_rows == ans_rows)
assert np.all(result_cols == ans_cols)
assert np.all(result_vals == ans_vals)

def test_creation_csc(self):
    """Round-trip three pdarrays through a CSC sparse matrix."""
    layout = "CSC"

    # The entries are listed already "sorted" in CSC order, so the arrays
    # coming back from the matrix can be compared element-for-element
    # against the inputs.
    row_idx = ak.array([9, 5, 6, 7, 2, 3, 1, 5, 1, 5, 4, 6, 5, 4, 8, 2, 4, 8])
    col_idx = ak.array([1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 8, 9, 9, 9])
    values = ak.array(
        [441, 148, 445, 664, 165, 121, 620, 73, 91, 106, 437, 558, 722, 420, 843, 338, 598, 499]
    )

    mat = ak.create_sparse_matrix(10, row_idx, col_idx, values, layout)

    # Pull the matrix back out as (rows, cols, vals) and bring everything
    # over to numpy for comparison.
    got_rows, got_cols, got_vals = [part.to_ndarray() for part in mat.to_pdarray()]
    want_rows = row_idx.to_ndarray()
    want_cols = col_idx.to_ndarray()
    want_vals = values.to_ndarray()

    assert np.all(want_rows == got_rows)
    assert np.all(want_cols == got_cols)
    assert np.all(want_vals == got_vals)

    # The matrix must report the layout it was created with.
    assert mat.layout == layout

def test_creation_csr(self):
    """Round-trip three pdarrays through a CSR sparse matrix."""
    layout = "CSR"

    # The entries are listed already "sorted" in CSR order, so the arrays
    # coming back from the matrix can be compared element-for-element
    # against the inputs.
    row_idx = ak.array([1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 8])
    col_idx = ak.array([4, 5, 6, 1, 3, 4, 2, 3, 1, 2, 8, 1, 2, 6, 1, 6, 7, 8])
    values = ak.array(
        [3, 20, 30, 10, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170]
    )

    mat = ak.create_sparse_matrix(10, row_idx, col_idx, values, layout)

    # Pull the matrix back out as (rows, cols, vals) and bring everything
    # over to numpy for comparison.
    got_rows, got_cols, got_vals = [part.to_ndarray() for part in mat.to_pdarray()]
    want_rows = row_idx.to_ndarray()
    want_cols = col_idx.to_ndarray()
    want_vals = values.to_ndarray()

    assert np.all(want_rows == got_rows)
    assert np.all(want_cols == got_cols)
    assert np.all(want_vals == got_vals)

    # The matrix must report the layout it was created with.
    assert mat.layout == layout


0 comments on commit 1c0b933

Please sign in to comment.