From d952891ce05b2cbdaf5f4e3358b295964e44fb21 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Mon, 2 Sep 2024 11:15:29 +0200 Subject: [PATCH 01/14] Add `__binsparse_descriptor__` and `__binsparse_dlpack__`. --- .../numba_backend/_compressed/compressed.py | 38 ++++++++++++++++++- sparse/numba_backend/_coo/core.py | 38 +++++++++++++++++++ sparse/numba_backend/_dok.py | 6 +++ sparse/numba_backend/_sparse_array.py | 25 ++++++++++++ 4 files changed, 106 insertions(+), 1 deletion(-) diff --git a/sparse/numba_backend/_compressed/compressed.py b/sparse/numba_backend/_compressed/compressed.py index 380f539b..00456fed 100644 --- a/sparse/numba_backend/_compressed/compressed.py +++ b/sparse/numba_backend/_compressed/compressed.py @@ -844,6 +844,15 @@ def isinf(self): def isnan(self): return self.tocoo().isnan().asformat("gcxs", compressed_axes=self.compressed_axes) + # `GCXS` is a reshaped/transposed `CSR`, but it can't (usually) + # be expressed in the `binsparse` 0.1 language. + # We are missing index maps. 
+ def __binsparse_descriptor__(self) -> dict: + return super().__binsparse_descriptor__() + + def __binsparse_dlpack__(self) -> dict[str, np.ndarray]: + return super().__binsparse_dlpack__() + class _Compressed2d(GCXS): class_compressed_axes: tuple[int] @@ -883,6 +892,34 @@ def from_numpy(cls, x, fill_value=0, idx_dtype=None): coo = COO.from_numpy(x, fill_value=fill_value, idx_dtype=idx_dtype) return cls.from_coo(coo, cls.class_compressed_axes, idx_dtype) + def __binsparse_descriptor__(self) -> dict: + from sparse._version import __version__ + + data_dt = str(self.data.dtype) + if np.issubdtype(data_dt, np.complexfloating): + data_dt = f"complex[float{self.data.dtype.itemsize // 2}]" + return { + "binsparse": { + "version": "0.1", + "format": self.format.upper(), + "shape": list(self.shape), + "number_of_stored_values": self.nnz, + "data_types": { + "pointers_to_1": str(self.indices.dtype), + "indices_1": str(self.indptr.dtype), + "values": data_dt, + }, + }, + "original_source": f"`sparse`, version {__version__}", + } + + def __binsparse_dlpack__(self) -> dict[str, np.ndarray]: + return { + "pointers_to_1": self.indices, + "indices_1": self.indptr, + "values": self.data, + } + class CSR(_Compressed2d): """ @@ -915,7 +952,6 @@ def transpose(self, axes: None = None, copy: bool = False) -> Union["CSC", "CSR" return self return CSC((self.data, self.indices, self.indptr), self.shape[::-1]) - class CSC(_Compressed2d): """ The CSC or CCS scheme stores a n-dimensional array using n+1 one-dimensional arrays. 
diff --git a/sparse/numba_backend/_coo/core.py b/sparse/numba_backend/_coo/core.py index 2db83fde..e0a5a7a6 100644 --- a/sparse/numba_backend/_coo/core.py +++ b/sparse/numba_backend/_coo/core.py @@ -1538,6 +1538,44 @@ def isnan(self): prune=True, ) + def __binsparse_descriptor__(self) -> dict: + from sparse._version import __version__ + + data_dt = str(self.data.dtype) + if np.issubdtype(data_dt, np.complexfloating): + data_dt = f"complex[float{self.data.dtype.itemsize // 2}]" + return { + "binsparse": { + "version": "0.1", + "format": { + "custom": { + "level": { + "level_desc": "sparse", + "rank": self.ndim, + "level": { + "level_desc": "element", + }, + } + } + }, + "shape": list(self.shape), + "number_of_stored_values": self.nnz, + "data_types": { + "pointers_to_1": "uint8", + "indices_1": str(self.coords.dtype), + "values": data_dt, + }, + }, + "original_source": f"`sparse`, version {__version__}", + } + + def __binsparse_dlpack__(self) -> dict[str, np.ndarray]: + return { + "pointers_to_1": np.array([0, self.nnz], dtype=np.uint8), + "indices_1": self.coords, + "values": self.data, + } + def as_coo(x, shape=None, fill_value=None, idx_dtype=None): """ diff --git a/sparse/numba_backend/_dok.py b/sparse/numba_backend/_dok.py index 4cfed3bc..8aa0cfd4 100644 --- a/sparse/numba_backend/_dok.py +++ b/sparse/numba_backend/_dok.py @@ -549,6 +549,12 @@ def reshape(self, shape, order="C"): return DOK.from_coo(self.to_coo().reshape(shape)) + def __binsparse_descriptor__(self) -> dict: + raise RuntimeError("`DOK` doesn't support the `__binsparse_descriptor__` protocol.") + + def __binsparse_dlpack__(self) -> dict[str, np.ndarray]: + raise RuntimeError("`DOK` doesn't support the `__binsparse_dlpack__` protocol.") + def to_slice(k): """Convert integer indices to one-element slices for consistency""" diff --git a/sparse/numba_backend/_sparse_array.py b/sparse/numba_backend/_sparse_array.py index 13180521..0c8b0993 100644 --- a/sparse/numba_backend/_sparse_array.py +++ 
b/sparse/numba_backend/_sparse_array.py @@ -218,6 +218,31 @@ def _str_impl(self, summary): except (ImportError, ValueError): return summary + @abstractmethod + def __binsparse_descriptor__(self) -> dict: + """Return a `dict` equivalent to a parsed JSON [`binsparse` descriptor](https://graphblas.org/binsparse-specification/#descriptor) + of this array. + + Returns + ------- + dict + Parsed `binsparse` descriptor. + """ + raise NotImplementedError + + @abstractmethod + def __binsparse_dlpack__(self) -> dict[str, np.ndarray]: + """A `dict` containing the constituent arrays of this sparse array. The keys are compatible with the + [`binsparse`](https://graphblas.org/binsparse-specification/) scheme, and the values are [`__dlpack__`](https://data-apis.org/array-api/latest/API_specification/generated/array_api.array.__dlpack__.html) + compatible objects. + + Returns + ------- + dict[str, np.ndarray] + The constituent arrays. + """ + raise NotImplementedError + @abstractmethod def asformat(self, format): """ From e593a7bce8c2b05f35eba6e1a0fc178494e39f1b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 07:57:59 +0000 Subject: [PATCH 02/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sparse/numba_backend/_compressed/compressed.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sparse/numba_backend/_compressed/compressed.py b/sparse/numba_backend/_compressed/compressed.py index 00456fed..00e14d29 100644 --- a/sparse/numba_backend/_compressed/compressed.py +++ b/sparse/numba_backend/_compressed/compressed.py @@ -952,6 +952,7 @@ def transpose(self, axes: None = None, copy: bool = False) -> Union["CSC", "CSR" return self return CSC((self.data, self.indices, self.indptr), self.shape[::-1]) + class CSC(_Compressed2d): """ The CSC or CCS scheme stores a n-dimensional array using n+1 one-dimensional arrays. 
From 0a5fdda163b6a5953a40f6fa12fcd57294a4a372 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:28:56 +0200 Subject: [PATCH 03/14] Update with suggestions from @pearu. --- .../numba_backend/_compressed/compressed.py | 18 +++++----------- sparse/numba_backend/_coo/core.py | 11 +++------- sparse/numba_backend/_dok.py | 7 ++----- sparse/numba_backend/_sparse_array.py | 21 ++++++------------- 4 files changed, 16 insertions(+), 41 deletions(-) diff --git a/sparse/numba_backend/_compressed/compressed.py b/sparse/numba_backend/_compressed/compressed.py index 00e14d29..d7d9cba1 100644 --- a/sparse/numba_backend/_compressed/compressed.py +++ b/sparse/numba_backend/_compressed/compressed.py @@ -847,11 +847,8 @@ def isnan(self): # `GCXS` is a reshaped/transposed `CSR`, but it can't (usually) # be expressed in the `binsparse` 0.1 language. # We are missing index maps. - def __binsparse_descriptor__(self) -> dict: - return super().__binsparse_descriptor__() - - def __binsparse_dlpack__(self) -> dict[str, np.ndarray]: - return super().__binsparse_dlpack__() + def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + return super().__binsparse__() class _Compressed2d(GCXS): @@ -892,13 +889,13 @@ def from_numpy(cls, x, fill_value=0, idx_dtype=None): coo = COO.from_numpy(x, fill_value=fill_value, idx_dtype=idx_dtype) return cls.from_coo(coo, cls.class_compressed_axes, idx_dtype) - def __binsparse_descriptor__(self) -> dict: + def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: from sparse._version import __version__ data_dt = str(self.data.dtype) if np.issubdtype(data_dt, np.complexfloating): data_dt = f"complex[float{self.data.dtype.itemsize // 2}]" - return { + descriptor = { "binsparse": { "version": "0.1", "format": self.format.upper(), @@ -913,12 +910,7 @@ def __binsparse_descriptor__(self) -> dict: "original_source": f"`sparse`, version {__version__}", } - def __binsparse_dlpack__(self) -> dict[str, 
np.ndarray]: - return { - "pointers_to_1": self.indices, - "indices_1": self.indptr, - "values": self.data, - } + return descriptor, [self.indices, self.indptr, self.data] class CSR(_Compressed2d): diff --git a/sparse/numba_backend/_coo/core.py b/sparse/numba_backend/_coo/core.py index e0a5a7a6..14bfd855 100644 --- a/sparse/numba_backend/_coo/core.py +++ b/sparse/numba_backend/_coo/core.py @@ -1538,13 +1538,13 @@ def isnan(self): prune=True, ) - def __binsparse_descriptor__(self) -> dict: + def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: from sparse._version import __version__ data_dt = str(self.data.dtype) if np.issubdtype(data_dt, np.complexfloating): data_dt = f"complex[float{self.data.dtype.itemsize // 2}]" - return { + descriptor = { "binsparse": { "version": "0.1", "format": { @@ -1569,12 +1569,7 @@ def __binsparse_descriptor__(self) -> dict: "original_source": f"`sparse`, version {__version__}", } - def __binsparse_dlpack__(self) -> dict[str, np.ndarray]: - return { - "pointers_to_1": np.array([0, self.nnz], dtype=np.uint8), - "indices_1": self.coords, - "values": self.data, - } + return descriptor, [np.array([0, self.nnz], dtype=np.uint8), self.coords, self.data] def as_coo(x, shape=None, fill_value=None, idx_dtype=None): diff --git a/sparse/numba_backend/_dok.py b/sparse/numba_backend/_dok.py index 8aa0cfd4..4b7056d8 100644 --- a/sparse/numba_backend/_dok.py +++ b/sparse/numba_backend/_dok.py @@ -549,11 +549,8 @@ def reshape(self, shape, order="C"): return DOK.from_coo(self.to_coo().reshape(shape)) - def __binsparse_descriptor__(self) -> dict: - raise RuntimeError("`DOK` doesn't support the `__binsparse_descriptor__` protocol.") - - def __binsparse_dlpack__(self) -> dict[str, np.ndarray]: - raise RuntimeError("`DOK` doesn't support the `__binsparse_dlpack__` protocol.") + def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + raise RuntimeError("`DOK` doesn't support the `__binsparse__` protocol.") def to_slice(k): diff --git 
a/sparse/numba_backend/_sparse_array.py b/sparse/numba_backend/_sparse_array.py index 0c8b0993..ec2e4751 100644 --- a/sparse/numba_backend/_sparse_array.py +++ b/sparse/numba_backend/_sparse_array.py @@ -219,27 +219,18 @@ def _str_impl(self, summary): return summary @abstractmethod - def __binsparse_descriptor__(self) -> dict: - """Return a `dict` equivalent to a parsed JSON [`binsparse` descriptor](https://graphblas.org/binsparse-specification/#descriptor) + def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + """Return a 2-tuple: + * First element is a `dict` equivalent to a parsed JSON [`binsparse` descriptor](https://graphblas.org/binsparse-specification/#descriptor) of this array. + * Second element is a `list[np.ndarray]` of the constituent arrays. Returns ------- dict Parsed `binsparse` descriptor. - """ - raise NotImplementedError - - @abstractmethod - def __binsparse_dlpack__(self) -> dict[str, np.ndarray]: - """A `dict` containing the constituent arrays of this sparse array. The keys are compatible with the - [`binsparse`](https://graphblas.org/binsparse-specification/) scheme, and the values are [`__dlpack__`](https://data-apis.org/array-api/latest/API_specification/generated/array_api.array.__dlpack__.html) - compatible objects. - - Returns - ------- - dict[str, np.ndarray] - The constituent arrays. + list[np.ndarray] + The constituent arrays """ raise NotImplementedError From 00aaebd9649ba0da2d8024d7ade4525cd32e909b Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 26 Sep 2024 09:27:31 +0200 Subject: [PATCH 04/14] Recognize `"COOR"` format and use simpler representation for it. 
--- sparse/numba_backend/_coo/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sparse/numba_backend/_coo/core.py b/sparse/numba_backend/_coo/core.py index 14bfd855..cb062ac7 100644 --- a/sparse/numba_backend/_coo/core.py +++ b/sparse/numba_backend/_coo/core.py @@ -1557,7 +1557,9 @@ def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: }, } } - }, + } + if self.ndim != 2 + else "COOR", "shape": list(self.shape), "number_of_stored_values": self.nnz, "data_types": { From edaa60e453e148cc630fa74ebff67ebcda48bf6a Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 26 Sep 2024 09:41:59 +0200 Subject: [PATCH 05/14] Fix bug where `dtype.itemsize` was assumed to be in bits rather than bytes. --- sparse/numba_backend/_compressed/compressed.py | 2 +- sparse/numba_backend/_coo/core.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sparse/numba_backend/_compressed/compressed.py b/sparse/numba_backend/_compressed/compressed.py index d7d9cba1..81426819 100644 --- a/sparse/numba_backend/_compressed/compressed.py +++ b/sparse/numba_backend/_compressed/compressed.py @@ -894,7 +894,7 @@ def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: data_dt = str(self.data.dtype) if np.issubdtype(data_dt, np.complexfloating): - data_dt = f"complex[float{self.data.dtype.itemsize // 2}]" + data_dt = f"complex[float{self.data.dtype.itemsize * 4}]" descriptor = { "binsparse": { "version": "0.1", diff --git a/sparse/numba_backend/_coo/core.py b/sparse/numba_backend/_coo/core.py index cb062ac7..4f59b8d1 100644 --- a/sparse/numba_backend/_coo/core.py +++ b/sparse/numba_backend/_coo/core.py @@ -1543,7 +1543,7 @@ def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: data_dt = str(self.data.dtype) if np.issubdtype(data_dt, np.complexfloating): - data_dt = f"complex[float{self.data.dtype.itemsize // 2}]" + data_dt = f"complex[float{self.data.dtype.itemsize * 4}]" descriptor = { "binsparse": { 
"version": "0.1", From 7b9e77939df46b3b89db9720927ff6ba3d43b836 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 26 Sep 2024 09:54:13 +0200 Subject: [PATCH 06/14] Fix bug where `format` property was not correct for all formats. --- .../numba_backend/_compressed/compressed.py | 26 ----------------- sparse/numba_backend/_coo/core.py | 23 --------------- sparse/numba_backend/_dok.py | 23 --------------- sparse/numba_backend/_sparse_array.py | 28 +++++++++++++++++++ 4 files changed, 28 insertions(+), 72 deletions(-) diff --git a/sparse/numba_backend/_compressed/compressed.py b/sparse/numba_backend/_compressed/compressed.py index 81426819..a24a3835 100644 --- a/sparse/numba_backend/_compressed/compressed.py +++ b/sparse/numba_backend/_compressed/compressed.py @@ -259,32 +259,6 @@ def nnz(self): """ return self.data.shape[0] - @property - def format(self): - """ - The storage format of this array. - - Returns - ------- - str - The storage format of this array. - - See Also - ------- - [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. - - Examples - ------- - >>> import sparse - >>> s = sparse.random((5, 5), density=0.2, format="dok") - >>> s.format - 'dok' - >>> t = sparse.random((5, 5), density=0.2, format="coo") - >>> t.format - 'coo' - """ - return "gcxs" - @property def nbytes(self): """ diff --git a/sparse/numba_backend/_coo/core.py b/sparse/numba_backend/_coo/core.py index 4f59b8d1..8d009df0 100644 --- a/sparse/numba_backend/_coo/core.py +++ b/sparse/numba_backend/_coo/core.py @@ -601,29 +601,6 @@ def nnz(self): """ return self.coords.shape[1] - @property - def format(self): - """ - The storage format of this array. - Returns - ------- - str - The storage format of this array. - See Also - -------- - [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. 
- Examples - ------- - >>> import sparse - >>> s = sparse.random((5, 5), density=0.2, format="dok") - >>> s.format - 'dok' - >>> t = sparse.random((5, 5), density=0.2, format="coo") - >>> t.format - 'coo' - """ - return "coo" - @property def nbytes(self): """ diff --git a/sparse/numba_backend/_dok.py b/sparse/numba_backend/_dok.py index 4b7056d8..00e6964e 100644 --- a/sparse/numba_backend/_dok.py +++ b/sparse/numba_backend/_dok.py @@ -272,29 +272,6 @@ def nnz(self): """ return len(self.data) - @property - def format(self): - """ - The storage format of this array. - Returns - ------- - str - The storage format of this array. - See Also - ------- - [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. - Examples - ------- - >>> import sparse - >>> s = sparse.random((5, 5), density=0.2, format="dok") - >>> s.format - 'dok' - >>> t = sparse.random((5, 5), density=0.2, format="coo") - >>> t.format - 'coo' - """ - return "dok" - @property def nbytes(self): """ diff --git a/sparse/numba_backend/_sparse_array.py b/sparse/numba_backend/_sparse_array.py index ec2e4751..1fd6e093 100644 --- a/sparse/numba_backend/_sparse_array.py +++ b/sparse/numba_backend/_sparse_array.py @@ -145,6 +145,34 @@ def size(self): # returns a float64 for an empty shape. return reduce(operator.mul, self.shape, 1) + @property + def format(self): + """ + The storage format of this array. + + Returns + ------- + str + The storage format of this array. + + See Also + ------- + [`scipy.sparse.coo_matrix.format`][] : The Scipy equivalent property. + [`scipy.sparse.csr_matrix.format`][] : The Scipy equivalent property. + [`scipy.sparse.dok_matrix.format`][] : The Scipy equivalent property. 
+ + Examples + ------- + >>> import sparse + >>> s = sparse.random((5, 5), density=0.2, format="dok") + >>> s.format + 'dok' + >>> t = sparse.random((5, 5), density=0.2, format="coo") + >>> t.format + 'coo' + """ + return type(self).__name__.lower() + @property def density(self): """ From 51823d4002c5eb1eff3bec81c40c3764e92a7be4 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 26 Sep 2024 12:20:51 +0200 Subject: [PATCH 07/14] Fix `notebooks` feature and add it to development environments. --- pixi.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pixi.toml b/pixi.toml index 01e5061b..fb40e19d 100644 --- a/pixi.toml +++ b/pixi.toml @@ -36,8 +36,13 @@ pytest-cov = "*" pytest-xdist = "*" pytest-codspeed = "*" +<<<<<<< HEAD [feature.notebooks.pypi-dependencies] ipykernel = "*" +======= +[feature.notebooks.dependencies] +ipython = "*" +>>>>>>> 54b7c1d (Fix `notebooks` feature and add it to development environments.) nbmake = "*" matplotlib = "*" networkx = "*" From 96245141425de91ea708110cff8d4a649a683736 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:55:30 +0200 Subject: [PATCH 08/14] Get `COO` round-trip working. 
--- sparse/numba_backend/__init__.py | 3 +- sparse/numba_backend/_common.py | 2 +- sparse/numba_backend/_io.py | 126 +++++++++++++++++++ sparse/numba_backend/tests/test_io.py | 8 ++ sparse/numba_backend/tests/test_namespace.py | 1 + 5 files changed, 138 insertions(+), 2 deletions(-) diff --git a/sparse/numba_backend/__init__.py b/sparse/numba_backend/__init__.py index 9f731cc1..789222f3 100644 --- a/sparse/numba_backend/__init__.py +++ b/sparse/numba_backend/__init__.py @@ -157,7 +157,7 @@ where, ) from ._dok import DOK -from ._io import load_npz, save_npz +from ._io import from_binsparse, load_npz, save_npz from ._umath import elemwise from ._utils import random @@ -226,6 +226,7 @@ "float64", "floor", "floor_divide", + "from_binsparse", "full", "full_like", "greater", diff --git a/sparse/numba_backend/_common.py b/sparse/numba_backend/_common.py index d952a0b2..9ce04dc2 100644 --- a/sparse/numba_backend/_common.py +++ b/sparse/numba_backend/_common.py @@ -35,7 +35,7 @@ def _check_device(func): def wrapped(*args, **kwargs): device = kwargs.get("device") if device not in {"cpu", None}: - raise ValueError("Device must be `'cpu'` or `None`.") + raise BufferError("Device must be `'cpu'` or `None`.") return func(*args, **kwargs) return wrapped diff --git a/sparse/numba_backend/_io.py b/sparse/numba_backend/_io.py index 24d9f1db..732b2b7b 100644 --- a/sparse/numba_backend/_io.py +++ b/sparse/numba_backend/_io.py @@ -1,7 +1,9 @@ import numpy as np +from ._common import _check_device from ._compressed import GCXS from ._coo.core import COO +from ._sparse_array import SparseArray def save_npz(filename, matrix, compressed=True): @@ -130,3 +132,127 @@ def load_npz(filename): ) except KeyError as e: raise RuntimeError(f"The file {filename!s} does not contain a valid sparse matrix") from e + + +@_check_device +def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseArray: + desc, arrs = arr.__binsparse__() + + desc = desc["binsparse"] + version_tuple: 
tuple[int, ...] = tuple(int(v) for v in desc["version"].split(".")) + if version_tuple != (0, 1): + raise RuntimeError("Unsupported `__binsparse__` protocol version.") + + format = desc["format"] + format_err_str = f"Unsupported format: `{format!r}`." + invalid_dtype_str = "Invalid dtype: `{dtype!s}`, expected `{expected!s}`." + + if isinstance(format, str): + match format: + case "COO" | "COOR": + desc["format"] = { + "custom": { + "transpose": [0, 1], + "level": { + "level_desc": "sparse", + "rank": 2, + "level": { + "level_desc": "element", + }, + }, + } + } + case "CSC" | "CSR": + desc["format"] = { + "custom": { + "transpose": [0, 1] if format == "CSR" else [0, 1], + "level": { + "level_desc": "dense", + "level": { + "level_desc": "sparse", + "level": { + "level_desc": "element", + }, + }, + }, + }, + } + case _: + raise RuntimeError(format_err_str) + + format = desc["format"] + if "transpose" not in format: + rank = 0 + level = format + while "level" in level: + if "rank" not in level: + level["rank"] = 1 + rank += level["rank"] + + format["transpose"] = list(range(rank)) + + match desc: + case { + "format": { + "custom": { + "transpose": transpose, + "level": { + "level_desc": "sparse", + "rank": ndim, + "level": { + "level_desc": "element", + }, + }, + }, + }, + "shape": shape, + "number_of_stored_values": nnz, + "data_types": { + "pointers_to_1": _, + "indices_1": coords_dtype, + "values": value_dtype, + }, + **_kwargs, + }: + if transpose != list(range(ndim)): + raise RuntimeError(format_err_str) + + ptr_arr: np.ndarray = np.from_dlpack(arrs[0]) + start, end = ptr_arr + if copy is False and not (start == 0 or end == nnz): + raise RuntimeError(format_err_str) + + coord_arr: np.ndarray = np.from_dlpack(arrs[1]) + value_arr: np.ndarray = np.from_dlpack(arrs[2]) + + if str(coord_arr.dtype) != coords_dtype: + raise BufferError( + invalid_dtype_str.format( + dtype=str(coord_arr.dtype), + expected=coords_dtype, + ) + ) + + if 
value_dtype.startswith("complex[float") and value_dtype.endswith("]"): + complex_bits = 2 * int(value_arr[len("complex[float") : -len("]")]) + value_dtype: str = f"complex{complex_bits}" + + if str(value_arr.dtype) != value_dtype: + raise BufferError( + invalid_dtype_str.format( + dtype=str(coord_arr.dtype), + expected=coords_dtype, + ) + ) + + return COO( + coord_arr[:, start:end], + value_arr, + shape=shape, + has_duplicates=False, + sorted=True, + prune=False, + idx_dtype=coord_arr.dtype, + ) + case _: + raise RuntimeError(format_err_str) diff --git a/sparse/numba_backend/tests/test_io.py b/sparse/numba_backend/tests/test_io.py index 060b9263..d2bd3d3d 100644 --- a/sparse/numba_backend/tests/test_io.py +++ b/sparse/numba_backend/tests/test_io.py @@ -28,3 +28,11 @@ def test_load_wrong_format_exception(tmp_path): np.savez(filename, x) with pytest.raises(RuntimeError): load_npz(filename) + + +@pytest.mark.parametrize("format", ["coo", "csr", "csc"]) +def test_round_trip_binsparse(format: str) -> None: + x = sparse.random((20, 30), density=0.25, format=format) + y = sparse.from_binsparse(x) + + assert_eq(x, y) diff --git a/sparse/numba_backend/tests/test_namespace.py b/sparse/numba_backend/tests/test_namespace.py index 39556f99..ca1a4277 100644 --- a/sparse/numba_backend/tests/test_namespace.py +++ b/sparse/numba_backend/tests/test_namespace.py @@ -67,6 +67,7 @@ def test_namespace(): "float64", "floor", "floor_divide", + "from_binsparse", "full", "full_like", "greater", From a333112c42c8211b4a282adee5ef1fb34442a9d2 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:30:19 +0200 Subject: [PATCH 09/14] Get `CSR` round-trip working. 
--- .../numba_backend/_compressed/compressed.py | 11 +- sparse/numba_backend/_io.py | 103 +++++++++++++----- sparse/numba_backend/tests/test_io.py | 4 +- 3 files changed, 80 insertions(+), 38 deletions(-) diff --git a/sparse/numba_backend/_compressed/compressed.py b/sparse/numba_backend/_compressed/compressed.py index a24a3835..4f40b3d6 100644 --- a/sparse/numba_backend/_compressed/compressed.py +++ b/sparse/numba_backend/_compressed/compressed.py @@ -11,7 +11,6 @@ from .._coo.core import COO from .._sparse_array import SparseArray from .._utils import ( - _zero_of_dtype, can_store, check_compressed_axes, check_fill_value, @@ -175,13 +174,9 @@ def __init__( if self.data.ndim != 1: raise ValueError("data must be a scalar or 1-dimensional.") - self.shape = shape - - if fill_value is None: - fill_value = _zero_of_dtype(self.data.dtype) + SparseArray.__init__(self, shape=shape, fill_value=fill_value) self._compressed_axes = tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None - self.fill_value = self.data.dtype.type(fill_value) if prune: self._prune() @@ -417,7 +412,7 @@ def tocoo(self): fill_value=self.fill_value, ) uncompressed = uncompress_dimension(self.indptr) - coords = np.vstack((uncompressed, self.indices)) + coords = np.stack((uncompressed, self.indices)) order = np.argsort(self._axis_order) return ( COO( @@ -884,7 +879,7 @@ def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: "original_source": f"`sparse`, version {__version__}", } - return descriptor, [self.indices, self.indptr, self.data] + return descriptor, [self.indptr, self.indices, self.data] class CSR(_Compressed2d): diff --git a/sparse/numba_backend/_io.py b/sparse/numba_backend/_io.py index 732b2b7b..70f860b1 100644 --- a/sparse/numba_backend/_io.py +++ b/sparse/numba_backend/_io.py @@ -1,7 +1,7 @@ import numpy as np from ._common import _check_device -from ._compressed import GCXS +from ._compressed import CSC, CSR, GCXS from ._coo.core import COO from ._sparse_array 
import SparseArray @@ -145,7 +145,6 @@ def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseAr format = desc["format"] format_err_str = f"Unsupported format: `{format!r}`." - invalid_dtype_str = "Invalid dtype: `{dtype!s}`, expected `{expected!s}`." if isinstance(format, str): match format: @@ -180,15 +179,15 @@ def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseAr case _: raise RuntimeError(format_err_str) - format = desc["format"] + format = desc["format"]["custom"] + rank = 0 + level = format + while "level" in level: + if "rank" not in level: + level["rank"] = 1 + rank += level["rank"] + level = level["level"] if "transpose" not in format: - rank = 0 - level = format - while "level" in level: - if "rank" not in level: - level["rank"] = 1 - rank += level["rank"] - format["transpose"] = list(range(rank)) match desc: @@ -225,25 +224,8 @@ def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseAr coord_arr: np.ndarray = np.from_dlpack(arrs[1]) value_arr: np.ndarray = np.from_dlpack(arrs[2]) - if str(coord_arr.dtype) != coords_dtype: - raise BufferError( - invalid_dtype_str.format( - dtype=str(coord_arr.dtype), - expected=coords_dtype, - ) - ) - - if value_dtype.startswith("complex[float") and value_dtype.endswith("]"): - complex_bits = 2 * int(value_arr[len("complex[float") : -len("]")]) - value_dtype: str = f"complex{complex_bits}" - - if str(value_arr.dtype) != value_dtype: - raise BufferError( - invalid_dtype_str.format( - dtype=str(coord_arr.dtype), - expected=coords_dtype, - ) - ) + _check_binsparse_dt(coord_arr, coords_dtype) + _check_binsparse_dt(value_arr, value_dtype) return COO( coord_arr[:, start:end], @@ -254,5 +236,68 @@ def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseAr prune=False, idx_dtype=coord_arr.dtype, ) + case { + "format": { + "custom": { + "transpose": transpose, + "level": { + "level_desc": "dense", + "rank": 1, + "level": { + 
"level_desc": "sparse", + "rank": 1, + "level": { + "level_desc": "element", + }, + }, + }, + }, + }, + "shape": shape, + "number_of_stored_values": nnz, + "data_types": { + "pointers_to_1": ptr_dtype, + "indices_1": crd_dtype, + "values": val_dtype, + }, + **_kwargs, + }: + crd_arr = np.from_dlpack(arrs[0]) + _check_binsparse_dt(crd_arr, crd_dtype) + ptr_arr = np.from_dlpack(arrs[1]) + _check_binsparse_dt(ptr_arr, ptr_dtype) + val_arr = np.from_dlpack(arrs[2]) + _check_binsparse_dt(val_arr, val_dtype) + + match transpose: + case [0, 1]: + sparse_type = CSR + case [1, 0]: + sparse_type = CSC + case _: + raise RuntimeError(format_err_str) + + return sparse_type((val_arr, ptr_arr, crd_arr), shape=shape) case _: + print(desc) raise RuntimeError(format_err_str) + + +def _convert_binsparse_dtype(dt: str) -> np.dtype: + if dt.startswith("complex[float") and dt.endswith("]"): + complex_bits = 2 * int(dt[len("complex[float") : -len("]")]) + dt: str = f"complex{complex_bits}" + + return np.dtype(dt) + + +def _check_binsparse_dt(arr: np.ndarray, dt: str) -> None: + invalid_dtype_str = "Invalid dtype: `{dtype!s}`, expected `{expected!s}`." 
+ dt = _convert_binsparse_dtype(dt) + if dt != arr.dtype: + raise BufferError( + invalid_dtype_str.format( + dtype=arr.dtype, + expected=dt, + ) + ) diff --git a/sparse/numba_backend/tests/test_io.py b/sparse/numba_backend/tests/test_io.py index d2bd3d3d..c6caa25f 100644 --- a/sparse/numba_backend/tests/test_io.py +++ b/sparse/numba_backend/tests/test_io.py @@ -30,7 +30,9 @@ def test_load_wrong_format_exception(tmp_path): load_npz(filename) -@pytest.mark.parametrize("format", ["coo", "csr", "csc"]) +@pytest.mark.parametrize( + "format", ["coo", "csr", pytest.param("csc", marks=pytest.mark.xfail(reason="`CSC<>COO` round-trip broken"))] +) def test_round_trip_binsparse(format: str) -> None: x = sparse.random((20, 30), density=0.25, format=format) y = sparse.from_binsparse(x) From c0743b979728edd301324817f4c1ae069e27c50a Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:35:37 +0200 Subject: [PATCH 10/14] Fix up test. --- sparse/numba_backend/tests/test_coo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sparse/numba_backend/tests/test_coo.py b/sparse/numba_backend/tests/test_coo.py index efd7f779..e3b29a54 100644 --- a/sparse/numba_backend/tests/test_coo.py +++ b/sparse/numba_backend/tests/test_coo.py @@ -1896,7 +1896,7 @@ def test_invalid_device(func, args, kwargs): like = sparse.random((5, 5), density=0.5) args = (like,) + args - with pytest.raises(ValueError, match="Device must be"): + with pytest.raises(BufferError, match="Device must be"): func(*args, device="invalid_device", **kwargs) From 199d87804887bf4c2ed0d8b12a85e564e8183025 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:35:39 +0200 Subject: [PATCH 11/14] Remove debugging `print` statement. 
--- sparse/numba_backend/_io.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sparse/numba_backend/_io.py b/sparse/numba_backend/_io.py index 70f860b1..a5067d9b 100644 --- a/sparse/numba_backend/_io.py +++ b/sparse/numba_backend/_io.py @@ -279,7 +279,6 @@ def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseAr return sparse_type((val_arr, ptr_arr, crd_arr), shape=shape) case _: - print(desc) raise RuntimeError(format_err_str) From 0dfa2655ea33b2a2618c59bbd747ea3499f04703 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:19:51 +0200 Subject: [PATCH 12/14] `nnz` may not fit in `uint8`. --- sparse/numba_backend/_coo/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sparse/numba_backend/_coo/core.py b/sparse/numba_backend/_coo/core.py index 8d009df0..fee4e7f5 100644 --- a/sparse/numba_backend/_coo/core.py +++ b/sparse/numba_backend/_coo/core.py @@ -1540,7 +1540,7 @@ def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: "shape": list(self.shape), "number_of_stored_values": self.nnz, "data_types": { - "pointers_to_1": "uint8", + "pointers_to_1": "uint64", "indices_1": str(self.coords.dtype), "values": data_dt, }, @@ -1548,7 +1548,7 @@ def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: "original_source": f"`sparse`, version {__version__}", } - return descriptor, [np.array([0, self.nnz], dtype=np.uint8), self.coords, self.data] + return descriptor, [np.array([0, self.nnz], dtype=np.uint64), self.coords, self.data] def as_coo(x, shape=None, fill_value=None, idx_dtype=None): From c314d08b84ac41f0d40af5b17de1d8084122f99b Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Wed, 19 Feb 2025 11:02:13 +0100 Subject: [PATCH 13/14] Update to latest agreed-upon `binsparse` protocol version. 
--- pixi.toml | 5 ----- .../numba_backend/_compressed/compressed.py | 12 ++++++++---- sparse/numba_backend/_coo/core.py | 11 ++++++++--- sparse/numba_backend/_dok.py | 2 +- sparse/numba_backend/_io.py | 15 ++++++++------- sparse/numba_backend/_sparse_array.py | 19 +++++++++++++------ 6 files changed, 38 insertions(+), 26 deletions(-) diff --git a/pixi.toml b/pixi.toml index fb40e19d..01e5061b 100644 --- a/pixi.toml +++ b/pixi.toml @@ -36,13 +36,8 @@ pytest-cov = "*" pytest-xdist = "*" pytest-codspeed = "*" -<<<<<<< HEAD [feature.notebooks.pypi-dependencies] ipykernel = "*" -======= -[feature.notebooks.dependencies] -ipython = "*" ->>>>>>> 54b7c1d (Fix `notebooks` feature and add it to development environments.) nbmake = "*" matplotlib = "*" networkx = "*" diff --git a/sparse/numba_backend/_compressed/compressed.py b/sparse/numba_backend/_compressed/compressed.py index 4f40b3d6..95f3180f 100644 --- a/sparse/numba_backend/_compressed/compressed.py +++ b/sparse/numba_backend/_compressed/compressed.py @@ -816,9 +816,12 @@ def isnan(self): # `GCXS` is a reshaped/transposed `CSR`, but it can't (usually) # be expressed in the `binsparse` 0.1 language. # We are missing index maps. 
- def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + def __binsparse__(self) -> dict: return super().__binsparse__() + def __binsparse_descriptor__(self) -> dict[str, np.ndarray]: + return super().__binsparse_descriptor__() + class _Compressed2d(GCXS): class_compressed_axes: tuple[int] @@ -858,13 +861,13 @@ def from_numpy(cls, x, fill_value=0, idx_dtype=None): coo = COO.from_numpy(x, fill_value=fill_value, idx_dtype=idx_dtype) return cls.from_coo(coo, cls.class_compressed_axes, idx_dtype) - def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + def __binsparse_descriptor__(self) -> dict: from sparse._version import __version__ data_dt = str(self.data.dtype) if np.issubdtype(data_dt, np.complexfloating): data_dt = f"complex[float{self.data.dtype.itemsize * 4}]" - descriptor = { + return { "binsparse": { "version": "0.1", "format": self.format.upper(), @@ -879,7 +882,8 @@ def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: "original_source": f"`sparse`, version {__version__}", } - return descriptor, [self.indptr, self.indices, self.data] + def __binsparse__(self) -> dict[str, np.ndarray]: + return {"pointers_to_1": self.indptr, "indices_1": self.indices, "values": self.data} class CSR(_Compressed2d): diff --git a/sparse/numba_backend/_coo/core.py b/sparse/numba_backend/_coo/core.py index fee4e7f5..afcbec91 100644 --- a/sparse/numba_backend/_coo/core.py +++ b/sparse/numba_backend/_coo/core.py @@ -1515,13 +1515,13 @@ def isnan(self): prune=True, ) - def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: + def __binsparse_descriptor__(self) -> dict: from sparse._version import __version__ data_dt = str(self.data.dtype) if np.issubdtype(data_dt, np.complexfloating): data_dt = f"complex[float{self.data.dtype.itemsize * 4}]" - descriptor = { + return { "binsparse": { "version": "0.1", "format": { @@ -1548,7 +1548,12 @@ def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: "original_source": f"`sparse`, version {__version__}", } - return 
descriptor, [np.array([0, self.nnz], dtype=np.uint64), self.coords, self.data] + def __binsparse__(self) -> dict[str, np.ndarray]: + return { + "pointers_to_1": np.array([0, self.nnz], dtype=np.uint64), + "indices_1": self.coords, + "values": self.data, + } def as_coo(x, shape=None, fill_value=None, idx_dtype=None): diff --git a/sparse/numba_backend/_dok.py b/sparse/numba_backend/_dok.py index 00e6964e..7df2d128 100644 --- a/sparse/numba_backend/_dok.py +++ b/sparse/numba_backend/_dok.py @@ -527,7 +527,7 @@ def reshape(self, shape, order="C"): return DOK.from_coo(self.to_coo().reshape(shape)) def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: - raise RuntimeError("`DOK` doesn't support the `__binsparse__` protocol.") + raise TypeError("`DOK` doesn't support the `__binsparse__` protocol.") def to_slice(k): diff --git a/sparse/numba_backend/_io.py b/sparse/numba_backend/_io.py index a5067d9b..d73a4a22 100644 --- a/sparse/numba_backend/_io.py +++ b/sparse/numba_backend/_io.py @@ -136,7 +136,8 @@ def load_npz(filename): @_check_device def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseArray: - desc, arrs = arr.__binsparse__() + desc = arr.__binsparse_descriptor__() + arrs = arr.__binsparse__() desc = desc["binsparse"] version_tuple: tuple[int, ...] 
= tuple(int(v) for v in desc["version"].split(".")) @@ -216,13 +217,13 @@ def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseAr if transpose != list(range(ndim)): raise RuntimeError(format_err_str) - ptr_arr: np.ndarray = np.from_dlpack(arrs[0]) + ptr_arr: np.ndarray = np.from_dlpack(arrs["pointers_to_1"]) start, end = ptr_arr if copy is False and not (start == 0 or end == nnz): raise RuntimeError(format_err_str) - coord_arr: np.ndarray = np.from_dlpack(arrs[1]) - value_arr: np.ndarray = np.from_dlpack(arrs[2]) + coord_arr: np.ndarray = np.from_dlpack(arrs["indices_1"]) + value_arr: np.ndarray = np.from_dlpack(arrs["values"]) _check_binsparse_dt(coord_arr, coords_dtype) _check_binsparse_dt(value_arr, value_dtype) @@ -262,11 +263,11 @@ def from_binsparse(arr, /, *, device=None, copy: bool | None = None) -> SparseAr }, **_kwargs, }: - crd_arr = np.from_dlpack(arrs[0]) + crd_arr = np.from_dlpack(arrs["pointers_to_1"]) _check_binsparse_dt(crd_arr, crd_dtype) - ptr_arr = np.from_dlpack(arrs[1]) + ptr_arr = np.from_dlpack(arrs["indices_1"]) _check_binsparse_dt(ptr_arr, ptr_dtype) - val_arr = np.from_dlpack(arrs[2]) + val_arr = np.from_dlpack(arrs["values"]) _check_binsparse_dt(val_arr, val_dtype) match transpose: diff --git a/sparse/numba_backend/_sparse_array.py b/sparse/numba_backend/_sparse_array.py index 1fd6e093..a750fbb0 100644 --- a/sparse/numba_backend/_sparse_array.py +++ b/sparse/numba_backend/_sparse_array.py @@ -247,18 +247,25 @@ def _str_impl(self, summary): return summary @abstractmethod - def __binsparse__(self) -> tuple[dict, list[np.ndarray]]: - """Return a 2-tuple: - * First element is a `dict` equivalent to a parsed JSON [`binsparse` descriptor](https://graphblas.org/binsparse-specification/#descriptor) + def __binsparse_descriptor__(self) -> dict: + """Return a `dict` equivalent to a parsed JSON [`binsparse` descriptor](https://graphblas.org/binsparse-specification/#descriptor) of this array. 
- * Second element is a `list[np.ndarray]` of the constituent arrays. Returns ------- dict Parsed `binsparse` descriptor. - list[np.ndarray] - The constituent arrays + """ + raise NotImplementedError + + @abstractmethod + def __binsparse__(self) -> dict[str, np.ndarray]: + """Return a `dict[str, np.ndarray]` of the constituent arrays. + + Returns + ------- + dict[str, np.ndarray] + The constituent arrays, keyed by name. + """ + raise NotImplementedError From 148d1aabbc0438fccd8cad6bafce1cf5af4ac18d Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 20 Feb 2025 10:05:43 +0100 Subject: [PATCH 14/14] Test with latest version of `codecov-action`. --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a7aecccf..8e9cfc5a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,11 +49,11 @@ jobs: pip install -e '.[tests]' - name: Run tests run: ci/test_backends.sh - - uses: codecov/codecov-action@v5 + - uses: codecov/codecov-action@2e6e9c5a74ec004831b6d17edfb76c53a54d4d55 if: always() with: token: ${{ secrets.CODECOV_TOKEN }} - files: ./**/coverage*.xml + files: "\"./**/coverage*.xml\"" examples: runs-on: ubuntu-latest