support python 3.9, and expand the logic of from_array to handle more array-likes
d-v-b committed Mar 19, 2024
1 parent fbe124c commit 7e1ba7f
Showing 5 changed files with 276 additions and 30 deletions.
18 changes: 16 additions & 2 deletions poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -11,6 +11,7 @@ python = "^3.9"
zarr = "^2.14.2"
pydantic = "^2.0.0"
typing-extensions = {version = "^4.7.1", python = "<3.12"}
eval-type-backport = "^0.1.3"

[tool.poetry.group.dev.dependencies]
pytest = "^7.3.1"
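The new `eval-type-backport` dependency appears to be what lets pydantic evaluate PEP 604 union annotations (`X | Y`) on Python 3.9 when `from __future__ import annotations` is in use. A minimal sketch of the situation it addresses (the model below is illustrative, not from this repository):

from __future__ import annotations
from pydantic import BaseModel

class Example(BaseModel):
    # Hypothetical model. Under postponed evaluation, `int | None` is stored as a
    # string annotation; on Python 3.9 pydantic can only evaluate it by falling
    # back to the eval-type-backport package.
    value: int | None = None

print(Example(value=3))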
2 changes: 1 addition & 1 deletion src/pydantic_zarr/core.py
@@ -1,11 +1,11 @@
from __future__ import annotations
from typing_extensions import TypeAlias
from typing import (
Any,
Dict,
Literal,
Mapping,
Set,
TypeAlias,
Union,
)
from pydantic import BaseModel, ConfigDict
163 changes: 150 additions & 13 deletions src/pydantic_zarr/v2.py
@@ -17,6 +17,7 @@
from zarr.storage import init_group, BaseStore, contains_group, contains_array
import numcodecs
import zarr
from zarr.util import guess_chunks
import os
import numpy as np
import numpy.typing as npt
@@ -29,6 +30,7 @@
model_like,
)


TAttr = TypeVar("TAttr", bound=Mapping[str, Any])
TItem = TypeVar("TItem", bound=Union["GroupSpec", "ArraySpec"])

@@ -176,24 +178,51 @@ def check_ndim(self):
return self

@classmethod
def from_array(cls, array: npt.NDArray[Any], **kwargs):
def from_array(
cls,
array: npt.NDArray[Any],
chunks: Literal["auto"] | tuple[int, ...] = "auto",
attributes: Literal["auto"] | TAttr = "auto",
fill_value: Literal["auto"] | int | float | None = "auto",
order: Literal["auto", "C", "F"] = "auto",
filters: Literal["auto"] | list[CodecDict] | None = "auto",
dimension_separator: Literal["auto", "/", "."] = "auto",
compressor: Literal["auto"] | CodecDict | None = "auto"):
"""
Create an `ArraySpec` from a numpy array-like object.
Create an `ArraySpec` from an array-like object. This is a convenience method for modelling a Zarr array on an existing array.
This method takes nearly the same arguments as the `ArraySpec` constructor, minus `shape` and `dtype`, which are inferred from the `array` argument.
Additionally, this method accepts the string "auto" for all other `ArraySpec` attributes, in which case those attributes are inferred from the `array` argument, falling back to the default `ArraySpec` values.
Parameters
----------
array : an array-like object.
Must have `shape` and `dtype` attributes.
The `shape` and `dtype` of this object will be used to construct an `ArraySpec`.
If the `chunks` keyword argument is not given, the `shape` of the array will
be used for the chunks.
**kwargs
Keyword arguments passed to the `ArraySpec` constructor.
attributes: "auto" | TAttr, default = "auto""
User-defined metadata associated with this array. Should be JSON-serializable. The default is "auto", which means that `array.attributes` will be used,
with a fallback value of the empty dict `{}`.
chunks: "auto" | tuple[int, ...], default = "auto"
The chunks for this `ArraySpec`. If `chunks` is "auto" (the default), then this method first checks if `array` has a `chunksize` attribute, using it if present.
This supports copying chunk sizes from dask arrays. If `array` does not have `chunksize`, then a routine from `zarr-python` is used to guess the chunk size,
given the `shape` and `dtype` of `array`. If `chunks` is not "auto", then it should be a tuple of ints.
order: "auto" | "C" | "F", default = "auto"
The memory order of the `ArraySpec`. One of "auto", "C", or "F". The default is "auto", which means that, if present, `array.order`
will be used, falling back to "C" if `array` does not have an `order` attribute.
fill_value: "auto" | int | float | None, default = "auto"
The fill value for this array. Either "auto" or FillValue. The default is "auto", which means that `array.fill_value` will be used if that attribute exists, with a fallback value of 0.
compressor: "auto" | CodecDict | None, default = "auto"
The compressor for this `ArraySpec`. One of "auto", a JSON-serializable representation of a compression codec, or `None`. The default is "auto", which means that the `array.compressor` attribute will be used, with a fallback value of `None`.
filters: "auto" | List[CodecDict] | None, default = "auto"
The filters for this `ArraySpec`. One of "auto", a list of JSON-serializable representations of compression codec, or `None`. The default is "auto", which means that the `array.filters` attribute will be
used, with a fallback value of `None`.
dimension_separator: "auto" | "." | "/", default = "auto"
Sets the character used for partitioning the different dimensions of a chunk key.
Must be one of "auto", "/" or ".". The default is "auto", which means that `array.dimension_separator` is used, with a fallback value of "/".
Returns
-------
ArraySpec
An instance of `ArraySpec` with `shape` and `dtype` attributes derived from `array`.
Examples
--------
@@ -205,12 +234,54 @@ def from_array(cls, array: npt.NDArray[Any], **kwargs):
"""
shape_actual = array.shape
dtype_actual = array.dtype

if chunks == "auto":
chunks_actual = auto_chunks(array)
else:
chunks_actual = chunks

if attributes == 'auto':
attributes_actual = auto_attributes(array)
else:
attributes_actual = attributes

if fill_value == 'auto':
fill_value_actual = auto_fill_value(array)
else:
fill_value_actual = fill_value

if compressor == 'auto':
compressor_actual = auto_compresser(array)
else:
compressor_actual = compressor

if filters == 'auto':
filters_actual = auto_filters(array)
else:
filters_actual = filters

if order == 'auto':
order_actual = auto_order(array)
else:
order_actual = order

if dimension_separator == 'auto':
dimension_separator_actual = auto_dimension_separator(array)
else:
dimension_separator_actual = dimension_separator

return cls(
shape=array.shape,
dtype=str(array.dtype),
chunks=kwargs.pop("chunks", array.shape),
attributes=kwargs.pop("attributes", {}),
**kwargs,
shape=shape_actual,
dtype=dtype_actual,
chunks=chunks_actual,
attributes=attributes_actual,
fill_value=fill_value_actual,
order=order_actual,
compressor=compressor_actual,
filters=filters_actual,
dimension_separator=dimension_separator_actual
)
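For orientation, a minimal usage sketch of the expanded `from_array` (the array and the explicit overrides below are illustrative, not taken from this commit):

import numpy as np
from pydantic_zarr.v2 import ArraySpec

data = np.zeros((1024, 1024), dtype="uint8")

# With every parameter left at "auto", chunks are guessed from shape and dtype,
# attributes fall back to {}, fill_value to 0, compressor and filters to None.
spec = ArraySpec.from_array(data)

# Any parameter can still be pinned explicitly instead of relying on inference.
spec_pinned = ArraySpec.from_array(data, chunks=(256, 256), fill_value=255, order="C")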

@classmethod
@@ -930,3 +1001,69 @@ def from_flat_group(data: Dict[str, ArraySpec | GroupSpec]) -> GroupSpec:
return GroupSpec(
members={**member_groups, **member_arrays}, attributes=root_node.attributes
)

def auto_chunks(data: Any) -> tuple[int, ...]:
"""
Guess chunks from:
input with a `chunksize` attribute, or
input with a `chunks` attribute, or
input with `shape` and `dtype` attributes
"""
if hasattr(data, 'chunksize'):
return data.chunksize
if hasattr(data, 'chunks'):
return data.chunks
return guess_chunks(data.shape, np.dtype(data.dtype).itemsize)

def auto_attributes(data: Any) -> Mapping[str, Any]:
"""
Guess attributes from:
input with an `attrs` attribute, or
input with an `attributes` attribute,
or anything (returning {})
"""
if hasattr(data, 'attrs'):
return data.attrs
if hasattr(data, 'attributes'):
return data.attributes
return {}

def auto_fill_value(data: Any) -> Any:
"""
Guess fill value from an input with a `fill_value` attribute, returning 0 otherwise.
"""
if hasattr(data, 'fill_value'):
return data.fill_value
return 0

def auto_compresser(data: Any) -> Codec | None:
"""
Guess compressor from an input with a `compressor` attribute, returning `None` otherwise.
"""
if hasattr(data, 'compressor'):
return data.compressor
return None

def auto_filters(data: Any) -> list[Codec] | None:
"""
Guess filters from an input with a `filters` attribute, returning `None` otherwise.
"""
if hasattr(data, 'filters'):
return data.filters
return None

def auto_order(data: Any) -> Literal["C", "F"]:
"""
Guess array order from an input with an `order` attribute, returning "C" otherwise.
"""
if hasattr(data, 'order'):
return data.order
return "C"

def auto_dimension_separator(data: Any) -> Literal["/", "."]:
"""
Guess dimension separator from an input with a `dimension_separator` attribute, returning "/" otherwise.
"""
if hasattr(data, "dimension_separator"):
return data.dimension_separator
return "/"
