From 00229203421317709ae9260e3cd939e8bdf83163 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 27 Feb 2025 10:16:34 -0700 Subject: [PATCH 1/6] Use to_numpy in time decoding --- xarray/coding/times.py | 8 ++++---- xarray/core/formatting.py | 17 ++++++----------- xarray/namedarray/pycompat.py | 11 +++++++++-- xarray/tests/arrays.py | 7 +++++++ xarray/tests/namespace.py | 5 +++++ xarray/tests/test_coding_times.py | 18 ++++++++++++++++++ 6 files changed, 49 insertions(+), 17 deletions(-) create mode 100644 xarray/tests/namespace.py diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 68369dac0d7..fb859813f7e 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -28,7 +28,7 @@ from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray.pycompat import is_chunked_array, to_numpy from xarray.namedarray.utils import is_duck_dask_array try: @@ -310,7 +310,7 @@ def _decode_cf_datetime_dtype( # Dataset.__repr__ when users try to view their lazily decoded array. values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data)) example_value = np.concatenate( - [first_n_items(values, 1) or [0], last_item(values) or [0]] + [to_numpy(first_n_items(values, 1) or [0]), to_numpy(last_item(values) or [0])] ) try: @@ -516,7 +516,7 @@ def decode_cf_datetime( -------- cftime.num2date """ - num_dates = np.asarray(num_dates) + num_dates = to_numpy(num_dates) flat_num_dates = ravel(num_dates) if calendar is None: calendar = "standard" @@ -632,7 +632,7 @@ def decode_cf_timedelta( """Given an array of numeric timedeltas in netCDF format, convert it into a numpy timedelta64 ["s", "ms", "us", "ns"] array. 
""" - num_timedeltas = np.asarray(num_timedeltas) + num_timedeltas = to_numpy(num_timedeltas) unit = _netcdf_to_numpy_timeunit(units) with warnings.catch_warnings(): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index a6bacccbeef..993cddf2b57 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -18,12 +18,12 @@ from pandas.errors import OutOfBoundsDatetime from xarray.core.datatree_render import RenderDataTree -from xarray.core.duck_array_ops import array_all, array_any, array_equiv, astype +from xarray.core.duck_array_ops import array_all, array_any, array_equiv, astype, ravel from xarray.core.indexing import MemoryCachedArray from xarray.core.options import OPTIONS, _get_boolean_with_default from xarray.core.treenode import group_subtrees from xarray.core.utils import is_duck_array -from xarray.namedarray.pycompat import array_type, to_duck_array, to_numpy +from xarray.namedarray.pycompat import array_type, to_duck_array if TYPE_CHECKING: from xarray.core.coordinates import AbstractCoordinates @@ -94,7 +94,7 @@ def first_n_items(array, n_desired): # pass Variable._data if isinstance(array, Variable): array = array._data - return np.ravel(to_duck_array(array))[:n_desired] + return ravel(to_duck_array(array))[:n_desired] def last_n_items(array, n_desired): @@ -118,18 +118,13 @@ def last_n_items(array, n_desired): # pass Variable._data if isinstance(array, Variable): array = array._data - return np.ravel(to_duck_array(array))[-n_desired:] + return ravel(to_duck_array(array))[-n_desired:] def last_item(array): - """Returns the last item of an array in a list or an empty list.""" - if array.size == 0: - # work around for https://github.com/numpy/numpy/issues/5195 - return [] - + """Returns the last item of an array.""" indexer = (slice(-1, None),) * array.ndim - # to_numpy since dask doesn't support tolist - return np.ravel(to_numpy(array[indexer])).tolist() + return ravel(to_duck_array(array[indexer])) def calc_max_rows_first(max_rows: int) -> int: diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 3ce33d4d8ea..91903f5cfaf 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -102,6 +102,10 @@ def to_numpy( from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type + if hasattr(data, "to_numpy"): + # for tests only + return data.to_numpy() + if isinstance(data, ExplicitlyIndexed): data = data.get_duck_array() # type: ignore[no-untyped-call] @@ -122,7 +126,10 @@ def to_numpy( def to_duck_array(data: Any, **kwargs: dict[str, Any]) -> duckarray[_ShapeType, _DType]: - from xarray.core.indexing import ExplicitlyIndexed + from xarray.core.indexing import ( + ExplicitlyIndexed, + ImplicitToExplicitIndexingAdapter, + ) from xarray.namedarray.parallelcompat import get_chunked_array_type if is_chunked_array(data): @@ -130,7 +137,7 @@ def to_duck_array(data: Any, **kwargs: dict[str, Any]) -> duckarray[_ShapeType, loaded_data, *_ = chunkmanager.compute(data, **kwargs) # type: ignore[var-annotated] return loaded_data - if isinstance(data, ExplicitlyIndexed): + if isinstance(data, ExplicitlyIndexed | ImplicitToExplicitIndexingAdapter): return data.get_duck_array() # type: ignore[no-untyped-call, no-any-return] elif is_duck_array(data): return data diff --git a/xarray/tests/arrays.py b/xarray/tests/arrays.py index 7373b6c75ab..cc4c480c437 100644 --- a/xarray/tests/arrays.py +++ b/xarray/tests/arrays.py @@ -51,6 +51,10 @@ def __init__(self, array: 
np.ndarray): def __getitem__(self, key): return type(self)(self.array[key]) + def to_numpy(self) -> np.ndarray: + """Allow explicit conversions to numpy in `to_numpy`, but disallow np.asarray etc.""" + return self.array + def __array__( self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None ) -> np.ndarray: @@ -58,6 +62,9 @@ def __array__( def __array_namespace__(self): """Present to satisfy is_duck_array test.""" + from xarray.tests import namespace + + return namespace CONCATENATABLEARRAY_HANDLED_ARRAY_FUNCTIONS: dict[str, Callable] = {} diff --git a/xarray/tests/namespace.py b/xarray/tests/namespace.py new file mode 100644 index 00000000000..f0cc28f4b57 --- /dev/null +++ b/xarray/tests/namespace.py @@ -0,0 +1,5 @@ +from xarray.core import duck_array_ops + + +def reshape(array, shape, **kwargs): + return type(array)(duck_array_ops.reshape(array.array, shape=shape, **kwargs)) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 2e61e5d853e..380d5265653 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -42,6 +42,7 @@ from xarray.core.utils import is_duck_dask_array from xarray.testing import assert_equal, assert_identical from xarray.tests import ( + DuckArrayWrapper, FirstElementAccessibleArray, arm_xfail, assert_array_equal, @@ -1901,3 +1902,20 @@ def test_lazy_decode_timedelta_error() -> None: ) with pytest.raises(OutOfBoundsTimedelta, match="overflow"): decoded.load() + + +@pytest.mark.parametrize("calendar", ["standard", "360_day"]) +def test_duck_array_decode_times(calendar) -> None: + from xarray.core.indexing import LazilyIndexedArray + + days = LazilyIndexedArray(DuckArrayWrapper(np.array([1.0, 2.0, 3.0]))) + var = Variable( + ["time"], days, {"units": "days since 2001-01-01", "calendar": calendar} + ) + decoded = conventions.decode_cf_variable( + "foo", var, decode_times=CFDatetimeCoder(use_cftime=None) + ) + if calendar not in _STANDARD_CALENDARS: + assert decoded.dtype == np.dtype("O") + else: + assert decoded.dtype == np.dtype("=M8[ns]") From 856b299a610b0734fd75a2ce5981173aa4714922 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 27 Feb 2025 10:22:09 -0700 Subject: [PATCH 2/6] WIP --- xarray/coding/strings.py | 5 ++-- xarray/coding/times.py | 63 ++++++++++++++++++++++++++------------- xarray/tests/arrays.py | 12 ++++++++ xarray/tests/namespace.py | 27 ++++++++++++++++- 4 files changed, 83 insertions(+), 24 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 4ca6a3f0a46..8c74e0a2dfb 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -18,7 +18,7 @@ from xarray.core.utils import module_available from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray.pycompat import is_chunked_array, to_numpy HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") @@ -135,7 +135,8 @@ def decode(self, variable, name=None): if data.dtype == "S1" and dims: encoding["char_dim_name"] = dims[-1] dims = dims[:-1] - data = char_to_bytes(data) + # TODO (duck array encoding) + data = char_to_bytes(to_numpy(data)) return Variable(dims, data, attrs, encoding) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index fb859813f7e..57fed1fe52c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -21,14 +21,24 @@ unpack_for_encoding, ) from xarray.core import indexing +from 
xarray.core.array_api_compat import get_array_namespace from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like -from xarray.core.duck_array_ops import array_all, array_any, asarray, ravel, reshape +from xarray.core.duck_array_ops import ( + array_all, + array_any, + asarray, + astype, + concatenate, + isnull, + ravel, + reshape, +) from xarray.core.formatting import first_n_items, format_timestamp, last_item from xarray.core.pdcompat import default_precision_timestamp, timestamp_as_unit from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array, to_numpy +from xarray.namedarray.pycompat import is_chunked_array, to_duck_array, to_numpy from xarray.namedarray.utils import is_duck_dask_array try: @@ -100,7 +110,7 @@ def _is_numpy_compatible_time_range(times): if is_np_datetime_like(times.dtype): return True # times array contains cftime objects - times = np.asarray(times) + times = to_duck_array(times) tmin = times.min() tmax = times.max() try: @@ -309,8 +319,9 @@ def _decode_cf_datetime_dtype( # successfully. Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data)) - example_value = np.concatenate( - [to_numpy(first_n_items(values, 1) or [0]), to_numpy(last_item(values) or [0])] + zero = asarray([0], xp=get_array_namespace(values)) + example_value = concatenate( + [first_n_items(values, 1) or zero, last_item(values) or zero] ) try: @@ -342,7 +353,13 @@ def _decode_datetime_with_cftime( cftime = attempt_import("cftime") if num_dates.size > 0: return np.asarray( - cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) + cftime.num2date( + # cftime uses Cython so we must convert to numpy here. + to_numpy(num_dates), + units, + calendar, + only_use_cftime_datetimes=True, + ) ) else: return np.array([], dtype=object) @@ -357,7 +374,7 @@ def _check_date_for_units_since_refdate( f"Value {date} can't be represented as Datetime/Timedelta." ) delta = date * np.timedelta64(1, unit) - if not np.isnan(delta): + if not isnull(delta): # this will raise on dtype overflow for integer dtypes if date.dtype.kind in "u" and not np.int64(delta) == date: raise OutOfBoundsTimedelta( @@ -381,7 +398,7 @@ def _check_timedelta_range(value, data_unit, time_unit): "ignore", "invalid value encountered in multiply", RuntimeWarning ) delta = value * np.timedelta64(1, data_unit) - if not np.isnan(delta): + if not isnull(delta): # this will raise on dtype overflow for integer dtypes if value.dtype.kind in "u" and not np.int64(delta) == value: raise OutOfBoundsTimedelta( @@ -449,9 +466,9 @@ def _decode_datetime_with_pandas( # respectively. See https://github.com/pandas-dev/pandas/issues/56996 for # more details. if flat_num_dates.dtype.kind == "i": - flat_num_dates = flat_num_dates.astype(np.int64) + flat_num_dates = astype(flat_num_dates, np.int64) elif flat_num_dates.dtype.kind == "u": - flat_num_dates = flat_num_dates.astype(np.uint64) + flat_num_dates = astype(flat_num_dates, np.uint64) try: time_unit, ref_date = _unpack_time_unit_and_ref_date(units) @@ -483,9 +500,9 @@ def _decode_datetime_with_pandas( # overflow when converting to np.int64 would not be representable with a # timedelta64 value, and therefore would raise an error in the lines above. 
if flat_num_dates.dtype.kind in "iu": - flat_num_dates = flat_num_dates.astype(np.int64) + flat_num_dates = astype(flat_num_dates, np.int64) elif flat_num_dates.dtype.kind in "f": - flat_num_dates = flat_num_dates.astype(np.float64) + flat_num_dates = astype(flat_num_dates, np.float64) timedeltas = _numbers_to_timedelta( flat_num_dates, time_unit, ref_date.unit, "datetime" @@ -528,8 +545,12 @@ def decode_cf_datetime( ) except (KeyError, OutOfBoundsDatetime, OutOfBoundsTimedelta, OverflowError): dates = _decode_datetime_with_cftime( - flat_num_dates.astype(float), units, calendar + astype(flat_num_dates, float), units, calendar ) + # This conversion to numpy is only needed for nanarg* below. + # TODO: explore removing it. + # Note that `dates` is already a numpy object array of cftime objects. + num_dates = to_numpy(num_dates) # retrieve cftype dates_min = dates[np.nanargmin(num_dates)] dates_max = dates[np.nanargmax(num_dates)] @@ -586,16 +607,16 @@ def _numbers_to_timedelta( """Transform numbers to np.timedelta64.""" # keep NaT/nan mask if flat_num.dtype.kind == "f": - nan = np.asarray(np.isnan(flat_num)) + nan = isnull(flat_num) elif flat_num.dtype.kind == "i": - nan = np.asarray(flat_num == np.iinfo(np.int64).min) + nan = flat_num == np.iinfo(np.int64).min # in case we need to change the unit, we fix the numbers here # this should be safe, as errors would have been raised above ns_time_unit = _NS_PER_TIME_DELTA[time_unit] ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_unit] if ns_time_unit > ns_ref_date_unit: - flat_num = np.asarray(flat_num * np.int64(ns_time_unit / ns_ref_date_unit)) + flat_num = flat_num * np.int64(ns_time_unit / ns_ref_date_unit) time_unit = ref_unit # estimate fitting resolution for floating point values @@ -618,12 +639,12 @@ def _numbers_to_timedelta( # to prevent casting NaN to int with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) - flat_num = flat_num.astype(np.int64) - if nan.any(): + flat_num = astype(flat_num, np.int64) + if array_any(nan): flat_num[nan] = np.iinfo(np.int64).min # cast to wanted type - return flat_num.astype(f"timedelta64[{time_unit}]") + return astype(flat_num, f"timedelta64[{time_unit}]") def decode_cf_timedelta( @@ -712,8 +733,8 @@ def infer_datetime_units(dates) -> str: 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - dates = ravel(np.asarray(dates)) - if np.issubdtype(np.asarray(dates).dtype, "datetime64"): + dates = ravel(to_duck_array(dates)) + if np.issubdtype(dates.dtype, "datetime64"): dates = to_datetime_unboxed(dates) dates = dates[pd.notnull(dates)] reference_date = dates[0] if len(dates) > 0 else "1970-01-01" diff --git a/xarray/tests/arrays.py b/xarray/tests/arrays.py index cc4c480c437..ca7d70c4be5 100644 --- a/xarray/tests/arrays.py +++ b/xarray/tests/arrays.py @@ -51,6 +51,18 @@ def __init__(self, array: np.ndarray): def __getitem__(self, key): return type(self)(self.array[key]) + def min(self): + return self.array.min() + + def max(self): + return self.array.max() + + def __mul__(self, other): + return type(self)(self.array.__mul__(other)) + + def __radd__(self, other): + return type(self)(other + self.array) + def to_numpy(self) -> np.ndarray: """Allow explicit conversions to numpy in `to_numpy`, but disallow np.asarray etc.""" return self.array diff --git a/xarray/tests/namespace.py b/xarray/tests/namespace.py index f0cc28f4b57..ceeb85c2cbc 100644 --- a/xarray/tests/namespace.py +++ b/xarray/tests/namespace.py @@ -1,5 +1,30 @@ -from 
xarray.core import duck_array_ops +import numpy as np + +from xarray.core import array_api_compat, duck_array_ops def reshape(array, shape, **kwargs): return type(array)(duck_array_ops.reshape(array.array, shape=shape, **kwargs)) + + +def concatenate(arrays, axis): + return type(arrays[0])( + duck_array_ops.concatenate([a.array for a in arrays], axis=axis) + ) + + +def result_type(*arrays_and_dtypes): + parsed = [a.array if hasattr(a, "array") else a for a in arrays_and_dtypes] + return array_api_compat.result_type(*parsed, xp=np) + + +def astype(array, dtype, **kwargs): + return type(array)(duck_array_ops.astype(array.array, dtype=dtype, **kwargs)) + + +def isnan(array): + return type(array)(duck_array_ops.isnull(array.array)) + + +def any(array, *args, **kwargs): # TODO: keepdims + return duck_array_ops.array_any(array.array, *args, **kwargs) From f69ba29367ea502c9f97d0c76fb4c2f11dfc31ed Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 27 Feb 2025 10:22:21 -0700 Subject: [PATCH 3/6] Revert "WIP" This reverts commit 54be9b1b8c1b1f0fa23d8dd7ae0a96bb0834b9dc. --- xarray/coding/strings.py | 5 ++-- xarray/coding/times.py | 63 +++++++++++++-------------------------- xarray/tests/arrays.py | 12 -------- xarray/tests/namespace.py | 27 +---------------- 4 files changed, 24 insertions(+), 83 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 8c74e0a2dfb..4ca6a3f0a46 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -18,7 +18,7 @@ from xarray.core.utils import module_available from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array, to_numpy +from xarray.namedarray.pycompat import is_chunked_array HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") @@ -135,8 +135,7 @@ def decode(self, variable, name=None): if data.dtype == "S1" and dims: encoding["char_dim_name"] = dims[-1] dims = dims[:-1] - # TODO (duck array encoding) - data = char_to_bytes(to_numpy(data)) + data = char_to_bytes(data) return Variable(dims, data, attrs, encoding) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 57fed1fe52c..fb859813f7e 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -21,24 +21,14 @@ unpack_for_encoding, ) from xarray.core import indexing -from xarray.core.array_api_compat import get_array_namespace from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like -from xarray.core.duck_array_ops import ( - array_all, - array_any, - asarray, - astype, - concatenate, - isnull, - ravel, - reshape, -) +from xarray.core.duck_array_ops import array_all, array_any, asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item from xarray.core.pdcompat import default_precision_timestamp, timestamp_as_unit from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array, to_duck_array, to_numpy +from xarray.namedarray.pycompat import is_chunked_array, to_numpy from xarray.namedarray.utils import is_duck_dask_array try: @@ -110,7 +100,7 @@ def _is_numpy_compatible_time_range(times): if is_np_datetime_like(times.dtype): return True # times array contains cftime objects - times = to_duck_array(times) + times = np.asarray(times) tmin = times.min() tmax = times.max() 
try: @@ -319,9 +309,8 @@ def _decode_cf_datetime_dtype( # successfully. Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data)) - zero = asarray([0], xp=get_array_namespace(values)) - example_value = concatenate( - [first_n_items(values, 1) or zero, last_item(values) or zero] + example_value = np.concatenate( + [to_numpy(first_n_items(values, 1) or [0]), to_numpy(last_item(values) or [0])] ) try: @@ -353,13 +342,7 @@ def _decode_datetime_with_cftime( cftime = attempt_import("cftime") if num_dates.size > 0: return np.asarray( - cftime.num2date( - # cftime uses Cython so we must convert to numpy here. - to_numpy(num_dates), - units, - calendar, - only_use_cftime_datetimes=True, - ) + cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) ) else: return np.array([], dtype=object) @@ -374,7 +357,7 @@ def _check_date_for_units_since_refdate( f"Value {date} can't be represented as Datetime/Timedelta." ) delta = date * np.timedelta64(1, unit) - if not isnull(delta): + if not np.isnan(delta): # this will raise on dtype overflow for integer dtypes if date.dtype.kind in "u" and not np.int64(delta) == date: raise OutOfBoundsTimedelta( @@ -398,7 +381,7 @@ def _check_timedelta_range(value, data_unit, time_unit): "ignore", "invalid value encountered in multiply", RuntimeWarning ) delta = value * np.timedelta64(1, data_unit) - if not isnull(delta): + if not np.isnan(delta): # this will raise on dtype overflow for integer dtypes if value.dtype.kind in "u" and not np.int64(delta) == value: raise OutOfBoundsTimedelta( @@ -466,9 +449,9 @@ def _decode_datetime_with_pandas( # respectively. See https://github.com/pandas-dev/pandas/issues/56996 for # more details. if flat_num_dates.dtype.kind == "i": - flat_num_dates = astype(flat_num_dates, np.int64) + flat_num_dates = flat_num_dates.astype(np.int64) elif flat_num_dates.dtype.kind == "u": - flat_num_dates = astype(flat_num_dates, np.uint64) + flat_num_dates = flat_num_dates.astype(np.uint64) try: time_unit, ref_date = _unpack_time_unit_and_ref_date(units) @@ -500,9 +483,9 @@ def _decode_datetime_with_pandas( # overflow when converting to np.int64 would not be representable with a # timedelta64 value, and therefore would raise an error in the lines above. if flat_num_dates.dtype.kind in "iu": - flat_num_dates = astype(flat_num_dates, np.int64) + flat_num_dates = flat_num_dates.astype(np.int64) elif flat_num_dates.dtype.kind in "f": - flat_num_dates = astype(flat_num_dates, np.float64) + flat_num_dates = flat_num_dates.astype(np.float64) timedeltas = _numbers_to_timedelta( flat_num_dates, time_unit, ref_date.unit, "datetime" @@ -545,12 +528,8 @@ def decode_cf_datetime( ) except (KeyError, OutOfBoundsDatetime, OutOfBoundsTimedelta, OverflowError): dates = _decode_datetime_with_cftime( - astype(flat_num_dates, float), units, calendar + flat_num_dates.astype(float), units, calendar ) - # This conversion to numpy is only needed for nanarg* below. - # TODO: explore removing it. - # Note that `dates` is already a numpy object array of cftime objects. 
- num_dates = to_numpy(num_dates) # retrieve cftype dates_min = dates[np.nanargmin(num_dates)] dates_max = dates[np.nanargmax(num_dates)] @@ -607,16 +586,16 @@ def _numbers_to_timedelta( """Transform numbers to np.timedelta64.""" # keep NaT/nan mask if flat_num.dtype.kind == "f": - nan = isnull(flat_num) + nan = np.asarray(np.isnan(flat_num)) elif flat_num.dtype.kind == "i": - nan = flat_num == np.iinfo(np.int64).min + nan = np.asarray(flat_num == np.iinfo(np.int64).min) # in case we need to change the unit, we fix the numbers here # this should be safe, as errors would have been raised above ns_time_unit = _NS_PER_TIME_DELTA[time_unit] ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_unit] if ns_time_unit > ns_ref_date_unit: - flat_num = flat_num * np.int64(ns_time_unit / ns_ref_date_unit) + flat_num = np.asarray(flat_num * np.int64(ns_time_unit / ns_ref_date_unit)) time_unit = ref_unit # estimate fitting resolution for floating point values @@ -639,12 +618,12 @@ def _numbers_to_timedelta( # to prevent casting NaN to int with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) - flat_num = astype(flat_num, np.int64) - if array_any(nan): + flat_num = flat_num.astype(np.int64) + if nan.any(): flat_num[nan] = np.iinfo(np.int64).min # cast to wanted type - return astype(flat_num, f"timedelta64[{time_unit}]") + return flat_num.astype(f"timedelta64[{time_unit}]") def decode_cf_timedelta( @@ -733,8 +712,8 @@ def infer_datetime_units(dates) -> str: 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - dates = ravel(to_duck_array(dates)) - if np.issubdtype(dates.dtype, "datetime64"): + dates = ravel(np.asarray(dates)) + if np.issubdtype(np.asarray(dates).dtype, "datetime64"): dates = to_datetime_unboxed(dates) dates = dates[pd.notnull(dates)] reference_date = dates[0] if len(dates) > 0 else "1970-01-01" diff --git a/xarray/tests/arrays.py b/xarray/tests/arrays.py index ca7d70c4be5..cc4c480c437 100644 --- a/xarray/tests/arrays.py +++ b/xarray/tests/arrays.py @@ -51,18 +51,6 @@ def __init__(self, array: np.ndarray): def __getitem__(self, key): return type(self)(self.array[key]) - def min(self): - return self.array.min() - - def max(self): - return self.array.max() - - def __mul__(self, other): - return type(self)(self.array.__mul__(other)) - - def __radd__(self, other): - return type(self)(other + self.array) - def to_numpy(self) -> np.ndarray: """Allow explicit conversions to numpy in `to_numpy`, but disallow np.asarray etc.""" return self.array diff --git a/xarray/tests/namespace.py b/xarray/tests/namespace.py index ceeb85c2cbc..f0cc28f4b57 100644 --- a/xarray/tests/namespace.py +++ b/xarray/tests/namespace.py @@ -1,30 +1,5 @@ -import numpy as np - -from xarray.core import array_api_compat, duck_array_ops +from xarray.core import duck_array_ops def reshape(array, shape, **kwargs): return type(array)(duck_array_ops.reshape(array.array, shape=shape, **kwargs)) - - -def concatenate(arrays, axis): - return type(arrays[0])( - duck_array_ops.concatenate([a.array for a in arrays], axis=axis) - ) - - -def result_type(*arrays_and_dtypes): - parsed = [a.array if hasattr(a, "array") else a for a in arrays_and_dtypes] - return array_api_compat.result_type(*parsed, xp=np) - - -def astype(array, dtype, **kwargs): - return type(array)(duck_array_ops.astype(array.array, dtype=dtype, **kwargs)) - - -def isnan(array): - return type(array)(duck_array_ops.isnull(array.array)) - - -def any(array, *args, **kwargs): # TODO: keepdims - return 
duck_array_ops.array_any(array.array, *args, **kwargs) From e092c483b8a4c04e81c44bcd5d31454c3a6e56d8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 27 Feb 2025 11:14:59 -0700 Subject: [PATCH 4/6] fix --- xarray/coding/times.py | 2 +- xarray/namedarray/pycompat.py | 4 ++-- xarray/tests/test_coding_times.py | 10 +++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index fb859813f7e..2c931d6fb5c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -310,7 +310,7 @@ def _decode_cf_datetime_dtype( # Dataset.__repr__ when users try to view their lazily decoded array. values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data)) example_value = np.concatenate( - [to_numpy(first_n_items(values, 1) or [0]), to_numpy(last_item(values) or [0])] + [to_numpy(first_n_items(values, 1)), to_numpy(last_item(values))] ) try: diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 91903f5cfaf..f7b60935103 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -103,8 +103,8 @@ def to_numpy( from xarray.namedarray.parallelcompat import get_chunked_array_type if hasattr(data, "to_numpy"): - # for tests only - return data.to_numpy() + # for tests only at the moment + return data.to_numpy() # type: ignore[no-any-return] if isinstance(data, ExplicitlyIndexed): data = data.get_duck_array() # type: ignore[no-untyped-call] diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 380d5265653..62fdc7955ba 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1904,7 +1904,15 @@ def test_lazy_decode_timedelta_error() -> None: decoded.load() -@pytest.mark.parametrize("calendar", ["standard", "360_day"]) +@pytest.mark.parametrize( + "calendar", + [ + "standard", + pytest.param( + "360_day", marks=pytest.mark.skipif(not has_cftime, reason="no cftime") + ), + ], +) def test_duck_array_decode_times(calendar) -> None: from xarray.core.indexing import LazilyIndexedArray From d171b44c5bdd4aaa2a040899dd4a724c53ef9341 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 9 Mar 2025 21:28:40 -0600 Subject: [PATCH 5/6] Update xarray/namedarray/pycompat.py --- xarray/namedarray/pycompat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index f7b60935103..35c545080fb 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -102,9 +102,11 @@ def to_numpy( from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type - if hasattr(data, "to_numpy"): + try: # for tests only at the moment return data.to_numpy() # type: ignore[no-any-return] + except AttributeError: + pass if isinstance(data, ExplicitlyIndexed): data = data.get_duck_array() # type: ignore[no-untyped-call] From 91f5d5ccdcf96a86e466334b6d07c7753c364986 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 13 Mar 2025 13:04:51 -0600 Subject: [PATCH 6/6] Update xarray/namedarray/pycompat.py --- xarray/namedarray/pycompat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 35c545080fb..68b6a7853bf 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -104,7 +104,7 @@ def to_numpy( try: # for tests only at the moment - return data.to_numpy() # type: ignore[no-any-return] 
+        return data.to_numpy()  # type: ignore[no-any-return,union-attr]
     except AttributeError:
         pass
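Illustrative sketch only, not part of the patch series above: it mirrors the dispatch order that the final pycompat.to_numpy hunk settles on (prefer an object's own to_numpy() method, fall back to np.asarray otherwise). The names DuckOnlyToNumpy and to_numpy_sketch are hypothetical stand-ins for xarray's test-only DuckArrayWrapper and xarray.namedarray.pycompat.to_numpy.

import numpy as np


class DuckOnlyToNumpy:
    """Duck array that exposes an explicit to_numpy() conversion."""

    def __init__(self, array: np.ndarray) -> None:
        self.array = array

    def to_numpy(self) -> np.ndarray:
        return self.array


def to_numpy_sketch(data):
    try:
        # the patched branch: let duck arrays convert themselves explicitly
        # ("for tests only at the moment" in the real helper)
        return data.to_numpy()
    except AttributeError:
        pass
    # anything without a to_numpy() method: plain arrays, lists, scalars, ...
    return np.asarray(data)


print(to_numpy_sketch(DuckOnlyToNumpy(np.array([1.0, 2.0, 3.0]))))  # [1. 2. 3.]
print(to_numpy_sketch([1, 2, 3]))  # [1 2 3]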
