Use to_numpy in time decoding #10081

Merged
merged 10 commits · Mar 13, 2025
Changes from 1 commit
Revert "WIP"
This reverts commit 54be9b1.
dcherian committed Feb 27, 2025
commit f69ba29367ea502c9f97d0c76fb4c2f11dfc31ed
5 changes: 2 additions & 3 deletions xarray/coding/strings.py
@@ -18,7 +18,7 @@
from xarray.core.utils import module_available
from xarray.core.variable import Variable
from xarray.namedarray.parallelcompat import get_chunked_array_type
-from xarray.namedarray.pycompat import is_chunked_array, to_numpy
+from xarray.namedarray.pycompat import is_chunked_array

HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0")

@@ -135,8 +135,7 @@ def decode(self, variable, name=None):
if data.dtype == "S1" and dims:
encoding["char_dim_name"] = dims[-1]
dims = dims[:-1]
-# TODO (duck array encoding)
-data = char_to_bytes(to_numpy(data))
+data = char_to_bytes(data)
return Variable(dims, data, attrs, encoding)


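For context on the reverted line: the WIP change converted the incoming duck array to numpy with `to_numpy` before calling `char_to_bytes`, while the reverted code passes `data` through unchanged. As a rough illustration of what an explicit to_numpy-style conversion amounts to (a sketch under assumptions, not xarray's actual `to_numpy` helper):

```python
import numpy as np


def to_numpy_sketch(data):
    # Illustration only: prefer an explicit conversion method if the duck
    # array provides one, compute chunked arrays, and only then fall back
    # to numpy's implicit coercion.
    if hasattr(data, "to_numpy"):
        return np.asarray(data.to_numpy())
    if hasattr(data, "compute"):  # e.g. dask-backed arrays
        return np.asarray(data.compute())
    return np.asarray(data)
```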
63 changes: 21 additions & 42 deletions xarray/coding/times.py
@@ -21,24 +21,14 @@
unpack_for_encoding,
)
from xarray.core import indexing
-from xarray.core.array_api_compat import get_array_namespace
from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like
-from xarray.core.duck_array_ops import (
-array_all,
-array_any,
-asarray,
-astype,
-concatenate,
-isnull,
-ravel,
-reshape,
-)
+from xarray.core.duck_array_ops import array_all, array_any, asarray, ravel, reshape
from xarray.core.formatting import first_n_items, format_timestamp, last_item
from xarray.core.pdcompat import default_precision_timestamp, timestamp_as_unit
from xarray.core.utils import attempt_import, emit_user_level_warning
from xarray.core.variable import Variable
from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type
-from xarray.namedarray.pycompat import is_chunked_array, to_duck_array, to_numpy
+from xarray.namedarray.pycompat import is_chunked_array, to_numpy
from xarray.namedarray.utils import is_duck_dask_array

try:
@@ -110,7 +100,7 @@ def _is_numpy_compatible_time_range(times):
if is_np_datetime_like(times.dtype):
return True
# times array contains cftime objects
-times = to_duck_array(times)
+times = np.asarray(times)
tmin = times.min()
tmax = times.max()
try:
@@ -319,9 +309,8 @@ def _decode_cf_datetime_dtype(
# successfully. Otherwise, tracebacks end up swallowed by
# Dataset.__repr__ when users try to view their lazily decoded array.
values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data))
-zero = asarray([0], xp=get_array_namespace(values))
-example_value = concatenate(
-[first_n_items(values, 1) or zero, last_item(values) or zero]
+example_value = np.concatenate(
+[to_numpy(first_n_items(values, 1) or [0]), to_numpy(last_item(values) or [0])]
)

try:
@@ -353,13 +342,7 @@ def _decode_datetime_with_cftime(
cftime = attempt_import("cftime")
if num_dates.size > 0:
return np.asarray(
-cftime.num2date(
-# cftime uses Cython so we must convert to numpy here.
-to_numpy(num_dates),
-units,
-calendar,
-only_use_cftime_datetimes=True,
-)
+cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True)
)
else:
return np.array([], dtype=object)
@@ -374,7 +357,7 @@ def _check_date_for_units_since_refdate(
f"Value {date} can't be represented as Datetime/Timedelta."
)
delta = date * np.timedelta64(1, unit)
-if not isnull(delta):
+if not np.isnan(delta):
# this will raise on dtype overflow for integer dtypes
if date.dtype.kind in "u" and not np.int64(delta) == date:
raise OutOfBoundsTimedelta(
@@ -398,7 +381,7 @@ def _check_timedelta_range(value, data_unit, time_unit):
"ignore", "invalid value encountered in multiply", RuntimeWarning
)
delta = value * np.timedelta64(1, data_unit)
-if not isnull(delta):
+if not np.isnan(delta):
# this will raise on dtype overflow for integer dtypes
if value.dtype.kind in "u" and not np.int64(delta) == value:
raise OutOfBoundsTimedelta(
@@ -466,9 +449,9 @@ def _decode_datetime_with_pandas(
# respectively. See https://github.com/pandas-dev/pandas/issues/56996 for
# more details.
if flat_num_dates.dtype.kind == "i":
-flat_num_dates = astype(flat_num_dates, np.int64)
+flat_num_dates = flat_num_dates.astype(np.int64)
elif flat_num_dates.dtype.kind == "u":
-flat_num_dates = astype(flat_num_dates, np.uint64)
+flat_num_dates = flat_num_dates.astype(np.uint64)

try:
time_unit, ref_date = _unpack_time_unit_and_ref_date(units)
@@ -500,9 +483,9 @@ def _decode_datetime_with_pandas(
# overflow when converting to np.int64 would not be representable with a
# timedelta64 value, and therefore would raise an error in the lines above.
if flat_num_dates.dtype.kind in "iu":
-flat_num_dates = astype(flat_num_dates, np.int64)
+flat_num_dates = flat_num_dates.astype(np.int64)
elif flat_num_dates.dtype.kind in "f":
-flat_num_dates = astype(flat_num_dates, np.float64)
+flat_num_dates = flat_num_dates.astype(np.float64)

timedeltas = _numbers_to_timedelta(
flat_num_dates, time_unit, ref_date.unit, "datetime"
@@ -545,12 +528,8 @@ def decode_cf_datetime(
)
except (KeyError, OutOfBoundsDatetime, OutOfBoundsTimedelta, OverflowError):
dates = _decode_datetime_with_cftime(
-astype(flat_num_dates, float), units, calendar
+flat_num_dates.astype(float), units, calendar
)
-# This conversion to numpy is only needed for nanarg* below.
-# TODO: explore removing it.
-# Note that `dates` is already a numpy object array of cftime objects.
-num_dates = to_numpy(num_dates)
# retrieve cftype
dates_min = dates[np.nanargmin(num_dates)]
dates_max = dates[np.nanargmax(num_dates)]
@@ -607,16 +586,16 @@ def _numbers_to_timedelta(
"""Transform numbers to np.timedelta64."""
# keep NaT/nan mask
if flat_num.dtype.kind == "f":
-nan = isnull(flat_num)
+nan = np.asarray(np.isnan(flat_num))
elif flat_num.dtype.kind == "i":
-nan = flat_num == np.iinfo(np.int64).min
+nan = np.asarray(flat_num == np.iinfo(np.int64).min)

# in case we need to change the unit, we fix the numbers here
# this should be safe, as errors would have been raised above
ns_time_unit = _NS_PER_TIME_DELTA[time_unit]
ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_unit]
if ns_time_unit > ns_ref_date_unit:
-flat_num = flat_num * np.int64(ns_time_unit / ns_ref_date_unit)
+flat_num = np.asarray(flat_num * np.int64(ns_time_unit / ns_ref_date_unit))
time_unit = ref_unit

# estimate fitting resolution for floating point values
@@ -639,12 +618,12 @@
# to prevent casting NaN to int
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
-flat_num = astype(flat_num, np.int64)
-if array_any(nan):
+flat_num = flat_num.astype(np.int64)
+if nan.any():
flat_num[nan] = np.iinfo(np.int64).min

# cast to wanted type
-return astype(flat_num, f"timedelta64[{time_unit}]")
+return flat_num.astype(f"timedelta64[{time_unit}]")


def decode_cf_timedelta(
@@ -733,8 +712,8 @@ def infer_datetime_units(dates) -> str:
'hours', 'minutes' or 'seconds' (the first one that can evenly divide all
unique time deltas in `dates`)
"""
-dates = ravel(to_duck_array(dates))
-if np.issubdtype(dates.dtype, "datetime64"):
+dates = ravel(np.asarray(dates))
+if np.issubdtype(np.asarray(dates).dtype, "datetime64"):
dates = to_datetime_unboxed(dates)
dates = dates[pd.notnull(dates)]
reference_date = dates[0] if len(dates) > 0 else "1970-01-01"
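The `_numbers_to_timedelta` hunks above show the pattern this commit reverts to: mask NaNs, cast to int64, re-insert the NaT sentinel, then view the result as `timedelta64`. A minimal self-contained sketch of that pattern (hypothetical helper name, plain numpy only):

```python
import warnings

import numpy as np


def numbers_to_timedelta_sketch(flat_num, time_unit="s"):
    # Mask NaNs so they can be restored as NaT after the integer cast.
    nan = np.isnan(flat_num) if flat_num.dtype.kind == "f" else np.zeros(flat_num.shape, bool)
    with warnings.catch_warnings():
        # Casting NaN to int emits a RuntimeWarning; the result is overwritten below.
        warnings.simplefilter("ignore", RuntimeWarning)
        as_int = flat_num.astype(np.int64)
    # np.iinfo(np.int64).min is the sentinel that timedelta64 treats as NaT.
    as_int[nan] = np.iinfo(np.int64).min
    return as_int.astype(f"timedelta64[{time_unit}]")


numbers_to_timedelta_sketch(np.array([1.0, np.nan, 3.0]))
# timedelta64[s] array: [1, NaT, 3]
```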
12 changes: 0 additions & 12 deletions xarray/tests/arrays.py
@@ -51,18 +51,6 @@ def __init__(self, array: np.ndarray):
def __getitem__(self, key):
return type(self)(self.array[key])

-def min(self):
-return self.array.min()

-def max(self):
-return self.array.max()

-def __mul__(self, other):
-return type(self)(self.array.__mul__(other))

-def __radd__(self, other):
-return type(self)(other + self.array)

def to_numpy(self) -> np.ndarray:
"""Allow explicit conversions to numpy in `to_numpy`, but disallow np.asarray etc."""
return self.array
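Per its remaining docstring, the test class these methods are deleted from allows explicit conversion via `to_numpy` while disallowing `np.asarray` and friends. A stripped-down sketch of that idea, with an assumed class name rather than the test suite's exact code:

```python
import numpy as np


class OnlyExplicitToNumpy:
    """Allows .to_numpy(), but refuses implicit coercion via np.asarray."""

    def __init__(self, array: np.ndarray):
        self.array = array

    def __getitem__(self, key):
        return type(self)(self.array[key])

    def __array__(self, dtype=None, copy=None):
        # Implicit coercion paths (np.asarray, np.array, ...) end up here.
        raise NotImplementedError("convert explicitly with .to_numpy()")

    def to_numpy(self) -> np.ndarray:
        return self.array


arr = OnlyExplicitToNumpy(np.arange(3))
arr.to_numpy()      # fine: explicit conversion
# np.asarray(arr)   # raises NotImplementedError from __array__
```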
27 changes: 1 addition & 26 deletions xarray/tests/namespace.py
@@ -1,30 +1,5 @@
-import numpy as np

-from xarray.core import array_api_compat, duck_array_ops
+from xarray.core import duck_array_ops


def reshape(array, shape, **kwargs):
return type(array)(duck_array_ops.reshape(array.array, shape=shape, **kwargs))


-def concatenate(arrays, axis):
-return type(arrays[0])(
-duck_array_ops.concatenate([a.array for a in arrays], axis=axis)
-)


-def result_type(*arrays_and_dtypes):
-parsed = [a.array if hasattr(a, "array") else a for a in arrays_and_dtypes]
-return array_api_compat.result_type(*parsed, xp=np)


-def astype(array, dtype, **kwargs):
-return type(array)(duck_array_ops.astype(array.array, dtype=dtype, **kwargs))


-def isnan(array):
-return type(array)(duck_array_ops.isnull(array.array))


-def any(array, *args, **kwargs): # TODO: keepdims
-return duck_array_ops.array_any(array.array, *args, **kwargs)
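All of the deleted helpers follow the same wrap/unwrap pattern as the surviving `reshape`: unwrap the custom array's `.array`, delegate to `duck_array_ops`, and rewrap in the original type so tests can check that decoding preserves the duck type. The removed `astype` wrapper is representative (reproduced here as a sketch; how the test namespace is looked up by xarray is not shown in this diff):

```python
from xarray.core import duck_array_ops


def astype(array, dtype, **kwargs):
    # Unwrap the duck array, delegate to xarray's duck_array_ops, and rewrap
    # in the same type so the result is still the custom test array.
    return type(array)(duck_array_ops.astype(array.array, dtype=dtype, **kwargs))
```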