From f1e8be4552f2c50fc83def272921950648a85e5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 23 Jan 2025 09:20:45 +0100 Subject: [PATCH 1/7] fix mean for datetime-like by using the respective dtype time resolution unit, adapting tests --- xarray/core/duck_array_ops.py | 15 +++---- xarray/tests/test_duck_array_ops.py | 64 +++++++++++++++++++---------- 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 7e7333fd8ea..9e702fcd6c8 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -666,12 +666,10 @@ def np_timedelta64_to_float(array, datetime_unit): Notes ----- - The array is first converted to microseconds, which is less likely to - cause overflow errors. + The array is first converted to datetimeunit. """ - array = array.astype("timedelta64[ns]").astype(np.float64) - conversion_factor = np.timedelta64(1, "ns") / np.timedelta64(1, datetime_unit) - return conversion_factor * array + # todo: should we check for overflow here? 
+ return array.astype(f"timedelta64[{datetime_unit}]").astype(np.float64) def pd_timedelta_to_float(value, datetime_unit): @@ -715,8 +713,11 @@ def mean(array, axis=None, skipna=None, **kwargs): if dtypes.is_datetime_like(array.dtype): offset = _datetime_nanmin(array) - # xarray always uses np.datetime64[ns] for np.datetime64 data - dtype = "timedelta64[ns]" + # from version 2025.01.2 xarray uses np.datetime64[unit] where unit + # is one of "s", "ms", "us", "ns" + # the respective unit is used for the timedelta representation + unit, _ = np.datetime_data(offset.dtype) + dtype = f"timedelta64[{unit}]" return ( _mean( datetime_to_numeric(array, offset), axis=axis, skipna=skipna, **kwargs diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index e1306964757..e26fe25e04c 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -9,6 +9,7 @@ from numpy import array, nan from xarray import DataArray, Dataset, cftime_range, concat +from xarray.coding.times import _NS_PER_TIME_DELTA from xarray.core import dtypes, duck_array_ops from xarray.core.duck_array_ops import ( array_notnull_equiv, @@ -28,6 +29,7 @@ where, ) from xarray.core.extension_array import PandasExtensionArray +from xarray.core.types import NPDatetimeUnitOptions, PDDatetimeUnitOptions from xarray.namedarray.pycompat import array_type from xarray.testing import assert_allclose, assert_equal, assert_identical from xarray.tests import ( @@ -411,10 +413,11 @@ def assert_dask_array(da, dask): @arm_xfail @pytest.mark.filterwarnings("ignore:All-NaN .* encountered:RuntimeWarning") @pytest.mark.parametrize("dask", [False, True] if has_dask else [False]) -def test_datetime_mean(dask: bool) -> None: +def test_datetime_mean(dask: bool, time_unit: PDDatetimeUnitOptions) -> None: # Note: only testing numpy, as dask is broken upstream + dtype = f"M8[{time_unit}]" da = DataArray( - np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], 
dtype="M8[ns]"), + np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype=dtype), dims=["time"], ) if dask: @@ -846,11 +849,11 @@ def test_multiple_dims(dtype, dask, skipna, func): @pytest.mark.parametrize("dask", [True, False]) -def test_datetime_to_numeric_datetime64(dask): +def test_datetime_to_numeric_datetime64(dask, time_unit: PDDatetimeUnitOptions): if dask and not has_dask: pytest.skip("requires dask") - times = pd.date_range("2000", periods=5, freq="7D").values + times = pd.date_range("2000", periods=5, freq="7D").as_unit(time_unit).values if dask: import dask.array @@ -923,15 +926,18 @@ def test_datetime_to_numeric_cftime(dask): @requires_cftime -def test_datetime_to_numeric_potential_overflow(): +def test_datetime_to_numeric_potential_overflow(time_unit: PDDatetimeUnitOptions): import cftime - times = pd.date_range("2000", periods=5, freq="7D").values.astype("datetime64[us]") + if time_unit == "ns": + pytest.skip("out-of-bounds datetime64 overflow") + dtype = f"M8[{time_unit}]" + times = pd.date_range("2000", periods=5, freq="7D").values.astype(dtype) cftimes = cftime_range( "2000", periods=5, freq="7D", calendar="proleptic_gregorian" ).values - offset = np.datetime64("0001-01-01") + offset = np.datetime64("0001-01-01", time_unit) cfoffset = cftime.DatetimeProlepticGregorian(1, 1, 1) result = duck_array_ops.datetime_to_numeric( @@ -957,24 +963,37 @@ def test_py_timedelta_to_float(): assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6 -@pytest.mark.parametrize( - "td, expected", - ([np.timedelta64(1, "D"), 86400 * 1e9], [np.timedelta64(1, "ns"), 1.0]), -) -def test_np_timedelta64_to_float(td, expected): - out = np_timedelta64_to_float(td, datetime_unit="ns") +@pytest.mark.parametrize("np_dt_unit", ["D", "h", "m", "s", "ms", "us", "ns"]) +def test_np_timedelta64_to_float( + np_dt_unit: NPDatetimeUnitOptions, time_unit: PDDatetimeUnitOptions +): + # tests any combination of source np.timedelta64 (NPDatetimeUnitOptions) with + # 
np_timedelta_to_float with dedicated target unit (PDDatetimeUnitOptions) + td = np.timedelta64(1, np_dt_unit) + if _NS_PER_TIME_DELTA[np_dt_unit] < _NS_PER_TIME_DELTA[time_unit]: + pytest.skip("combination cannot be calculated without precision loss") + expected = _NS_PER_TIME_DELTA[np_dt_unit] / _NS_PER_TIME_DELTA[time_unit] + + out = np_timedelta64_to_float(td, datetime_unit=time_unit) np.testing.assert_allclose(out, expected) assert isinstance(out, float) - out = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit="ns") + out = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit=time_unit) np.testing.assert_allclose(out, expected) -@pytest.mark.parametrize( - "td, expected", ([pd.Timedelta(1, "D"), 86400 * 1e9], [pd.Timedelta(1, "ns"), 1.0]) -) -def test_pd_timedelta_to_float(td, expected): - out = pd_timedelta_to_float(td, datetime_unit="ns") +@pytest.mark.parametrize("np_dt_unit", ["D", "h", "m", "s", "ms", "us", "ns"]) +def test_pd_timedelta_to_float( + np_dt_unit: NPDatetimeUnitOptions, time_unit: PDDatetimeUnitOptions +): + # tests any combination of source pd.Timedelta (NPDatetimeUnitOptions) with + # np_timedelta_to_float with dedicated target unit (PDDatetimeUnitOptions) + td = pd.Timedelta(1, np_dt_unit) + if _NS_PER_TIME_DELTA[np_dt_unit] < _NS_PER_TIME_DELTA[time_unit]: + pytest.skip("combination cannot be calculated without precision loss") + expected = _NS_PER_TIME_DELTA[np_dt_unit] / _NS_PER_TIME_DELTA[time_unit] + + out = pd_timedelta_to_float(td, datetime_unit=time_unit) np.testing.assert_allclose(out, expected) assert isinstance(out, float) @@ -982,10 +1001,11 @@ def test_pd_timedelta_to_float(td, expected): @pytest.mark.parametrize( "td", [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day"] ) -def test_timedelta_to_numeric(td): +def test_timedelta_to_numeric(td, time_unit: PDDatetimeUnitOptions): # Scalar input - out = timedelta_to_numeric(td, "ns") - np.testing.assert_allclose(out, 86400 * 1e9) + out = 
timedelta_to_numeric(td, time_unit) + expected = _NS_PER_TIME_DELTA["D"] / _NS_PER_TIME_DELTA[time_unit] + np.testing.assert_allclose(out, expected) assert isinstance(out, float) From a66656bdb88059d63f8e9c49cec28dd9f1b3c089 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 23 Jan 2025 10:06:20 +0100 Subject: [PATCH 2/7] fix mypy --- xarray/tests/test_duck_array_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index e26fe25e04c..736b00a4f20 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -877,8 +877,8 @@ def test_datetime_to_numeric_datetime64(dask, time_unit: PDDatetimeUnitOptions): result = duck_array_ops.datetime_to_numeric( times, datetime_unit="h", dtype=dtype ) - expected = 24 * np.arange(0, 35, 7).astype(dtype) - np.testing.assert_array_equal(result, expected) + expected2 = 24 * np.arange(0, 35, 7).astype(dtype) + np.testing.assert_array_equal(result, expected2) @requires_cftime From 618760d9b539d1a89b29d1cc76fcfd7bd337e9b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 23 Jan 2025 11:53:29 +0100 Subject: [PATCH 3/7] add PR to existing entry for non-nanosecond datetimes --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9b40a323f39..8a9ba2d2ea3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,7 +50,7 @@ eventually be deprecated. New Features ~~~~~~~~~~~~ -- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`). +- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`, :pull:`9977`). By `Kai Mühlbauer `_ and `Spencer Clark `_. - Improve the error message raised when no key is matching the available variables in a dataset. (:pull:`9943`) By `Jimmy Westling `_. 
From 5d919a092ba134eaa136408c89fdee3e4d156f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sat, 25 Jan 2025 19:00:32 +0100 Subject: [PATCH 4/7] Update xarray/core/duck_array_ops.py Co-authored-by: Spencer Clark --- xarray/core/duck_array_ops.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 9e702fcd6c8..4f9f079467c 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -668,8 +668,9 @@ def np_timedelta64_to_float(array, datetime_unit): ----- The array is first converted to datetimeunit. """ - # todo: should we check for overflow here? - return array.astype(f"timedelta64[{datetime_unit}]").astype(np.float64) + unit, _ = np.datetime_data(array.dtype) + conversion_factor = np.timedelta64(1, unit) / np.timedelta64(1, datetime_unit) + return conversion_factor * array.astype(np.float64) def pd_timedelta_to_float(value, datetime_unit): From dfc9e03c88eb7d97cb3eff6d57f60384e6098585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sat, 25 Jan 2025 19:03:11 +0100 Subject: [PATCH 5/7] cast to "int64" in calculation of datime-like mean --- xarray/core/duck_array_ops.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 4f9f079467c..46dc81c62c6 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -662,12 +662,7 @@ def _to_pytimedelta(array, unit="us"): def np_timedelta64_to_float(array, datetime_unit): - """Convert numpy.timedelta64 to float. - - Notes - ----- - The array is first converted to datetimeunit. 
- """ + """Convert numpy.timedelta64 to float, possibly at a loss of resolution.""" unit, _ = np.datetime_data(array.dtype) conversion_factor = np.timedelta64(1, unit) / np.timedelta64(1, datetime_unit) return conversion_factor * array.astype(np.float64) @@ -714,15 +709,16 @@ def mean(array, axis=None, skipna=None, **kwargs): if dtypes.is_datetime_like(array.dtype): offset = _datetime_nanmin(array) - # from version 2025.01.2 xarray uses np.datetime64[unit] where unit - # is one of "s", "ms", "us", "ns" - # the respective unit is used for the timedelta representation - unit, _ = np.datetime_data(offset.dtype) - dtype = f"timedelta64[{unit}]" + # From version 2025.01.2 xarray uses np.datetime64[unit], where unit + # is one of "s", "ms", "us", "ns". + # To not have to worry about the resolution, we just convert the output + # to "int64" and let the dtype of offset take precedence. + # This is fully backwards compatible with datetime64[ns]. For other + # this might lead to imprecise output, where fractional parts are truncated. return ( _mean( datetime_to_numeric(array, offset), axis=axis, skipna=skipna, **kwargs - ).astype(dtype) + ).astype("int64") + offset ) elif _contains_cftime_datetimes(array): From 180aa0a843d80e977257c54179026bda45f1c096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 26 Jan 2025 11:30:48 +0100 Subject: [PATCH 6/7] Apply suggestions from code review --- xarray/core/duck_array_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 46dc81c62c6..31391854208 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -712,13 +712,13 @@ def mean(array, axis=None, skipna=None, **kwargs): # From version 2025.01.2 xarray uses np.datetime64[unit], where unit # is one of "s", "ms", "us", "ns". 
# To not have to worry about the resolution, we just convert the output - # to "int64" and let the dtype of offset take precedence. + # to "timedelta64" (without unit) and let the dtype of offset take precedence. # This is fully backwards compatible with datetime64[ns]. For other # this might lead to imprecise output, where fractional parts are truncated. return ( _mean( datetime_to_numeric(array, offset), axis=axis, skipna=skipna, **kwargs - ).astype("int64") + ).astype("timedelta64") + offset ) elif _contains_cftime_datetimes(array): From 295d8167ff5a45701f06b219ec5dac0a8f3c87ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 26 Jan 2025 17:08:20 +0100 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Spencer Clark --- xarray/core/duck_array_ops.py | 3 +-- xarray/tests/test_duck_array_ops.py | 4 ---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 31391854208..c3f1598050a 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -713,8 +713,7 @@ def mean(array, axis=None, skipna=None, **kwargs): # is one of "s", "ms", "us", "ns". # To not have to worry about the resolution, we just convert the output # to "timedelta64" (without unit) and let the dtype of offset take precedence. - # This is fully backwards compatible with datetime64[ns]. For other - # this might lead to imprecise output, where fractional parts are truncated. + # This is fully backwards compatible with datetime64[ns]. 
return ( _mean( datetime_to_numeric(array, offset), axis=axis, skipna=skipna, **kwargs diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 736b00a4f20..9b8b50d99a1 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -970,8 +970,6 @@ def test_np_timedelta64_to_float( # tests any combination of source np.timedelta64 (NPDatetimeUnitOptions) with # np_timedelta_to_float with dedicated target unit (PDDatetimeUnitOptions) td = np.timedelta64(1, np_dt_unit) - if _NS_PER_TIME_DELTA[np_dt_unit] < _NS_PER_TIME_DELTA[time_unit]: - pytest.skip("combination cannot be calculated without precision loss") expected = _NS_PER_TIME_DELTA[np_dt_unit] / _NS_PER_TIME_DELTA[time_unit] out = np_timedelta64_to_float(td, datetime_unit=time_unit) @@ -989,8 +987,6 @@ def test_pd_timedelta_to_float( # tests any combination of source pd.Timedelta (NPDatetimeUnitOptions) with # np_timedelta_to_float with dedicated target unit (PDDatetimeUnitOptions) td = pd.Timedelta(1, np_dt_unit) - if _NS_PER_TIME_DELTA[np_dt_unit] < _NS_PER_TIME_DELTA[time_unit]: - pytest.skip("combination cannot be calculated without precision loss") expected = _NS_PER_TIME_DELTA[np_dt_unit] / _NS_PER_TIME_DELTA[time_unit] out = pd_timedelta_to_float(td, datetime_unit=time_unit) pFad - Phonifier reborn