From 86f37b1aa430713a607623a8807bd4d55bc8cb3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 7 Feb 2025 09:44:44 +0100 Subject: [PATCH 1/4] use mean of min/max years as offset in calculation of datetime64 mean --- xarray/core/duck_array_ops.py | 15 +++++++++------ xarray/tests/test_duck_array_ops.py | 13 +++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 45fdaee9768..6eee2a2c3db 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -549,8 +549,8 @@ def array_any(array, axis=None, keepdims=False, **kwargs): _mean = _create_nan_agg_method("mean", invariant_0d=True) -def _datetime_nanmin(array): - """nanmin() function for datetime64. +def _datetime_nanreduce(array, func): + """nanreduce() function for datetime64. Caveats that this function deals with: @@ -562,7 +562,7 @@ def _datetime_nanmin(array): assert dtypes.is_datetime_like(dtype) # (NaT).astype(float) does not produce NaN... 
array = where(pandas_isnull(array), np.nan, array.astype(float)) - array = min(array, skipna=True) + array = func(array, skipna=True) if isinstance(array, float): array = np.array(array) # ...but (NaN).astype("M8") does produce NaT @@ -597,7 +597,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): # Set offset to minimum if not given if offset is None: if dtypes.is_datetime_like(array.dtype): - offset = _datetime_nanmin(array) + offset = _datetime_nanreduce(array, min) else: offset = min(array) @@ -717,8 +717,11 @@ def mean(array, axis=None, skipna=None, **kwargs): array = asarray(array) if dtypes.is_datetime_like(array.dtype): - offset = _datetime_nanmin(array) - + dmin = _datetime_nanreduce(array, min).astype("datetime64[Y]").astype(int) + dmax = _datetime_nanreduce(array, max).astype("datetime64[Y]").astype(int) + offset = ( + np.array((dmin + dmax) // 2).astype("datetime64[Y]").astype(array.dtype) + ) # From version 2025.01.2 xarray uses np.datetime64[unit], where unit # is one of "s", "ms", "us", "ns". 
# To not have to worry about the resolution, we just convert the output diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 0771e0034af..1f4ef2acf91 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -481,6 +481,19 @@ def test_cftime_datetime_mean(dask): assert_equal(result, expected) +@pytest.mark.parametrize("dask", [False, True]) +def test_mean_over_long_spanning_datetime64(dask) -> None: + if dask and not has_dask: + pytest.skip("requires dask") + array = np.array(["1678-01-01", "NaT", "2260-01-01"], dtype="datetime64[ns]") + da = DataArray(array, dims=["time"]) + if dask: + da = da.chunk({"time": 2}) + expected = DataArray(np.array("1969-01-01", dtype="datetime64[ns]")) + result = da.mean() + assert_equal(result, expected) + + @requires_cftime @requires_dask def test_mean_over_non_time_dim_of_dataset_with_dask_backed_cftime_data(): From fbe54963bb35554220db43e46643b5efac7cd3eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 7 Feb 2025 10:26:04 +0100 Subject: [PATCH 2/4] reinstate _datetime_nanmin as it is used downstream in flox<0.10.0 --- xarray/core/duck_array_ops.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 6eee2a2c3db..faec5ded04e 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -549,6 +549,10 @@ def array_any(array, axis=None, keepdims=False, **kwargs): _mean = _create_nan_agg_method("mean", invariant_0d=True) +def _datetime_nanmin(array): + return _datetime_nanreduce(array, min) + + def _datetime_nanreduce(array, func): """nanreduce() function for datetime64. 
From 7c5a1d09e21a4498d6f2acf243ad9fa58f89e37f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 7 Feb 2025 11:04:06 +0100 Subject: [PATCH 3/4] add whats-new.rst entry --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e4024835409..1521f6d1f18 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,9 @@ Bug fixes "nanoseconds" were chosen by default, which are optimal for nanosecond-resolution times, but not for times with coarser resolution. By `Spencer Clark `_ (:pull:`10017`). +- Use mean of min/max years as offset in calculation of datetime64 mean +(:issue:`10019`, :pull:`10035`). + By `Kai Mühlbauer `_. Documentation From fcf711b717dc63598e785e0ccc5fc880df291597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 7 Feb 2025 11:40:30 +0100 Subject: [PATCH 4/4] add whats-new.rst entry --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1521f6d1f18..fe63a923fe6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -40,7 +40,7 @@ Bug fixes nanosecond-resolution times, but not for times with coarser resolution. By `Spencer Clark `_ (:pull:`10017`). - Use mean of min/max years as offset in calculation of datetime64 mean -(:issue:`10019`, :pull:`10035`). + (:issue:`10019`, :pull:`10035`). By `Kai Mühlbauer `_. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy