From 6e49c2a79b4fc1259600f8f2826dbf181d75c519 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 20 Jan 2025 13:34:12 -0500 Subject: [PATCH 1/6] Add time_unit argument to CFTimeIndex.to_datetimeindex --- doc/whats-new.rst | 7 +++ xarray/coding/cftimeindex.py | 71 ++++++++++++++++------- xarray/tests/test_accessor_dt.py | 4 +- xarray/tests/test_cftime_offsets.py | 6 +- xarray/tests/test_cftimeindex.py | 52 +++++++++++------ xarray/tests/test_cftimeindex_resample.py | 6 +- xarray/tests/test_groupby.py | 4 +- xarray/tests/test_missing.py | 4 +- 8 files changed, 106 insertions(+), 48 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fe698bc358b..a48d003de69 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,13 @@ New Features By `Kai Mühlbauer `_ and `Spencer Clark `_. - Improve the error message raised when no key is matching the available variables in a dataset. (:pull:`9943`) By `Jimmy Westling `_. +- Added a ``time_unit`` argument to :py:meth:`CFTimeIndex.to_datetimeindex`. + Note that in a future version of xarray, + :py:meth:`CFTimeIndex.to_datetimeindex` will return a microsecond-resolution + :py:class:`pandas.DatetimeIndex` instead of a nanosecond-resolution + :py:class:`pandas.DatetimeIndex` (:pull:`9965`). By `Spencer Clark + `_ and `Kai Mühlbauer + `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index bd5f51551c7..06f44399084 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -42,9 +42,8 @@ from __future__ import annotations import math -import warnings from datetime import timedelta -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Optional import numpy as np import pandas as pd @@ -58,7 +57,8 @@ ) from xarray.core.common import _contains_cftime_datetimes from xarray.core.options import OPTIONS -from xarray.core.utils import attempt_import, is_scalar +from xarray.core.types import PDDatetimeUnitOptions +from xarray.core.utils import attempt_import, emit_user_level_warning, is_scalar if TYPE_CHECKING: from xarray.coding.cftime_offsets import BaseCFTimeOffset @@ -239,6 +239,8 @@ class CFTimeIndex(pd.Index): cftime_range """ + _data: np.ndarray + year = _field_accessor("year", "The year of the datetime") month = _field_accessor("month", "The month of the datetime") day = _field_accessor("day", "The days of the datetime") @@ -544,14 +546,18 @@ def __rsub__(self, other): "that can be expressed at the nanosecond resolution." ) from err - def to_datetimeindex(self, unsafe=False): + def to_datetimeindex( + self, unsafe: bool = False, time_unit: Optional[PDDatetimeUnitOptions] = None + ) -> pd.DatetimeIndex: """If possible, convert this index to a pandas.DatetimeIndex. Parameters ---------- unsafe : bool - Flag to turn off warning when converting from a CFTimeIndex with - a non-standard calendar to a DatetimeIndex (default ``False``). + Flag to turn off calendar mismatch warnings (default ``False``). + time_unit : str + Time resolution of resulting DatetimeIndex. Can be one of `"s"`, + ``"ms"``, ``"us"``, or ``"ns"`` (default ``"ns"``). Returns ------- @@ -561,18 +567,20 @@ def to_datetimeindex(self, unsafe=False): ------ ValueError If the CFTimeIndex contains dates that are not possible in the - standard calendar or outside the nanosecond-precision range. + standard calendar or outside the range representable by the + specified ``time_unit``. Warns ----- RuntimeWarning - If converting from a non-standard calendar to a DatetimeIndex. + If converting from a non-standard calendar, or a Gregorian + calendar with dates prior to the reform (1582-10-15). Warnings -------- - Note that for non-standard calendars, this will change the calendar - type of the index. In that case the result of this method should be - used with caution. + Note that for non-proleptic Gregorian calendars, this will change the + calendar type of the index. In that case the result of this method + should be used with caution. Examples -------- @@ -580,26 +588,47 @@ def to_datetimeindex(self, unsafe=False): >>> times CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00], dtype='object', length=2, calendar='standard', freq=None) - >>> times.to_datetimeindex() - DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[us]', freq=None) + >>> times.to_datetimeindex(time_unit="ns") + DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None) """ if not self._data.size: return pd.DatetimeIndex([]) - # transform to us-resolution is needed for DatetimeIndex - nptimes = cftime_to_nptime(self, time_unit="us") + if time_unit is None: + emit_user_level_warning( + "In a future version of xarray to_datetimeindex will default " + "to returning a 'us'-resolution DatetimeIndex instead of a " + "'ns'-resolution DatetimeIndex. This warning can be silenced " + "by explicitly setting the time_unit of the index returned.", + FutureWarning, + ) + time_unit = "ns" + + nptimes = cftime_to_nptime(self, time_unit=time_unit) calendar = infer_calendar_name(self) if calendar not in _STANDARD_CALENDARS and not unsafe: - warnings.warn( + emit_user_level_warning( "Converting a CFTimeIndex with dates from a non-standard " - f"calendar, {calendar!r}, to a pandas.DatetimeIndex, which uses dates " - "from the standard calendar. This may lead to subtle errors " - "in operations that depend on the length of time between " - "dates.", + f"calendar, {calendar!r}, to a pandas.DatetimeIndex, which " + "uses dates from the standard calendar. This may lead to " + "subtle errors in operations that depend on the length of " + "time between dates.", RuntimeWarning, - stacklevel=2, ) + if calendar == "standard" and not unsafe: + reform_date = self.date_type(1582, 10, 15) + if self.min() < reform_date: + emit_user_level_warning( + "Converting a CFTimeIndex with dates from a Gregorian " + "calendar that fall before the reform date of 1582-10-15 " + "to a pandas.DatetimeIndex. During this time period the " + "Gregorian calendar and the proleptic Gregorian calendar " + "of the DatetimeIndex do not exactly align. This warning " + "can be silenced by setting unsafe=True.", + RuntimeWarning, + ) + return pd.DatetimeIndex(nptimes) def strftime(self, date_format): diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 64309966103..652b1752f12 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -519,7 +519,9 @@ def test_cftime_strftime_access(data) -> None: date_format = "%Y%m%d%H" result = data.time.dt.strftime(date_format) datetime_array = xr.DataArray( - xr.coding.cftimeindex.CFTimeIndex(data.time.values).to_datetimeindex(), + xr.coding.cftimeindex.CFTimeIndex(data.time.values).to_datetimeindex( + time_unit="ns" + ), name="stftime", coords=data.time.coords, dims=data.time.dims, diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 4db73048548..ee2f2f06c4f 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1672,7 +1672,7 @@ def test_new_to_legacy_freq_pd_freq_passthrough(freq, expected): ) def test_cftime_range_same_as_pandas(start, end, freq): result = date_range(start, end, freq=freq, calendar="standard", use_cftime=True) - result = result.to_datetimeindex() + result = result.to_datetimeindex(time_unit="ns") expected = date_range(start, end, freq=freq, use_cftime=False) np.testing.assert_array_equal(result, expected) @@ -1694,8 +1694,8 @@ def test_cftime_range_no_freq(start, end, periods): when freq is not provided, but start, end and periods are. """ # Generate date ranges using cftime_range - result = cftime_range(start=start, end=end, periods=periods) - result = result.to_datetimeindex() + cftimeindex = cftime_range(start=start, end=end, periods=periods) + result = cftimeindex.to_datetimeindex(time_unit="ns") expected = pd.date_range(start=start, end=end, periods=periods) np.testing.assert_array_equal(result, expected) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8fc79a0cc53..6df6ebe748b 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -1219,43 +1219,59 @@ def test_strftime_of_cftime_array(calendar): @requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) @pytest.mark.parametrize("unsafe", [False, True]) -def test_to_datetimeindex(calendar, unsafe): +def test_to_datetimeindex(calendar, unsafe) -> None: index = xr.cftime_range("2000", periods=5, calendar=calendar) - expected = pd.date_range("2000", periods=5, unit="us") + expected = pd.date_range("2000", periods=5, unit="ns") if calendar in _NON_STANDARD_CALENDARS and not unsafe: with pytest.warns(RuntimeWarning, match="non-standard"): - result = index.to_datetimeindex() + result = index.to_datetimeindex(time_unit="ns") else: - result = index.to_datetimeindex(unsafe=unsafe) + result = index.to_datetimeindex(unsafe=unsafe, time_unit="ns") assert result.equals(expected) np.testing.assert_array_equal(result, expected) assert isinstance(result, pd.DatetimeIndex) +@requires_cftime +def test_to_datetimeindex_future_warning() -> None: + index = xr.cftime_range("2000", periods=5) + expected = pd.date_range("2000", periods=5, unit="ns") + with pytest.warns(FutureWarning, match="In a future version"): + result = index.to_datetimeindex() + assert result.equals(expected) + assert result.dtype == expected.dtype + + @requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) -def test_to_datetimeindex_out_of_range(calendar): +def test_to_datetimeindex_out_of_range(calendar) -> None: index = xr.cftime_range("0001", periods=5, calendar=calendar) - # todo: suggestion from code review: - # - still warn when converting from a non-standard calendar - # to a proleptic Gregorian calendar - # - also warn when converting from a Gregorian calendar - # to a proleptic Gregorian calendar when dates fall before the reform - if calendar in _NON_STANDARD_CALENDARS: - with pytest.warns(RuntimeWarning, match="non-standard"): - index.to_datetimeindex() + with pytest.raises(ValueError, match="0001"): + index.to_datetimeindex(time_unit="ns") + + +@requires_cftime +@pytest.mark.parametrize("unsafe", [False, True]) +def test_to_datetimeindex_gregorian_pre_reform(unsafe) -> None: + index = xr.cftime_range("1582", periods=5, calendar="gregorian") + if unsafe: + result = index.to_datetimeindex(time_unit="us", unsafe=unsafe) else: - index.to_datetimeindex() + with pytest.warns(RuntimeWarning, match="reform"): + result = index.to_datetimeindex(time_unit="us", unsafe=unsafe) + expected = pd.date_range("1582", periods=5, unit="us") + assert result.equals(expected) + assert result.dtype == expected.dtype @requires_cftime @pytest.mark.parametrize("calendar", ["all_leap", "360_day"]) -def test_to_datetimeindex_feb_29(calendar): +def test_to_datetimeindex_feb_29(calendar) -> None: index = xr.cftime_range("2001-02-28", periods=2, calendar=calendar) with pytest.raises(ValueError, match="29"): - index.to_datetimeindex() + index.to_datetimeindex(time_unit="ns") @pytest.mark.xfail(reason="fails on pandas main branch") @@ -1271,10 +1287,10 @@ def test_multiindex(): @pytest.mark.parametrize("method", ["floor", "ceil", "round"]) def test_rounding_methods_against_datetimeindex(freq, method): # for now unit="us" seems good enough - expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777s", unit="us") + expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777s", unit="ns") expected = getattr(expected, method)(freq) result = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777s") - result = getattr(result, method)(freq).to_datetimeindex() + result = getattr(result, method)(freq).to_datetimeindex(time_unit="ns") assert result.equals(expected) diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 081970108a0..5510eaacf1f 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -97,7 +97,9 @@ def compare_against_pandas( ).mean() # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass result_cftimeindex["time"] = ( - result_cftimeindex.xindexes["time"].to_pandas_index().to_datetimeindex() + result_cftimeindex.xindexes["time"] + .to_pandas_index() + .to_datetimeindex(time_unit="ns") ) xr.testing.assert_identical(result_cftimeindex, result_datetimeindex) @@ -181,7 +183,7 @@ def test_calendars(calendar: str) -> None: # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass new_pd_index = da_cftime.xindexes["time"].to_pandas_index() assert isinstance(new_pd_index, CFTimeIndex) # shouldn't that be a pd.Index? - da_cftime["time"] = new_pd_index.to_datetimeindex() + da_cftime["time"] = new_pd_index.to_datetimeindex(time_unit="ns") xr.testing.assert_identical(da_cftime, da_datetime) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 3c7f46f5a02..d42f86f5ea6 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1868,7 +1868,7 @@ def test_resample( def resample_as_pandas(array, *args, **kwargs): array_ = array.copy(deep=True) if use_cftime: - array_["time"] = times.to_datetimeindex() + array_["time"] = times.to_datetimeindex(time_unit="ns") result = DataArray.from_series( array_.to_series().resample(*args, **kwargs).mean() ) @@ -2321,7 +2321,7 @@ def test_resample( def resample_as_pandas(ds, *args, **kwargs): ds_ = ds.copy(deep=True) if use_cftime: - ds_["time"] = times.to_datetimeindex() + ds_["time"] = times.to_datetimeindex(time_unit="ns") result = Dataset.from_dataframe( ds_.to_dataframe().resample(*args, **kwargs).mean() ) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index eb21cca0861..c1e2bb2ac85 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -609,7 +609,9 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar): def test_get_clean_interp_index_dt(cf_da, calendar, freq): """In the gregorian case, the index should be proportional to normal datetimes.""" g = cf_da(calendar, freq=freq) - g["stime"] = xr.Variable(data=g.time.to_index().to_datetimeindex(), dims=("time",)) + g["stime"] = xr.Variable( + data=g.time.to_index().to_datetimeindex(time_unit="ns"), dims=("time",) + ) gi = get_clean_interp_index(g, "time") si = get_clean_interp_index(g, "time", use_coordinate="stime") From 09c0ae435a67578dbd27f5c07f170f554fa6ecf4 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 21 Jan 2025 21:29:06 -0500 Subject: [PATCH 2/6] Update xarray/tests/test_cftime_offsets.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kai Mühlbauer --- xarray/tests/test_cftime_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index ee2f2f06c4f..086e187d2e6 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1670,7 +1670,7 @@ def test_new_to_legacy_freq_pd_freq_passthrough(freq, expected): pytest.param("-1YE", marks=requires_pandas_3), ), ) -def test_cftime_range_same_as_pandas(start, end, freq): +def test_cftime_range_same_as_pandas(start, end, freq) -> None: result = date_range(start, end, freq=freq, calendar="standard", use_cftime=True) result = result.to_datetimeindex(time_unit="ns") expected = date_range(start, end, freq=freq, use_cftime=False) From 06e7c6a5cf8796b3dd07fe5e3b2338b9592c9ff6 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 21 Jan 2025 21:29:27 -0500 Subject: [PATCH 3/6] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kai Mühlbauer --- xarray/tests/test_groupby.py | 2 +- xarray/tests/test_missing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index d42f86f5ea6..677854ca23a 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1865,7 +1865,7 @@ def test_resample( "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime ) - def resample_as_pandas(array, *args, **kwargs): + def resample_as_pandas(array, *args, **kwargs) -> None: array_ = array.copy(deep=True) if use_cftime: array_["time"] = times.to_datetimeindex(time_unit="ns") diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index c1e2bb2ac85..d3eaa70f1df 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -606,7 +606,7 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar): @requires_cftime @pytest.mark.parametrize("calendar", ["gregorian", "proleptic_gregorian"]) @pytest.mark.parametrize("freq", ["1D", "1ME", "1YE"]) -def test_get_clean_interp_index_dt(cf_da, calendar, freq): +def test_get_clean_interp_index_dt(cf_da, calendar, freq) -> None: """In the gregorian case, the index should be proportional to normal datetimes.""" g = cf_da(calendar, freq=freq) g["stime"] = xr.Variable( From 04f138ff69ce8d278953395af62bbd2e104481cf Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 21 Jan 2025 21:29:51 -0500 Subject: [PATCH 4/6] Update xarray/tests/test_cftimeindex.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kai Mühlbauer --- xarray/tests/test_cftimeindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 6df6ebe748b..85622d54973 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -1285,7 +1285,7 @@ def test_multiindex(): @requires_cftime @pytest.mark.parametrize("freq", ["3663s", "33min", "2h"]) @pytest.mark.parametrize("method", ["floor", "ceil", "round"]) -def test_rounding_methods_against_datetimeindex(freq, method): +def test_rounding_methods_against_datetimeindex(freq, method) -> None: # for now unit="us" seems good enough expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777s", unit="ns") expected = getattr(expected, method)(freq) From 1375034e8f9be0a596254253884cb2dec6373176 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 27 Jan 2025 20:48:28 -0500 Subject: [PATCH 5/6] Apply Deepak's wording suggestion Co-authored-by: Deepak Cherian --- xarray/coding/cftimeindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 06f44399084..34b266b9e2f 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -600,7 +600,7 @@ def to_datetimeindex( "In a future version of xarray to_datetimeindex will default " "to returning a 'us'-resolution DatetimeIndex instead of a " "'ns'-resolution DatetimeIndex. This warning can be silenced " - "by explicitly setting the time_unit of the index returned.", + "by explicitly passing the `time_unit` keyword argument.", FutureWarning, ) time_unit = "ns" From 79f744e3006cce792c42717cbe930303534b8d22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 29 Jan 2025 09:08:32 +0100 Subject: [PATCH 6/6] Update xarray/tests/test_groupby.py --- xarray/tests/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 104061ea0b1..2287130ebe2 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1867,7 +1867,7 @@ def test_resample( "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime ) - def resample_as_pandas(array, *args, **kwargs) -> None: + def resample_as_pandas(array, *args, **kwargs): array_ = array.copy(deep=True) if use_cftime: array_["time"] = times.to_datetimeindex(time_unit="ns") pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy