From da78bcc092609bb24aebb5be43f41c95519d78a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Dec 2024 13:30:52 +0100 Subject: [PATCH 01/13] refactor out _maybe_strip_tz_from_timestamp for better readability --- xarray/coding/times.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 4622298e152..16ef713b212 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -189,6 +189,14 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]: return delta_units, ref_date +def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: + # If the ref_date Timestamp is timezone-aware, convert to UTC and + # make it timezone-naive (GH 2649). + if date.tz is not None: + return date.tz_convert("UTC").tz_convert(None) + return date + + def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]: # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime @@ -196,10 +204,7 @@ def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]: # TODO: the strict enforcement of nanosecond precision Timestamps can be # relaxed when addressing GitHub issue #7493. ref_date = nanosecond_precision_timestamp(_ref_date) - # If the ref_date Timestamp is timezone-aware, convert to UTC and - # make it timezone-naive (GH 2649). 
- if ref_date.tz is not None: - ref_date = ref_date.tz_convert(None) + ref_date = _maybe_strip_tz_from_timestamp(ref_date) return time_units, ref_date From b55b9035281fcbd3239a1aa4df9f0caa5a5b1bff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Dec 2024 14:41:01 +0100 Subject: [PATCH 02/13] backwards compatible refactor of time coding, preparing for relaxing nanosecond-restriction --- xarray/coding/times.py | 112 +++++++++++++++++++++--------- xarray/core/pdcompat.py | 17 ++++- xarray/tests/test_coding_times.py | 4 +- 3 files changed, 97 insertions(+), 36 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 16ef713b212..1e652b96200 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -24,7 +24,7 @@ from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like from xarray.core.duck_array_ops import asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item -from xarray.core.pdcompat import nanosecond_precision_timestamp +from xarray.core.pdcompat import nanosecond_precision_timestamp, timestamp_as_unit from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type @@ -36,7 +36,11 @@ except ImportError: cftime = None -from xarray.core.types import CFCalendar, NPDatetimeUnitOptions, T_DuckArray +from xarray.core.types import ( + CFCalendar, + NPDatetimeUnitOptions, + T_DuckArray, +) T_Name = Union[Hashable, None] @@ -197,15 +201,18 @@ def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: return date -def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]: +def _unpack_time_unit_and_ref_date( + units: str, +) -> tuple[NPDatetimeUnitOptions, pd.Timestamp]: # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime - time_units, 
_ref_date = _unpack_netcdf_time_units(units) + time_unit, _ref_date = _unpack_netcdf_time_units(units) + time_unit = _netcdf_to_numpy_timeunit(time_unit) # TODO: the strict enforcement of nanosecond precision Timestamps can be # relaxed when addressing GitHub issue #7493. ref_date = nanosecond_precision_timestamp(_ref_date) ref_date = _maybe_strip_tz_from_timestamp(ref_date) - return time_units, ref_date + return time_unit, ref_date def _decode_cf_datetime_dtype( @@ -252,6 +259,30 @@ def _decode_datetime_with_cftime( return np.array([], dtype=object) +def _check_date_for_units_since_refdate( + date, unit: str, ref_date: pd.Timestamp +) -> pd.Timestamp: + # check for out-of-bounds floats and raise + if date > np.iinfo("int64").max or date < np.iinfo("int64").min: + raise OutOfBoundsTimedelta( + f"Value {date} can't be represented as Datetime/Timedelta." + ) + delta = date * np.timedelta64(1, unit) + if not np.isnan(delta): + # this will raise on dtype overflow for integer dtypes + if date.dtype.kind in "iu" and not np.int64(delta) == date: + raise OutOfBoundsTimedelta( + "DType overflow in Datetime/Timedelta calculation." + ) + # this will raise on overflow if ref_date + delta + # can't be represented in the current ref_date resolution + return timestamp_as_unit(ref_date + delta, ref_date.unit) + else: + # if date is exactly NaT (np.iinfo("int64").min) return refdate + # to make follow-up checks work + return ref_date + + def _decode_datetime_with_pandas( flat_num_dates: np.ndarray, units: str, calendar: str ) -> np.ndarray: @@ -270,12 +301,8 @@ def _decode_datetime_with_pandas( elif flat_num_dates.dtype.kind == "u": flat_num_dates = flat_num_dates.astype(np.uint64) - time_units, ref_date_str = _unpack_netcdf_time_units(units) - time_units = _netcdf_to_numpy_timeunit(time_units) try: - # TODO: the strict enforcement of nanosecond precision Timestamps can be - # relaxed when addressing GitHub issue #7493. 
- ref_date = nanosecond_precision_timestamp(ref_date_str) + time_unit, ref_date = _unpack_time_unit_and_ref_date(units) except ValueError as err: # ValueError is raised by pd.Timestamp for non-ISO timestamp # strings, in which case we fall back to using cftime @@ -285,8 +312,12 @@ def _decode_datetime_with_pandas( warnings.filterwarnings("ignore", "invalid value encountered", RuntimeWarning) if flat_num_dates.size > 0: # avoid size 0 datetimes GH1329 - pd.to_timedelta(flat_num_dates.min(), time_units) + ref_date - pd.to_timedelta(flat_num_dates.max(), time_units) + ref_date + _check_date_for_units_since_refdate( + flat_num_dates.min(), time_unit, ref_date + ) + _check_date_for_units_since_refdate( + flat_num_dates.max(), time_unit, ref_date + ) # To avoid integer overflow when converting to nanosecond units for integer # dtypes smaller than np.int64 cast all integer and unsigned integer dtype @@ -299,20 +330,24 @@ def _decode_datetime_with_pandas( elif flat_num_dates.dtype.kind in "f": flat_num_dates = flat_num_dates.astype(np.float64) - # Cast input ordinals to integers of nanoseconds because pd.to_timedelta - # works much faster when dealing with integers (GH 1399). 
- # properly handle NaN/NaT to prevent casting NaN to int + # keep NaT/nan mask nan = np.isnan(flat_num_dates) | (flat_num_dates == np.iinfo(np.int64).min) - flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA[time_units] - flat_num_dates_ns_int = np.zeros_like(flat_num_dates, dtype=np.int64) - flat_num_dates_ns_int[nan] = np.iinfo(np.int64).min - flat_num_dates_ns_int[~nan] = flat_num_dates[~nan].astype(np.int64) + # in case we need to change the unit, we fix the numbers here + # this should be safe, as errors would have been raised above + ns_time_unit = _NS_PER_TIME_DELTA[time_unit] + ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_date.unit] + if flat_num_dates.dtype.kind in "iuf" and (ns_time_unit > ns_ref_date_unit): + flat_num_dates *= np.int64(ns_time_unit / ns_ref_date_unit) + time_unit = ref_date.unit - # Use pd.to_timedelta to safely cast integer values to timedeltas, - # and add those to a Timestamp to safely produce a DatetimeIndex. This - # ensures that we do not encounter integer overflow at any point in the - # process without raising OutOfBoundsDatetime. 
- return (pd.to_timedelta(flat_num_dates_ns_int, "ns") + ref_date).values + # Cast input ordinals to integers and properly handle NaN/NaT + # to prevent casting NaN to int + flat_num_dates_int = np.zeros_like(flat_num_dates, dtype=np.int64) + flat_num_dates_int[nan] = np.iinfo(np.int64).min + flat_num_dates_int[~nan] = flat_num_dates[~nan].astype(np.int64) + + # cast to timedelta64[time_unit] and add to ref_date + return ref_date + flat_num_dates_int.astype(f"timedelta64[{time_unit}]") def decode_cf_datetime( @@ -344,10 +379,16 @@ def decode_cf_datetime( dates = _decode_datetime_with_cftime( flat_num_dates.astype(float), units, calendar ) + # retrieve cftype + cftype = type(dates[np.nanargmin(num_dates)]) + # "ns" boarders + # between ['1677-09-21T00:12:43.145224193', '2262-04-11T23:47:16.854775807'] + lower = cftype(1677, 9, 21, 0, 12, 43, 145224) + upper = cftype(2262, 4, 11, 23, 47, 16, 854775) if ( - dates[np.nanargmin(num_dates)].year < 1678 - or dates[np.nanargmax(num_dates)].year >= 2262 + dates[np.nanargmin(num_dates)] < lower + or dates[np.nanargmax(num_dates)] > upper ): if _is_standard_calendar(calendar): warnings.warn( @@ -768,8 +809,8 @@ def _eagerly_encode_cf_datetime( raise OutOfBoundsDatetime assert dates.dtype == "datetime64[ns]" - time_units, ref_date = _unpack_time_units_and_ref_date(units) - time_delta = _time_units_to_timedelta64(time_units) + time_unit, ref_date = _unpack_time_unit_and_ref_date(units) + time_delta = np.timedelta64(1, time_unit) # Wrap the dates in a DatetimeIndex to do the subtraction to ensure # an OverflowError is raised if the ref_date is too far away from @@ -778,16 +819,17 @@ def _eagerly_encode_cf_datetime( time_deltas = dates_as_index - ref_date # retrieve needed units to faithfully encode to int64 - needed_units, data_ref_date = _unpack_time_units_and_ref_date(data_units) + needed_unit, data_ref_date = _unpack_time_unit_and_ref_date(data_units) + needed_units = _numpy_to_netcdf_timeunit(needed_unit) if data_units != 
units: # this accounts for differences in the reference times ref_delta = abs(data_ref_date - ref_date).to_timedelta64() - data_delta = _time_units_to_timedelta64(needed_units) + data_delta = np.timedelta64(1, needed_unit) if (ref_delta % data_delta) > np.timedelta64(0, "ns"): needed_units = _infer_time_units_from_diff(ref_delta) # needed time delta to encode faithfully to int64 - needed_time_delta = _time_units_to_timedelta64(needed_units) + needed_time_delta = _unit_timedelta_numpy(needed_units) floor_division = np.issubdtype(dtype, np.integer) or dtype is None if time_delta > needed_time_delta: @@ -800,6 +842,7 @@ def _eagerly_encode_cf_datetime( f"Set encoding['dtype'] to floating point dtype to silence this warning." ) elif np.issubdtype(dtype, np.integer) and allow_units_modification: + floor_division = True new_units = f"{needed_units} since {format_timestamp(ref_date)}" emit_user_level_warning( f"Times can't be serialized faithfully to int64 with requested units {units!r}. " @@ -809,9 +852,12 @@ def _eagerly_encode_cf_datetime( ) units = new_units time_delta = needed_time_delta - floor_division = True - num = _division(time_deltas, time_delta, floor_division) + # get resolution of TimedeltaIndex and align time_delta + # todo: check, if this works in any case + num = _division( + time_deltas, time_delta.astype(f"=m8[{time_deltas.unit}]"), floor_division + ) num = reshape(num.values, dates.shape) except (OutOfBoundsDatetime, OverflowError, ValueError): diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index ae4febd6beb..cefe0c71a0a 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -36,11 +36,13 @@ from __future__ import annotations from enum import Enum -from typing import Literal +from typing import Literal, cast import pandas as pd from packaging.version import Version +from xarray.core.types import PDDatetimeUnitOptions + def count_not_none(*args) -> int: """Compute the number of non-None arguments. 
@@ -73,6 +75,19 @@ def __repr__(self) -> str: NoDefault = Literal[_NoDefault.no_default] # For typing following pandas +def timestamp_as_unit(date: pd.Timestamp, unit: str) -> pd.Timestamp: + # compatibility function for pandas issue + # where "as_unit" is not defined for pandas.Timestamp + # in pandas versions < 2.2 + # can be removed minimum pandas version is >= 2.2 + unit = cast(PDDatetimeUnitOptions, unit) + if hasattr(date, "as_unit"): + date = date.as_unit(unit) + elif hasattr(date, "_as_unit"): + date = date._as_unit(unit) + return date + + def nanosecond_precision_timestamp(*args, **kwargs) -> pd.Timestamp: """Return a nanosecond-precision Timestamp object. diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 9a51ca40d07..685767b71bb 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -167,8 +167,8 @@ def test_decode_cf_datetime_overflow() -> None: units = "days since 2000-01-01 00:00:00" # date after 2262 and before 1678 - days = (-117608, 95795) - expected = (datetime(1677, 12, 31), datetime(2262, 4, 12)) + days = (-117710, 95795) + expected = (datetime(1677, 9, 20), datetime(2262, 4, 12)) for i, day in enumerate(days): with warnings.catch_warnings(): From 34ad89eefcbbfd06ba5bd63036f4756bce5a62d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Dec 2024 14:46:59 +0100 Subject: [PATCH 03/13] add whats-new.rst entry --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dec80590c11..5f3dce53d28 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -65,6 +65,8 @@ Internal Changes within ``as_compatible_data``. This is consistent with how lists of these objects will be converted (:pull:`9900`). By `Kai Mühlbauer `_. +- Refactor of time coding to prepare for realxing nanosecond restriction (:pull:`9906`). + By `Kai Mühlbauer `_. .. 
_whats-new.2024.11.0: From 69645eb8d838778f1112bd21e59b4b6d0ed66ad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Dec 2024 14:48:29 +0100 Subject: [PATCH 04/13] Update doc/whats-new.rst --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5f3dce53d28..f4cfbafc0ae 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -65,7 +65,7 @@ Internal Changes within ``as_compatible_data``. This is consistent with how lists of these objects will be converted (:pull:`9900`). By `Kai Mühlbauer `_. -- Refactor of time coding to prepare for realxing nanosecond restriction (:pull:`9906`). +- Refactor of time coding to prepare for relaxing nanosecond restriction (:pull:`9906`). By `Kai Mühlbauer `_. .. _whats-new.2024.11.0: From 313bf2e93520123089ee78867d08ee272e14b33b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Dec 2024 14:50:07 +0100 Subject: [PATCH 05/13] Update xarray/coding/times.py --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 1e652b96200..4ee7c839d2c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -381,7 +381,7 @@ def decode_cf_datetime( ) # retrieve cftype cftype = type(dates[np.nanargmin(num_dates)]) - # "ns" boarders + # "ns" borders # between ['1677-09-21T00:12:43.145224193', '2262-04-11T23:47:16.854775807'] lower = cftype(1677, 9, 21, 0, 12, 43, 145224) upper = cftype(2262, 4, 11, 23, 47, 16, 854775) From 669539dbc4bb8abb3510610db849ab32ef510c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 13:48:57 +0100 Subject: [PATCH 06/13] only check for unsigned int --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 4ee7c839d2c..fe07f1b7244 100644 --- a/xarray/coding/times.py +++ 
b/xarray/coding/times.py @@ -270,7 +270,7 @@ def _check_date_for_units_since_refdate( delta = date * np.timedelta64(1, unit) if not np.isnan(delta): # this will raise on dtype overflow for integer dtypes - if date.dtype.kind in "iu" and not np.int64(delta) == date: + if date.dtype.kind in "u" and not np.int64(delta) == date: raise OutOfBoundsTimedelta( "DType overflow in Datetime/Timedelta calculation." ) From 678e6d90b9540e37f76c05a9b06a88cade728b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 13:50:34 +0100 Subject: [PATCH 07/13] return pd.NaT in case of np.iinfo("int64").min --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index fe07f1b7244..4ab3a8f5662 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -280,7 +280,7 @@ def _check_date_for_units_since_refdate( else: # if date is exactly NaT (np.iinfo("int64").min) return refdate # to make follow-up checks work - return ref_date + return pd.NaT def _decode_datetime_with_pandas( From fb5a4fb05469b2b23b36ec58cae62071f3b6d60c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 13:51:52 +0100 Subject: [PATCH 08/13] skip check for `"iuf"`, it's unneeded --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 4ab3a8f5662..6886664f951 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -336,7 +336,7 @@ def _decode_datetime_with_pandas( # this should be safe, as errors would have been raised above ns_time_unit = _NS_PER_TIME_DELTA[time_unit] ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_date.unit] - if flat_num_dates.dtype.kind in "iuf" and (ns_time_unit > ns_ref_date_unit): + if ns_time_unit > ns_ref_date_unit: flat_num_dates *= np.int64(ns_time_unit / ns_ref_date_unit) time_unit = ref_date.unit From 
70f96d09b66664ae58649e29954ed40bb33912af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 13:57:49 +0100 Subject: [PATCH 09/13] return pd.Timestamp("NaT") instead pd.NaT (fixes typing) --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 6886664f951..7d393acaddf 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -280,7 +280,7 @@ def _check_date_for_units_since_refdate( else: # if date is exactly NaT (np.iinfo("int64").min) return refdate # to make follow-up checks work - return pd.NaT + return pd.Timestamp("NaT") def _decode_datetime_with_pandas( From de6f3db0a1d354d990fa5b8f40b25e3459d80e6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 29 Dec 2024 16:04:33 +0100 Subject: [PATCH 10/13] Update xarray/coding/times.py Co-authored-by: Spencer Clark --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 7d393acaddf..326682aab56 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -278,7 +278,7 @@ def _check_date_for_units_since_refdate( # can't be represented in the current ref_date resolution return timestamp_as_unit(ref_date + delta, ref_date.unit) else: - # if date is exactly NaT (np.iinfo("int64").min) return refdate + # if date is exactly NaT (np.iinfo("int64").min) return NaT # to make follow-up checks work return pd.Timestamp("NaT") From 90de1edcacf3829c2f5bb4acd5bea2727deaa34a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Jan 2025 14:16:37 +0000 Subject: [PATCH 11/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/times.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/coding/times.py 
b/xarray/coding/times.py index 26840c6fd22..cf55ca1a227 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -262,18 +262,18 @@ def _parse_iso8601(date_type, timestr): replace[attr] = int(value) return default.replace(**replace), resolution - + def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: # If the ref_date Timestamp is timezone-aware, convert to UTC and # make it timezone-naive (GH 2649). if date.tz is not None: return date.tz_convert("UTC").tz_convert(None) - return date - - + return date + + def _unpack_time_unit_and_ref_date( units: str, -) -> tuple[NPDatetimeUnitOptions, pd.Timestamp]: +) -> tuple[NPDatetimeUnitOptions, pd.Timestamp]: # same us _unpack_netcdf_time_units but finalizes ref_date for # processing in encode_cf_datetime time_unit, _ref_date = _unpack_netcdf_time_units(units) From 21784136c748b4893082a5cdd242cb8f5c136408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 1 Jan 2025 15:21:11 +0100 Subject: [PATCH 12/13] fix indentation --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index cf55ca1a227..c02ad902ce1 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -263,7 +263,7 @@ def _parse_iso8601(date_type, timestr): return default.replace(**replace), resolution - def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: +def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp: # If the ref_date Timestamp is timezone-aware, convert to UTC and # make it timezone-naive (GH 2649). 
if date.tz is not None: From f4e10de2ce3cf2997b54a3f71390bbf3eafae8d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 2 Jan 2025 15:07:46 +0100 Subject: [PATCH 13/13] add suggestion from code review --- xarray/coding/times.py | 8 +++----- xarray/core/pdcompat.py | 15 ++++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index c02ad902ce1..74ba97af81b 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -450,16 +450,14 @@ def decode_cf_datetime( flat_num_dates.astype(float), units, calendar ) # retrieve cftype - cftype = type(dates[np.nanargmin(num_dates)]) + dates_min = dates[np.nanargmin(num_dates)] + cftype = type(dates_min) # "ns" borders # between ['1677-09-21T00:12:43.145224193', '2262-04-11T23:47:16.854775807'] lower = cftype(1677, 9, 21, 0, 12, 43, 145224) upper = cftype(2262, 4, 11, 23, 47, 16, 854775) - if ( - dates[np.nanargmin(num_dates)] < lower - or dates[np.nanargmax(num_dates)] > upper - ): + if dates_min < lower or dates[np.nanargmax(num_dates)] > upper: if _is_standard_calendar(calendar): warnings.warn( "Unable to decode time axis into full " diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index cefe0c71a0a..4c54715e2a0 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -36,7 +36,7 @@ from __future__ import annotations from enum import Enum -from typing import Literal, cast +from typing import Literal import pandas as pd from packaging.version import Version @@ -75,12 +75,13 @@ def __repr__(self) -> str: NoDefault = Literal[_NoDefault.no_default] # For typing following pandas -def timestamp_as_unit(date: pd.Timestamp, unit: str) -> pd.Timestamp: - # compatibility function for pandas issue - # where "as_unit" is not defined for pandas.Timestamp - # in pandas versions < 2.2 - # can be removed minimum pandas version is >= 2.2 - unit = cast(PDDatetimeUnitOptions, unit) +def timestamp_as_unit(date: 
pd.Timestamp, unit: PDDatetimeUnitOptions) -> pd.Timestamp: + """Convert the underlying int64 representation to the given unit. + + Compatibility function for pandas issue where "as_unit" is not defined + for pandas.Timestamp in pandas versions < 2.2. Can be removed once the minimum + pandas version is >= 2.2. + """ if hasattr(date, "as_unit"): date = date.as_unit(unit) elif hasattr(date, "_as_unit"): pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy