From 4a581f459a25d5e999ced2504e6b08fd0dcc0980 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 29 Jun 2025 19:44:21 -0400 Subject: [PATCH 1/4] Fix literal timedelta encoding bugs --- doc/whats-new.rst | 8 ++++++++ xarray/backends/netcdf3.py | 20 +++++++++++++++++--- xarray/coding/times.py | 7 +++++-- xarray/tests/test_backends.py | 18 ++++++++++++++++++ xarray/tests/test_coding_times.py | 5 ++--- 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6db780484bd..44711c327ea 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -121,6 +121,14 @@ Bug fixes (:pull:`10352`). By `Spencer Clark `_. - Avoid unsafe casts from float to unsigned int in CFMaskCoder (:issue:`9815`, :pull:`9964`). By ` Elliott Sales de Andrade `_. +- Fix attribute overwriting bug when decoding literally encoded + :py:class:`numpy.timedelta64` values from disk (:issue:`10468`, + :pull:`10469`). By `Spencer Clark `_. +- Fix default ``"_FillValue"`` dtype coercion bug when literally encoding + :py:class:`numpy.timedelta64` values to an on-disk format that only supports + 32-bit integers (:issue:`10466`, :pull:`10469`). By `Spencer Clark + `_. + Performance ~~~~~~~~~~~ diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index 6f66b6c1059..bdec2fdad34 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -103,7 +103,7 @@ def encode_nc3_attrs(attrs): return {k: encode_nc3_attr_value(v) for k, v in attrs.items()} -def _maybe_prepare_times(var): +def _maybe_prepare_times(var, name=None): # checks for integer-based time-like and # replaces np.iinfo(np.int64).min with _FillValue or np.nan # this keeps backwards compatibility @@ -112,7 +112,21 @@ def _maybe_prepare_times(var): if data.dtype.kind in "iu": units = var.attrs.get("units", None) if units is not None and coding.variables._is_time_like(units): - mask = data == np.iinfo(np.int64).min + default_int64_fill_value = np.iinfo(np.int64).min + default_int32_fill_value = np.iinfo(np.int32).min + mask = data == default_int64_fill_value + + if var.attrs.get("_FillValue") == default_int64_fill_value: + if (data == default_int32_fill_value).any(): + raise ValueError( + f"Could not safely coerce default int64 _FillValue " + f"({default_int64_fill_value}) to the analogous int32 " + f"value ({default_int32_fill_value}), since it " + f"already exists as non-missing within variable " + f"{name!r}. Try explicitly setting " + f"encoding['_FillValue'] to another int32 value." + ) + var.attrs["_FillValue"] = default_int32_fill_value if mask.any(): data = np.where(mask, var.attrs.get("_FillValue", np.nan), data) return data @@ -124,7 +138,7 @@ def encode_nc3_variable(var, name=None): coding.strings.CharacterArrayCoder(), ]: var = coder.encode(var, name=name) - data = _maybe_prepare_times(var) + data = _maybe_prepare_times(var, name=name) data = coerce_nc3_dtype(data) attrs = encode_nc3_attrs(var.attrs) return Variable(var.dims, data, attrs, var.encoding) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index e6bc8ca59bd..85718822f62 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1509,8 +1509,11 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: f"present in encoding. Check the encoding parameters " f"of variable {name!r}." ) - dtype = pop_to(attrs, encoding, "dtype", name=name) - dtype = np.dtype(dtype) + # Overwrite the on-disk dtype encoding, which is numeric, with + # the dtype attribute stored on disk, which corresponds to + # a timedelta64 dtype. + encoding["dtype"] = attrs.pop("dtype") + dtype = np.dtype(encoding["dtype"]) resolution, _ = np.datetime_data(dtype) resolution = cast(NPDatetimeUnitOptions, resolution) if np.timedelta64(1, resolution) > np.timedelta64(1, "s"): diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index fe4c1684cbd..a158b4973de 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -351,6 +351,16 @@ def test_dtype_coercion_error(self) -> None: with pytest.raises(ValueError, match="could not safely cast"): ds.to_netcdf(path, format=format) + def test_literal_timedelta_fill_value_coercion_error(self) -> None: + for format in self.netcdf3_formats: + timedeltas = np.array( + [0, np.iinfo(np.int32).min, np.iinfo(np.int64).min] + ).astype("timedelta64[s]") + ds = Dataset({"timedeltas": ("timedeltas", timedeltas)}) + with create_tmp_file(allow_cleanup_failure=False) as path: + with pytest.raises(ValueError, match="_FillValue"): + ds.to_netcdf(path, format=format) + class DatasetIOBase: engine: T_NetcdfEngine | None = None @@ -642,6 +652,14 @@ def test_roundtrip_timedelta_data(self) -> None: ) as actual: assert_identical(expected, actual) + def test_roundtrip_literal_timedelta_data(self) -> None: + time_deltas = pd.to_timedelta(["1h", "2h", "NaT"]).as_unit("s") # type: ignore[arg-type, unused-ignore] + expected = Dataset( + {"td": ("td", time_deltas), "td0": time_deltas[0].to_numpy()} + ) + with self.roundtrip(expected) as actual: + assert_identical(expected, actual) + def test_roundtrip_float64_data(self) -> None: expected = Dataset({"x": ("y", np.array([1.0, 2.0, np.pi], dtype="float64"))}) with self.roundtrip(expected) as actual: diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 65caab1c709..3b85edfa81e 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -2006,10 +2006,9 @@ def test_literal_timedelta_coding_non_pandas_fine_resolution_warning() -> None: assert decoded.dtype == np.dtype("timedelta64[ns]") -@pytest.mark.parametrize("attribute", ["dtype", "units"]) -def test_literal_timedelta_decode_invalid_encoding(attribute) -> None: +def test_literal_timedelta_decode_invalid_encoding() -> None: attrs = {"dtype": "timedelta64[s]", "units": "seconds"} - encoding = {attribute: "foo"} + encoding = {"units": "foo"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs, encoding=encoding) with pytest.raises(ValueError, match="failed to prevent"): conventions.decode_cf_variable("timedeltas", encoded) From bdda733a569713f640f6ae65900fce79d6d1eb53 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 1 Jul 2025 10:20:12 -0400 Subject: [PATCH 2/4] Unify timedelta64 coding logic between the old and new approaches Always write a dtype attribute to disk regardless of how the timedeltas were decoded. --- doc/whats-new.rst | 9 +- xarray/backends/netcdf3.py | 20 +--- xarray/coding/times.py | 157 +++++++++++++----------------- xarray/tests/test_backends.py | 17 +--- xarray/tests/test_coding_times.py | 126 +++++++++++++++--------- 5 files changed, 159 insertions(+), 170 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 44711c327ea..6d5b65e0de8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -121,10 +121,11 @@ Bug fixes (:pull:`10352`). By `Spencer Clark `_. - Avoid unsafe casts from float to unsigned int in CFMaskCoder (:issue:`9815`, :pull:`9964`). By ` Elliott Sales de Andrade `_. -- Fix attribute overwriting bug when decoding literally encoded - :py:class:`numpy.timedelta64` values from disk (:issue:`10468`, - :pull:`10469`). By `Spencer Clark `_. -- Fix default ``"_FillValue"`` dtype coercion bug when literally encoding +- Fix attribute overwriting bug when decoding encoded + :py:class:`numpy.timedelta64` values from disk with a dtype attribute + (:issue:`10468`, :pull:`10469`). By `Spencer Clark + `_. +- Fix default ``"_FillValue"`` dtype coercion bug when encoding :py:class:`numpy.timedelta64` values to an on-disk format that only supports 32-bit integers (:issue:`10466`, :pull:`10469`). By `Spencer Clark `_. diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index bdec2fdad34..6f66b6c1059 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -103,7 +103,7 @@ def encode_nc3_attrs(attrs): return {k: encode_nc3_attr_value(v) for k, v in attrs.items()} -def _maybe_prepare_times(var, name=None): +def _maybe_prepare_times(var): # checks for integer-based time-like and # replaces np.iinfo(np.int64).min with _FillValue or np.nan # this keeps backwards compatibility @@ -112,21 +112,7 @@ def _maybe_prepare_times(var, name=None): if data.dtype.kind in "iu": units = var.attrs.get("units", None) if units is not None and coding.variables._is_time_like(units): - default_int64_fill_value = np.iinfo(np.int64).min - default_int32_fill_value = np.iinfo(np.int32).min - mask = data == default_int64_fill_value - - if var.attrs.get("_FillValue") == default_int64_fill_value: - if (data == default_int32_fill_value).any(): - raise ValueError( - f"Could not safely coerce default int64 _FillValue " - f"({default_int64_fill_value}) to the analogous int32 " - f"value ({default_int32_fill_value}), since it " - f"already exists as non-missing within variable " - f"{name!r}. Try explicitly setting " - f"encoding['_FillValue'] to another int32 value." - ) - var.attrs["_FillValue"] = default_int32_fill_value + mask = data == np.iinfo(np.int64).min if mask.any(): data = np.where(mask, var.attrs.get("_FillValue", np.nan), data) return data @@ -138,7 +124,7 @@ def encode_nc3_variable(var, name=None): coding.strings.CharacterArrayCoder(), ]: var = coder.encode(var, name=name) - data = _maybe_prepare_times(var, name=name) + data = _maybe_prepare_times(var) data = coerce_nc3_dtype(data) attrs = encode_nc3_attrs(var.attrs) return Variable(var.dims, data, attrs, var.encoding) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 85718822f62..6dd23cd3872 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1410,6 +1410,43 @@ def has_timedelta64_encoding_dtype(attrs_or_encoding: dict) -> bool: return isinstance(dtype, str) and dtype.startswith("timedelta64") +def resolve_time_unit_from_attrs_dtype( + attrs_dtype: str, name: T_Name +) -> PDDatetimeUnitOptions: + dtype = np.dtype(attrs_dtype) + resolution, _ = np.datetime_data(dtype) + resolution = cast(NPDatetimeUnitOptions, resolution) + if np.timedelta64(1, resolution) > np.timedelta64(1, "s"): + time_unit = cast(PDDatetimeUnitOptions, "s") + message = ( + f"Following pandas, xarray only supports decoding to timedelta64 " + f"values with a resolution of 's', 'ms', 'us', or 'ns'. Encoded " + f"values for variable {name!r} have a resolution of " + f"{resolution!r}. Attempting to decode to a resolution of 's'. " + f"Note, depending on the encoded values, this may lead to an " + f"OverflowError. Additionally, data will not be identically round " + f"tripped; xarray will choose an encoding dtype of " + f"'timedelta64[s]' when re-encoding." + ) + emit_user_level_warning(message) + elif np.timedelta64(1, resolution) < np.timedelta64(1, "ns"): + time_unit = cast(PDDatetimeUnitOptions, "ns") + message = ( + f"Following pandas, xarray only supports decoding to timedelta64 " + f"values with a resolution of 's', 'ms', 'us', or 'ns'. Encoded " + f"values for variable {name!r} have a resolution of " + f"{resolution!r}. Attempting to decode to a resolution of 'ns'. " + f"Note, depending on the encoded values, this may lead to loss of " + f"precision. Additionally, data will not be identically round " + f"tripped; xarray will choose an encoding dtype of " + f"'timedelta64[ns]' when re-encoding." + ) + emit_user_level_warning(message) + else: + time_unit = cast(PDDatetimeUnitOptions, resolution) + return time_unit + + class CFTimedeltaCoder(VariableCoder): """Coder for CF Timedelta coding. @@ -1430,7 +1467,7 @@ class CFTimedeltaCoder(VariableCoder): def __init__( self, - time_unit: PDDatetimeUnitOptions = "ns", + time_unit: PDDatetimeUnitOptions | None = None, decode_via_units: bool = True, decode_via_dtype: bool = True, ) -> None: @@ -1442,45 +1479,18 @@ def __init__( def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) - has_timedelta_dtype = has_timedelta64_encoding_dtype(encoding) - if ("units" in encoding or "dtype" in encoding) and not has_timedelta_dtype: - dtype = encoding.get("dtype", None) - units = encoding.pop("units", None) + dtype = encoding.get("dtype", None) + units = encoding.pop("units", None) - # in the case of packed data we need to encode into - # float first, the correct dtype will be established - # via CFScaleOffsetCoder/CFMaskCoder - if "add_offset" in encoding or "scale_factor" in encoding: - dtype = data.dtype if data.dtype.kind == "f" else "float64" + # in the case of packed data we need to encode into + # float first, the correct dtype will be established + # via CFScaleOffsetCoder/CFMaskCoder + if "add_offset" in encoding or "scale_factor" in encoding: + dtype = data.dtype if data.dtype.kind == "f" else "float64" - else: - resolution, _ = np.datetime_data(variable.dtype) - dtype = np.int64 - attrs_dtype = f"timedelta64[{resolution}]" - units = _numpy_dtype_to_netcdf_timeunit(variable.dtype) - safe_setitem(attrs, "dtype", attrs_dtype, name=name) - # Remove dtype encoding if it exists to prevent it from - # interfering downstream in NonStringCoder. - encoding.pop("dtype", None) - - if any( - k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS - ): - raise ValueError( - f"Specifying 'add_offset' or 'scale_factor' is not " - f"supported when encoding the timedelta64 values of " - f"variable {name!r} with xarray's new default " - f"timedelta64 encoding approach. To encode {name!r} " - f"with xarray's previous timedelta64 encoding " - f"approach, which supports the 'add_offset' and " - f"'scale_factor' parameters, additionally set " - f"encoding['units'] to a unit of time, e.g. " - f"'seconds'. To proceed with encoding of {name!r} " - f"via xarray's new approach, remove any encoding " - f"entries for 'add_offset' or 'scale_factor'." - ) - if "_FillValue" not in encoding and "missing_value" not in encoding: - encoding["_FillValue"] = np.iinfo(np.int64).min + resolution, _ = np.datetime_data(variable.dtype) + attrs_dtype = f"timedelta64[{resolution}]" + safe_setitem(attrs, "dtype", attrs_dtype, name=name) data, units = encode_cf_timedelta(data, units, dtype) safe_setitem(attrs, "units", units, name=name) @@ -1499,57 +1509,13 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: ): dims, data, attrs, encoding = unpack_for_decoding(variable) units = pop_to(attrs, encoding, "units") - if is_dtype_decodable and self.decode_via_dtype: - if any( - k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS - ): - raise ValueError( - f"Decoding timedelta64 values via dtype is not " - f"supported when 'add_offset', or 'scale_factor' are " - f"present in encoding. Check the encoding parameters " - f"of variable {name!r}." - ) - # Overwrite the on-disk dtype encoding, which is numeric, with - # the dtype attribute stored on disk, which corresponds to - # a timedelta64 dtype. - encoding["dtype"] = attrs.pop("dtype") - dtype = np.dtype(encoding["dtype"]) - resolution, _ = np.datetime_data(dtype) - resolution = cast(NPDatetimeUnitOptions, resolution) - if np.timedelta64(1, resolution) > np.timedelta64(1, "s"): - time_unit = cast(PDDatetimeUnitOptions, "s") - dtype = np.dtype("timedelta64[s]") - message = ( - f"Following pandas, xarray only supports decoding to " - f"timedelta64 values with a resolution of 's', 'ms', " - f"'us', or 'ns'. Encoded values for variable {name!r} " - f"have a resolution of {resolution!r}. Attempting to " - f"decode to a resolution of 's'. Note, depending on " - f"the encoded values, this may lead to an " - f"OverflowError. Additionally, data will not be " - f"identically round tripped; xarray will choose an " - f"encoding dtype of 'timedelta64[s]' when re-encoding." - ) - emit_user_level_warning(message) - elif np.timedelta64(1, resolution) < np.timedelta64(1, "ns"): - time_unit = cast(PDDatetimeUnitOptions, "ns") - dtype = np.dtype("timedelta64[ns]") - message = ( - f"Following pandas, xarray only supports decoding to " - f"timedelta64 values with a resolution of 's', 'ms', " - f"'us', or 'ns'. Encoded values for variable {name!r} " - f"have a resolution of {resolution!r}. Attempting to " - f"decode to a resolution of 'ns'. Note, depending on " - f"the encoded values, this may lead to loss of " - f"precision. Additionally, data will not be " - f"identically round tripped; xarray will choose an " - f"encoding dtype of 'timedelta64[ns]' " - f"when re-encoding." - ) - emit_user_level_warning(message) + if is_dtype_decodable: + attrs_dtype = attrs.pop("dtype") + if self.time_unit is None: + time_unit = resolve_time_unit_from_attrs_dtype(attrs_dtype, name) else: - time_unit = cast(PDDatetimeUnitOptions, resolution) - elif self.decode_via_units: + time_unit = self.time_unit + else: if self._emit_decode_timedelta_future_warning: emit_user_level_warning( "In a future version, xarray will not decode " @@ -1567,8 +1533,19 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: "'CFTimedeltaCoder' instance.", FutureWarning, ) - dtype = np.dtype(f"timedelta64[{self.time_unit}]") - time_unit = self.time_unit + if self.time_unit is None: + time_unit = cast(PDDatetimeUnitOptions, "ns") + else: + time_unit = self.time_unit + + # Handle edge case that decode_via_dtype=False and + # decode_via_units=True, and timedeltas were encoded with a + # dtype attribute. We need to remove the dtype attribute + # to prevent an error during round tripping. + if has_timedelta_dtype: + attrs.pop("dtype") + + dtype = np.dtype(f"timedelta64[{time_unit}]") transform = partial(decode_cf_timedelta, units=units, time_unit=time_unit) data = lazy_elemwise_func(data, transform, dtype=dtype) return Variable(dims, data, attrs, encoding, fastpath=True) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a158b4973de..e6b00ad4f3a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -56,6 +56,7 @@ from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing from xarray.core.options import set_options +from xarray.core.types import PDDatetimeUnitOptions from xarray.core.utils import module_available from xarray.namedarray.pycompat import array_type from xarray.tests import ( @@ -351,16 +352,6 @@ def test_dtype_coercion_error(self) -> None: with pytest.raises(ValueError, match="could not safely cast"): ds.to_netcdf(path, format=format) - def test_literal_timedelta_fill_value_coercion_error(self) -> None: - for format in self.netcdf3_formats: - timedeltas = np.array( - [0, np.iinfo(np.int32).min, np.iinfo(np.int64).min] - ).astype("timedelta64[s]") - ds = Dataset({"timedeltas": ("timedeltas", timedeltas)}) - with create_tmp_file(allow_cleanup_failure=False) as path: - with pytest.raises(ValueError, match="_FillValue"): - ds.to_netcdf(path, format=format) - class DatasetIOBase: engine: T_NetcdfEngine | None = None @@ -652,8 +643,10 @@ def test_roundtrip_timedelta_data(self) -> None: ) as actual: assert_identical(expected, actual) - def test_roundtrip_literal_timedelta_data(self) -> None: - time_deltas = pd.to_timedelta(["1h", "2h", "NaT"]).as_unit("s") # type: ignore[arg-type, unused-ignore] + def test_roundtrip_timedelta_data_via_dtype( + self, time_unit: PDDatetimeUnitOptions + ) -> None: + time_deltas = pd.to_timedelta(["1h", "2h", "NaT"]).as_unit(time_unit) # type: ignore[arg-type, unused-ignore] expected = Dataset( {"td": ("td", time_deltas), "td0": time_deltas[0].to_numpy()} ) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 3b85edfa81e..3e1b7747cb4 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -20,7 +20,6 @@ ) from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder from xarray.coding.times import ( - _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS, _encode_datetime_with_cftime, _netcdf_to_numpy_timeunit, _numpy_to_netcdf_timeunit, @@ -1824,7 +1823,7 @@ def test_encode_cf_timedelta_small_dtype_missing_value(use_dask) -> None: assert_equal(variable, decoded) -_DECODE_TIMEDELTA_TESTS = { +_DECODE_TIMEDELTA_VIA_UNITS_TESTS = { "default": (True, None, np.dtype("timedelta64[ns]"), True), "decode_timedelta=False": (True, False, np.dtype("int64"), False), "inherit-time_unit-from-decode_times": ( @@ -1856,16 +1855,16 @@ def test_encode_cf_timedelta_small_dtype_missing_value(use_dask) -> None: @pytest.mark.parametrize( ("decode_times", "decode_timedelta", "expected_dtype", "warns"), - list(_DECODE_TIMEDELTA_TESTS.values()), - ids=list(_DECODE_TIMEDELTA_TESTS.keys()), + list(_DECODE_TIMEDELTA_VIA_UNITS_TESTS.values()), + ids=list(_DECODE_TIMEDELTA_VIA_UNITS_TESTS.keys()), ) -def test_decode_timedelta( +def test_decode_timedelta_via_units( decode_times, decode_timedelta, expected_dtype, warns ) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3) - encoding = {"units": "days"} - var = Variable(["time"], timedeltas, encoding=encoding) - encoded = conventions.encode_cf_variable(var) + attrs = {"units": "days"} + var = Variable(["time"], timedeltas, encoding=attrs) + encoded = Variable(["time"], np.array([0, 1, 2]), attrs=attrs) if warns: with pytest.warns(FutureWarning, match="decode_timedelta"): decoded = conventions.decode_cf_variable( @@ -1885,6 +1884,54 @@ def test_decode_timedelta( assert decoded.dtype == expected_dtype +_DECODE_TIMEDELTA_VIA_DTYPE_TESTS = { + "default": (True, None, np.dtype("timedelta64[ns]")), + "decode_timedelta=False": (True, False, np.dtype("int64")), + "inherit-time_unit-from-decode_times": ( + CFDatetimeCoder(time_unit="s"), + None, + np.dtype("timedelta64[s]"), + ), + "set-time_unit-via-CFTimedeltaCoder-decode_times=True": ( + True, + CFTimedeltaCoder(time_unit="s"), + np.dtype("timedelta64[s]"), + ), + "set-time_unit-via-CFTimedeltaCoder-decode_times=False": ( + False, + CFTimedeltaCoder(time_unit="s"), + np.dtype("timedelta64[s]"), + ), + "override-time_unit-from-decode_times": ( + CFDatetimeCoder(time_unit="ns"), + CFTimedeltaCoder(time_unit="s"), + np.dtype("timedelta64[s]"), + ), +} + + +@pytest.mark.parametrize( + ("decode_times", "decode_timedelta", "expected_dtype"), + list(_DECODE_TIMEDELTA_VIA_DTYPE_TESTS.values()), + ids=list(_DECODE_TIMEDELTA_VIA_DTYPE_TESTS.keys()), +) +def test_decode_timedelta_via_dtype(decode_times, decode_timedelta, expected_dtype) -> None: + timedeltas = pd.timedelta_range(0, freq="D", periods=3) + encoding = {"units": "days"} + var = Variable(["time"], timedeltas, encoding=encoding) + encoded = conventions.encode_cf_variable(var) + assert encoded.attrs["dtype"] == "timedelta64[ns]" + assert encoded.attrs["units"] == encoding["units"] + decoded = conventions.decode_cf_variable( + "foo", encoded, decode_times=decode_times, decode_timedelta=decode_timedelta + ) + if decode_timedelta is False: + assert_equal(encoded, decoded) + else: + assert_equal(var, decoded) + assert decoded.dtype == expected_dtype + + def test_lazy_decode_timedelta_unexpected_dtype() -> None: attrs = {"units": "seconds"} encoded = Variable(["time"], [0, 0.5, 1], attrs=attrs) @@ -1940,7 +1987,12 @@ def test_duck_array_decode_times(calendar) -> None: def test_decode_timedelta_mask_and_scale( decode_timedelta: bool, mask_and_scale: bool ) -> None: - attrs = {"units": "nanoseconds", "_FillValue": np.int16(-1), "add_offset": 100000.0} + attrs = { + "dtype": "timedelta64[ns]", + "units": "nanoseconds", + "_FillValue": np.int16(-1), + "add_offset": 100000.0, + } encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) decoded = conventions.decode_cf_variable( "foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta @@ -1958,19 +2010,17 @@ def test_decode_floating_point_timedelta_no_serialization_warning() -> None: decoded.load() -def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None: +def test_timedelta64_coding_via_dtype(time_unit: PDDatetimeUnitOptions) -> None: timedeltas = np.array([0, 1, "NaT"], dtype=f"timedelta64[{time_unit}]") variable = Variable(["time"], timedeltas) - expected_dtype = f"timedelta64[{time_unit}]" expected_units = _numpy_to_netcdf_timeunit(time_unit) encoded = conventions.encode_cf_variable(variable) - assert encoded.attrs["dtype"] == expected_dtype + assert encoded.attrs["dtype"] == f"timedelta64[{time_unit}]" assert encoded.attrs["units"] == expected_units - assert encoded.attrs["_FillValue"] == np.iinfo(np.int64).min decoded = conventions.decode_cf_variable("timedeltas", encoded) - assert decoded.encoding["dtype"] == expected_dtype + assert decoded.encoding["dtype"] == np.dtype("int64") assert decoded.encoding["units"] == expected_units assert_identical(decoded, variable) @@ -1981,7 +2031,7 @@ def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None: assert reencoded.dtype == encoded.dtype -def test_literal_timedelta_coding_non_pandas_coarse_resolution_warning() -> None: +def test_timedelta_coding_via_dtype_non_pandas_coarse_resolution_warning() -> None: attrs = {"dtype": "timedelta64[D]", "units": "days"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs) with pytest.warns(UserWarning, match="xarray only supports"): @@ -1994,7 +2044,7 @@ def test_literal_timedelta_coding_non_pandas_coarse_resolution_warning() -> None @pytest.mark.xfail(reason="xarray does not recognize picoseconds as time-like") -def test_literal_timedelta_coding_non_pandas_fine_resolution_warning() -> None: +def test_timedelta_coding_via_dtype_non_pandas_fine_resolution_warning() -> None: attrs = {"dtype": "timedelta64[ps]", "units": "picoseconds"} encoded = Variable(["time"], [0, 1000, 2000], attrs=attrs) with pytest.warns(UserWarning, match="xarray only supports"): @@ -2006,7 +2056,7 @@ def test_literal_timedelta_coding_non_pandas_fine_resolution_warning() -> None: assert decoded.dtype == np.dtype("timedelta64[ns]") -def test_literal_timedelta_decode_invalid_encoding() -> None: +def test_timedelta_decode_via_dtype_invalid_encoding() -> None: attrs = {"dtype": "timedelta64[s]", "units": "seconds"} encoding = {"units": "foo"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs, encoding=encoding) @@ -2015,7 +2065,7 @@ def test_literal_timedelta_decode_invalid_encoding() -> None: @pytest.mark.parametrize("attribute", ["dtype", "units"]) -def test_literal_timedelta_encode_invalid_attribute(attribute) -> None: +def test_timedelta_encode_via_dtype_invalid_attribute(attribute) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3) attrs = {attribute: "foo"} variable = Variable(["time"], timedeltas, attrs=attrs) @@ -2023,23 +2073,6 @@ def test_literal_timedelta_encode_invalid_attribute(attribute) -> None: conventions.encode_cf_variable(variable) -@pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS) -def test_literal_timedelta_encoding_invalid_key_error(invalid_key) -> None: - encoding = {invalid_key: 1.0} - timedeltas = pd.timedelta_range(0, freq="D", periods=3) - variable = Variable(["time"], timedeltas, encoding=encoding) - with pytest.raises(ValueError, match=invalid_key): - conventions.encode_cf_variable(variable) - - -@pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS) -def test_literal_timedelta_decoding_invalid_key_error(invalid_key) -> None: - attrs = {invalid_key: 1.0, "dtype": "timedelta64[s]", "units": "seconds"} - variable = Variable(["time"], [0, 1, 2], attrs=attrs) - with pytest.raises(ValueError, match=invalid_key): - conventions.decode_cf_variable("foo", variable) - - @pytest.mark.parametrize( ("decode_via_units", "decode_via_dtype", "attrs", "expect_timedelta64"), [ @@ -2057,12 +2090,6 @@ def test_literal_timedelta_decoding_invalid_key_error(invalid_key) -> None: def test_timedelta_decoding_options( decode_via_units, decode_via_dtype, attrs, expect_timedelta64 ) -> None: - # Note with literal timedelta encoding, we always add a _FillValue, even - # if one is not present in the original encoding parameters, which is why - # we ensure one is defined here when "dtype" is present in attrs. - if "dtype" in attrs: - attrs["_FillValue"] = np.iinfo(np.int64).min - array = np.array([0, 1, 2], dtype=np.dtype("int64")) encoded = Variable(["time"], array, attrs=attrs) @@ -2082,7 +2109,11 @@ def test_timedelta_decoding_options( # Confirm we exactly roundtrip. reencoded = conventions.encode_cf_variable(decoded) - assert_identical(reencoded, encoded) + + expected = encoded.copy() + if "dtype" not in attrs and decode_via_units: + expected.attrs["dtype"] = "timedelta64[s]" + assert_identical(reencoded, expected) def test_timedelta_encoding_explicit_non_timedelta64_dtype() -> None: @@ -2092,20 +2123,21 @@ def test_timedelta_encoding_explicit_non_timedelta64_dtype() -> None: encoded = conventions.encode_cf_variable(variable) assert encoded.attrs["units"] == "days" + assert encoded.attrs["dtype"] == "timedelta64[ns]" assert encoded.dtype == np.dtype("int32") - with pytest.warns(FutureWarning, match="timedelta"): - decoded = conventions.decode_cf_variable("foo", encoded) + decoded = conventions.decode_cf_variable("foo", encoded) assert_identical(decoded, variable) reencoded = conventions.encode_cf_variable(decoded) assert_identical(reencoded, encoded) assert encoded.attrs["units"] == "days" + assert encoded.attrs["dtype"] == "timedelta64[ns]" assert encoded.dtype == np.dtype("int32") @pytest.mark.parametrize("mask_attribute", ["_FillValue", "missing_value"]) -def test_literal_timedelta64_coding_with_mask( +def test_timedelta64_coding_via_dtype_with_mask( time_unit: PDDatetimeUnitOptions, mask_attribute: str ) -> None: timedeltas = np.array([0, 1, "NaT"], dtype=f"timedelta64[{time_unit}]") @@ -2121,7 +2153,7 @@ def test_literal_timedelta64_coding_with_mask( assert encoded[-1] == mask decoded = conventions.decode_cf_variable("timedeltas", encoded) - assert decoded.encoding["dtype"] == expected_dtype + assert decoded.encoding["dtype"] == np.dtype("int64") assert decoded.encoding["units"] == expected_units assert decoded.encoding[mask_attribute] == mask assert np.isnat(decoded[-1]) @@ -2143,7 +2175,7 @@ def test_roundtrip_0size_timedelta(time_unit: PDDatetimeUnitOptions) -> None: assert encoded.dtype == encoding["dtype"] assert encoded.attrs["units"] == encoding["units"] decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True) - assert decoded.dtype == np.dtype("=m8[ns]") + assert decoded.dtype == np.dtype(f"=m8[{time_unit}]") with assert_no_warnings(): decoded.load() assert decoded.dtype == np.dtype("=m8[s]") From 153038ecf6e430469f2c2041bf9289f18a415baa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 1 Jul 2025 15:55:45 +0000 Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_coding_times.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 3e1b7747cb4..ffe1e3b4f89 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1915,7 +1915,9 @@ def test_decode_timedelta_via_units( list(_DECODE_TIMEDELTA_VIA_DTYPE_TESTS.values()), ids=list(_DECODE_TIMEDELTA_VIA_DTYPE_TESTS.keys()), ) -def test_decode_timedelta_via_dtype(decode_times, decode_timedelta, expected_dtype) -> None: +def test_decode_timedelta_via_dtype( + decode_times, decode_timedelta, expected_dtype +) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3) encoding = {"units": "days"} var = Variable(["time"], timedeltas, encoding=encoding) From 0542961dcca63a0585b23960c1abce070ad34860 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 1 Jul 2025 13:44:08 -0400 Subject: [PATCH 4/4] Add decode_timedelta=True test case --- xarray/tests/test_coding_times.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index ffe1e3b4f89..af29716fec0 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1825,6 +1825,7 @@ def test_encode_cf_timedelta_small_dtype_missing_value(use_dask) -> None: _DECODE_TIMEDELTA_VIA_UNITS_TESTS = { "default": (True, None, np.dtype("timedelta64[ns]"), True), + "decode_timedelta=True": (True, True, np.dtype("timedelta64[ns]"), False), "decode_timedelta=False": (True, False, np.dtype("int64"), False), "inherit-time_unit-from-decode_times": ( CFDatetimeCoder(time_unit="s"), @@ -1887,6 +1888,7 @@ def test_decode_timedelta_via_units( _DECODE_TIMEDELTA_VIA_DTYPE_TESTS = { "default": (True, None, np.dtype("timedelta64[ns]")), "decode_timedelta=False": (True, False, np.dtype("int64")), + "decode_timedelta=True": (True, True, np.dtype("timedelta64[ns]")), "inherit-time_unit-from-decode_times": ( CFDatetimeCoder(time_unit="s"), None, pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy