From 89aa2e2a8ad9a89aae52199617c8f693881252a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 13 Feb 2025 11:57:28 +0100 Subject: [PATCH 01/13] mask/scale datetimes/timedeltas only if they will be decoded, better handle partial coding --- xarray/coding/times.py | 16 +++++++-- xarray/coding/variables.py | 57 ++++++++++++++++++++++++++----- xarray/conventions.py | 8 +++-- xarray/tests/test_coding_times.py | 18 ++++++++-- xarray/tests/test_conventions.py | 5 +-- 5 files changed, 84 insertions(+), 20 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 68369dac0d7..47f2d8ee19a 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1315,9 +1315,11 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: units = encoding.pop("units", None) calendar = encoding.pop("calendar", None) - dtype = encoding.get("dtype", None) + dtype = encoding.pop("dtype", None) (data, units, calendar) = encode_cf_datetime(data, units, calendar, dtype) - + # if no dtype is provided, preserve data.dtype in encoding + if dtype is None: + safe_setitem(encoding, "dtype", data.dtype, name=name) safe_setitem(attrs, "units", units, name=name) safe_setitem(attrs, "calendar", calendar, name=name) @@ -1369,8 +1371,16 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) + # in the case of packed data we need to encode into + # float first, the correct dtype will be established + # via CFScaleOffsetCoder/CFMaskCoder + dtype = None + if "add_offset" in encoding or "scale_factor" in encoding: + encoding.pop("dtype") + dtype = data.dtype if data.dtype.kind == "f" else "float64" + data, units = encode_cf_timedelta( - data, encoding.pop("units", None), encoding.get("dtype", None) + data, encoding.pop("units", None), encoding.get("dtype", dtype) ) safe_setitem(attrs, "units", units, name=name) 
diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 83112628dbb..5a4f8b38aa4 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -234,6 +234,8 @@ def _apply_mask( def _is_time_like(units): # test for time-like + # return "datetime" for datetetime-like + # return "timedelta" for timedelta-like if units is None: return False time_strings = [ @@ -255,9 +257,9 @@ def _is_time_like(units): _unpack_netcdf_time_units(units) except ValueError: return False - return True + return "datetime" else: - return any(tstr == units for tstr in time_strings) + return "timedelta" if any(tstr == units for tstr in time_strings) else False def _check_fill_values(attrs, name, dtype): @@ -367,6 +369,14 @@ def _encode_unsigned_fill_value( class CFMaskCoder(VariableCoder): """Mask or unmask fill values according to CF conventions.""" + def __init__( + self, + decode_times: bool = False, + decode_timedelta: bool = False, + ) -> None: + self.decode_times = decode_times + self.decode_timedelta = decode_timedelta + def encode(self, variable: Variable, name: T_Name = None): dims, data, attrs, encoding = unpack_for_encoding(variable) @@ -393,10 +403,13 @@ def encode(self, variable: Variable, name: T_Name = None): if fv_exists: # Ensure _FillValue is cast to same dtype as data's + # but not for packed data encoding["_FillValue"] = ( _encode_unsigned_fill_value(name, fv, dtype) if has_unsigned else dtype.type(fv) + if "add_offset" not in encoding and "scale_factor" not in encoding + else fv ) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) @@ -409,6 +422,8 @@ def encode(self, variable: Variable, name: T_Name = None): _encode_unsigned_fill_value(name, mv, dtype) if has_unsigned else dtype.type(mv) + if "add_offset" not in encoding and "scale_factor" not in encoding + else mv ), ) fill_value = pop_to(encoding, attrs, "missing_value", name=name) @@ -416,10 +431,17 @@ def encode(self, variable: Variable, name: T_Name = None): # apply fillna if 
fill_value is not None and not pd.isnull(fill_value): # special case DateTime to properly handle NaT - if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu": - data = duck_array_ops.where( - data != np.iinfo(np.int64).min, data, fill_value - ) + if _is_time_like(attrs.get("units")): + if data.dtype.kind in "iu": + data = duck_array_ops.where( + data != np.iinfo(np.int64).min, data, fill_value + ) + else: + data = duck_array_ops.fillna(data, fill_value) + if np.array(fill_value).dtype.kind in "iu": + data = duck_array_ops.astype( + duck_array_ops.around(data), type(fill_value) + ) else: data = duck_array_ops.fillna(data, fill_value) @@ -458,9 +480,15 @@ def decode(self, variable: Variable, name: T_Name = None): if encoded_fill_values: # special case DateTime to properly handle NaT + # we need to check if time-like will be decoded or not + # in further processing dtype: np.typing.DTypeLike decoded_fill_value: Any - if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu": + is_time_like = _is_time_like(attrs.get("units")) + if ( + (is_time_like == "datetime" and self.decode_times) + or (is_time_like == "timedelta" and self.decode_timedelta) + ) and data.dtype.kind in "iu": dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min else: if "scale_factor" not in attrs and "add_offset" not in attrs: @@ -549,6 +577,14 @@ class CFScaleOffsetCoder(VariableCoder): decode_values = encoded_values * scale_factor + add_offset """ + def __init__( + self, + decode_times: bool = False, + decode_timedelta: bool = False, + ) -> None: + self.decode_times = decode_times + self.decode_timedelta = decode_timedelta + def encode(self, variable: Variable, name: T_Name = None) -> Variable: dims, data, attrs, encoding = unpack_for_encoding(variable) @@ -580,8 +616,13 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: add_offset = np.asarray(add_offset).item() # if we have a _FillValue/masked_value we already have the wanted # floating point dtype 
here (via CFMaskCoder), so no check is necessary - # only check in other cases + # only check in other cases and for time-like dtype = data.dtype + is_time_like = _is_time_like(attrs.get("units")) + if (is_time_like == "datetime" and self.decode_times) or ( + is_time_like == "timedelta" and self.decode_timedelta + ): + dtype = _choose_float_dtype(dtype, encoding) if "_FillValue" not in encoding and "missing_value" not in encoding: dtype = _choose_float_dtype(dtype, encoding) diff --git a/xarray/conventions.py b/xarray/conventions.py index f67af95b4ce..071dab43c28 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -194,8 +194,12 @@ def decode_cf_variable( if mask_and_scale: for coder in [ - variables.CFMaskCoder(), - variables.CFScaleOffsetCoder(), + variables.CFMaskCoder( + decode_times=decode_times, decode_timedelta=decode_timedelta + ), + variables.CFScaleOffsetCoder( + decode_times=decode_times, decode_timedelta=decode_timedelta + ), ]: var = coder.decode(var, name=name) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 2e61e5d853e..778c0090955 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1429,9 +1429,9 @@ def test_roundtrip_datetime64_nanosecond_precision_warning( ) -> None: # test warning if times can't be serialized faithfully times = [ - np.datetime64("1970-01-01T00:01:00", "ns"), - np.datetime64("NaT"), - np.datetime64("1970-01-02T00:01:00", "ns"), + np.datetime64("1970-01-01T00:01:00", time_unit), + np.datetime64("NaT", time_unit), + np.datetime64("1970-01-02T00:01:00", time_unit), ] units = "days since 1970-01-10T01:01:00" needed_units = "hours" @@ -1901,3 +1901,15 @@ def test_lazy_decode_timedelta_error() -> None: ) with pytest.raises(OutOfBoundsTimedelta, match="overflow"): decoded.load() + + +@pytest.mark.parametrize("decode_timedelta", [True, False]) +@pytest.mark.parametrize("mask_and_scale", [True, False]) +def 
test_decode_timedelta_mask_and_scale(decode_timedelta, mask_and_scale) -> None: + attrs = {"units": "days", "_FillValue": np.int16(-1), "add_offset": 100.0} + encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) + decoded = conventions.decode_cf_variable( + "foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta + ) + result = conventions.encode_cf_variable(decoded, name="foo") + assert_equal(encoded, result) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 8d3827fac54..63b5084ece8 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -511,16 +511,13 @@ def test_decode_dask_times(self) -> None: @pytest.mark.parametrize("time_unit", ["s", "ms", "us", "ns"]) def test_decode_cf_time_kwargs(self, time_unit) -> None: - # todo: if we set timedelta attrs "units": "days" - # this errors on the last decode_cf wrt to the lazy_elemwise_func - # trying to convert twice ds = Dataset.from_dict( { "coords": { "timedelta": { "data": np.array([1, 2, 3], dtype="int64"), "dims": "timedelta", - "attrs": {"units": "seconds"}, + "attrs": {"units": "days"}, }, "time": { "data": np.array([1, 2, 3], dtype="int64"), From d4fe3fe5f8ea2365116e242238f56dc7693d838a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 14 Feb 2025 10:41:06 +0100 Subject: [PATCH 02/13] comments --- xarray/coding/variables.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 5a4f8b38aa4..72125da3332 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -416,6 +416,7 @@ def encode(self, variable: Variable, name: T_Name = None): if mv_exists: # try to use _FillValue, if it exists to align both values # or use missing_value and ensure it's cast to same dtype as data's + # but not for packed data encoding["missing_value"] = attrs.get( "_FillValue", ( @@ -437,7 +438,11 @@ def encode(self, 
variable: Variable, name: T_Name = None): data != np.iinfo(np.int64).min, data, fill_value ) else: + # if we have float data (data was packed prior masking) + # we just fillna data = duck_array_ops.fillna(data, fill_value) + # but if the fill_value is of integer type + # we need to round and cast if np.array(fill_value).dtype.kind in "iu": data = duck_array_ops.astype( duck_array_ops.around(data), type(fill_value) From 058217e358dc7d39ed16937b4dd45943298b794a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 14 Feb 2025 10:56:32 +0100 Subject: [PATCH 03/13] typing --- xarray/coding/variables.py | 9 +++++---- xarray/tests/test_coding_times.py | 20 ++++++++++++++------ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 72125da3332..36b8b40acd2 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -10,6 +10,7 @@ import numpy as np import pandas as pd +from build.lib.xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type @@ -371,8 +372,8 @@ class CFMaskCoder(VariableCoder): def __init__( self, - decode_times: bool = False, - decode_timedelta: bool = False, + decode_times: bool | CFDatetimeCoder = False, + decode_timedelta: bool | CFTimedeltaCoder = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta @@ -584,8 +585,8 @@ class CFScaleOffsetCoder(VariableCoder): def __init__( self, - decode_times: bool = False, - decode_timedelta: bool = False, + decode_times: bool | CFDatetimeCoder = False, + decode_timedelta: bool | CFTimedeltaCoder = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 778c0090955..480bd96f3f4 
100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -638,7 +638,9 @@ def test_cf_timedelta_2d() -> None: @pytest.mark.parametrize("encoding_unit", FREQUENCIES_TO_ENCODING_UNITS.values()) -def test_decode_cf_timedelta_time_unit(time_unit, encoding_unit) -> None: +def test_decode_cf_timedelta_time_unit( + time_unit: PDDatetimeUnitOptions, encoding_unit +) -> None: encoded = 1 encoding_unit_as_numpy = _netcdf_to_numpy_timeunit(encoding_unit) if np.timedelta64(1, time_unit) > np.timedelta64(1, encoding_unit_as_numpy): @@ -652,7 +654,9 @@ def test_decode_cf_timedelta_time_unit(time_unit, encoding_unit) -> None: assert result.dtype == expected.dtype -def test_decode_cf_timedelta_time_unit_out_of_bounds(time_unit) -> None: +def test_decode_cf_timedelta_time_unit_out_of_bounds( + time_unit: PDDatetimeUnitOptions, +) -> None: # Define a scale factor that will guarantee overflow with the given # time_unit. scale_factor = np.timedelta64(1, time_unit) // np.timedelta64(1, "ns") @@ -661,7 +665,7 @@ def test_decode_cf_timedelta_time_unit_out_of_bounds(time_unit) -> None: decode_cf_timedelta(encoded, "days", time_unit) -def test_cf_timedelta_roundtrip_large_value(time_unit) -> None: +def test_cf_timedelta_roundtrip_large_value(time_unit: PDDatetimeUnitOptions) -> None: value = np.timedelta64(np.iinfo(np.int64).max, time_unit) encoded, units = encode_cf_timedelta(value) decoded = decode_cf_timedelta(encoded, units, time_unit=time_unit) @@ -983,7 +987,7 @@ def test_use_cftime_default_standard_calendar_out_of_range( @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) @pytest.mark.parametrize("units_year", [1500, 2000, 2500]) def test_use_cftime_default_non_standard_calendar( - calendar, units_year, time_unit + calendar, units_year, time_unit: PDDatetimeUnitOptions ) -> None: from cftime import num2date @@ -1620,7 +1624,9 @@ def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None 
_ENCODE_DATETIME64_VIA_DASK_TESTS.values(), ids=_ENCODE_DATETIME64_VIA_DASK_TESTS.keys(), ) -def test_encode_cf_datetime_datetime64_via_dask(freq, units, dtype, time_unit) -> None: +def test_encode_cf_datetime_datetime64_via_dask( + freq, units, dtype, time_unit: PDDatetimeUnitOptions +) -> None: import dask.array times_pd = pd.date_range(start="1700", freq=freq, periods=3, unit=time_unit) @@ -1905,7 +1911,9 @@ def test_lazy_decode_timedelta_error() -> None: @pytest.mark.parametrize("decode_timedelta", [True, False]) @pytest.mark.parametrize("mask_and_scale", [True, False]) -def test_decode_timedelta_mask_and_scale(decode_timedelta, mask_and_scale) -> None: +def test_decode_timedelta_mask_and_scale( + decode_timedelta: bool, mask_and_scale: bool +) -> None: attrs = {"units": "days", "_FillValue": np.int16(-1), "add_offset": 100.0} encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) decoded = conventions.decode_cf_variable( From 18274ad1a7b9b9fc715385d925234baab3ec9d6e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 09:57:13 +0000 Subject: [PATCH 04/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 36b8b40acd2..995b867db47 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -9,8 +9,8 @@ import numpy as np import pandas as pd - from build.lib.xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder + from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type From b80a578be72b12800d4be18216042ba6f34d3895 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 14 Feb 2025 10:58:40 +0100 Subject: [PATCH 
05/13] Apply suggestions from code review --- xarray/coding/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 995b867db47..3d0a10c7275 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from build.lib.xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder +from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable From ff189a9d2ba33762003335350bbb5c0421a85a3c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 09:59:10 +0000 Subject: [PATCH 06/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 3d0a10c7275..83234cd0fa0 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -9,8 +9,8 @@ import numpy as np import pandas as pd -from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder +from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type From b7d61bc02825b7d8f73d10459c6419a7cd48a986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 14 Feb 2025 11:18:26 +0100 Subject: [PATCH 07/13] fix typing and imports --- xarray/coding/variables.py | 9 ++++----- xarray/conventions.py | 6 ++++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 83234cd0fa0..72125da3332 100644 --- a/xarray/coding/variables.py +++ 
b/xarray/coding/variables.py @@ -10,7 +10,6 @@ import numpy as np import pandas as pd -from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type @@ -372,8 +371,8 @@ class CFMaskCoder(VariableCoder): def __init__( self, - decode_times: bool | CFDatetimeCoder = False, - decode_timedelta: bool | CFTimedeltaCoder = False, + decode_times: bool = False, + decode_timedelta: bool = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta @@ -585,8 +584,8 @@ class CFScaleOffsetCoder(VariableCoder): def __init__( self, - decode_times: bool | CFDatetimeCoder = False, - decode_timedelta: bool | CFTimedeltaCoder = False, + decode_times: bool = False, + decode_timedelta: bool = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta diff --git a/xarray/conventions.py b/xarray/conventions.py index 071dab43c28..53169f04457 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -193,12 +193,14 @@ def decode_cf_variable( var = variables.Numpy2StringDTypeCoder().decode(var) if mask_and_scale: + dec_times = True if decode_times else False + dec_timedelta = True if decode_timedelta else False for coder in [ variables.CFMaskCoder( - decode_times=decode_times, decode_timedelta=decode_timedelta + decode_times=dec_times, decode_timedelta=dec_timedelta ), variables.CFScaleOffsetCoder( - decode_times=decode_times, decode_timedelta=decode_timedelta + decode_times=dec_times, decode_timedelta=dec_timedelta ), ]: var = coder.decode(var, name=name) From 031aaa2122bec35408a9b1b808bb8eb9d0895622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 25 Feb 2025 10:18:49 +0100 Subject: [PATCH 08/13] refactor according to review concerns and suggestions --- xarray/coding/times.py | 20 +++++++++++++----- 
xarray/coding/variables.py | 35 ++++++++++++++++++------------- xarray/tests/test_coding_times.py | 20 +++++++++++++++++- 3 files changed, 54 insertions(+), 21 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 47f2d8ee19a..01e3bd1cf1d 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1316,7 +1316,14 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: units = encoding.pop("units", None) calendar = encoding.pop("calendar", None) dtype = encoding.pop("dtype", None) + + # in the case of packed data we need to encode into + # float first, the correct dtype will be established + # via CFScaleOffsetCoder/CFMaskCoder + if "add_offset" in encoding or "scale_factor" in encoding: + dtype = data.dtype if data.dtype.kind == "f" else "float64" (data, units, calendar) = encode_cf_datetime(data, units, calendar, dtype) + # if no dtype is provided, preserve data.dtype in encoding if dtype is None: safe_setitem(encoding, "dtype", data.dtype, name=name) @@ -1371,17 +1378,20 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) + dtype = encoding.pop("dtype", None) + # in the case of packed data we need to encode into # float first, the correct dtype will be established # via CFScaleOffsetCoder/CFMaskCoder - dtype = None if "add_offset" in encoding or "scale_factor" in encoding: - encoding.pop("dtype") dtype = data.dtype if data.dtype.kind == "f" else "float64" - data, units = encode_cf_timedelta( - data, encoding.pop("units", None), encoding.get("dtype", dtype) - ) + data, units = encode_cf_timedelta(data, encoding.pop("units", None), dtype) + + # if no dtype is provided, preserve data.dtype in encoding + if dtype is None: + safe_setitem(encoding, "dtype", data.dtype, name=name) + safe_setitem(attrs, "units", units, name=name) return Variable(dims, data, attrs, encoding, 
fastpath=True) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 72125da3332..77e2f0602ce 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -484,25 +484,30 @@ def decode(self, variable: Variable, name: T_Name = None): ) if encoded_fill_values: - # special case DateTime to properly handle NaT - # we need to check if time-like will be decoded or not - # in further processing dtype: np.typing.DTypeLike decoded_fill_value: Any - is_time_like = _is_time_like(attrs.get("units")) - if ( - (is_time_like == "datetime" and self.decode_times) - or (is_time_like == "timedelta" and self.decode_timedelta) - ) and data.dtype.kind in "iu": - dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min + # in case of packed data we have to decode into float + # in any case + if "scale_factor" in attrs or "add_offset" in attrs: + dtype, decoded_fill_value = ( + _choose_float_dtype(data.dtype, attrs), + np.nan, + ) else: - if "scale_factor" not in attrs and "add_offset" not in attrs: - dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) - else: + # in case of no-packing special case DateTime/Timedelta to properly + # handle NaT, we need to check if time-like will be decoded + # or not in further processing + is_time_like = _is_time_like(attrs.get("units")) + if ( + (is_time_like == "datetime" and self.decode_times) + or (is_time_like == "timedelta" and self.decode_timedelta) + ) and data.dtype.kind in "iu": dtype, decoded_fill_value = ( - _choose_float_dtype(data.dtype, attrs), - np.nan, - ) + np.int64, + np.iinfo(np.int64).min, + ) # np.dtype(f"{is_time_like}64[s]") + else: + dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) transform = partial( _apply_mask, diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 480bd96f3f4..8f031593a27 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -525,6 +525,24 @@ def 
test_decoded_cf_datetime_array_2d(time_unit: PDDatetimeUnitOptions) -> None: assert_array_equal(np.asarray(result), expected) +@pytest.mark.parametrize("decode_times", [True, False]) +@pytest.mark.parametrize("mask_and_scale", [True, False]) +def test_decode_datetime_mask_and_scale( + decode_times: bool, mask_and_scale: bool +) -> None: + attrs = { + "units": "nanoseconds since 1970-01-01", + "_FillValue": np.int16(-1), + "add_offset": 100000.0, + } + encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) + decoded = conventions.decode_cf_variable( + "foo", encoded, mask_and_scale=mask_and_scale, decode_times=decode_times + ) + result = conventions.encode_cf_variable(decoded, name="foo") + assert_equal(encoded, result) + + FREQUENCIES_TO_ENCODING_UNITS = { "ns": "nanoseconds", "us": "microseconds", @@ -1914,7 +1932,7 @@ def test_lazy_decode_timedelta_error() -> None: def test_decode_timedelta_mask_and_scale( decode_timedelta: bool, mask_and_scale: bool ) -> None: - attrs = {"units": "days", "_FillValue": np.int16(-1), "add_offset": 100.0} + attrs = {"units": "nanoseconds", "_FillValue": np.int16(-1), "add_offset": 100000.0} encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) decoded = conventions.decode_cf_variable( "foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta From 03aa1b83b198b675fe21fa1144c52671f2a467ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 25 Feb 2025 11:30:55 +0100 Subject: [PATCH 09/13] retain dtype for packed data in datetime/timedelta encoding --- xarray/coding/times.py | 16 ++++++++++------ xarray/tests/test_coding_times.py | 7 +++++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 01e3bd1cf1d..06c2163a62a 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1320,13 +1320,15 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: # in the 
case of packed data we need to encode into # float first, the correct dtype will be established # via CFScaleOffsetCoder/CFMaskCoder + set_dtype_encoding = None if "add_offset" in encoding or "scale_factor" in encoding: + set_dtype_encoding = dtype dtype = data.dtype if data.dtype.kind == "f" else "float64" (data, units, calendar) = encode_cf_datetime(data, units, calendar, dtype) - # if no dtype is provided, preserve data.dtype in encoding - if dtype is None: - safe_setitem(encoding, "dtype", data.dtype, name=name) + # retain dtype for packed data + if set_dtype_encoding is not None: + safe_setitem(encoding, "dtype", set_dtype_encoding, name=name) safe_setitem(attrs, "units", units, name=name) safe_setitem(attrs, "calendar", calendar, name=name) @@ -1383,14 +1385,16 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: # in the case of packed data we need to encode into # float first, the correct dtype will be established # via CFScaleOffsetCoder/CFMaskCoder + set_dtype_encoding = None if "add_offset" in encoding or "scale_factor" in encoding: + set_dtype_encoding = dtype dtype = data.dtype if data.dtype.kind == "f" else "float64" data, units = encode_cf_timedelta(data, encoding.pop("units", None), dtype) - # if no dtype is provided, preserve data.dtype in encoding - if dtype is None: - safe_setitem(encoding, "dtype", data.dtype, name=name) + # retain dtype for packed data + if set_dtype_encoding is not None: + safe_setitem(encoding, "dtype", set_dtype_encoding, name=name) safe_setitem(attrs, "units", units, name=name) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 8f031593a27..00f622309fc 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -532,6 +532,7 @@ def test_decode_datetime_mask_and_scale( ) -> None: attrs = { "units": "nanoseconds since 1970-01-01", + "calendar": "proleptic_gregorian", "_FillValue": np.int16(-1), "add_offset": 100000.0, } @@ -540,7 +541,8 
@@ def test_decode_datetime_mask_and_scale( "foo", encoded, mask_and_scale=mask_and_scale, decode_times=decode_times ) result = conventions.encode_cf_variable(decoded, name="foo") - assert_equal(encoded, result) + assert_identical(encoded, result) + assert encoded.dtype == result.dtype FREQUENCIES_TO_ENCODING_UNITS = { @@ -1938,4 +1940,5 @@ def test_decode_timedelta_mask_and_scale( "foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta ) result = conventions.encode_cf_variable(decoded, name="foo") - assert_equal(encoded, result) + assert_identical(encoded, result) + assert encoded.dtype == result.dtype From 31ed6bbc9015e05fa02364f2b5db02c7ab0dc8eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 25 Feb 2025 13:15:00 +0100 Subject: [PATCH 10/13] simplify code, add whats-new.rst entry --- doc/whats-new.rst | 2 ++ xarray/coding/variables.py | 16 +++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a10a8c8851f..00251ba883b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,8 @@ Bug fixes Haacker `_. - Fix ``isel`` for multi-coordinate Xarray indexes (:issue:`10063`, :pull:`10066`). By `Benoit Bovy `_. +- Improve handling of dtype and NaT when encoding/decoding masked and packaged datetimes and timedeltas (:issue:`8957`, :pull:`10050`). + By `Kai Mühlbauer `_. 
Documentation diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 77e2f0602ce..a2534df0ec7 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -502,10 +502,8 @@ def decode(self, variable: Variable, name: T_Name = None): (is_time_like == "datetime" and self.decode_times) or (is_time_like == "timedelta" and self.decode_timedelta) ) and data.dtype.kind in "iu": - dtype, decoded_fill_value = ( - np.int64, - np.iinfo(np.int64).min, - ) # np.dtype(f"{is_time_like}64[s]") + dtype = np.int64 + decoded_fill_value = np.iinfo(np.int64).min else: dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) @@ -624,17 +622,17 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: scale_factor = np.asarray(scale_factor).item() if np.ndim(add_offset) > 0: add_offset = np.asarray(add_offset).item() - # if we have a _FillValue/masked_value we already have the wanted + # if we have a _FillValue/masked_value in encoding we already have the wanted # floating point dtype here (via CFMaskCoder), so no check is necessary # only check in other cases and for time-like dtype = data.dtype is_time_like = _is_time_like(attrs.get("units")) - if (is_time_like == "datetime" and self.decode_times) or ( - is_time_like == "timedelta" and self.decode_timedelta + if ( + ("_FillValue" not in encoding and "missing_value" not in encoding) + or (is_time_like == "datetime" and self.decode_times) + or (is_time_like == "timedelta" and self.decode_timedelta) ): dtype = _choose_float_dtype(dtype, encoding) - if "_FillValue" not in encoding and "missing_value" not in encoding: - dtype = _choose_float_dtype(dtype, encoding) transform = partial( _scale_offset_decoding, From dab3b0e68c43054c533bb52c4353fd8f45e39277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 5 Mar 2025 10:32:13 +0100 Subject: [PATCH 11/13] Update xarray/coding/variables.py Co-authored-by: Spencer Clark --- xarray/coding/variables.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index a2534df0ec7..c428ef8694b 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -234,7 +234,7 @@ def _apply_mask( def _is_time_like(units): # test for time-like - # return "datetime" for datetetime-like + # return "datetime" for datetime-like # return "timedelta" for timedelta-like if units is None: return False From a304375f6ecdf5471ef47a35ef79b4200a1cbe60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 5 Mar 2025 11:43:54 +0100 Subject: [PATCH 12/13] refactor common code into common.py to prevent circular imports when passing decode_times and decode_timedelta to CFMaskCoder and CFScaleOffsetCoder --- xarray/coding/common.py | 136 +++++++++++++++++++++++++++++++ xarray/coding/times.py | 2 +- xarray/coding/variables.py | 160 ++++++------------------------------- xarray/conventions.py | 6 +- 4 files changed, 165 insertions(+), 139 deletions(-) create mode 100644 xarray/coding/common.py diff --git a/xarray/coding/common.py b/xarray/coding/common.py new file mode 100644 index 00000000000..1b455009668 --- /dev/null +++ b/xarray/coding/common.py @@ -0,0 +1,136 @@ +from __future__ import annotations + +from collections.abc import Callable, Hashable, MutableMapping +from typing import TYPE_CHECKING, Any, Union + +import numpy as np + +from xarray.core import indexing +from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array + +if TYPE_CHECKING: + T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict] + T_Name = Union[Hashable, None] + + +class SerializationWarning(RuntimeWarning): + """Warnings about encoding/decoding issues in serialization.""" + + +class VariableCoder: + """Base class for encoding and decoding transformations on variables. 
+ + We use coders for transforming variables between xarray's data model and + a format suitable for serialization. For example, coders apply CF + conventions for how data should be represented in netCDF files. + + Subclasses should implement encode() and decode(), which should satisfy + the identity ``coder.decode(coder.encode(variable)) == variable``. If any + options are necessary, they should be implemented as arguments to the + __init__ method. + + The optional name argument to encode() and decode() exists solely for the + sake of better error messages, and should correspond to the name of + variables in the underlying store. + """ + + def encode(self, variable: Variable, name: T_Name = None) -> Variable: + """Convert an encoded variable to a decoded variable""" + raise NotImplementedError() + + def decode(self, variable: Variable, name: T_Name = None) -> Variable: + """Convert a decoded variable to an encoded variable""" + raise NotImplementedError() + + +class _ElementwiseFunctionArray(indexing.ExplicitlyIndexedNDArrayMixin): + """Lazily computed array holding values of elemwise-function. + + Do not construct this object directly: call lazy_elemwise_func instead. + + Values are computed upon indexing or coercion to a NumPy array. 
+ """ + + def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike): + assert not is_chunked_array(array) + self.array = indexing.as_indexable(array) + self.func = func + self._dtype = dtype + + @property + def dtype(self) -> np.dtype: + return np.dtype(self._dtype) + + def _oindex_get(self, key): + return type(self)(self.array.oindex[key], self.func, self.dtype) + + def _vindex_get(self, key): + return type(self)(self.array.vindex[key], self.func, self.dtype) + + def __getitem__(self, key): + return type(self)(self.array[key], self.func, self.dtype) + + def get_duck_array(self): + return self.func(self.array.get_duck_array()) + + def __repr__(self) -> str: + return f"{type(self).__name__}({self.array!r}, func={self.func!r}, dtype={self.dtype!r})" + + +def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): + """Lazily apply an element-wise function to an array. + Parameters + ---------- + array : any valid value of Variable._data + func : callable + Function to apply to indexed slices of an array. For use with dask, + this should be a pickle-able object. + dtype : coercible to np.dtype + Dtype for the result of this function. + + Returns + ------- + Either a dask.array.Array or _ElementwiseFunctionArray. + """ + if is_chunked_array(array): + chunkmanager = get_chunked_array_type(array) + + return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore[arg-type] + else: + return _ElementwiseFunctionArray(array, func, dtype) + + +def safe_setitem(dest, key: Hashable, value, name: T_Name = None): + if key in dest: + var_str = f" on variable {name!r}" if name else "" + raise ValueError( + f"failed to prevent overwriting existing key {key} in attrs{var_str}. " + "This is probably an encoding field used by xarray to describe " + "how a variable is serialized. To proceed, remove this key from " + "the variable's attributes manually." 
+ ) + dest[key] = value + + +def pop_to( + source: MutableMapping, dest: MutableMapping, key: Hashable, name: T_Name = None +) -> Any: + """ + A convenience function which pops a key k from source to dest. + None values are not passed on. If k already exists in dest an + error is raised. + """ + value = source.pop(key, None) + if value is not None: + safe_setitem(dest, key, value, name=name) + return value + + +def unpack_for_encoding(var: Variable) -> T_VarTuple: + return var.dims, var.data, var.attrs.copy(), var.encoding.copy() + + +def unpack_for_decoding(var: Variable) -> T_VarTuple: + return var.dims, var._data, var.attrs.copy(), var.encoding.copy() diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 06c2163a62a..997639e9a91 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -11,7 +11,7 @@ import pandas as pd from pandas.errors import OutOfBoundsDatetime, OutOfBoundsTimedelta -from xarray.coding.variables import ( +from xarray.coding.common import ( SerializationWarning, VariableCoder, lazy_elemwise_func, diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index c428ef8694b..1b7bc95e2b4 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -3,87 +3,31 @@ from __future__ import annotations import warnings -from collections.abc import Callable, Hashable, MutableMapping +from collections.abc import Hashable, MutableMapping from functools import partial from typing import TYPE_CHECKING, Any, Union import numpy as np import pandas as pd +from xarray.coding.common import ( + SerializationWarning, + VariableCoder, + lazy_elemwise_func, + pop_to, + safe_setitem, + unpack_for_decoding, + unpack_for_encoding, +) +from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.variable import Variable -from xarray.namedarray.parallelcompat import get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array 
if TYPE_CHECKING: T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict] T_Name = Union[Hashable, None] -class SerializationWarning(RuntimeWarning): - """Warnings about encoding/decoding issues in serialization.""" - - -class VariableCoder: - """Base class for encoding and decoding transformations on variables. - - We use coders for transforming variables between xarray's data model and - a format suitable for serialization. For example, coders apply CF - conventions for how data should be represented in netCDF files. - - Subclasses should implement encode() and decode(), which should satisfy - the identity ``coder.decode(coder.encode(variable)) == variable``. If any - options are necessary, they should be implemented as arguments to the - __init__ method. - - The optional name argument to encode() and decode() exists solely for the - sake of better error messages, and should correspond to the name of - variables in the underlying store. - """ - - def encode(self, variable: Variable, name: T_Name = None) -> Variable: - """Convert an encoded variable to a decoded variable""" - raise NotImplementedError() - - def decode(self, variable: Variable, name: T_Name = None) -> Variable: - """Convert a decoded variable to an encoded variable""" - raise NotImplementedError() - - -class _ElementwiseFunctionArray(indexing.ExplicitlyIndexedNDArrayMixin): - """Lazily computed array holding values of elemwise-function. - - Do not construct this object directly: call lazy_elemwise_func instead. - - Values are computed upon indexing or coercion to a NumPy array. 
- """ - - def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike): - assert not is_chunked_array(array) - self.array = indexing.as_indexable(array) - self.func = func - self._dtype = dtype - - @property - def dtype(self) -> np.dtype: - return np.dtype(self._dtype) - - def _oindex_get(self, key): - return type(self)(self.array.oindex[key], self.func, self.dtype) - - def _vindex_get(self, key): - return type(self)(self.array.vindex[key], self.func, self.dtype) - - def __getitem__(self, key): - return type(self)(self.array[key], self.func, self.dtype) - - def get_duck_array(self): - return self.func(self.array.get_duck_array()) - - def __repr__(self) -> str: - return f"{type(self).__name__}({self.array!r}, func={self.func!r}, dtype={self.dtype!r})" - - class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): """Decode arrays on the fly from non-native to native endianness @@ -161,63 +105,6 @@ def __getitem__(self, key) -> np.ndarray: return np.asarray(self.array[key], dtype=self.dtype) -def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): - """Lazily apply an element-wise function to an array. - Parameters - ---------- - array : any valid value of Variable._data - func : callable - Function to apply to indexed slices of an array. For use with dask, - this should be a pickle-able object. - dtype : coercible to np.dtype - Dtype for the result of this function. - - Returns - ------- - Either a dask.array.Array or _ElementwiseFunctionArray. 
- """ - if is_chunked_array(array): - chunkmanager = get_chunked_array_type(array) - - return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore[arg-type] - else: - return _ElementwiseFunctionArray(array, func, dtype) - - -def unpack_for_encoding(var: Variable) -> T_VarTuple: - return var.dims, var.data, var.attrs.copy(), var.encoding.copy() - - -def unpack_for_decoding(var: Variable) -> T_VarTuple: - return var.dims, var._data, var.attrs.copy(), var.encoding.copy() - - -def safe_setitem(dest, key: Hashable, value, name: T_Name = None): - if key in dest: - var_str = f" on variable {name!r}" if name else "" - raise ValueError( - f"failed to prevent overwriting existing key {key} in attrs{var_str}. " - "This is probably an encoding field used by xarray to describe " - "how a variable is serialized. To proceed, remove this key from " - "the variable's attributes manually." - ) - dest[key] = value - - -def pop_to( - source: MutableMapping, dest: MutableMapping, key: Hashable, name: T_Name = None -) -> Any: - """ - A convenience function which pops a key k from source to dest. - None values are not passed on. If k already exists in dest an - error is raised. 
- """ - value = source.pop(key, None) - if value is not None: - safe_setitem(dest, key, value, name=name) - return value - - def _apply_mask( data: np.ndarray, encoded_fill_values: list, @@ -371,8 +258,8 @@ class CFMaskCoder(VariableCoder): def __init__( self, - decode_times: bool = False, - decode_timedelta: bool = False, + decode_times: bool | CFDatetimeCoder = False, + decode_timedelta: bool | CFTimedeltaCoder = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta @@ -407,9 +294,11 @@ def encode(self, variable: Variable, name: T_Name = None): encoding["_FillValue"] = ( _encode_unsigned_fill_value(name, fv, dtype) if has_unsigned - else dtype.type(fv) - if "add_offset" not in encoding and "scale_factor" not in encoding - else fv + else ( + dtype.type(fv) + if "add_offset" not in encoding and "scale_factor" not in encoding + else fv + ) ) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) @@ -422,9 +311,12 @@ def encode(self, variable: Variable, name: T_Name = None): ( _encode_unsigned_fill_value(name, mv, dtype) if has_unsigned - else dtype.type(mv) - if "add_offset" not in encoding and "scale_factor" not in encoding - else mv + else ( + dtype.type(mv) + if "add_offset" not in encoding + and "scale_factor" not in encoding + else mv + ) ), ) fill_value = pop_to(encoding, attrs, "missing_value", name=name) @@ -587,8 +479,8 @@ class CFScaleOffsetCoder(VariableCoder): def __init__( self, - decode_times: bool = False, - decode_timedelta: bool = False, + decode_times: bool | CFDatetimeCoder = False, + decode_timedelta: bool | CFTimedeltaCoder = False, ) -> None: self.decode_times = decode_times self.decode_timedelta = decode_timedelta diff --git a/xarray/conventions.py b/xarray/conventions.py index 53169f04457..071dab43c28 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -193,14 +193,12 @@ def decode_cf_variable( var = variables.Numpy2StringDTypeCoder().decode(var) if mask_and_scale: - dec_times = True 
if decode_times else False - dec_timedelta = True if decode_timedelta else False for coder in [ variables.CFMaskCoder( - decode_times=dec_times, decode_timedelta=dec_timedelta + decode_times=decode_times, decode_timedelta=decode_timedelta ), variables.CFScaleOffsetCoder( - decode_times=dec_times, decode_timedelta=dec_timedelta + decode_times=decode_times, decode_timedelta=decode_timedelta ), ]: var = coder.decode(var, name=name) From 200eae2dc41546ab6966f7eb612242bcd8ac1f52 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 7 Mar 2025 07:20:00 +0000 Subject: [PATCH 13/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- xarray/tests/test_coding_times.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2c47162992d..994fc70339c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -71,7 +71,7 @@ Bug fixes By `Benoit Bovy `_. - Fix dask tokenization when opening each node in :py:func:`xarray.open_datatree` (:issue:`10098`, :pull:`10100`). By `Sam Levang `_. -- Improve handling of dtype and NaT when encoding/decoding masked and packaged +- Improve handling of dtype and NaT when encoding/decoding masked and packaged datetimes and timedeltas (:issue:`8957`, :pull:`10050`). By `Kai Mühlbauer `_. 
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index c5386bab919..e736339da1b 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1949,10 +1949,10 @@ def test_decode_timedelta_mask_and_scale( assert_identical(encoded, result) assert encoded.dtype == result.dtype - + def test_decode_floating_point_timedelta_no_serialization_warning() -> None: attrs = {"units": "seconds"} encoded = Variable(["time"], [0, 0.1, 0.2], attrs=attrs) decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True) with assert_no_warnings(): - decoded.load() \ No newline at end of file + decoded.load() pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy