Unify timedelta64 coding logic between the old and new approaches

Always write a dtype attribute to disk regardless of how the timedeltas were decoded.
pydata · spencerkclark · Jul 2, 2025 · Jun 29, 2025 · Jul 1, 2025 · Jul 1, 2025
commit bdda733a569713f640f6ae65900fce79d6d1eb53
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -121,10 +121,11 @@ Bug fixes
   (:pull:`10352`). By `Spencer Clark <https://github.com/spencerkclark>`_.
 - Avoid unsafe casts from float to unsigned int in CFMaskCoder (:issue:`9815`, :pull:`9964`).
   By ` Elliott Sales de Andrade <https://github.com/QuLogic>`_.
-- Fix attribute overwriting bug when decoding literally encoded
-  :py:class:`numpy.timedelta64` values from disk (:issue:`10468`,
-  :pull:`10469`). By `Spencer Clark <https://github.com/spencerkclark>`_.
-- Fix default ``"_FillValue"`` dtype coercion bug when literally encoding
+- Fix attribute overwriting bug when decoding
+  :py:class:`numpy.timedelta64` values that were encoded on disk with a dtype
+  attribute (:issue:`10468`, :pull:`10469`). By `Spencer Clark
+  <https://github.com/spencerkclark>`_.
+- Fix default ``"_FillValue"`` dtype coercion bug when encoding
   :py:class:`numpy.timedelta64` values to an on-disk format that only supports
   32-bit integers (:issue:`10466`, :pull:`10469`). By `Spencer Clark
   <https://github.com/spencerkclark>`_.

diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py
@@ -103,7 +103,7 @@ def encode_nc3_attrs(attrs):
     return {k: encode_nc3_attr_value(v) for k, v in attrs.items()}
 
 
-def _maybe_prepare_times(var, name=None):
+def _maybe_prepare_times(var):
     # checks for integer-based time-like and
     # replaces np.iinfo(np.int64).min with _FillValue or np.nan
     # this keeps backwards compatibility
@@ -112,21 +112,7 @@ def _maybe_prepare_times(var, name=None):
     if data.dtype.kind in "iu":
         units = var.attrs.get("units", None)
         if units is not None and coding.variables._is_time_like(units):
-            default_int64_fill_value = np.iinfo(np.int64).min
-            default_int32_fill_value = np.iinfo(np.int32).min
-            mask = data == default_int64_fill_value
-
-            if var.attrs.get("_FillValue") == default_int64_fill_value:
-                if (data == default_int32_fill_value).any():
-                    raise ValueError(
-                        f"Could not safely coerce default int64 _FillValue "
-                        f"({default_int64_fill_value}) to the analogous int32 "
-                        f"value ({default_int32_fill_value}), since it "
-                        f"already exists as non-missing within variable "
-                        f"{name!r}. Try explicitly setting "
-                        f"encoding['_FillValue'] to another int32 value."
-                    )
-                var.attrs["_FillValue"] = default_int32_fill_value
+            mask = data == np.iinfo(np.int64).min
             if mask.any():
                 data = np.where(mask, var.attrs.get("_FillValue", np.nan), data)
     return data
@@ -138,7 +124,7 @@ def encode_nc3_variable(var, name=None):
         coding.strings.CharacterArrayCoder(),
     ]:
         var = coder.encode(var, name=name)
-    data = _maybe_prepare_times(var, name=name)
+    data = _maybe_prepare_times(var)
     data = coerce_nc3_dtype(data)
     attrs = encode_nc3_attrs(var.attrs)
     return Variable(var.dims, data, attrs, var.encoding)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
@@ -1410,6 +1410,43 @@ def has_timedelta64_encoding_dtype(attrs_or_encoding: dict) -> bool:
     return isinstance(dtype, str) and dtype.startswith("timedelta64")
 
 
+def resolve_time_unit_from_attrs_dtype(
+    attrs_dtype: str, name: T_Name
+) -> PDDatetimeUnitOptions:
+    dtype = np.dtype(attrs_dtype)
+    resolution, _ = np.datetime_data(dtype)
+    resolution = cast(NPDatetimeUnitOptions, resolution)
+    if np.timedelta64(1, resolution) > np.timedelta64(1, "s"):
+        time_unit = cast(PDDatetimeUnitOptions, "s")
+        message = (
+            f"Following pandas, xarray only supports decoding to timedelta64 "
+            f"values with a resolution of 's', 'ms', 'us', or 'ns'. Encoded "
+            f"values for variable {name!r} have a resolution of "
+            f"{resolution!r}. Attempting to decode to a resolution of 's'. "
+            f"Note, depending on the encoded values, this may lead to an "
+            f"OverflowError. Additionally, data will not be identically round "
+            f"tripped; xarray will choose an encoding dtype of "
+            f"'timedelta64[s]' when re-encoding."
+        )
+        emit_user_level_warning(message)
+    elif np.timedelta64(1, resolution) < np.timedelta64(1, "ns"):
+        time_unit = cast(PDDatetimeUnitOptions, "ns")
+        message = (
+            f"Following pandas, xarray only supports decoding to timedelta64 "
+            f"values with a resolution of 's', 'ms', 'us', or 'ns'. Encoded "
+            f"values for variable {name!r} have a resolution of "
+            f"{resolution!r}. Attempting to decode to a resolution of 'ns'. "
+            f"Note, depending on the encoded values, this may lead to loss of "
+            f"precision. Additionally, data will not be identically round "
+            f"tripped; xarray will choose an encoding dtype of "
+            f"'timedelta64[ns]' when re-encoding."
+        )
+        emit_user_level_warning(message)
+    else:
+        time_unit = cast(PDDatetimeUnitOptions, resolution)
+    return time_unit
+
+
 class CFTimedeltaCoder(VariableCoder):
     """Coder for CF Timedelta coding.
 
@@ -1430,7 +1467,7 @@ class CFTimedeltaCoder(VariableCoder):
 
     def __init__(
         self,
-        time_unit: PDDatetimeUnitOptions = "ns",
+        time_unit: PDDatetimeUnitOptions | None = None,
         decode_via_units: bool = True,
         decode_via_dtype: bool = True,
     ) -> None:
@@ -1442,45 +1479,18 @@ def __init__(
     def encode(self, variable: Variable, name: T_Name = None) -> Variable:
         if np.issubdtype(variable.data.dtype, np.timedelta64):
             dims, data, attrs, encoding = unpack_for_encoding(variable)
-            has_timedelta_dtype = has_timedelta64_encoding_dtype(encoding)
-            if ("units" in encoding or "dtype" in encoding) and not has_timedelta_dtype:
-                dtype = encoding.get("dtype", None)
-                units = encoding.pop("units", None)
+            dtype = encoding.get("dtype", None)
+            units = encoding.pop("units", None)
 
-                # in the case of packed data we need to encode into
-                # float first, the correct dtype will be established
-                # via CFScaleOffsetCoder/CFMaskCoder
-                if "add_offset" in encoding or "scale_factor" in encoding:
-                    dtype = data.dtype if data.dtype.kind == "f" else "float64"
+            # in the case of packed data we need to encode into
+            # float first, the correct dtype will be established
+            # via CFScaleOffsetCoder/CFMaskCoder
+            if "add_offset" in encoding or "scale_factor" in encoding:
+                dtype = data.dtype if data.dtype.kind == "f" else "float64"
 
-            else:
-                resolution, _ = np.datetime_data(variable.dtype)
-                dtype = np.int64
-                attrs_dtype = f"timedelta64[{resolution}]"
-                units = _numpy_dtype_to_netcdf_timeunit(variable.dtype)
-                safe_setitem(attrs, "dtype", attrs_dtype, name=name)
-                # Remove dtype encoding if it exists to prevent it from
-                # interfering downstream in NonStringCoder.
-                encoding.pop("dtype", None)
-
-                if any(
-                    k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS
-                ):
-                    raise ValueError(
-                        f"Specifying 'add_offset' or 'scale_factor' is not "
-                        f"supported when encoding the timedelta64 values of "
-                        f"variable {name!r} with xarray's new default "
-                        f"timedelta64 encoding approach. To encode {name!r} "
-                        f"with xarray's previous timedelta64 encoding "
-                        f"approach, which supports the 'add_offset' and "
-                        f"'scale_factor' parameters, additionally set "
-                        f"encoding['units'] to a unit of time, e.g. "
-                        f"'seconds'. To proceed with encoding of {name!r} "
-                        f"via xarray's new approach, remove any encoding "
-                        f"entries for 'add_offset' or 'scale_factor'."
-                    )
-                if "_FillValue" not in encoding and "missing_value" not in encoding:
-                    encoding["_FillValue"] = np.iinfo(np.int64).min
+            resolution, _ = np.datetime_data(variable.dtype)
+            attrs_dtype = f"timedelta64[{resolution}]"
+            safe_setitem(attrs, "dtype", attrs_dtype, name=name)
 
             data, units = encode_cf_timedelta(data, units, dtype)
             safe_setitem(attrs, "units", units, name=name)
@@ -1499,57 +1509,13 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
         ):
             dims, data, attrs, encoding = unpack_for_decoding(variable)
             units = pop_to(attrs, encoding, "units")
-            if is_dtype_decodable and self.decode_via_dtype:
-                if any(
-                    k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS
-                ):
-                    raise ValueError(
-                        f"Decoding timedelta64 values via dtype is not "
-                        f"supported when 'add_offset', or 'scale_factor' are "
-                        f"present in encoding. Check the encoding parameters "
-                        f"of variable {name!r}."
-                    )
-                # Overwrite the on-disk dtype encoding, which is numeric, with
-                # the dtype attribute stored on disk, which corresponds to
-                # a timedelta64 dtype.
-                encoding["dtype"] = attrs.pop("dtype")
-                dtype = np.dtype(encoding["dtype"])
-                resolution, _ = np.datetime_data(dtype)
-                resolution = cast(NPDatetimeUnitOptions, resolution)
-                if np.timedelta64(1, resolution) > np.timedelta64(1, "s"):
-                    time_unit = cast(PDDatetimeUnitOptions, "s")
-                    dtype = np.dtype("timedelta64[s]")
-                    message = (
-                        f"Following pandas, xarray only supports decoding to "
-                        f"timedelta64 values with a resolution of 's', 'ms', "
-                        f"'us', or 'ns'. Encoded values for variable {name!r} "
-                        f"have a resolution of {resolution!r}. Attempting to "
-                        f"decode to a resolution of 's'. Note, depending on "
-                        f"the encoded values, this may lead to an "
-                        f"OverflowError. Additionally, data will not be "
-                        f"identically round tripped; xarray will choose an "
-                        f"encoding dtype of 'timedelta64[s]' when re-encoding."
-                    )
-                    emit_user_level_warning(message)
-                elif np.timedelta64(1, resolution) < np.timedelta64(1, "ns"):
-                    time_unit = cast(PDDatetimeUnitOptions, "ns")
-                    dtype = np.dtype("timedelta64[ns]")
-                    message = (
-                        f"Following pandas, xarray only supports decoding to "
-                        f"timedelta64 values with a resolution of 's', 'ms', "
-                        f"'us', or 'ns'. Encoded values for variable {name!r} "
-                        f"have a resolution of {resolution!r}. Attempting to "
-                        f"decode to a resolution of 'ns'. Note, depending on "
-                        f"the encoded values, this may lead to loss of "
-                        f"precision. Additionally, data will not be "
-                        f"identically round tripped; xarray will choose an "
-                        f"encoding dtype of 'timedelta64[ns]' "
-                        f"when re-encoding."
-                    )
-                    emit_user_level_warning(message)
+            if is_dtype_decodable:
+                attrs_dtype = attrs.pop("dtype")
+                if self.time_unit is None:
+                    time_unit = resolve_time_unit_from_attrs_dtype(attrs_dtype, name)
                 else:
-                    time_unit = cast(PDDatetimeUnitOptions, resolution)
-            elif self.decode_via_units:
+                    time_unit = self.time_unit
+            else:
                 if self._emit_decode_timedelta_future_warning:
                     emit_user_level_warning(
                         "In a future version, xarray will not decode "
@@ -1567,8 +1533,19 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
                         "'CFTimedeltaCoder' instance.",
                         FutureWarning,
                     )
-                dtype = np.dtype(f"timedelta64[{self.time_unit}]")
-                time_unit = self.time_unit
+                if self.time_unit is None:
+                    time_unit = cast(PDDatetimeUnitOptions, "ns")
+                else:
+                    time_unit = self.time_unit
+
+                # Handle the edge case where decode_via_dtype=False and
+                # decode_via_units=True, but the timedeltas were encoded with
+                # a dtype attribute. Remove that attribute so that it does
+                # not cause an error during round tripping.
+                if has_timedelta_dtype:
+                    attrs.pop("dtype")
+
+            dtype = np.dtype(f"timedelta64[{time_unit}]")
             transform = partial(decode_cf_timedelta, units=units, time_unit=time_unit)
             data = lazy_elemwise_func(data, transform, dtype=dtype)
             return Variable(dims, data, attrs, encoding, fastpath=True)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
@@ -56,6 +56,7 @@
 from xarray.conventions import encode_dataset_coordinates
 from xarray.core import indexing
 from xarray.core.options import set_options
+from xarray.core.types import PDDatetimeUnitOptions
 from xarray.core.utils import module_available
 from xarray.namedarray.pycompat import array_type
 from xarray.tests import (
@@ -351,16 +352,6 @@ def test_dtype_coercion_error(self) -> None:
                 with pytest.raises(ValueError, match="could not safely cast"):
                     ds.to_netcdf(path, format=format)
 
-    def test_literal_timedelta_fill_value_coercion_error(self) -> None:
-        for format in self.netcdf3_formats:
-            timedeltas = np.array(
-                [0, np.iinfo(np.int32).min, np.iinfo(np.int64).min]
-            ).astype("timedelta64[s]")
-            ds = Dataset({"timedeltas": ("timedeltas", timedeltas)})
-            with create_tmp_file(allow_cleanup_failure=False) as path:
-                with pytest.raises(ValueError, match="_FillValue"):
-                    ds.to_netcdf(path, format=format)
-
 
 class DatasetIOBase:
     engine: T_NetcdfEngine | None = None
@@ -652,8 +643,10 @@ def test_roundtrip_timedelta_data(self) -> None:
         ) as actual:
             assert_identical(expected, actual)
 
-    def test_roundtrip_literal_timedelta_data(self) -> None:
-        time_deltas = pd.to_timedelta(["1h", "2h", "NaT"]).as_unit("s")  # type: ignore[arg-type, unused-ignore]
+    def test_roundtrip_timedelta_data_via_dtype(
+        self, time_unit: PDDatetimeUnitOptions
+    ) -> None:
+        time_deltas = pd.to_timedelta(["1h", "2h", "NaT"]).as_unit(time_unit)  # type: ignore[arg-type, unused-ignore]
         expected = Dataset(
             {"td": ("td", time_deltas), "td0": time_deltas[0].to_numpy()}
         )