diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst
index 83b08ae5129..1564b07c27d 100644
--- a/doc/internals/zarr-encoding-spec.rst
+++ b/doc/internals/zarr-encoding-spec.rst
@@ -77,3 +77,50 @@ re-open it directly with Zarr:
     import shutil
 
     shutil.rmtree("rasm.zarr")
+
+Chunk Key Encoding
+------------------
+
+When writing data to Zarr stores, Xarray supports customizing how chunk keys are encoded
+through the ``chunk_key_encoding`` parameter in the variable's encoding dictionary. This
+is particularly useful when you work with Zarr V2 arrays and need to control the
+dimension separator in chunk keys.
+
+For example, to specify a custom separator for chunk keys:
+
+.. jupyter-execute::
+
+    import xarray as xr
+    import numpy as np
+    from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding
+
+    # Create a custom chunk key encoding with "/" as separator
+    enc = V2ChunkKeyEncoding(separator="/").to_dict()
+
+    # Create and write a dataset with custom chunk key encoding
+    arr = np.ones((42, 100))
+    ds = xr.DataArray(arr, name="var1").to_dataset()
+    ds.to_zarr(
+        "example.zarr",
+        zarr_format=2,
+        mode="w",
+        encoding={"var1": {"chunks": (42, 50), "chunk_key_encoding": enc}},
+    )
+
+The ``chunk_key_encoding`` option accepts a dictionary that specifies the encoding
+configuration. For Zarr V2 arrays, you can use the ``V2ChunkKeyEncoding`` class from
+``zarr.core.chunk_key_encodings`` to generate this configuration. This is particularly
+useful when you need to ensure compatibility with specific Zarr V2 storage layouts or
+when working with tools that expect a particular chunk key format.
+
+.. note::
+    The ``chunk_key_encoding`` option is only relevant when writing to Zarr stores.
+    When reading Zarr arrays, Xarray automatically detects and uses the appropriate
+    chunk key encoding based on the store's format and configuration.
+
+.. jupyter-execute::
+    :hide-code:
+
+    import shutil
+
+    shutil.rmtree("example.zarr")
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 163aa77274c..049a10fbe04 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -164,6 +164,8 @@ Bug fixes
   By `Mathias Hauser <https://github.com/mathause>`_.
 - Variables with no temporal dimension are left untouched by :py:meth:`~xarray.Dataset.convert_calendar`. (:issue:`10266`, :pull:`10268`)
   By `Pascal Bourgault <https://github.com/aulemahal>`_.
+- Enable ``chunk_key_encoding`` in :py:meth:`~xarray.Dataset.to_zarr` for Zarr v2 Datasets (:pull:`10274`)
+  By `BrianMichell <https://github.com/BrianMichell>`_.
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 6645453b009..b86b5d0b374 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -395,6 +395,7 @@ def extract_zarr_variable_encoding(
         "serializer",
         "cache_metadata",
         "write_empty_chunks",
+        "chunk_key_encoding",
     }
     if zarr_format == 3:
         valid_encodings.add("fill_value")
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index e7576e1d6bd..e40213e6f46 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -3691,6 +3691,39 @@ def create_zarr_target(self):
         else:
             yield {}
 
+    def test_chunk_key_encoding_v2(self) -> None:
+        chunk_key_encoding = {"name": "v2", "configuration": {"separator": "/"}}
+
+        # Create a small 2-D dataset that is split into four (2, 2) chunks
+        data = np.ones((4, 4))
+        original = Dataset({"var1": (("x", "y"), data)})
+
+        # Set up chunk key encoding with slash separator
+        encoding = {
+            "var1": {
+                "chunk_key_encoding": chunk_key_encoding,
+                "chunks": (2, 2),
+            }
+        }
+
+        # Write to store with custom encoding
+        with self.create_zarr_target() as store:
+            original.to_zarr(store, encoding=encoding)
+
+            # Verify the chunk keys in store use the slash separator
+            if not has_zarr_v3:
+                keys = [k for k in store.keys() if k.startswith("var1/")]
+                chunk_keys = [k for k in keys if not k.startswith("var1/.")]
+                assert chunk_keys
+                # "."-separated keys look like "var1/0.1"; "/"-separated, "var1/0/1"
+                assert all("." not in key for key in chunk_keys)
+
+            # Read back and verify data
+            with xr.open_zarr(store) as actual:
+                assert_identical(original, actual)
+                # Verify chunks are preserved
+                assert actual["var1"].encoding["chunks"] == (2, 2)
+
 
 @requires_zarr
 @pytest.mark.skipif(
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy