From ebc79bdddb2ab63bc8708496f93d55309dc5a7c4 Mon Sep 17 00:00:00 2001 From: Brian Michell Date: Tue, 29 Apr 2025 13:11:09 -0500 Subject: [PATCH 1/9] Adds chunk key encoding to kwargs passed to zarr Accept chunk_key_encoding for Datasets and DataArrays Add test Add documentation for chunk key encoding --- doc/internals/zarr-encoding-spec.rst | 44 ++++++++++++++++++++++++++++ xarray/backends/zarr.py | 1 + xarray/tests/test_backends.py | 33 +++++++++++++++++++++ 3 files changed, 78 insertions(+) diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index 958dad166e1..9431d2bc945 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -73,3 +73,47 @@ re-open it directly with Zarr: import shutil shutil.rmtree("rasm.zarr") + +Chunk Key Encoding +----------------- + +When writing data to Zarr stores, Xarray supports customizing how chunk keys are encoded +through the ``chunk_key_encoding`` parameter in the variable's encoding dictionary. This +is particularly useful when working with Zarr V2 arrays and you need to control the +dimension separator in chunk keys. + +For example, to specify a custom separator for chunk keys: + +.. ipython:: python + :okwarning: + + import xarray as xr + import numpy as np + from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding + + # Create a custom chunk key encoding with "/" as separator + enc = V2ChunkKeyEncoding(separator="/").to_dict() + + # Create and write a dataset with custom chunk key encoding + arr = np.ones((42, 100)) + ds = xr.DataArray(arr, name="var1").to_dataset() + ds.to_zarr("example.zarr", zarr_format=2, mode="w", + encoding={"var1": {"chunks": (42, 50), + "chunk_key_encoding": enc}}) + +The ``chunk_key_encoding`` option accepts a dictionary that specifies the encoding +configuration. For Zarr V2 arrays, you can use the ``V2ChunkKeyEncoding`` class from +``zarr.core.chunk_key_encodings`` to generate this configuration. This is particularly +useful when you need to ensure compatibility with specific Zarr V2 storage layouts or +when working with tools that expect a particular chunk key format. + +.. note:: + The ``chunk_key_encoding`` option is only relevant when writing to Zarr stores. + When reading Zarr arrays, Xarray automatically detects and uses the appropriate + chunk key encoding based on the store's format and configuration. + +.. ipython:: python + :suppress: + + import shutil + shutil.rmtree("example.zarr") diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 1a46346dda7..b11646d711a 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -464,6 +464,7 @@ def extract_zarr_variable_encoding( "serializer", "cache_metadata", "write_empty_chunks", + "chunk_key_encoding", } if zarr_format == 3: valid_encodings.add("fill_value") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1d9c90b37b1..3498fa4adeb 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3661,6 +3661,39 @@ def create_zarr_target(self): else: yield {} + def test_chunk_key_encoding(self) -> None: + from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding + + # Create a dataset with a variable name containing a period + data = np.ones((4, 4)) + original = Dataset({"var1": (("x", "y"), data)}) + + # Set up chunk key encoding with slash separator + encoding = { + "var1": { + "chunk_key_encoding": V2ChunkKeyEncoding(separator="/").to_dict(), + "chunks": (2, 2), + } + } + + # Write to store with custom encoding + with self.create_zarr_target() as store: + original.to_zarr(store, encoding=encoding) + + # Verify the chunk keys in store use the slash separator + if not has_zarr_v3: + chunk_keys = [k for k in store.keys() if k.startswith("var1/")] + assert len(chunk_keys) > 0 + for key in chunk_keys: + assert "/" in key + assert "." not in key.split("/")[1:] # No dots in chunk coordinates + + # Read back and verify data + with xr.open_zarr(store) as actual: + assert_identical(original, actual) + # Verify chunks are preserved + assert actual["var1"].encoding["chunks"] == (2, 2) + @requires_zarr @pytest.mark.skipif( From 9f680007bdd1d92ec5cb3e7c95b32f010438c7ff Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 18:18:06 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/internals/zarr-encoding-spec.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index 9431d2bc945..62fc8c0a8d2 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -97,9 +97,12 @@ For example, to specify a custom separator for chunk keys: # Create and write a dataset with custom chunk key encoding arr = np.ones((42, 100)) ds = xr.DataArray(arr, name="var1").to_dataset() - ds.to_zarr("example.zarr", zarr_format=2, mode="w", - encoding={"var1": {"chunks": (42, 50), - "chunk_key_encoding": enc}}) + ds.to_zarr( + "example.zarr", + zarr_format=2, + mode="w", + encoding={"var1": {"chunks": (42, 50), "chunk_key_encoding": enc}}, + ) The ``chunk_key_encoding`` option accepts a dictionary that specifies the encoding configuration. For Zarr V2 arrays, you can use the ``V2ChunkKeyEncoding`` class from @@ -116,4 +119,5 @@ when working with tools that expect a particular chunk key format. :suppress: import shutil + shutil.rmtree("example.zarr") From 27c52cf50ecfcf74b8ab4ccb3e12f667243ccb5e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 29 Apr 2025 18:19:29 +0000 Subject: [PATCH 3/9] Update whats-new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 868ce6005c0..a7fbb12e105 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -76,6 +76,8 @@ Bug fixes By `Mathias Hauser `_. - Variables with no temporal dimension are left untouched by :py:meth:`~xarray.Dataset.convert_calendar`. (:issue:`10266`, :pull:`10268`) By `Pascal Bourgault `_. +- Enable `chunk_key_encoding` in :py:meth:`~xarray.Dataset.to_zarr` for Zarr v2 Datasets (:pull:`10274`) + By `BrianMichell `_. Documentation ~~~~~~~~~~~~~ From 3249fb91c4e87d3f7501583b4c556f7ef757df76 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 29 Apr 2025 18:25:56 +0000 Subject: [PATCH 4/9] Fixing pre-commit --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a7fbb12e105..9e7f4ebf1b1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -76,7 +76,7 @@ Bug fixes By `Mathias Hauser `_. - Variables with no temporal dimension are left untouched by :py:meth:`~xarray.Dataset.convert_calendar`. (:issue:`10266`, :pull:`10268`) By `Pascal Bourgault `_. -- Enable `chunk_key_encoding` in :py:meth:`~xarray.Dataset.to_zarr` for Zarr v2 Datasets (:pull:`10274`) +- Enable ``chunk_key_encoding`` in :py:meth:`~xarray.Dataset.to_zarr` for Zarr v2 Datasets (:pull:`10274`) By `BrianMichell `_. Documentation From bcc0e5a3be193a0add45042861d5cba87f5eac2d Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 29 Apr 2025 21:01:40 +0000 Subject: [PATCH 5/9] Add fallback for zarr<3 --- xarray/tests/test_backends.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3498fa4adeb..21709672cdc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3662,7 +3662,12 @@ def create_zarr_target(self): yield {} def test_chunk_key_encoding(self) -> None: - from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding + try: + from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding + encoding = V2ChunkKeyEncoding(separator="/").to_dict() + except ImportError: + # Fallback for zarr<3 + encoding = {'name': 'v2', 'configuration': {'separator': '/'}} # Create a dataset with a variable name containing a period data = np.ones((4, 4)) @@ -3671,7 +3676,7 @@ def test_chunk_key_encoding(self) -> None: # Set up chunk key encoding with slash separator encoding = { "var1": { - "chunk_key_encoding": V2ChunkKeyEncoding(separator="/").to_dict(), + "chunk_key_encoding": encoding, "chunks": (2, 2), } } From 471061cea0cd1135e18b1218ebce557119e1fe4a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 21:02:24 +0000 Subject: [PATCH 6/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_backends.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 21709672cdc..56a9e412fac 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3664,10 +3664,11 @@ def create_zarr_target(self): def test_chunk_key_encoding(self) -> None: try: from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding + encoding = V2ChunkKeyEncoding(separator="/").to_dict() except ImportError: # Fallback for zarr<3 - encoding = {'name': 'v2', 'configuration': {'separator': '/'}} + encoding = {"name": "v2", "configuration": {"separator": "/"}} # Create a dataset with a variable name containing a period data = np.ones((4, 4)) From 1ac47b9b04036b521654fc9cb2fe8a553e1b2eac Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 1 May 2025 15:24:57 +0000 Subject: [PATCH 7/9] Fix docs build warning --- doc/internals/zarr-encoding-spec.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index 62fc8c0a8d2..83f07929b03 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -75,7 +75,7 @@ re-open it directly with Zarr: shutil.rmtree("rasm.zarr") Chunk Key Encoding ------------------ +------------------ When writing data to Zarr stores, Xarray supports customizing how chunk keys are encoded through the ``chunk_key_encoding`` parameter in the variable's encoding dictionary. This From a484aa2440e06dc6a0e8b7e07ddc3152b556936d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 10 Jun 2025 09:18:19 -0600 Subject: [PATCH 8/9] Update xarray/tests/test_backends.py --- xarray/tests/test_backends.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 5873c7f40ed..62eee786ebc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3691,14 +3691,8 @@ def create_zarr_target(self): else: yield {} - def test_chunk_key_encoding(self) -> None: - try: - from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding - - encoding = V2ChunkKeyEncoding(separator="/").to_dict() - except ImportError: - # Fallback for zarr<3 - encoding = {"name": "v2", "configuration": {"separator": "/"}} + def test_chunk_key_encoding_v2(self) -> None: + encoding = {"name": "v2", "configuration": {"separator": "/"}} # Create a dataset with a variable name containing a period data = np.ones((4, 4)) From 5d522671208fa95acb25adac46407da7f84f128e Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 10 Jun 2025 09:21:09 -0600 Subject: [PATCH 9/9] Apply suggestions from code review --- doc/internals/zarr-encoding-spec.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index 184962c6f2d..1564b07c27d 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -88,8 +88,7 @@ dimension separator in chunk keys. For example, to specify a custom separator for chunk keys: -.. ipython:: python - :okwarning: +.. jupyter-execute:: import xarray as xr import numpy as np @@ -119,8 +118,8 @@ when working with tools that expect a particular chunk key format. When reading Zarr arrays, Xarray automatically detects and uses the appropriate chunk key encoding based on the store's format and configuration. -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import shutil pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy