From ed6257041097d18e6e20d8fdde37fb05dcf3a9be Mon Sep 17 00:00:00 2001
From: Jacob Prince-Bieker
Date: Mon, 13 Jan 2025 17:16:05 +0000
Subject: [PATCH 1/5] Add `shards` to `valid_encodings` to enable sharded Zarr writing

---
 xarray/backends/zarr.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 383c385e1d5..402763ced54 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -448,6 +448,7 @@ def extract_zarr_variable_encoding(
     safe_to_drop = {"source", "original_shape", "preferred_chunks"}
     valid_encodings = {
         "chunks",
+        "shards",
         "compressor",  # TODO: delete when min zarr >=3
         "compressors",
         "filters",

From b9d3b60806097e6bcc17a8ff38aa315ff22b4ffa Mon Sep 17 00:00:00 2001
From: Jacob Prince-Bieker
Date: Fri, 17 Jan 2025 08:52:36 +0000
Subject: [PATCH 2/5] Add test for shards

---
 xarray/tests/test_backends.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index cfca5e69048..d504b5bf273 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2490,6 +2490,24 @@ def test_chunk_encoding(self) -> None:
             with self.roundtrip(data) as actual:
                 pass
 
+    def test_shard_encoding(self) -> None:
+        # These datasets have no dask chunks. All chunking/sharding specified in
+        # encoding
+        data = create_test_data()
+        chunks = (1, 1)
+        shards = (5, 5)
+        data["var2"].encoding.update({"chunks": chunks})
+        data["var2"].encoding.update({"shards": shards})
+
+        with self.roundtrip(data) as actual:
+            assert shards == actual["var2"].encoding["shards"]
+
+        # expect an error with shards not divisible by chunks
+        data["var2"].encoding.update({"chunks": (2, 2)})
+        with pytest.raises(TypeError):
+            with self.roundtrip(data) as actual:
+                pass
+    
     @requires_dask
     @pytest.mark.skipif(
         ON_WINDOWS,

From 365a55800939c8636dd8864c66dfd4d7f0f6054a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 17 Jan 2025 08:52:56 +0000
Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 xarray/tests/test_backends.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index d504b5bf273..1970b29ac4f 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2507,7 +2507,7 @@ def test_shard_encoding(self) -> None:
         with pytest.raises(TypeError):
             with self.roundtrip(data) as actual:
                 pass
-    
+
     @requires_dask
     @pytest.mark.skipif(
         ON_WINDOWS,

From 877aad63f2052b8ec51bfa4fd04b758a23a93b66 Mon Sep 17 00:00:00 2001
From: Jacob Bieker
Date: Fri, 17 Jan 2025 09:39:51 +0000
Subject: [PATCH 4/5] Limit shard test to only Zarr V3

---
 xarray/tests/test_backends.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 1970b29ac4f..5ac2c451b9e 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2493,20 +2493,20 @@ def test_chunk_encoding(self) -> None:
     def test_shard_encoding(self) -> None:
         # These datasets have no dask chunks. All chunking/sharding specified in
         # encoding
-        data = create_test_data()
-        chunks = (1, 1)
-        shards = (5, 5)
-        data["var2"].encoding.update({"chunks": chunks})
-        data["var2"].encoding.update({"shards": shards})
-
-        with self.roundtrip(data) as actual:
-            assert shards == actual["var2"].encoding["shards"]
-
-        # expect an error with shards not divisible by chunks
-        data["var2"].encoding.update({"chunks": (2, 2)})
-        with pytest.raises(TypeError):
+        if has_zarr_v3 and zarr.config.config["default_zarr_format"] == 3:
+            data = create_test_data()
+            chunks = (1, 1)
+            shards = (5, 5)
+            data["var2"].encoding.update({"chunks": chunks})
+            data["var2"].encoding.update({"shards": shards})
             with self.roundtrip(data) as actual:
-                pass
+                assert shards == actual["var2"].encoding["shards"]
+
+            # expect an error with shards not divisible by chunks
+            data["var2"].encoding.update({"chunks": (2, 2)})
+            with pytest.raises(TypeError):
+                with self.roundtrip(data) as actual:
+                    pass
 
     @requires_dask
     @pytest.mark.skipif(

From 4b8b7a343158bae1cc8d98a9ecc389346405e36b Mon Sep 17 00:00:00 2001
From: Jacob Bieker
Date: Fri, 17 Jan 2025 09:44:26 +0000
Subject: [PATCH 5/5] Update test and add shards to encoding when loading

---
 xarray/backends/zarr.py       | 1 +
 xarray/tests/test_backends.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 402763ced54..381073b1c14 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -830,6 +830,7 @@ def open_store_variable(self, name):
                 {
                     "compressors": zarr_array.compressors,
                     "filters": zarr_array.filters,
+                    "shards": zarr_array.shards,
                 }
             )
             if self.zarr_group.metadata.zarr_format == 3:
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 5ac2c451b9e..3ecca6f211e 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2504,7 +2504,7 @@ def test_shard_encoding(self) -> None:
 
             # expect an error with shards not divisible by chunks
             data["var2"].encoding.update({"chunks": (2, 2)})
-            with pytest.raises(TypeError):
+            with pytest.raises(ValueError):
                 with self.roundtrip(data) as actual:
                     pass
 

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy