diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 66bd461157e..c4a7db21208 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -120,6 +120,9 @@ Bug Fixes By `Emmanuel Ferdman `_. - :func:`combine_by_coords` no longer returns an empty dataset when a generator is passed as ``data_objects`` (:issue:`10114`, :pull:`11265`). By `Amartya Anand `_. +- Restore support for ``-1`` chunk sizes in Zarr encoding, mapping them to the + full length of each written dimension (:issue:`11288`). + By `Sarthak `_. - Fix h5netcdf backend module detection and ros3 tests (:issue:`11243`, :pull:`11274`). By `Kai Mühlbauer `_. diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d9279dc2de9..8f8280f1130 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -333,7 +333,7 @@ async def async_getitem(self, key): ) -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): +def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, shape): """ Given encoding chunks (possibly None or []) and variable chunks (possibly None or []). @@ -389,6 +389,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): var_chunks, ndim, name, + shape, ) for x in enc_chunks_tuple: @@ -400,6 +401,13 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): f"for variable named {name!r}." ) + # Preserve xarray's documented convention that -1 means the full length + # of a dimension when encoding chunk sizes for zarr. + enc_chunks_tuple = tuple( + dim_size if chunk == -1 else chunk + for chunk, dim_size in zip(enc_chunks_tuple, shape, strict=True) + ) + # if there are chunks in encoding and the variable data is a numpy array, # we use the specified chunks if not var_chunks: @@ -532,6 +540,7 @@ def extract_zarr_variable_encoding( var_chunks=variable.chunks, ndim=variable.ndim, name=name, + shape=variable.shape, ) if _zarr_v3() and chunks is None: chunks = "auto" diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index e42bfc2cd9f..a898214b09a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2902,6 +2902,14 @@ def test_chunk_encoding(self) -> None: with self.roundtrip(data) as actual: pass + def test_chunk_encoding_full_dimension_sentinel(self) -> None: + data = create_test_data() + data["var2"].encoding.update({"chunks": (5, -1)}) + + with self.roundtrip(data) as actual: + assert actual["var2"].encoding["chunks"] == (5, data["var2"].shape[1]) + assert data["var2"].encoding["chunks"] == (5, -1) + def test_shard_encoding(self) -> None: # These datasets have no dask chunks. All chunking/sharding specified in # encoding