From ee9d38dc9cc41e44b9f745b0f9735462d1e4b1f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20M=C3=A5nsson?= Date: Sun, 18 May 2025 02:08:50 +0200 Subject: [PATCH 1/6] Test for unexpected changes of input coordinate attrs, GH8047 The additional tests in test_ufuncs.py and test_computation.py currently fail due to GH8047, because apply_ufunc() relies on merge_coordinates_without_align() which may overwrite .attrs on input coordinates. The additional tests in test_merge.py currently pass, because Dataset.merge() doesn't seem affected by the bug. --- xarray/tests/test_computation.py | 17 +++++++++++++++++ xarray/tests/test_merge.py | 20 ++++++++++++++++++++ xarray/tests/test_ufuncs.py | 13 +++++++++++++ 3 files changed, 50 insertions(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index cb12f3df534..9c005081f3f 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -2206,6 +2206,7 @@ def test_where() -> None: def test_where_attrs() -> None: cond = xr.DataArray([True, False], coords={"a": [0, 1]}, attrs={"attr": "cond_da"}) cond["a"].attrs = {"attr": "cond_coord"} + input_cond = cond.copy() x = xr.DataArray([1, 1], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) x["a"].attrs = {"attr": "x_coord"} y = xr.DataArray([0, 0], coords={"a": [0, 1]}, attrs={"attr": "y_da"}) @@ -2216,6 +2217,22 @@ def test_where_attrs() -> None: expected = xr.DataArray([1, 0], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) expected["a"].attrs = {"attr": "x_coord"} assert_identical(expected, actual) + # Check also that input coordinate attributes weren't modified by reference + assert x["a"].attrs == {"attr": "x_coord"} + assert y["a"].attrs == {"attr": "y_coord"} + assert cond["a"].attrs == {"attr": "cond_coord"} + assert_identical(cond, input_cond) + + # 3 DataArrays, drop attrs + actual = xr.where(cond, x, y, keep_attrs=False) + expected = xr.DataArray([1, 0], coords={"a": [0, 1]}) + assert_identical(expected, actual) + 
assert_identical(expected.coords['a'], actual.coords['a']) + # Check also that input coordinate attributes weren't modified by reference + assert x["a"].attrs == {"attr": "x_coord"} + assert y["a"].attrs == {"attr": "y_coord"} + assert cond["a"].attrs == {"attr": "cond_coord"} + assert_identical(cond, input_cond) # x as a scalar, takes no attrs actual = xr.where(cond, 0, y, keep_attrs=True) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 302d26df8f3..1b4e1e3e94d 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -183,9 +183,11 @@ def test_merge_arrays_attrs_variables( self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception ): """check that combine_attrs is used on data variables and coords""" + input_attrs1 = attrs1.copy() data1 = xr.Dataset( {"var1": ("dim1", [], attrs1)}, coords={"dim1": ("dim1", [], attrs1)} ) + input_attrs2 = attrs2.copy() data2 = xr.Dataset( {"var1": ("dim1", [], attrs2)}, coords={"dim1": ("dim1", [], attrs2)} ) @@ -202,6 +204,12 @@ def test_merge_arrays_attrs_variables( assert_identical(actual, expected) + # Check also that input attributes weren't modified + assert data1["var1"].attrs == input_attrs1 + assert data1.coords["dim1"].attrs == input_attrs1 + assert data2["var1"].attrs == input_attrs2 + assert data2.coords["dim1"].attrs == input_attrs2 + def test_merge_attrs_override_copy(self): ds1 = xr.Dataset(attrs={"x": 0}) ds2 = xr.Dataset(attrs={"x": 1}) @@ -344,6 +352,18 @@ def test_merge(self): with pytest.raises(ValueError, match=r"should be coordinates or not"): data.merge(data.reset_coords()) + def test_merge_drop_attrs(self): + data = create_test_data() + ds1 = data[["var1"]] + ds2 = data[["var3"]] + ds1.coords["dim2"].attrs["keep me"] = "example" + ds2.coords["numbers"].attrs["foo"] = "bar" + actual = ds1.merge(ds2, combine_attrs="drop") + assert actual.coords["dim2"].attrs == {} + assert actual.coords["numbers"].attrs == {} + assert ds1.coords["dim2"].attrs["keep me"] 
== "example" + assert ds2.coords["numbers"].attrs["foo"] == "bar" + def test_merge_broadcast_equals(self): ds1 = xr.Dataset({"x": 0}) ds2 = xr.Dataset({"x": ("y", [0, 0])}) diff --git a/xarray/tests/test_ufuncs.py b/xarray/tests/test_ufuncs.py index 61cd88e30ac..00d1ed29b32 100644 --- a/xarray/tests/test_ufuncs.py +++ b/xarray/tests/test_ufuncs.py @@ -62,6 +62,19 @@ def test_binary_out(): assert_identical(actual_exponent, arg) +def test_binary_coord_attrs(): + t = xr.Variable("t", np.arange(2, 4), attrs={"units": "s"}) + x = xr.DataArray(t.values**2, coords={"t": t}, attrs={"units": "s^2"}) + y = xr.DataArray(t.values**3, coords={"t": t}, attrs={"units": "s^3"}) + z1 = xr.apply_ufunc(np.add, x, y, keep_attrs=True) + assert z1.coords["t"].attrs == {"units": "s"} + z2 = xr.apply_ufunc(np.add, x, y, keep_attrs=False) + assert z2.coords["t"].attrs == {} + # Check also that input array's coordinate attributes weren't affected + assert t.attrs == {"units": "s"} + assert x.coords["t"].attrs == {"units": "s"} + + def test_groupby(): ds = xr.Dataset({"a": ("x", [0, 0, 0])}, {"c": ("x", [0, 0, 1])}) ds_grouped = ds.groupby("c") From b872f7ec731247d932e79f3c802a3cf932a66bde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20M=C3=A5nsson?= Date: Sun, 18 May 2025 02:26:04 +0200 Subject: [PATCH 2/6] Fix GH8047 by setting attrs on copy in having merge_collected(). Calls to xarray.apply_ufunc() (which is used by for instance xarray.where()) have a call stack of core.apply_ufunc.apply_ufunc() core.apply_ufunc.apply_dataarray_vfunc() core.apply_ufunc.build_output_coords_and_indexes structure.merge.merge_coordinates_without_align() structure.merge.merge_collected() and in merge_collected() the .attrs of a coordinate in an original input array could be overwritten depending on combine_attrs, even if the intent was just to produce the desired attributes for the returned result. This very simple fix always makes a copy before assigning attributes. 
--- xarray/structure/merge.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 7d773ce0b4b..d40564ada28 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -283,7 +283,9 @@ def merge_collected( "conflicting attribute values on combined " f"variable {name!r}:\nfirst value: {variable.attrs!r}\nsecond value: {other_variable.attrs!r}" ) - merged_vars[name] = variable + # Make a shallow copy to so that assigning merged_vars[name].attrs + # does not affect the original input variable. + merged_vars[name] = variable.copy(False) merged_vars[name].attrs = merge_attrs( [var.attrs for var, _ in indexed_elements], combine_attrs=combine_attrs, From b8f6cef6c06b3e14a2a1c3899ec19e3b08dea381 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 18 May 2025 21:28:37 +0000 Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/structure/merge.py | 2 +- xarray/tests/test_computation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index d40564ada28..276259728df 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -283,7 +283,7 @@ def merge_collected( "conflicting attribute values on combined " f"variable {name!r}:\nfirst value: {variable.attrs!r}\nsecond value: {other_variable.attrs!r}" ) - # Make a shallow copy to so that assigning merged_vars[name].attrs + # Make a shallow copy to so that assigning merged_vars[name].attrs # does not affect the original input variable. 
merged_vars[name] = variable.copy(False) merged_vars[name].attrs = merge_attrs( diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 9c005081f3f..ef9d67b99d6 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -2227,7 +2227,7 @@ def test_where_attrs() -> None: actual = xr.where(cond, x, y, keep_attrs=False) expected = xr.DataArray([1, 0], coords={"a": [0, 1]}) assert_identical(expected, actual) - assert_identical(expected.coords['a'], actual.coords['a']) + assert_identical(expected.coords["a"], actual.coords["a"]) # Check also that input coordinate attributes weren't modified by reference assert x["a"].attrs == {"attr": "x_coord"} assert y["a"].attrs == {"attr": "y_coord"} From 4f464934093efa362558fc7e6d49d5d3a7461565 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20M=C3=A5nsson?= Date: Sat, 24 May 2025 12:43:45 +0200 Subject: [PATCH 4/6] Simple optimization by only copying if there are attributes. --- xarray/structure/merge.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 276259728df..85fd1f1545b 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -283,13 +283,17 @@ def merge_collected( "conflicting attribute values on combined " f"variable {name!r}:\nfirst value: {variable.attrs!r}\nsecond value: {other_variable.attrs!r}" ) - # Make a shallow copy to so that assigning merged_vars[name].attrs - # does not affect the original input variable. - merged_vars[name] = variable.copy(False) - merged_vars[name].attrs = merge_attrs( + attrs = merge_attrs( [var.attrs for var, _ in indexed_elements], combine_attrs=combine_attrs, ) + if variable.attrs or attrs: + # Make a shallow copy to so that assigning merged_vars[name].attrs + # does not affect the original input variable. 
+ merged_vars[name] = variable.copy(False) + merged_vars[name].attrs = attrs + else: + merged_vars[name] = variable merged_indexes[name] = index else: variables = [variable for variable, _ in elements_list] From 0ac36aacb0afea23bc416e66f2011967a6c6d056 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 24 May 2025 10:44:55 +0000 Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/structure/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 85fd1f1545b..872f1ce9432 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -288,7 +288,7 @@ def merge_collected( combine_attrs=combine_attrs, ) if variable.attrs or attrs: - # Make a shallow copy to so that assigning merged_vars[name].attrs + # Make a shallow copy to so that assigning merged_vars[name].attrs # does not affect the original input variable. merged_vars[name] = variable.copy(False) merged_vars[name].attrs = attrs From 8ae440680b7621a02ba4a176f72a69b000e0867b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 27 May 2025 11:47:56 -0600 Subject: [PATCH 6/6] Update xarray/structure/merge.py --- xarray/structure/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 872f1ce9432..ca1e5ccb2bb 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -290,7 +290,7 @@ def merge_collected( if variable.attrs or attrs: # Make a shallow copy to so that assigning merged_vars[name].attrs # does not affect the original input variable. - merged_vars[name] = variable.copy(False) + merged_vars[name] = variable.copy(deep=False) merged_vars[name].attrs = attrs else: merged_vars[name] = variable pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy