From cf78cf8d862f8ee0aba8aabfe3b2dba0aeb6ed82 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Sun, 1 Jun 2025 19:52:45 +0200 Subject: [PATCH 01/22] switch user-guide from ipython sphinx extension to jupyter-execute
---
 doc/user-guide/combining.rst | 46 ++---
 doc/user-guide/computation.rst | 134 ++++++-------
 doc/user-guide/dask.rst | 28 +--
 doc/user-guide/data-structures.rst | 105 +++++------
 doc/user-guide/duckarrays.rst | 26 +--
 doc/user-guide/groupby.rst | 42 ++---
 doc/user-guide/hierarchical-data.rst | 130 ++++++-------
 doc/user-guide/indexing.rst | 102 +++++-----
 doc/user-guide/interpolation.rst | 44 +++--
 doc/user-guide/io.rst | 118 ++++++------
 doc/user-guide/pandas.rst | 26 +--
 doc/user-guide/plotting.rst | 270 ++++++++-------------------
 doc/user-guide/reshaping.rst | 56 +++---
 doc/user-guide/terminology.rst | 14 +-
 doc/user-guide/testing.rst | 38 ++--
 doc/user-guide/time-series.rst | 54 +++---
 doc/user-guide/weather-climate.rst | 37 ++--
 17 files changed, 575 insertions(+), 695 deletions(-)

diff --git a/doc/user-guide/combining.rst b/doc/user-guide/combining.rst index 53d5fc17cbd..1356920e560 100644 --- a/doc/user-guide/combining.rst +++ b/doc/user-guide/combining.rst
@@ -3,8 +3,8 @@ Combining data -------------- -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd
@@ -27,7 +27,7 @@ into a larger object, you can use :py:func:`~xarray.concat`. ``concat`` takes an iterable of ``DataArray`` or ``Dataset`` objects, as well as a dimension name, and concatenates along that dimension: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( np.arange(6).reshape(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])]
@@ -41,7 +41,7 @@ dimension name, and concatenates along that dimension: In addition to combining along an existing dimension, ``concat`` can create a new dimension by stacking lower dimensional arrays together: -.. ipython:: python +.. jupyter-execute:: da.sel(x="a") xr.concat([da.isel(x=0), da.isel(x=1)], "x")
@@ -50,7 +50,7 @@ If the second argument to ``concat`` is a new dimension name, the arrays will be concatenated along that new dimension, which is always inserted as the first dimension: -.. ipython:: python +.. jupyter-execute:: xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim")
@@ -58,13 +58,13 @@ The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or :py:class:`~xarray.DataArray` object as well as a string, in which case it is used to label the values along the new dimension: -.. ipython:: python +.. jupyter-execute:: xr.concat([da.isel(x=0), da.isel(x=1)], pd.Index([-90, -100], name="new_dim")) Of course, ``concat`` also works on ``Dataset`` objects: -.. ipython:: python +.. jupyter-execute:: ds = da.to_dataset(name="foo") xr.concat([ds.sel(x="a"), ds.sel(x="b")], "x")
@@ -85,7 +85,7 @@ To combine variables and coordinates between multiple ``DataArray`` and/or ``Dataset``, ``DataArray`` or dictionaries of objects convertible to ``DataArray`` objects: -.. ipython:: python +.. jupyter-execute:: xr.merge([ds, ds.rename({"foo": "bar"})]) xr.merge([xr.DataArray(n, name="var%d" % n) for n in range(5)])
@@ -94,7 +94,7 @@ If you merge another dataset (or a dictionary including data array objects), by default the resulting dataset will be aligned on the **union** of all index coordinates: -.. ipython:: python +.. 
jupyter-execute:: other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")}) xr.merge([ds, other]) @@ -117,7 +117,7 @@ if you attempt to merge two variables with the same name but different values: The same non-destructive merging between ``DataArray`` index coordinates is used in the :py:class:`~xarray.Dataset` constructor: -.. ipython:: python +.. jupyter-execute:: xr.Dataset({"a": da.isel(x=slice(0, 1)), "b": da.isel(x=slice(1, 2))}) @@ -132,7 +132,7 @@ using values from the called object to fill holes. The resulting coordinates are the union of coordinate labels. Vacant cells as a result of the outer-join are filled with ``NaN``. For example: -.. ipython:: python +.. jupyter-execute:: ar0 = xr.DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])]) ar1 = xr.DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) @@ -153,7 +153,7 @@ In contrast to ``merge``, :py:meth:`~xarray.Dataset.update` modifies a dataset in-place without checking for conflicts, and will overwrite any existing variables with new values: -.. ipython:: python +.. jupyter-execute:: ds.update({"space": ("space", [10.2, 9.4, 3.9])}) @@ -164,14 +164,14 @@ replace all dataset variables that use it. ``update`` also performs automatic alignment if necessary. Unlike ``merge``, it maintains the alignment of the original array instead of merging indexes: -.. ipython:: python +.. jupyter-execute:: ds.update(other) The exact same alignment logic when setting a variable with ``__setitem__`` syntax: -.. ipython:: python +.. jupyter-execute:: ds["baz"] = xr.DataArray([9, 9, 9, 9, 9], coords=[("x", list("abcde"))]) ds.baz @@ -187,14 +187,14 @@ the optional ``compat`` argument on ``concat`` and ``merge``. :py:attr:`~xarray.Dataset.equals` checks dimension names, indexes and array values: -.. ipython:: python +.. jupyter-execute:: da.equals(da.copy()) :py:attr:`~xarray.Dataset.identical` also checks attributes, and the name of each object: -.. ipython:: python +.. jupyter-execute:: da.identical(da.rename("bar")) @@ -202,7 +202,7 @@ object: check that allows variables to have different dimensions, as long as values are constant along those new dimensions: -.. ipython:: python +.. jupyter-execute:: left = xr.Dataset(coords={"x": 0}) right = xr.Dataset({"x": [0, 0, 0]}) @@ -214,7 +214,7 @@ missing values marked by ``NaN`` in the same locations. In contrast, the ``==`` operation performs element-wise comparison (like numpy): -.. ipython:: python +.. jupyter-execute:: da == da.copy() @@ -232,7 +232,7 @@ methods it allows the merging of xarray objects with locations where *either* have ``NaN`` values. This can be used to combine data with overlapping coordinates as long as any non-missing values agree or are disjoint: -.. ipython:: python +.. jupyter-execute:: ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]}) ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]}) @@ -264,7 +264,7 @@ each processor wrote out data to a separate file. A domain which was decomposed into 4 parts, 2 each along both the x and y axes, requires organising the datasets into a doubly-nested list, e.g: -.. ipython:: python +.. jupyter-execute:: arr = xr.DataArray( name="temperature", data=np.random.randint(5, size=(2, 2)), dims=["x", "y"] @@ -279,7 +279,7 @@ along two times, and contain two different variables, we can pass ``None`` to ``'concat_dim'`` to specify the dimension of the nested list over which we wish to use ``merge`` instead of ``concat``: -.. ipython:: python +.. 
ipython:: python +.. 
jupyter-execute:: temp = xr.DataArray(name="temperature", data=np.random.randn(2), dims=["t"]) precip = xr.DataArray(name="precipitation", data=np.random.randn(2), dims=["t"]) @@ -294,8 +294,8 @@ Here we combine two datasets using their common dimension coordinates. Notice they are concatenated in order based on the values in their dimension coordinates, not on their position in the list passed to ``combine_by_coords``. -.. ipython:: python - :okwarning: +.. jupyter-execute:: + x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])]) x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])]) diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index 9953808e931..45fef2b5f80 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -18,8 +18,8 @@ Basic array math Arithmetic operations with a single DataArray automatically vectorize (like numpy) over all array values: -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd @@ -27,7 +27,7 @@ numpy) over all array values: np.random.seed(123456) -.. ipython:: python +.. jupyter-execute:: arr = xr.DataArray( np.random.default_rng(0).random((2, 3)), @@ -41,25 +41,25 @@ a DataArray: __ https://numpy.org/doc/stable/reference/ufuncs.html -.. ipython:: python +.. jupyter-execute:: np.sin(arr) Use :py:func:`~xarray.where` to conditionally switch between values: -.. ipython:: python +.. jupyter-execute:: xr.where(arr > 0, "positive", "negative") Use ``@`` to compute the :py:func:`~xarray.dot` product: -.. ipython:: python +.. jupyter-execute:: arr @ arr Data arrays also implement many :py:class:`numpy.ndarray` methods: -.. ipython:: python +.. jupyter-execute:: arr.round(2) arr.T @@ -87,7 +87,7 @@ methods for working with missing data from pandas: It returns a new xarray object with the same dimensions as the original object, but with boolean values indicating where **missing values** are present. -.. ipython:: python +.. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.isnull() @@ -99,7 +99,7 @@ object has 'True' values in the third and fourth positions and 'False' values in object. It returns a new xarray object with the same dimensions as the original object, but with boolean values indicating where **non-missing values** are present. -.. ipython:: python +.. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.notnull() @@ -113,7 +113,7 @@ non-missing values along one or more dimensions of an xarray object. It returns the same dimensions as the original object, but with each element replaced by the count of non-missing values along the specified dimensions. -.. ipython:: python +.. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.count() @@ -126,7 +126,7 @@ the number of non-null elements in x. It returns a new xarray object with the same dimensions as the original object, but with missing values removed. -.. ipython:: python +.. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.dropna(dim="x") @@ -138,7 +138,7 @@ original order. :py:meth:`~xarray.DataArray.fillna` is a method in xarray that can be used to fill missing or null values in an xarray object with a specified value or method. It returns a new xarray object with the same dimensions as the original object, but with missing values filled. -.. ipython:: python +.. 
ipython:: python +.. 
jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.fillna(-1) @@ -151,7 +151,7 @@ returns a new :py:class:`~xarray.DataArray` object with five elements, containin xarray object along one or more dimensions. It returns a new xarray object with the same dimensions as the original object, but with missing values replaced by the last non-missing value along the specified dimensions. -.. ipython:: python +.. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.ffill("x") @@ -164,7 +164,7 @@ five elements, containing the values [0, 1, 1, 1, 2] in the original order. xarray object along one or more dimensions. It returns a new xarray object with the same dimensions as the original object, but with missing values replaced by the next non-missing value along the specified dimensions. -.. ipython:: python +.. jupyter-execute:: x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.bfill("x") @@ -180,7 +180,7 @@ Xarray objects also have an :py:meth:`~xarray.DataArray.interpolate_na` method for filling missing values via 1D interpolation. It returns a new xarray object with the same dimensions as the original object, but with missing values interpolated. -.. ipython:: python +.. jupyter-execute:: x = xr.DataArray( [0, 1, np.nan, np.nan, 2], @@ -212,7 +212,7 @@ Aggregation methods have been updated to take a ``dim`` argument instead of ``axis``. This allows for very intuitive syntax for aggregation methods that are applied along particular dimension(s): -.. ipython:: python +.. jupyter-execute:: arr.sum(dim="x") arr.std(["x", "y"]) @@ -223,13 +223,13 @@ If you need to figure out the axis number for a dimension yourself (say, for wrapping code designed to work with numpy arrays), you can use the :py:meth:`~xarray.DataArray.get_axis_num` method: -.. ipython:: python +.. jupyter-execute:: arr.get_axis_num("y") These operations automatically skip missing values, like in pandas: -.. ipython:: python +.. jupyter-execute:: xr.DataArray([1, 2, np.nan, 3]).mean() @@ -244,7 +244,7 @@ Rolling window operations ``DataArray`` objects include a :py:meth:`~xarray.DataArray.rolling` method. This method supports rolling window aggregation: -.. ipython:: python +.. jupyter-execute:: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) arr @@ -253,14 +253,14 @@ method supports rolling window aggregation: name of the dimension as a key (e.g. ``y``) and the window size as the value (e.g. ``3``). We get back a ``Rolling`` object: -.. ipython:: python +.. jupyter-execute:: arr.rolling(y=3) Aggregation and summary methods can be applied directly to the ``Rolling`` object: -.. ipython:: python +.. jupyter-execute:: r = arr.rolling(y=3) r.reduce(np.std) @@ -270,7 +270,7 @@ Aggregation results are assigned the coordinate at the end of each window by default, but can be centered by passing ``center=True`` when constructing the ``Rolling`` object: -.. ipython:: python +.. jupyter-execute:: r = arr.rolling(y=3, center=True) r.mean() @@ -280,7 +280,7 @@ array produce ``nan``\s. Setting ``min_periods`` in the call to ``rolling`` changes the minimum number of observations within the window required to have a value when aggregating: -.. ipython:: python +.. jupyter-execute:: r = arr.rolling(y=3, min_periods=2) r.mean() @@ -289,7 +289,7 @@ a value when aggregating: From version 0.17, xarray supports multidimensional rolling, -.. ipython:: python +.. jupyter-execute:: r = arr.rolling(x=2, y=3, min_periods=2) r.mean() @@ -330,7 +330,7 @@ the last position. 
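In its basic form, ``construct`` simply exposes each rolling window along a new dimension. Here is a minimal sketch that reuses the small ``arr`` defined earlier in this section:

.. jupyter-execute::

    # each length-3 window along ``y`` becomes a slice of the new ``window``
    # dimension; leading positions without a complete window are padded with NaN
    arr.rolling(y=3).construct(y="window")
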
You can use this for more advanced rolling operations such as strided rolling, windowed rolling, convolution, short-time FFT etc. -.. ipython:: python +.. jupyter-execute:: # rolling with 2-point stride rolling_da = r.construct(x="x_win", y="y_win", stride=2) @@ -341,7 +341,7 @@ Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the original array, it is memory efficient. You can also use ``construct`` to compute a weighted rolling sum: -.. ipython:: python +.. jupyter-execute:: weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"]) arr.rolling(y=3).construct(y="window").dot(weight) @@ -363,7 +363,7 @@ Weighted array reductions and :py:meth:`Dataset.weighted` array reduction methods. They currently support weighted ``sum``, ``mean``, ``std``, ``var`` and ``quantile``. -.. ipython:: python +.. jupyter-execute:: coords = dict(month=("month", [1, 2, 3])) @@ -372,60 +372,60 @@ support weighted ``sum``, ``mean``, ``std``, ``var`` and ``quantile``. Create a weighted object: -.. ipython:: python +.. jupyter-execute:: weighted_prec = prec.weighted(weights) weighted_prec Calculate the weighted sum: -.. ipython:: python +.. jupyter-execute:: weighted_prec.sum() Calculate the weighted mean: -.. ipython:: python +.. jupyter-execute:: weighted_prec.mean(dim="month") Calculate the weighted quantile: -.. ipython:: python +.. jupyter-execute:: weighted_prec.quantile(q=0.5, dim="month") The weighted sum corresponds to: -.. ipython:: python +.. jupyter-execute:: weighted_sum = (prec * weights).sum() weighted_sum the weighted mean to: -.. ipython:: python +.. jupyter-execute:: weighted_mean = weighted_sum / weights.sum() weighted_mean the weighted variance to: -.. ipython:: python +.. jupyter-execute:: weighted_var = weighted_prec.sum_of_squares() / weights.sum() weighted_var and the weighted standard deviation to: -.. ipython:: python +.. jupyter-execute:: weighted_std = np.sqrt(weighted_var) weighted_std However, the functions also take missing values in the data into account: -.. ipython:: python +.. jupyter-execute:: data = xr.DataArray([np.nan, 2, 4]) weights = xr.DataArray([8, 1, 1]) @@ -438,7 +438,7 @@ in 0.6. If the weights add up to to 0, ``sum`` returns 0: -.. ipython:: python +.. jupyter-execute:: data = xr.DataArray([1.0, 1.0]) weights = xr.DataArray([-1.0, 1.0]) @@ -447,7 +447,7 @@ If the weights add up to to 0, ``sum`` returns 0: and ``mean``, ``std`` and ``var`` return ``nan``: -.. ipython:: python +.. jupyter-execute:: data.weighted(weights).mean() @@ -465,7 +465,7 @@ Coarsen large arrays :py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen` methods. This supports block aggregation along multiple dimensions, -.. ipython:: python +.. jupyter-execute:: x = np.linspace(0, 10, 300) t = pd.date_range("1999-12-15", periods=364) @@ -479,7 +479,7 @@ methods. This supports block aggregation along multiple dimensions, In order to take a block mean for every 7 days along ``time`` dimension and every 2 points along ``x`` dimension, -.. ipython:: python +.. jupyter-execute:: da.coarsen(time=7, x=2).mean() @@ -488,14 +488,14 @@ length is not a multiple of the corresponding window size. You can choose ``boundary='trim'`` or ``boundary='pad'`` options for trimming the excess entries or padding ``nan`` to insufficient entries, -.. ipython:: python +.. jupyter-execute:: da.coarsen(time=30, x=2, boundary="trim").mean() If you want to apply a specific function to coordinate, you can pass the function or method name to ``coord_func`` option, -.. 
ipython:: python +.. jupyter-execute:: da.coarsen(time=7, x=2, coord_func={"time": "min"}).mean() @@ -510,7 +510,7 @@ Xarray objects have some handy methods for the computation with their coordinates. :py:meth:`~xarray.DataArray.differentiate` computes derivatives by central finite differences using their coordinates, -.. ipython:: python +.. jupyter-execute:: a = xr.DataArray([0, 1, 2, 3], dims=["x"], coords=[[0.1, 0.11, 0.2, 0.3]]) a @@ -518,7 +518,7 @@ central finite differences using their coordinates, This method can be used also for multidimensional arrays, -.. ipython:: python +.. jupyter-execute:: a = xr.DataArray( np.arange(8).reshape(4, 2), dims=["x", "y"], coords={"x": [0.1, 0.11, 0.2, 0.3]} @@ -528,7 +528,7 @@ This method can be used also for multidimensional arrays, :py:meth:`~xarray.DataArray.integrate` computes integration based on trapezoidal rule using their coordinates, -.. ipython:: python +.. jupyter-execute:: a.integrate("x") @@ -546,7 +546,7 @@ Xarray objects provide an interface for performing linear or polynomial regressi using the least-squares method. :py:meth:`~xarray.DataArray.polyfit` computes the best fitting coefficients along a given dimension and for a given order, -.. ipython:: python +.. jupyter-execute:: x = xr.DataArray(np.arange(10), dims=["x"], name="x") a = xr.DataArray(3 + 4 * x, dims=["x"], coords={"x": x}) @@ -556,7 +556,7 @@ best fitting coefficients along a given dimension and for a given order, The method outputs a dataset containing the coefficients (and more if ``full=True``). The inverse operation is done with :py:meth:`~xarray.polyval`, -.. ipython:: python +.. jupyter-execute:: xr.polyval(coord=x, coeffs=out.polyfit_coefficients) @@ -576,7 +576,7 @@ user-defined functions and can fit along multiple coordinates. For example, we can fit a relationship between two ``DataArray`` objects, maintaining a unique fit at each spatial coordinate but aggregating over the time dimension: -.. ipython:: python +.. jupyter-execute:: def exponential(x, a, xc): return np.exp((x - xc) / a) @@ -606,7 +606,7 @@ We can also fit multi-dimensional functions, and even use a wrapper function to simultaneously fit a summation of several functions, such as this field containing two gaussian peaks: -.. ipython:: python +.. jupyter-execute:: def gaussian_2d(coords, a, xc, yc, xalpha, yalpha): x, y = coords @@ -660,7 +660,7 @@ operations to work, as commonly done in numpy with :py:func:`numpy.reshape` or This is best illustrated by a few examples. Consider two one-dimensional arrays with different sizes aligned along different dimensions: -.. ipython:: python +.. jupyter-execute:: a = xr.DataArray([1, 2], [("x", ["a", "b"])]) a @@ -670,14 +670,14 @@ arrays with different sizes aligned along different dimensions: With xarray, we can apply binary mathematical operations to these arrays, and their dimensions are expanded automatically: -.. ipython:: python +.. jupyter-execute:: a * b Moreover, dimensions are always reordered to the order in which they first appeared: -.. ipython:: python +.. jupyter-execute:: c = xr.DataArray(np.arange(6).reshape(3, 2), [b["y"], a["x"]]) c @@ -685,14 +685,14 @@ appeared: This means, for example, that you always subtract an array from its transpose: -.. ipython:: python +.. jupyter-execute:: c - c.T You can explicitly broadcast xarray data structures by using the :py:func:`~xarray.broadcast` function: -.. ipython:: python +.. 
ipython:: python +.. 
jupyter-execute:: a2, b2 = xr.broadcast(a, b) a2 @@ -711,7 +711,7 @@ Similarly to pandas, this alignment is automatic for arithmetic on binary operations. The default result of a binary operation is by the *intersection* (not the union) of coordinate labels: -.. ipython:: python +.. jupyter-execute:: arr = xr.DataArray(np.arange(3), [("x", range(3))]) arr + arr[:-1] @@ -729,7 +729,7 @@ matching dimensions must have the same size: However, one can explicitly change this default automatic alignment type ("inner") via :py:func:`~xarray.set_options()` in context manager: -.. ipython:: python +.. jupyter-execute:: with xr.set_options(arithmetic_join="outer"): arr + arr[:1] @@ -756,7 +756,7 @@ Although index coordinates are aligned, other coordinates are not, and if their values conflict, they will be dropped. This is necessary, for example, because indexing turns 1D coordinates into scalar coordinates: -.. ipython:: python +.. jupyter-execute:: arr[0] arr[1] @@ -766,7 +766,7 @@ indexing turns 1D coordinates into scalar coordinates: Still, xarray will persist other coordinates in arithmetic, as long as there are no conflicting values: -.. ipython:: python +.. jupyter-execute:: # only one argument has the 'x' coordinate arr[0] + 1 @@ -779,7 +779,7 @@ Math with datasets Datasets support arithmetic operations by automatically looping over all data variables: -.. ipython:: python +.. jupyter-execute:: ds = xr.Dataset( { @@ -792,7 +792,7 @@ variables: Datasets support most of the same methods found on data arrays: -.. ipython:: python +.. jupyter-execute:: ds.mean(dim="x") abs(ds) @@ -801,7 +801,7 @@ Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or alternatively you can use :py:meth:`~xarray.Dataset.map` to map a function to each variable in a dataset: -.. ipython:: python +.. jupyter-execute:: np.sin(ds) ds.map(np.sin) @@ -809,13 +809,13 @@ to each variable in a dataset: Datasets also use looping over variables for *broadcasting* in binary arithmetic. You can do arithmetic between any ``DataArray`` and a dataset: -.. ipython:: python +.. jupyter-execute:: ds + arr Arithmetic between two datasets matches data variables of the same name: -.. ipython:: python +.. jupyter-execute:: ds2 = xr.Dataset({"x_and_y": 0, "x_only": 100}) ds - ds2 @@ -858,7 +858,7 @@ functions/methods are written using ``apply_ufunc``. Simple functions that act independently on each value should work without any additional arguments: -.. ipython:: python +.. jupyter-execute:: squared_error = lambda x, y: (x - y) ** 2 arr1 = xr.DataArray([0, 1, 2, 3], dims="x") @@ -885,15 +885,15 @@ to set ``axis=-1``. As an example, here is how we would wrap np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} ) -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: def vector_norm(x, dim, ord=None): return xr.apply_ufunc( np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} ) -.. ipython:: python +.. jupyter-execute:: vector_norm(arr1, dim="x") diff --git a/doc/user-guide/dask.rst b/doc/user-guide/dask.rst index 184681aa4c9..46d5174c6e5 100644 --- a/doc/user-guide/dask.rst +++ b/doc/user-guide/dask.rst @@ -115,8 +115,8 @@ When reading data, Dask divides your dataset into smaller chunks. You can specif Loading Dask Arrays ~~~~~~~~~~~~~~~~~~~ -.. ipython:: python - :suppress: +.. 
ipython:: python - :suppress: +.. 
jupyter-execute:: + :hide-code: import os @@ -148,7 +148,7 @@ There are a few common cases where you may want to convert lazy Dask arrays into To do this, you can use :py:meth:`Dataset.compute` or :py:meth:`DataArray.compute`: -.. ipython:: python +.. jupyter-execute:: ds.compute() @@ -171,7 +171,7 @@ You can also access :py:attr:`DataArray.values`, which will always be a NumPy ar NumPy ufuncs like :py:func:`numpy.sin` transparently work on all xarray objects, including those that store lazy Dask arrays: -.. ipython:: python +.. jupyter-execute:: import numpy as np @@ -347,7 +347,7 @@ Functions that consume and return Xarray objects can be easily applied in parall Your function will receive an Xarray Dataset or DataArray subset to one chunk along each chunked dimension. -.. ipython:: python +.. jupyter-execute:: ds.temperature @@ -356,7 +356,7 @@ At compute time, a function applied with :py:func:`map_blocks` will receive a Da (time x latitude x longitude) with values loaded. The following snippet illustrates how to check the shape of the object received by the applied function. -.. ipython:: python +.. jupyter-execute:: def func(da): print(da.sizes) @@ -375,7 +375,7 @@ work for your function, provide the ``template`` kwarg (see :ref:`below `: -.. ipython:: python +.. jupyter-execute:: dt["child-node"].to_dataset() Like with :py:class:`~xarray.Dataset`, you can access the data and coordinate variables of a node separately via the :py:attr:`~xarray.DataTree.data_vars` and :py:attr:`~xarray.DataTree.coords` attributes: -.. ipython:: python +.. jupyter-execute:: dt["child-node"].data_vars dt["child-node"].coords @@ -675,7 +676,7 @@ We can update a datatree in-place using Python's standard dictionary syntax, similar to how we can for Dataset objects. For example, to create this example DataTree from scratch, we could have written: -.. ipython:: python +.. jupyter-execute:: dt = xr.DataTree(name="root") dt["foo"] = "orange" @@ -720,7 +721,7 @@ size). Some examples: -.. ipython:: python +.. jupyter-execute:: # Set up coordinates time = xr.DataArray(data=["2022-01", "2023-01"], dims="time") @@ -780,7 +781,7 @@ that it applies to all descendent nodes. Similarly, ``station`` is in the base ``weather`` and in the ``temperature`` sub-tree. Notice the inherited coordinates are explicitly shown in the tree representation under ``Inherited coordinates:``. -.. ipython:: python +.. jupyter-execute:: dt2["/weather"] @@ -788,14 +789,14 @@ Accessing any of the lower level trees through the :py:func:`.dataset `_ library provides a sparse array type which is useful for representing nD array objects like sparse matrices in a memory-efficient manner. We can create a sparse array object (of the :py:class:`sparse.COO` type) from a numpy array like this: -.. ipython:: python +.. jupyter-execute:: from sparse import COO + import xarray as xr + import numpy as np x = np.eye(4, dtype=np.uint8) # create diagonal identity matrix s = COO.from_numpy(x) @@ -63,14 +65,14 @@ Sparse array objects can be converted back to a "dense" numpy array by calling : Just like :py:class:`numpy.ndarray` objects, :py:class:`sparse.COO` arrays support indexing -.. ipython:: python +.. jupyter-execute:: s[1, 1] # diagonal elements should be ones s[2, 3] # off-diagonal elements should be zero broadcasting, -.. ipython:: python +.. jupyter-execute:: x2 = np.zeros( (4, 1), dtype=np.uint8 @@ -80,14 +82,14 @@ broadcasting, and various computation methods -.. ipython:: python +.. 
ipython:: python +.. 
jupyter-execute:: s.sum(axis=1) This numpy-like array also supports calling so-called `numpy ufuncs `_ ("universal functions") on it directly: -.. ipython:: python +.. jupyter-execute:: np.sum(s, axis=1) @@ -113,7 +115,7 @@ both accept data in various forms through their ``data`` argument, but in fact t For example, we can wrap the sparse array we created earlier inside a new DataArray object: -.. ipython:: python +.. jupyter-execute:: s_da = xr.DataArray(s, dims=["i", "j"]) s_da @@ -123,7 +125,7 @@ representation of the underlying wrapped array. Of course our sparse array object is still there underneath - it's stored under the ``.data`` attribute of the dataarray: -.. ipython:: python +.. jupyter-execute:: s_da.data @@ -132,7 +134,7 @@ Array methods We saw above that numpy-like arrays provide numpy methods. Xarray automatically uses these when you call the corresponding xarray method: -.. ipython:: python +.. jupyter-execute:: s_da.sum(dim="j") @@ -141,7 +143,7 @@ Converting wrapped types If you want to change the type inside your xarray object you can use :py:meth:`DataArray.as_numpy`: -.. ipython:: python +.. jupyter-execute:: s_da.as_numpy() @@ -152,12 +154,12 @@ If instead you want to convert to numpy and return that numpy array you can use always uses :py:func:`numpy.asarray` which will fail for some array types (e.g. ``cupy``), whereas :py:meth:`~DataArray.to_numpy` uses the correct method depending on the array type. -.. ipython:: python +.. jupyter-execute:: s_da.to_numpy() -.. ipython:: python - :okexcept: +.. jupyter-execute:: + :raises: s_da.values diff --git a/doc/user-guide/groupby.rst b/doc/user-guide/groupby.rst index 673e23d75ac..dfed7dbac83 100644 --- a/doc/user-guide/groupby.rst +++ b/doc/user-guide/groupby.rst @@ -37,8 +37,8 @@ Split Let's create a simple example dataset: -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd @@ -46,7 +46,7 @@ Let's create a simple example dataset: np.random.seed(123456) -.. ipython:: python +.. jupyter-execute:: ds = xr.Dataset( {"foo": (("x", "y"), np.random.rand(4, 3))}, @@ -58,26 +58,26 @@ Let's create a simple example dataset: If we groupby the name of a variable or coordinate in a dataset (we can also use a DataArray directly), we get back a ``GroupBy`` object: -.. ipython:: python +.. jupyter-execute:: ds.groupby("letters") This object works very similarly to a pandas GroupBy object. You can view the group indices with the ``groups`` attribute: -.. ipython:: python +.. jupyter-execute:: ds.groupby("letters").groups You can also iterate over groups in ``(label, group)`` pairs: -.. ipython:: python +.. jupyter-execute:: list(ds.groupby("letters")) You can index out a particular group: -.. ipython:: python +.. jupyter-execute:: ds.groupby("letters")["b"] @@ -91,7 +91,7 @@ but instead want to "bin" the data into coarser groups. You could always create a customized coordinate, but xarray facilitates this via the :py:meth:`Dataset.groupby_bins` method. -.. ipython:: python +.. jupyter-execute:: x_bins = [0, 25, 50] ds.groupby_bins("x", x_bins).groups @@ -102,7 +102,7 @@ labeled with strings using set notation to precisely identify the bin limits. To override this behavior, you can specify the bin labels explicitly. Here we choose ``float`` labels which identify the bin centers: -.. ipython:: python +.. 
ipython:: python +.. 
jupyter-execute:: x_bin_labels = [12.5, 37.5] ds.groupby_bins("x", x_bins, labels=x_bin_labels).groups @@ -115,7 +115,7 @@ To apply a function to each group, you can use the flexible :py:meth:`core.groupby.DatasetGroupBy.map` method. The resulting objects are automatically concatenated back together along the group axis: -.. ipython:: python +.. jupyter-execute:: def standardize(x): return (x - x.mean()) / x.std() @@ -127,14 +127,14 @@ GroupBy objects also have a :py:meth:`core.groupby.DatasetGroupBy.reduce` method methods like :py:meth:`core.groupby.DatasetGroupBy.mean` as shortcuts for applying an aggregation function: -.. ipython:: python +.. jupyter-execute:: arr.groupby("letters").mean(dim="x") Using a groupby is thus also a convenient shortcut for aggregating over all dimensions *other than* the provided one: -.. ipython:: python +.. jupyter-execute:: ds.groupby("x").std(...) @@ -151,7 +151,7 @@ There are two special aggregation operations that are currently only found on groupby objects: first and last. These provide the first or last example of values for group along the grouped dimension: -.. ipython:: python +.. jupyter-execute:: ds.groupby("letters").first(...) @@ -166,7 +166,7 @@ for ``(GroupBy, Dataset)`` and ``(GroupBy, DataArray)`` pairs, as long as the dataset or data array uses the unique grouped values as one of its index coordinates. For example: -.. ipython:: python +.. jupyter-execute:: alt = arr.groupby("letters").mean(...) alt @@ -191,7 +191,7 @@ operations over multidimensional coordinate variables: __ https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimensional_latitude_longitude_coordinate_variables -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( [[0, 1], [2, 3]], @@ -209,7 +209,7 @@ Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`Dataset.groupby_bins` may be desirable: -.. ipython:: python +.. jupyter-execute:: da.groupby_bins("lon", [0, 45, 50]).sum() @@ -217,7 +217,7 @@ These methods group by ``lon`` values. It is also possible to groupby each cell in a grid, regardless of value, by stacking multiple dimensions, applying your function, and then unstacking the result: -.. ipython:: python +.. jupyter-execute:: stacked = da.stack(gridcell=["ny", "nx"]) stacked.groupby("gridcell").sum(...).unstack("gridcell") @@ -310,7 +310,7 @@ Grouping by multiple variables Use grouper objects to group by multiple dimensions: -.. ipython:: python +.. jupyter-execute:: from xarray.groupers import UniqueGrouper @@ -318,14 +318,14 @@ Use grouper objects to group by multiple dimensions: The above is sugar for using ``UniqueGrouper`` objects directly: -.. ipython:: python +.. jupyter-execute:: da.groupby(lat=UniqueGrouper(), lon=UniqueGrouper()).sum() Different groupers can be combined to construct sophisticated GroupBy operations. -.. ipython:: python +.. jupyter-execute:: from xarray.groupers import BinGrouper @@ -347,7 +347,7 @@ Shuffling is a generalization of sorting a DataArray or Dataset by another DataA Shuffling reorders the DataArray or the DataArrays in a Dataset such that all members of a group occur sequentially. For example, Shuffle the object using either :py:class:`DatasetGroupBy` or :py:class:`DataArrayGroupBy` as appropriate. -.. ipython:: python +.. 
ipython:: python +.. 
jupyter-execute:: da = xr.DataArray( dims="x", diff --git a/doc/user-guide/hierarchical-data.rst b/doc/user-guide/hierarchical-data.rst index 5f3a341323f..d4da2531f82 100644 --- a/doc/user-guide/hierarchical-data.rst +++ b/doc/user-guide/hierarchical-data.rst @@ -3,8 +3,8 @@ Hierarchical data ================= -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd @@ -54,7 +54,7 @@ Here we go into more detail about how to create a tree node-by-node, using a fam Let's start by defining nodes representing the two siblings, Bart and Lisa Simpson: -.. ipython:: python +.. jupyter-execute:: bart = xr.DataTree(name="Bart") lisa = xr.DataTree(name="Lisa") @@ -62,27 +62,27 @@ Let's start by defining nodes representing the two siblings, Bart and Lisa Simps Each of these node objects knows their own :py:class:`~xarray.DataTree.name`, but they currently have no relationship to one another. We can connect them by creating another node representing a common parent, Homer Simpson: -.. ipython:: python +.. jupyter-execute:: homer = xr.DataTree(name="Homer", children={"Bart": bart, "Lisa": lisa}) Here we set the children of Homer in the node's constructor. We now have a small family tree -.. ipython:: python +.. jupyter-execute:: homer where we can see how these individual Simpson family members are related to one another. The nodes representing Bart and Lisa are now connected - we can confirm their sibling rivalry by examining the :py:class:`~xarray.DataTree.siblings` property: -.. ipython:: python +.. jupyter-execute:: list(homer["Bart"].siblings) But oops, we forgot Homer's third daughter, Maggie! Let's add her by updating Homer's :py:class:`~xarray.DataTree.children` property to include her: -.. ipython:: python +.. jupyter-execute:: maggie = xr.DataTree(name="Maggie") homer.children = {"Bart": bart, "Lisa": lisa, "Maggie": maggie} @@ -90,7 +90,7 @@ But oops, we forgot Homer's third daughter, Maggie! Let's add her by updating Ho Let's check that Maggie knows who her Dad is: -.. ipython:: python +.. jupyter-execute:: maggie.parent.name @@ -103,20 +103,20 @@ That's good - updating the properties of our nodes does not break the internal c Homer is currently listed as having no parent (the so-called "root node" of this tree), but we can update his :py:class:`~xarray.DataTree.parent` property: -.. ipython:: python +.. jupyter-execute:: abe = xr.DataTree(name="Abe") abe.children = {"Homer": homer} Abe is now the "root" of this tree, which we can see by examining the :py:class:`~xarray.DataTree.root` property of any node in the tree -.. ipython:: python +.. jupyter-execute:: maggie.root.name We can see the whole tree by printing Abe's node or just part of the tree by printing Homer's node: -.. ipython:: python +.. jupyter-execute:: abe abe["Homer"] @@ -125,7 +125,7 @@ We can see the whole tree by printing Abe's node or just part of the tree by pri In episode 28, Abe Simpson reveals that he had another son, Herbert "Herb" Simpson. We can add Herbert to the family tree without displacing Homer by :py:meth:`~xarray.DataTree.assign`-ing another child to Abe: -.. ipython:: python +.. jupyter-execute:: herbert = xr.DataTree(name="Herb") abe = abe.assign({"Herbert": herbert}) @@ -145,8 +145,8 @@ Certain manipulations of our tree are forbidden, if they would create an inconsi In episode 51 of the show Futurama, Philip J. Fry travels back in time and accidentally becomes his own Grandfather. 
If we try similar time-travelling hijinks with Homer, we get a :py:class:`~xarray.InvalidTreeError` raised: -.. ipython:: python - :okexcept: +.. jupyter-execute:: + :raises: abe["Homer"].children = {"Abe": abe} @@ -157,7 +157,7 @@ Ancestry in an Evolutionary Tree Let's use a different example of a tree to discuss more complex relationships between nodes - the phylogenetic tree, or tree of life. -.. ipython:: python +.. jupyter-execute:: vertebrates = xr.DataTree.from_dict( { @@ -180,7 +180,7 @@ Let's use a different example of a tree to discuss more complex relationships be We have used the :py:meth:`~xarray.DataTree.from_dict` constructor method as a preferred way to quickly create a whole tree, and :ref:`filesystem paths` (to be explained shortly) to select two nodes of interest. -.. ipython:: python +.. jupyter-execute:: vertebrates @@ -191,7 +191,7 @@ Here both the species and the features used to group them are represented by :py We can however get a list of only the nodes we used to represent species by using the fact that all those nodes have no children - they are "leaf nodes". We can check if a node is a leaf with :py:meth:`~xarray.DataTree.is_leaf`, and get a list of all leaves with the :py:class:`~xarray.DataTree.leaves` property: -.. ipython:: python +.. jupyter-execute:: primates.is_leaf [node.name for node in vertebrates.leaves] @@ -200,7 +200,7 @@ Pretending that this is a true evolutionary tree for a moment, we can find the f the distinguishing feature of the common ancestor of all vertebrate life (the root node), and even the distinguishing feature of the common ancestor of any two species (the common ancestor of two nodes): -.. ipython:: python +.. jupyter-execute:: [node.name for node in reversed(primates.parents)] primates.root.name @@ -210,8 +210,8 @@ We can only find a common ancestor between two nodes that lie in the same tree. If we try to find the common evolutionary ancestor between primates and an Alien species that has no relationship to Earth's evolutionary tree, an error will be raised. -.. ipython:: python - :okexcept: +.. jupyter-execute:: + :raises: alien = xr.DataTree(name="Xenomorph") primates.find_common_ancestor(alien) @@ -229,7 +229,7 @@ Properties We can navigate trees using the :py:class:`~xarray.DataTree.parent` and :py:class:`~xarray.DataTree.children` properties of each node, for example: -.. ipython:: python +.. jupyter-execute:: lisa.parent.children["Bart"].name @@ -244,7 +244,7 @@ In general :py:class:`~xarray.DataTree.DataTree` objects support almost the enti including :py:meth:`~xarray.DataTree.keys`, :py:class:`~xarray.DataTree.values`, :py:class:`~xarray.DataTree.items`, :py:meth:`~xarray.DataTree.__delitem__` and :py:meth:`~xarray.DataTree.update`. -.. ipython:: python +.. jupyter-execute:: vertebrates["Bony Skeleton"]["Ray-finned Fish"] @@ -252,7 +252,7 @@ Note that the dict-like interface combines access to child :py:class:`~xarray.Da so if we have a node that contains both children and data, calling :py:meth:`~xarray.DataTree.keys` will list both names of child nodes and names of data variables: -.. ipython:: python +.. jupyter-execute:: dt = xr.DataTree( dataset=xr.Dataset({"foo": 0, "bar": 1}), @@ -268,7 +268,7 @@ Attribute-like access You can also select both variables and child nodes through dot indexing -.. ipython:: python +.. 
ipython:: python +.. 
jupyter-execute:: dt.foo dt.a @@ -295,7 +295,7 @@ This is an extension of the conventional dictionary ``__getitem__`` syntax to al Like with filepaths, paths within the tree can either be relative to the current node, e.g. -.. ipython:: python +.. jupyter-execute:: abe["Homer/Bart"].name abe["./Homer/Bart"].name # alternative syntax @@ -306,7 +306,7 @@ A path specified from the root (as opposed to being specified relative to an arb or as an "absolute path". The root node is referred to by ``"/"``, so the path from the root node to its grand-child would be ``"/child/grandchild"``, e.g. -.. ipython:: python +.. jupyter-execute:: # access lisa's sibling by a relative path. lisa["../Bart"] @@ -317,14 +317,14 @@ The root node is referred to by ``"/"``, so the path from the root node to its g Relative paths between nodes also support the ``"../"`` syntax to mean the parent of the current node. We can use this with ``__setitem__`` to add a missing entry to our evolutionary tree, but add it relative to a more familiar node of interest: -.. ipython:: python +.. jupyter-execute:: primates["../../Two Fenestrae/Crocodiles"] = xr.DataTree() print(vertebrates) Given two nodes in a tree, we can also find their relative path: -.. ipython:: python +.. jupyter-execute:: bart.relative_to(lisa) @@ -332,7 +332,7 @@ You can use this filepath feature to build a nested tree from a dictionary of fi If we have a dictionary where each key is a valid path, and each value is either valid data or ``None``, we can construct a complex tree quickly using the alternative constructor :py:meth:`~xarray.DataTree.from_dict()`: -.. ipython:: python +.. jupyter-execute:: d = { "/": xr.Dataset({"foo": "orange"}), @@ -357,7 +357,7 @@ Iterating over trees You can iterate over every node in a tree using the subtree :py:class:`~xarray.DataTree.subtree` property. This returns an iterable of nodes, which yields them in depth-first order. -.. ipython:: python +.. jupyter-execute:: for node in vertebrates.subtree: print(node.path) @@ -372,7 +372,7 @@ For example, we could keep only the nodes containing data by looping over all no checking if they contain any data using :py:class:`~xarray.DataTree.has_data`, then rebuilding a new tree using only the paths of those nodes: -.. ipython:: python +.. jupyter-execute:: non_empty_nodes = { path: node.dataset for path, node in dt.subtree_with_keys if node.has_data @@ -396,7 +396,7 @@ We can subset our tree to select only nodes of interest in various ways. Similarly to on a real filesystem, matching nodes by common patterns in their paths is often useful. We can use :py:meth:`xarray.DataTree.match` for this: -.. ipython:: python +.. jupyter-execute:: dt = xr.DataTree.from_dict( { @@ -414,7 +414,7 @@ We can also subset trees by the contents of the nodes. For example, we could recreate the Simpson's family tree with the ages of each individual, then filter for only the adults: First lets recreate the tree but with an ``age`` data variable in every node: -.. ipython:: python +.. jupyter-execute:: simpsons = xr.DataTree.from_dict( { @@ -431,7 +431,7 @@ First lets recreate the tree but with an ``age`` data variable in every node: Now let's filter out the minors: -.. ipython:: python +.. jupyter-execute:: simpsons.filter(lambda node: node["age"] > 18) @@ -454,7 +454,7 @@ You can check if a tree is a hollow tree by using the :py:class:`~xarray.DataTre We can see that the Simpson's family is not hollow because the data variable ``"age"`` is present at some nodes which have children (i.e. Abe and Homer). 
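For contrast, a tree that keeps data only in its leaf nodes is hollow; a minimal, hypothetical two-node sketch (not part of the Simpsons example):

.. jupyter-execute::

    # the root node holds no data, so the only node carrying data is a leaf
    xr.DataTree.from_dict({"/leaf": xr.Dataset({"x": 1})}).is_hollow
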
-.. ipython:: python +.. jupyter-execute:: simpsons.is_hollow @@ -471,7 +471,7 @@ Operations and Methods on Trees To show how applying operations across a whole tree at once can be useful, let's first create a example scientific dataset. -.. ipython:: python +.. jupyter-execute:: def time_stamps(n_samples, T): """Create an array of evenly-spaced time stamps""" @@ -523,7 +523,7 @@ let's first create a example scientific dataset. Most xarray computation methods also exist as methods on datatree objects, so you can for example take the mean value of these two timeseries at once: -.. ipython:: python +.. jupyter-execute:: voltages.mean(dim="time") @@ -532,8 +532,8 @@ tree one-by-one. The arguments passed to the method are used for every node, so the values of the arguments you pass might be valid for one node and invalid for another -.. ipython:: python - :okexcept: +.. jupyter-execute:: + :raises: voltages.isel(time=12) @@ -545,7 +545,7 @@ Arithmetic Methods on Trees Arithmetic methods are also implemented, so you can e.g. add a scalar to every dataset in the tree at once. For example, we can advance the timeline of the Simpsons by a decade just by -.. ipython:: python +.. jupyter-execute:: simpsons + 10 @@ -565,14 +565,14 @@ and returns one (or more) xarray datasets. For example, we can define a function to calculate the Root Mean Square of a timeseries -.. ipython:: python +.. jupyter-execute:: def rms(signal): return np.sqrt(np.mean(signal**2)) Then calculate the RMS value of these signals: -.. ipython:: python +.. jupyter-execute:: voltages.map_over_datasets(rms) @@ -595,7 +595,7 @@ To iterate over the corresponding nodes in multiple trees, use :py:class:`~xarray.DataTree.subtree_with_keys`. This combines well with :py:meth:`xarray.DataTree.from_dict()` to build a new tree: -.. ipython:: python +.. jupyter-execute:: dt1 = xr.DataTree.from_dict({"a": xr.Dataset({"x": 1}), "b": xr.Dataset({"x": 2})}) dt2 = xr.DataTree.from_dict( @@ -609,7 +609,7 @@ To iterate over the corresponding nodes in multiple trees, use Alternatively, you apply a function directly to paired datasets at every node using :py:func:`xarray.map_over_datasets`: -.. ipython:: python +.. jupyter-execute:: xr.map_over_datasets(lambda x, y: x + y, dt1, dt2) @@ -623,8 +623,8 @@ or "isomorphic", if the full paths to all of their descendent nodes are the same Applying :py:func:`~xarray.group_subtrees` to trees with different structures raises :py:class:`~xarray.TreeIsomorphismError`: -.. ipython:: python - :okexcept: +.. jupyter-execute:: + :raises: tree = xr.DataTree.from_dict({"a": None, "a/b": None, "a/c": None}) simple_tree = xr.DataTree.from_dict({"a": None}) @@ -633,20 +633,20 @@ raises :py:class:`~xarray.TreeIsomorphismError`: We can explicitly also check if any two trees are isomorphic using the :py:meth:`~xarray.DataTree.isomorphic` method: -.. ipython:: python +.. jupyter-execute:: tree.isomorphic(simple_tree) Corresponding tree nodes do not need to have the same data in order to be considered isomorphic: -.. ipython:: python +.. jupyter-execute:: tree_with_data = xr.DataTree.from_dict({"a": xr.Dataset({"foo": 1})}) simple_tree.isomorphic(tree_with_data) They also do not need to define child nodes in the same order: -.. ipython:: python +.. 
ipython:: python +.. 
jupyter-execute:: reordered_tree = xr.DataTree.from_dict({"a": None, "a/c": None, "a/b": None}) tree.isomorphic(reordered_tree) @@ -657,7 +657,7 @@ Arithmetic Between Multiple Trees Arithmetic operations like multiplication are binary operations, so as long as we have two isomorphic trees, we can do arithmetic between them. -.. ipython:: python +.. jupyter-execute:: currents = xr.DataTree.from_dict( { @@ -687,7 +687,7 @@ we can do arithmetic between them. We could use this feature to quickly calculate the electrical power in our signal, P=IV. -.. ipython:: python +.. jupyter-execute:: power = currents * voltages power @@ -712,7 +712,7 @@ Exact alignment means that shared dimensions must be the same length, and indexe To demonstrate, let's first generate some example datasets which are not aligned with one another: -.. ipython:: python +.. jupyter-execute:: # (drop the attributes just to make the printed representation shorter) ds = xr.tutorial.open_dataset("air_temperature").drop_attrs() @@ -723,7 +723,7 @@ To demonstrate, let's first generate some example datasets which are not aligned These datasets have different lengths along the ``time`` dimension, and are therefore not aligned along that dimension. -.. ipython:: python +.. jupyter-execute:: ds_daily.sizes ds_weekly.sizes @@ -731,16 +731,16 @@ These datasets have different lengths along the ``time`` dimension, and are ther We cannot store these non-alignable variables on a single :py:class:`~xarray.Dataset` object, because they do not exactly align: -.. ipython:: python - :okexcept: +.. jupyter-execute:: + :raises: xr.align(ds_daily, ds_weekly, ds_monthly, join="exact") But we :ref:`previously said ` that multi-resolution data is a good use case for :py:class:`~xarray.DataTree`, so surely we should be able to store these in a single :py:class:`~xarray.DataTree`? If we first try to create a :py:class:`~xarray.DataTree` with these different-length time dimensions present in both parents and children, we will still get an alignment error: -.. ipython:: python - :okexcept: +.. jupyter-execute:: + :raises: xr.DataTree.from_dict({"daily": ds_daily, "daily/weekly": ds_weekly}) @@ -757,7 +757,7 @@ This alignment check is performed up through the tree, all the way to the root, To represent our unalignable data in a single :py:class:`~xarray.DataTree`, we must instead place all variables which are a function of these different-length dimensions into nodes that are not direct descendents of one another, e.g. organize them as siblings. -.. ipython:: python +.. jupyter-execute:: dt = xr.DataTree.from_dict( {"daily": ds_daily, "weekly": ds_weekly, "monthly": ds_monthly} @@ -769,13 +769,13 @@ Now we have a valid :py:class:`~xarray.DataTree` structure which contains all th This is a useful way to organise our data because we can still operate on all the groups at once. For example we can extract all three timeseries at a specific lat-lon location: -.. ipython:: python +.. jupyter-execute:: dt.sel(lat=75, lon=300) or compute the standard deviation of each timeseries to find out how it varies with sampling frequency: -.. ipython:: python +.. jupyter-execute:: dt.std(dim="time") @@ -786,7 +786,7 @@ Coordinate Inheritance Notice that in the trees we constructed above there is some redundancy - the ``lat`` and ``lon`` variables appear in each sibling group, but are identical across the groups. -.. ipython:: python +.. 
ipython:: python +.. 
jupyter-execute:: dt @@ -797,7 +797,7 @@ We can use "Coordinate Inheritance" to define them only once in a parent group a Let's instead place only the time-dependent variables in the child groups, and put the non-time-dependent ``lat`` and ``lon`` variables in the parent (root) group: -.. ipython:: python +.. jupyter-execute:: dt = xr.DataTree.from_dict( { @@ -814,7 +814,7 @@ Defining the common coordinates just once also ensures that the spatial coordina We can still access the coordinates defined in the parent groups from any of the child groups as if they were actually present on the child groups: -.. ipython:: python +.. jupyter-execute:: dt.daily.coords dt["daily/lat"] @@ -823,7 +823,7 @@ As we can still access them, we say that the ``lat`` and ``lon`` coordinates in If we print just one of the child nodes, it will still display inherited coordinates, but explicitly mark them as such: -.. ipython:: python +.. jupyter-execute:: print(dt["/daily"]) @@ -831,7 +831,7 @@ This helps to differentiate which variables are defined on the datatree node tha We can also still perform all the same operations on the whole tree: -.. ipython:: python +.. jupyter-execute:: dt.sel(lat=[75], lon=[300]) diff --git a/doc/user-guide/indexing.rst b/doc/user-guide/indexing.rst index 784a1f83ff7..23281819826 100644 --- a/doc/user-guide/indexing.rst +++ b/doc/user-guide/indexing.rst @@ -3,8 +3,8 @@ Indexing and selecting data =========================== -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd @@ -62,7 +62,7 @@ Indexing a :py:class:`~xarray.DataArray` directly works (mostly) just like it does for numpy arrays, except that the returned object is always another DataArray: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( np.random.rand(4, 3), @@ -87,7 +87,7 @@ Xarray also supports label-based indexing, just like pandas. Because we use a :py:class:`pandas.Index` under the hood, label based indexing is very fast. To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attribute: -.. ipython:: python +.. jupyter-execute:: da.loc["2000-01-01":"2000-01-02", "IA"] @@ -104,7 +104,7 @@ __ https://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-label Setting values with label based indexing is also supported: -.. ipython:: python +.. jupyter-execute:: da.loc["2000-01-01", ["IL", "IN"]] = -10 da @@ -119,7 +119,7 @@ use them explicitly to slice data. There are two ways to do this: 1. Use the :py:meth:`~xarray.DataArray.sel` and :py:meth:`~xarray.DataArray.isel` convenience methods: - .. ipython:: python + .. jupyter-execute:: # index by integer array indices da.isel(space=0, time=slice(None, 2)) @@ -130,7 +130,7 @@ use them explicitly to slice data. There are two ways to do this: 2. Use a dictionary as the argument for array positional or label based array indexing: - .. ipython:: python + .. jupyter-execute:: # index by integer array indices da[dict(space=0, time=slice(None, 2))] @@ -163,7 +163,7 @@ support ``method`` and ``tolerance`` keyword argument. The method parameter allo enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, ``'backfill'`` or ``'nearest'``: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])]) da.sel(x=[1.1, 1.9], method="nearest") @@ -172,7 +172,7 @@ enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, Tolerance limits the maximum distance for valid matches with an inexact lookup: -.. ipython:: python +.. 
ipython:: python +.. 
jupyter-execute:: da.reindex(x=[1.1, 1.5], method="nearest", tolerance=0.2) @@ -189,14 +189,14 @@ However, you don't need to use ``method`` to do inexact slicing. Slicing already returns all values inside the range (inclusive), as long as the index labels are monotonic increasing: -.. ipython:: python +.. jupyter-execute:: da.sel(x=slice(0.9, 3.1)) Indexing axes with monotonic decreasing labels also works, as long as the ``slice`` or ``.loc`` arguments are also decreasing: -.. ipython:: python +.. jupyter-execute:: reversed_da = da[::-1] reversed_da.loc[3.1:0.9] @@ -216,7 +216,7 @@ Dataset indexing We can also use these methods to index all variables in a dataset simultaneously, returning a new dataset: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( np.random.rand(4, 3), @@ -233,7 +233,7 @@ Positional indexing on a dataset is not supported because the ordering of dimensions in a dataset is somewhat ambiguous (it can vary between different arrays). However, you can do normal indexing with dimension names: -.. ipython:: python +.. jupyter-execute:: ds[dict(space=[0], time=[0])] ds.loc[dict(time="2000-01-01")] @@ -244,7 +244,7 @@ Dropping labels and dimensions The :py:meth:`~xarray.Dataset.drop_sel` method returns a new object with the listed index labels along a dimension dropped: -.. ipython:: python +.. jupyter-execute:: ds.drop_sel(space=["IN", "IL"]) @@ -253,7 +253,7 @@ index labels along a dimension dropped: Use :py:meth:`~xarray.Dataset.drop_dims` to drop a full dimension from a Dataset. Any variables with these dimensions are also dropped: -.. ipython:: python +.. jupyter-execute:: ds.drop_dims("time") @@ -267,7 +267,7 @@ However, it is sometimes useful to select an object with the same shape as the original data, but with some elements masked. To do this type of selection in xarray, use :py:meth:`~xarray.DataArray.where`: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"]) da.where(da.x + da.y < 4) @@ -278,7 +278,7 @@ usual xarray broadcasting and alignment rules for binary operations (e.g., ``+``) between the object being indexed and the condition, as described in :ref:`compute`: -.. ipython:: python +.. jupyter-execute:: da.where(da.y < 2) @@ -287,7 +287,7 @@ where the selected data size is much smaller than the original data, use of the option ``drop=True`` clips coordinate elements that are fully masked: -.. ipython:: python +.. jupyter-execute:: da.where(da.y < 2, drop=True) @@ -300,7 +300,7 @@ To check whether elements of an xarray object contain a single object, you can compare with the equality operator ``==`` (e.g., ``arr == 3``). To check multiple values, use :py:meth:`~xarray.DataArray.isin`: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray([1, 2, 3, 4, 5], dims=["x"]) da.isin([2, 4]) @@ -309,7 +309,7 @@ multiple values, use :py:meth:`~xarray.DataArray.isin`: :py:meth:`~xarray.DataArray.where` to support indexing by arrays that are not already labels of an array: -.. ipython:: python +.. jupyter-execute:: lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=["x"]) da.where(lookup.isin([-2, -4]), drop=True) @@ -332,7 +332,7 @@ understood as orthogonally. Each indexer component selects independently along the corresponding dimension, similar to how vector indexing works in Fortran or MATLAB, or after using the :py:func:`numpy.ix_` helper: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( np.arange(12).reshape((3, 4)), @@ -347,7 +347,7 @@ as indexers. 
Dimensions on resultant arrays are given by the ordered union of the indexers' dimensions: -.. ipython:: python +.. jupyter-execute:: ind_x = xr.DataArray([0, 1], dims=["x"]) ind_y = xr.DataArray([0, 1], dims=["y"]) @@ -356,7 +356,7 @@ dimensions: Slices or sequences/arrays without named-dimensions are treated as if they have the same dimension which is indexed along: -.. ipython:: python +.. jupyter-execute:: # Because [0, 1] is used to index along dimension 'x', # it is assumed to have dimension 'x' @@ -366,7 +366,7 @@ Furthermore, you can use multi-dimensional :py:meth:`~xarray.DataArray` as indexers, where the resultant array dimension is also determined by indexers' dimension: -.. ipython:: python +.. jupyter-execute:: ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da[ind] @@ -380,7 +380,7 @@ See :ref:`indexing.rules` for the complete specification. Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: -.. ipython:: python +.. jupyter-execute:: ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da.isel(y=ind) # same as da[:, ind] @@ -390,7 +390,7 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: These methods may also be applied to ``Dataset`` objects -.. ipython:: python +.. jupyter-execute:: ds = da.to_dataset(name="bar") ds.isel(x=xr.DataArray([0, 1, 2], dims=["points"])) @@ -405,7 +405,7 @@ of the closest latitude and longitude are renamed to an output dimension named "points": -.. ipython:: python +.. jupyter-execute:: ds = xr.tutorial.open_dataset("air_temperature") @@ -440,7 +440,7 @@ Assigning values with indexing To select and assign values to a portion of a :py:meth:`~xarray.DataArray` you can use indexing with ``.loc`` : -.. ipython:: python +.. jupyter-execute:: ds = xr.tutorial.open_dataset("air_temperature") @@ -459,7 +459,7 @@ can use indexing with ``.loc`` : or :py:meth:`~xarray.where`: -.. ipython:: python +.. jupyter-execute:: # modify one grid point using xr.where() ds["empty"] = xr.where( @@ -479,7 +479,7 @@ or :py:meth:`~xarray.where`: Vectorized indexing can also be used to assign values to xarray object. -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( np.arange(12).reshape((3, 4)), @@ -500,7 +500,7 @@ Vectorized indexing can also be used to assign values to xarray object. Like ``numpy.ndarray``, value assignment sometimes works differently from what one may expect. -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray([0, 1, 2, 3], dims=["x"]) ind = xr.DataArray([0, 0, 0], dims=["x"]) @@ -539,7 +539,7 @@ __ https://numpy.org/doc/stable/user/basics.indexing.html#assigning-values-to-in Assigning values with the chained indexing using ``.sel`` or ``.isel`` fails silently. - .. ipython:: python + .. jupyter-execute:: da = xr.DataArray([0, 1, 2, 3], dims=["x"]) # DO NOT do this @@ -548,8 +548,8 @@ __ https://numpy.org/doc/stable/user/basics.indexing.html#assigning-values-to-in You can also assign values to all variables of a :py:class:`Dataset` at once: -.. ipython:: python - :okwarning: +.. jupyter-execute:: + ds_org = xr.tutorial.open_dataset("eraint_uvz").isel( latitude=slice(56, 59), longitude=slice(255, 258), level=0 @@ -584,7 +584,7 @@ More advanced indexing The use of :py:meth:`~xarray.DataArray` objects as indexers enables very flexible indexing. The following is an example of the pointwise indexing: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=["x", "y"]) da @@ -597,7 +597,7 @@ and mapped along a new dimension ``z``. 
If you want to add a coordinate to the new dimension ``z``, you can supply a :py:class:`~xarray.DataArray` with a coordinate, -.. ipython:: python +.. jupyter-execute:: da.isel( x=xr.DataArray([0, 1, 6], dims="z", coords={"z": ["a", "b", "c"]}), @@ -607,7 +607,7 @@ you can supply a :py:class:`~xarray.DataArray` with a coordinate, Analogously, label-based pointwise-indexing is also possible by the ``.sel`` method: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( np.random.rand(4, 3), @@ -638,14 +638,14 @@ useful for greater control and for increased performance. To reindex a particular dimension, use :py:meth:`~xarray.DataArray.reindex`: -.. ipython:: python +.. jupyter-execute:: da.reindex(space=["IA", "CA"]) The :py:meth:`~xarray.DataArray.reindex_like` method is a useful shortcut. To demonstrate, we will make a subset DataArray with new values: -.. ipython:: python +.. jupyter-execute:: foo = da.rename("foo") baz = (10 * da[:2, :2]).rename("baz") @@ -654,21 +654,21 @@ To demonstrate, we will make a subset DataArray with new values: Reindexing ``foo`` with ``baz`` selects out the first two values along each dimension: -.. ipython:: python +.. jupyter-execute:: foo.reindex_like(baz) The opposite operation asks us to reindex to a larger shape, so we fill in the missing values with ``NaN``: -.. ipython:: python +.. jupyter-execute:: baz.reindex_like(foo) The :py:func:`~xarray.align` function lets us perform more flexible database-like ``'inner'``, ``'outer'``, ``'left'`` and ``'right'`` joins: -.. ipython:: python +.. jupyter-execute:: xr.align(foo, baz, join="inner") xr.align(foo, baz, join="outer") @@ -676,7 +676,7 @@ The :py:func:`~xarray.align` function lets us perform more flexible database-lik Both ``reindex_like`` and ``align`` work interchangeably between :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects, and with any number of matching dimension names: -.. ipython:: python +.. jupyter-execute:: ds ds.reindex_like(baz) @@ -693,7 +693,7 @@ Coordinate labels for each dimension are optional (as of xarray v0.9). Label based indexing with ``.sel`` and ``.loc`` uses standard positional, integer-based indexing as a fallback for dimensions without a coordinate label: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray([1, 2, 3], dims="x") da.sel(x=[0, -1]) @@ -715,7 +715,7 @@ Xarray uses the :py:class:`pandas.Index` internally to perform indexing operations. If you need to access the underlying indexes, they are available through the :py:attr:`~xarray.DataArray.indexes` attribute. -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( np.random.rand(4, 3), @@ -732,7 +732,7 @@ Use :py:meth:`~xarray.DataArray.get_index` to get an index for a dimension, falling back to a default :py:class:`pandas.RangeIndex` if it has no coordinate labels: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray([1, 2, 3], dims="x") da @@ -780,7 +780,7 @@ Just like pandas, advanced indexing on multi-level indexes is possible with i.e., a tuple of slices, labels, list of labels, or any selector allowed by pandas: -.. ipython:: python +.. jupyter-execute:: midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) @@ -790,20 +790,20 @@ pandas: You can also select multiple elements by providing a list of labels or tuples or a slice of tuples: -.. ipython:: python +.. jupyter-execute:: mda.sel(x=[("a", 0), ("b", 1)]) Additionally, xarray supports dictionaries: -.. 
ipython:: python +.. jupyter-execute:: mda.sel(x={"one": "a", "two": 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: -.. ipython:: python +.. jupyter-execute:: mda.sel(one="a", two=0) @@ -815,7 +815,7 @@ Like pandas, xarray handles partial selection on multi-index (level drop). As shown below, it also renames the dimension / coordinate when the multi-index is reduced to a single index. -.. ipython:: python +.. jupyter-execute:: mda.loc[{"one": "a"}, ...] diff --git a/doc/user-guide/interpolation.rst b/doc/user-guide/interpolation.rst index f1199ec7af3..1fb8d1548d3 100644 --- a/doc/user-guide/interpolation.rst +++ b/doc/user-guide/interpolation.rst @@ -3,12 +3,13 @@ Interpolating data ================== -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd import xarray as xr + import matplotlib.pyplot as plt np.random.seed(123456) @@ -26,7 +27,7 @@ Scalar and 1-dimensional interpolation Interpolating a :py:class:`~xarray.DataArray` works mostly like labeled indexing of a :py:class:`~xarray.DataArray`, -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( np.sin(0.3 * np.arange(12).reshape(4, 3)), @@ -42,7 +43,7 @@ indexing of a :py:class:`~xarray.DataArray`, Similar to the indexing, :py:meth:`~xarray.DataArray.interp` also accepts an array-like, which gives the interpolated result as an array. -.. ipython:: python +.. jupyter-execute:: # label lookup da.sel(time=[2, 3]) @@ -52,7 +53,7 @@ array-like, which gives the interpolated result as an array. To interpolate data with a :py:doc:`numpy.datetime64 ` coordinate you can pass a string. -.. ipython:: python +.. jupyter-execute:: da_dt64 = xr.DataArray( [1, 3], [("time", pd.date_range("1/1/2000", "1/3/2000", periods=2))] @@ -62,7 +63,7 @@ To interpolate data with a :py:doc:`numpy.datetime64 `_. +zarr `_. These options can be passed to the ``to_zarr`` method as variable encoding. For example: -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: ! rm -rf foo.zarr -.. ipython:: python - :okwarning: +.. jupyter-execute:: import zarr - from numcodecs.blosc import Blosc + from zarr.codecs import BloscCodec - compressor = Blosc(cname="zstd", clevel=3, shuffle=2) - ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) + compressor = BloscCodec(cname="zstd", clevel=3, shuffle="shuffle") + ds.to_zarr("foo.zarr", encoding={"foo": {"compressors": [compressor]}}) .. note:: @@ -871,13 +869,13 @@ To resize and then append values along an existing dimension in a store, set ``append_dim``. This is a good option if data always arrives in a particular order, e.g., for time-stepping a simulation: -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: ! rm -rf path/to/directory.zarr -.. ipython:: python - :okwarning: +.. jupyter-execute:: + ds1 = xr.Dataset( {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, @@ -932,7 +930,7 @@ For example, let's say we're working with a dataset with dimensions ``('time', 'x', 'y')``, a variable ``Tair`` which is chunked in ``x`` and ``y``, and two multi-dimensional coordinates ``xc`` and ``yc``: -.. ipython:: python +.. jupyter-execute:: ds = xr.tutorial.open_dataset("rasm") @@ -944,8 +942,8 @@ These multi-dimensional coordinates are only two-dimensional and take up very li space on disk or in memory, yet when writing to disk the default zarr behavior is to split them into chunks: -.. ipython:: python - :okwarning: +.. 
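jupyter-execute::
+
+    # illustrative check: the 2-D coordinates are tiny next to the data variable
+    ds.xc.nbytes, ds.Tair.nbytes
+
+.. 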
jupyter-execute:: + ds.to_zarr("path/to/directory.zarr", mode="w") ! ls -R path/to/directory.zarr @@ -953,14 +951,14 @@ split them into chunks: This may cause unwanted overhead on some systems, such as when reading from a cloud storage provider. To disable this chunking, we can specify a chunk size equal to the -length of each dimension by using the shorthand chunk size ``-1``: +length of each dimension by using the shorthand chunk size ``None``: + +.. jupyter-execute:: -.. ipython:: python - :okwarning: ds.to_zarr( "path/to/directory.zarr", - encoding={"xc": {"chunks": (-1, -1)}, "yc": {"chunks": (-1, -1)}}, + encoding={"xc": {"chunks": None}, "yc": {"chunks": None}}, mode="w", ) ! ls -R path/to/directory.zarr @@ -1068,7 +1066,8 @@ You can view the whole dataset with from this combined reference using the above The following example shows opening a combined references generated from a ``.hdf`` file stored locally. -.. ipython:: python +.. jupyter-execute:: + :raises: storage_options = { "target_protocol": "file", @@ -1104,7 +1103,7 @@ DataArray ``to_iris`` and ``from_iris`` If iris is installed, xarray can convert a ``DataArray`` into a ``Cube`` using :py:meth:`DataArray.to_iris`: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( np.random.rand(4, 5), @@ -1118,7 +1117,7 @@ If iris is installed, xarray can convert a ``DataArray`` into a ``Cube`` using Conversely, we can create a new ``DataArray`` object from a ``Cube`` using :py:meth:`DataArray.from_iris`: -.. ipython:: python +.. jupyter-execute:: da_cube = xr.DataArray.from_iris(cube) da_cube @@ -1132,16 +1131,14 @@ using actual disk files. For example: -.. ipython:: python - :okwarning: +.. jupyter-execute:: ds = xr.tutorial.open_dataset("air_temperature_gradient") cubes = ncdata.iris_xarray.cubes_from_xarray(ds) print(cubes) print(cubes[1]) -.. ipython:: python - :okwarning: +.. jupyter-execute:: ds = ncdata.iris_xarray.cubes_to_xarray(cubes) print(ds) @@ -1168,17 +1165,6 @@ For example, we can open a connection to GBs of weather data produced by the __ https://www.prism.oregonstate.edu/ __ https://iri.columbia.edu/ -.. ipython source code for this section - we don't use this to avoid hitting the DAP server on every doc build. - - remote_data = xr.open_dataset( - 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', - decode_times=False) - tmax = remote_data.tmax[:500, ::3, ::3] - tmax - - @savefig opendap-prism-tmax.png - tmax[0].plot() .. ipython:: :verbatim: @@ -1292,7 +1278,7 @@ Pickle The simplest way to serialize an xarray object is to use Python's built-in pickle module: -.. ipython:: python +.. jupyter-execute:: import pickle @@ -1327,7 +1313,7 @@ Dictionary We can convert a ``Dataset`` (or a ``DataArray``) to a dict using :py:meth:`Dataset.to_dict`: -.. ipython:: python +.. jupyter-execute:: ds = xr.Dataset({"foo": ("x", np.arange(30))}) ds @@ -1338,7 +1324,7 @@ We can convert a ``Dataset`` (or a ``DataArray``) to a dict using We can create a new xarray object from a dict using :py:meth:`Dataset.from_dict`: -.. ipython:: python +.. jupyter-execute:: ds_dict = xr.Dataset.from_dict(d) ds_dict @@ -1351,12 +1337,12 @@ be quite large. To export just the dataset schema without the data itself, use the ``data=False`` option: -.. ipython:: python +.. jupyter-execute:: ds.to_dict(data=False) -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: # We're now done with the dataset named `ds`. 
Although the `with` statement closed # the dataset, displaying the unpickled pickle of `ds` re-opened "saved_on_disk.nc". @@ -1424,8 +1410,8 @@ GDAL readable raster data using `rasterio`_ such as GeoTIFFs can be opened usin .. _io.cfgrib: -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import shutil diff --git a/doc/user-guide/pandas.rst b/doc/user-guide/pandas.rst index 9e070ae6e57..ac6105fcdf4 100644 --- a/doc/user-guide/pandas.rst +++ b/doc/user-guide/pandas.rst @@ -14,8 +14,8 @@ aware libraries such as `Seaborn`__. __ https://pandas.pydata.org/pandas-docs/stable/visualization.html __ https://seaborn.pydata.org/ -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd @@ -46,7 +46,7 @@ Dataset and DataFrame To convert any dataset to a ``DataFrame`` in tidy form, use the :py:meth:`Dataset.to_dataframe()` method: -.. ipython:: python +.. jupyter-execute:: ds = xr.Dataset( {"foo": (("x", "y"), np.random.randn(2, 3))}, @@ -74,7 +74,7 @@ To create a ``Dataset`` from a ``DataFrame``, use the :py:meth:`Dataset.from_dataframe` class method or the equivalent :py:meth:`pandas.DataFrame.to_xarray` method: -.. ipython:: python +.. jupyter-execute:: xr.Dataset.from_dataframe(df) @@ -95,7 +95,7 @@ DataArray and Series of ``Series``. The methods are very similar to those for working with DataFrames: -.. ipython:: python +.. jupyter-execute:: s = ds["foo"].to_series() s @@ -105,7 +105,7 @@ DataFrames: Both the ``from_series`` and ``from_dataframe`` methods use reindexing, so they work even if the hierarchical index is not a full tensor product: -.. ipython:: python +.. jupyter-execute:: s[::2] s[::2].to_xarray() @@ -141,7 +141,7 @@ DataArray directly into a pandas object with the same dimensionality, if available in pandas (i.e., a 1D array is converted to a :py:class:`~pandas.Series` and 2D to :py:class:`~pandas.DataFrame`): -.. ipython:: python +.. jupyter-execute:: arr = xr.DataArray( np.random.randn(2, 3), coords=[("x", [10, 20]), ("y", ["a", "b", "c"])] @@ -153,7 +153,7 @@ To perform the inverse operation of converting any pandas objects into a data array with the same shape, simply use the :py:class:`DataArray` constructor: -.. ipython:: python +.. jupyter-execute:: xr.DataArray(df) @@ -161,7 +161,7 @@ Both the ``DataArray`` and ``Dataset`` constructors directly convert pandas objects into xarray objects with the same shape. This means that they preserve all use of multi-indexes: -.. ipython:: python +.. jupyter-execute:: index = pd.MultiIndex.from_arrays( [["a", "a", "b"], [0, 1, 2]], names=["one", "two"] @@ -200,9 +200,9 @@ So you can represent a Panel, in two ways: Let's take a look: -.. ipython:: python +.. jupyter-execute:: - data = np.random.default_rng(0).rand(2, 3, 4) + data = np.random.default_rng(0).random((2, 3, 4)) items = list("ab") major_axis = list("mno") minor_axis = pd.date_range(start="2000", periods=4, name="date") @@ -222,7 +222,7 @@ With old versions of pandas (prior to 0.25), this could stored in a ``Panel``: To put this data in a ``DataArray``, write: -.. ipython:: python +.. jupyter-execute:: array = xr.DataArray(data, [items, major_axis, minor_axis]) array @@ -233,7 +233,7 @@ respectively, while the third retains its name ``date``. You can also easily convert this data into ``Dataset``: -.. ipython:: python +.. 
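jupyter-execute::
+
+    # illustrative: any of the three dimensions can be unpacked into variables
+    array.to_dataset(dim="dim_1")
+
+.. 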
jupyter-execute:: array.to_dataset(dim="dim_0") diff --git a/doc/user-guide/plotting.rst b/doc/user-guide/plotting.rst index 42cbd1eb5b0..f317dcd94cf 100644 --- a/doc/user-guide/plotting.rst +++ b/doc/user-guide/plotting.rst @@ -51,8 +51,8 @@ For more extensive plotting applications consider the following projects: Imports ~~~~~~~ -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: # Use defaults so we don't get gridlines in generated docs import matplotlib as mpl @@ -61,7 +61,7 @@ Imports The following imports are necessary for all of the examples. -.. ipython:: python +.. jupyter-execute:: import numpy as np import pandas as pd @@ -70,7 +70,7 @@ The following imports are necessary for all of the examples. For these examples we'll use the North American air temperature dataset. -.. ipython:: python +.. jupyter-execute:: airtemps = xr.tutorial.open_dataset("air_temperature") airtemps @@ -98,12 +98,10 @@ One Dimension The simplest way to make a plot is to call the :py:func:`DataArray.plot()` method. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - air1d = air.isel(lat=10, lon=10) - @savefig plotting_1d_simple.png width=4in + air1d = air.isel(lat=10, lon=10) air1d.plot() Xarray uses the coordinate name along with metadata ``attrs.long_name``, @@ -114,7 +112,7 @@ The names ``long_name``, ``standard_name`` and ``units`` are copied from the When choosing names, the order of precedence is ``long_name``, ``standard_name`` and finally ``DataArray.name``. The y-axis label in the above plot was constructed from the ``long_name`` and ``units`` attributes of ``air1d``. -.. ipython:: python +.. jupyter-execute:: air1d.attrs @@ -131,10 +129,8 @@ can be used: .. _matplotlib.pyplot.plot: https://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_1d_additional_args.png width=4in air1d[:200].plot.line("b-^") .. note:: @@ -144,10 +140,8 @@ can be used: Keyword arguments work the same way, and are more explicit. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_example_sin3.png width=4in air1d[:200].plot.line(color="purple", marker="o") ========================= @@ -159,8 +153,7 @@ To add the plot to an existing axis pass in the axis as a keyword argument In this example ``axs`` is an array consisting of the left and right axes created by ``plt.subplots``. -.. ipython:: python - :okwarning: +.. jupyter-execute:: fig, axs = plt.subplots(ncols=2) @@ -170,8 +163,6 @@ axes created by ``plt.subplots``. air1d.plot.hist(ax=axs[1]) plt.tight_layout() - - @savefig plotting_example_existing_axes.png width=6in plt.draw() On the right is a histogram created by :py:func:`xarray.plot.hist`. @@ -187,15 +178,13 @@ control the figure size. For convenience, xarray's plotting methods also support the ``aspect`` and ``size`` arguments which control the size of the resulting image via the formula ``figsize = (aspect * size, size)``: -.. ipython:: python - :okwarning: +.. jupyter-execute:: air1d.plot(aspect=2, size=3) - @savefig plotting_example_size_and_aspect.png plt.tight_layout() -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: # create a dummy figure so sphinx plots everything below normally plt.figure() @@ -229,8 +218,7 @@ However, you can also use non-dimension coordinates, MultiIndex levels, and dime without coordinates along the x-axis. To illustrate this, let's calculate a 'decimal day' (epoch) from the time and assign it as a non-dimension coordinate: -.. 
ipython:: python - :okwarning: +.. jupyter-execute:: decimal_day = (air1d.time - air1d.time[0]) / pd.Timedelta("1d") air1d_multi = air1d.assign_coords(decimal_day=("time", decimal_day.data)) @@ -238,24 +226,21 @@ from the time and assign it as a non-dimension coordinate: To use ``'decimal_day'`` as x coordinate it must be explicitly specified: -.. ipython:: python - :okwarning: +.. jupyter-execute:: air1d_multi.plot(x="decimal_day") Creating a new MultiIndex named ``'date'`` from ``'time'`` and ``'decimal_day'``, it is also possible to use a MultiIndex level as x-axis: -.. ipython:: python - :okwarning: +.. jupyter-execute:: air1d_multi = air1d_multi.set_index(date=("time", "decimal_day")) air1d_multi.plot(x="decimal_day") Finally, if a dataset does not have any coordinates it enumerates all data points: -.. ipython:: python - :okwarning: +.. jupyter-execute:: air1d_multi = air1d_multi.drop_vars(["date", "time", "decimal_day"]) air1d_multi.plot() @@ -270,10 +255,8 @@ It is possible to make line plots of two-dimensional data by calling :py:func:`x with appropriate arguments. Consider the 3D variable ``air`` defined above. We can use line plots to check the variation of air temperature at three different latitudes along a longitude line: -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_example_multiple_lines_x_kwarg.png air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time") It is required to explicitly specify either @@ -292,10 +275,8 @@ If required, the automatic legend can be turned off using ``add_legend=False``. It is also possible to make line plots such that the data are on the x-axis and a dimension is on the y-axis. This can be done by specifying the appropriate ``y`` keyword argument. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_example_xy_kwarg.png air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon") ============ @@ -305,18 +286,15 @@ It is also possible to make line plots such that the data are on the x-axis and As an alternative, also a step plot similar to matplotlib's ``plt.step`` can be made using 1D data. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_example_step.png width=4in air1d[:20].plot.step(where="mid") The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy when plotting data grouped with :py:meth:`Dataset.groupby_bins`. -.. ipython:: python - :okwarning: +.. jupyter-execute:: air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90]) air_mean = air_grp.mean() @@ -325,7 +303,6 @@ when plotting data grouped with :py:meth:`Dataset.groupby_bins`. (air_mean + air_std).plot.step(ls=":") (air_mean - air_std).plot.step(ls=":") plt.ylim(-20, 30) - @savefig plotting_example_step_groupby.png width=4in plt.title("Zonal mean temperature") In this case, the actual boundaries of the bins are used and the ``where`` argument @@ -338,10 +315,8 @@ Other axes kwargs The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes direction. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_example_xincrease_yincrease_kwarg.png air.isel(time=10, lon=[10, 11]).plot.line( y="lat", hue="lon", xincrease=False, yincrease=False ) @@ -362,21 +337,16 @@ Two Dimensions The default method :py:meth:`DataArray.plot` calls :py:func:`xarray.plot.pcolormesh` by default when the data is two-dimensional. -.. ipython:: python - :okwarning: +.. 
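jupyter-execute::
+
+    # illustrative: calling pcolormesh explicitly gives the same figure
+    # as the default .plot() for this 2-D slice
+    air.isel(time=500).plot.pcolormesh()
+
+.. 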
jupyter-execute:: air2d = air.isel(time=500) - - @savefig 2d_simple.png width=4in air2d.plot() All 2d plots in xarray allow the use of the keyword arguments ``yincrease`` and ``xincrease``. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig 2d_simple_yincrease.png width=4in air2d.plot(yincrease=False) .. note:: @@ -393,14 +363,10 @@ and ``xincrease``. Xarray plots data with :ref:`missing_values`. -.. ipython:: python - :okwarning: +.. jupyter-execute:: bad_air2d = air2d.copy() - bad_air2d[dict(lat=slice(0, 10), lon=slice(0, 25))] = np.nan - - @savefig plotting_missing_values.png width=4in bad_air2d.plot() ======================== @@ -411,14 +377,12 @@ It's not necessary for the coordinates to be evenly spaced. Both :py:func:`xarray.plot.pcolormesh` (default) and :py:func:`xarray.plot.contourf` can produce plots with nonuniform coordinates. -.. ipython:: python - :okwarning: +.. jupyter-execute:: b = air2d.copy() # Apply a nonlinear transformation to one of the coords b.coords["lat"] = np.log(b.coords["lat"]) - @savefig plotting_nonuniform_coords.png width=4in b.plot() ==================== @@ -429,26 +393,20 @@ There are several other options for plotting 2D data. Contour plot using :py:meth:`DataArray.plot.contour()` -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_contour.png width=4in air2d.plot.contour() Filled contour plot using :py:meth:`DataArray.plot.contourf()` -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_contourf.png width=4in air2d.plot.contourf() Surface plot using :py:meth:`DataArray.plot.surface()` -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_surface.png width=4in # transpose just to make the example look a bit nicer air2d.T.plot.surface() @@ -459,16 +417,13 @@ Surface plot using :py:meth:`DataArray.plot.surface()` Since this is a thin wrapper around matplotlib, all the functionality of matplotlib is available. -.. ipython:: python - :okwarning: +.. jupyter-execute:: air2d.plot(cmap=plt.cm.Blues) plt.title("These colors prove North America\nhas fallen in the ocean") plt.ylabel("latitude") plt.xlabel("longitude") plt.tight_layout() - - @savefig plotting_2d_call_matplotlib.png width=4in plt.draw() .. note:: @@ -479,13 +434,10 @@ matplotlib is available. In the example below, ``plt.xlabel`` effectively does nothing, since ``d_ylog.plot()`` updates the xlabel. - .. ipython:: python - :okwarning: + .. jupyter-execute:: plt.xlabel("Never gonna see this.") air2d.plot() - - @savefig plotting_2d_call_matplotlib2.png width=4in plt.draw() =========== @@ -495,10 +447,8 @@ matplotlib is available. Xarray borrows logic from Seaborn to infer what kind of color map to use. For example, consider the original data in Kelvins rather than Celsius: -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_kelvin.png width=4in airtemps.air.isel(time=0).plot() The Celsius data contain 0, so a diverging color map was used. The @@ -514,14 +464,12 @@ Outliers often have an extreme effect on the output of the plot. Here we add two bad data points. This affects the color scale, washing out the plot. -.. ipython:: python - :okwarning: +.. jupyter-execute:: air_outliers = airtemps.air.isel(time=0).copy() air_outliers[0, 0] = 100 air_outliers[-1, -1] = 400 - @savefig plotting_robust1.png width=4in air_outliers.plot() This plot shows that we have outliers. 
The easy way to visualize @@ -530,10 +478,8 @@ the data without the outliers is to pass the parameter This will use the 2nd and 98th percentiles of the data to compute the color limits. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_robust2.png width=4in air_outliers.plot(robust=True) Observe that the ranges of the color bar have changed. The arrows on the @@ -549,28 +495,23 @@ rather than the default continuous colormaps that matplotlib uses. The ``levels`` keyword argument can be used to generate plots with discrete colormaps. For example, to make a plot with 8 discrete color intervals: -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_discrete_levels.png width=4in air2d.plot(levels=8) It is also possible to use a list of levels to specify the boundaries of the discrete colormap: -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_listed_levels.png width=4in air2d.plot(levels=[0, 12, 18, 30]) You can also specify a list of discrete colors through the ``colors`` argument: -.. ipython:: python - :okwarning: +.. jupyter-execute:: + flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"] - @savefig plotting_custom_colors_levels.png width=4in air2d.plot(levels=[0, 12, 18, 30], colors=flatui) Finally, if you have `Seaborn `_ @@ -579,10 +520,8 @@ argument. Note that ``levels`` *must* be specified with seaborn color palettes if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, since levels are chosen automatically). -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plotting_seaborn_palette.png width=4in air2d.plot(levels=10, cmap="husl") plt.draw() @@ -614,7 +553,7 @@ size of this dimension from 2920 -> 12. A simpler way is to just take a slice on that dimension. So let's use a slice to pick 6 times throughout the first year. -.. ipython:: python +.. jupyter-execute:: t = air.isel(time=slice(0, 365 * 4, 250)) t.coords @@ -627,18 +566,14 @@ The easiest way to create faceted plots is to pass in ``row`` or ``col`` arguments to the xarray plotting methods/functions. This returns a :py:class:`xarray.plot.FacetGrid` object. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plot_facet_dataarray.png g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3) Faceting also works for line plots. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig plot_facet_dataarray_line.png g_simple_line = t.isel(lat=slice(0, None, 4)).plot( x="lon", hue="lat", col="time", col_wrap=3 ) @@ -652,15 +587,13 @@ Here we create a 4 dimensional array by taking the original data and adding a fixed amount. Now we can see how the temperature maps would compare if one were much hotter. -.. ipython:: python - :okwarning: +.. jupyter-execute:: t2 = t.isel(time=slice(0, 2)) t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim")) # This is a 4d array t4d.coords - @savefig plot_facet_4d.png t4d.plot(x="lon", y="lat", col="time", row="fourth_dim") ================ @@ -669,19 +602,18 @@ one were much hotter. Faceted plotting supports other arguments common to xarray 2d plots. -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: plt.close("all") -.. ipython:: python - :okwarning: +.. 
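jupyter-execute::
+
+    # illustrative: other 2-d plot arguments, e.g. ``cmap``, combine with faceting
+    t.plot(x="lon", y="lat", col="time", col_wrap=3, cmap="viridis")
+
+.. 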
jupyter-execute:: + hasoutliers = t.isel(time=slice(0, 5)).copy() hasoutliers[0, 0, 0] = -100 hasoutliers[-1, -1, -1] = 400 - @savefig plot_facet_robust.png g = hasoutliers.plot.pcolormesh( x="lon", y="lat", @@ -704,7 +636,7 @@ It borrows an API and code from `Seaborn's FacetGrid The structure is contained within the ``axs`` and ``name_dicts`` attributes, both 2d NumPy object arrays. -.. ipython:: python +.. jupyter-execute:: g.axs @@ -714,15 +646,15 @@ It's possible to select the :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset` corresponding to the FacetGrid through the ``name_dicts``. -.. ipython:: python +.. jupyter-execute:: g.data.loc[g.name_dicts[0, 0]] Here is an example of using the lower level API and then modifying the axes after they have been plotted. -.. ipython:: python - :okwarning: +.. jupyter-execute:: + g = t.plot.imshow(x="lon", y="lat", col="time", col_wrap=3, robust=True) @@ -732,7 +664,6 @@ they have been plotted. bottomright = g.axs[-1, -1] bottomright.annotate("bottom right", (240, 40)) - @savefig plot_facet_iterator.png plt.draw() @@ -754,7 +685,7 @@ Datasets Xarray has limited support for plotting Dataset variables against each other. Consider this dataset -.. ipython:: python +.. jupyter-execute:: ds = xr.tutorial.scatter_example_dataset(seed=42) ds @@ -765,83 +696,64 @@ Scatter Let's plot the ``A`` DataArray as a function of the ``y`` coord -.. ipython:: python - :okwarning: +.. jupyter-execute:: + ds.A - @savefig da_A_y.png ds.A.plot.scatter(x="y") Same plot can be displayed using the dataset: -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_A_y.png ds.plot.scatter(x="y", y="A") Now suppose we want to scatter the ``A`` DataArray against the ``B`` DataArray -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_simple_scatter.png ds.plot.scatter(x="A", y="B") The ``hue`` kwarg lets you vary the color by variable value -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_hue_scatter.png ds.plot.scatter(x="A", y="B", hue="w") You can force a legend instead of a colorbar by setting ``add_legend=True, add_colorbar=False``. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_discrete_legend_hue_scatter.png ds.plot.scatter(x="A", y="B", hue="w", add_legend=True, add_colorbar=False) -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_discrete_colorbar_hue_scatter.png ds.plot.scatter(x="A", y="B", hue="w", add_legend=False, add_colorbar=True) The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_hue_size_scatter.png ds.plot.scatter(x="A", y="B", hue="y", markersize="z") The ``z`` kwarg lets you plot the data along the z-axis as well. -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_hue_size_scatter_z.png ds.plot.scatter(x="A", y="B", z="z", hue="y", markersize="x") Faceting is also possible -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_facet_scatter.png ds.plot.scatter(x="A", y="B", hue="y", markersize="x", row="x", col="w") And adding the z-axis -.. ipython:: python - :okwarning: +.. 
jupyter-execute:: - @savefig ds_facet_scatter_z.png ds.plot.scatter(x="A", y="B", z="z", hue="y", markersize="x", row="x", col="w") For more advanced scatter plots, we recommend converting the relevant data variables @@ -852,19 +764,15 @@ Quiver Visualizing vector fields is supported with quiver plots: -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_simple_quiver.png ds.isel(w=1, z=1).plot.quiver(x="x", y="y", u="A", v="B") where ``u`` and ``v`` denote the x and y direction components of the arrow vectors. Again, faceting is also possible: -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_facet_quiver.png ds.plot.quiver(x="x", y="y", u="A", v="B", col="w", row="z", scale=4) ``scale`` is required for faceted quiver plots. @@ -875,20 +783,16 @@ Streamplot Visualizing vector fields is also supported with streamline plots: -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_simple_streamplot.png ds.isel(w=1, z=1).plot.streamplot(x="x", y="y", u="A", v="B") where ``u`` and ``v`` denote the x and y direction components of the vectors tangent to the streamlines. Again, faceting is also possible: -.. ipython:: python - :okwarning: +.. jupyter-execute:: - @savefig ds_facet_streamplot.png ds.plot.streamplot(x="x", y="y", u="A", v="B", col="w", row="z") .. _plot-maps: @@ -900,8 +804,8 @@ To follow this section you'll need to have Cartopy installed and working. This script will plot the air temperature on a map. -.. ipython:: python - :okwarning: +.. jupyter-execute:: + import cartopy.crs as ccrs @@ -913,15 +817,14 @@ This script will plot the air temperature on a map. ) p.axes.set_global() - @savefig plotting_maps_cartopy.png width=100% p.axes.coastlines() When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created by faceting are accessible in the object returned by ``plot``: -.. ipython:: python - :okwarning: +.. jupyter-execute:: + p = air.isel(time=[0, 4]).plot( transform=ccrs.PlateCarree(), @@ -931,7 +834,7 @@ by faceting are accessible in the object returned by ``plot``: for ax in p.axs.flat: ax.coastlines() ax.gridlines() - @savefig plotting_maps_cartopy_facetting.png width=100% + plt.draw() @@ -952,8 +855,8 @@ There are three ways to use the xarray plotting functionality: These are provided for user convenience; they all call the same code. -.. ipython:: python - :okwarning: +.. jupyter-execute:: + import xarray.plot as xplt @@ -964,7 +867,7 @@ These are provided for user convenience; they all call the same code. xplt.plot(da, ax=axs[1, 0]) xplt.line(da, ax=axs[1, 1]) plt.tight_layout() - @savefig plotting_ways_to_use.png width=6in + plt.draw() Here the output is the same. Since the data is 1 dimensional the line plot @@ -989,7 +892,7 @@ Coordinates If you'd like to find out what's really going on in the coordinate system, read on. -.. ipython:: python +.. jupyter-execute:: a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=("y", "x", "z"), name="temperature") a0[0, 0, 0] = 1 @@ -1002,10 +905,8 @@ Before reading on, you may want to look at the coordinates and think carefully about what the limits, labels, and orientation for each of the axes should be. -.. ipython:: python - :okwarning: +.. 
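jupyter-execute::
+
+    # illustrative check: these dimensions end up on the plot's y- and x-axes
+    a.dims
+
+.. 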
jupyter-execute:: - @savefig plotting_example_2d_simple.png width=4in a.plot() It may seem strange that @@ -1023,8 +924,8 @@ You can plot irregular grids defined by multidimensional coordinates with xarray, but you'll have to tell the plot function to use these coordinates instead of the default ones: -.. ipython:: python - :okwarning: +.. jupyter-execute:: + lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) lon += lat / 10 @@ -1035,7 +936,6 @@ instead of the default ones: coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, ) - @savefig plotting_example_2d_irreg.png width=4in da.plot.pcolormesh(x="lon", y="lat") Note that in this case, xarray still follows the pixel centered convention. @@ -1043,8 +943,7 @@ This might be undesirable in some cases, for example when your data is defined on a polar projection (:issue:`781`). This is why the default is to not follow this convention when plotting on a map: -.. ipython:: python - :okwarning: +.. jupyter-execute:: import cartopy.crs as ccrs @@ -1052,20 +951,17 @@ this convention when plotting on a map: da.plot.pcolormesh(x="lon", y="lat", ax=ax) ax.scatter(lon, lat, transform=ccrs.PlateCarree()) ax.coastlines() - @savefig plotting_example_2d_irreg_map.png width=4in ax.gridlines(draw_labels=True) You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: -.. ipython:: python - :okwarning: +.. jupyter-execute:: ax = plt.subplot(projection=ccrs.PlateCarree()) da.plot.pcolormesh(x="lon", y="lat", ax=ax, infer_intervals=True) ax.scatter(lon, lat, transform=ccrs.PlateCarree()) ax.coastlines() - @savefig plotting_example_2d_irreg_map_infer.png width=4in ax.gridlines(draw_labels=True) .. note:: @@ -1077,10 +973,8 @@ You can however decide to infer the cell boundaries and use the One can also make line plots with multidimensional coordinates. In this case, ``hue`` must be a dimension name, not a coordinate name. -.. ipython:: python - :okwarning: +.. jupyter-execute:: f, ax = plt.subplots(2, 1) da.plot.line(x="lon", hue="y", ax=ax[0]) - @savefig plotting_example_2d_hue_xy.png da.plot.line(x="lon", hue="x", ax=ax[1]) diff --git a/doc/user-guide/reshaping.rst b/doc/user-guide/reshaping.rst index aa96190f820..b607d7f0040 100644 --- a/doc/user-guide/reshaping.rst +++ b/doc/user-guide/reshaping.rst @@ -11,8 +11,8 @@ These methods are particularly useful for reshaping xarray objects for use in ma Importing the library --------------------- -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd @@ -27,7 +27,7 @@ To reorder dimensions on a :py:class:`~xarray.DataArray` or across all variables on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`. An ellipsis (`...`) can be used to represent all other dimensions: -.. ipython:: python +.. jupyter-execute:: ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])}) ds.transpose("y", "z", "x") @@ -41,7 +41,7 @@ To expand a :py:class:`~xarray.DataArray` or all variables on a :py:class:`~xarray.Dataset` along a new dimension, use :py:meth:`~xarray.DataArray.expand_dims` -.. ipython:: python +.. jupyter-execute:: expanded = ds.expand_dims("w") expanded @@ -52,7 +52,7 @@ To remove such a size-1 dimension from the :py:class:`~xarray.DataArray` or :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.squeeze` -.. ipython:: python +.. 
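jupyter-execute::
+
+    # 'w' has length 1, so it can be removed without losing any data
+    expanded.sizes
+
+.. 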
jupyter-execute:: expanded.squeeze("w") @@ -61,7 +61,7 @@ Converting between datasets and arrays To convert from a Dataset to a DataArray, use :py:meth:`~xarray.Dataset.to_dataarray`: -.. ipython:: python +.. jupyter-execute:: arr = ds.to_dataarray() arr @@ -73,14 +73,14 @@ coordinates. To convert back from a DataArray to a Dataset, use :py:meth:`~xarray.DataArray.to_dataset`: -.. ipython:: python +.. jupyter-execute:: arr.to_dataset(dim="variable") The broadcasting behavior of ``to_dataarray`` means that the resulting array includes the union of data variable dimensions: -.. ipython:: python +.. jupyter-execute:: ds2 = xr.Dataset({"a": 0, "b": ("x", [3, 4, 5])}) @@ -94,7 +94,7 @@ Otherwise, the result could not be represented as an orthogonal array. If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray will be converted into a Dataset of one variable: -.. ipython:: python +.. jupyter-execute:: arr.to_dataset(name="combined") @@ -107,7 +107,7 @@ As part of xarray's nascent support for :py:class:`pandas.MultiIndex`, we have implemented :py:meth:`~xarray.DataArray.stack` and :py:meth:`~xarray.DataArray.unstack` method, for combining or splitting dimensions: -.. ipython:: python +.. jupyter-execute:: array = xr.DataArray( np.random.randn(2, 3), coords=[("x", ["a", "b"]), ("y", [0, 1, 2])] @@ -118,7 +118,7 @@ implemented :py:meth:`~xarray.DataArray.stack` and As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: -.. ipython:: python +.. jupyter-execute:: stacked = array.stack(z=[..., "x"]) stacked @@ -131,7 +131,7 @@ Like :py:meth:`DataFrame.unstack`, xarray's ``unstack` always succeeds, even if the multi-index being unstacked does not contain all possible levels. Missing levels are filled in with ``NaN`` in the resulting object: -.. ipython:: python +.. jupyter-execute:: stacked2 = stacked[::2] stacked2 @@ -140,7 +140,7 @@ possible levels. Missing levels are filled in with ``NaN`` in the resulting obje However, xarray's ``stack`` has an important difference from pandas: unlike pandas, it does not automatically drop missing values. Compare: -.. ipython:: python +.. jupyter-execute:: array = xr.DataArray([[np.nan, 1], [2, 3]], dims=["x", "y"]) array.stack(z=("x", "y")) @@ -171,7 +171,7 @@ Just as with :py:meth:`xarray.Dataset.stack` the stacked coordinate is represented by a :py:class:`pandas.MultiIndex` object. These methods are used like this: -.. ipython:: python +.. jupyter-execute:: data = xr.Dataset( data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, @@ -206,7 +206,7 @@ multi-indexes without modifying the data and its dimensions. You can create a multi-index from several 1-dimensional variables and/or coordinates using :py:meth:`~xarray.DataArray.set_index`: -.. ipython:: python +.. jupyter-execute:: da = xr.DataArray( np.random.rand(4), @@ -222,7 +222,7 @@ coordinates using :py:meth:`~xarray.DataArray.set_index`: These coordinates can now be used for indexing, e.g., -.. ipython:: python +.. jupyter-execute:: mda.sel(band="a") @@ -230,14 +230,14 @@ Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` to extract multi-index levels as coordinates (this is mainly useful for serialization): -.. ipython:: python +.. jupyter-execute:: mda.reset_index("x") :py:meth:`~xarray.DataArray.reorder_levels` allows changing the order of multi-index levels: -.. ipython:: python +.. 
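jupyter-execute::
+
+    # illustrative check: the current order of the levels on the 'x' multi-index
+    mda.indexes["x"].names
+
+.. 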
jupyter-execute:: mda.reorder_levels(x=["wavenumber", "band"]) @@ -245,7 +245,7 @@ As of xarray v0.9 coordinate labels for each dimension are optional. You can also use ``.set_index`` / ``.reset_index`` to add / remove labels for one or several dimensions: -.. ipython:: python +.. jupyter-execute:: array = xr.DataArray([1, 2, 3], dims="x") array @@ -262,7 +262,7 @@ Shift and roll To adjust coordinate labels, you can use the :py:meth:`~xarray.Dataset.shift` and :py:meth:`~xarray.Dataset.roll` methods: -.. ipython:: python +.. jupyter-execute:: array = xr.DataArray([1, 2, 3, 4], dims="x") array.shift(x=2) @@ -277,7 +277,7 @@ One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and :py:meth:`~xarray.Dataset.sortby`. The input can be an individual or list of 1D ``DataArray`` objects: -.. ipython:: python +.. jupyter-execute:: ds = xr.Dataset( { @@ -292,7 +292,7 @@ One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and As a shortcut, you can refer to existing coordinates by name: -.. ipython:: python +.. jupyter-execute:: ds.sortby("x") ds.sortby(["y", "x"]) @@ -309,24 +309,23 @@ it can also be used to reorganise your data without applying a computation via : Taking our example tutorial air temperature dataset over the Northern US -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: # Use defaults so we don't get gridlines in generated docs import matplotlib as mpl mpl.rcdefaults() -.. ipython:: python +.. jupyter-execute:: air = xr.tutorial.open_dataset("air_temperature")["air"] - @savefig pre_coarsening.png air.isel(time=0).plot(x="lon", y="lat") we can split this up into sub-regions of size ``(9, 18)`` points using :py:meth:`~xarray.computation.rolling.DataArrayCoarsen.construct`: -.. ipython:: python +.. jupyter-execute:: regions = air.coarsen(lat=9, lon=18, boundary="pad").construct( lon=("x_coarse", "x_fine"), lat=("y_coarse", "y_fine") @@ -338,9 +337,8 @@ The ``boundary="pad"`` kwarg ensured that all regions are the same size even tho By plotting these 9 regions together via :ref:`faceting` we can see how they relate to the original data. -.. ipython:: python +.. jupyter-execute:: - @savefig post_coarsening.png regions.isel(time=0).plot( x="x_fine", y="y_fine", col="x_coarse", row="y_coarse", yincrease=False ) diff --git a/doc/user-guide/terminology.rst b/doc/user-guide/terminology.rst index c581fcb374d..f54fbecb826 100644 --- a/doc/user-guide/terminology.rst +++ b/doc/user-guide/terminology.rst @@ -131,8 +131,8 @@ complete examples, please consult the relevant documentation.* __ https://numpy.org/neps/nep-0022-ndarray-duck-typing-overview.html - .. ipython:: python - :suppress: + .. jupyter-execute:: + :hide-code: import numpy as np import xarray as xr @@ -141,7 +141,7 @@ complete examples, please consult the relevant documentation.* Aligning refers to the process of ensuring that two or more DataArrays or Datasets have the same dimensions and coordinates, so that they can be combined or compared properly. - .. ipython:: python + .. jupyter-execute:: x = xr.DataArray( [[25, 35], [10, 24]], @@ -161,7 +161,7 @@ complete examples, please consult the relevant documentation.* When performing operations on arrays with different shapes and dimensions, xarray will automatically attempt to broadcast the arrays to a common shape before the operation is applied. - .. ipython:: python + .. 
jupyter-execute:: # 'a' has shape (3,) and 'b' has shape (4,) a = xr.DataArray(np.array([1, 2, 3]), dims=["x"]) @@ -175,7 +175,7 @@ complete examples, please consult the relevant documentation.* the same dimensions. When merging, xarray aligns the variables and coordinates of the different datasets along the specified dimensions and creates a new ``Dataset`` containing all the variables and coordinates. - .. ipython:: python + .. jupyter-execute:: # create two 1D arrays with names arr1 = xr.DataArray( @@ -194,7 +194,7 @@ complete examples, please consult the relevant documentation.* xarray arranges the datasets or dataarrays along a new dimension, and the resulting ``Dataset`` or ``Dataarray`` will have the same variables and coordinates along the other dimensions. - .. ipython:: python + .. jupyter-execute:: a = xr.DataArray([[1, 2], [3, 4]], dims=("x", "y")) b = xr.DataArray([[5, 6], [7, 8]], dims=("x", "y")) @@ -205,7 +205,7 @@ complete examples, please consult the relevant documentation.* Combining is the process of arranging two or more DataArrays or Datasets into a single ``DataArray`` or ``Dataset`` using some combination of merging and concatenation operations. - .. ipython:: python + .. jupyter-execute:: ds1 = xr.Dataset( {"data": xr.DataArray([[1, 2], [3, 4]], dims=("x", "y"))}, diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 434c0790139..ff9ebe922e4 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -3,8 +3,8 @@ Testing your code ================= -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd @@ -55,7 +55,7 @@ These strategies are accessible in the :py:mod:`xarray.testing.strategies` modul These build upon the numpy and array API strategies offered in :py:mod:`hypothesis.extra.numpy` and :py:mod:`hypothesis.extra.array_api`: -.. ipython:: python +.. jupyter-execute:: import hypothesis.extra.numpy as npst @@ -65,7 +65,7 @@ Generating Examples To see an example of what each of these strategies might produce, you can call one followed by the ``.example()`` method, which is a general hypothesis method valid for all strategies. -.. ipython:: python +.. jupyter-execute:: import xarray.testing.strategies as xrst @@ -79,11 +79,11 @@ range of data that the xarray strategies can generate. In your tests however you should not use ``.example()`` - instead you should parameterize your tests with the :py:func:`hypothesis.given` decorator: -.. ipython:: python +.. jupyter-execute:: from hypothesis import given -.. ipython:: python +.. jupyter-execute:: @given(xrst.variables()) def test_function_that_acts_on_variables(var): @@ -96,7 +96,7 @@ Chaining Strategies Xarray's strategies can accept other strategies as arguments, allowing you to customise the contents of the generated examples. -.. ipython:: python +.. jupyter-execute:: # generate a Variable containing an array with a complex number dtype, but all other details still arbitrary from hypothesis.extra.numpy import complex_number_dtypes @@ -112,7 +112,7 @@ Fixing Arguments If you want to fix one aspect of the data structure, whilst allowing variation in the generated examples over all other aspects, then use :py:func:`hypothesis.strategies.just()`. -.. ipython:: python +.. jupyter-execute:: import hypothesis.strategies as st @@ -125,14 +125,14 @@ special strategy that just contains a single example.) To fix the length of dimensions you can instead pass ``dims`` as a mapping of dimension names to lengths (i.e. 
following xarray objects' ``.sizes()`` property), e.g. -.. ipython:: python +.. jupyter-execute:: # Generates only variables with dimensions ["x", "y"], of lengths 2 & 3 respectively xrst.variables(dims=st.just({"x": 2, "y": 3})).example() You can also use this to specify that you want examples which are missing some part of the data structure, for instance -.. ipython:: python +.. jupyter-execute:: # Generates a Variable with no attributes xrst.variables(attrs=st.just({})).example() @@ -140,7 +140,7 @@ You can also use this to specify that you want examples which are missing some p Through a combination of chaining strategies and fixing arguments, you can specify quite complicated requirements on the objects your chained strategy will generate. -.. ipython:: python +.. jupyter-execute:: fixed_x_variable_y_maybe_z = st.fixed_dictionaries( {"x": st.just(2), "y": st.integers(3, 4)}, optional={"z": st.just(2)} @@ -171,16 +171,16 @@ Imagine we want to write a strategy which generates arbitrary ``Variable`` objec 1. Create a xarray object with numpy data and use the hypothesis' ``.map()`` method to convert the underlying array to a different type: -.. ipython:: python +.. jupyter-execute:: import sparse -.. ipython:: python +.. jupyter-execute:: def convert_to_sparse(var): return var.copy(data=sparse.COO.from_numpy(var.to_numpy())) -.. ipython:: python +.. jupyter-execute:: sparse_variables = xrst.variables(dims=xrst.dimension_names(min_dims=1)).map( convert_to_sparse @@ -191,7 +191,7 @@ different type: 2. Pass a function which returns a strategy which generates the duck-typed arrays directly to the ``array_strategy_fn`` argument of the xarray strategies: -.. ipython:: python +.. jupyter-execute:: def sparse_random_arrays(shape: tuple[int, ...]) -> sparse._coo.core.COO: """Strategy which generates random sparse.COO arrays""" @@ -210,7 +210,7 @@ different type: return sparse_random_arrays(shape=shape) -.. ipython:: python +.. jupyter-execute:: sparse_random_variables = xrst.variables( array_strategy_fn=sparse_random_arrays_fn, dtype=st.just(np.dtype("float64")) @@ -238,7 +238,7 @@ If the array type you want to generate has an array API-compliant top-level name (e.g. that which is conventionally imported as ``xp`` or similar), you can use this neat trick: -.. ipython:: python +.. jupyter-execute:: import numpy as xp # compatible in numpy 2.0 @@ -265,7 +265,7 @@ is useful. It works for lists of dimension names -.. ipython:: python +.. jupyter-execute:: dims = ["x", "y", "z"] xrst.unique_subset_of(dims).example() @@ -273,7 +273,7 @@ It works for lists of dimension names as well as for mappings of dimension names to sizes -.. ipython:: python +.. jupyter-execute:: dim_sizes = {"x": 2, "y": 3, "z": 4} xrst.unique_subset_of(dim_sizes).example() diff --git a/doc/user-guide/time-series.rst b/doc/user-guide/time-series.rst index cb3e94e3645..cbaba5427cc 100644 --- a/doc/user-guide/time-series.rst +++ b/doc/user-guide/time-series.rst @@ -11,8 +11,8 @@ Accordingly, we've copied many of features that make working with time-series data in pandas such a joy to xarray. In most cases, we rely on pandas for the core functionality. -.. ipython:: python - :suppress: +.. 
jupyter-execute:: + :hide-code: import numpy as np import pandas as pd @@ -30,7 +30,7 @@ data, which offer vectorized operations with numpy and smooth integration with p To convert to or create regular arrays of :py:class:`numpy.datetime64` data, we recommend using :py:func:`pandas.to_datetime`, :py:class:`pandas.DatetimeIndex`, or :py:func:`xarray.date_range`: -.. ipython:: python +.. jupyter-execute:: pd.to_datetime(["2000-01-01", "2000-02-02"]) pd.DatetimeIndex( @@ -51,7 +51,7 @@ using :py:func:`pandas.to_datetime`, :py:class:`pandas.DatetimeIndex`, or :py:fu Alternatively, you can supply arrays of Python ``datetime`` objects. These get converted automatically when used as arguments in xarray objects (with us-resolution): -.. ipython:: python +.. jupyter-execute:: import datetime @@ -78,7 +78,7 @@ attribute like ``'days since 2000-01-01'``). You can manual decode arrays in this form by passing a dataset to :py:func:`decode_cf`: -.. ipython:: python +.. jupyter-execute:: attrs = {"units": "hours since 2000-01-01"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) @@ -101,7 +101,7 @@ This allows for several useful and succinct forms of indexing, particularly for ``datetime64`` data. For example, we support indexing with strings for single items and with the ``slice`` object: -.. ipython:: python +.. jupyter-execute:: time = pd.date_range("2000-01-01", freq="h", periods=365 * 24) ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time}) @@ -111,7 +111,7 @@ items and with the ``slice`` object: You can also select a particular time by indexing with a :py:class:`datetime.time` object: -.. ipython:: python +.. jupyter-execute:: ds.sel(time=datetime.time(12)) @@ -127,7 +127,7 @@ given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. .. _pandas accessors: https://pandas.pydata.org/pandas-docs/stable/basics.html#basics-dt-accessors -.. ipython:: python +.. jupyter-execute:: time = pd.date_range("2000-01-01", freq="6h", periods=365 * 4) ds = xr.Dataset({"foo": ("time", np.arange(365 * 4)), "time": time}) @@ -144,7 +144,7 @@ and "quarter": __ https://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components -.. ipython:: python +.. jupyter-execute:: ds["time.month"] ds["time.dayofyear"] @@ -152,7 +152,7 @@ __ https://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components For use as a derived coordinate, xarray adds ``'season'`` to the list of datetime components supported by pandas: -.. ipython:: python +.. jupyter-execute:: ds["time.season"] ds["time"].dt.season @@ -166,7 +166,7 @@ In addition, xarray supports rounding operations ``floor``, ``ceil``, and ``roun __ https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases -.. ipython:: python +.. jupyter-execute:: ds["time"].dt.floor("D") @@ -175,7 +175,7 @@ for arrays utilising the same formatting as the standard `datetime.strftime`_. .. _datetime.strftime: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior -.. ipython:: python +.. jupyter-execute:: ds["time"].dt.strftime("%a, %b %d %H:%M") @@ -185,13 +185,13 @@ Indexing Using Datetime Components ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can use use the ``.dt`` accessor when subsetting your data as well. For example, we can subset for the month of January using the following: -.. ipython:: python +.. jupyter-execute:: ds.isel(time=(ds.time.dt.month == 1)) You can also search for multiple months (in this case January through March), using ``isin``: -.. ipython:: python +.. 
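jupyter-execute::
+
+    # the same boolean-mask pattern works for any datetime component (illustrative)
+    ds.isel(time=(ds.time.dt.season == "DJF"))
+
+.. 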
jupyter-execute:: ds.isel(time=ds.time.dt.month.isin([1, 2, 3])) @@ -209,7 +209,7 @@ Resampling and grouped operations Datetime components couple particularly well with grouped operations for analyzing features that repeat over time. Here's how to calculate the mean by time of day: -.. ipython:: python +.. jupyter-execute:: ds.groupby("time.hour").mean() @@ -222,7 +222,7 @@ same api as :py:meth:`pandas.DataFrame.resample` `in pandas`_. For example, we can downsample our dataset from hourly to 6-hourly: -.. ipython:: python +.. jupyter-execute:: ds.resample(time="6h") @@ -230,14 +230,14 @@ This will create a specialized :py:class:`~xarray.core.resample.DatasetResample` object which saves information necessary for resampling. All of the reduction methods which work with :py:class:`Dataset` or :py:class:`DataArray` objects can also be used for resampling: -.. ipython:: python +.. jupyter-execute:: ds.resample(time="6h").mean() You can also supply an arbitrary reduction function to aggregate over each resampling group: -.. ipython:: python +.. jupyter-execute:: ds.resample(time="6h").reduce(np.mean) @@ -257,7 +257,7 @@ In order to limit the scope of the methods ``ffill``, ``bfill``, ``pad`` and ``nearest`` the ``tolerance`` argument can be set in coordinate units. Data that has indices outside of the given ``tolerance`` are set to ``NaN``. -.. ipython:: python +.. jupyter-execute:: ds.resample(time="1h").nearest(tolerance="1h") @@ -265,7 +265,7 @@ It is often desirable to center the time values after a resampling operation. That can be accomplished by updating the resampled dataset time coordinate values using time offset arithmetic via the :py:func:`pandas.tseries.frequencies.to_offset` function. -.. ipython:: python +.. jupyter-execute:: resampled_ds = ds.resample(time="6h").mean() offset = pd.tseries.frequencies.to_offset("6h") / 2 @@ -292,7 +292,7 @@ Quite commonly one wants more flexibility in defining seasons. For these use-cas .. currentmodule:: xarray.groupers -.. ipython:: python +.. jupyter-execute:: from xarray.groupers import SeasonGrouper @@ -302,28 +302,28 @@ Quite commonly one wants more flexibility in defining seasons. For these use-cas Note how the seasons are in the specified order, unlike ``.groupby("time.season")`` where the seasons are sorted alphabetically. -.. ipython:: python +.. jupyter-execute:: ds.groupby("time.season").mean() :py:class:`SeasonGrouper` supports overlapping seasons: -.. ipython:: python +.. jupyter-execute:: ds.groupby(time=SeasonGrouper(["DJFM", "MAMJ", "JJAS", "SOND"])).mean() Skipping months is allowed: -.. ipython:: python +.. jupyter-execute:: ds.groupby(time=SeasonGrouper(["JJAS"])).mean() Use :py:class:`SeasonResampler` to specify custom seasons. -.. ipython:: python +.. jupyter-execute:: from xarray.groupers import SeasonResampler @@ -335,7 +335,7 @@ span the end of the year (e.g. DJF). By default :py:class:`SeasonResampler` will season that is incomplete (e.g. the first DJF season for a time series that starts in Jan). Pass the ``drop_incomplete=False`` kwarg to :py:class:`SeasonResampler` to disable this behaviour. -.. ipython:: python +.. jupyter-execute:: from xarray.groupers import SeasonResampler @@ -346,6 +346,6 @@ Pass the ``drop_incomplete=False`` kwarg to :py:class:`SeasonResampler` to disab Seasons need not be of the same length: -.. ipython:: python +.. 
jupyter-execute:: ds.resample(time=SeasonResampler(["JF", "MAM", "JJAS", "OND"])).mean() diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index d56811aa2ad..e3f3315fe08 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -5,10 +5,11 @@ Weather and climate data ======================== -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import xarray as xr + import numpy as np Xarray can leverage metadata that follows the `Climate and Forecast (CF) conventions`_ if present. Examples include :ref:`automatic labelling of plots` with descriptive names and units if proper metadata is present and support for non-standard calendars used in climate science through the ``cftime`` module (explained in the :ref:`CFTimeIndex` section). There are also a number of :ref:`geosciences-focused projects that build on xarray`. @@ -87,7 +88,7 @@ For example, you can create a DataArray indexed by a time coordinate with dates from a no-leap calendar and a :py:class:`~xarray.CFTimeIndex` will automatically be used: -.. ipython:: python +.. jupyter-execute:: from itertools import product from cftime import DatetimeNoLeap @@ -105,7 +106,7 @@ instance, we can create the same dates and DataArray we created above using :py:class:`~xarray.CFTimeIndex` for non-standard calendars, but can be nice to use to be explicit): -.. ipython:: python +.. jupyter-execute:: dates = xr.date_range( start="0001", periods=24, freq="MS", calendar="noleap", use_cftime=True @@ -117,7 +118,7 @@ infer the sampling frequency of a :py:class:`~xarray.CFTimeIndex` or a 1-D :py:class:`~xarray.DataArray` containing cftime objects. It also works transparently with ``np.datetime64`` and ``np.timedelta64`` data (with "s", "ms", "us" or "ns" resolution). -.. ipython:: python +.. jupyter-execute:: xr.infer_freq(dates) @@ -128,7 +129,7 @@ using the same formatting as the standard `datetime.strftime`_ convention . .. _datetime.strftime: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior -.. ipython:: python +.. jupyter-execute:: dates.strftime("%c") da["time"].dt.strftime("%Y%m%d") @@ -141,7 +142,7 @@ use ``pandas`` when possible, i.e. when the calendar is ``standard``/``gregorian .. _1582-10-15: https://en.wikipedia.org/wiki/Gregorian_calendar -.. ipython:: python +.. jupyter-execute:: dates = xr.date_range( start="2001", periods=24, freq="MS", calendar="noleap", use_cftime=True @@ -158,7 +159,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: - `Partial datetime string indexing`_: -.. ipython:: python +.. jupyter-execute:: da.sel(time="0001") da.sel(time=slice("0001-05", "0002-02")) @@ -180,7 +181,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: "season", "dayofyear", "dayofweek", and "days_in_month") with the addition of "calendar", absent from pandas: -.. ipython:: python +.. jupyter-execute:: da.time.dt.year da.time.dt.month @@ -192,7 +193,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: -.. ipython:: python +.. jupyter-execute:: da.time.dt.ceil("3D") da.time.dt.floor("5D") @@ -201,38 +202,38 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: - Group-by operations based on datetime accessor attributes (e.g. by month of the year): -.. ipython:: python +.. 
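+.. jupyter-execute::
+
+    # other datetime components group the same way, e.g. an annual mean
+    da.groupby("time.year").mean()
+
+and summing over each month of the year: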
jupyter-execute:: da.groupby("time.month").sum() - Interpolation using :py:class:`cftime.datetime` objects: -.. ipython:: python +.. jupyter-execute:: da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) - Interpolation using datetime strings: -.. ipython:: python +.. jupyter-execute:: da.interp(time=["0001-01-15", "0001-02-15"]) - Differentiation: -.. ipython:: python +.. jupyter-execute:: da.differentiate("time") - Serialization: -.. ipython:: python +.. jupyter-execute:: da.to_netcdf("example-no-leap.nc") reopened = xr.open_dataset("example-no-leap.nc") reopened -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import os @@ -241,7 +242,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: -.. ipython:: python +.. jupyter-execute:: da.resample(time="81min", closed="right", label="right", offset="3min").mean() From b8894779eb5b98927e8f533dbb4f3324fbf3f17d Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Sun, 1 Jun 2025 19:53:08 +0200 Subject: [PATCH 02/22] switch internals to jupyter-execute --- doc/internals/duck-arrays-integration.rst | 3 +- doc/internals/extending-xarray.rst | 11 +-- doc/internals/how-to-add-new-backend.rst | 7 +- doc/internals/internal-design.rst | 18 ++--- doc/internals/time-coding.rst | 85 +++++++++++------------ doc/internals/zarr-encoding-spec.rst | 7 +- 6 files changed, 66 insertions(+), 65 deletions(-) diff --git a/doc/internals/duck-arrays-integration.rst b/doc/internals/duck-arrays-integration.rst index 43b17be8bb8..f0813a097cd 100644 --- a/doc/internals/duck-arrays-integration.rst +++ b/doc/internals/duck-arrays-integration.rst @@ -70,10 +70,11 @@ To avoid duplicated information, this method must omit information about the sha :term:`dtype`. For example, the string representation of a ``dask`` array or a ``sparse`` matrix would be: -.. ipython:: python +.. jupyter-execute:: import dask.array as da import xarray as xr + import numpy as np import sparse a = da.linspace(0, 1, 20, chunks=2) diff --git a/doc/internals/extending-xarray.rst b/doc/internals/extending-xarray.rst index 6c6ce002a7d..12355bf0c6e 100644 --- a/doc/internals/extending-xarray.rst +++ b/doc/internals/extending-xarray.rst @@ -4,10 +4,11 @@ Extending xarray using accessors ================================ -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import xarray as xr + import numpy as np Xarray is designed as a general purpose library and hence tries to avoid @@ -89,12 +90,12 @@ reasons: Back in an interactive IPython session, we can use these properties: -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: exec(open("examples/_code/accessor_example.py").read()) -.. ipython:: python +.. jupyter-execute:: ds = xr.Dataset({"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)}) ds.geo.center diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index e4f6d54f75c..31ac8d6f245 100644 --- a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -221,12 +221,13 @@ performs the inverse transformation. In the following an example on how to use the coders ``decode`` method: -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import xarray as xr + import numpy as np -.. ipython:: python +.. 
jupyter-execute::

    var = xr.Variable(
        dims=("x",), data=np.arange(10.0), attrs={"scale_factor": 10, "add_offset": 2}
    )
diff --git a/doc/internals/internal-design.rst b/doc/internals/internal-design.rst
index 0785535d51c..4430789522e 100644
--- a/doc/internals/internal-design.rst
+++ b/doc/internals/internal-design.rst
@@ -1,5 +1,5 @@
-.. ipython:: python
-   :suppress:
+.. jupyter-execute::
+   :hide-code:
 
     import numpy as np
     import pandas as pd
@@ -150,7 +150,7 @@ Lazy Loading
 If we open a ``Variable`` object from disk using :py:func:`~xarray.open_dataset`
 we can see that the actual values of the array wrapped by the data variable are
 not displayed.
 
-.. ipython:: python
+.. jupyter-execute::
 
     da = xr.tutorial.open_dataset("air_temperature")["air"]
     var = da.variable
@@ -162,7 +162,7 @@ This is because the values have not yet been loaded.
 If we look at the private attribute :py:meth:`~xarray.Variable._data` containing the underlying array object, we see something interesting:
 
-.. ipython:: python
+.. jupyter-execute::
 
     var._data
@@ -171,13 +171,13 @@ but provide important functionality.
 Calling the public :py:attr:`~xarray.Variable.data` property loads the underlying array into memory.
 
-.. ipython:: python
+.. jupyter-execute::
 
     var.data
 
 This array is now cached, which we can see by accessing the private attribute again:
 
-.. ipython:: python
+.. jupyter-execute::
 
     var._data
@@ -189,14 +189,14 @@ subsequent analysis, by deferring loading data until after indexing is performed
 Let's open the data from disk again.
 
-.. ipython:: python
+.. jupyter-execute::
 
     da = xr.tutorial.open_dataset("air_temperature")["air"]
     var = da.variable
 
 Now, notice how even after subsetting the data does not get loaded:
 
-.. ipython:: python
+.. jupyter-execute::
 
     var.isel(time=0)
 
 The shape has changed, but the values are still not shown.
 Looking at the private attribute again shows how this indexing information was propagated via the hidden lazy indexing classes:
 
-.. ipython:: python
+.. jupyter-execute::
 
     var.isel(time=0)._data
 
diff --git a/doc/internals/time-coding.rst b/doc/internals/time-coding.rst
index 3e4ca10ef4d..a62f03926ab 100644
--- a/doc/internals/time-coding.rst
+++ b/doc/internals/time-coding.rst
@@ -1,5 +1,5 @@
-.. ipython:: python
-   :suppress:
+.. jupyter-execute::
+   :hide-code:
 
     import numpy as np
     import pandas as pd
@@ -30,14 +30,14 @@ In normal operation :py:func:`pandas.to_datetime` returns a :py:class:`pandas.Ti
 When the arguments are numeric (not strings or ``np.datetime64`` values) ``"unit"`` can be anything from ``'Y'``, ``'W'``, ``'D'``, ``'h'``, ``'m'``, ``'s'``, ``'ms'``, ``'us'`` or ``'ns'``, though the returned resolution will be ``"ns"``.
 
-.. ipython:: python
+.. jupyter-execute::
 
     f"Minimum datetime: {pd.to_datetime(int64_min, unit="ns")}"
     f"Maximum datetime: {pd.to_datetime(int64_max, unit="ns")}"
 
 For input values which can't be represented in nanosecond resolution an :py:class:`pandas.OutOfBoundsDatetime` exception is raised:
 
-.. ipython:: python
+.. jupyter-execute::
 
     try:
         dtime = pd.to_datetime(int64_max, unit="us")
@@ -56,7 +56,7 @@ and :py:meth:`pandas.DatetimeIndex.as_unit` respectively.
 ``as_unit`` takes one of ``'s'``, ``'ms'``, ``'us'``, ``'ns'`` as an argument.
 That means we are able to represent datetimes with second, millisecond, microsecond or nanosecond resolution.
 
-.. 
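+For instance, on a scalar :py:class:`pandas.Timestamp` (a small sketch):
+
+.. jupyter-execute::
+
+    # a nanosecond-resolution timestamp converted to second resolution
+    pd.Timestamp("2000-01-01").as_unit("s")
+
+And the same conversions on ``np.datetime64`` input: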
jupyter-execute:: time = pd.to_datetime(np.datetime64(0, "D")) print("Datetime:", time, np.asarray([time.to_numpy()]).dtype) @@ -70,7 +70,7 @@ and :py:meth:`pandas.DatetimeIndex.as_unit` respectively. .. warning:: Input data with resolution higher than ``'ns'`` (eg. ``'ps'``, ``'fs'``, ``'as'``) is truncated (not rounded) at the ``'ns'``-level. This is `currently broken `_ for the ``'ps'`` input, where it is interpreted as ``'ns'``. - .. ipython:: python + .. jupyter-execute:: print("Good:", pd.to_datetime([np.datetime64(1901901901901, "as")])) print("Good:", pd.to_datetime([np.datetime64(1901901901901, "fs")])) @@ -82,7 +82,7 @@ and :py:meth:`pandas.DatetimeIndex.as_unit` respectively. .. warning:: Care has to be taken, as some configurations of input data will raise. The following shows, that we are safe to use :py:func:`pandas.to_datetime` when providing :py:class:`numpy.datetime64` as scalar or numpy array as input. - .. ipython:: python + .. jupyter-execute:: print( "Works:", @@ -119,13 +119,13 @@ The function :py:func:`pandas.to_timedelta` is used within xarray for inferring In normal operation :py:func:`pandas.to_timedelta` returns a :py:class:`pandas.Timedelta` (for scalar input) or :py:class:`pandas.TimedeltaIndex` (for array-like input) which are ``np.timedelta64`` values with ``ns`` resolution internally. That has the implication, that the usable timedelta covers only roughly 585 years. To accommodate for that, we are working around that limitation in the encoding and decoding step. -.. ipython:: python +.. jupyter-execute:: f"Maximum timedelta range: ({pd.to_timedelta(int64_min, unit="ns")}, {pd.to_timedelta(int64_max, unit="ns")})" For input values which can't be represented in nanosecond resolution an :py:class:`pandas.OutOfBoundsTimedelta` exception is raised: -.. ipython:: python +.. jupyter-execute:: try: delta = pd.to_timedelta(int64_max, unit="us") @@ -143,7 +143,7 @@ and :py:meth:`pandas.TimedeltaIndex.as_unit` respectively. ``as_unit`` takes one of ``'s'``, ``'ms'``, ``'us'``, ``'ns'`` as an argument. That means we are able to represent timedeltas with second, millisecond, microsecond or nanosecond resolution. -.. ipython:: python +.. jupyter-execute:: delta = pd.to_timedelta(np.timedelta64(1, "D")) print("Timedelta:", delta, np.asarray([delta.to_numpy()]).dtype) @@ -157,7 +157,7 @@ and :py:meth:`pandas.TimedeltaIndex.as_unit` respectively. .. warning:: Care has to be taken, as some configurations of input data will raise. The following shows, that we are safe to use :py:func:`pandas.to_timedelta` when providing :py:class:`numpy.timedelta64` as scalar or numpy array as input. - .. ipython:: python + .. jupyter-execute:: print( "Works:", @@ -198,7 +198,7 @@ In normal operation :py:class:`pandas.Timestamp` holds the timestamp in the prov The same conversion rules apply here as for :py:func:`pandas.to_timedelta` (see `to_timedelta`_). Depending on the internal resolution Timestamps can be represented in the range: -.. ipython:: python +.. jupyter-execute:: for unit in ["s", "ms", "us", "ns"]: print( @@ -210,7 +210,7 @@ Since relaxing the resolution, this enhances the range to several hundreds of th .. warning:: When initialized with a datetime string this is only defined from ``-9999-01-01`` to ``9999-12-31``. - .. ipython:: python + .. jupyter-execute:: try: print("Works:", pd.Timestamp("-9999-01-01 00:00:00")) @@ -222,7 +222,7 @@ Since relaxing the resolution, this enhances the range to several hundreds of th .. 
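+The string limitation does not apply when constructing from
+:py:class:`numpy.datetime64` values, which the note below expands on; a quick
+sketch (the year here is arbitrary):
+
+.. jupyter-execute::
+
+    # a year outside the +/-9999 string-parsing range
+    pd.Timestamp(np.datetime64("20000-01-01", "s"))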
note:: :py:class:`pandas.Timestamp` is the only current possibility to correctly import time reference strings. It handles non-ISO formatted strings, keeps the resolution of the strings (``'s'``, ``'ms'`` etc.) and imports time zones. When initialized with :py:class:`numpy.datetime64` instead of a string it even overcomes the above limitation of the possible time range. - .. ipython:: python + .. jupyter-execute:: try: print("Handles non-ISO:", pd.Timestamp("92-1-8 151542")) @@ -255,7 +255,7 @@ DatetimeIndex :py:class:`pandas.DatetimeIndex` is used to wrap ``np.datetime64`` values or other datetime-likes when encoding. The resolution of the DatetimeIndex depends on the input, but can be only one of ``'s'``, ``'ms'``, ``'us'``, ``'ns'``. Lower resolution input is automatically converted to ``'s'``, higher resolution input is cut to ``'ns'``. :py:class:`pandas.DatetimeIndex` will raise :py:class:`pandas.OutOfBoundsDatetime` if the input can't be represented in the given resolution. -.. ipython:: python +.. jupyter-execute:: try: print( @@ -327,7 +327,7 @@ Decoding of ``values`` with a time unit specification like ``"seconds since 1992 5. Finally, the ``values`` (at this point converted to ``int64`` values) are cast to ``datetime64[unit]`` (using the above retrieved unit) and added to the reference time :py:class:`pandas.Timestamp`. -.. ipython:: python +.. jupyter-execute:: calendar = "proleptic_gregorian" values = np.array([-1000 * 365, 0, 1000 * 365], dtype="int64") @@ -336,14 +336,14 @@ Decoding of ``values`` with a time unit specification like ``"seconds since 1992 assert dt.dtype == "datetime64[us]" dt -.. ipython:: python +.. jupyter-execute:: units = "microseconds since 2000-01-01 00:00:00" dt = xr.coding.times.decode_cf_datetime(values, units, calendar, time_unit="s") assert dt.dtype == "datetime64[us]" dt -.. ipython:: python +.. jupyter-execute:: values = np.array([0, 0.25, 0.5, 0.75, 1.0], dtype="float64") units = "days since 2000-01-01 00:00:00.001" @@ -351,7 +351,7 @@ Decoding of ``values`` with a time unit specification like ``"seconds since 1992 assert dt.dtype == "datetime64[ms]" dt -.. ipython:: python +.. jupyter-execute:: values = np.array([0, 0.25, 0.5, 0.75, 1.0], dtype="float64") units = "hours since 2000-01-01" @@ -359,7 +359,7 @@ Decoding of ``values`` with a time unit specification like ``"seconds since 1992 assert dt.dtype == "datetime64[s]" dt -.. ipython:: python +.. jupyter-execute:: values = np.array([0, 0.25, 0.5, 0.75, 1.0], dtype="float64") units = "hours since 2000-01-01 00:00:00 03:30" @@ -367,7 +367,7 @@ Decoding of ``values`` with a time unit specification like ``"seconds since 1992 assert dt.dtype == "datetime64[s]" dt -.. ipython:: python +.. jupyter-execute:: values = np.array([-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="int64") units = "days since 0001-01-01 00:00:00" @@ -393,8 +393,7 @@ For encoding the process is more or less a reversal of the above, but we have to 11. Divide ``time_deltas`` by ``delta``, use floor division (integer) or normal division (float) 12. Return result -.. ipython:: python - :okwarning: +.. jupyter-execute:: calendar = "proleptic_gregorian" dates = np.array( @@ -441,17 +440,17 @@ Default Time Unit The current default time unit of xarray is ``'ns'``. When setting keyword argument ``time_unit`` unit to ``'s'`` (the lowest resolution pandas allows) datetimes will be converted to at least ``'s'``-resolution, if possible. The same holds true for ``'ms'`` and ``'us'``. -.. ipython:: python +.. 
jupyter-execute:: attrs = {"units": "hours since 2000-01-01"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) ds.to_netcdf("test-datetimes1.nc") -.. ipython:: python +.. jupyter-execute:: xr.open_dataset("test-datetimes1.nc") -.. ipython:: python +.. jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.open_dataset("test-datetimes1.nc", decode_times=coder) @@ -459,17 +458,17 @@ The current default time unit of xarray is ``'ns'``. When setting keyword argume If a coarser unit is requested the datetimes are decoded into their native on-disk resolution, if possible. -.. ipython:: python +.. jupyter-execute:: attrs = {"units": "milliseconds since 2000-01-01"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) ds.to_netcdf("test-datetimes2.nc") -.. ipython:: python +.. jupyter-execute:: xr.open_dataset("test-datetimes2.nc") -.. ipython:: python +.. jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.open_dataset("test-datetimes2.nc", decode_times=coder) @@ -477,21 +476,21 @@ on-disk resolution, if possible. Similar logic applies for decoding timedelta values. The default resolution is ``"ns"``: -.. ipython:: python +.. jupyter-execute:: attrs = {"units": "hours"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) ds.to_netcdf("test-timedeltas1.nc") -.. ipython:: python - :okwarning: +.. jupyter-execute:: + xr.open_dataset("test-timedeltas1.nc") By default, timedeltas will be decoded to the same resolution as datetimes: -.. ipython:: python - :okwarning: +.. jupyter-execute:: + coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.open_dataset("test-timedeltas1.nc", decode_times=coder) @@ -499,7 +498,7 @@ By default, timedeltas will be decoded to the same resolution as datetimes: but if one would like to decode timedeltas to a different resolution, one can provide a coder specifically for timedeltas to ``decode_timedelta``: -.. ipython:: python +.. jupyter-execute:: timedelta_coder = xr.coders.CFTimedeltaCoder(time_unit="ms") xr.open_dataset( @@ -509,26 +508,26 @@ provide a coder specifically for timedeltas to ``decode_timedelta``: As with datetimes, if a coarser unit is requested the timedeltas are decoded into their native on-disk resolution, if possible: -.. ipython:: python +.. jupyter-execute:: attrs = {"units": "milliseconds"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) ds.to_netcdf("test-timedeltas2.nc") -.. ipython:: python - :okwarning: +.. jupyter-execute:: + xr.open_dataset("test-timedeltas2.nc") -.. ipython:: python - :okwarning: +.. jupyter-execute:: + coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.open_dataset("test-timedeltas2.nc", decode_times=coder) To opt-out of timedelta decoding (see issue `Undesired decoding to timedelta64 `_) pass ``False`` to ``decode_timedelta``: -.. ipython:: python +.. jupyter-execute:: xr.open_dataset("test-timedeltas2.nc", decode_timedelta=False) @@ -538,8 +537,8 @@ To opt-out of timedelta decoding (see issue `Undesired decoding to timedelta64 < -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: # Cleanup import os diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index 958dad166e1..d5f81dbb245 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -52,8 +52,7 @@ for more details. As a concrete example, here we write a tutorial dataset to Zarr and then re-open it directly with Zarr: -.. ipython:: python - :okwarning: +.. 
jupyter-execute:: import os import xarray as xr @@ -67,8 +66,8 @@ re-open it directly with Zarr: print(zgroup.tree()) dict(zgroup["Tair"].attrs) -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import shutil From beacea8698b3fd9e094d2bc6b579d8005c35eaea Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Sun, 1 Jun 2025 19:54:57 +0200 Subject: [PATCH 03/22] switch remain doc files to jupyter-execute --- doc/contribute/contributing.rst | 17 ++++---- doc/get-help/faq.rst | 10 ++--- doc/whats-new.rst | 71 ++++++++++++++------------------- 3 files changed, 44 insertions(+), 54 deletions(-) diff --git a/doc/contribute/contributing.rst b/doc/contribute/contributing.rst index e1f506ef7c1..d7ca0a8cb03 100644 --- a/doc/contribute/contributing.rst +++ b/doc/contribute/contributing.rst @@ -387,24 +387,25 @@ Some other important things to know about the docs: for a detailed explanation, or look at some of the existing functions to extend it in a similar manner. -- The tutorials make heavy use of the `ipython directive - `_ sphinx extension. - This directive lets you put code in the documentation which will be run +- The documentation makes heavy use of the `jupyter-sphinx extension + `_. + The ``jupyter-execute`` directive lets you put code in the documentation which will be run during the doc build. For example: .. code:: rst - .. ipython:: python + .. jupyter-execute:: x = 2 x**3 - will be rendered as:: + will be rendered as: - In [1]: x = 2 + .. jupyter-execute:: + :hide-code: - In [2]: x**3 - Out[2]: 8 + x = 2 + x**3 Almost all code examples in the docs are run (and the output saved) during the doc build. This approach means that code examples will always be up to date, diff --git a/doc/get-help/faq.rst b/doc/get-help/faq.rst index 3cd8bbe5bc9..7e956cbff3c 100644 --- a/doc/get-help/faq.rst +++ b/doc/get-help/faq.rst @@ -3,8 +3,8 @@ Frequently Asked Questions ========================== -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd @@ -101,7 +101,7 @@ Unfortunately, this means we sometimes have to explicitly cast our results from xarray when using them in other libraries. As an illustration, the following code fragment -.. ipython:: python +.. jupyter-execute:: arr = xr.DataArray([1, 2, 3]) pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}) @@ -109,14 +109,14 @@ code fragment does not yield the pandas DataFrame we expected. We need to specify the type conversion ourselves: -.. ipython:: python +.. jupyter-execute:: pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}, dtype=float) Alternatively, we could use the ``item`` method or the ``float`` constructor to convert values one at a time -.. ipython:: python +.. jupyter-execute:: pd.Series({"x": arr[0].item(), "mean": float(arr.mean())}) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bee4156f1e5..c6fe5f803a4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -375,7 +375,7 @@ error messages have been removed or rewritten. Xarray will now also allow non-nanosecond datetimes (with ``'us'``, ``'ms'`` or ``'s'`` resolution) when creating DataArray's from scratch, picking the lowest possible resolution: -.. ipython:: python +.. code:: python xr.DataArray(data=[np.datetime64("2000-01-01", "D")], dims=("time",)) @@ -6245,7 +6245,7 @@ Enhancements (:issue:`1617`). This enables using NumPy ufuncs directly on ``xarray.Dataset`` objects with recent versions of NumPy (v1.13 and newer): - .. ipython:: python + .. 
code:: python ds = xr.Dataset({"a": 1}) np.sin(ds) @@ -6337,7 +6337,7 @@ Enhancements - Reduce methods such as :py:func:`DataArray.sum()` now handles object-type array. - .. ipython:: python + .. code:: python da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims="x") da.sum() @@ -7785,8 +7785,7 @@ Enhancements - New ``xray.Dataset.shift`` and ``xray.Dataset.roll`` methods for shifting/rotating datasets or arrays along a dimension: - .. ipython:: python - :okwarning: + .. code:: python array = xray.DataArray([5, 6, 7, 8], dims="x") array.shift(x=2) @@ -7801,7 +7800,7 @@ Enhancements - New function ``xray.broadcast`` for explicitly broadcasting ``DataArray`` and ``Dataset`` objects against each other. For example: - .. ipython:: python + .. code:: python a = xray.DataArray([1, 2, 3], dims="x") b = xray.DataArray([5, 6], dims="y") @@ -7999,12 +7998,10 @@ Enhancements - New ``xray.Dataset.where`` method for masking xray objects according to some criteria. This works particularly well with multi-dimensional data: - .. ipython:: python + .. code:: python ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) ds["distance"] = np.sqrt(ds.x**2 + ds.y**2) - - @savefig where_example.png width=4in height=4in ds.distance.where(ds.distance < 100).plot() - Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` @@ -8013,7 +8010,7 @@ Enhancements - New ``xray.DataArray.to_masked_array`` convenience method for returning a numpy.ma.MaskedArray. - .. ipython:: python + .. code:: python da = xray.DataArray(np.random.random_sample(size=(5, 4))) da.where(da < 0.5) @@ -8163,12 +8160,11 @@ Backwards incompatible changes Now, the default always concatenates data variables: - .. ipython:: python - :suppress: + .. code:: python ds = xray.Dataset({"x": 0}) - .. ipython:: python + .. code:: python xray.concat([ds, ds], dim="y") @@ -8181,7 +8177,7 @@ Enhancements ``xray.DataArray.to_dataset`` methods make it easy to switch back and forth between arrays and datasets: - .. ipython:: python + .. code:: python ds = xray.Dataset( {"a": 1, "b": ("x", [1, 2, 3])}, @@ -8194,7 +8190,7 @@ Enhancements - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: - .. ipython:: python + .. code:: python array = xray.DataArray([np.nan, 1, np.nan, 3], dims="x") array.fillna(0) @@ -8207,7 +8203,7 @@ Enhancements methods patterned off the new :py:meth:`DataFrame.assign ` method in pandas: - .. ipython:: python + .. code:: python ds = xray.Dataset({"y": ("x", [1, 2, 3])}) ds.assign(z=lambda ds: ds.y**2) @@ -8257,7 +8253,7 @@ Enhancements It can be used either as a context manager, in which case the default is restored outside the context: - .. ipython:: python + .. code:: python ds = xray.Dataset({"x": np.arange(1000)}) with xray.set_options(display_width=40): @@ -8296,8 +8292,7 @@ Enhancements a new temporal resolution. The syntax is the `same as pandas`_, except you need to supply the time dimension explicitly: - .. ipython:: python - :verbatim: + .. code:: python time = pd.date_range("2000-01-01", freq="6H", periods=10) array = xray.DataArray(np.arange(10), [("time", time)]) @@ -8306,31 +8301,27 @@ Enhancements You can specify how to do the resampling with the ``how`` argument and other options such as ``closed`` and ``label`` let you control labeling: - .. ipython:: python - :verbatim: + .. 
code:: python array.resample("1D", dim="time", how="sum", label="right") If the desired temporal resolution is higher than the original data (upsampling), xray will insert missing values: - .. ipython:: python - :verbatim: + .. code:: python array.resample("3H", "time") - ``first`` and ``last`` methods on groupby objects let you take the first or last examples from each group along the grouped axis: - .. ipython:: python - :verbatim: + .. code:: python array.groupby("time.day").first() These methods combine well with ``resample``: - .. ipython:: python - :verbatim: + .. code:: python array.resample("1D", dim="time", how="first") @@ -8338,10 +8329,9 @@ Enhancements - ``xray.Dataset.swap_dims`` allows for easily swapping one dimension out for another: - .. ipython:: python + .. code:: python ds = xray.Dataset({"x": range(3), "y": ("x", list("abc"))}) - ds ds.swap_dims({"x": "y"}) This was possible in earlier versions of xray, but required some contortions. @@ -8386,7 +8376,7 @@ Breaking changes :ref:`For arithmetic`, we align based on the **intersection** of labels: - .. ipython:: python + .. code:: python lhs = xray.DataArray([1, 2, 3], [("x", [0, 1, 2])]) rhs = xray.DataArray([2, 3, 4], [("x", [1, 2, 3])]) @@ -8395,21 +8385,21 @@ Breaking changes :ref:`For dataset construction and merging`, we align based on the **union** of labels: - .. ipython:: python + .. code:: python xray.Dataset({"foo": lhs, "bar": rhs}) :ref:`For update and __setitem__`, we align based on the **original** object: - .. ipython:: python + .. code:: python lhs.coords["rhs"] = rhs lhs - Aggregations like ``mean`` or ``median`` now skip missing values by default: - .. ipython:: python + .. code:: python xray.DataArray([1, 2, np.nan, 3]).mean() @@ -8425,7 +8415,7 @@ Breaking changes persists through arithmetic, even though it has different shapes on each DataArray: - .. ipython:: python + .. code:: python a = xray.DataArray([1, 2], coords={"c": 0}, dims="x") b = xray.DataArray([1, 2], coords={"c": ("x", [0, 0])}, dims="x") @@ -8437,7 +8427,7 @@ Breaking changes the name ``'month'``, not ``'time.month'`` (:issue:`345`). This makes it easier to index the resulting arrays when they are used with ``groupby``: - .. ipython:: python + .. code:: python time = xray.DataArray( pd.date_range("2000-01-01", periods=365), dims="time", name="time" @@ -8480,7 +8470,7 @@ Enhancements - Support for ``xray.Dataset.reindex`` with a fill method. This provides a useful shortcut for upsampling: - .. ipython:: python + .. code:: python data = xray.DataArray([1, 2, 3], [("x", range(3))]) data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") @@ -8501,8 +8491,7 @@ Enhancements - The new ``xray.Dataset.drop`` and ``xray.DataArray.drop`` methods makes it easy to drop explicitly listed variables or index labels: - .. ipython:: python - :okwarning: + .. code:: python # drop variables ds = xray.Dataset({"x": 0, "y": 1}) @@ -8575,7 +8564,7 @@ Backwards incompatible changes ``datetime64[ns]`` arrays when stored in an xray object, using machinery borrowed from pandas: - .. ipython:: python + .. code:: python from datetime import datetime @@ -8593,7 +8582,7 @@ Enhancements - Due to popular demand, we have added experimental attribute style access as a shortcut for dataset variables, coordinates and attributes: - .. ipython:: python + .. code:: python ds = xray.Dataset({"tmin": ([], 25, {"units": "celsius"})}) ds.tmin.units @@ -8604,7 +8593,7 @@ Enhancements - You can now use a dictionary for indexing with labeled dimensions. 
This provides a safe way to do assignment with labeled dimensions: - .. ipython:: python + .. code:: python array = xray.DataArray(np.zeros(5), dims=["x"]) array[dict(x=slice(3))] = 1 From 3b111d98f42ea6b59067e84d9293419ebf136f17 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Mon, 2 Jun 2025 15:26:27 +0200 Subject: [PATCH 04/22] manual review of data model section --- doc/user-guide/dask.rst | 113 +++++++++++++-------------- doc/user-guide/data-structures.rst | 60 ++++++++++++-- doc/user-guide/hierarchical-data.rst | 48 ++++++++---- doc/user-guide/terminology.rst | 19 +++-- 4 files changed, 151 insertions(+), 89 deletions(-) diff --git a/doc/user-guide/dask.rst b/doc/user-guide/dask.rst index 46d5174c6e5..ef6dbd594f9 100644 --- a/doc/user-guide/dask.rst +++ b/doc/user-guide/dask.rst @@ -5,14 +5,45 @@ Parallel Computing with Dask ============================ +.. jupyter-execute:: + + # Note that it's not necessary to import dask to use xarray with dask. + import numpy as np + import pandas as pd + import xarray as xr + import bottleneck + +.. jupyter-execute:: + :hide-code: + + import os + + np.random.seed(123456) + + # limit the amount of information printed to screen + xr.set_options(display_expand_data=False) + np.set_printoptions(precision=3, linewidth=100, threshold=10, edgeitems=2) + + ds = xr.Dataset( + { + "temperature": ( + ("time", "latitude", "longitude"), + np.random.randn(30, 180, 180), + ), + "time": pd.date_range("2015-01-01", periods=30), + "longitude": np.arange(180), + "latitude": np.arange(89.5, -90.5, -1), + } + ) + ds.to_netcdf("example-data.nc") + + Xarray integrates with `Dask `__, a general purpose library for parallel computing, to handle larger-than-memory computations. If you’ve been using Xarray to read in large datasets or split up data across a number of files, you may already be using Dask: .. code-block:: python - import xarray as xr - ds = xr.open_zarr("/path/to/data.zarr") timeseries = ds["temp"].mean(dim=["x", "y"]).compute() # Compute result @@ -115,31 +146,6 @@ When reading data, Dask divides your dataset into smaller chunks. You can specif Loading Dask Arrays ~~~~~~~~~~~~~~~~~~~ -.. jupyter-execute:: - :hide-code: - - import os - - import numpy as np - import pandas as pd - import xarray as xr - - np.random.seed(123456) - np.set_printoptions(precision=3, linewidth=100, threshold=100, edgeitems=3) - - ds = xr.Dataset( - { - "temperature": ( - ("time", "latitude", "longitude"), - np.random.randn(30, 180, 180), - ), - "time": pd.date_range("2015-01-01", periods=30), - "longitude": np.arange(180), - "latitude": np.arange(89.5, -90.5, -1), - } - ) - ds.to_netcdf("example-data.nc") - There are a few common cases where you may want to convert lazy Dask arrays into eager, in-memory Xarray data structures: - You want to inspect smaller intermediate results when working interactively or debugging @@ -158,11 +164,12 @@ To do this, you can use :py:meth:`Dataset.compute` or :py:meth:`DataArray.comput You can also access :py:attr:`DataArray.values`, which will always be a NumPy array: -.. ipython:: - :verbatim: +.. jupyter-input:: + + ds.temperature.values + +.. jupyter-output:: - In [5]: ds.temperature.values - Out[5]: array([[[ 4.691e-01, -2.829e-01, ..., -5.577e-01, 3.814e-01], [ 1.337e+00, -1.531e+00, ..., 8.726e-01, -1.538e+00], ... @@ -173,8 +180,6 @@ that store lazy Dask arrays: .. 
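+.. jupyter-execute::
+
+    # applying a ufunc is itself lazy; no values are computed until you
+    # explicitly ask for them
+    np.cos(ds.temperature)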
jupyter-execute:: - import numpy as np - np.sin(ds) To access Dask arrays directly, use the :py:attr:`DataArray.data` attribute which exposes the DataArray's underlying array type. @@ -249,11 +254,6 @@ we use to calculate `Spearman's rank-correlation coefficient `. @@ -860,7 +895,13 @@ To convert back and forth between data and coordinates, you can use the .. jupyter-execute:: ds.reset_coords() + +.. jupyter-execute:: + ds.set_coords(["temperature", "precipitation"]) + +.. jupyter-execute:: + ds["temperature"].reset_coords(drop=True) Notice that these operations skip coordinates with names given by dimensions, @@ -930,6 +971,9 @@ For convenience multi-index levels are directly accessible as "virtual" or .. jupyter-execute:: mda["band"] + +.. jupyter-execute:: + mda.wn Indexing with multi-index levels is also possible using the ``sel`` method diff --git a/doc/user-guide/hierarchical-data.rst b/doc/user-guide/hierarchical-data.rst index d4da2531f82..9bdc1e91d27 100644 --- a/doc/user-guide/hierarchical-data.rst +++ b/doc/user-guide/hierarchical-data.rst @@ -5,6 +5,7 @@ Hierarchical data .. jupyter-execute:: :hide-code: + :hide-output: import numpy as np import pandas as pd @@ -71,6 +72,8 @@ We now have a small family tree .. jupyter-execute:: + # Enable text display instead of 'html' for compactness + xr.set_options(display_style="text") homer where we can see how these individual Simpson family members are related to one another. @@ -119,6 +122,9 @@ We can see the whole tree by printing Abe's node or just part of the tree by pri .. jupyter-execute:: abe + +.. jupyter-execute:: + abe["Homer"] @@ -131,8 +137,10 @@ We can add Herbert to the family tree without displacing Homer by :py:meth:`~xar abe = abe.assign({"Herbert": herbert}) abe - abe["Herbert"].name - herbert.name +.. jupyter-execute:: + + print(abe["Herbert"].name) + print(herbert.name) .. note:: This example shows a subtlety - the returned tree has Homer's brother listed as ``"Herbert"``, @@ -173,6 +181,7 @@ Let's use a different example of a tree to discuss more complex relationships be ) primates = vertebrates["/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Primates"] + dinosaurs = vertebrates[ "/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Dinosaurs" ] @@ -193,7 +202,7 @@ We can check if a node is a leaf with :py:meth:`~xarray.DataTree.is_leaf`, and g .. jupyter-execute:: - primates.is_leaf + print(primates.is_leaf) [node.name for node in vertebrates.leaves] Pretending that this is a true evolutionary tree for a moment, we can find the features of the evolutionary ancestors (so-called "ancestor" nodes), @@ -202,9 +211,9 @@ and even the distinguishing feature of the common ancestor of any two species (t .. jupyter-execute:: - [node.name for node in reversed(primates.parents)] - primates.root.name - primates.find_common_ancestor(dinosaurs).name + print([node.name for node in reversed(primates.parents)]) + print(primates.root.name) + print(primates.find_common_ancestor(dinosaurs).name) We can only find a common ancestor between two nodes that lie in the same tree. If we try to find the common evolutionary ancestor between primates and an Alien species that has no relationship to Earth's evolutionary tree, @@ -270,8 +279,8 @@ You can also select both variables and child nodes through dot indexing .. jupyter-execute:: - dt.foo - dt.a + print(dt.foo) + print(dt.a) .. _filesystem paths: @@ -297,8 +306,8 @@ Like with filepaths, paths within the tree can either be relative to the current .. 
jupyter-execute:: - abe["Homer/Bart"].name - abe["./Homer/Bart"].name # alternative syntax + print(abe["Homer/Bart"].name) + print(abe["./Homer/Bart"].name) # alternative syntax or relative to the root node. A path specified from the root (as opposed to being specified relative to an arbitrary node in the tree) is sometimes also referred to as a @@ -309,9 +318,9 @@ The root node is referred to by ``"/"``, so the path from the root node to its g .. jupyter-execute:: # access lisa's sibling by a relative path. - lisa["../Bart"] + print(lisa["../Bart"]) # or from absolute path - lisa["/Homer/Bart"] + print(lisa["/Homer/Bart"]) Relative paths between nodes also support the ``"../"`` syntax to mean the parent of the current node. @@ -683,6 +692,8 @@ we can do arithmetic between them. ) currents +.. jupyter-execute:: + currents.isomorphic(voltages) We could use this feature to quickly calculate the electrical power in our signal, P=IV. @@ -725,9 +736,9 @@ These datasets have different lengths along the ``time`` dimension, and are ther .. jupyter-execute:: - ds_daily.sizes - ds_weekly.sizes - ds_monthly.sizes + print(ds_daily.sizes) + print(ds_weekly.sizes) + print(ds_monthly.sizes) We cannot store these non-alignable variables on a single :py:class:`~xarray.Dataset` object, because they do not exactly align: @@ -817,6 +828,9 @@ We can still access the coordinates defined in the parent groups from any of the .. jupyter-execute:: dt.daily.coords + +.. jupyter-execute:: + dt["daily/lat"] As we can still access them, we say that the ``lat`` and ``lon`` coordinates in the child groups have been "inherited" from their common parent group. @@ -825,7 +839,7 @@ If we print just one of the child nodes, it will still display inherited coordin .. jupyter-execute:: - print(dt["/daily"]) + dt["/daily"] This helps to differentiate which variables are defined on the datatree node that you are currently looking at, and which were defined somewhere above it. @@ -835,4 +849,6 @@ We can also still perform all the same operations on the whole tree: dt.sel(lat=[75], lon=[300]) +.. jupyter-execute:: + dt.std(dim="time") diff --git a/doc/user-guide/terminology.rst b/doc/user-guide/terminology.rst index f54fbecb826..1c1b930c9c7 100644 --- a/doc/user-guide/terminology.rst +++ b/doc/user-guide/terminology.rst @@ -9,6 +9,12 @@ pandas; so we've put together a glossary of its terms. Here,* ``arr`` *refers to an xarray* :py:class:`DataArray` *in the examples. For more complete examples, please consult the relevant documentation.* +.. jupyter-execute:: + :hide-code: + + import numpy as np + import xarray as xr + .. glossary:: DataArray @@ -131,12 +137,6 @@ complete examples, please consult the relevant documentation.* __ https://numpy.org/neps/nep-0022-ndarray-duck-typing-overview.html - .. jupyter-execute:: - :hide-code: - - import numpy as np - import xarray as xr - Aligning Aligning refers to the process of ensuring that two or more DataArrays or Datasets have the same dimensions and coordinates, so that they can be combined or compared properly. @@ -153,8 +153,11 @@ complete examples, please consult the relevant documentation.* dims=("lat", "lon"), coords={"lat": [35.0, 42.0], "lon": [100.0, 120.0]}, ) - x - y + a, b = xr.align(x, y) + + # By default, an "inner join" is performed + # so "a" is a copy of "x" where coordinates match "y" + a Broadcasting A technique that allows operations to be performed on arrays with different shapes and dimensions. 
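+        For example, arrays with distinct dimensions are automatically
+        broadcast against one another in arithmetic:
+
+        .. jupyter-execute::
+
+            x = xr.DataArray([1, 2, 3], dims="x")
+            y = xr.DataArray([10, 20], dims="y")
+            x * y  # the result has dimensions ("x", "y")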
From 58a540628376092e9241137e189e4d4fa911441f Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Mon, 2 Jun 2025 16:19:54 +0200 Subject: [PATCH 05/22] manual review core-operations --- doc/user-guide/combining.rst | 34 ++++++++++---- doc/user-guide/computation.rst | 51 +++++++++++++++++++-- doc/user-guide/groupby.rst | 9 ++++ doc/user-guide/indexing.rst | 78 ++++++++++++++++++++++++++++++-- doc/user-guide/interpolation.rst | 28 ++++++++++-- doc/user-guide/reshaping.rst | 61 +++++++++++++++++++------ 6 files changed, 227 insertions(+), 34 deletions(-) diff --git a/doc/user-guide/combining.rst b/doc/user-guide/combining.rst index 1356920e560..8591d9d4e9b 100644 --- a/doc/user-guide/combining.rst +++ b/doc/user-guide/combining.rst @@ -12,6 +12,8 @@ Combining data np.random.seed(123456) + %xmode minimal + * For combining datasets or data arrays along a single dimension, see concatenate_. * For combining datasets with different variables, see merge_. * For combining datasets or data arrays with different indexes or missing values, see combine_. @@ -33,8 +35,14 @@ dimension name, and concatenates along that dimension: np.arange(6).reshape(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])] ) da.isel(y=slice(0, 1)) # same as da[:, :1] + +.. jupyter-execute:: + # This resembles how you would use np.concatenate: xr.concat([da[:, :1], da[:, 1:]], dim="y") + +.. jupyter-execute:: + # For more friendly pandas-like indexing you can use: xr.concat([da.isel(y=slice(0, 1)), da.isel(y=slice(1, None))], dim="y") @@ -44,6 +52,9 @@ new dimension by stacking lower dimensional arrays together: .. jupyter-execute:: da.sel(x="a") + +.. jupyter-execute:: + xr.concat([da.isel(x=0), da.isel(x=1)], "x") If the second argument to ``concat`` is a new dimension name, the arrays will @@ -88,6 +99,9 @@ To combine variables and coordinates between multiple ``DataArray`` and/or .. jupyter-execute:: xr.merge([ds, ds.rename({"foo": "bar"})]) + +.. jupyter-execute:: + xr.merge([xr.DataArray(n, name="var%d" % n) for n in range(5)]) If you merge another dataset (or a dictionary including data array objects), by @@ -102,17 +116,11 @@ coordinates: This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised if you attempt to merge two variables with the same name but different values: -.. ipython:: +.. jupyter-execute:: + :raises: + + xr.merge([ds, ds + 1]) - @verbatim - In [1]: xr.merge([ds, ds + 1]) - MergeError: conflicting values for variable 'foo' on objects to be combined: - first value: - array([[ 0.4691123 , -0.28286334, -1.5090585 ], - [-1.13563237, 1.21211203, -0.17321465]]) - second value: - array([[ 1.4691123 , 0.71713666, -0.5090585 ], - [-0.13563237, 2.21211203, 0.82678535]]) The same non-destructive merging between ``DataArray`` index coordinates is used in the :py:class:`~xarray.Dataset` constructor: @@ -137,6 +145,9 @@ are filled with ``NaN``. For example: ar0 = xr.DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])]) ar1 = xr.DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) ar0.combine_first(ar1) + +.. jupyter-execute:: + ar1.combine_first(ar0) For datasets, ``ds0.combine_first(ds1)`` works similarly to @@ -270,6 +281,9 @@ datasets into a doubly-nested list, e.g: name="temperature", data=np.random.randint(5, size=(2, 2)), dims=["x", "y"] ) arr + +.. 
jupyter-execute:: + ds_grid = [[arr, arr], [arr, arr]] xr.combine_nested(ds_grid, concat_dim=["x", "y"]) diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index 45fef2b5f80..1a897c6ade6 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -34,6 +34,9 @@ numpy) over all array values: [("x", ["a", "b"]), ("y", [10, 20, 30])], ) arr - 3 + +.. jupyter-execute:: + abs(arr) You can also use any of numpy's or scipy's many `ufunc`__ functions directly on @@ -62,10 +65,18 @@ Data arrays also implement many :py:class:`numpy.ndarray` methods: .. jupyter-execute:: arr.round(2) + +.. jupyter-execute:: + arr.T +.. jupyter-execute:: + intarr = xr.DataArray([0, 1, 2, 3, 4, 5]) intarr << 2 # only supported for int types + +.. jupyter-execute:: + intarr >> 1 .. _missing_values: @@ -215,7 +226,13 @@ applied along particular dimension(s): .. jupyter-execute:: arr.sum(dim="x") + +.. jupyter-execute:: + arr.std(["x", "y"]) + +.. jupyter-execute:: + arr.min() @@ -264,6 +281,9 @@ object: r = arr.rolling(y=3) r.reduce(np.std) + +.. jupyter-execute:: + r.mean() Aggregation results are assigned the coordinate at the end of each window by @@ -284,6 +304,9 @@ a value when aggregating: r = arr.rolling(y=3, min_periods=2) r.mean() + +.. jupyter-execute:: + r = arr.rolling(y=3, center=True, min_periods=2) r.mean() @@ -335,6 +358,9 @@ windowed rolling, convolution, short-time FFT etc. # rolling with 2-point stride rolling_da = r.construct(x="x_win", y="y_win", stride=2) rolling_da + +.. jupyter-execute:: + rolling_da.mean(["x_win", "y_win"], skipna=False) Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view @@ -513,7 +539,6 @@ central finite differences using their coordinates, .. jupyter-execute:: a = xr.DataArray([0, 1, 2, 3], dims=["x"], coords=[[0.1, 0.11, 0.2, 0.3]]) - a a.differentiate("x") This method can be used also for multidimensional arrays, @@ -664,6 +689,9 @@ arrays with different sizes aligned along different dimensions: a = xr.DataArray([1, 2], [("x", ["a", "b"])]) a + +.. jupyter-execute:: + b = xr.DataArray([-1, -2, -3], [("y", [10, 20, 30])]) b @@ -681,6 +709,9 @@ appeared: c = xr.DataArray(np.arange(6).reshape(3, 2), [b["y"], a["x"]]) c + +.. jupyter-execute:: + a + c This means, for example, that you always subtract an array from its transpose: @@ -696,6 +727,9 @@ You can explicitly broadcast xarray data structures by using the a2, b2 = xr.broadcast(a, b) a2 + +.. jupyter-execute:: + b2 .. _math automatic alignment: @@ -759,7 +793,13 @@ indexing turns 1D coordinates into scalar coordinates: .. jupyter-execute:: arr[0] + +.. jupyter-execute:: + arr[1] + +.. jupyter-execute:: + # notice that the scalar coordinate 'x' is silently dropped arr[1] - arr[0] @@ -770,6 +810,9 @@ are no conflicting values: # only one argument has the 'x' coordinate arr[0] + 1 + +.. jupyter-execute:: + # both arguments have the same 'x' coordinate arr[0] - arr[0] @@ -795,6 +838,9 @@ Datasets support most of the same methods found on data arrays: .. jupyter-execute:: ds.mean(dim="x") + +.. jupyter-execute:: + abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or @@ -803,8 +849,7 @@ to each variable in a dataset: .. jupyter-execute:: - np.sin(ds) - ds.map(np.sin) + np.sin(ds) # equivalent to ds.map(np.sin) Datasets also use looping over variables for *broadcasting* in binary arithmetic. 
You can do arithmetic between any ``DataArray`` and a dataset: diff --git a/doc/user-guide/groupby.rst b/doc/user-guide/groupby.rst index dfed7dbac83..1c6b6626f11 100644 --- a/doc/user-guide/groupby.rst +++ b/doc/user-guide/groupby.rst @@ -170,6 +170,9 @@ coordinates. For example: alt = arr.groupby("letters").mean(...) alt + +.. jupyter-execute:: + ds.groupby("letters") - alt This last line is roughly equivalent to the following:: @@ -202,7 +205,13 @@ __ https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dime dims=["ny", "nx"], ) da + +.. jupyter-execute:: + da.groupby("lon").sum(...) + +.. jupyter-execute:: + da.groupby("lon").map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large diff --git a/doc/user-guide/indexing.rst b/doc/user-guide/indexing.rst index 23281819826..826c7ebba29 100644 --- a/doc/user-guide/indexing.rst +++ b/doc/user-guide/indexing.rst @@ -72,7 +72,13 @@ DataArray: ], ) da[:2] + +.. jupyter-execute:: + da[0, 0] + +.. jupyter-execute:: + da[:, [2, 1]] Attributes are persisted in all indexing operations. @@ -124,6 +130,8 @@ use them explicitly to slice data. There are two ways to do this: # index by integer array indices da.isel(space=0, time=slice(None, 2)) + .. jupyter-execute:: + # index by dimension coordinate labels da.sel(time=slice("2000-01-01", "2000-01-02")) @@ -135,6 +143,8 @@ use them explicitly to slice data. There are two ways to do this: # index by integer array indices da[dict(space=0, time=slice(None, 2))] + .. jupyter-execute:: + # index by dimension coordinate labels da.loc[dict(time=slice("2000-01-01", "2000-01-02"))] @@ -167,7 +177,13 @@ enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])]) da.sel(x=[1.1, 1.9], method="nearest") + +.. jupyter-execute:: + da.sel(x=0.1, method="backfill") + +.. jupyter-execute:: + da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") Tolerance limits the maximum distance for valid matches with an inexact lookup: @@ -179,11 +195,10 @@ Tolerance limits the maximum distance for valid matches with an inexact lookup: The method parameter is not yet supported if any of the arguments to ``.sel()`` is a ``slice`` object: -.. ipython:: - :verbatim: +.. jupyter-execute:: + :raises: - In [1]: da.sel(x=slice(1, 3), method="nearest") - NotImplementedError + da.sel(x=slice(1, 3), method="nearest") However, you don't need to use ``method`` to do inexact slicing. Slicing already returns all values inside the range (inclusive), as long as the index @@ -227,6 +242,9 @@ simultaneously, returning a new dataset: ) ds = da.to_dataset(name="foo") ds.isel(space=[0], time=[0]) + +.. jupyter-execute:: + ds.sel(time="2000-01-01") Positional indexing on a dataset is not supported because the ordering of @@ -236,6 +254,9 @@ arrays). However, you can do normal indexing with dimension names: .. jupyter-execute:: ds[dict(space=[0], time=[0])] + +.. jupyter-execute:: + ds.loc[dict(time="2000-01-01")] Dropping labels and dimensions @@ -340,6 +361,9 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper: coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]}, ) da + +.. jupyter-execute:: + da[[0, 2, 2], [1, 3]] For more flexibility, you can supply :py:meth:`~xarray.DataArray` objects @@ -385,6 +409,8 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da.isel(y=ind) # same as da[:, ind] +.. 
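+.. jupyter-execute::
+
+    # ``sel`` accepts DataArray indexers the same way
+    # ("points" is an arbitrary name for the new dimension)
+    da.sel(y=xr.DataArray(["a", "d"], dims="points"))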
jupyter-execute:: + ind = xr.DataArray([["a", "b"], ["b", "a"]], dims=["a", "b"]) da.loc[:, ind] # same as da.sel(y=ind) @@ -487,14 +513,21 @@ Vectorized indexing can also be used to assign values to xarray object. coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]}, ) da + +.. jupyter-execute:: + da[0] = -1 # assignment with broadcasting da +.. jupyter-execute:: + ind_x = xr.DataArray([0, 1], dims=["x"]) ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] = -2 # assign -2 to (ix, iy) = (0, 0) and (1, 1) da +.. jupyter-execute:: + da[ind_x, ind_y] += 100 # increment is also possible da @@ -550,7 +583,6 @@ You can also assign values to all variables of a :py:class:`Dataset` at once: .. jupyter-execute:: - ds_org = xr.tutorial.open_dataset("eraint_uvz").isel( latitude=slice(56, 59), longitude=slice(255, 258), level=0 ) @@ -558,18 +590,30 @@ You can also assign values to all variables of a :py:class:`Dataset` at once: ds = xr.zeros_like(ds_org) ds +.. jupyter-execute:: + # by integer ds[dict(latitude=2, longitude=2)] = 1 ds["u"] + +.. jupyter-execute:: + ds["v"] +.. jupyter-execute:: + # by label ds.loc[dict(latitude=47.25, longitude=[11.25, 12])] = 100 ds["u"] +.. jupyter-execute:: + # dataset as new values new_dat = ds_org.loc[dict(latitude=48, longitude=[11.25, 12])] new_dat + +.. jupyter-execute:: + ds.loc[dict(latitude=47.25, longitude=[11.25, 12])] = new_dat ds["u"] @@ -588,6 +632,9 @@ flexible indexing. The following is an example of the pointwise indexing: da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=["x", "y"]) da + +.. jupyter-execute:: + da.isel(x=xr.DataArray([0, 1, 6], dims="z"), y=xr.DataArray([0, 1, 0], dims="z")) @@ -671,6 +718,9 @@ The :py:func:`~xarray.align` function lets us perform more flexible database-lik .. jupyter-execute:: xr.align(foo, baz, join="inner") + +.. jupyter-execute:: + xr.align(foo, baz, join="outer") Both ``reindex_like`` and ``align`` work interchangeably between @@ -679,7 +729,13 @@ Both ``reindex_like`` and ``align`` work interchangeably between .. jupyter-execute:: ds + +.. jupyter-execute:: + ds.reindex_like(baz) + +.. jupyter-execute:: + other = xr.DataArray(["a", "b", "c"], dims="other") # this is a no-op, because there are no shared dimension names ds.reindex_like(other) @@ -725,7 +781,13 @@ through the :py:attr:`~xarray.DataArray.indexes` attribute. ], ) da + +.. jupyter-execute:: + da.indexes + +.. jupyter-execute:: + da.indexes["time"] Use :py:meth:`~xarray.DataArray.get_index` to get an index for a dimension, @@ -736,6 +798,9 @@ labels: da = xr.DataArray([1, 2, 3], dims="x") da + +.. jupyter-execute:: + da.get_index("x") @@ -785,6 +850,9 @@ pandas: midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) mda + +.. jupyter-execute:: + mda.sel(x=(list("ab"), [0])) You can also select multiple elements by providing a list of labels or tuples or diff --git a/doc/user-guide/interpolation.rst b/doc/user-guide/interpolation.rst index 1fb8d1548d3..35e876edede 100644 --- a/doc/user-guide/interpolation.rst +++ b/doc/user-guide/interpolation.rst @@ -36,6 +36,8 @@ indexing of a :py:class:`~xarray.DataArray`, # label lookup da.sel(time=3) +.. jupyter-execute:: + # interpolation da.interp(time=2.5) @@ -48,6 +50,8 @@ array-like, which gives the interpolated result as an array. # label lookup da.sel(time=[2, 3]) +.. jupyter-execute:: + # interpolation da.interp(time=[2.5, 3.5]) @@ -90,6 +94,8 @@ is carried out. 
# label lookup da.sel(time=2, space=0.1) +.. jupyter-execute:: + # interpolation da.interp(time=2.5, space=0.15) @@ -100,6 +106,8 @@ Array-like coordinates are also accepted: # label lookup da.sel(time=[2, 3], space=[0.1, 0.2]) +.. jupyter-execute:: + # interpolation da.interp(time=[1.5, 2.5], space=[0.15, 0.25]) @@ -154,7 +162,7 @@ The interpolation method can be specified by the optional ``method`` argument. da.plot.line("o", label="original") da.interp(x=np.linspace(0, 1, 100)).plot.line(label="linear (default)") da.interp(x=np.linspace(0, 1, 100), method="cubic").plot.line(label="cubic") - plt.legend() + plt.legend(); Additional keyword arguments can be passed to scipy's functions. @@ -162,8 +170,14 @@ Additional keyword arguments can be passed to scipy's functions. # fill 0 for the outside of the original coordinates. da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": 0.0}) + +.. jupyter-execute:: + # 1-dimensional extrapolation da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": "extrapolate"}) + +.. jupyter-execute:: + # multi-dimensional extrapolation da = xr.DataArray( np.sin(0.3 * np.arange(12).reshape(4, 3)), @@ -205,6 +219,8 @@ For example: y = xr.DataArray([0.1, 0.2, 0.3], dims="z") da.sel(x=x, y=y) +.. jupyter-execute:: + # advanced interpolation, without extrapolation x = xr.DataArray([0.5, 1.5, 2.5, 3.5], dims="z") y = xr.DataArray([0.15, 0.25, 0.35, 0.45], dims="z") @@ -246,6 +262,9 @@ while other methods such as ``cubic`` or ``quadratic`` return all NaN arrays. da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims="x", coords={"x": range(5)}) da.interp(x=[0.5, 1.5, 2.5]) + +.. jupyter-execute:: + da.interp(x=[0.5, 1.5, 2.5], method="cubic") To avoid this, you can drop NaN by :py:meth:`~xarray.DataArray.dropna`, and @@ -255,6 +274,9 @@ then make the interpolation dropped = da.dropna("x") dropped + +.. jupyter-execute:: + dropped.interp(x=[0.5, 1.5, 2.5], method="cubic") If NaNs are distributed randomly in your multidimensional array, @@ -297,7 +319,7 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. new_lat = np.linspace(ds.lat[0].item(), ds.lat[-1].item(), ds.sizes["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) - axes[1].set_title("Interpolated data") + axes[1].set_title("Interpolated data"); Our advanced interpolation can be used to remap the data to the new coordinate. Consider the new coordinates x and z on the two dimensional plane. @@ -327,4 +349,4 @@ The remapping can be done as follows dsi = ds.interp(lon=lon, lat=lat) dsi.air.plot(ax=axes[1]) - axes[1].set_title("Remapped data") + axes[1].set_title("Remapped data"); diff --git a/doc/user-guide/reshaping.rst b/doc/user-guide/reshaping.rst index b607d7f0040..be10684ec29 100644 --- a/doc/user-guide/reshaping.rst +++ b/doc/user-guide/reshaping.rst @@ -20,6 +20,11 @@ Importing the library np.random.seed(123456) + # Use defaults so we don't get gridlines in generated docs + import matplotlib as mpl + + mpl.rcdefaults() + Reordering dimensions --------------------- @@ -30,8 +35,10 @@ ellipsis (`...`) can be used to represent all other dimensions: .. jupyter-execute:: ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])}) - ds.transpose("y", "z", "x") - ds.transpose(..., "x") # equivalent + ds.transpose("y", "z", "x") # equivalent to ds.transpose(..., "x") + +.. 
jupyter-execute:: + ds.transpose() # reverses all dimensions Expand and squeeze dimensions @@ -87,6 +94,8 @@ includes the union of data variable dimensions: # the input dataset has 4 elements ds2 +.. jupyter-execute:: + # the resulting array has 6 elements ds2.to_dataarray() @@ -114,6 +123,9 @@ implemented :py:meth:`~xarray.DataArray.stack` and ) stacked = array.stack(z=("x", "y")) stacked + +.. jupyter-execute:: + stacked.unstack("z") As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: @@ -135,6 +147,9 @@ possible levels. Missing levels are filled in with ``NaN`` in the resulting obje stacked2 = stacked[::2] stacked2 + +.. jupyter-execute:: + stacked2.unstack("z") However, xarray's ``stack`` has an important difference from pandas: unlike @@ -144,6 +159,9 @@ pandas, it does not automatically drop missing values. Compare: array = xr.DataArray([[np.nan, 1], [2, 3]], dims=["x", "y"]) array.stack(z=("x", "y")) + +.. jupyter-execute:: + array.to_pandas().stack() We departed from pandas's behavior here because predictable shapes for new @@ -178,8 +196,14 @@ like this: coords={"y": ["u", "v", "w"]}, ) data + +.. jupyter-execute:: + stacked = data.to_stacked_array("z", sample_dims=["x"]) stacked + +.. jupyter-execute:: + unstacked = stacked.to_unstacked_dataset("z") unstacked @@ -217,6 +241,9 @@ coordinates using :py:meth:`~xarray.DataArray.set_index`: dims="x", ) da + +.. jupyter-execute:: + mda = da.set_index(x=["band", "wavenumber"]) mda @@ -249,8 +276,14 @@ labels for one or several dimensions: array = xr.DataArray([1, 2, 3], dims="x") array + +.. jupyter-execute:: + array["c"] = ("x", ["a", "b", "c"]) array.set_index(x="c") + +.. jupyter-execute:: + array = array.set_index(x="c") array = array.reset_index("x", drop=True) @@ -266,6 +299,9 @@ To adjust coordinate labels, you can use the :py:meth:`~xarray.Dataset.shift` an array = xr.DataArray([1, 2, 3, 4], dims="x") array.shift(x=2) + +.. jupyter-execute:: + array.roll(x=2, roll_coords=True) .. _reshape.sort: @@ -295,7 +331,13 @@ As a shortcut, you can refer to existing coordinates by name: .. jupyter-execute:: ds.sortby("x") + +.. jupyter-execute:: + ds.sortby(["y", "x"]) + +.. jupyter-execute:: + ds.sortby(["y", "x"], ascending=False) .. _reshape.coarsen: @@ -309,19 +351,11 @@ it can also be used to reorganise your data without applying a computation via : Taking our example tutorial air temperature dataset over the Northern US -.. jupyter-execute:: - :hide-code: - - # Use defaults so we don't get gridlines in generated docs - import matplotlib as mpl - - mpl.rcdefaults() - .. jupyter-execute:: air = xr.tutorial.open_dataset("air_temperature")["air"] - air.isel(time=0).plot(x="lon", y="lat") + air.isel(time=0).plot(x="lon", y="lat"); we can split this up into sub-regions of size ``(9, 18)`` points using :py:meth:`~xarray.computation.rolling.DataArrayCoarsen.construct`: @@ -330,7 +364,8 @@ we can split this up into sub-regions of size ``(9, 18)`` points using :py:meth: regions = air.coarsen(lat=9, lon=18, boundary="pad").construct( lon=("x_coarse", "x_fine"), lat=("y_coarse", "y_fine") ) - regions + with xr.set_options(display_expand_data=False): + regions 9 new regions have been created, each of size 9 by 18 points. The ``boundary="pad"`` kwarg ensured that all regions are the same size even though the data does not evenly divide into these sizes. 
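As a minimal sketch of the "custom computation" step this section describes (assuming the ``air`` and ``regions`` objects built in the hunk above; the reduction shown is an illustrative example, not code from the patch), one way to collapse each sub-region to a single value is to average over the fine dimensions:

.. code-block:: python

    import xarray as xr

    # Rebuild the objects from the hunk above (uses the xarray tutorial dataset).
    air = xr.tutorial.open_dataset("air_temperature")["air"]
    regions = air.coarsen(lat=9, lon=18, boundary="pad").construct(
        lon=("x_coarse", "x_fine"), lat=("y_coarse", "y_fine")
    )

    # Collapse each 9-by-18 sub-region to one value by averaging over the
    # within-region ("fine") dimensions created by construct().
    region_means = regions.mean(["x_fine", "y_fine"])
    print(region_means.sizes)  # only x_coarse, y_coarse (and time) remain

The same pattern works for any reduction that accepts a list of dimension names, which is exactly the faceting-and-reduction workflow the following hunk continues with.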
@@ -341,7 +376,7 @@ By plotting these 9 regions together via :ref:`faceting` we c regions.isel(time=0).plot( x="x_fine", y="y_fine", col="x_coarse", row="y_coarse", yincrease=False - ) + ); We are now free to easily apply any custom computation to each coarsened region of our new dataarray. This would involve specifying that applied functions should act over the ``"x_fine"`` and ``"y_fine"`` dimensions, From 8765673c7930d8c93eb84b0d82fc6de77c7c7060 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Mon, 2 Jun 2025 17:56:27 +0200 Subject: [PATCH 06/22] manual review of IO --- doc/user-guide/complex-numbers.rst | 28 +++++++----- doc/user-guide/io.rst | 72 ++++++++++++++++-------------- 2 files changed, 54 insertions(+), 46 deletions(-) diff --git a/doc/user-guide/complex-numbers.rst b/doc/user-guide/complex-numbers.rst index e62b9d6f8d8..ea9df880142 100644 --- a/doc/user-guide/complex-numbers.rst +++ b/doc/user-guide/complex-numbers.rst @@ -5,14 +5,17 @@ Complex Numbers =============== +.. jupyter-execute:: + :hide-code: + + import numpy as np + import xarray as xr + Xarray leverages NumPy to seamlessly handle complex numbers in :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects. In the examples below, we are using a DataArray named ``da`` with complex elements (of :math:`\mathbb{C}`): -.. ipython:: python - - import xarray as xr - import numpy as np +.. jupyter-execute:: data = np.array([[1 + 2j, 3 + 4j], [5 + 6j, 7 + 8j]]) da = xr.DataArray( @@ -27,9 +30,12 @@ Operations on Complex Data -------------------------- You can access real and imaginary components using the ``.real`` and ``.imag`` attributes. Most NumPy universal functions (ufuncs) like :py:doc:`numpy.abs ` or :py:doc:`numpy.angle ` work directly. -.. ipython:: python +.. jupyter-execute:: da.real + +.. jupyter-execute:: + np.abs(da) .. note:: @@ -46,8 +52,7 @@ Writing complex data to NetCDF files (see :ref:`io.netcdf`) is supported via :py This requires the `h5netcdf `_ library to be installed. - .. ipython:: python - :okwarning: + .. jupyter-execute:: # write the data to disk da.to_netcdf("complex_nums_h5.nc", engine="h5netcdf") @@ -61,8 +66,7 @@ Writing complex data to NetCDF files (see :ref:`io.netcdf`) is supported via :py Requires the `netcdf4-python (>= 1.7.1) `_ library and you have to enable ``auto_complex=True``. - .. ipython:: python - :okwarning: + .. jupyter-execute:: # write the data to disk da.to_netcdf("complex_nums_nc4.nc", engine="netcdf4", auto_complex=True) @@ -84,7 +88,7 @@ Alternative: Manual Handling If direct writing is not supported (e.g., targeting NetCDF3), you can manually split the complex array into separate real and imaginary variables before saving: -.. ipython:: python +.. jupyter-execute:: # Write data to file ds_manual = xr.Dataset( @@ -106,8 +110,8 @@ Recommendations - Use ``h5netcdf`` for HDF5-based storage when interoperability with HDF5 is desired. - For maximum legacy support (NetCDF3), manually handle real/imaginary components. -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: # Cleanup import os diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 1bd3c4054f7..3d17e875481 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -286,6 +286,9 @@ You can view this encoding information (among others) in the .. jupyter-execute:: ds_disk["y"].encoding + +.. jupyter-execute:: + ds_disk.encoding Note that all operations that manipulate variables other than indexing @@ -876,7 +879,6 @@ order, e.g., for time-stepping a simulation: .. 
jupyter-execute:: - ds1 = xr.Dataset( {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, coords={ @@ -886,6 +888,9 @@ order, e.g., for time-stepping a simulation: }, ) ds1.to_zarr("path/to/directory.zarr") + +.. jupyter-execute:: + ds2 = xr.Dataset( {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, coords={ @@ -944,7 +949,6 @@ split them into chunks: .. jupyter-execute:: - ds.to_zarr("path/to/directory.zarr", mode="w") ! ls -R path/to/directory.zarr @@ -955,7 +959,6 @@ length of each dimension by using the shorthand chunk size ``None``: .. jupyter-execute:: - ds.to_zarr( "path/to/directory.zarr", encoding={"xc": {"chunks": None}, "yc": {"chunks": None}}, @@ -1166,16 +1169,16 @@ __ https://www.prism.oregonstate.edu/ __ https://iri.columbia.edu/ -.. ipython:: - :verbatim: +.. jupyter-input:: + + remote_data = xr.open_dataset( + "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", + decode_times=False, + ) + remote_data - In [3]: remote_data = xr.open_dataset( - ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", - ...: decode_times=False, - ...: ) +.. jupyter-output:: - In [4]: remote_data - Out[4]: Dimensions: (T: 1422, X: 1405, Y: 621) Coordinates: @@ -1207,13 +1210,13 @@ __ https://iri.columbia.edu/ We can select and slice this data any number of times, and nothing is loaded over the network until we look at particular values: -.. ipython:: - :verbatim: +.. jupyter-input:: - In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] + tmax = remote_data["tmax"][:500, ::3, ::3] + tmax + +.. jupyter-output:: - In [5]: tmax - Out[5]: [48541500 values with dtype=float64] Coordinates: @@ -1226,8 +1229,10 @@ over the network until we look at particular values: units: Celsius_scale expires: 1443657600 +.. jupyter-input:: + # the data is downloaded automatically when we make the plot - In [6]: tmax[0].plot() + tmax[0].plot() .. image:: ../_static/opendap-prism-tmax.png @@ -1316,8 +1321,6 @@ We can convert a ``Dataset`` (or a ``DataArray``) to a dict using .. jupyter-execute:: ds = xr.Dataset({"foo": ("x", np.arange(30))}) - ds - d = ds.to_dict() d @@ -1365,15 +1368,15 @@ Rasterio GDAL readable raster data using `rasterio`_ such as GeoTIFFs can be opened using the `rioxarray`_ extension. `rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping. -.. ipython:: - :verbatim: +.. jupyter-input:: - In [1]: import rioxarray + import rioxarray - In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") + rds = rioxarray.open_rasterio("RGB.byte.tif") + rds + +.. jupyter-output:: - In [3]: rds - Out[3]: [1703814 values with dtype=uint8] Coordinates: @@ -1392,15 +1395,17 @@ GDAL readable raster data using `rasterio`_ such as GeoTIFFs can be opened usin add_offset: 0.0 grid_mapping: spatial_ref - In [4]: rds.rio.crs - Out[4]: CRS.from_epsg(32618) +.. jupyter-input:: + + rds.rio.crs + # CRS.from_epsg(32618) - In [5]: rds4326 = rds.rio.reproject("epsg:4326") + rds4326 = rds.rio.reproject("epsg:4326") - In [6]: rds4326.rio.crs - Out[6]: CRS.from_epsg(4326) + rds4326.rio.crs + # CRS.from_epsg(4326) - In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") + rds4326.rio.to_raster("RGB.byte.4326.tif") .. _rasterio: https://rasterio.readthedocs.io/en/latest/ @@ -1425,10 +1430,9 @@ Xarray supports reading GRIB files via ECMWF cfgrib_ python driver, if it is installed. To open a GRIB file supply ``engine='cfgrib'`` to :py:func:`open_dataset` after installing cfgrib_: -.. ipython:: - :verbatim: +.. 
jupyter-input:: - In [1]: ds_grib = xr.open_dataset("example.grib", engine="cfgrib") + ds_grib = xr.open_dataset("example.grib", engine="cfgrib") We recommend installing cfgrib via conda:: From 04780876388002b52c65ba5b1f6c5930eee71ffa Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Mon, 2 Jun 2025 18:23:49 +0200 Subject: [PATCH 07/22] manual review of plotting --- doc/user-guide/plotting.rst | 163 +++++++++++++++--------------------- 1 file changed, 67 insertions(+), 96 deletions(-) diff --git a/doc/user-guide/plotting.rst b/doc/user-guide/plotting.rst index f317dcd94cf..dca5570d250 100644 --- a/doc/user-guide/plotting.rst +++ b/doc/user-guide/plotting.rst @@ -63,11 +63,13 @@ The following imports are necessary for all of the examples. .. jupyter-execute:: + import cartopy.crs as ccrs + import matplotlib.pyplot as plt import numpy as np import pandas as pd - import matplotlib.pyplot as plt import xarray as xr + For these examples we'll use the North American air temperature dataset. .. jupyter-execute:: @@ -75,6 +77,9 @@ For these examples we'll use the North American air temperature dataset. airtemps = xr.tutorial.open_dataset("air_temperature") airtemps + +.. jupyter-execute:: + # Convert to celsius air = airtemps.air - 273.15 @@ -100,9 +105,8 @@ The simplest way to make a plot is to call the :py:func:`DataArray.plot()` metho .. jupyter-execute:: - air1d = air.isel(lat=10, lon=10) - air1d.plot() + air1d.plot(); Xarray uses the coordinate name along with metadata ``attrs.long_name``, ``attrs.standard_name``, ``DataArray.name`` and ``attrs.units`` (if available) @@ -131,7 +135,7 @@ can be used: .. jupyter-execute:: - air1d[:200].plot.line("b-^") + air1d[:200].plot.line("b-^"); .. note:: Not all xarray plotting methods support passing positional arguments @@ -142,7 +146,7 @@ Keyword arguments work the same way, and are more explicit. .. jupyter-execute:: - air1d[:200].plot.line(color="purple", marker="o") + air1d[:200].plot.line(color="purple", marker="o"); ========================= Adding to Existing Axis @@ -157,13 +161,10 @@ axes created by ``plt.subplots``. fig, axs = plt.subplots(ncols=2) - axs + print(axs) air1d.plot(ax=axs[0]) - air1d.plot.hist(ax=axs[1]) - - plt.tight_layout() - plt.draw() + air1d.plot.hist(ax=axs[1]); On the right is a histogram created by :py:func:`xarray.plot.hist`. @@ -180,14 +181,7 @@ resulting image via the formula ``figsize = (aspect * size, size)``: .. jupyter-execute:: - air1d.plot(aspect=2, size=3) - plt.tight_layout() - -.. jupyter-execute:: - :hide-code: - - # create a dummy figure so sphinx plots everything below normally - plt.figure() + air1d.plot(aspect=2, size=3); This feature also works with :ref:`plotting.faceting`. For facet plots, ``size`` and ``aspect`` refer to a single panel (so that ``aspect * size`` @@ -228,7 +222,7 @@ To use ``'decimal_day'`` as x coordinate it must be explicitly specified: .. jupyter-execute:: - air1d_multi.plot(x="decimal_day") + air1d_multi.plot(x="decimal_day"); Creating a new MultiIndex named ``'date'`` from ``'time'`` and ``'decimal_day'``, it is also possible to use a MultiIndex level as x-axis: @@ -236,14 +230,14 @@ it is also possible to use a MultiIndex level as x-axis: .. jupyter-execute:: air1d_multi = air1d_multi.set_index(date=("time", "decimal_day")) - air1d_multi.plot(x="decimal_day") + air1d_multi.plot(x="decimal_day"); Finally, if a dataset does not have any coordinates it enumerates all data points: .. 
jupyter-execute:: air1d_multi = air1d_multi.drop_vars(["date", "time", "decimal_day"]) - air1d_multi.plot() + air1d_multi.plot(); The same applies to 2D plots below. @@ -257,7 +251,7 @@ plots to check the variation of air temperature at three different latitudes alo .. jupyter-execute:: - air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time") + air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time"); It is required to explicitly specify either @@ -277,7 +271,7 @@ It is also possible to make line plots such that the data are on the x-axis and .. jupyter-execute:: - air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon") + air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon"); ============ Step plots @@ -288,7 +282,7 @@ made using 1D data. .. jupyter-execute:: - air1d[:20].plot.step(where="mid") + air1d[:20].plot.step(where="mid"); The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy @@ -303,7 +297,7 @@ when plotting data grouped with :py:meth:`Dataset.groupby_bins`. (air_mean + air_std).plot.step(ls=":") (air_mean - air_std).plot.step(ls=":") plt.ylim(-20, 30) - plt.title("Zonal mean temperature") + plt.title("Zonal mean temperature"); In this case, the actual boundaries of the bins are used and the ``where`` argument is ignored. @@ -319,7 +313,7 @@ The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes d air.isel(time=10, lon=[10, 11]).plot.line( y="lat", hue="lon", xincrease=False, yincrease=False - ) + ); In addition, one can use ``xscale, yscale`` to set axes scaling; ``xticks, yticks`` to set axes ticks and ``xlim, ylim`` to set axes limits. @@ -340,14 +334,14 @@ by default when the data is two-dimensional. .. jupyter-execute:: air2d = air.isel(time=500) - air2d.plot() + air2d.plot(); All 2d plots in xarray allow the use of the keyword arguments ``yincrease`` and ``xincrease``. .. jupyter-execute:: - air2d.plot(yincrease=False) + air2d.plot(yincrease=False); .. note:: @@ -367,7 +361,7 @@ Xarray plots data with :ref:`missing_values`. bad_air2d = air2d.copy() bad_air2d[dict(lat=slice(0, 10), lon=slice(0, 25))] = np.nan - bad_air2d.plot() + bad_air2d.plot(); ======================== Nonuniform Coordinates @@ -383,7 +377,7 @@ produce plots with nonuniform coordinates. # Apply a nonlinear transformation to one of the coords b.coords["lat"] = np.log(b.coords["lat"]) - b.plot() + b.plot(); ==================== Other types of plot @@ -395,20 +389,20 @@ Contour plot using :py:meth:`DataArray.plot.contour()` .. jupyter-execute:: - air2d.plot.contour() + air2d.plot.contour(); Filled contour plot using :py:meth:`DataArray.plot.contourf()` .. jupyter-execute:: - air2d.plot.contourf() + air2d.plot.contourf(); Surface plot using :py:meth:`DataArray.plot.surface()` .. jupyter-execute:: # transpose just to make the example look a bit nicer - air2d.T.plot.surface() + air2d.T.plot.surface(); ==================== Calling Matplotlib @@ -422,9 +416,7 @@ matplotlib is available. air2d.plot(cmap=plt.cm.Blues) plt.title("These colors prove North America\nhas fallen in the ocean") plt.ylabel("latitude") - plt.xlabel("longitude") - plt.tight_layout() - plt.draw() + plt.xlabel("longitude"); .. note:: @@ -437,8 +429,7 @@ matplotlib is available. .. jupyter-execute:: plt.xlabel("Never gonna see this.") - air2d.plot() - plt.draw() + air2d.plot(); =========== Colormaps @@ -449,7 +440,7 @@ example, consider the original data in Kelvins rather than Celsius: .. 
jupyter-execute:: - airtemps.air.isel(time=0).plot() + airtemps.air.isel(time=0).plot(); The Celsius data contain 0, so a diverging color map was used. The Kelvins do not have 0, so the default color map was used. @@ -470,7 +461,7 @@ washing out the plot. air_outliers[0, 0] = 100 air_outliers[-1, -1] = 400 - air_outliers.plot() + air_outliers.plot(); This plot shows that we have outliers. The easy way to visualize the data without the outliers is to pass the parameter @@ -480,7 +471,7 @@ percentiles of the data to compute the color limits. .. jupyter-execute:: - air_outliers.plot(robust=True) + air_outliers.plot(robust=True); Observe that the ranges of the color bar have changed. The arrows on the color bar indicate @@ -497,22 +488,21 @@ colormaps. For example, to make a plot with 8 discrete color intervals: .. jupyter-execute:: - air2d.plot(levels=8) + air2d.plot(levels=8); It is also possible to use a list of levels to specify the boundaries of the discrete colormap: .. jupyter-execute:: - air2d.plot(levels=[0, 12, 18, 30]) + air2d.plot(levels=[0, 12, 18, 30]); You can also specify a list of discrete colors through the ``colors`` argument: .. jupyter-execute:: - flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"] - air2d.plot(levels=[0, 12, 18, 30], colors=flatui) + air2d.plot(levels=[0, 12, 18, 30], colors=flatui); Finally, if you have `Seaborn `_ installed, you can also specify a seaborn color palette to the ``cmap`` @@ -522,8 +512,7 @@ since levels are chosen automatically). .. jupyter-execute:: - air2d.plot(levels=10, cmap="husl") - plt.draw() + air2d.plot(levels=10, cmap="husl"); .. _plotting.faceting: @@ -568,7 +557,7 @@ arguments to the xarray plotting methods/functions. This returns a .. jupyter-execute:: - g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3) + g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3); Faceting also works for line plots. @@ -576,7 +565,7 @@ Faceting also works for line plots. g_simple_line = t.isel(lat=slice(0, None, 4)).plot( x="lon", hue="lat", col="time", col_wrap=3 - ) + ); =============== 4 dimensional @@ -594,7 +583,7 @@ one were much hotter. # This is a 4d array t4d.coords - t4d.plot(x="lon", y="lat", col="time", row="fourth_dim") + t4d.plot(x="lon", y="lat", col="time", row="fourth_dim"); ================ Other features @@ -603,12 +592,6 @@ one were much hotter. Faceted plotting supports other arguments common to xarray 2d plots. .. jupyter-execute:: - :hide-code: - - plt.close("all") - -.. jupyter-execute:: - hasoutliers = t.isel(time=slice(0, 5)).copy() hasoutliers[0, 0, 0] = -100 @@ -640,6 +623,8 @@ attributes, both 2d NumPy object arrays. g.axs +.. jupyter-execute:: + g.name_dicts It's possible to select the :py:class:`xarray.DataArray` or @@ -662,9 +647,7 @@ they have been plotted. ax.set_title("Air Temperature %d" % i) bottomright = g.axs[-1, -1] - bottomright.annotate("bottom right", (240, 40)) - - plt.draw() + bottomright.annotate("bottom right", (240, 40)); :py:class:`~xarray.plot.FacetGrid` objects have methods that let you customize the automatically generated @@ -698,63 +681,65 @@ Let's plot the ``A`` DataArray as a function of the ``y`` coord .. jupyter-execute:: + with xr.set_options(display_expand_data=False): + display(ds.A) - ds.A +.. jupyter-execute:: - ds.A.plot.scatter(x="y") + ds.A.plot.scatter(x="y"); Same plot can be displayed using the dataset: .. 
jupyter-execute:: - ds.plot.scatter(x="y", y="A") + ds.plot.scatter(x="y", y="A"); Now suppose we want to scatter the ``A`` DataArray against the ``B`` DataArray .. jupyter-execute:: - ds.plot.scatter(x="A", y="B") + ds.plot.scatter(x="A", y="B"); The ``hue`` kwarg lets you vary the color by variable value .. jupyter-execute:: - ds.plot.scatter(x="A", y="B", hue="w") + ds.plot.scatter(x="A", y="B", hue="w"); You can force a legend instead of a colorbar by setting ``add_legend=True, add_colorbar=False``. .. jupyter-execute:: - ds.plot.scatter(x="A", y="B", hue="w", add_legend=True, add_colorbar=False) + ds.plot.scatter(x="A", y="B", hue="w", add_legend=True, add_colorbar=False); .. jupyter-execute:: - ds.plot.scatter(x="A", y="B", hue="w", add_legend=False, add_colorbar=True) + ds.plot.scatter(x="A", y="B", hue="w", add_legend=False, add_colorbar=True); The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. .. jupyter-execute:: - ds.plot.scatter(x="A", y="B", hue="y", markersize="z") + ds.plot.scatter(x="A", y="B", hue="y", markersize="z"); The ``z`` kwarg lets you plot the data along the z-axis as well. .. jupyter-execute:: - ds.plot.scatter(x="A", y="B", z="z", hue="y", markersize="x") + ds.plot.scatter(x="A", y="B", z="z", hue="y", markersize="x"); Faceting is also possible .. jupyter-execute:: - ds.plot.scatter(x="A", y="B", hue="y", markersize="x", row="x", col="w") + ds.plot.scatter(x="A", y="B", hue="y", markersize="x", row="x", col="w"); And adding the z-axis .. jupyter-execute:: - ds.plot.scatter(x="A", y="B", z="z", hue="y", markersize="x", row="x", col="w") + ds.plot.scatter(x="A", y="B", z="z", hue="y", markersize="x", row="x", col="w"); For more advanced scatter plots, we recommend converting the relevant data variables to a pandas DataFrame and using the extensive plotting capabilities of ``seaborn``. @@ -766,14 +751,14 @@ Visualizing vector fields is supported with quiver plots: .. jupyter-execute:: - ds.isel(w=1, z=1).plot.quiver(x="x", y="y", u="A", v="B") + ds.isel(w=1, z=1).plot.quiver(x="x", y="y", u="A", v="B"); where ``u`` and ``v`` denote the x and y direction components of the arrow vectors. Again, faceting is also possible: .. jupyter-execute:: - ds.plot.quiver(x="x", y="y", u="A", v="B", col="w", row="z", scale=4) + ds.plot.quiver(x="x", y="y", u="A", v="B", col="w", row="z", scale=4); ``scale`` is required for faceted quiver plots. The scale determines the number of data units per arrow length unit, i.e. a smaller scale parameter makes the arrow longer. @@ -785,7 +770,7 @@ Visualizing vector fields is also supported with streamline plots: .. jupyter-execute:: - ds.isel(w=1, z=1).plot.streamplot(x="x", y="y", u="A", v="B") + ds.isel(w=1, z=1).plot.streamplot(x="x", y="y", u="A", v="B"); where ``u`` and ``v`` denote the x and y direction components of the vectors tangent to the streamlines. @@ -793,7 +778,7 @@ Again, faceting is also possible: .. jupyter-execute:: - ds.plot.streamplot(x="x", y="y", u="A", v="B", col="w", row="z") + ds.plot.streamplot(x="x", y="y", u="A", v="B", col="w", row="z"); .. _plot-maps: @@ -806,9 +791,6 @@ This script will plot the air temperature on a map. .. jupyter-execute:: - - import cartopy.crs as ccrs - air = xr.tutorial.open_dataset("air_temperature").air p = air.isel(time=0).plot( @@ -817,7 +799,7 @@ This script will plot the air temperature on a map. 
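        subplot_kws=dict(projection=ccrs.Orthographic(-80, 35), facecolor="gray"),
        transform=ccrs.PlateCarree(),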
) p.axes.set_global() - p.axes.coastlines() + p.axes.coastlines(); When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created @@ -835,8 +817,6 @@ by faceting are accessible in the object returned by ``plot``: ax.coastlines() ax.gridlines() - plt.draw() - Details ------- @@ -857,18 +837,12 @@ These are provided for user convenience; they all call the same code. .. jupyter-execute:: - - import xarray.plot as xplt - da = xr.DataArray(range(5)) fig, axs = plt.subplots(ncols=2, nrows=2) da.plot(ax=axs[0, 0]) da.plot.line(ax=axs[0, 1]) - xplt.plot(da, ax=axs[1, 0]) - xplt.line(da, ax=axs[1, 1]) - plt.tight_layout() - - plt.draw() + xr.plot.plot(da, ax=axs[1, 0]) + xr.plot.line(da, ax=axs[1, 1]); Here the output is the same. Since the data is 1 dimensional the line plot was used. @@ -907,7 +881,7 @@ each of the axes should be. .. jupyter-execute:: - a.plot() + a.plot(); It may seem strange that the values on the y axis are decreasing with -0.5 on the top. This is because @@ -926,7 +900,6 @@ instead of the default ones: .. jupyter-execute:: - lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) lon += lat / 10 lat += lon / 10 @@ -936,7 +909,7 @@ instead of the default ones: coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, ) - da.plot.pcolormesh(x="lon", y="lat") + da.plot.pcolormesh(x="lon", y="lat"); Note that in this case, xarray still follows the pixel centered convention. This might be undesirable in some cases, for example when your data is defined @@ -945,13 +918,11 @@ this convention when plotting on a map: .. jupyter-execute:: - import cartopy.crs as ccrs - ax = plt.subplot(projection=ccrs.PlateCarree()) da.plot.pcolormesh(x="lon", y="lat", ax=ax) ax.scatter(lon, lat, transform=ccrs.PlateCarree()) ax.coastlines() - ax.gridlines(draw_labels=True) + ax.gridlines(draw_labels=True); You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: @@ -962,7 +933,7 @@ You can however decide to infer the cell boundaries and use the da.plot.pcolormesh(x="lon", y="lat", ax=ax, infer_intervals=True) ax.scatter(lon, lat, transform=ccrs.PlateCarree()) ax.coastlines() - ax.gridlines(draw_labels=True) + ax.gridlines(draw_labels=True); .. note:: The data model of xarray does not support datasets with `cell boundaries`_ @@ -977,4 +948,4 @@ One can also make line plots with multidimensional coordinates. In this case, `` f, ax = plt.subplots(2, 1) da.plot.line(x="lon", hue="y", ax=ax[0]) - da.plot.line(x="lon", hue="x", ax=ax[1]) + da.plot.line(x="lon", hue="x", ax=ax[1]); From 7ec6b56be2965145ff90dad117a0f3400eee7b02 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Mon, 2 Jun 2025 18:34:34 +0200 Subject: [PATCH 08/22] manual review of interoperability --- doc/user-guide/duckarrays.rst | 6 ++++++ doc/user-guide/pandas.rst | 18 ++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/duckarrays.rst b/doc/user-guide/duckarrays.rst index 90fd5e8b996..41859828546 100644 --- a/doc/user-guide/duckarrays.rst +++ b/doc/user-guide/duckarrays.rst @@ -54,6 +54,9 @@ in a memory-efficient manner. We can create a sparse array object (of the :py:cl from sparse import COO import xarray as xr import numpy as np + %xmode minimal + +.. 
jupyter-execute:: x = np.eye(4, dtype=np.uint8) # create diagonal identity matrix s = COO.from_numpy(x) @@ -68,6 +71,9 @@ Just like :py:class:`numpy.ndarray` objects, :py:class:`sparse.COO` arrays suppo .. jupyter-execute:: s[1, 1] # diagonal elements should be ones + +.. jupyter-execute:: + s[2, 3] # off-diagonal elements should be zero broadcasting, diff --git a/doc/user-guide/pandas.rst b/doc/user-guide/pandas.rst index ac6105fcdf4..cd0a1907565 100644 --- a/doc/user-guide/pandas.rst +++ b/doc/user-guide/pandas.rst @@ -58,6 +58,9 @@ To convert any dataset to a ``DataFrame`` in tidy form, use the }, ) ds + +.. jupyter-execute:: + df = ds.to_dataframe() df @@ -99,6 +102,9 @@ DataFrames: s = ds["foo"].to_series() s + +.. jupyter-execute:: + # or equivalently, with Series.to_xarray() xr.DataArray.from_series(s) @@ -108,6 +114,9 @@ work even if the hierarchical index is not a full tensor product: .. jupyter-execute:: s[::2] + +.. jupyter-execute:: + s[::2].to_xarray() Lossless and reversible conversion @@ -209,11 +218,12 @@ Let's take a look: With old versions of pandas (prior to 0.25), this could stored in a ``Panel``: -.. ipython:: - :verbatim: +.. jupyter-input:: + + pd.Panel(data, items, major_axis, minor_axis) + +.. jupyter-output:: - In [1]: pd.Panel(data, items, major_axis, minor_axis) - Out[1]: Dimensions: 2 (items) x 3 (major_axis) x 4 (minor_axis) Items axis: a to b From 00aad0bfc03e012e342d975d206086f855f25135 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Mon, 2 Jun 2025 20:09:30 +0200 Subject: [PATCH 09/22] review domain-specific and testing --- doc/user-guide/testing.rst | 21 ++++++++++++++++- doc/user-guide/time-series.rst | 24 ++++++++++++++++++++ doc/user-guide/weather-climate.rst | 36 +++++++++++++++++++++++++++--- 3 files changed, 77 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index ff9ebe922e4..55b7d457d35 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -70,7 +70,13 @@ which is a general hypothesis method valid for all strategies. import xarray.testing.strategies as xrst xrst.variables().example() + +.. jupyter-execute:: + xrst.variables().example() + +.. jupyter-execute:: + xrst.variables().example() You can see that calling ``.example()`` multiple times will generate different examples, giving you an idea of the wide @@ -147,9 +153,13 @@ objects your chained strategy will generate. ) fixed_x_variable_y_maybe_z.example() - special_variables = xrst.variables(dims=fixed_x_variable_y_maybe_z) +.. jupyter-execute:: + special_variables = xrst.variables(dims=fixed_x_variable_y_maybe_z) special_variables.example() + +.. jupyter-execute:: + special_variables.example() Here we have used one of hypothesis' built-in strategies :py:func:`hypothesis.strategies.fixed_dictionaries` to create a @@ -187,6 +197,9 @@ different type: ) sparse_variables.example() + +.. jupyter-execute:: + sparse_variables.example() 2. Pass a function which returns a strategy which generates the duck-typed arrays directly to the ``array_strategy_fn`` argument of the xarray strategies: @@ -269,6 +282,9 @@ It works for lists of dimension names dims = ["x", "y", "z"] xrst.unique_subset_of(dims).example() + +.. jupyter-execute:: + xrst.unique_subset_of(dims).example() as well as for mappings of dimension names to sizes @@ -277,6 +293,9 @@ as well as for mappings of dimension names to sizes dim_sizes = {"x": 2, "y": 3, "z": 4} xrst.unique_subset_of(dim_sizes).example() + +.. 
jupyter-execute:: + xrst.unique_subset_of(dim_sizes).example() This is useful because operations like reductions can be performed over any subset of the xarray object's dimensions. diff --git a/doc/user-guide/time-series.rst b/doc/user-guide/time-series.rst index cbaba5427cc..f9ee96f752b 100644 --- a/doc/user-guide/time-series.rst +++ b/doc/user-guide/time-series.rst @@ -33,10 +33,19 @@ using :py:func:`pandas.to_datetime`, :py:class:`pandas.DatetimeIndex`, or :py:fu .. jupyter-execute:: pd.to_datetime(["2000-01-01", "2000-02-02"]) + +.. jupyter-execute:: + pd.DatetimeIndex( ["2000-01-01 00:00:00", "2000-02-02 00:00:00"], dtype="datetime64[s]" ) + +.. jupyter-execute:: + xr.date_range("2000-01-01", periods=365) + +.. jupyter-execute:: + xr.date_range("2000-01-01", periods=365, unit="s") @@ -84,6 +93,9 @@ You can manual decode arrays in this form by passing a dataset to ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) # Default decoding to 'ns'-resolution xr.decode_cf(ds) + +.. jupyter-execute:: + # Decoding to 's'-resolution coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.decode_cf(ds, decode_times=coder) @@ -106,6 +118,9 @@ items and with the ``slice`` object: time = pd.date_range("2000-01-01", freq="h", periods=365 * 24) ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time}) ds.sel(time="2000-01") + +.. jupyter-execute:: + ds.sel(time=slice("2000-06-01", "2000-06-10")) You can also select a particular time by indexing with a @@ -132,6 +147,9 @@ given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. time = pd.date_range("2000-01-01", freq="6h", periods=365 * 4) ds = xr.Dataset({"foo": ("time", np.arange(365 * 4)), "time": time}) ds.time.dt.hour + +.. jupyter-execute:: + ds.time.dt.dayofweek The ``.dt`` accessor works on both coordinate dimensions as well as @@ -147,6 +165,9 @@ __ https://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. jupyter-execute:: ds["time.month"] + +.. jupyter-execute:: + ds["time.dayofyear"] For use as a derived coordinate, xarray adds ``'season'`` to the list of @@ -155,6 +176,9 @@ datetime components supported by pandas: .. jupyter-execute:: ds["time.season"] + +.. jupyter-execute:: + ds["time"].dt.season The set of valid seasons consists of 'DJF', 'MAM', 'JJA' and 'SON', labeled by diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index e3f3315fe08..0578e74557e 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -132,6 +132,9 @@ using the same formatting as the standard `datetime.strftime`_ convention . .. jupyter-execute:: dates.strftime("%c") + +.. jupyter-execute:: + da["time"].dt.strftime("%Y%m%d") Conversion between non-standard calendar and to/from pandas DatetimeIndexes is @@ -162,6 +165,9 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. jupyter-execute:: da.sel(time="0001") + +.. jupyter-execute:: + da.sel(time=slice("0001-05", "0002-02")) .. note:: @@ -184,20 +190,44 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. jupyter-execute:: da.time.dt.year + +.. jupyter-execute:: + da.time.dt.month + +.. jupyter-execute:: + da.time.dt.season + +.. jupyter-execute:: + da.time.dt.dayofyear + +.. jupyter-execute:: + da.time.dt.dayofweek + +.. jupyter-execute:: + da.time.dt.days_in_month + +.. jupyter-execute:: + da.time.dt.calendar - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: .. 
jupyter-execute:: - da.time.dt.ceil("3D") - da.time.dt.floor("5D") - da.time.dt.round("2D") + da.time.dt.ceil("3D").head() + +.. jupyter-execute:: + + da.time.dt.floor("5D").head() + +.. jupyter-execute:: + + da.time.dt.round("2D").head() - Group-by operations based on datetime accessor attributes (e.g. by month of the year): From e3800190f0178a13cff99712cc52b68de9b4435c Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Tue, 3 Jun 2025 10:36:49 +0200 Subject: [PATCH 10/22] review outputs in internals section --- doc/internals/duck-arrays-integration.rst | 6 +++ doc/internals/extending-xarray.rst | 3 ++ doc/internals/how-to-add-new-backend.rst | 58 ++++++++++++++++------- doc/internals/time-coding.rst | 20 ++++++-- doc/internals/zarr-encoding-spec.rst | 9 +++- 5 files changed, 73 insertions(+), 23 deletions(-) diff --git a/doc/internals/duck-arrays-integration.rst b/doc/internals/duck-arrays-integration.rst index f0813a097cd..ab2f8494500 100644 --- a/doc/internals/duck-arrays-integration.rst +++ b/doc/internals/duck-arrays-integration.rst @@ -77,12 +77,18 @@ To avoid duplicated information, this method must omit information about the sha import numpy as np import sparse +.. jupyter-execute:: + a = da.linspace(0, 1, 20, chunks=2) a +.. jupyter-execute:: + b = np.eye(10) b[[5, 7, 3, 0], [6, 8, 2, 9]] = 2 b = sparse.COO.from_numpy(b) b +.. jupyter-execute:: + xr.Dataset(dict(a=("x", a), b=(("y", "z"), b))) diff --git a/doc/internals/extending-xarray.rst b/doc/internals/extending-xarray.rst index 12355bf0c6e..2a7a6413f49 100644 --- a/doc/internals/extending-xarray.rst +++ b/doc/internals/extending-xarray.rst @@ -99,6 +99,9 @@ Back in an interactive IPython session, we can use these properties: ds = xr.Dataset({"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)}) ds.geo.center + +.. jupyter-execute:: + ds.geo.plot() The intent here is that libraries that extend xarray could add such an accessor diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index 31ac8d6f245..d3b5c3a9267 100644 --- a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -234,9 +234,14 @@ In the following an example on how to use the coders ``decode`` method: ) var +.. jupyter-execute:: + coder = xr.coding.variables.CFScaleOffsetCoder() decoded_var = coder.decode(var) decoded_var + +.. jupyter-execute:: + decoded_var.encoding Some of the transformations can be common to more backends, so before @@ -433,20 +438,32 @@ In the ``BASIC`` indexing support, numbers and slices are supported. Example: -.. ipython:: - :verbatim: +.. jupyter-input:: - In [1]: # () shall return the full array - ...: backend_array._raw_indexing_method(()) - Out[1]: array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]) + # () shall return the full array + backend_array._raw_indexing_method(()) + +.. jupyter-output:: + + array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]) + +.. jupyter-input:: + + # shall support integers + backend_array._raw_indexing_method(1, 1) - In [2]: # shall support integers - ...: backend_array._raw_indexing_method(1, 1) - Out[2]: 5 +.. jupyter-output:: - In [3]: # shall support slices - ...: backend_array._raw_indexing_method(slice(0, 3), slice(2, 4)) - Out[3]: array([[2, 3], [6, 7], [10, 11]]) + 5 + +.. jupyter-input:: + + # shall support slices + backend_array._raw_indexing_method(slice(0, 3), slice(2, 4)) + +.. 
jupyter-output:: + + array([[2, 3], [6, 7], [10, 11]]) **OUTER** @@ -454,15 +471,22 @@ The ``OUTER`` indexing shall support number, slices and in addition it shall support also lists of integers. The outer indexing is equivalent to combining multiple input list with ``itertools.product()``: -.. ipython:: - :verbatim: +.. jupyter-input:: + + backend_array._raw_indexing_method([0, 1], [0, 1, 2]) - In [1]: backend_array._raw_indexing_method([0, 1], [0, 1, 2]) - Out[1]: array([[0, 1, 2], [4, 5, 6]]) +.. jupyter-output:: + + array([[0, 1, 2], [4, 5, 6]]) + +.. jupyter-input:: # shall support integers - In [2]: backend_array._raw_indexing_method(1, 1) - Out[2]: 5 + backend_array._raw_indexing_method(1, 1) + +.. jupyter-output:: + + 5 **OUTER_1VECTOR** diff --git a/doc/internals/time-coding.rst b/doc/internals/time-coding.rst index a62f03926ab..6fce6552f4e 100644 --- a/doc/internals/time-coding.rst +++ b/doc/internals/time-coding.rst @@ -32,8 +32,8 @@ When the arguments are numeric (not strings or ``np.datetime64`` values) ``"unit .. jupyter-execute:: - f"Minimum datetime: {pd.to_datetime(int64_min, unit="ns")}" - f"Maximum datetime: {pd.to_datetime(int64_max, unit="ns")}" + print(f"Minimum datetime: {pd.to_datetime(int64_min, unit="ns")}") + print(f"Maximum datetime: {pd.to_datetime(int64_max, unit="ns")}") For input values which can't be represented in nanosecond resolution an :py:class:`pandas.OutOfBoundsDatetime` exception is raised: @@ -43,6 +43,9 @@ For input values which can't be represented in nanosecond resolution an :py:clas dtime = pd.to_datetime(int64_max, unit="us") except Exception as err: print(err) + +.. jupyter-execute:: + try: dtime = pd.to_datetime(uint64_max, unit="ns") print("Wrong:", dtime) @@ -62,6 +65,9 @@ and :py:meth:`pandas.DatetimeIndex.as_unit` respectively. print("Datetime:", time, np.asarray([time.to_numpy()]).dtype) print("Datetime as_unit('ms'):", time.as_unit("ms")) print("Datetime to_numpy():", time.as_unit("ms").to_numpy()) + +.. jupyter-execute:: + time = pd.to_datetime(np.array([-1000, 1, 2], dtype="datetime64[Y]")) print("DatetimeIndex:", time) print("DatetimeIndex as_unit('us'):", time.as_unit("us")) @@ -131,6 +137,9 @@ For input values which can't be represented in nanosecond resolution an :py:clas delta = pd.to_timedelta(int64_max, unit="us") except Exception as err: print("First:", err) + +.. jupyter-execute:: + try: delta = pd.to_timedelta(uint64_max, unit="ns") except Exception as err: @@ -149,6 +158,9 @@ and :py:meth:`pandas.TimedeltaIndex.as_unit` respectively. print("Timedelta:", delta, np.asarray([delta.to_numpy()]).dtype) print("Timedelta as_unit('ms'):", delta.as_unit("ms")) print("Timedelta to_numpy():", delta.as_unit("ms").to_numpy()) + +.. jupyter-execute:: + delta = pd.to_timedelta([0, 1, 2], unit="D") print("TimedeltaIndex:", delta) print("TimedeltaIndex as_unit('ms'):", delta.as_unit("ms")) @@ -415,6 +427,8 @@ For encoding the process is more or less a reversal of the above, but we have to print(values) np.testing.assert_array_equal(values, orig_values) +.. jupyter-execute:: + dates = np.array( [ "-2000-01-01T01:00:00", @@ -516,12 +530,10 @@ into their native on-disk resolution, if possible: .. jupyter-execute:: - xr.open_dataset("test-timedeltas2.nc") .. 
jupyter-execute:: - coder = xr.coders.CFDatetimeCoder(time_unit="s") xr.open_dataset("test-timedeltas2.nc", decode_times=coder) diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index d5f81dbb245..26f21058039 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -60,10 +60,15 @@ re-open it directly with Zarr: ds = xr.tutorial.load_dataset("rasm") ds.to_zarr("rasm.zarr", mode="w") + os.listdir("rasm.zarr") + +.. jupyter-execute:: zgroup = zarr.open("rasm.zarr") - print(os.listdir("rasm.zarr")) - print(zgroup.tree()) + zgroup.tree() + +.. jupyter-execute:: + dict(zgroup["Tair"].attrs) .. jupyter-execute:: From 84a797610ca9fdcd1507a64c8c5f8fe45b1f1478 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Tue, 3 Jun 2025 10:54:15 +0200 Subject: [PATCH 11/22] fully remove ipython directive --- .gitignore | 1 - doc/conf.py | 4 +- doc/user-guide/computation.rst | 10 +- doc/whats-new.rst | 222 +++++++++++++++++++-------------- 4 files changed, 132 insertions(+), 105 deletions(-) diff --git a/.gitignore b/.gitignore index bb55d26d6f1..3c02c76e706 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,6 @@ __pycache__ doc/*.nc doc/auto_gallery doc/rasm.zarr -doc/savefig # C extensions *.so diff --git a/doc/conf.py b/doc/conf.py index b7a63ebcc57..15d39f6860d 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -61,8 +61,6 @@ "sphinx.ext.extlinks", "sphinx.ext.mathjax", "sphinx.ext.napoleon", - "IPython.sphinxext.ipython_directive", - "IPython.sphinxext.ipython_console_highlighting", "jupyter_sphinx", "nbsphinx", "sphinx_autosummary_accessors", @@ -213,7 +211,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ["_build", "**.ipynb_checkpoints"] +exclude_patterns = ["_build", "debug.ipynb", "**.ipynb_checkpoints"] # The name of the Pygments (syntax highlighting) style to use. diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index 1a897c6ade6..6565fee5346 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -27,6 +27,8 @@ numpy) over all array values: np.random.seed(123456) + %xmode minimal + .. jupyter-execute:: arr = xr.DataArray( @@ -753,12 +755,10 @@ operations. The default result of a binary operation is by the *intersection* If coordinate values for a dimension are missing on either argument, all matching dimensions must have the same size: -.. ipython:: - :verbatim: - - In [1]: arr + xr.DataArray([1, 2], dims="x") - ValueError: arguments without labels along dimension 'x' cannot be aligned because they have different dimension size(s) {2} than the size of the aligned dimension labels: 3 +.. jupyter-execute:: + :raises: + arr + xr.DataArray([1, 2], dims="x") However, one can explicitly change this default automatic alignment type ("inner") via :py:func:`~xarray.set_options()` in context manager: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c6fe5f803a4..d9e410d6579 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -5,8 +5,8 @@ What's New ========== -.. ipython:: python - :suppress: +.. jupyter-execute:: + :hide-code: import numpy as np import pandas as pd @@ -24,6 +24,8 @@ v2025.05.0 (unreleased) New Features ~~~~~~~~~~~~ +- Switch docs to jupyter-execute sphinx extension for HTML reprs. (:issue:`3893`, :pull:`10383`) + By `Scott Henderson `_. 
- Allow an Xarray index that uses multiple dimensions checking equality with another index for only a subset of those dimensions (i.e., ignoring the dimensions that are excluded from alignment). @@ -6491,23 +6493,15 @@ Breaking changes Old syntax: - .. ipython:: - :verbatim: + .. jupyter-input:: - In [1]: ds.resample("24H", dim="time", how="max") - Out[1]: - - [...] + ds.resample("24H", dim="time", how="max") New syntax: - .. ipython:: - :verbatim: + .. jupyter-input:: - In [1]: ds.resample(time="24H").max() - Out[1]: - - [...] + ds.resample(time="24H").max() Note that both versions are currently supported, but using the old syntax will produce a warning encouraging users to adopt the new syntax. @@ -6569,21 +6563,25 @@ Enhancements - New function :py:func:`~xarray.where` for conditionally switching between values in xarray objects, like :py:func:`numpy.where`: - .. ipython:: - :verbatim: - In [1]: import xarray as xr + .. jupyter-input:: + + import xarray as xr + + arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y")) + + xr.where(arr % 2, "even", "odd") - In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y")) - In [3]: xr.where(arr % 2, "even", "odd") - Out[3]: + .. jupyter-output:: + array([['even', 'odd', 'even'], ['odd', 'even', 'odd']], dtype=' - [...] By `Willi Rath `_. @@ -7094,17 +7089,19 @@ Breaking changes by their appearance in list of "Dimensions without coordinates" in the ``Dataset`` or ``DataArray`` repr: - .. ipython:: - :verbatim: + .. jupyter-input:: + + xr.Dataset({"foo": (("x", "y"), [[1, 2]])}) + + .. jupyter-output:: - In [1]: xr.Dataset({"foo": (("x", "y"), [[1, 2]])}) - Out[1]: Dimensions: (x: 1, y: 2) Dimensions without coordinates: x, y Data variables: foo (x, y) int64 1 2 + This has a number of implications: - :py:func:`~align` and :py:meth:`~Dataset.reindex` can now error, if @@ -7551,16 +7548,16 @@ Enhancements - Rolling window operations on DataArray objects are now supported via a new :py:meth:`DataArray.rolling` method. For example: - .. ipython:: - :verbatim: + .. jupyter-input:: + + import xarray as xr + import numpy as np - In [1]: import xarray as xr - ...: import numpy as np + arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) + arr - In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) + .. jupyter-output:: - In [3]: arr - Out[3]: array([[ 0. , 0.5, 1. , 1.5, 2. ], [ 2.5, 3. , 3.5, 4. , 4.5], @@ -7569,8 +7566,12 @@ Enhancements * x (x) int64 0 1 2 * y (y) int64 0 1 2 3 4 - In [4]: arr.rolling(y=3, min_periods=2).mean() - Out[4]: + .. jupyter-input:: + + arr.rolling(y=3, min_periods=2).mean() + + .. jupyter-output:: + array([[ nan, 0.25, 0.5 , 1. , 1.5 ], [ nan, 2.75, 3. , 3.5 , 4. ], @@ -7693,11 +7694,12 @@ Breaking changes corresponding coordinate. You will now need to provide coordinate labels explicitly. Here's the old behavior: - .. ipython:: - :verbatim: + .. jupyter-input:: + + xray.DataArray([4, 5, 6], dims="x", name="x") + + .. jupyter-output:: - In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") - Out[2]: array([4, 5, 6]) Coordinates: @@ -7705,11 +7707,12 @@ Breaking changes and the new behavior (compare the values of the ``x`` coordinate): - .. ipython:: - :verbatim: + .. jupyter-input:: + + xray.DataArray([4, 5, 6], dims="x", name="x") + + .. 
jupyter-output:: - In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") - Out[2]: array([4, 5, 6]) Coordinates: @@ -7728,30 +7731,39 @@ Enhancements - Basic support for :py:class:`~pandas.MultiIndex` coordinates on xray objects, including indexing, :py:meth:`~DataArray.stack` and :py:meth:`~DataArray.unstack`: - .. ipython:: - :verbatim: + .. jupyter-input:: + + df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) + + s = df.set_index(["x", "y"])["foo"] - In [7]: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) + arr = xray.DataArray(s, dims="z") - In [8]: s = df.set_index(["x", "y"])["foo"] + arr - In [12]: arr = xray.DataArray(s, dims="z") + .. jupyter-output:: - In [13]: arr - Out[13]: array([0, 1, 2]) Coordinates: * z (z) object ('a', 0) ('b', 0) ('b', 1) - In [19]: arr.indexes["z"] - Out[19]: + .. jupyter-input:: + + arr.indexes["z"] + + .. jupyter-output:: + MultiIndex(levels=[[u'a', u'b'], [0, 1]], labels=[[0, 1, 1], [0, 0, 1]], names=[u'x', u'y']) - In [14]: arr.unstack("z") - Out[14]: + .. jupyter-input:: + + arr.unstack("z") + + .. jupyter-output:: + array([[ 0., nan], [ 1., 2.]]) @@ -7759,8 +7771,12 @@ Enhancements * x (x) object 'a' 'b' * y (y) int64 0 1 - In [26]: arr.unstack("z").stack(z=("x", "y")) - Out[26]: + .. jupyter-input:: + + arr.unstack("z").stack(z=("x", "y")) + + .. jupyter-output:: + array([ 0., nan, 1., 2.]) Coordinates: @@ -7870,13 +7886,14 @@ Enhancements the ``tolerance`` argument for controlling nearest-neighbor selection (:issue:`629`): - .. ipython:: - :verbatim: + .. jupyter-input:: + + array = xray.DataArray([1, 2, 3], dims="x") - In [5]: array = xray.DataArray([1, 2, 3], dims="x") + array.reindex(x=[0.9, 1.5], method="nearest", tolerance=0.2) + + .. jupyter-output:: - In [6]: array.reindex(x=[0.9, 1.5], method="nearest", tolerance=0.2) - Out[6]: array([ 2., nan]) Coordinates: @@ -7952,17 +7969,18 @@ Enhancements - Added ``xray.Dataset.isel_points`` and ``xray.Dataset.sel_points`` to support pointwise indexing of Datasets and DataArrays (:issue:`475`). - .. ipython:: - :verbatim: + .. jupyter-input:: - In [1]: da = xray.DataArray( + da = xray.DataArray( ...: np.arange(56).reshape((7, 8)), ...: coords={"x": list("abcdefg"), "y": 10 * np.arange(8)}, ...: dims=["x", "y"], ...: ) - In [2]: da - Out[2]: + da + + .. jupyter-output:: + array([[ 0, 1, 2, 3, 4, 5, 6, 7], [ 8, 9, 10, 11, 12, 13, 14, 15], @@ -7975,9 +7993,13 @@ Enhancements * y (y) int64 0 10 20 30 40 50 60 70 * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' + .. jupyter-input:: + # we can index by position along each dimension - In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim="points") - Out[3]: + da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim="points") + + .. jupyter-output:: + array([ 0, 9, 48]) Coordinates: @@ -7985,9 +8007,13 @@ Enhancements x (points) |S1 'a' 'b' 'g' * points (points) int64 0 1 2 + .. jupyter-input:: + # or equivalently by label - In [9]: da.sel_points(x=["a", "b", "g"], y=[0, 10, 0], dim="points") - Out[9]: + da.sel_points(x=["a", "b", "g"], y=[0, 10, 0], dim="points") + + .. jupyter-output:: + array([ 0, 9, 48]) Coordinates: @@ -8069,14 +8095,13 @@ Enhancements with dask.array. For example, to save a dataset too big to fit into memory to one file per year, we could write: - .. ipython:: - :verbatim: + .. 
jupyter-input:: - In [1]: years, datasets = zip(*ds.groupby("time.year")) + years, datasets = zip(*ds.groupby("time.year")) - In [2]: paths = ["%s.nc" % y for y in years] + paths = ["%s.nc" % y for y in years] - In [3]: xray.save_mfdataset(datasets, paths) + xray.save_mfdataset(datasets, paths) Bug fixes ~~~~~~~~~ @@ -8144,13 +8169,14 @@ Backwards incompatible changes surprising behavior, where the behavior of groupby and concat operations could depend on runtime values (:issue:`268`). For example: - .. ipython:: - :verbatim: + .. jupyter-input:: + + ds = xray.Dataset({"x": 0}) - In [1]: ds = xray.Dataset({"x": 0}) + xray.concat([ds, ds], dim="y") + + .. jupyter-output:: - In [2]: xray.concat([ds, ds], dim="y") - Out[2]: Dimensions: () Coordinates: @@ -8217,11 +8243,12 @@ Enhancements .. use verbatim because I can't seem to install pandas 0.16.1 on RTD :( - .. ipython:: - :verbatim: + .. jupyter-input:: + + ds.sel(x=1.1, method="nearest") + + .. jupyter-output:: - In [12]: ds.sel(x=1.1, method="nearest") - Out[12]: Dimensions: () Coordinates: @@ -8229,8 +8256,12 @@ Enhancements Data variables: y int64 2 - In [13]: ds.sel(x=[1.1, 2.1], method="pad") - Out[13]: + .. jupyter-input:: + + ds.sel(x=[1.1, 2.1], method="pad") + + .. jupyter-output:: + Dimensions: (x: 2) Coordinates: @@ -8261,10 +8292,9 @@ Enhancements Or to set a global option: - .. ipython:: - :verbatim: + .. jupyter-input:: - In [1]: xray.set_options(display_width=80) + xray.set_options(display_width=80) The default value for the ``display_width`` option is 80. From cd35a8d7ce2e26957921182163ef0d340eab14e3 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Tue, 3 Jun 2025 12:51:31 +0200 Subject: [PATCH 12/22] handle execution warnings in time-coding --- doc/internals/time-coding.rst | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/doc/internals/time-coding.rst b/doc/internals/time-coding.rst index 6fce6552f4e..fbba3b99069 100644 --- a/doc/internals/time-coding.rst +++ b/doc/internals/time-coding.rst @@ -418,11 +418,11 @@ For encoding the process is more or less a reversal of the above, but we have to dtype="datetime64[s]", ) orig_values = np.array( - [-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="int64" + [-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="float64" ) units = "days since 0001-01-01 00:00:00" values, _, _ = xr.coding.times.encode_cf_datetime( - dates, units, calendar, dtype=np.dtype("int64") + dates, units, calendar, dtype=np.dtype("float64") ) print(values) np.testing.assert_array_equal(values, orig_values) @@ -439,11 +439,11 @@ For encoding the process is more or less a reversal of the above, but we have to dtype="datetime64[s]", ) orig_values = np.array( - [-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="int64" + [-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="float64" ) units = "days since 0001-01-01 00:00:00" values, units, _ = xr.coding.times.encode_cf_datetime( - dates, units, calendar, dtype=np.dtype("int64") + dates, units, calendar, dtype=np.dtype("float64") ) print(values, units) @@ -497,7 +497,7 @@ Similar logic applies for decoding timedelta values. The default resolution is ds.to_netcdf("test-timedeltas1.nc") .. jupyter-execute:: - + :stderr: xr.open_dataset("test-timedeltas1.nc") @@ -505,9 +505,8 @@ By default, timedeltas will be decoded to the same resolution as datetimes: .. 
jupyter-execute:: - coder = xr.coders.CFDatetimeCoder(time_unit="s") - xr.open_dataset("test-timedeltas1.nc", decode_times=coder) + xr.open_dataset("test-timedeltas1.nc", decode_times=coder, decode_timedelta=True) but if one would like to decode timedeltas to a different resolution, one can provide a coder specifically for timedeltas to ``decode_timedelta``: @@ -530,12 +529,12 @@ into their native on-disk resolution, if possible: .. jupyter-execute:: - xr.open_dataset("test-timedeltas2.nc") + xr.open_dataset("test-timedeltas2.nc", decode_timedelta=True) .. jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") - xr.open_dataset("test-timedeltas2.nc", decode_times=coder) + xr.open_dataset("test-timedeltas2.nc", decode_times=coder, decode_timedelta=True) To opt-out of timedelta decoding (see issue `Undesired decoding to timedelta64 `_) pass ``False`` to ``decode_timedelta``: From 420e7fb4363e3c2efa17c34a3ca63759fb3ff831 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Tue, 3 Jun 2025 12:52:05 +0200 Subject: [PATCH 13/22] use zarr v2 and consolidated=False to silence execution warnings --- doc/internals/zarr-encoding-spec.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index 26f21058039..83b08ae5129 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -59,7 +59,7 @@ re-open it directly with Zarr: import zarr ds = xr.tutorial.load_dataset("rasm") - ds.to_zarr("rasm.zarr", mode="w") + ds.to_zarr("rasm.zarr", mode="w", consolidated=False) os.listdir("rasm.zarr") .. jupyter-execute:: From 0f36da28d8aa0b46c9e2c03815ac53b556de57ba Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Tue, 3 Jun 2025 12:52:37 +0200 Subject: [PATCH 14/22] cleanup, handle more warnings for RTD build --- doc/user-guide/indexing.rst | 10 ++++++---- doc/user-guide/io.rst | 28 +++++++++++++++------------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/doc/user-guide/indexing.rst b/doc/user-guide/indexing.rst index 826c7ebba29..52d239abde3 100644 --- a/doc/user-guide/indexing.rst +++ b/doc/user-guide/indexing.rst @@ -12,6 +12,8 @@ Indexing and selecting data np.random.seed(123456) + %xmode minimal + Xarray offers extremely flexible indexing routines that combine the best features of NumPy and pandas for data selection. @@ -582,6 +584,7 @@ __ https://numpy.org/doc/stable/user/basics.indexing.html#assigning-values-to-in You can also assign values to all variables of a :py:class:`Dataset` at once: .. jupyter-execute:: + :stderr: ds_org = xr.tutorial.open_dataset("eraint_uvz").isel( latitude=slice(56, 59), longitude=slice(255, 258), level=0 @@ -758,11 +761,10 @@ Alignment between xarray objects where one or both do not have coordinate labels succeeds only if all dimensions of the same name have the same length. Otherwise, it raises an informative error: -.. ipython:: - :verbatim: +.. jupyter-execute:: + :raises: - In [62]: xr.align(da, da[:2]) - ValueError: arguments without labels along dimension 'x' cannot be aligned because they have different dimension sizes: {2, 3} + xr.align(da, da[:2]) Underlying Indexes ------------------ diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 3d17e875481..6ee357142e4 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -664,7 +664,7 @@ To write to a local directory, we pass a path to a directory: ! rm -rf path/to/directory.zarr .. 
jupyter-execute:: - + :stderr: ds = xr.Dataset( {"foo": (("x", "y"), np.random.rand(4, 5))}, @@ -674,7 +674,7 @@ To write to a local directory, we pass a path to a directory: "z": ("x", list("abcd")), }, ) - ds.to_zarr("path/to/directory.zarr") + ds.to_zarr("path/to/directory.zarr", zarr_format=2, consolidated=False) (The suffix ``.zarr`` is optional--just a reminder that a zarr store lives there.) If the directory does not exist, it will be created. If a zarr @@ -702,7 +702,7 @@ To read back a zarr dataset that has been created this way, we use the .. jupyter-execute:: - ds_zarr = xr.open_zarr("path/to/directory.zarr") + ds_zarr = xr.open_zarr("path/to/directory.zarr", consolidated=False) ds_zarr Cloud Storage Buckets @@ -776,7 +776,6 @@ to Zarr: .. jupyter-execute:: - import dask.array # The values of this dask array are entirely irrelevant; only the dtype, @@ -785,7 +784,7 @@ to Zarr: ds = xr.Dataset({"foo": ("x", dummies)}, coords={"x": np.arange(30)}) path = "path/to/directory.zarr" # Now we write the metadata without computing any array values - ds.to_zarr(path, compute=False) + ds.to_zarr(path, compute=False, consolidated=False) Now, a Zarr store with the correct variable shapes and attributes exists that can be filled out by subsequent calls to ``to_zarr``. @@ -800,9 +799,9 @@ where the data should be written (in index space, not label space), e.g., # we would create them separately possibly even from separate processes. ds = xr.Dataset({"foo": ("x", np.arange(30))}, coords={"x": np.arange(30)}) # Any of the following region specifications are valid - ds.isel(x=slice(0, 10)).to_zarr(path, region="auto") - ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": "auto"}) - ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)}) + ds.isel(x=slice(0, 10)).to_zarr(path, region="auto", consolidated=False) + ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": "auto"}, consolidated=False) + ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)}, consolidated=False) Concurrent writes with ``region`` are safe as long as they modify distinct chunks in the underlying Zarr arrays (or use an appropriate ``lock``). @@ -833,7 +832,7 @@ For example: from zarr.codecs import BloscCodec compressor = BloscCodec(cname="zstd", clevel=3, shuffle="shuffle") - ds.to_zarr("foo.zarr", encoding={"foo": {"compressors": [compressor]}}) + ds.to_zarr("foo.zarr", consolidated=False, encoding={"foo": {"compressors": [compressor]}}) .. note:: @@ -887,7 +886,7 @@ order, e.g., for time-stepping a simulation: "t": pd.date_range("2001-01-01", periods=2), }, ) - ds1.to_zarr("path/to/directory.zarr") + ds1.to_zarr("path/to/directory.zarr", consolidated=False) .. jupyter-execute:: @@ -899,7 +898,7 @@ order, e.g., for time-stepping a simulation: "t": pd.date_range("2001-01-03", periods=2), }, ) - ds2.to_zarr("path/to/directory.zarr", append_dim="t") + ds2.to_zarr("path/to/directory.zarr", append_dim="t", consolidated=False) .. _io.zarr.writing_chunks: @@ -949,7 +948,7 @@ split them into chunks: .. jupyter-execute:: - ds.to_zarr("path/to/directory.zarr", mode="w") + ds.to_zarr("path/to/directory.zarr", mode="w", consolidated=False) ! ls -R path/to/directory.zarr @@ -962,6 +961,7 @@ length of each dimension by using the shorthand chunk size ``None``: ds.to_zarr( "path/to/directory.zarr", encoding={"xc": {"chunks": None}, "yc": {"chunks": None}}, + consolidated=False, mode="w", ) ! 
ls -R path/to/directory.zarr @@ -1003,7 +1003,7 @@ By default Xarray uses a feature called *consolidated metadata*, storing all metadata for the entire dataset with a single key (by default called ``.zmetadata``). This typically drastically speeds up opening the store. (For more information on this feature, consult the -`zarr docs on consolidating metadata `_.) +`zarr docs on consolidating metadata `_.) By default, xarray writes consolidated metadata and attempts to read stores with consolidated metadata, falling back to use non-consolidated metadata for @@ -1135,6 +1135,7 @@ using actual disk files. For example: .. jupyter-execute:: + :stderr: ds = xr.tutorial.open_dataset("air_temperature_gradient") cubes = ncdata.iris_xarray.cubes_from_xarray(ds) @@ -1142,6 +1143,7 @@ For example: print(cubes[1]) .. jupyter-execute:: + :stderr: ds = ncdata.iris_xarray.cubes_to_xarray(cubes) print(ds) From 07886dfa03acf2c3b3b2eab2eb6a8c36d5f0a256 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Tue, 3 Jun 2025 13:06:02 +0200 Subject: [PATCH 15/22] catch cartopy coastline download warning --- doc/user-guide/plotting.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/plotting.rst b/doc/user-guide/plotting.rst index dca5570d250..c566b06c9cf 100644 --- a/doc/user-guide/plotting.rst +++ b/doc/user-guide/plotting.rst @@ -790,6 +790,7 @@ To follow this section you'll need to have Cartopy installed and working. This script will plot the air temperature on a map. .. jupyter-execute:: + :stderr: air = xr.tutorial.open_dataset("air_temperature").air @@ -807,7 +808,6 @@ by faceting are accessible in the object returned by ``plot``: .. jupyter-execute:: - p = air.isel(time=[0, 4]).plot( transform=ccrs.PlateCarree(), col="time", From 6e569857ad64eb27ba506093488cfb9e956042d0 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Tue, 3 Jun 2025 14:11:12 +0200 Subject: [PATCH 16/22] catch downloading 50m coastline warning too --- doc/user-guide/plotting.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/user-guide/plotting.rst b/doc/user-guide/plotting.rst index c566b06c9cf..0694698132a 100644 --- a/doc/user-guide/plotting.rst +++ b/doc/user-guide/plotting.rst @@ -69,7 +69,6 @@ The following imports are necessary for all of the examples. import pandas as pd import xarray as xr - For these examples we'll use the North American air temperature dataset. .. jupyter-execute:: @@ -77,7 +76,6 @@ For these examples we'll use the North American air temperature dataset. airtemps = xr.tutorial.open_dataset("air_temperature") airtemps - .. jupyter-execute:: # Convert to celsius @@ -917,6 +915,7 @@ on a polar projection (:issue:`781`). This is why the default is to not follow this convention when plotting on a map: .. 
jupyter-execute:: + :stderr: ax = plt.subplot(projection=ccrs.PlateCarree()) da.plot.pcolormesh(x="lon", y="lat", ax=ax) From 3de4b5e67012e197cb1eb4bc243e15d2617a75f1 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Tue, 3 Jun 2025 14:44:30 +0200 Subject: [PATCH 17/22] silence xmode minimal printouts, more compact numpy printout --- doc/internals/internal-design.rst | 2 +- doc/user-guide/combining.rst | 1 + doc/user-guide/computation.rst | 1 + doc/user-guide/indexing.rst | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/internals/internal-design.rst b/doc/internals/internal-design.rst index 4430789522e..a690fa62981 100644 --- a/doc/internals/internal-design.rst +++ b/doc/internals/internal-design.rst @@ -6,7 +6,7 @@ import xarray as xr np.random.seed(123456) - np.set_printoptions(threshold=20) + np.set_printoptions(threshold=10, edgeitems=2) .. _internal design: diff --git a/doc/user-guide/combining.rst b/doc/user-guide/combining.rst index 8591d9d4e9b..cc4fd3adcf4 100644 --- a/doc/user-guide/combining.rst +++ b/doc/user-guide/combining.rst @@ -5,6 +5,7 @@ Combining data .. jupyter-execute:: :hide-code: + :hide-output: import numpy as np import pandas as pd diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index 6565fee5346..028030d96df 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -20,6 +20,7 @@ numpy) over all array values: .. jupyter-execute:: :hide-code: + :hide-output: import numpy as np import pandas as pd diff --git a/doc/user-guide/indexing.rst b/doc/user-guide/indexing.rst index 52d239abde3..2f3719ffc7f 100644 --- a/doc/user-guide/indexing.rst +++ b/doc/user-guide/indexing.rst @@ -5,6 +5,7 @@ Indexing and selecting data .. jupyter-execute:: :hide-code: + :hide-output: import numpy as np import pandas as pd From 22edd956f8ec8f648b14aa3e23494144dbd57f09 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Tue, 3 Jun 2025 17:00:29 +0200 Subject: [PATCH 18/22] dont execute code in whatsnew --- doc/contribute/contributing.rst | 1 - doc/whats-new.rst | 12 ------------ 2 files changed, 13 deletions(-) diff --git a/doc/contribute/contributing.rst b/doc/contribute/contributing.rst index d7ca0a8cb03..e0ece730cd1 100644 --- a/doc/contribute/contributing.rst +++ b/doc/contribute/contributing.rst @@ -402,7 +402,6 @@ Some other important things to know about the docs: will be rendered as: .. jupyter-execute:: - :hide-code: x = 2 x**3 diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d9e410d6579..77c8c49f316 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -5,18 +5,6 @@ What's New ========== -.. jupyter-execute:: - :hide-code: - - import numpy as np - import pandas as pd - import xarray as xray - import xarray - import xarray as xr - - np.random.seed(123456) - - .. 
_whats-new.2025.05.0: v2025.05.0 (unreleased) From aeaeff63941d8002be0aa841378cacc9b15247f1 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Wed, 4 Jun 2025 14:30:08 +0200 Subject: [PATCH 19/22] fix dark mode for datatrees --- doc/user-guide/hierarchical-data.rst | 65 ++++++++++++++++------------ 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/doc/user-guide/hierarchical-data.rst b/doc/user-guide/hierarchical-data.rst index 9bdc1e91d27..a350b7851de 100644 --- a/doc/user-guide/hierarchical-data.rst +++ b/doc/user-guide/hierarchical-data.rst @@ -68,15 +68,21 @@ We can connect them by creating another node representing a common parent, Homer homer = xr.DataTree(name="Homer", children={"Bart": bart, "Lisa": lisa}) Here we set the children of Homer in the node's constructor. -We now have a small family tree +We now have a small family tree where we can see how these individual Simpson family members are related to one another: .. jupyter-execute:: - # Enable text display instead of 'html' for compactness - xr.set_options(display_style="text") - homer + print(homer) + +.. note:: + We use ``print()`` above to show the compact tree hierarchy. + :py:class:`~xarray.DataTree` objects also have an interactive HTML representation that is enabled by default in editors such as JupyterLab and VSCode. + The HTML representation is especially helpful for larger trees and exploring new datasets, as it allows you to expand and collapse nodes. + If you prefer the text representations you can also set ``xr.set_options(display_style="text")``. + +.. + Comment:: may remove note and print()s after upstream theme changes https://github.com/pydata/pydata-sphinx-theme/pull/2187 -where we can see how these individual Simpson family members are related to one another. The nodes representing Bart and Lisa are now connected - we can confirm their sibling rivalry by examining the :py:class:`~xarray.DataTree.siblings` property: .. jupyter-execute:: @@ -89,7 +95,7 @@ But oops, we forgot Homer's third daughter, Maggie! Let's add her by updating Ho maggie = xr.DataTree(name="Maggie") homer.children = {"Bart": bart, "Lisa": lisa, "Maggie": maggie} - homer + print(homer) Let's check that Maggie knows who her Dad is: @@ -121,12 +127,11 @@ We can see the whole tree by printing Abe's node or just part of the tree by pri .. jupyter-execute:: - abe + print(abe) .. jupyter-execute:: - abe["Homer"] - + print(abe["Homer"]) In episode 28, Abe Simpson reveals that he had another son, Herbert "Herb" Simpson. We can add Herbert to the family tree without displacing Homer by :py:meth:`~xarray.DataTree.assign`-ing another child to Abe: @@ -135,7 +140,7 @@ We can add Herbert to the family tree without displacing Homer by :py:meth:`~xar herbert = xr.DataTree(name="Herb") abe = abe.assign({"Herbert": herbert}) - abe + print(abe) .. jupyter-execute:: @@ -191,7 +196,7 @@ and :ref:`filesystem paths` (to be explained shortly) to select two nodes of int .. jupyter-execute:: - vertebrates + print(vertebrates) This tree shows various families of species, grouped by their common features (making it technically a `"Cladogram" `_, rather than an evolutionary tree). @@ -255,7 +260,7 @@ including :py:meth:`~xarray.DataTree.keys`, :py:class:`~xarray.DataTree.values`, .. 
jupyter-execute:: - vertebrates["Bony Skeleton"]["Ray-finned Fish"] + print(vertebrates["Bony Skeleton"]["Ray-finned Fish"]) Note that the dict-like interface combines access to child :py:class:`~xarray.DataTree` nodes and stored :py:class:`~xarray.DataArrays`, so if we have a node that contains both children and data, calling :py:meth:`~xarray.DataTree.keys` will list both names of child nodes and @@ -350,7 +355,7 @@ we can construct a complex tree quickly using the alternative constructor :py:me "a/c/d": None, } dt = xr.DataTree.from_dict(d) - dt + print(dt) .. note:: @@ -386,7 +391,7 @@ then rebuilding a new tree using only the paths of those nodes: non_empty_nodes = { path: node.dataset for path, node in dt.subtree_with_keys if node.has_data } - xr.DataTree.from_dict(non_empty_nodes) + print(xr.DataTree.from_dict(non_empty_nodes)) You can see this tree is similar to the ``dt`` object above, except that it is missing the empty nodes ``a/c`` and ``a/c/d``. @@ -416,7 +421,7 @@ We can use :py:meth:`xarray.DataTree.match` for this: } ) result = dt.match("*/B") - result + print(result) We can also subset trees by the contents of the nodes. :py:meth:`xarray.DataTree.filter` retains only the nodes of a tree that meet a certain condition. @@ -436,13 +441,13 @@ First lets recreate the tree but with an ``age`` data variable in every node: }, name="Abe", ) - simpsons + print(simpsons) Now let's filter out the minors: .. jupyter-execute:: - simpsons.filter(lambda node: node["age"] > 18) + print(simpsons.filter(lambda node: node["age"] > 18)) The result is a new tree, containing only the nodes matching the condition. @@ -527,14 +532,14 @@ let's first create a example scientific dataset. ), } ) - voltages + print(voltages) Most xarray computation methods also exist as methods on datatree objects, so you can for example take the mean value of these two timeseries at once: .. jupyter-execute:: - voltages.mean(dim="time") + print(voltages.mean(dim="time")) This works by mapping the standard :py:meth:`xarray.Dataset.mean()` method over the dataset stored in each node of the tree one-by-one. @@ -556,7 +561,7 @@ For example, we can advance the timeline of the Simpsons by a decade just by .. jupyter-execute:: - simpsons + 10 + print(simpsons + 10) See that the same change (fast-forwarding by adding 10 years to the age of each character) has been applied to every node. @@ -583,7 +588,7 @@ Then calculate the RMS value of these signals: .. jupyter-execute:: - voltages.map_over_datasets(rms) + print(voltages.map_over_datasets(rms)) .. _multiple trees: @@ -613,14 +618,16 @@ To iterate over the corresponding nodes in multiple trees, use result = {} for path, (node1, node2) in xr.group_subtrees(dt1, dt2): result[path] = node1.dataset + node2.dataset - xr.DataTree.from_dict(result) + dt3 = xr.DataTree.from_dict(result) + print(dt3) Alternatively, you apply a function directly to paired datasets at every node using :py:func:`xarray.map_over_datasets`: .. jupyter-execute:: - xr.map_over_datasets(lambda x, y: x + y, dt1, dt2) + dt3 = xr.map_over_datasets(lambda x, y: x + y, dt1, dt2) + print(dt3) Comparing Trees for Isomorphism ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -690,7 +697,7 @@ we can do arithmetic between them. ), } ) - currents + print(currents) .. jupyter-execute:: @@ -701,7 +708,7 @@ We could use this feature to quickly calculate the electrical power in our signa .. jupyter-execute:: power = currents * voltages - power + print(power) .. 
_hierarchical-data.alignment-and-coordinate-inheritance:

@@ -773,7 +780,7 @@ To represent our unalignable data in a single :py:class:`~xarray.DataTree`, we m

     dt = xr.DataTree.from_dict(
         {"daily": ds_daily, "weekly": ds_weekly, "monthly": ds_monthly}
     )
-    dt
+    print(dt)

 Now we have a valid :py:class:`~xarray.DataTree` structure which contains all the
 data at each different time frequency, stored in a separate group.

@@ -782,13 +789,15 @@ For example we can extract all three timeseries at a specific lat-lon location:

 .. jupyter-execute::

-    dt.sel(lat=75, lon=300)
+    dt_sel = dt.sel(lat=75, lon=300)
+    print(dt_sel)

 or compute the standard deviation of each timeseries to find out how it varies with sampling frequency:

 .. jupyter-execute::

-    dt.std(dim="time")
+    dt_std = dt.std(dim="time")
+    print(dt_std)

 .. _coordinate-inheritance:

From 673ce4482e55d10c20b6ce8158481618cf01e8c3 Mon Sep 17 00:00:00 2001
From: Scott Henderson
Date: Wed, 4 Jun 2025 17:08:58 +0200
Subject: [PATCH 20/22] fix mermaid diagram, kerchunk, ncdata, and zarr
 sections

---
 doc/user-guide/io.rst | 119 +++++++++++++++++++++++++-----------------
 1 file changed, 71 insertions(+), 48 deletions(-)

diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst
index 6ee357142e4..27d04baa306 100644
--- a/doc/user-guide/io.rst
+++ b/doc/user-guide/io.rst
@@ -41,37 +41,58 @@ Following the diagram is detailed information on many popular backends.
 You can learn more about using and developing backends in the
 `Xarray tutorial JupyterBook `_.

+..
+   _comment: mermaid Flowchart "link" text gets secondary color background, SVG icon fill gets primary color
+
+.. raw:: html
+
+
 .. mermaid::
+   :config: {"theme":"base","themeVariables":{"fontSize":"20px","primaryColor":"#fff","primaryTextColor":"#fff","primaryBorderColor":"#59c7d6","lineColor":"#e28126","secondaryColor":"#767985"}}
    :alt: Flowchart illustrating how to choose the right backend engine to read your data

    flowchart LR

       built-in-eng["`**Is your data stored in one of these formats?**
       - netCDF4
       - netCDF3
       - Zarr
       - DODS/OPeNDAP
       - HDF5
       `"]

       built-in("`**You're in luck!** Xarray bundles a backend to automatically read these formats.
       Open data using xr.open_dataset(). We recommend
-       always setting the engine you want to use.""")
+       explicitly setting engine='xxxx' for faster loading.`")

       installed-eng["""One of these formats?
       - GRIB
       - TileDB
       - GeoTIFF, JPEG-2000, etc. (via GDAL)
       - Sentinel-1 SAFE
       """]

       installed("""Install the linked backend library and use it with
       xr.open_dataset(file, engine='xxxx').""")

       other["`**Options:**
       - Look around to see if someone has created an Xarray backend for your format!
+ - Create your own backend + - Convert your data to a supported format + `"] built-in-eng -->|Yes| built-in built-in-eng -->|No| installed-eng @@ -79,16 +100,16 @@ You can learn more about using and developing backends in the installed-eng -->|Yes| installed installed-eng -->|No| other - click built-in-eng "https://docs.xarray.dev/en/stable/getting-started-guide/faq.html#how-do-i-open-format-x-file-as-an-xarray-dataset" - click other "https://docs.xarray.dev/en/stable/internals/how-to-add-new-backend.html" + click built-in-eng "https://docs.xarray.dev/en/stable/get-help/faq.html#how-do-i-open-format-x-file-as-an-xarray-dataset" + - classDef quesNodefmt fill:#9DEEF4,stroke:#206C89,text-align:left + classDef quesNodefmt font-size:12pt,fill:#0e4666,stroke:#59c7d6,stroke-width:3 class built-in-eng,installed-eng quesNodefmt - classDef ansNodefmt fill:#FFAA05,stroke:#E37F17,text-align:left,white-space:nowrap + classDef ansNodefmt font-size:12pt,fill:#4a4a4a,stroke:#17afb4,stroke-width:3 class built-in,installed,other ansNodefmt - linkStyle default font-size:20pt,color:#206C89 + linkStyle default font-size:18pt,stroke-width:4 .. _io.netcdf: @@ -948,23 +969,23 @@ split them into chunks: .. jupyter-execute:: - ds.to_zarr("path/to/directory.zarr", mode="w", consolidated=False) - ! ls -R path/to/directory.zarr + ds.to_zarr("path/to/directory.zarr", consolidated=False, mode="w") + !ls path/to/directory.zarr/*/*/ This may cause unwanted overhead on some systems, such as when reading from a cloud storage provider. To disable this chunking, we can specify a chunk size equal to the -length of each dimension by using the shorthand chunk size ``None``: +shape of each coordinate array in the ``encoding`` argument: .. jupyter-execute:: ds.to_zarr( "path/to/directory.zarr", - encoding={"xc": {"chunks": None}, "yc": {"chunks": None}}, + encoding={"xc": {"chunks": ds.xc.shape}, "yc": {"chunks": ds.yc.shape}}, consolidated=False, mode="w", ) - ! ls -R path/to/directory.zarr + !ls path/to/directory.zarr/*/*/ The number of chunks on Tair matches our dask chunks, while there is now only a single @@ -1043,7 +1064,7 @@ with ``_FillValue`` using the ``use_zarr_fill_value_as_mask`` kwarg to :py:func: Kerchunk -------- -`Kerchunk `_ is a Python library +`Kerchunk `_ is a Python library that allows you to access chunked and compressed data formats (such as NetCDF3, NetCDF4, HDF5, GRIB2, TIFF & FITS), many of which are primary data formats for many data archives, by viewing the whole archive as an ephemeral `Zarr`_ dataset which allows for parallel, chunk-specific access. @@ -1067,24 +1088,19 @@ with ``xarray``, especially when these archives are large in size. A single comb reference can refer to thousands of the original data files present in these archives. You can view the whole dataset with from this combined reference using the above packages. -The following example shows opening a combined references generated from a ``.hdf`` file stored locally. +The following example shows opening a single ``json`` reference to the ``saved_on_disk.h5`` file created above. +If the file were instead stored remotely (e.g. ``s3://saved_on_disk.h5``) you can use ``storage_options`` +that are used to `configure fsspec `_: .. 
jupyter-execute:: - :raises: - storage_options = { - "target_protocol": "file", - } - - # add the `remote_protocol` key in `storage_options` if you're accessing a file remotely - - ds1 = xr.open_dataset( + ds_kerchunked = xr.open_dataset( "./combined.json", engine="kerchunk", - storage_options=storage_options, + storage_options={}, ) - ds1 + ds_kerchunked .. note:: @@ -1115,7 +1131,7 @@ If iris is installed, xarray can convert a ``DataArray`` into a ``Cube`` using ) cube = da.to_iris() - cube + print(cube) Conversely, we can create a new ``DataArray`` object from a ``Cube`` using :py:meth:`DataArray.from_iris`: @@ -1132,7 +1148,7 @@ datasets. It uses the file saving and loading functions in both projects to pro more "correct" translation between them, but still with very low overhead and not using actual disk files. -For example: +Here we load an xarray dataset and convert it to Iris cubes: .. jupyter-execute:: :stderr: @@ -1140,13 +1156,20 @@ For example: ds = xr.tutorial.open_dataset("air_temperature_gradient") cubes = ncdata.iris_xarray.cubes_from_xarray(ds) print(cubes) + +.. jupyter-execute:: + print(cubes[1]) +And we can convert the cubes back to an xarray dataset: + .. jupyter-execute:: - :stderr: + + # ensure dataset-level and variable-level attributes loaded correctly + iris.FUTURE.save_split_attrs = True ds = ncdata.iris_xarray.cubes_to_xarray(cubes) - print(ds) + ds Ncdata can also adjust file data within load and save operations, to fix data loading problems or provide exact save formatting without needing to modify files on disk. From fe4ed5460eefde6c870a6d99287d9aa6da164132 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Wed, 4 Jun 2025 18:05:32 +0200 Subject: [PATCH 21/22] use tree command to check local zarrs --- doc/user-guide/io.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 27d04baa306..1aea3df2e32 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -970,7 +970,7 @@ split them into chunks: .. 
jupyter-execute::

     ds.to_zarr("path/to/directory.zarr", consolidated=False, mode="w")
-    !ls path/to/directory.zarr/*/*/
+    !tree -I zarr.json path/to/directory.zarr


 This may cause unwanted overhead on some systems, such as when reading from a cloud
@@ -985,7 +985,7 @@ shape of each coordinate array in the ``encoding`` argument:
         consolidated=False,
         mode="w",
     )
-    !ls path/to/directory.zarr/*/*/
+    !tree -I zarr.json path/to/directory.zarr


 The number of chunks on Tair matches our dask chunks, while there is now only a single

From 8ca78195f291b8af8e78ff611ccc361d7f4e8dc3 Mon Sep 17 00:00:00 2001
From: Scott Henderson
Date: Wed, 4 Jun 2025 18:38:02 +0200
Subject: [PATCH 22/22] address time-coding review

---
 doc/internals/time-coding.rst | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/doc/internals/time-coding.rst b/doc/internals/time-coding.rst
index fbba3b99069..3aec88f176a 100644
--- a/doc/internals/time-coding.rst
+++ b/doc/internals/time-coding.rst
@@ -418,16 +418,17 @@ For encoding the process is more or less a reversal of the above, but we have to
         dtype="datetime64[s]",
     )
     orig_values = np.array(
-        [-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="float64"
+        [-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="int64"
     )
     units = "days since 0001-01-01 00:00:00"
     values, _, _ = xr.coding.times.encode_cf_datetime(
-        dates, units, calendar, dtype=np.dtype("float64")
+        dates, units, calendar, dtype=np.dtype("int64")
     )
-    print(values)
+    print(values, units)
     np.testing.assert_array_equal(values, orig_values)

 .. jupyter-execute::
+    :stderr:

     dates = np.array(
         [
@@ -439,13 +440,17 @@ For encoding the process is more or less a reversal of the above, but we have to
         dtype="datetime64[s]",
     )
     orig_values = np.array(
-        [-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="float64"
+        [-2002 * 365 - 121, -366, 365, 2000 * 365 + 119], dtype="int64"
     )
+    orig_values *= 24  # Convert to hours
+    orig_values[0] += 1  # Adjust for the hour offset in dates above
+
     units = "days since 0001-01-01 00:00:00"
     values, units, _ = xr.coding.times.encode_cf_datetime(
-        dates, units, calendar, dtype=np.dtype("float64")
+        dates, units, calendar, dtype=np.dtype("int64")
     )
     print(values, units)
+    np.testing.assert_array_equal(values, orig_values)

 .. _internals.default_timeunit:
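
The integer round trip that [PATCH 22/22] asserts can be sanity-checked outside the docs build with a short, self-contained script. The following is a minimal sketch rather than part of the patch series: it assumes a recent xarray with a netCDF backend installed, it reuses the public ``xr.coding.times.encode_cf_datetime`` helper already exercised in ``doc/internals/time-coding.rst``, and the file name ``roundtrip-check.nc`` is illustrative only.

.. code-block:: python

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Encode four daily datetimes as integer "days since" values, mirroring
    # the int64 round trip asserted in the patched time-coding docs.
    dates = pd.date_range("2000-01-01", periods=4, freq="D").to_numpy()
    units = "days since 2000-01-01 00:00:00"
    values, _, _ = xr.coding.times.encode_cf_datetime(
        dates, units, "proleptic_gregorian", dtype=np.dtype("int64")
    )
    np.testing.assert_array_equal(values, np.arange(4))

    # A full file round trip through to_netcdf/open_dataset should then
    # restore the original datetimes exactly.
    ds = xr.Dataset(coords={"time": ("time", dates)})
    ds.to_netcdf(
        "roundtrip-check.nc",
        encoding={"time": {"units": units, "dtype": "int64"}},
    )
    with xr.open_dataset("roundtrip-check.nc") as decoded:
        np.testing.assert_array_equal(decoded.time.to_numpy(), dates)

The coordinate-chunking guidance touched by [PATCH 21/22] can be exercised the same way. This sketch assumes ``zarr`` is installed; ``chunk-check.zarr`` is again an illustrative path, and the ``encoding`` argument mirrors the patched ``io.rst`` example of pinning each coordinate to a single chunk equal to its full shape.

.. code-block:: python

    import numpy as np
    import xarray as xr

    # Write a small dataset to Zarr, pinning each coordinate to one chunk
    # spanning its full shape, following the patched io.rst example.
    ds = xr.Dataset(
        {"tair": (("x", "y"), np.zeros((4, 5)))},
        coords={"xc": ("x", np.arange(4.0)), "yc": ("y", np.arange(5.0))},
    )
    ds.to_zarr(
        "chunk-check.zarr",
        mode="w",
        consolidated=False,
        encoding={"xc": {"chunks": ds.xc.shape}, "yc": {"chunks": ds.yc.shape}},
    )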
