diff --git a/doc/api.rst b/doc/api.rst index d7c2370d348..67c81aaf601 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1591,6 +1591,8 @@ Tutorial tutorial.open_dataset tutorial.load_dataset + tutorial.open_datatree + tutorial.load_datatree Testing ======= diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 05e03869553..e3022fed50d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,8 @@ v2025.02.0 (unreleased) New Features ~~~~~~~~~~~~ +- Added :py:meth:`tutorial.open_datatree` and :py:meth:`tutorial.load_datatree` + By `Eni Awowale `_. - Added :py:meth:`DataTree.filter_like` to conveniently restructure a DataTree like another DataTree (:issue:`10096`, :pull:`10097`). By `Kobe Vandelanotte `_. - Added :py:meth:`Coordinates.from_xindex` as convenience for creating a new :py:class:`Coordinates` object diff --git a/xarray/tests/test_tutorial.py b/xarray/tests/test_tutorial.py index 9d59219c204..e271da6863b 100644 --- a/xarray/tests/test_tutorial.py +++ b/xarray/tests/test_tutorial.py @@ -1,20 +1,15 @@ from __future__ import annotations -import pytest - -from xarray import DataArray, tutorial -from xarray.tests import assert_identical, network +from xarray import DataArray, DataTree, tutorial +from xarray.testing import assert_identical +from xarray.tests import network @network class TestLoadDataset: - @pytest.fixture(autouse=True) - def setUp(self): - self.testfile = "tiny" - def test_download_from_github(self, tmp_path) -> None: cache_dir = tmp_path / tutorial._default_cache_dir_name - ds = tutorial.open_dataset(self.testfile, cache_dir=cache_dir).load() + ds = tutorial.open_dataset("tiny", cache_dir=cache_dir).load() tiny = DataArray(range(5), name="tiny").to_dataset() assert_identical(ds, tiny) @@ -24,7 +19,27 @@ def test_download_from_github_load_without_cache( cache_dir = tmp_path / tutorial._default_cache_dir_name ds_nocache = tutorial.open_dataset( - self.testfile, cache=False, cache_dir=cache_dir + "tiny", cache=False, cache_dir=cache_dir + 
).load() + ds_cache = tutorial.open_dataset("tiny", cache_dir=cache_dir).load() + assert_identical(ds_cache, ds_nocache) + + +@network +class TestLoadDataTree: + def test_download_from_github(self, tmp_path) -> None: + cache_dir = tmp_path / tutorial._default_cache_dir_name + ds = tutorial.open_datatree("tiny", cache_dir=cache_dir).load() + tiny = DataTree.from_dict({"/": DataArray(range(5), name="tiny").to_dataset()}) + assert_identical(ds, tiny) + + def test_download_from_github_load_without_cache( + self, tmp_path, monkeypatch + ) -> None: + cache_dir = tmp_path / tutorial._default_cache_dir_name + + ds_nocache = tutorial.open_datatree( + "tiny", cache=False, cache_dir=cache_dir ).load() - ds_cache = tutorial.open_dataset(self.testfile, cache_dir=cache_dir).load() + ds_cache = tutorial.open_datatree("tiny", cache_dir=cache_dir).load() assert_identical(ds_cache, ds_nocache) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index cfc6a5147d3..ec832694a99 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -16,8 +16,10 @@ import numpy as np from xarray.backends.api import open_dataset as _open_dataset +from xarray.backends.api import open_datatree as _open_datatree from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset +from xarray.core.datatree import DataTree if TYPE_CHECKING: from xarray.backends.api import T_Engine @@ -248,3 +250,140 @@ def scatter_example_dataset(*, seed: None | int = None) -> Dataset: ds.B.attrs["units"] = "Bunits" return ds + + +def open_datatree( + name: str, + cache: bool = True, + cache_dir: None | str | os.PathLike = None, + *, + engine: T_Engine = None, + **kws, +) -> DataTree: + """ + Open a dataset as a `DataTree` from the online repository (requires internet). + + If a local copy is found then always use that to avoid network traffic. 
+ + Available datasets: + + * ``"imerghh_730"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T07:30:00.000Z + * ``"imerghh_830"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T08:30:00.000Z + * ``"air_temperature"``: NCEP reanalysis subset + * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients + * ``"basin_mask"``: Dataset with ocean basins marked using integers + * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1 + * ``"rasm"``: Output of the Regional Arctic System Model (RASM) + * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output + * ``"tiny"``: small synthetic dataset with a 1D data variable + * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK + * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data + * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages + + Parameters + ---------- + name : str + Name of the file containing the dataset. + e.g. 'air_temperature' + cache_dir : path-like, optional + The directory in which to search for and write cached data. + cache : bool, optional + If True, then cache data locally for use on subsequent calls + **kws : dict, optional + Passed to xarray.open_datatree + + See Also + -------- + tutorial.load_datatree + open_datatree + """ + try: + import pooch + except ImportError as e: + raise ImportError( + "tutorial.open_datatree depends on pooch to download and manage datasets." + " To proceed please install pooch." 
+ ) from e + + logger = pooch.get_logger() + logger.setLevel("WARNING") + + cache_dir = _construct_cache_dir(cache_dir) + if name in external_urls: + url = external_urls[name] + else: + path = pathlib.Path(name) + if not path.suffix: + # process the name + default_extension = ".nc" + if engine is None: + _check_netcdf_engine_installed(name) + path = path.with_suffix(default_extension) + elif path.suffix == ".grib": + if engine is None: + engine = "cfgrib" + try: + import cfgrib # noqa: F401 + except ImportError as e: + raise ImportError( + "Reading this tutorial dataset requires the cfgrib package." + ) from e + + url = f"{base_url}/raw/{version}/{path.name}" + + headers = {"User-Agent": f"xarray {sys.modules['xarray'].__version__}"} + downloader = pooch.HTTPDownloader(headers=headers) + + # retrieve the file + filepath = pooch.retrieve( + url=url, known_hash=None, path=cache_dir, downloader=downloader + ) + ds = _open_datatree(filepath, engine=engine, **kws) + if not cache: + ds = ds.load() + pathlib.Path(filepath).unlink() + + return ds + + +def load_datatree(*args, **kwargs) -> DataTree: + """ + Open, load into memory (as a `DataTree`), and close a dataset from the online repository + (requires internet). + + If a local copy is found then always use that to avoid network traffic. 
+ + Available datasets: + + * ``"imerghh_730"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T07:30:00.000Z + * ``"imerghh_830"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T08:30:00.000Z + * ``"air_temperature"``: NCEP reanalysis subset + * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients + * ``"basin_mask"``: Dataset with ocean basins marked using integers + * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1 + * ``"rasm"``: Output of the Regional Arctic System Model (RASM) + * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output + * ``"tiny"``: small synthetic dataset with a 1D data variable + * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK + * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data + * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages + + Parameters + ---------- + name : str + Name of the file containing the dataset. + e.g. 'air_temperature' + cache_dir : path-like, optional + The directory in which to search for and write cached data. + cache : bool, optional + If True, then cache data locally for use on subsequent calls + **kws : dict, optional + Passed to xarray.open_datatree + + See Also + -------- + tutorial.open_datatree + open_datatree + """ + with open_datatree(*args, **kwargs) as ds: + return ds.load()