diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index fd992f3e5d8..841a8f53ed7 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -395,7 +395,11 @@ def open_dataset(
     decode_cf: bool | None = None,
     mask_and_scale: bool | Mapping[str, bool] | None = None,
     decode_times: (
-        bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None
+        bool
+        | str
+        | CFDatetimeCoder
+        | Mapping[str, bool | str | CFDatetimeCoder]
+        | None
     ) = None,
     decode_timedelta: (
         bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
@@ -465,10 +469,15 @@ class (a subclass of ``BackendEntrypoint``) can also be used.
         be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
         to toggle this feature per-variable individually.
         This keyword may not be supported by all the backends.
-    decode_times : bool, CFDatetimeCoder or dict-like, optional
-        If True, decode times encoded in the standard NetCDF datetime format
-        into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them
-        encoded as numbers.
+    decode_times : str, bool, CFDatetimeCoder or dict-like, optional
+        Decode times encoded in the standard NetCDF datetime format
+        into datetime objects.
+        If "raise", an exception will be raised if any time variable cannot be decoded.
+        If "warn", a warning will be emitted if any time variable cannot be decoded.
+        If "ignore", any time variables that cannot be decoded will pass through unchanged.
+        If False, no time variables will be decoded.
+        True is the same as "raise".
+        If a :py:class:`coders.CFDatetimeCoder`, it will be used to decode time variables.
         Pass a mapping, e.g. ``{"my_variable": False}``,
         to toggle this feature per-variable individually.
         This keyword may not be supported by all the backends.
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index f0a4c195263..9d97ad1de64 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -361,9 +361,10 @@ def _decode_cf_datetime_dtype(
         )
         msg = (
             f"unable to decode time units {units!r} with {calendar_msg!r}. Try "
-            "opening your dataset with decode_times=False or installing cftime "
-            "if it is not installed."
+            "opening your dataset with decode_times=False or 'warn' or 'ignore'."
         )
+        if cftime is None:
+            msg += " Install cftime if your variable uses a cf-specific calendar."
         raise ValueError(msg) from err
     else:
         dtype = getattr(result, "dtype", np.dtype("object"))
@@ -1370,15 +1371,24 @@ class CFDatetimeCoder(VariableCoder):
         May not be supported by all the backends.
     time_unit : PDDatetimeUnitOptions
         Target resolution when decoding dates. Defaults to "ns".
+    on_error : str, optional
+        What to do if there is an error when attempting to decode
+        a time variable. Options are: "raise", "warn", "ignore".
+        Defaults to "raise".
     """
 
     def __init__(
         self,
         use_cftime: bool | None = None,
         time_unit: PDDatetimeUnitOptions = "ns",
+        on_error: str = "raise",
     ) -> None:
         self.use_cftime = use_cftime
         self.time_unit = time_unit
+        if on_error not in ("raise", "warn", "ignore"):
+            msg = f'on_error must be "raise", "warn" or "ignore", got {on_error!r}'
+            raise ValueError(msg)
+        self.on_error = on_error
 
     def encode(self, variable: Variable, name: T_Name = None) -> Variable:
         if np.issubdtype(variable.dtype, np.datetime64) or contains_cftime_datetimes(
@@ -1411,9 +1421,17 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
 
             units = pop_to(attrs, encoding, "units")
             calendar = pop_to(attrs, encoding, "calendar")
-            dtype = _decode_cf_datetime_dtype(
-                data, units, calendar, self.use_cftime, self.time_unit
-            )
+            try:
+                dtype = _decode_cf_datetime_dtype(
+                    data, units, calendar, self.use_cftime, self.time_unit
+                )
+            except ValueError as err:
+                if self.on_error == "raise":
+                    raise
+                if self.on_error == "warn":
+                    emit_user_level_warning(err.args[0])
+                return variable
+
             transform = partial(
                 decode_cf_datetime,
                 units=units,
diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
index 6a5deb09152..7e1c40a0697 100644
--- a/xarray/coding/variables.py
+++ b/xarray/coding/variables.py
@@ -271,7 +271,7 @@ class CFMaskCoder(VariableCoder):
 
     def __init__(
         self,
-        decode_times: bool | CFDatetimeCoder = False,
+        decode_times: bool | str | CFDatetimeCoder = False,
         decode_timedelta: bool | CFTimedeltaCoder = False,
     ) -> None:
         self.decode_times = decode_times
@@ -499,7 +499,7 @@ class CFScaleOffsetCoder(VariableCoder):
 
     def __init__(
         self,
-        decode_times: bool | CFDatetimeCoder = False,
+        decode_times: bool | str | CFDatetimeCoder = False,
         decode_timedelta: bool | CFTimedeltaCoder = False,
     ) -> None:
         self.decode_times = decode_times
diff --git a/xarray/conventions.py b/xarray/conventions.py
index d3ee05e5da1..d25aaaeb996 100644
--- a/xarray/conventions.py
+++ b/xarray/conventions.py
@@ -16,6 +16,7 @@
     _contains_datetime_like_objects,
     contains_cftime_datetimes,
 )
+from xarray.core.types import DECODE_TIMES_OPTIONS
 from xarray.core.utils import emit_user_level_warning
 from xarray.core.variable import IndexVariable, Variable
 from xarray.namedarray.utils import is_duck_array
@@ -38,7 +39,6 @@
     "formula_terms",
 )
 
-
 if TYPE_CHECKING:
     from xarray.backends.common import AbstractDataStore
     from xarray.core.dataset import Dataset
@@ -111,7 +111,7 @@ def decode_cf_variable(
     var: Variable,
     concat_characters: bool = True,
     mask_and_scale: bool = True,
-    decode_times: bool | CFDatetimeCoder = True,
+    decode_times: bool | str | CFDatetimeCoder = True,
     decode_endianness: bool = True,
     stack_char_dim: bool = True,
     use_cftime: bool | None = None,
@@ -138,8 +138,11 @@ def decode_cf_variable(
         Lazily scale (using scale_factor and add_offset) and mask
         (using _FillValue). If the _Unsigned attribute is present
         treat integer arrays as unsigned.
-    decode_times : bool or CFDatetimeCoder
+    decode_times : bool or str or CFDatetimeCoder
+        One of True, False, "raise", "warn", "ignore", or a CFDatetimeCoder.
         Decode cf times ("hours since 2000-01-01") to np.datetime64.
+        Unless False, time units that cannot be decoded raise an exception
+        ("raise" or True), emit a warning ("warn"), or are left as-is ("ignore").
     decode_endianness : bool
         Decode arrays from non-native to native endianness.
     stack_char_dim : bool
@@ -223,7 +226,15 @@ def decode_cf_variable(
                     "    ds = xr.open_dataset(decode_times=time_coder)\n",
                     FutureWarning,
                 )
-            decode_times = CFDatetimeCoder(use_cftime=use_cftime)
+            # map the user-facing flag (True/"raise"/"warn"/"ignore") to on_error
+            try:
+                on_error = DECODE_TIMES_OPTIONS[decode_times]
+            except (KeyError, TypeError):  # unknown flag or unhashable value
+                valid = sorted(k for k in DECODE_TIMES_OPTIONS if isinstance(k, str))
+                raise ValueError(
+                    f"`decode_times` must be one of: {valid}, got {decode_times!r}"
+                ) from None
+            decode_times = CFDatetimeCoder(use_cftime=use_cftime, on_error=on_error)
         elif use_cftime is not None:
             raise TypeError(
                 "Usage of 'use_cftime' as a kwarg is not allowed "
@@ -352,7 +363,10 @@ def decode_cf_variables(
     attributes: T_Attrs,
     concat_characters: bool | Mapping[str, bool] = True,
     mask_and_scale: bool | Mapping[str, bool] = True,
-    decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] = True,
+    decode_times: bool
+    | str
+    | CFDatetimeCoder
+    | Mapping[str, bool | str | CFDatetimeCoder] = True,
     decode_coords: bool | Literal["coordinates", "all"] = True,
     drop_variables: T_DropVariables = None,
     use_cftime: bool | Mapping[str, bool] | None = None,
@@ -413,7 +427,8 @@ def stackable(dim: Hashable) -> bool:
                 concat_characters=_item_or_default(concat_characters, k, True),
                 mask_and_scale=_item_or_default(mask_and_scale, k, True),
                 decode_times=cast(
-                    bool | CFDatetimeCoder, _item_or_default(decode_times, k, True)
+                    bool | str | CFDatetimeCoder,
+                    _item_or_default(decode_times, k, True),
                 ),
                 stack_char_dim=stack_char_dim,
                 use_cftime=_item_or_default(use_cftime, k, None),
@@ -499,7 +514,10 @@ def decode_cf(
     obj: T_DatasetOrAbstractstore,
     concat_characters: bool = True,
     mask_and_scale: bool = True,
-    decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] = True,
+    decode_times: bool
+    | str
+    | CFDatetimeCoder
+    | Mapping[str, bool | str | CFDatetimeCoder] = True,
     decode_coords: bool | Literal["coordinates", "all"] = True,
     drop_variables: T_DropVariables = None,
     use_cftime: bool | None = None,
@@ -521,7 +539,7 @@ def decode_cf(
     mask_and_scale : bool, optional
         Lazily scale (using scale_factor and add_offset) and mask
         (using _FillValue).
-    decode_times : bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder], optional
+    decode_times : bool | str | CFDatetimeCoder | Mapping[str, bool | str | CFDatetimeCoder], optional
         Decode cf times (e.g., integers since "hours since 2000-01-01") to
         np.datetime64.
     decode_coords : bool or {"coordinates", "all"}, optional
diff --git a/xarray/core/types.py b/xarray/core/types.py
index 69cee210798..86db0cf4972 100644
--- a/xarray/core/types.py
+++ b/xarray/core/types.py
@@ -281,6 +281,16 @@ def copy(
     "366_day",
 ]
 
+# Maps the user-facing ``decode_times`` flag to the ``on_error`` option of
+# CFDatetimeCoder. (Maybe add a "never" flag instead of False?)
+DECODE_TIMES_OPTIONS = {
+    True: "raise",
+    False: False,
+    "raise": "raise",
+    "warn": "warn",
+    "ignore": "ignore",
+}
+
 CoarsenBoundaryOptions = Literal["exact", "trim", "pad"]
 SideOptions = Literal["left", "right"]
 InclusiveOptions = Literal["both", "neither", "left", "right"]
diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py
index 1c373f3a00a..9cc80ffa8b2 100644
--- a/xarray/tests/test_backends_api.py
+++ b/xarray/tests/test_backends_api.py
@@ -3,6 +3,7 @@
 import io
 import re
 import sys
+import warnings
 from numbers import Number
 
 import numpy as np
@@ -13,6 +14,7 @@
 from xarray.tests import (
     assert_identical,
     assert_no_warnings,
+    requires_cftime,
     requires_dask,
     requires_h5netcdf,
     requires_netCDF4,
@@ -323,3 +325,79 @@ def test_default_indexes_passthrough(self, create_default_indexes):
             )
 
         assert initial.coords.equals(final.coords)
+
+
+@pytest.fixture(scope="module")
+def wonky_time_file(tmp_path_factory):
+    """
+    Create a netCDF file with two time variables -- one good, one bad.
+
+    The file lives in a pytest-managed temporary directory, so it does
+    not have to be removed by hand.
+    """
+    ds = xr.Dataset(
+        {
+            "good_time": ("good_time", [0, 1, 2], {"units": "days since 2000-01-01"}),
+            "bad_time": (
+                "bad_time",
+                [1.0, 2.0, 3.0],
+                {"units": "mdays since 2000-01-01"},
+            ),
+        }
+    )
+    path = tmp_path_factory.mktemp("decode_times") / "wonky_time.nc"
+    ds.to_netcdf(path)
+    return path
+
+
+@requires_cftime
+@requires_netCDF4
+class TestDecodeTimesOptions:
+    """
+    Tests for passing decode_times flags into open_dataset
+    with netCDF files.
+    """
+
+    # NOTE: the flag semantics are tested in detail in test_conventions.py;
+    # these tests only check that the flags are passed through open_dataset.
+    # Only the netCDF4 backend is exercised here -- parametrizing over all
+    # backends may be unnecessary if they share the same decoding code path.
+
+    def test_decode_times_default(self, wonky_time_file) -> None:
+        # should raise
+        with pytest.raises(ValueError, match="unable to decode time units"):
+            xr.open_dataset(wonky_time_file)
+
+    def test_decode_times_bad_flag(self, wonky_time_file) -> None:
+        # should raise
+        with pytest.raises(ValueError, match="`decode_times` must be one of"):
+            xr.open_dataset(wonky_time_file, decode_times="bad_flag")
+
+    @pytest.mark.parametrize("flag", [True, "raise"])
+    def test_decode_times_error(self, flag, wonky_time_file) -> None:
+        # should raise
+        with pytest.raises(ValueError, match="unable to decode time units"):
+            xr.open_dataset(wonky_time_file, decode_times=flag)
+
+    def test_decode_times_warn(self, wonky_time_file) -> None:
+        # should warn
+        with pytest.warns(UserWarning, match="unable to decode time units"):
+            ds = xr.open_dataset(wonky_time_file, decode_times="warn")
+        with ds:
+            assert str(ds["good_time"].dtype) == "datetime64[ns]"
+            assert str(ds["bad_time"].dtype) == "float64"
+
+    def test_decode_times_false(self, wonky_time_file) -> None:
+        # should not decode the time variables
+        with xr.open_dataset(wonky_time_file, decode_times=False) as ds:
+            assert str(ds["good_time"].dtype) == "int64"
+            assert str(ds["bad_time"].dtype) == "float64"
+
+    def test_decode_times_ignore(self, wonky_time_file) -> None:
+        # should decode the one good time unit without emitting a warning
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            ds = xr.open_dataset(wonky_time_file, decode_times="ignore")
+        with ds:
+            assert str(ds["good_time"].dtype) == "datetime64[ns]"
+            assert str(ds["bad_time"].dtype) == "float64"
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index e03f15a5119..cfaecc947fd 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -2217,3 +2217,65 @@ def test_roundtrip_empty_datetime64_array(time_unit: PDDatetimeUnitOptions) -> N
     )
     assert_identical(variable, roundtripped)
     assert roundtripped.dtype == variable.dtype
+
+
+@requires_cftime
+def test_on_error_raises() -> None:
+    """
+    By default, decoding errors should raise
+    """
+    array = np.array([0, 1, 2], dtype=np.dtype("int64"))
+    encoded = Variable(["time"], array, attrs={"units": "ms since 00:00:00"})
+
+    # default is "raise"
+    coder = CFDatetimeCoder()
+
+    with pytest.raises(ValueError, match="unable to decode time units"):
+        coder.decode(encoded)
+
+    # setting to "raise" should do the same thing.
+    coder = CFDatetimeCoder(on_error="raise")
+
+    with pytest.raises(ValueError, match="unable to decode time units"):
+        coder.decode(encoded)
+
+
+@requires_cftime
+def test_on_error_ignore() -> None:
+    """
+    If on_error="ignore", no change.
+    """
+    array = np.array([0, 1, 2], dtype=np.dtype("int64"))
+    encoded = Variable(["time"], array, attrs={"units": "ms since 00:00:00"})
+
+    coder = CFDatetimeCoder(on_error="ignore")
+
+    decoded = coder.decode(encoded)
+
+    # it shouldn't have changed the variable
+    assert decoded is encoded
+
+
+@requires_cftime
+def test_on_error_warn() -> None:
+    """
+    If on_error="warn", no change, with a warning.
+    """
+    array = np.array([0, 1, 2], dtype=np.dtype("int64"))
+    encoded = Variable(["time"], array, attrs={"units": "ms since 2000:00:00"})
+
+    coder = CFDatetimeCoder(on_error="warn")
+
+    with pytest.warns(UserWarning, match="unable to decode time units"):
+        decoded = coder.decode(encoded)
+
+    # it shouldn't have changed the variable
+    assert decoded is encoded
+
+
+def test_on_error_invalid() -> None:
+    """
+    An unrecognized on_error option should be rejected by the constructor.
+    """
+    with pytest.raises(ValueError, match="on_error must be"):
+        CFDatetimeCoder(on_error="bad_flag")
diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index 38b835fd3d5..354522d5c81 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -269,6 +269,56 @@ def test_emit_coordinates_attribute_in_encoding(self) -> None:
 
 @requires_cftime
 class TestDecodeCF:
+    # dataset used by the tests for the decode_times flags
+    dataset_with_one_good_one_bad_time = Dataset(
+        {
+            "good_time": ("good_time", [0, 1, 2], {"units": "days since 2000-01-01"}),
+            "bad_time": (
+                "bad_time",
+                [1.0, 2.0, 3.0],
+                {"units": "mdays since 2000-01-01"},
+            ),
+        }
+    )
+
+    def test_decode_times_bad_flag(self) -> None:
+        # should raise
+        with pytest.raises(ValueError, match="`decode_times` must be one of"):
+            decode_cf(self.dataset_with_one_good_one_bad_time, decode_times="bad_flag")
+
+    def test_decode_times_default(self) -> None:
+        # should raise
+        with pytest.raises(ValueError, match="unable to decode time units"):
+            decode_cf(self.dataset_with_one_good_one_bad_time)
+
+    @pytest.mark.parametrize("flag", [True, "raise"])
+    def test_decode_times_error(self, flag) -> None:
+        # should raise
+        with pytest.raises(ValueError, match="unable to decode time units"):
+            decode_cf(self.dataset_with_one_good_one_bad_time, decode_times=flag)
+
+    def test_decode_times_warn(self) -> None:
+        # should warn
+        with pytest.warns(UserWarning, match="unable to decode time units"):
+            result = decode_cf(
+                self.dataset_with_one_good_one_bad_time, decode_times="warn"
+            )
+        assert str(result["good_time"].dtype) == "datetime64[ns]"
+        assert str(result["bad_time"].dtype) == "float64"
+
+    def test_decode_times_false(self) -> None:
+        # should not decode the time variables
+        result = decode_cf(self.dataset_with_one_good_one_bad_time, decode_times=False)
+
+        assert result.equals(self.dataset_with_one_good_one_bad_time)
+
+    def test_decode_times_ignore(self) -> None:
+        # should decode the one good time unit
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            result = decode_cf(
+                self.dataset_with_one_good_one_bad_time, decode_times="ignore"
+            )
+        assert str(result["good_time"].dtype) == "datetime64[ns]"
+        assert str(result["bad_time"].dtype) == "float64"
+
     def test_dataset(self) -> None:
         original = Dataset(
             {