diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 59235fd5f..da0aa3a15 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,7 +1,11 @@ # Upcoming Release ## Major features and improvements + ## Bug fixes and other changes + +- Relaxed all `gcsfs` upper-bound pins (previously capped below `2023.7`). + ## Community contributions # Release 9.2.0 @@ -13,12 +17,12 @@ - Added `autogen` mode to `OpikTraceDataset` for tracing AutoGen agent conversations with OpenTelemetry integration. - Added the following new **experimental** datasets: -| Type | Description | Location | -|-------------------------------|----------------------------------------------------------------------------------| -------------------------------------- | -| `mlrun.MLRunAbstractDataset` | A base dataset for MLRun integration, can be used directly for generic artifacts | `kedro_datasets_experimental.mlrun` | -| `mlrun.MLRunModel` | A dataset for saving and loading ML models via MLRun with framework metadata | `kedro_datasets_experimental.mlrun` | -| `mlrun.MLRunDataframeDataset` | A dataset for saving and loading pandas DataFrames as MLRun artifacts | `kedro_datasets_experimental.mlrun` | -| `mlrun.MLRunResult` | A dataset for logging scalar results and metrics to MLRun | `kedro_datasets_experimental.mlrun` | +| Type | Description | Location | +| ----------------------------- | -------------------------------------------------------------------------------- | ----------------------------------- | +| `mlrun.MLRunAbstractDataset` | A base dataset for MLRun integration, can be used directly for generic artifacts | `kedro_datasets_experimental.mlrun` | +| `mlrun.MLRunModel` | A dataset for saving and loading ML models via MLRun with framework metadata | `kedro_datasets_experimental.mlrun` | +| `mlrun.MLRunDataframeDataset` | A dataset for saving and loading pandas DataFrames as MLRun artifacts | `kedro_datasets_experimental.mlrun` | +| `mlrun.MLRunResult` | A dataset for logging scalar results and metrics to MLRun | `kedro_datasets_experimental.mlrun` | ## Bug fixes and other changes @@ -27,6 +31,8 @@ ## Community contributions +Many thanks to the following Kedroids for contributing PRs to this release: + [Katerina Molchanova](https://github.com/rokatyy) # Release 9.1.1 @@ -48,9 +54,9 @@ - Added the following new **experimental** datasets: -| Type | Description | Location | -| -------------------------- | ------------------------------------------------------------------------------ | -------------------------------------- | -| `chromadb.ChromaDBDataset` | A dataset for loading and saving data to ChromaDB vector database collections. | `kedro_datasets_experimental.chromadb` | +| Type | Description | Location | +| ----------------------------- | -------------------------------------------------------------------------------- | -------------------------------------- | +| `chromadb.ChromaDBDataset` | A dataset for loading and saving data to ChromaDB vector database collections. | `kedro_datasets_experimental.chromadb` | | `mlrun.MLRunAbstractDataset` | A base dataset for MLRun integration, can be used directly for generic artifacts | `kedro_datasets_experimental.mlrun` | | `mlrun.MLRunModel` | A dataset for saving and loading ML models via MLRun with framework metadata | `kedro_datasets_experimental.mlrun` | | `mlrun.MLRunDataframeDataset` | A dataset for saving and loading pandas DataFrames as MLRun artifacts | `kedro_datasets_experimental.mlrun` | @@ -63,6 +69,8 @@ ## Community contributions +Many thanks to the following Kedroids for contributing PRs to this release: + - [Armand Masseau](https://github.com/armandmasseaugit) - [SRIKAR-8-77](https://github.com/SRIKAR-8-77) @@ -83,7 +91,7 @@ - Graduated the following **experimental** datasets to core: | Type | Description | Location | -| ----------------------------------- | ------------------------------------------------------ |--------------------------- | +| ----------------------------------- | ------------------------------------------------------ | -------------------------- | | `langchain.ChatOpenAIDataset` | A dataset for loading a ChatOpenAI LangChain model. | `kedro_datasets.langchain` | | `langchain.OpenAIEmbeddingsDataset` | A dataset for loading an OpenAIEmbeddings model. | `kedro_datasets.langchain` | | `langchain.ChatAnthropicDataset` | A dataset for loading a ChatAnthropic LangChain model. | `kedro_datasets.langchain` | @@ -92,7 +100,7 @@ - Added the following new **experimental** datasets: | Type | Description | Location | -| ---------------------------------- | ----------------------------------------------------------------------------- | --------------------------------------- | +| ---------------------------------- | ------------------------------------------------------------------------------ | --------------------------------------- | | `langfuse.LangfuseTraceDataset` | A dataset to provide Langfuse tracing clients and callbacks. | `kedro_datasets_experimental.langfuse` | | `langchain.LangChainPromptDataset` | A dataset for loading LangChain prompts. | `kedro_datasets_experimental.langchain` | | `pypdf.PDFDataset` | A dataset to read PDF files and extract text using pypdf. | `kedro_datasets_experimental.pypdf` | @@ -101,14 +109,15 @@ | `opik.OpikPromptDataset` | A dataset to provide Opik integration for handling prompts. | `kedro_datasets_experimental.opik` | | `opik.OpikTraceDataset` | A dataset to provide Opik tracing clients and callbacks. | `kedro_datasets_experimental.opik` | - ## Bug fixes and other changes + - Add HTMLPreview type. - Fixed `StudyDataset` to properly propagate a RDB password through the dataset's `credentials`. ## Community contributions Many thanks to the following Kedroids for contributing PRs to this release: + - [Guillaume Tauzin](https://github.com/gtauzin) - [gitgud5000](https://github.com/gitgud5000) @@ -130,6 +139,8 @@ Many thanks to the following Kedroids for contributing PRs to this release: ## Community contributions +Many thanks to the following Kedroids for contributing PRs to this release: + - [Minura Punchihewa](https://github.com/MinuraPunchihewa) - [gitgud5000](https://github.com/gitgud5000) @@ -491,7 +502,7 @@ Many thanks to the following Kedroids for contributing PRs to this release: - Renamed dataset and error classes, in accordance with the [Kedro lexicon](https://github.com/kedro-org/kedro/wiki/Kedro-documentation-style-guide#kedro-lexicon). Dataset classes ending with "DataSet" are deprecated and will be removed in 2.0.0. -# Release 1.7.0: +# Release 1.7.0 ## Major features and improvements @@ -512,13 +523,13 @@ Many thanks to the following Kedroids for contributing PRs to this release: - [Walber Moreira](https://github.com/wmoreiraa) -# Release 1.6.0: +# Release 1.6.0 ## Major features and improvements - Added support for Python 3.11. -# Release 1.5.3: +# Release 1.5.3 ## Bug fixes and other changes @@ -530,13 +541,13 @@ Many thanks to the following Kedroids for contributing PRs to this release: ## Community contributions -# Release 1.5.2: +# Release 1.5.2 ## Bug fixes and other changes - Fixed problematic `kedro-datasets` optional dependencies. -# Release 1.5.1: +# Release 1.5.1 ## Bug fixes and other changes @@ -576,13 +587,13 @@ Many thanks to the following Kedroids for contributing PRs to this release: - Fixed documentations of `GeoJSONDataSet` and `SparkStreamingDataSet`. - Fixed problematic docstrings causing Read the Docs builds on Kedro to fail. -# Release 1.4.1: +# Release 1.4.1 ## Bug fixes and other changes - Fixed missing `pickle.PickleDataSet` extras in `setup.py`. -# Release 1.4.0: +# Release 1.4.0 ## Major features and improvements @@ -596,7 +607,7 @@ Many thanks to the following Kedroids for contributing PRs to this release: - Fixed problematic docstrings of `APIDataSet`. -# Release 1.3.0: +# Release 1.3.0 ## Major features and improvements @@ -627,7 +638,7 @@ Many thanks to the following Kedroids for contributing PRs to this release: - [McDonnellJoseph](https://github.com/McDonnellJoseph) - [Danny Farah](https://github.com/dannyrfar) -# Release 1.2.0: +# Release 1.2.0 ## Major features and improvements @@ -638,13 +649,13 @@ Many thanks to the following Kedroids for contributing PRs to this release: - Fixed a docstring in the Pandas `SQLQueryDataSet` as part of the Sphinx revamp on Kedro. -# Release 1.1.1: +# Release 1.1.1 ## Bug fixes and other changes - Fixed problematic docstrings causing Read the Docs builds on Kedro to fail. -# Release 1.1.0: +# Release 1.1.0 ## Major features and improvements @@ -660,7 +671,7 @@ Many thanks to the following Kedroids for contributing PRs to this release: - Add `mssql` backend to the `SQLQueryDataSet` DataSet using `pyodbc` library. - Added a warning when the user tries to use `SparkDataSet` on Databricks without specifying a file path with the `/dbfs/` prefix. -# Release 1.0.2: +# Release 1.0.2 ## Bug fixes and other changes @@ -668,13 +679,13 @@ Many thanks to the following Kedroids for contributing PRs to this release: - Relaxed PyArrow range in line with pandas. - Fixed outdated links to the dill package documentation. -# Release 1.0.1: +# Release 1.0.1 ## Bug fixes and other changes - Fixed docstring formatting in `VideoDataSet` that was causing the documentation builds to fail. -# Release 1.0.0: +# Release 1.0.0 First official release of Kedro-Datasets. @@ -686,7 +697,7 @@ The datasets have always been part of the core Kedro Framework project inside `k - Changed `pandas.ParquetDataSet` to load data using pandas instead of parquet. -# Release 0.1.0: +# Release 0.1.0 The initial release of Kedro-Datasets. diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 93a5bbbd4..263f38dc5 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -35,7 +35,7 @@ spark-emr = [] # Uses EMR runtime Spark # Filesystem specific packages for Spark spark-s3 = ["s3fs>=2021.4"] -spark-gcs = ["gcsfs>=2023.1, <2023.7"] +spark-gcs = ["gcsfs>=2023.1"] spark-azure = ["adlfs>=2023.1"] spark-hdfs = ["pyarrow>=7.0"] # PyArrow includes HDFS support @@ -276,7 +276,7 @@ test = [ "dill~=0.3.1", "filelock>=3.4.0, <4.0", "fiona >=1.8, <2.0", - "gcsfs>=2023.1, <2023.7", + "gcsfs>=2023.1", "geopandas>=0.8.0, <2.0", "hdfs>=2.5.8, <3.0", "holoviews>=1.13.0", @@ -405,7 +405,7 @@ experimental_test = [ "u8darts[all]", "pypdf>=3.0.0", "moto==5.0.0", - "gcsfs>=2023.1, <2023.7", + "gcsfs>=2023.1", "s3fs>=2021.04", "reportlab>=3.6.0", "chromadb>=1.0.0", diff --git a/kedro-datasets/tests/conftest.py b/kedro-datasets/tests/conftest.py index 7521c458b..bf70634b7 100644 --- a/kedro-datasets/tests/conftest.py +++ b/kedro-datasets/tests/conftest.py @@ -77,10 +77,19 @@ def headers(self) -> dict: def raw_headers(self) -> aiohttp.typedefs.RawHeaders: return self._headers.items() + def close(self) -> None: + # No-op: the mock doesn't hold real connections or event loops, + # but newer aiohttp (3.13+) calls close() which accesses _loop. + pass + + def release(self) -> None: + pass + @fixture(scope="session", autouse=True) def patch_aiobotocore(): import aiobotocore.endpoint # noqa: PLC0415 + import aiobotocore.httpchecksum # noqa: PLC0415 def factory(original: Callable) -> Callable: def patched_convert_to_response_dict( @@ -91,10 +100,30 @@ def patched_convert_to_response_dict( return patched_convert_to_response_dict + # Save the original function to restore later + original_convert_to_response_dict = aiobotocore.endpoint.convert_to_response_dict aiobotocore.endpoint.convert_to_response_dict = factory( aiobotocore.endpoint.convert_to_response_dict ) + # Remove async overrides from AioAwsChunkedWrapper so it falls back to + # the synchronous base-class methods. Moto returns synchronous responses, + # and the async overrides raise errors (e.g. KeyError: 'response') when + # s3fs tries to use them with mocked S3 writes. + # Save original attributes to restore them during teardown + saved_attrs = {} + for attr in ("_make_chunk", "read"): + if hasattr(aiobotocore.httpchecksum.AioAwsChunkedWrapper, attr): + saved_attrs[attr] = getattr(aiobotocore.httpchecksum.AioAwsChunkedWrapper, attr) + delattr(aiobotocore.httpchecksum.AioAwsChunkedWrapper, attr) + + yield + + # Restore original state during teardown + aiobotocore.endpoint.convert_to_response_dict = original_convert_to_response_dict + for attr, value in saved_attrs.items(): + setattr(aiobotocore.httpchecksum.AioAwsChunkedWrapper, attr, value) + @fixture(params=[None]) def load_version(request):