From 50cefc75a21e1391e162b763f17db03191fa6e99 Mon Sep 17 00:00:00 2001 From: Hayden Rodrigues Date: Fri, 29 Aug 2025 14:48:57 +0100 Subject: [PATCH 1/4] feat: Improve `Result.get` error handling for invalid columns --- pyprobe/result.py | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/pyprobe/result.py b/pyprobe/result.py index f725b49e..350d9a31 100644 --- a/pyprobe/result.py +++ b/pyprobe/result.py @@ -1,5 +1,6 @@ """A module for the Result class.""" +import difflib import re from collections.abc import Callable from functools import wraps @@ -332,8 +333,7 @@ def __getitem__(self, *column_names: str) -> "Result": ) def get( - self, - *column_names: str, + self, *column_names: str ) -> NDArray[np.float64] | tuple[NDArray[np.float64], ...]: """Return one or more columns of the data as separate 1D numpy arrays. @@ -341,22 +341,46 @@ def get( column_names (str): The column name(s) to return. Returns: - Union[NDArray[np.float64], Tuple[NDArray[np.float64], ...]]: + Union[NDArray[np.float64], tuple[NDArray[np.float64],...]]: The column(s) as numpy array(s). Raises: - ValueError: If no column names are provided. - ValueError: If a column name is not in the data. + ValueError: If no column names are provided + ValueError: If a column is not in the data. Includes suggested close matches + if available. """ - array = self.data_with_columns(*column_names).to_numpy() if len(column_names) == 0: error_msg = "At least one column name must be provided." logger.error(error_msg) raise ValueError(error_msg) - elif len(column_names) == 1: - return array.T[0] + + unrecognized_names = set(column_names) - set(self.column_list) + if not unrecognized_names: + return ( + self.data_with_columns(*column_names).to_numpy().T[0] + if len(column_names) == 1 + else tuple(self.data_with_columns(*column_names).to_numpy().T) + ) else: - return tuple(array.T) + error_msgs = [] + for name in unrecognized_names: + matches = difflib.get_close_matches( + name, self.column_list, n=1, cutoff=0.5 + ) + if matches: + error_msg = ( + f'Column "{name}" not found. Did you mean "{matches[0]}"?' + ) + logger.error(error_msg) + error_msgs.append(error_msg) + else: + error_msg = ( + f'Column "{name}" not found and no close match found. ' + f"Available columns: {', '.join(self.column_list)}" + ) + logger.error(error_msg) + error_msgs.append(error_msg) + raise ValueError("\n" + "\n".join(f"- {msg}" for msg in error_msgs)) @property def contains_lazyframe(self) -> bool: From d0584a1fca2952619ca641a8dce6204869211687 Mon Sep 17 00:00:00 2001 From: Hayden Rodrigues Date: Thu, 7 Aug 2025 10:57:25 +0100 Subject: [PATCH 2/4] test: Add tests for `Result.get` error handling --- tests/test_result.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_result.py b/tests/test_result.py index 4bc9f383..8eb59933 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -125,7 +125,7 @@ def test_live_dataframe(): @pytest.fixture -def Result_fixture(lazyframe_fixture, info_fixture): +def Result_fixture(lazyframe_fixture, info_fixture): """Return a Result instance.""" return Result( base_dataframe=lazyframe_fixture, @@ -189,6 +189,13 @@ def test_get(Result_fixture): voltage, Result_fixture.data["Voltage [V]"].to_numpy(), ) + # Test with a mistyped column + with pytest.raises(ValueError): + current = Result_fixture.get("Crrent [A]") + np_testing.assert_array_equal( + current, + Result_fixture.data["Current [A]"].to_numpy(), + ) def test_get_only(Result_fixture): @@ -703,3 +710,5 @@ def test_from_polars_io_python_object(): assert isinstance(result.base_dataframe, pl.DataFrame) assert result.info == info pl_testing.assert_frame_equal(result.data, test_df, check_column_order=False) + +Result_fixture.get('Voltage [V]') # Ensure Result_fixture is used \ No newline at end of file From 9eacc2f9faa32c145b48c60f1fbaec493ceb67d1 Mon Sep 17 00:00:00 2001 From: Hayden Rodrigues Date: Fri, 29 Aug 2025 15:06:37 +0100 Subject: [PATCH 3/4] docs: add fuzzy matching example --- docs/source/examples/getting-started.ipynb | 57 +++++++++++++++++++++- tests/test_result.py | 5 +- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/docs/source/examples/getting-started.ipynb b/docs/source/examples/getting-started.ipynb index 071ca765..2d22ddeb 100644 --- a/docs/source/examples/getting-started.ipynb +++ b/docs/source/examples/getting-started.ipynb @@ -224,6 +224,56 @@ "print(\"Voltage = \", voltage)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A mistyped column will raise an error and suggest close matches if available:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "current, voltage = (\n", + " cell.procedure[\"Sample\"]\n", + " .experiment(\"Break-in Cycles\")\n", + " .charge(0)\n", + " .get(\"Crrent [A]\", \"Voltge [V]\")\n", + ")\n", + "print(\"Current [A] = \", current)\n", + "print(\"Voltage [V]= \", voltage)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If the column is completely mistyped an error will be thrown and all available columns will be listed:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "voltage = (\n", + " cell.procedure[\"Sample\"].experiment(\"Break-in Cycles\").charge(0).get(\"valoolashaka\")\n", + ")\n", + "print(type(voltage), voltage)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -332,6 +382,11 @@ } ], "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -342,7 +397,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/tests/test_result.py b/tests/test_result.py index 8eb59933..01b2a31b 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -125,7 +125,7 @@ def test_live_dataframe(): @pytest.fixture -def Result_fixture(lazyframe_fixture, info_fixture): +def Result_fixture(lazyframe_fixture, info_fixture): """Return a Result instance.""" return Result( base_dataframe=lazyframe_fixture, @@ -711,4 +711,5 @@ def test_from_polars_io_python_object(): assert result.info == info pl_testing.assert_frame_equal(result.data, test_df, check_column_order=False) -Result_fixture.get('Voltage [V]') # Ensure Result_fixture is used \ No newline at end of file + +Result_fixture.get("Voltage [V]") # Ensure Result_fixture is used From 9475f7a215b3c3c5bd140b713aaaf7254957c042 Mon Sep 17 00:00:00 2001 From: Hayden Rodrigues Date: Mon, 1 Sep 2025 15:04:55 +0100 Subject: [PATCH 4/4] Improved fuzzy matching of Result.get to support unit conversion --- pyprobe/result.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pyprobe/result.py b/pyprobe/result.py index 350d9a31..7481617f 100644 --- a/pyprobe/result.py +++ b/pyprobe/result.py @@ -354,16 +354,15 @@ def get( logger.error(error_msg) raise ValueError(error_msg) - unrecognized_names = set(column_names) - set(self.column_list) - if not unrecognized_names: + try: return ( self.data_with_columns(*column_names).to_numpy().T[0] if len(column_names) == 1 else tuple(self.data_with_columns(*column_names).to_numpy().T) ) - else: + except ValueError: error_msgs = [] - for name in unrecognized_names: + for name in column_names: matches = difflib.get_close_matches( name, self.column_list, n=1, cutoff=0.5 )