diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e7f35e8d..fa3317b3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,7 @@ name: "Build and test" on: pull_request: + types: [opened, synchronize, reopened, ready_for_review] branches: - main workflow_dispatch: diff --git a/pyprobe/cell.py b/pyprobe/cell.py index 8e5fd1a3..c8703a90 100644 --- a/pyprobe/cell.py +++ b/pyprobe/cell.py @@ -102,7 +102,93 @@ def process_cycler_file( t1 = time.time() importer = cycler_dict[cycler](input_data_path=input_data_path) self._write_parquet(importer, output_data_path) - print(f"\tparquet written in {time.time()-t1:.2f} seconds.") + print(f"\tparquet written in {time.time()-t1: .2f} seconds.") + + @staticmethod + def _verify_parquet(filename: str) -> str: + """Function to verify the filename is in the correct parquet format. + + Args: + filename (str): The filename to verify. + + Returns: + str: The filename. + """ + # Get the file extension of output_filename + _, ext = os.path.splitext(filename) + + # If the file extension is not .parquet, replace it with .parquet + if ext != ".parquet": + filename = os.path.splitext(filename)[0] + ".parquet" + return filename + + @staticmethod + def _get_filename( + info: Dict[str, Optional[str | int | float]], + filename_function: Callable[[str], str], + filename_inputs: List[str], + ) -> str: + """Function to generate the filename for the data, if provided as a function. + + Args: + info (dict): The info entry for the data file. + filename_function (function): The function to generate the input name. + filename_inputs (list): + The list of inputs to filename_function. These must be keys of the cell + info. + + Returns: + str: The input name for the data file. 
+ """ + return filename_function( + *(str(info[filename_inputs[i]]) for i in range(len(filename_inputs))) + ) + + def _get_data_paths( + self, + folder_path: str, + filename: str | Callable[[str], str], + filename_inputs: Optional[List[str]] = None, + ) -> str: + """Function to generate the full path for the data file. + + Args: + folder_path (str): The path to the folder containing the data file. + filename (str | function): A filename string or a function to generate + the file name. + filename_inputs (Optional[list]): The list of inputs to filename_function. + These must be keys of the cell info. + + Returns: + str: The full path for the data file. + """ + if isinstance(filename, str): + filename_str = filename + else: + if filename_inputs is None: + raise ValueError( + "filename_inputs must be provided when filename is a function." + ) + filename_str = self._get_filename(self.info, filename, filename_inputs) + + data_path = os.path.join(folder_path, filename_str) + return data_path + + def _write_parquet( + self, + importer: basecycler.BaseCycler, + output_data_path: str, + ) -> None: + """Import data from a cycler file and write to a PyProBE parquet file. + + Args: + importer (BaseCycler): The cycler object to import the data. + output_data_path (str): The path to write the parquet file.
+ """ + dataframe = importer.pyprobe_dataframe + if isinstance(dataframe, pl.LazyFrame): + dataframe = dataframe.collect() + dataframe.write_parquet(output_data_path) @validate_call def process_generic_file( @@ -150,7 +236,7 @@ def process_generic_file( column_dict=column_dict, ) self._write_parquet(importer, output_data_path) - print(f"\tparquet written in {time.time()-t1:.2f} seconds.") + print(f"\tparquet written in {time.time()-t1: .2f} seconds.") @validate_call def add_procedure( @@ -198,92 +284,6 @@ def add_procedure( pybamm_experiment_list=readme.pybamm_experiment_list, ) - @staticmethod - def _verify_parquet(filename: str) -> str: - """Function to verify the filename is in the correct parquet format. - - Args: - filename (str): The filename to verify. - - Returns: - str: The filename. - """ - # Get the file extension of output_filename - _, ext = os.path.splitext(filename) - - # If the file extension is not .parquet, replace it with .parquet - if ext != ".parquet": - filename = os.path.splitext(filename)[0] + ".parquet" - return filename - - def _write_parquet( - self, - importer: basecycler.BaseCycler, - output_data_path: str, - ) -> None: - """Import data from a cycler file and write to a PyProBE parquet file. - - Args: - importer (BaseCycler): The cycler object to import the data. - output_data_path (str): The path to write the parquet file. - """ - dataframe = importer.pyprobe_dataframe - if isinstance(dataframe, pl.LazyFrame): - dataframe = dataframe.collect() - dataframe.write_parquet(output_data_path) - - @staticmethod - def _get_filename( - info: Dict[str, Optional[str | int | float]], - filename_function: Callable[[str], str], - filename_inputs: List[str], - ) -> str: - """Function to generate the filename for the data, if provided as a function. - - Args: - info (dict): The info entry for the data file. - filename_function (function): The function to generate the input name. - filename_inputs (list): - The list of inputs to filename_function. 
These must be keys of the cell - info. - - Returns: - str: The input name for the data file. - """ - return filename_function( - *(str(info[filename_inputs[i]]) for i in range(len(filename_inputs))) - ) - - def _get_data_paths( - self, - folder_path: str, - filename: str | Callable[[str], str], - filename_inputs: Optional[List[str]] = None, - ) -> str: - """Function to generate the input and output paths for the data file. - - Args: - folder_path (str): The path to the folder containing the data file. - filename (str | function): A filename string or a function to generate - the file name. - filename_inputs (Optional[list]): The list of inputs to filename_function. - These must be keys of the cell info. - - Returns: - str: The full path for the data file. - """ - if isinstance(filename, str): - filename_str = filename - else: - if filename_inputs is None: - raise ValueError( - "filename_inputs must be provided when filename is a function." - ) - filename_str = self._get_filename(self.info, filename, filename_inputs) - - data_path = os.path.join(folder_path, filename_str) - return data_path - def make_cell_list( record_filepath: str, diff --git a/pyprobe/filters.py b/pyprobe/filters.py index 2a3fd1b4..8f456670 100644 --- a/pyprobe/filters.py +++ b/pyprobe/filters.py @@ -84,6 +84,7 @@ def _step( base_dataframe=base_dataframe, info=filter.info, column_definitions=filter.column_definitions, + preceding_points=filter.preceding_points, ) @@ -104,6 +105,7 @@ def _cycle(filter: "FilterToExperimentType", *cycle_numbers: Union[int]) -> "Cyc base_dataframe=lf_filtered, info=filter.info, column_definitions=filter.column_definitions, + preceding_points=filter.preceding_points, ) @@ -243,9 +245,11 @@ class Procedure(RawData): column_definitions: Dict[str, str] = Field( default_factory=lambda: default_column_definitions.copy() ) + preceding_points: pl.LazyFrame | pl.DataFrame = pl.LazyFrame({}) def model_post_init(self, __context: Any) -> None: """Create a procedure class.""" + 
self.preceding_points = self._get_preceding_points(self.base_dataframe) self.zero_column( "Time [s]", "Procedure Time [s]", @@ -267,6 +271,33 @@ def model_post_init(self, __context: Any) -> None: constant_current = _constant_current constant_voltage = _constant_voltage + @staticmethod + def _get_preceding_points(dataframe: pl.DataFrame | pl.LazyFrame) -> pl.DataFrame: + """Get the rows of the data frame prior to the first point of each event. + + Args: + dataframe (pl.DataFrame | pl.LazyFrame): The data frame to filter. + + Returns: + pl.DataFrame: + The rows of the data frame prior to the first point of each + event. + """ + event_ends = dataframe.filter( + ( + pl.col("Event").cast(pl.Int64) + - pl.col("Event").cast(pl.Int64).shift(-1) + != 0 + ) + ) + return event_ends.with_columns( + [ + pl.lit(None).alias("Cycle"), + pl.lit(None).alias("Step"), + pl.col("Event") + 1, + ] + ) + def experiment(self, *experiment_names: str) -> "Experiment": """Return an experiment object from the procedure. @@ -292,6 +323,7 @@ def experiment(self, *experiment_names: str) -> "Experiment": base_dataframe=lf_filtered, info=self.info, column_definitions=self.column_definitions, + preceding_points=self.preceding_points, ) @property @@ -381,6 +413,7 @@ class Experiment(RawData): column_definitions: Dict[str, str] = Field( default_factory=lambda: default_column_definitions.copy() ) + preceding_points: pl.LazyFrame | pl.DataFrame def model_post_init(self, __context: Any) -> None: """Create an experiment class.""" @@ -414,6 +447,7 @@ class Cycle(RawData): column_definitions: Dict[str, str] = Field( default_factory=lambda: default_column_definitions.copy() ) + preceding_points: pl.LazyFrame | pl.DataFrame def model_post_init(self, __context: Any) -> None: """Create a cycle class.""" @@ -446,6 +480,7 @@ class Step(RawData): column_definitions: Dict[str, str] = Field( default_factory=lambda: default_column_definitions.copy() ) + preceding_points: pl.LazyFrame | pl.DataFrame def model_post_init(self,
__context: Any) -> None: """Create a step class.""" diff --git a/pyprobe/rawdata.py b/pyprobe/rawdata.py index 82a96237..19abd11a 100644 --- a/pyprobe/rawdata.py +++ b/pyprobe/rawdata.py @@ -55,6 +55,7 @@ class RawData(Result): column_definitions: Dict[str, str] = Field( default_factory=lambda: default_column_definitions.copy() ) + preceding_points: pl.LazyFrame | pl.DataFrame @field_validator("base_dataframe") @classmethod diff --git a/tests/analysis/test_cycling.py b/tests/analysis/test_cycling.py index dacf929c..439f87c5 100644 --- a/tests/analysis/test_cycling.py +++ b/tests/analysis/test_cycling.py @@ -2,6 +2,7 @@ import math +import polars as pl import pytest from pyprobe.analysis.cycling import Cycling @@ -12,7 +13,11 @@ @pytest.fixture def Cycling_fixture(lazyframe_fixture, info_fixture): """Return a Cycling instance.""" - input_data = Experiment(base_dataframe=lazyframe_fixture, info=info_fixture) + input_data = Experiment( + base_dataframe=lazyframe_fixture, + info=info_fixture, + preceding_points=pl.LazyFrame({}), + ) return Cycling(input_data=input_data) diff --git a/tests/test_procedure.py b/tests/test_procedure.py index cea3ad87..4e3ee698 100644 --- a/tests/test_procedure.py +++ b/tests/test_procedure.py @@ -5,6 +5,33 @@ import numpy as np import pandas as pd import polars as pl +from polars.testing import assert_frame_equal + +from pyprobe.filters import Procedure + + +def test_get_preceding_points(): + """Test the _get_preceding_points method.""" + dataframe = pl.DataFrame( + { + "Step": [1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2], + "Cycle": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2], + "Event": [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], + "Time [s]": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], + "Current [A]": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], + } + ) + preceding_points = Procedure._get_preceding_points(dataframe) + expected_dataframe = pl.DataFrame( + { + "Step": [None, None, None], + "Cycle": [None, None, None], + "Event": [2, 3, 4], + "Time [s]": [3,
6, 9], + "Current [A]": [3, 6, 9], + } + ) + assert_frame_equal(preceding_points, expected_dataframe) def test_experiment(procedure_fixture, cycles_fixture, steps_fixture, benchmark): @@ -34,9 +61,40 @@ def make_experiment(): assert experiment.data["Experiment Capacity [Ah]"][0] == 0 -def test_experiment_names(procedure_fixture, titles_fixture): - """Test the experiment_names method.""" +def test_init(procedure_fixture, titles_fixture): + """Test the initialisation of a procedure object.""" assert procedure_fixture.experiment_names == titles_fixture + assert procedure_fixture.data["Procedure Time [s]"][0] == 0 + assert procedure_fixture.data["Procedure Capacity [Ah]"][0] == 0 + + np.testing.assert_array_equal( + procedure_fixture.preceding_points.select("Event") + .collect() + .to_numpy() + .flatten(), + np.arange(1, 62), + ) + assert ( + procedure_fixture.preceding_points.select("Current [A]").collect().to_numpy()[0] + == 0.3996 / 1000 + ) + assert ( + procedure_fixture.preceding_points.select("Voltage [V]").collect().to_numpy()[0] + == 4.2001 + ) + + assert ( + procedure_fixture.preceding_points.select("Current [A]") + .collect() + .to_numpy()[-1] + == 0 + ) + assert ( + procedure_fixture.preceding_points.select("Voltage [V]") + .collect() + .to_numpy()[-1] + == 3.4382 + ) def test_flatten(procedure_fixture): @@ -46,12 +104,6 @@ def test_flatten(procedure_fixture): assert flat_list == [1, 2, 3, 4, 5, 6] -def test_zero_columns(procedure_fixture): - """Test methods to set the first value of columns to zero.""" - assert procedure_fixture.data["Procedure Time [s]"][0] == 0 - assert procedure_fixture.data["Procedure Capacity [Ah]"][0] == 0 - - def test_add_external_data(procedure_fixture): """Test adding external data to the procedure.""" # Create external data diff --git a/tests/test_rawdata.py b/tests/test_rawdata.py index fb957d43..419c8088 100644 --- a/tests/test_rawdata.py +++ b/tests/test_rawdata.py @@ -12,7 +12,11 @@ @pytest.fixture def 
RawData_fixture(lazyframe_fixture, info_fixture): """Return a Result instance.""" - return RawData(base_dataframe=lazyframe_fixture, info=info_fixture) + return RawData( + base_dataframe=lazyframe_fixture, + info=info_fixture, + preceding_points=pl.LazyFrame({}), + ) def test_init(RawData_fixture): @@ -101,7 +105,11 @@ def test_zero_column(RawData_fixture): def test_definitions(lazyframe_fixture, info_fixture): """Test that the definitions have been correctly set.""" - rawdata = RawData(base_dataframe=lazyframe_fixture, info=info_fixture) + rawdata = RawData( + base_dataframe=lazyframe_fixture, + info=info_fixture, + preceding_points=pl.LazyFrame({}), + ) definition_keys = list(rawdata.column_definitions.keys()) assert set(definition_keys) == set( [