ImperialCollegeLondon · tomjholland · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,6 +1,7 @@
 name: "Build and test"
 on:
   pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
     branches:
       - main
   workflow_dispatch:

diff --git a/pyprobe/cell.py b/pyprobe/cell.py
@@ -102,7 +102,93 @@ def process_cycler_file(
         t1 = time.time()
         importer = cycler_dict[cycler](input_data_path=input_data_path)
         self._write_parquet(importer, output_data_path)
-        print(f"\tparquet written in {time.time()-t1:.2f} seconds.")
+        print(f"\tparquet written in {time.time()-t1: .2f} seconds.")
+
+    @staticmethod
+    def _verify_parquet(filename: str) -> str:
+        """Return ``filename`` with its extension forced to ``.parquet``.
+
+        Args:
+            filename (str): The filename to check.
+
+        Returns:
+            str: The filename, unchanged if it already ends in ``.parquet``.
+        """
+        # Split off whatever extension the name currently carries
+        _, ext = os.path.splitext(filename)
+
+        # A missing or different extension is replaced by .parquet (case-sensitive)
+        if ext != ".parquet":
+            filename = os.path.splitext(filename)[0] + ".parquet"
+        return filename
+
+    @staticmethod
+    def _get_filename(
+        info: Dict[str, Optional[str | int | float]],
+        filename_function: Callable[[str], str],
+        filename_inputs: List[str],
+    ) -> str:
+        """Build a filename by calling ``filename_function`` on values from ``info``.
+
+        Args:
+            info (dict): The info entry for the data file.
+            filename_function (function): Called with one positional string
+                argument per entry of ``filename_inputs``, in order.
+            filename_inputs (list): Keys of ``info``; each value is looked up and
+                converted with ``str`` before being passed on.
+
+        Returns:
+            str: The value returned by ``filename_function``.
+        """
+        return filename_function(
+            *(str(info[filename_inputs[i]]) for i in range(len(filename_inputs)))
+        )
+
+    def _get_data_paths(
+        self,
+        folder_path: str,
+        filename: str | Callable[[str], str],
+        filename_inputs: Optional[List[str]] = None,
+    ) -> str:
+        """Build the full path to a data file inside ``folder_path``.
+
+        Args:
+            folder_path (str): The path to the folder containing the data file.
+            filename (str | function): A filename string or a function to generate
+                the file name from the cell info.
+            filename_inputs (Optional[list]): Keys of the cell info passed to
+                ``filename``; required (ValueError otherwise) if it is a function.
+
+        Returns:
+            str: The full path for the data file.
+        """
+        if isinstance(filename, str):
+            filename_str = filename
+        else:  # any non-str value is treated as the filename function
+            if filename_inputs is None:
+                raise ValueError(
+                    "filename_inputs must be provided when filename is a function."
+                )
+            filename_str = self._get_filename(self.info, filename, filename_inputs)
+
+        data_path = os.path.join(folder_path, filename_str)
+        return data_path
+
+    def _write_parquet(
+        self,
+        importer: basecycler.BaseCycler,
+        output_data_path: str,
+    ) -> None:
+        """Write the importer's PyProBE dataframe to a parquet file.
+
+        Args:
+            importer (BaseCycler): The cycler object holding the imported data.
+            output_data_path (str): The path to write the parquet file.
+        """
+        dataframe = importer.pyprobe_dataframe
+        if isinstance(dataframe, pl.LazyFrame):
+            dataframe = dataframe.collect()  # materialise lazy data before writing
+        dataframe.write_parquet(output_data_path)
 
     @validate_call
     def process_generic_file(
@@ -150,7 +236,7 @@ def process_generic_file(
             column_dict=column_dict,
         )
         self._write_parquet(importer, output_data_path)
-        print(f"\tparquet written in {time.time()-t1:.2f} seconds.")
+        print(f"\tparquet written in {time.time()-t1: .2f} seconds.")
 
     @validate_call
     def add_procedure(
@@ -198,92 +284,6 @@ def add_procedure(
             pybamm_experiment_list=readme.pybamm_experiment_list,
         )
 
-    @staticmethod
-    def _verify_parquet(filename: str) -> str:
-        """Function to verify the filename is in the correct parquet format.
-
-        Args:
-            filename (str): The filename to verify.
-
-        Returns:
-            str: The filename.
-        """
-        # Get the file extension of output_filename
-        _, ext = os.path.splitext(filename)
-
-        # If the file extension is not .parquet, replace it with .parquet
-        if ext != ".parquet":
-            filename = os.path.splitext(filename)[0] + ".parquet"
-        return filename
-
-    def _write_parquet(
-        self,
-        importer: basecycler.BaseCycler,
-        output_data_path: str,
-    ) -> None:
-        """Import data from a cycler file and write to a PyProBE parquet file.
-
-        Args:
-            importer (BaseCycler): The cycler object to import the data.
-            output_data_path (str): The path to write the parquet file.
-        """
-        dataframe = importer.pyprobe_dataframe
-        if isinstance(dataframe, pl.LazyFrame):
-            dataframe = dataframe.collect()
-        dataframe.write_parquet(output_data_path)
-
-    @staticmethod
-    def _get_filename(
-        info: Dict[str, Optional[str | int | float]],
-        filename_function: Callable[[str], str],
-        filename_inputs: List[str],
-    ) -> str:
-        """Function to generate the filename for the data, if provided as a function.
-
-        Args:
-            info (dict): The info entry for the data file.
-            filename_function (function): The function to generate the input name.
-            filename_inputs (list):
-                The list of inputs to filename_function. These must be keys of the cell
-                info.
-
-        Returns:
-            str: The input name for the data file.
-        """
-        return filename_function(
-            *(str(info[filename_inputs[i]]) for i in range(len(filename_inputs)))
-        )
-
-    def _get_data_paths(
-        self,
-        folder_path: str,
-        filename: str | Callable[[str], str],
-        filename_inputs: Optional[List[str]] = None,
-    ) -> str:
-        """Function to generate the input and output paths for the data file.
-
-        Args:
-            folder_path (str): The path to the folder containing the data file.
-            filename (str | function): A filename string or a function to generate
-                the file name.
-            filename_inputs (Optional[list]): The list of inputs to filename_function.
-                These must be keys of the cell info.
-
-        Returns:
-            str: The full path for the data file.
-        """
-        if isinstance(filename, str):
-            filename_str = filename
-        else:
-            if filename_inputs is None:
-                raise ValueError(
-                    "filename_inputs must be provided when filename is a function."
-                )
-            filename_str = self._get_filename(self.info, filename, filename_inputs)
-
-        data_path = os.path.join(folder_path, filename_str)
-        return data_path
-
 
 def make_cell_list(
     record_filepath: str,

diff --git a/pyprobe/filters.py b/pyprobe/filters.py
@@ -84,6 +84,7 @@ def _step(
         base_dataframe=base_dataframe,
         info=filter.info,
         column_definitions=filter.column_definitions,
+        preceding_points=filter.preceding_points,
     )
 
 
@@ -104,6 +105,7 @@ def _cycle(filter: "FilterToExperimentType", *cycle_numbers: Union[int]) -> "Cyc
         base_dataframe=lf_filtered,
         info=filter.info,
         column_definitions=filter.column_definitions,
+        preceding_points=filter.preceding_points,
     )
 
 
@@ -243,9 +245,11 @@ class Procedure(RawData):
     column_definitions: Dict[str, str] = Field(
         default_factory=lambda: default_column_definitions.copy()
     )
+    preceding_points: pl.LazyFrame | pl.DataFrame = pl.LazyFrame({})
 
     def model_post_init(self, __context: Any) -> None:
         """Create a procedure class."""
+        self.preceding_points = self._get_preceding_points(self.base_dataframe)
         self.zero_column(
             "Time [s]",
             "Procedure Time [s]",
@@ -267,6 +271,33 @@ def model_post_init(self, __context: Any) -> None:
     constant_current = _constant_current
     constant_voltage = _constant_voltage
 
+    @staticmethod
+    def _get_preceding_points(dataframe: pl.DataFrame | pl.LazyFrame) -> pl.DataFrame:
+        """Select the row before each event change, relabelled to the next event.
+
+        Args:
+            dataframe (pl.DataFrame | pl.LazyFrame): Data with an "Event" column.
+
+        Returns:
+            pl.DataFrame: Rows whose "Event" differs from the next row's, with
+                "Cycle"/"Step" set to null and "Event" + 1. NOTE(review): a lazy
+                input appears to give a LazyFrame back; confirm the annotation.
+        """
+        event_ends = dataframe.filter(
+            (
+                pl.col("Event").cast(pl.Int64)
+                - pl.col("Event").cast(pl.Int64).shift(-1)  # next row's Event
+                != 0
+            )
+        )
+        return event_ends.with_columns(
+            [
+                pl.lit(None).alias("Cycle"),
+                pl.lit(None).alias("Step"),
+                pl.col("Event") + 1,  # presumably the following event's number
+            ]
+        )
+
     def experiment(self, *experiment_names: str) -> "Experiment":
         """Return an experiment object from the procedure.
 
@@ -292,6 +323,7 @@ def experiment(self, *experiment_names: str) -> "Experiment":
             base_dataframe=lf_filtered,
             info=self.info,
             column_definitions=self.column_definitions,
+            preceding_points=self.preceding_points,
         )
 
     @property
@@ -381,6 +413,7 @@ class Experiment(RawData):
     column_definitions: Dict[str, str] = Field(
         default_factory=lambda: default_column_definitions.copy()
     )
+    preceding_points: pl.LazyFrame | pl.DataFrame
 
     def model_post_init(self, __context: Any) -> None:
         """Create an experiment class."""
@@ -414,6 +447,7 @@ class Cycle(RawData):
     column_definitions: Dict[str, str] = Field(
         default_factory=lambda: default_column_definitions.copy()
     )
+    preceding_points: pl.LazyFrame | pl.DataFrame
 
     def model_post_init(self, __context: Any) -> None:
         """Create a cycle class."""
@@ -446,6 +480,7 @@ class Step(RawData):
     column_definitions: Dict[str, str] = Field(
         default_factory=lambda: default_column_definitions.copy()
     )
+    preceding_points: pl.LazyFrame | pl.DataFrame
 
     def model_post_init(self, __context: Any) -> None:
         """Create a step class."""

diff --git a/pyprobe/rawdata.py b/pyprobe/rawdata.py
@@ -55,6 +55,7 @@ class RawData(Result):
     column_definitions: Dict[str, str] = Field(
         default_factory=lambda: default_column_definitions.copy()
     )
+    preceding_points: pl.LazyFrame | pl.DataFrame
 
     @field_validator("base_dataframe")
     @classmethod

diff --git a/tests/analysis/test_cycling.py b/tests/analysis/test_cycling.py
@@ -2,6 +2,7 @@
 
 import math
 
+import polars as pl
 import pytest
 
 from pyprobe.analysis.cycling import Cycling
@@ -12,7 +13,11 @@
 @pytest.fixture
 def Cycling_fixture(lazyframe_fixture, info_fixture):
     """Return a Cycling instance."""
-    input_data = Experiment(base_dataframe=lazyframe_fixture, info=info_fixture)
+    input_data = Experiment(
+        base_dataframe=lazyframe_fixture,
+        info=info_fixture,
+        preceding_points=pl.LazyFrame({}),  # empty placeholder frame
+    )
     return Cycling(input_data=input_data)