diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e20b8c0..52877a28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project are documented in this file. This changelog ## [Unreleased] +### Added + +- Added typed LibreOffice workbook handles and session-scoped workbook lifecycle tracking so rich extraction can reuse cached bridge payloads safely and reject foreign or closed workbook handles. + +### Fixed + +- Fixed LibreOffice rich backend workbook lifecycle integration so custom `session_factory` implementations that only support legacy path-based `extract_chart_geometries()` and `extract_draw_page_shapes()` continue to work without `load_workbook()` and `close_workbook()` hooks. + ## [0.7.1] - 2026-03-21 ### Added diff --git a/src/exstruct/core/backends/libreoffice_backend.py b/src/exstruct/core/backends/libreoffice_backend.py index d733d150..b3f4db21 100644 --- a/src/exstruct/core/backends/libreoffice_backend.py +++ b/src/exstruct/core/backends/libreoffice_backend.py @@ -3,15 +3,18 @@ from __future__ import annotations from collections.abc import Callable, Sequence +from contextlib import AbstractContextManager import logging import math from pathlib import Path +from typing import Protocol, TypeGuard from ...models import Arrow, Chart, Shape, SmartArt from ..libreoffice import ( LibreOfficeChartGeometry, LibreOfficeDrawPageShape, LibreOfficeSession, + LibreOfficeWorkbookHandle, ) from ..ooxml_drawing import OoxmlConnectorInfo, OoxmlShapeInfo, read_sheet_drawings from ..shapes import angle_to_compass, compute_line_angle_deg @@ -20,6 +23,53 @@ logger = logging.getLogger(__name__) +class _LibreOfficePathSessionProtocol(Protocol): + """Structural contract for legacy path-only rich-extraction sessions.""" + + def extract_chart_geometries( + self, file_path: Path + ) -> dict[str, list[LibreOfficeChartGeometry]]: + """Extract chart geometries for a workbook path.""" + + def extract_draw_page_shapes( + self, file_path: Path + ) -> dict[str, list[LibreOfficeDrawPageShape]]: + """Extract draw-page shapes for a workbook path.""" + + +class _LibreOfficeWorkbookLifecycleSessionProtocol(Protocol): + """Structural contract for sessions that support typed workbook handles.""" + + def load_workbook(self, file_path: Path) -> LibreOfficeWorkbookHandle: + """Open a workbook lifecycle handle.""" + + def close_workbook(self, workbook: LibreOfficeWorkbookHandle) -> None: + """Close a workbook lifecycle handle.""" + + def extract_chart_geometries( + self, workbook: Path | LibreOfficeWorkbookHandle + ) -> dict[str, list[LibreOfficeChartGeometry]]: + """Extract chart geometries for a path or workbook handle.""" + + def extract_draw_page_shapes( + self, workbook: Path | LibreOfficeWorkbookHandle + ) -> dict[str, list[LibreOfficeDrawPageShape]]: + """Extract draw-page shapes for a path or workbook handle.""" + + +_LibreOfficeRichSession = ( + _LibreOfficePathSessionProtocol | _LibreOfficeWorkbookLifecycleSessionProtocol +) + + +def _supports_workbook_lifecycle( + session: _LibreOfficeRichSession, +) -> TypeGuard[_LibreOfficeWorkbookLifecycleSessionProtocol]: + """Return whether a session exposes the typed workbook lifecycle hooks.""" + + return hasattr(session, "load_workbook") and hasattr(session, "close_workbook") + + class LibreOfficeRichBackend(RichBackend): """Best-effort rich extraction backend gated by LibreOffice runtime availability.""" @@ -27,7 +77,9 @@ def __init__( self, file_path: Path, *, - session_factory: Callable[[], LibreOfficeSession] = LibreOfficeSession.from_env, + session_factory: Callable[ + [], AbstractContextManager[_LibreOfficeRichSession] + ] = (LibreOfficeSession.from_env), ) -> None: """Store the workbook path and session factory used for lazy LibreOffice extraction.""" @@ -150,8 +202,10 @@ def _read_chart_geometries(self) -> dict[str, list[LibreOfficeChartGeometry]]: return self._chart_geometries with self._session_factory() as session: if hasattr(session, "extract_chart_geometries"): - self._chart_geometries = session.extract_chart_geometries( - self.file_path + self._chart_geometries = ( + self._extract_chart_geometries_with_optional_workbook_lifecycle( + session=session, + ) ) else: self._chart_geometries = {} @@ -164,13 +218,45 @@ def _read_draw_page_shapes(self) -> dict[str, list[LibreOfficeDrawPageShape]]: return self._draw_page_shapes with self._session_factory() as session: if hasattr(session, "extract_draw_page_shapes"): - self._draw_page_shapes = session.extract_draw_page_shapes( - self.file_path + self._draw_page_shapes = ( + self._extract_draw_page_shapes_with_optional_workbook_lifecycle( + session=session, + ) ) else: self._draw_page_shapes = {} return self._draw_page_shapes + def _extract_chart_geometries_with_optional_workbook_lifecycle( + self, + *, + session: _LibreOfficeRichSession, + ) -> dict[str, list[LibreOfficeChartGeometry]]: + """Call chart extraction while preserving legacy path-only sessions.""" + + if not _supports_workbook_lifecycle(session): + return session.extract_chart_geometries(self.file_path) + workbook = session.load_workbook(self.file_path) + try: + return session.extract_chart_geometries(workbook) + finally: + session.close_workbook(workbook) + + def _extract_draw_page_shapes_with_optional_workbook_lifecycle( + self, + *, + session: _LibreOfficeRichSession, + ) -> dict[str, list[LibreOfficeDrawPageShape]]: + """Call draw-page extraction while preserving legacy path-only sessions.""" + + if not _supports_workbook_lifecycle(session): + return session.extract_draw_page_shapes(self.file_path) + workbook = session.load_workbook(self.file_path) + try: + return session.extract_draw_page_shapes(workbook) + finally: + session.close_workbook(workbook) + def _build_shapes_from_ooxml( shapes: Sequence[OoxmlShapeInfo], diff --git a/src/exstruct/core/libreoffice.py b/src/exstruct/core/libreoffice.py index 6bceb710..82c31768 100644 --- a/src/exstruct/core/libreoffice.py +++ b/src/exstruct/core/libreoffice.py @@ -14,7 +14,7 @@ import sys from tempfile import NamedTemporaryFile, mkdtemp import time -from typing import Literal, TextIO, cast +from typing import Literal, TextIO, cast, overload _DEFAULT_STARTUP_TIMEOUT_SEC = 15.0 _DEFAULT_EXEC_TIMEOUT_SEC = 30.0 @@ -81,6 +81,15 @@ class LibreOfficeSessionConfig: profile_root: Path | None +@dataclass(frozen=True) +class LibreOfficeWorkbookHandle: + """Typed workbook token scoped to one ``LibreOfficeSession`` instance.""" + + file_path: Path + owner_session_id: int + workbook_id: int + + @dataclass(frozen=True) class _LibreOfficeStartupAttempt: """Configuration for one LibreOffice startup strategy.""" @@ -129,7 +138,9 @@ def __init__(self, config: LibreOfficeSessionConfig) -> None: self._soffice_process: subprocess.Popen[str] | None = None self._accept_port: int | None = None self._python_path = _resolve_python_path(config.soffice_path) - self._bridge_payload_cache: dict[str, object] = {} + self._bridge_payload_cache: dict[tuple[str, Path], object] = {} + self._open_workbooks: dict[int, Path] = {} + self._next_workbook_id = 0 self._soffice_stderr_sink: TextIO | None = None self._soffice_stderr_path: Path | None = None @@ -215,32 +226,114 @@ def __exit__(self, exc_type: object, exc: object, tb: object) -> None: self._soffice_stderr_sink = None self._soffice_stderr_path = None self._accept_port = None + self._bridge_payload_cache.clear() + self._open_workbooks.clear() if self._temp_profile_dir is not None: _cleanup_profile_dir(self._temp_profile_dir) self._temp_profile_dir = None - def load_workbook(self, file_path: Path) -> object: - """Return a lightweight workbook token for future subprocess integration.""" - return {"file_path": str(file_path.resolve())} + def load_workbook(self, file_path: Path) -> LibreOfficeWorkbookHandle: + """Register a workbook path and return a typed session-local handle.""" - def close_workbook(self, workbook: object) -> None: - """Close a workbook token returned by ``load_workbook``.""" - _ = workbook + resolved = file_path.resolve() + self._next_workbook_id += 1 + handle = LibreOfficeWorkbookHandle( + file_path=resolved, + owner_session_id=id(self), + workbook_id=self._next_workbook_id, + ) + self._open_workbooks[handle.workbook_id] = resolved + return handle + + def close_workbook(self, workbook: LibreOfficeWorkbookHandle) -> None: + """Release a typed workbook handle and clear any stale workbook cache.""" + + file_path = self._require_handle_path(workbook, allow_closed=True) + if file_path is None: + return + self._open_workbooks.pop(workbook.workbook_id, None) + if file_path not in self._open_workbooks.values(): + self._clear_workbook_cache(file_path) def extract_draw_page_shapes( - self, file_path: Path + self, workbook: Path | LibreOfficeWorkbookHandle ) -> dict[str, list[LibreOfficeDrawPageShape]]: """Extract best-effort draw-page shapes from the workbook.""" - payload = self._run_bridge(file_path, kind="draw-page") + payload = self._run_bridge( + self._resolve_workbook_path(workbook), + kind="draw-page", + ) return _parse_draw_page_payload(payload) def extract_chart_geometries( - self, file_path: Path + self, workbook: Path | LibreOfficeWorkbookHandle ) -> dict[str, list[LibreOfficeChartGeometry]]: """Extract chart geometry candidates from the workbook draw pages.""" - payload = self._run_bridge(file_path, kind="charts") + payload = self._run_bridge( + self._resolve_workbook_path(workbook), + kind="charts", + ) return _parse_chart_payload(payload) + def _resolve_workbook_path( + self, workbook: Path | LibreOfficeWorkbookHandle + ) -> Path: + """Resolve either a direct path or a typed handle into a workbook path.""" + + if isinstance(workbook, LibreOfficeWorkbookHandle): + return self._require_handle_path(workbook) + return workbook.resolve() + + @overload + def _require_handle_path( + self, + workbook: LibreOfficeWorkbookHandle, + *, + allow_closed: Literal[False] = False, + ) -> Path: ... + + @overload + def _require_handle_path( + self, + workbook: LibreOfficeWorkbookHandle, + *, + allow_closed: Literal[True], + ) -> Path | None: ... + + def _require_handle_path( + self, + workbook: LibreOfficeWorkbookHandle, + *, + allow_closed: bool = False, + ) -> Path | None: + """Validate workbook ownership and return the registered file path.""" + + if not isinstance(workbook, LibreOfficeWorkbookHandle): + raise TypeError( + "LibreOfficeSession workbook lifecycle expects a " + "LibreOfficeWorkbookHandle." + ) + if workbook.owner_session_id != id(self): + raise ValueError( + "LibreOffice workbook handle belongs to a different LibreOfficeSession." + ) + file_path = self._open_workbooks.get(workbook.workbook_id) + if file_path is None and not allow_closed: + raise RuntimeError("LibreOffice workbook handle is closed.") + if file_path is not None and file_path != workbook.file_path.resolve(): + raise ValueError( + "LibreOffice workbook handle does not match the registered " + "workbook path." + ) + return file_path + + def _clear_workbook_cache(self, file_path: Path) -> None: + """Drop cached bridge payloads for one workbook path.""" + + stale_keys = [key for key in self._bridge_payload_cache if key[1] == file_path] + for key in stale_keys: + self._bridge_payload_cache.pop(key, None) + def _run_bridge( self, file_path: Path, @@ -263,7 +356,8 @@ def _run_bridge( if self._accept_port is None or self._python_path is None: raise RuntimeError("LibreOfficeSession must be entered before extraction.") - cache_key = f"{kind}:{file_path.resolve()}" + resolved = file_path.resolve() + cache_key = (kind, resolved) if cache_key in self._bridge_payload_cache: return self._bridge_payload_cache[cache_key] try: @@ -271,7 +365,7 @@ def _run_bridge( python_path=self._python_path, host="127.0.0.1", port=self._accept_port, - file_path=file_path.resolve(), + file_path=resolved, kind=kind, timeout_sec=self.config.exec_timeout_sec, ) diff --git a/tasks/feature_spec.md b/tasks/feature_spec.md index f71c0350..9a70ea6c 100644 --- a/tasks/feature_spec.md +++ b/tasks/feature_spec.md @@ -208,3 +208,46 @@ - `not-needed` - rationale: this adds a single public repository policy document without changing architecture, public API design, or long-lived internal tradeoff policy. + +## 2026-04-16 issue #77 LibreOffice typed workbook handle + +### Goal + +- Replace the raw `dict` token returned by `LibreOfficeSession.load_workbook()` with a typed workbook handle. +- Give `LibreOfficeSession.close_workbook()` meaningful session-local cleanup instead of a no-op. +- Keep the current LibreOffice extraction mode, fallback behavior, and bridge subprocess lifecycle unchanged. + +### Contract summary + +- `LibreOfficeSession.load_workbook()` returns a frozen typed handle that stores the resolved workbook path and the owning session identity. +- `LibreOfficeSession.close_workbook()` validates that the handle belongs to the current session, rejects rehydrated handles whose `file_path` no longer matches the registered workbook id, becomes idempotent for repeated close attempts, and clears any session-local bridge cache entries for that workbook. +- `LibreOfficeSession.extract_draw_page_shapes()` and `extract_chart_geometries()` continue to support path-based extraction, but may also consume the typed workbook handle so callers can follow a typed lifecycle. +- `LibreOfficeRichBackend.session_factory` accepts the structural rich-extraction session contract, including legacy path-only sessions and lifecycle-aware sessions, rather than only the concrete built-in `LibreOfficeSession`. +- No public CLI, MCP, extraction-mode, fallback, or serialization contracts change in this issue. + +### Permanent destinations + +- `src/exstruct/core/libreoffice.py` + - Canonical implementation for the typed LibreOffice workbook handle and close semantics. +- `src/exstruct/core/backends/libreoffice_backend.py` + - Updated to consume the typed session lifecycle without changing backend policy. +- `tests/core/test_libreoffice_backend.py` + - Regression coverage for typed handle behavior, ownership checks, idempotent close, and cache invalidation. +- `tasks/feature_spec.md` and `tasks/todo.md` + - Retain the compact planning and verification record for this issue. + +### Constraints + +- Do not change the bridge subprocess contract in `src/exstruct/core/_libreoffice_bridge.py`; workbook documents are still opened and closed per bridge invocation. +- Do not change backend fallback policy or session startup/shutdown behavior. +- Keep backward compatibility for current path-based extraction helpers while introducing the typed handle. + +### Verification + +- `uv run pytest tests/core/test_libreoffice_backend.py -q` +- `uv run task precommit-run` + +### ADR verdict + +- `not-needed` +- rationale: this is an internal contract hardening change that preserves existing extraction policy and runtime behavior; the durable rationale can stay in the task record. diff --git a/tasks/lessons.md b/tasks/lessons.md index 06ec87b2..7ea53dcf 100644 --- a/tasks/lessons.md +++ b/tasks/lessons.md @@ -102,3 +102,8 @@ ## 2026-03-21 skill installation docs lessons - When a repo-owned Skill is intended to be publicly installable from the repository, document the `npx skills add / --skill ` path as the primary install route instead of only manual copy steps. + +## 2026-04-16 session_factory compatibility lessons + +- When tightening a public or semi-public extension point such as `session_factory`, preserve the previously accepted minimal method surface unless the compatibility contract is explicitly changed. +- If a new lifecycle hook is optional for the default implementation, add a regression test for a legacy custom implementation that lacks that hook before reporting completion. diff --git a/tasks/todo.md b/tasks/todo.md index 6c1b6c55..01079d77 100644 --- a/tasks/todo.md +++ b/tasks/todo.md @@ -105,3 +105,30 @@ - `uv run task precommit-run` - `uv run pytest -q` - Result summary: `pre-commit` passed (`ruff`, `ruff-format`, `mypy`), and `pytest` completed with `913 passed, 4 skipped`. + +## 2026-04-16 issue #77 LibreOffice typed workbook handle + +### Planning + +- [x] Confirm the issue scope, related PR review comment, and ADR verdict for the typed handle change. +- [x] Record the compact spec and implementation constraints in `tasks/feature_spec.md`. +- [x] Replace the raw workbook token in `src/exstruct/core/libreoffice.py` with a typed handle and meaningful `close_workbook()` cleanup. +- [x] Update `src/exstruct/core/backends/libreoffice_backend.py` and test doubles to use the typed workbook lifecycle. +- [x] Add regression tests for typed handle ownership, idempotent close, and cache invalidation. +- [x] Run the targeted LibreOffice backend tests and `uv run task precommit-run`. +- [x] Fill the Review section with verification evidence and permanent-destination notes. + +### Review + +- Added `LibreOfficeWorkbookHandle` in `src/exstruct/core/libreoffice.py` and replaced the raw `dict` workbook token with a typed, session-owned handle. +- `LibreOfficeSession.close_workbook()` now validates session ownership, is idempotent for repeated close calls, and clears session-local bridge cache entries when the last handle for a workbook is released. +- `LibreOfficeSession.extract_draw_page_shapes()` and `extract_chart_geometries()` now accept either a direct `Path` or a typed workbook handle, preserving the existing path-based call pattern while enabling the typed lifecycle. +- `src/exstruct/core/backends/libreoffice_backend.py` now prefers `load_workbook() -> extract_*() -> close_workbook()` when the session implements that lifecycle, while preserving the legacy path-only `session_factory` extension point as a runtime fallback. +- `tests/core/test_libreoffice_backend.py` now covers typed handle creation, backend lifecycle usage, legacy path-only session compatibility, foreign-session rejection, repeated close idempotence, closed-handle extraction failure, and cache invalidation after close. +- Review follow-up: `session_factory` is now typed as a structural path-or-lifecycle protocol instead of the concrete built-in session, so legacy custom sessions and test doubles no longer need `cast(LibreOfficeSession, ...)` to type-check. +- Review follow-up: workbook-handle validation now rejects rehydrated handles whose `file_path` disagrees with the registered workbook id, preventing forged handles from reusing another workbook's cache/close path. +- Review follow-up: `_resolve_workbook_path()` now directly returns `_require_handle_path()` for typed handles, removing an unreachable `None` branch so the control flow matches the actual closed-handle behavior. +- No new `dev-docs/specs/`, `dev-docs/architecture/`, `dev-docs/adr/`, or public `docs/` updates were required; this issue only hardened the existing internal LibreOffice session contract. +- Verification: + - `uv run pytest tests/core/test_libreoffice_backend.py -q` + - `uv run task precommit-run` diff --git a/tests/core/test_libreoffice_backend.py b/tests/core/test_libreoffice_backend.py index 75856a5d..fea23988 100644 --- a/tests/core/test_libreoffice_backend.py +++ b/tests/core/test_libreoffice_backend.py @@ -26,6 +26,7 @@ LibreOfficeSession, LibreOfficeSessionConfig, LibreOfficeUnavailableError, + LibreOfficeWorkbookHandle, _close_stderr_sink, _LibreOfficeStartupAttempt, _LibreOfficeStartupAttemptError, @@ -57,6 +58,11 @@ class _DummySession: """Dummy LibreOffice session used in tests.""" + def __init__(self) -> None: + """Initialize workbook-handle tracking for the session double.""" + + self._next_workbook_id = 0 + def __enter__(self) -> _DummySession: """Return the test double as the context-manager result.""" @@ -69,18 +75,23 @@ def __exit__(self, exc_type: object, exc: object, tb: object) -> None: _ = exc _ = tb - def load_workbook(self, file_path: Path) -> object: - """Return a lightweight workbook token for tests.""" + def load_workbook(self, file_path: Path) -> LibreOfficeWorkbookHandle: + """Return a typed workbook handle for tests.""" - return {"file_path": str(file_path)} + self._next_workbook_id += 1 + return LibreOfficeWorkbookHandle( + file_path=file_path.resolve(), + owner_session_id=id(self), + workbook_id=self._next_workbook_id, + ) - def close_workbook(self, workbook: object) -> None: - """Accept a workbook token without additional cleanup.""" + def close_workbook(self, workbook: LibreOfficeWorkbookHandle) -> None: + """Accept a workbook handle without additional cleanup.""" _ = workbook def extract_chart_geometries( - self, file_path: Path + self, file_path: Path | LibreOfficeWorkbookHandle ) -> dict[str, list[LibreOfficeChartGeometry]]: """Provide chart geometry data for this test double.""" @@ -88,7 +99,7 @@ def extract_chart_geometries( return {} def extract_draw_page_shapes( - self, file_path: Path + self, file_path: Path | LibreOfficeWorkbookHandle ) -> dict[str, list[LibreOfficeDrawPageShape]]: """Provide draw-page shape data for this test double.""" @@ -96,17 +107,17 @@ def extract_draw_page_shapes( return {} -def _dummy_session_factory() -> LibreOfficeSession: +def _dummy_session_factory() -> _DummySession: """Return a dummy LibreOffice session test double.""" - return cast(LibreOfficeSession, _DummySession()) + return _DummySession() class _ChartGeometrySession(_DummySession): """Session double that returns fixed chart geometry data.""" def extract_chart_geometries( - self, file_path: Path + self, file_path: Path | LibreOfficeWorkbookHandle ) -> dict[str, list[LibreOfficeChartGeometry]]: """Provide chart geometry data for this test double.""" @@ -125,10 +136,10 @@ def extract_chart_geometries( } -def _chart_geometry_session_factory() -> LibreOfficeSession: +def _chart_geometry_session_factory() -> _ChartGeometrySession: """Return a LibreOffice session double with chart geometries.""" - return cast(LibreOfficeSession, _ChartGeometrySession()) + return _ChartGeometrySession() class _DrawPageSession(_DummySession): @@ -137,10 +148,11 @@ class _DrawPageSession(_DummySession): def __init__(self, payload: dict[str, list[LibreOfficeDrawPageShape]]) -> None: """Initialize the test double.""" + super().__init__() self._payload = payload def extract_draw_page_shapes( - self, file_path: Path + self, file_path: Path | LibreOfficeWorkbookHandle ) -> dict[str, list[LibreOfficeDrawPageShape]]: """Provide draw-page shape data for this test double.""" @@ -253,12 +265,80 @@ def test_libreoffice_backend_avoids_probe_only_startup( entered: list[str] = [] draw_calls: list[Path] = [] chart_calls: list[Path] = [] + load_calls: list[Path] = [] + close_calls: list[LibreOfficeWorkbookHandle] = [] class _TrackingSession(_DummySession): def __enter__(self) -> _TrackingSession: entered.append("enter") return self + def extract_draw_page_shapes( + self, file_path: Path | LibreOfficeWorkbookHandle + ) -> dict[str, list[LibreOfficeDrawPageShape]]: + resolved = ( + file_path.file_path + if isinstance(file_path, LibreOfficeWorkbookHandle) + else file_path + ) + draw_calls.append(resolved) + return {} + + def extract_chart_geometries( + self, file_path: Path | LibreOfficeWorkbookHandle + ) -> dict[str, list[LibreOfficeChartGeometry]]: + resolved = ( + file_path.file_path + if isinstance(file_path, LibreOfficeWorkbookHandle) + else file_path + ) + chart_calls.append(resolved) + return {} + + def load_workbook(self, file_path: Path) -> LibreOfficeWorkbookHandle: + load_calls.append(file_path) + return super().load_workbook(file_path) + + def close_workbook(self, workbook: LibreOfficeWorkbookHandle) -> None: + close_calls.append(workbook) + + monkeypatch.setattr( + "exstruct.core.backends.libreoffice_backend.read_sheet_drawings", + lambda _path: {"Sheet1": SheetDrawingData()}, + ) + backend = LibreOfficeRichBackend( + Path("sample/basic/sample.xlsx"), + session_factory=lambda: _TrackingSession(), + ) + + backend.extract_shapes(mode="libreoffice") + backend.extract_charts(mode="libreoffice") + + resolved_sample = Path("sample/basic/sample.xlsx").resolve() + assert entered == ["enter", "enter"] + assert draw_calls == [resolved_sample] + assert chart_calls == [resolved_sample] + assert load_calls == [Path("sample/basic/sample.xlsx")] * 2 + assert len(close_calls) == 2 + + +def test_libreoffice_backend_supports_legacy_path_only_session_factory( + monkeypatch: MonkeyPatch, +) -> None: + """Verify legacy session doubles still work without workbook lifecycle hooks.""" + + draw_calls: list[Path] = [] + chart_calls: list[Path] = [] + + class _LegacySession: + def __enter__(self) -> _LegacySession: + return self + + def __exit__(self, exc_type: object, exc: object, tb: object) -> None: + _ = exc_type + _ = exc + _ = tb + def extract_draw_page_shapes( self, file_path: Path ) -> dict[str, list[LibreOfficeDrawPageShape]]: @@ -277,13 +357,12 @@ def extract_chart_geometries( ) backend = LibreOfficeRichBackend( Path("sample/basic/sample.xlsx"), - session_factory=lambda: cast(LibreOfficeSession, _TrackingSession()), + session_factory=lambda: _LegacySession(), ) backend.extract_shapes(mode="libreoffice") backend.extract_charts(mode="libreoffice") - assert entered == ["enter", "enter"] assert draw_calls == [Path("sample/basic/sample.xlsx")] assert chart_calls == [Path("sample/basic/sample.xlsx")] @@ -332,7 +411,7 @@ def test_libreoffice_backend_uses_draw_page_shapes_without_ooxml( ) backend = LibreOfficeRichBackend( Path("sample/flowchart/sample-shape-connector.xlsx"), - session_factory=lambda: cast(LibreOfficeSession, _DrawPageSession(payload)), + session_factory=lambda: _DrawPageSession(payload), ) shape_data = backend.extract_shapes(mode="libreoffice") @@ -438,7 +517,7 @@ def test_libreoffice_backend_prefers_ooxml_refs_over_uno_direct_refs( ) backend = LibreOfficeRichBackend( Path("sample/flowchart/sample-shape-connector.xlsx"), - session_factory=lambda: cast(LibreOfficeSession, _DrawPageSession(payload)), + session_factory=lambda: _DrawPageSession(payload), ) shape_data = backend.extract_shapes(mode="libreoffice") @@ -525,7 +604,7 @@ def test_libreoffice_backend_ignores_unmatched_ooxml_when_draw_page_exists( ) backend = LibreOfficeRichBackend( Path("sample/flowchart/sample-shape-connector.xlsx"), - session_factory=lambda: cast(LibreOfficeSession, _DrawPageSession(payload)), + session_factory=lambda: _DrawPageSession(payload), ) items = backend.extract_shapes(mode="libreoffice")["Sheet1"] @@ -607,7 +686,7 @@ def test_libreoffice_backend_logs_unmatched_ooxml_when_draw_page_exists( ) backend = LibreOfficeRichBackend( Path("sample/flowchart/sample-shape-connector.xlsx"), - session_factory=lambda: cast(LibreOfficeSession, _DrawPageSession(payload)), + session_factory=lambda: _DrawPageSession(payload), ) with caplog.at_level("DEBUG", logger="exstruct.core.backends.libreoffice_backend"): @@ -782,7 +861,7 @@ def test_libreoffice_backend_combines_ooxml_and_uno_connector_endpoints( ) backend = LibreOfficeRichBackend( Path("sample/flowchart/sample-shape-connector.xlsx"), - session_factory=lambda: cast(LibreOfficeSession, _DrawPageSession(payload)), + session_factory=lambda: _DrawPageSession(payload), ) connector = next( @@ -905,7 +984,7 @@ def test_libreoffice_backend_rotates_ooxml_connector_delta_for_heuristic_matchin ) backend = LibreOfficeRichBackend( Path("sample/flowchart/sample-shape-connector.xlsx"), - session_factory=lambda: cast(LibreOfficeSession, _DrawPageSession(payload)), + session_factory=lambda: _DrawPageSession(payload), ) connector = next( @@ -2416,19 +2495,34 @@ def _fake_run_bridge_extract_subprocess( ) session._accept_port = 42001 - workbook_token = session.load_workbook(workbook_path) - assert workbook_token == {"file_path": str(workbook_path.resolve())} - session.close_workbook(workbook_token) + workbook = session.load_workbook(workbook_path) + assert workbook == LibreOfficeWorkbookHandle( + file_path=workbook_path.resolve(), + owner_session_id=id(session), + workbook_id=1, + ) - draw_page_shapes = session.extract_draw_page_shapes(workbook_path) - chart_geometries = session.extract_chart_geometries(workbook_path) + draw_page_shapes = session.extract_draw_page_shapes(workbook) + chart_geometries = session.extract_chart_geometries(workbook) - assert session.extract_draw_page_shapes(workbook_path) == draw_page_shapes - assert session.extract_chart_geometries(workbook_path) == chart_geometries + assert session.extract_draw_page_shapes(workbook) == draw_page_shapes + assert session.extract_chart_geometries(workbook) == chart_geometries assert [call[call.index("--kind") + 1] for call in bridge_calls] == [ "draw-page", "charts", ] + session.close_workbook(workbook) + assert session._bridge_payload_cache == {} + + reopened = session.load_workbook(workbook_path) + assert reopened.workbook_id == 2 + assert session.extract_draw_page_shapes(reopened) == draw_page_shapes + assert [call[call.index("--kind") + 1] for call in bridge_calls] == [ + "draw-page", + "charts", + "draw-page", + ] + session.close_workbook(reopened) assert draw_page_shapes == { "Sheet1": [ @@ -2482,6 +2576,118 @@ def _fake_run_bridge_extract_subprocess( } +def test_libreoffice_session_close_workbook_rejects_foreign_handle( + tmp_path: Path, monkeypatch: MonkeyPatch +) -> None: + """Verify that workbook handles cannot be closed by a different session.""" + + soffice_path = tmp_path / "soffice.exe" + python_path = tmp_path / "python.exe" + workbook_path = tmp_path / "book.xlsx" + soffice_path.write_text("", encoding="utf-8") + python_path.write_text("", encoding="utf-8") + workbook_path.write_text("", encoding="utf-8") + + monkeypatch.setattr( + "exstruct.core.libreoffice._resolve_python_path", + lambda _path: python_path, + ) + + owner = LibreOfficeSession( + LibreOfficeSessionConfig( + soffice_path=soffice_path, + startup_timeout_sec=1.0, + exec_timeout_sec=2.0, + profile_root=None, + ) + ) + foreign = LibreOfficeSession( + LibreOfficeSessionConfig( + soffice_path=soffice_path, + startup_timeout_sec=1.0, + exec_timeout_sec=2.0, + profile_root=None, + ) + ) + + workbook = owner.load_workbook(workbook_path) + + with pytest.raises(ValueError, match="different LibreOfficeSession"): + foreign.close_workbook(workbook) + + +def test_libreoffice_session_close_workbook_rejects_path_mismatched_handle( + tmp_path: Path, monkeypatch: MonkeyPatch +) -> None: + """Verify forged handles cannot reuse a workbook id for another path.""" + + soffice_path = tmp_path / "soffice.exe" + python_path = tmp_path / "python.exe" + workbook_path = tmp_path / "book.xlsx" + other_workbook_path = tmp_path / "other.xlsx" + for path in (soffice_path, python_path, workbook_path, other_workbook_path): + path.write_text("", encoding="utf-8") + + monkeypatch.setattr( + "exstruct.core.libreoffice._resolve_python_path", + lambda _path: python_path, + ) + + session = LibreOfficeSession( + LibreOfficeSessionConfig( + soffice_path=soffice_path, + startup_timeout_sec=1.0, + exec_timeout_sec=2.0, + profile_root=None, + ) + ) + + workbook = session.load_workbook(workbook_path) + forged = LibreOfficeWorkbookHandle( + file_path=other_workbook_path.resolve(), + owner_session_id=workbook.owner_session_id, + workbook_id=workbook.workbook_id, + ) + + with pytest.raises(ValueError, match="does not match the registered workbook"): + session.close_workbook(forged) + + +def test_libreoffice_session_close_workbook_is_idempotent( + tmp_path: Path, monkeypatch: MonkeyPatch +) -> None: + """Verify repeated close succeeds but closed handles can no longer extract.""" + + soffice_path = tmp_path / "soffice.exe" + python_path = tmp_path / "python.exe" + workbook_path = tmp_path / "book.xlsx" + soffice_path.write_text("", encoding="utf-8") + python_path.write_text("", encoding="utf-8") + workbook_path.write_text("", encoding="utf-8") + + monkeypatch.setattr( + "exstruct.core.libreoffice._resolve_python_path", + lambda _path: python_path, + ) + + session = LibreOfficeSession( + LibreOfficeSessionConfig( + soffice_path=soffice_path, + startup_timeout_sec=1.0, + exec_timeout_sec=2.0, + profile_root=None, + ) + ) + session._accept_port = 42001 + + workbook = session.load_workbook(workbook_path) + session.close_workbook(workbook) + session.close_workbook(workbook) + + with pytest.raises(RuntimeError, match="workbook handle is closed"): + session.extract_draw_page_shapes(workbook) + + def test_libreoffice_session_run_bridge_requires_entered_session( tmp_path: Path, monkeypatch: MonkeyPatch ) -> None: