Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [

dependencies = [
"blake3",
"lightning-sdk >=2026.03.31",
"lightning-sdk >=2026.04.16",
"lightning-utilities<=0.15.3",
"protobuf",
"psutil",
Expand Down
19 changes: 7 additions & 12 deletions src/litlogger/api/metrics_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
V1PhaseType,
V1SystemInfo,
)
from lightning_sdk.lightning_cloud.openapi.rest import ApiException

from litlogger.api.client import LitRestClient
from litlogger.colors import _create_colors
Expand Down Expand Up @@ -103,18 +104,12 @@ def get_experiment_metrics_by_name(
Returns:
The metrics stream object for the experiment, or None if not found.
"""
response = self.client.lit_logger_service_list_metrics_streams(project_id=teamspace_id)

if not response.metrics_streams:
return None

# Filter by name
matching = [ms for ms in response.metrics_streams if ms.name == name]

if not matching:
return None

return matching[0]
try:
return self.client.lit_logger_service_get_metrics_stream(project_id=teamspace_id, name=name)
except ApiException as ex:
if ex.status == 404:
return None
raise

def get_or_create_experiment_metrics(
self,
Expand Down
43 changes: 43 additions & 0 deletions tests/integrations/test_standalone_lifecycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Covers error handling, sequential experiments, resume, console output, and API internals.
"""

import os
import uuid
from contextlib import redirect_stderr
from io import StringIO
Expand Down Expand Up @@ -254,6 +255,48 @@ def test_resume_experiment():
)


@pytest.mark.cloud()
@pytest.mark.skipif(
    bool(os.environ.get("TEST_GUEST_MODE", "")),
    reason="guests can only create up to 25 experiments",
)
def test_resume_old_experiment():
    """Check that re-initialising by name resumes the original stream.

    The test creates one experiment, logs a single metric and finalizes it,
    then creates 51 additional metrics streams in the same teamspace. A second
    ``litlogger.init`` with the original name must yield the same metrics
    stream id as the first one. Every stream created here is deleted at the
    end, whether or not the assertion holds.
    """
    from litlogger.api.metrics_api import MetricsApi

    target_name = f"standalone_resume_old-{uuid.uuid4().hex}"
    filler_prefix = f"standalone_resume_old_filler-{uuid.uuid4().hex}"

    # Create the experiment that will later be resumed.
    first_run = litlogger.init(name=target_name, teamspace="oss-litlogger")
    litlogger.log_metrics({"loss": 0.5}, step=0)
    litlogger.finalize()

    project_id = first_run._teamspace.id
    original_stream_id = first_run._metrics_store.id

    metrics_api = MetricsApi()
    rest_client = LitRestClient()

    # Ids are recorded one at a time so that a failure part-way through the
    # loop still leaves the already-created streams available for cleanup.
    filler_ids: list[str] = []
    try:
        # NOTE(review): 51 presumably exceeds one page of the stream listing
        # endpoint -- TODO confirm the server-side page size.
        for i in range(51):
            filler_name = f"{filler_prefix}-{i}"
            filler = metrics_api.create_experiment_metrics(
                teamspace_id=project_id,
                name=filler_name,
            )
            filler_ids.append(filler.id)

        resumed_run = litlogger.init(name=target_name, teamspace="oss-litlogger")
        try:
            resumed_stream_id = resumed_run._metrics_store.id
            assert (
                resumed_stream_id == original_stream_id
            ), "Expected to resume the original experiment, got a new stream"
        finally:
            # Always close the resumed run, even when the assertion fails.
            litlogger.finalize()
    finally:
        doomed_ids = [original_stream_id, *filler_ids]
        rest_client.lit_logger_service_delete_metrics_stream(
            project_id=project_id,
            body=LitLoggerServiceDeleteMetricsStreamBody(ids=doomed_ids),
        )


@pytest.mark.cloud()
def test_new_dict_api_resume():
"""Test resuming an experiment with the new dict-like API."""
Expand Down
38 changes: 22 additions & 16 deletions tests/unittests/api/test_metrics_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

from unittest.mock import MagicMock, patch

import pytest
from lightning_sdk.lightning_cloud.openapi import (
V1Metrics,
V1MetricValue,
V1PhaseType,
)
from lightning_sdk.lightning_cloud.openapi.rest import ApiException
from litlogger.api.metrics_api import MetricsApi
from litlogger.types import PhaseType

Expand All @@ -30,29 +32,25 @@ def test_get_experiment_metrics_by_name(self):
mock_stream = MagicMock()
mock_stream.id = "ms-123"
mock_stream.name = "my-experiment"
mock_stream.version_number = 1
mock_response = MagicMock()
mock_response.metrics_streams = [mock_stream]
mock_client.lit_logger_service_list_metrics_streams.return_value = mock_response
mock_client.lit_logger_service_get_metrics_stream.return_value = mock_stream
api = MetricsApi(client=mock_client)

result = api.get_experiment_metrics_by_name(
teamspace_id="ts-123",
name="my-experiment",
)

mock_client.lit_logger_service_list_metrics_streams.assert_called_once_with(
mock_client.lit_logger_service_get_metrics_stream.assert_called_once_with(
project_id="ts-123",
name="my-experiment",
)
assert result.id == "ms-123"
assert result.name == "my-experiment"

def test_get_experiment_metrics_by_name_not_found(self):
"""Test that get_experiment_metrics_by_name returns None when experiment not found."""
mock_client = MagicMock()
mock_response = MagicMock()
mock_response.metrics_streams = []
mock_client.lit_logger_service_list_metrics_streams.return_value = mock_response
mock_client.lit_logger_service_get_metrics_stream.side_effect = ApiException(status=404)
api = MetricsApi(client=mock_client)

result = api.get_experiment_metrics_by_name(
Expand All @@ -62,13 +60,23 @@ def test_get_experiment_metrics_by_name_not_found(self):

assert result is None

def test_get_experiment_metrics_by_name_raises_on_other_errors(self):
    """An ApiException with a non-404 status (here 500) must reach the caller."""
    failing_client = MagicMock()
    # Every lookup through the mocked client raises a 500 ApiException.
    failing_client.lit_logger_service_get_metrics_stream.side_effect = ApiException(status=500)
    metrics_api = MetricsApi(client=failing_client)
    lookup_kwargs = {"teamspace_id": "ts-123", "name": "my-experiment"}

    with pytest.raises(ApiException):
        metrics_api.get_experiment_metrics_by_name(**lookup_kwargs)

def test_get_or_create_experiment_metrics_creates_new(self):
"""Test get_or_create_experiment_metrics creates a new experiment when none exists."""
mock_client = MagicMock()
# First call to list returns empty (no existing experiment)
mock_list_response = MagicMock()
mock_list_response.metrics_streams = []
mock_client.lit_logger_service_list_metrics_streams.return_value = mock_list_response
# Get returns 404 (no existing experiment)
mock_client.lit_logger_service_get_metrics_stream.side_effect = ApiException(status=404)
# Create returns a new experiment
mock_created = MagicMock()
mock_created.id = "ms-new"
Expand All @@ -92,13 +100,11 @@ def test_get_or_create_experiment_metrics_creates_new(self):
def test_get_or_create_experiment_metrics_returns_existing(self):
"""Test get_or_create_experiment_metrics returns existing experiment without creating."""
mock_client = MagicMock()
# List returns an existing experiment
# Get returns an existing experiment
mock_existing = MagicMock()
mock_existing.id = "ms-existing"
mock_existing.name = "my-experiment"
mock_list_response = MagicMock()
mock_list_response.metrics_streams = [mock_existing]
mock_client.lit_logger_service_list_metrics_streams.return_value = mock_list_response
mock_client.lit_logger_service_get_metrics_stream.return_value = mock_existing
api = MetricsApi(client=mock_client)

result, created = api.get_or_create_experiment_metrics(
Expand Down
Loading