def chunks(lst: Sequence[T], n: int) -> Iterator[list[T]]:
    """Yield successive n-sized chunks from lst.

    The last chunk holds whatever remains and may be shorter than ``n``; an
    empty ``lst`` yields nothing. Every chunk is a freshly built ``list``,
    whatever the concrete sequence type of ``lst`` is (tuple, str, ...).

    Raises ``ValueError`` if ``n`` is smaller than 1. Because this is a
    generator, the check runs when iteration starts, not when ``chunks`` is
    called.
    """
    if n < 1:
        # Without this guard ``n == 0`` fails inside ``range`` with a message
        # that never mentions chunking, and a negative ``n`` produces an
        # empty range, silently dropping every element of ``lst``.
        raise ValueError(f"chunk size must be a positive integer, got {n!r}")

    for start in range(0, len(lst), n):
        yield list(lst[start : start + n])
class TrackDetails(TypedDict):
    """Popularity and external IDs returned by the /v1/tracks batch endpoint.

    One instance is built per track by ``get_track_details_by_id``. The keys
    are spelled like the item fields they end up in, because ``_fetch_info``
    passes the whole mapping to ``item.update``. Every value is ``None`` when
    the corresponding key is absent from the API response.
    """

    # Taken from the track object's ``popularity`` key.
    spotify_track_popularity: int | None
    # The three identifiers below are read from the track object's
    # ``external_ids`` mapping.
    isrc: str | None
    ean: str | None
    upc: str | None


class AudioFeatures(TypedDict, total=False):
    """Audio feature fields returned by the /v1/audio-features endpoint.

    Declared with ``total=False``, so every key is optional for type
    checkers. The batch lookup stores each response object unchanged under
    its track ID.
    """

    # Spotify track ID; ``track_audio_features_batch`` keys its result by it.
    id: str
    acousticness: float
    danceability: float
    energy: float
    instrumentalness: float
    key: int
    liveness: float
    loudness: float
    mode: int
    speechiness: float
    tempo: float
    time_signature: int
    valence: float
def _disable_audio_features(self) -> None:
    """Disable audio features globally and warn only once.

    Flips ``self.audio_features_available`` to ``False`` while holding
    ``self._audio_features_lock``. Only the call that actually performs the
    flip emits the warning, so repeated calls stay silent.
    """
    with self._audio_features_lock:
        if not self.audio_features_available:
            # Already disabled by an earlier call: nothing to change or log.
            return
        self.audio_features_available = False

    # Emitted after the lock is released so it is never held while logging.
    self._log.warning(
        "Audio features API is unavailable (403 error). "
        "Skipping audio features for remaining tracks."
    )


def get_track_details_by_id(
    self, track_ids: Sequence[str]
) -> dict[str, TrackDetails]:
    """Fetch popularity and external IDs in batches of 50 tracks.

    Sends one GET to ``self.track_url`` per chunk of 50 IDs, passing the
    IDs comma-separated in the ``ids`` query parameter.

    Returns a mapping from track ID to ``TrackDetails``. Entries the API
    reports as ``null`` are left out, so callers must tolerate missing keys.
    Exceptions raised by ``_handle_response`` are not caught here.
    """
    if not track_ids:
        return {}

    details_by_id: dict[str, TrackDetails] = {}
    for chunk in chunks(track_ids, 50):
        track_data = self._handle_response(
            "get",
            self.track_url,
            params={"ids": ",".join(chunk)},
        )

        # ``track_for_id`` treats a falsy ``_handle_response`` result as
        # "nothing found"; apply the same rule instead of calling ``.get``
        # on it. ``or []`` also covers an explicit JSON ``null`` value,
        # which a ``.get`` default would not replace.
        tracks = (track_data or {}).get("tracks") or []

        # Pair each result with the ID requested at the same position; that
        # ID is the fallback key when the track object carries no ``id``.
        # ``zip`` stops at the shorter input, so a response with surplus
        # entries cannot trigger an IndexError.
        for requested_id, track in zip(chunk, tracks):
            if track is None:
                continue

            external_ids = track.get("external_ids") or {}
            track_id = track.get("id") or requested_id
            details_by_id[track_id] = TrackDetails(
                spotify_track_popularity=track.get("popularity"),
                isrc=external_ids.get("isrc"),
                ean=external_ids.get("ean"),
                upc=external_ids.get("upc"),
            )

    return details_by_id


def track_audio_features_batch(
    self, track_ids: Sequence[str]
) -> dict[str, AudioFeatures]:
    """Fetch track audio features in batches of 100 tracks.

    Returns a mapping from track ID to the feature object exactly as the
    API returned it. The result is empty when ``track_ids`` is empty or
    audio features have already been disabled.

    Error handling per chunk:

    - ``AudioFeaturesUnavailableError`` disables audio features for the
      rest of the run and stops; features collected so far are returned.
    - ``APIError`` is logged at debug level and only that chunk is skipped.
    """
    if not track_ids:
        return {}

    with self._audio_features_lock:
        if not self.audio_features_available:
            return {}

    features_by_id: dict[str, AudioFeatures] = {}
    for chunk in chunks(track_ids, 100):
        try:
            features_data = self._handle_response(
                "get",
                self.audio_features_url,
                params={"ids": ",".join(chunk)},
            )
        except AudioFeaturesUnavailableError:
            self._disable_audio_features()
            break
        except APIError as e:
            self._log.debug("Spotify API error: {}", e)
            continue

        # Tolerate a falsy response and an explicit JSON ``null`` list, the
        # same way ``get_track_details_by_id`` does for ``tracks``.
        features = (features_data or {}).get("audio_features") or []

        # Results are matched to the requested IDs by position; falsy
        # entries (``null`` in the response) are skipped.
        for requested_id, feature_data in zip(chunk, features):
            if feature_data:
                track_id = feature_data.get("id") or requested_id
                features_by_id[track_id] = feature_data

    return features_by_id
items, write, force): """Obtain track information from Spotify.""" self._log.debug("Total {} tracks", len(items)) + items_to_update: list[tuple[Item, str]] = [] + for index, item in enumerate(items, start=1): self._log.info( "Processing {}/{} tracks - {} ", index, len(items), item @@ -743,14 +851,23 @@ def _fetch_info(self, items, write, force): self._log.debug("No track_id present for: {}", item) continue - popularity, isrc, ean, upc = self.track_info(spotify_track_id) - item["spotify_track_popularity"] = popularity - item["isrc"] = isrc - item["ean"] = ean - item["upc"] = upc + items_to_update.append((item, spotify_track_id)) + + if not items_to_update: + return + + unique_track_ids = list( + dict.fromkeys(track_id for _, track_id in items_to_update) + ) + track_details_by_id = self.get_track_details_by_id(unique_track_ids) + audio_features_by_id = self.track_audio_features_batch(unique_track_ids) + + for item, spotify_track_id in items_to_update: + if track_details := track_details_by_id.get(spotify_track_id): + item.update(track_details) if self.audio_features_available: - audio_features = self.track_audio_features(spotify_track_id) + audio_features = audio_features_by_id.get(spotify_track_id) if audio_features is None: self._log.info("No audio features found for: {}", item) else: @@ -767,7 +884,9 @@ def _fetch_info(self, items, write, force): def track_info(self, track_id: str): """Fetch a track's popularity and external IDs using its Spotify ID.""" - track_data = self._handle_response("get", f"{self.track_url}{track_id}") + track_data = self._handle_response( + "get", f"{self.track_url}/{track_id}" + ) external_ids = track_data.get("external_ids", {}) popularity = track_data.get("popularity") self._log.debug( @@ -796,20 +915,10 @@ def track_audio_features(self, track_id: str): try: return self._handle_response( - "get", f"{self.audio_features_url}{track_id}" + "get", f"{self.audio_features_url}/{track_id}" ) except AudioFeaturesUnavailableError: - # Disable 
globally in a thread-safe manner and warn once. - should_log = False - with self._audio_features_lock: - if self.audio_features_available: - self.audio_features_available = False - should_log = True - if should_log: - self._log.warning( - "Audio features API is unavailable (403 error). " - "Skipping audio features for remaining tracks." - ) + self._disable_audio_features() return None except APIError as e: self._log.debug("Spotify API error: {}", e) diff --git a/docs/changelog.rst b/docs/changelog.rst index 83f8dedeb7..7cc75bd856 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -71,9 +71,11 @@ Bug fixes For plugin developers ~~~~~~~~~~~~~~~~~~~~~ -.. - Other changes - ~~~~~~~~~~~~~ +Other changes +~~~~~~~~~~~~~ + +- :doc:`plugins/spotify`: Batch ``spotifysync`` track and audio-features API + requests and deduplicate repeated Spotify track IDs within a run. 2.10.0 (April 19, 2026) ----------------------- diff --git a/test/plugins/test_spotify.py b/test/plugins/test_spotify.py index 6e322ca0bf..127fb3a628 100644 --- a/test/plugins/test_spotify.py +++ b/test/plugins/test_spotify.py @@ -1,5 +1,6 @@ """Tests for the 'spotify' plugin""" +import json import os from urllib.parse import parse_qs, urlparse @@ -132,7 +133,7 @@ def test_track_for_id(self): responses.add( responses.GET, - f"{spotify.SpotifyPlugin.track_url}6NPVjNh8Jhru9xOmyQigds", + f"{spotify.SpotifyPlugin.track_url}/6NPVjNh8Jhru9xOmyQigds", body=response_body, status=200, content_type="application/json", @@ -279,7 +280,7 @@ def test_multiartist_album_and_track(self): responses.add( responses.GET, - f"{spotify.SpotifyPlugin.track_url}6sjZfVJworBX6TqyjkxIJ1", + f"{spotify.SpotifyPlugin.track_url}/6sjZfVJworBX6TqyjkxIJ1", body=track_response_body, status=200, content_type="application/json", @@ -307,3 +308,261 @@ def test_multiartist_album_and_track(self): assert track_info.artists == ["Foo", "Bar"] assert track_info.artist_id == "12345" assert track_info.artists_ids == ["12345", "67890"] + + 
@responses.activate
def test_get_track_details_by_id_chunks_requests(self):
    """51 track IDs are fetched with two requests: 50 IDs, then 1."""
    ids_per_request = []

    def callback(request):
        # Echo every requested ID back as a track object and record how
        # many IDs this request carried.
        # NOTE(review): ``_params`` is defined elsewhere in this module;
        # presumably it parses the URL's query string -- confirm.
        ids = _params(request.url)["ids"][0].split(",")
        ids_per_request.append(len(ids))
        return (
            200,
            {"Content-Type": "application/json"},
            json.dumps(
                {
                    "tracks": [
                        {
                            "id": track_id,
                            "popularity": 50,
                            "external_ids": {},
                        }
                        for track_id in ids
                    ]
                }
            ),
        )

    responses.add_callback(
        responses.GET,
        spotify.SpotifyPlugin.track_url,
        callback=callback,
        content_type="application/json",
    )

    # One more ID than the batch size of 50 forces a second request.
    track_ids = [f"track-{idx}" for idx in range(51)]
    track_info = self.spotify.get_track_details_by_id(track_ids)

    assert len(track_info) == 51
    assert ids_per_request == [50, 1]


@responses.activate
def test_fetch_info_uses_batch_endpoints(self):
    """``_fetch_info`` issues one batch request per endpoint.

    Also checks that no per-track URL is requested and that popularity and
    tempo from the batch responses end up on the matching items.
    """
    responses.add(
        responses.GET,
        spotify.SpotifyPlugin.track_url,
        status=200,
        json={
            "tracks": [
                {
                    "id": "id-1",
                    "popularity": 10,
                    "external_ids": {
                        "isrc": "isrc-1",
                        "ean": "ean-1",
                        "upc": "upc-1",
                    },
                },
                {
                    "id": "id-2",
                    "popularity": 20,
                    "external_ids": {
                        "isrc": "isrc-2",
                        "ean": "ean-2",
                        "upc": "upc-2",
                    },
                },
                {
                    "id": "id-3",
                    "popularity": 30,
                    "external_ids": {
                        "isrc": "isrc-3",
                        "ean": "ean-3",
                        "upc": "upc-3",
                    },
                },
            ]
        },
        content_type="application/json",
    )
    responses.add(
        responses.GET,
        spotify.SpotifyPlugin.audio_features_url,
        status=200,
        json={
            "audio_features": [
                {"id": "id-1", "tempo": 100.1, "energy": 0.4},
                {"id": "id-2", "tempo": 110.2, "energy": 0.5},
                {"id": "id-3", "tempo": 120.3, "energy": 0.6},
            ]
        },
        content_type="application/json",
    )

    # Three library items, each with its own Spotify track ID.
    items = []
    for idx in range(1, 4):
        item = Item(title=f"Track {idx}", artist="Artist", length=10)
        item.add(self.lib)
        item["spotify_track_id"] = f"id-{idx}"
        items.append(item)

    self.spotify._fetch_info(items, write=False, force=True)

    get_calls = [
        call for call in responses.calls if call.request.method == "GET"
    ]
    # Batch endpoints have no trailing path segment; the per-track
    # endpoints would show up as "/v1/tracks/<id>" and
    # "/v1/audio-features/<id>".
    batch_track_calls = [
        call
        for call in get_calls
        if urlparse(call.request.url).path == "/v1/tracks"
    ]
    single_track_calls = [
        call
        for call in get_calls
        if urlparse(call.request.url).path.startswith("/v1/tracks/")
    ]
    batch_audio_calls = [
        call
        for call in get_calls
        if urlparse(call.request.url).path == "/v1/audio-features"
    ]
    single_audio_calls = [
        call
        for call in get_calls
        if urlparse(call.request.url).path.startswith("/v1/audio-features/")
    ]

    assert len(batch_track_calls) == 1
    assert len(single_track_calls) == 0
    assert len(batch_audio_calls) == 1
    assert len(single_audio_calls) == 0

    assert items[0]["spotify_track_popularity"] == 10
    assert items[1]["spotify_track_popularity"] == 20
    assert items[2]["spotify_track_popularity"] == 30

    assert items[0]["spotify_tempo"] == 100.1
    assert items[1]["spotify_tempo"] == 110.2
    assert items[2]["spotify_tempo"] == 120.3


@responses.activate
def test_fetch_info_deduplicates_batch_ids(self):
    """Two items sharing one track ID cause a single ID to be requested.

    Both items must still receive the fetched popularity.
    """
    seen_track_ids = []
    seen_audio_ids = []

    def track_callback(request):
        # Record the IDs of each /v1/tracks request and echo them back.
        ids = _params(request.url)["ids"][0].split(",")
        seen_track_ids.append(ids)
        return (
            200,
            {"Content-Type": "application/json"},
            json.dumps(
                {
                    "tracks": [
                        {
                            "id": track_id,
                            "popularity": 50,
                            "external_ids": {},
                        }
                        for track_id in ids
                    ]
                }
            ),
        )

    def audio_callback(request):
        # Record the IDs of each /v1/audio-features request and echo them.
        ids = _params(request.url)["ids"][0].split(",")
        seen_audio_ids.append(ids)
        return (
            200,
            {"Content-Type": "application/json"},
            json.dumps(
                {
                    "audio_features": [
                        {"id": track_id, "tempo": 100.0} for track_id in ids
                    ]
                }
            ),
        )

    responses.add_callback(
        responses.GET,
        spotify.SpotifyPlugin.track_url,
        callback=track_callback,
        content_type="application/json",
    )
    responses.add_callback(
        responses.GET,
        spotify.SpotifyPlugin.audio_features_url,
        callback=audio_callback,
        content_type="application/json",
    )

    # Two distinct items pointing at the same Spotify track.
    items = []
    for idx in range(2):
        item = Item(title=f"Track {idx}", artist="Artist", length=10)
        item.add(self.lib)
        item["spotify_track_id"] = "shared-id"
        items.append(item)

    self.spotify._fetch_info(items, write=False, force=True)

    # Exactly one request per endpoint, each carrying the ID only once.
    assert seen_track_ids == [["shared-id"]]
    assert seen_audio_ids == [["shared-id"]]
    assert items[0]["spotify_track_popularity"] == 50
    assert items[1]["spotify_track_popularity"] == 50


@responses.activate
def test_track_audio_features_batch_disables_on_403(self):
    """A 403 disables audio features; the next call makes no request."""
    responses.add(
        responses.GET,
        spotify.SpotifyPlugin.audio_features_url,
        status=403,
        json={"error": {"status": 403}},
        content_type="application/json",
    )

    assert self.spotify.track_audio_features_batch(["id-1"]) == {}
    assert self.spotify.audio_features_available is False
    # The second call must return early: the request count stays at one.
    assert self.spotify.track_audio_features_batch(["id-2"]) == {}
    assert len(responses.calls) == 1


@responses.activate
def test_track_audio_features_batch_keeps_partial_results_on_api_error(
    self,
):
    """A failing chunk is skipped; other chunks still contribute results."""

    def callback(request):
        # Fail only the request that contains "track-100"; echo all others.
        ids = _params(request.url)["ids"][0].split(",")
        if "track-100" in ids:
            return (
                502,
                {"Content-Type": "application/json"},
                json.dumps({"error": {"status": 502}}),
            )
        return (
            200,
            {"Content-Type": "application/json"},
            json.dumps(
                {
                    "audio_features": [
                        {"id": track_id, "tempo": 100.0} for track_id in ids
                    ]
                }
            ),
        )

    responses.add_callback(
        responses.GET,
        spotify.SpotifyPlugin.audio_features_url,
        callback=callback,
        content_type="application/json",
    )

    # 201 IDs with a batch size of 100 give three chunks: 0-99, 100-199
    # and 200. Only the middle chunk is answered with the 502.
    track_ids = [f"track-{idx}" for idx in range(201)]
    features = self.spotify.track_audio_features_batch(track_ids)

    assert "track-0" in features
    assert "track-99" in features
    assert "track-100" not in features
    assert "track-199" not in features
    assert "track-200" in features