diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index ad1df7bbe8..4d60b5a47b 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -25,15 +25,19 @@ from __future__ import annotations import os +import re +from collections import defaultdict from functools import singledispatchmethod from pathlib import Path from typing import TYPE_CHECKING, Any +import confuse import yaml from beets import config, library, plugins, ui from beets.library import Album, Item from beets.util import plurality, unique_list +from beetsplug.lastgenre.utils import drop_ignored_genres, is_ignored from .client import LastFmClient @@ -44,6 +48,8 @@ from beets.importer import ImportSession, ImportTask from beets.library import LibModel + from .utils import GenreIgnorePatterns + Whitelist = set[str] """Set of valid genre names (lowercase). Empty set means all genres allowed.""" @@ -130,6 +136,7 @@ def __init__(self) -> None: "prefer_specific": False, "title_case": True, "pretend": False, + "ignorelist": {}, } ) self.setup() @@ -139,12 +146,13 @@ def setup(self) -> None: if self.config["auto"]: self.import_stages = [self.imported] - self.client = LastFmClient( - self._log, self.config["min_weight"].get(int) - ) self.whitelist: Whitelist = self._load_whitelist() self.c14n_branches: CanonTree self.c14n_branches, self.canonicalize = self._load_c14n_tree() + self.ignore_patterns: GenreIgnorePatterns = self._load_ignorelist() + self.client = LastFmClient( + self._log, self.config["min_weight"].get(int), self.ignore_patterns + ) def _load_whitelist(self) -> Whitelist: """Load the whitelist from a text file. @@ -187,6 +195,57 @@ def _load_c14n_tree(self) -> tuple[CanonTree, bool]: flatten_tree(genres_tree, [], c14n_branches) return c14n_branches, canonicalize + def _load_ignorelist(self) -> GenreIgnorePatterns: + r"""Load patterns from configuration and compile them. + + Mapping of artist names to regex or literal patterns. Use the + quoted ``'*'`` key to define globally ignored genres:: + + lastgenre: + ignorelist: + '*': + - spoken word + - comedy + Artist Name: + - .*rock.* + - .*metal.* + + Matching is case-insensitive and full-match. Because patterns are + parsed as plain YAML scalars, backslashes (e.g. ``\w``) should + not be double-escaped. Quotes are primarily needed for special + YAML characters (e.g., ``*`` or ``[``); prefer single-quotes. + + Raises: + Several confuse.ConfigError's that tell the user about the expected + format when the config is invalid. + """ + if not self.config["ignorelist"].get(): + return {} + + raw_ignorelist = self.config["ignorelist"].get( + confuse.MappingValues(confuse.Sequence(str)) + ) + + compiled_ignorelist: GenreIgnorePatterns = defaultdict(list) + for artist, patterns in raw_ignorelist.items(): + artist_patterns = [] + for pattern in patterns: + try: + artist_patterns.append(re.compile(pattern, re.IGNORECASE)) + except re.error: + artist_patterns.append( + re.compile(re.escape(pattern), re.IGNORECASE) + ) + self._log.extra_debug( + "ignore for {}: {}", + artist, + [p.pattern for p in artist_patterns], + ) + + compiled_ignorelist[artist] = artist_patterns + + return compiled_ignorelist + @property def sources(self) -> tuple[str, ...]: """A tuple of allowed genre sources. May contain 'track', @@ -202,7 +261,9 @@ def sources(self) -> tuple[str, ...]: # Genre list processing. - def _resolve_genres(self, tags: list[str]) -> list[str]: + def _resolve_genres( + self, tags: list[str], artist: str | None = None + ) -> list[str]: """Canonicalize, sort and filter a list of genres. - Returns an empty list if the input tags list is empty. @@ -217,6 +278,9 @@ def _resolve_genres(self, tags: list[str]) -> list[str]: by the specificity (depth in the canonicalization tree) of the genres. - Finally applies whitelist filtering to ensure that only valid genres are kept. (This may result in no genres at all being retained). + - Ignorelist is applied at each stage: ignored input tags skip ancestry + entirely, ignored ancestor tags are dropped, and ignored tags are + removed in the final filter. - Returns the filtered list of genres, limited to the configured count. """ if not tags: @@ -229,14 +293,29 @@ def _resolve_genres(self, tags: list[str]) -> list[str]: # Extend the list to consider tags parents in the c14n tree tags_all = [] for tag in tags: - # Add parents that are in the whitelist, or add the oldest - # ancestor if no whitelist + # Skip ignored tags entirely — don't walk their ancestry. + if is_ignored(self._log, self.ignore_patterns, tag, artist): + continue + + # Add parents that pass whitelist (and are not ignored, which + # is checked in _filter_valid). With whitelist, we may include + # multiple parents if self.whitelist: parents = self._filter_valid( - find_parents(tag, self.c14n_branches) + find_parents(tag, self.c14n_branches), + artist=artist, ) else: - parents = [find_parents(tag, self.c14n_branches)[-1]] + # No whitelist: take only the oldest ancestor, skipping it + # if it is in the ignorelist + oldest = find_parents(tag, self.c14n_branches)[-1] + parents = ( + [] + if is_ignored( + self._log, self.ignore_patterns, oldest, artist + ) + else [oldest] + ) tags_all += parents # Stop if we have enough tags already, unless we need to find @@ -254,24 +333,34 @@ def _resolve_genres(self, tags: list[str]) -> list[str]: if self.config["prefer_specific"]: tags = sort_by_depth(tags, self.c14n_branches) - # c14n only adds allowed genres but we may have had forbidden genres in - # the original tags list - valid_tags = self._filter_valid(tags) + # Final filter: applies when c14n is disabled, or when c14n ran without + # whitelist filtering in the loop (no-whitelist path). + valid_tags = self._filter_valid(tags, artist=artist) return valid_tags[:count] - def _filter_valid(self, genres: Iterable[str]) -> list[str]: - """Filter genres based on whitelist. + def _filter_valid( + self, genres: Iterable[str], artist: str | None = None + ) -> list[str]: + """Filter genres through whitelist and ignorelist. - Returns all genres if no whitelist is configured, otherwise returns - only genres that are in the whitelist. + Drops empty/whitespace-only strings, then applies whitelist and + ignorelist checks. Returns all genres if neither is configured. + Whitelist is checked first for performance reasons (ignorelist regex + matching is more expensive and for some call sites ignored genres were + already filtered). """ - # First, drop any falsy or whitespace-only genre strings to avoid - # retaining empty tags from multi-valued fields. cleaned = [g for g in genres if g and g.strip()] - if not self.whitelist: + if not self.whitelist and not self.ignore_patterns: return cleaned - return [g for g in cleaned if g.lower() in self.whitelist] + whitelisted = [ + g + for g in cleaned + if not self.whitelist or g.lower() in self.whitelist + ] + return drop_ignored_genres( + self._log, self.ignore_patterns, whitelisted, artist + ) # Genre resolution pipeline. @@ -282,6 +371,14 @@ def _format_genres(self, tags: list[str]) -> list[str]: else: return tags + def _artist_for_filter(self, obj: LibModel) -> str | None: + """Return the representative artist for genre resolution and filtering.""" + return ( + obj.artist + if isinstance(obj, library.Item) + else obj.albumartist or obj.artist + ) + def _get_existing_genres(self, obj: LibModel) -> list[str]: """Return a list of genres for this Item or Album.""" if isinstance(obj, library.Item): @@ -292,13 +389,13 @@ def _get_existing_genres(self, obj: LibModel) -> list[str]: return genres_list def _combine_resolve_and_log( - self, old: list[str], new: list[str] + self, old: list[str], new: list[str], artist: str | None = None ) -> list[str]: """Combine old and new genres and process via _resolve_genres.""" self._log.debug("raw last.fm tags: {}", new) self._log.debug("existing genres taken into account: {}", old) combined = old + new - return self._resolve_genres(combined) + return self._resolve_genres(combined, artist=artist) def _get_genre(self, obj: LibModel) -> tuple[list[str], str]: """Get the final genre list for an Album or Item object. @@ -320,12 +417,21 @@ def _get_genre(self, obj: LibModel) -> tuple[list[str], str]: and the whitelist feature was disabled. """ + def _fallback_stage() -> tuple[list[str], str]: + """Return the fallback genre and label.""" + if fallback := self.config["fallback"].get(): + return [fallback], "fallback" + return [], "fallback unconfigured" + def _try_resolve_stage( - stage_label: str, keep_genres: list[str], new_genres: list[str] + stage_label: str, + keep_genres: list[str], + new_genres: list[str], + artist: str | None = None, ) -> tuple[list[str], str] | None: """Try to resolve genres for a given stage and log the result.""" resolved_genres = self._combine_resolve_and_log( - keep_genres, new_genres + keep_genres, new_genres, artist=artist ) if resolved_genres: suffix = "whitelist" if self.whitelist else "any" @@ -345,11 +451,15 @@ def _try_resolve_stage( # If none are found, we use the fallback (if set). if self.config["cleanup_existing"]: keep_genres = [g.lower() for g in genres] - if result := _try_resolve_stage("cleanup", keep_genres, []): + if result := _try_resolve_stage( + "cleanup", + keep_genres, + [], + artist=self._artist_for_filter(obj), + ): return result - # Return fallback string (None if not set). - return self.config["fallback"].get(), "fallback" + return _fallback_stage() # If cleanup_existing is not set, the pre-populated tags are # returned as-is. @@ -368,7 +478,7 @@ def _try_resolve_stage( obj.artist, obj.title ): if result := _try_resolve_stage( - "track", keep_genres, new_genres + "track", keep_genres, new_genres, artist=obj.artist ): return result @@ -377,18 +487,21 @@ def _try_resolve_stage( obj.albumartist, obj.album ): if result := _try_resolve_stage( - "album", keep_genres, new_genres + "album", keep_genres, new_genres, artist=obj.albumartist ): return result if "artist" in self.sources: new_genres = [] + stage_artist: str | None = None if isinstance(obj, library.Item): new_genres = self.client.fetch_artist_genre(obj.artist) stage_label = "artist" + stage_artist = obj.artist elif obj.albumartist != config["va_name"].as_str(): new_genres = self.client.fetch_artist_genre(obj.albumartist) stage_label = "album artist" + stage_artist = obj.albumartist if not new_genres: self._log.extra_debug( 'No album artist genre found for "{}", ' @@ -405,6 +518,9 @@ def _try_resolve_stage( ) if new_genres: stage_label = "multi-valued album artist" + stage_artist = ( + None # Already filtered per-artist in client + ) else: # For "Various Artists", pick the most popular track genre. item_genres = [] @@ -431,27 +547,23 @@ def _try_resolve_stage( if new_genres: if result := _try_resolve_stage( - stage_label, keep_genres, new_genres + stage_label, keep_genres, new_genres, artist=stage_artist ): return result # Nothing found, leave original if configured and valid. - if genres and self.config["keep_existing"]: - if valid_genres := self._filter_valid(genres): + if genres and self.config["keep_existing"].get(): + artist = self._artist_for_filter(obj) + if valid_genres := self._filter_valid(genres, artist=artist): return valid_genres, "original fallback" # If the original genre doesn't match a whitelisted genre, check # if we can canonicalize it to find a matching, whitelisted genre! if result := _try_resolve_stage( - "original fallback", keep_genres, [] + "original fallback", keep_genres, [], artist=artist ): return result - # Return fallback as a list. - if fallback := self.config["fallback"].get(): - return [fallback], "fallback" - - # No fallback configured. - return [], "fallback unconfigured" + return _fallback_stage() # Beets plugin hooks and CLI. diff --git a/beetsplug/lastgenre/client.py b/beetsplug/lastgenre/client.py index 727702f2f4..6b51d03229 100644 --- a/beetsplug/lastgenre/client.py +++ b/beetsplug/lastgenre/client.py @@ -25,17 +25,20 @@ from beets import plugins +from .utils import drop_ignored_genres + if TYPE_CHECKING: from collections.abc import Callable from beets.logging import BeetsLogger + from .utils import GenreIgnorePatterns + GenreCache = dict[str, list[str]] """Cache mapping entity keys to their genre lists. Keys are formatted as 'entity.arg1-arg2-...' (e.g., 'album.artist-title'). Values are lists of lowercase genre strings.""" - LASTFM = pylast.LastFMNetwork(api_key=plugins.LASTFM_KEY) PYLAST_EXCEPTIONS = ( @@ -48,13 +51,20 @@ class LastFmClient: """Client for fetching genres from Last.fm.""" - def __init__(self, log: BeetsLogger, min_weight: int): + def __init__( + self, + log: BeetsLogger, + min_weight: int, + ignore_patterns: GenreIgnorePatterns, + ): """Initialize the client. The min_weight parameter filters tags by their minimum weight. + The ignorelist filters forbidden genres directly after Last.fm lookup. """ self._log = log self._min_weight = min_weight + self._ignore_patterns: GenreIgnorePatterns = ignore_patterns self._genre_cache: GenreCache = {} def fetch_genre( @@ -120,14 +130,22 @@ def _last_lookup( if any(not s for s in args): return [] - key = f"{entity}.{'-'.join(str(a) for a in args)}" + args_replaced = [a.replace("\u2010", "-") for a in args] + key = f"{entity}.{'-'.join(str(a) for a in args_replaced)}" if key not in self._genre_cache: - args_replaced = [a.replace("\u2010", "-") for a in args] self._genre_cache[key] = self.fetch_genre(method(*args_replaced)) - genre = self._genre_cache[key] - self._log.extra_debug("last.fm (unfiltered) {} tags: {}", entity, genre) - return genre + genres = self._genre_cache[key] + + self._log.extra_debug( + "last.fm (unfiltered) {} tags: {}", entity, genres + ) + + # Filter forbidden genres on every call so ignorelist hits are logged. + # Artist is always the first element in args (album, artist, track lookups). + return drop_ignored_genres( + self._log, self._ignore_patterns, genres, args[0] + ) def fetch_album_genre(self, albumartist: str, albumtitle: str) -> list[str]: """Return genres from Last.fm for the album by albumartist.""" diff --git a/beetsplug/lastgenre/utils.py b/beetsplug/lastgenre/utils.py new file mode 100644 index 0000000000..bd73f6aca6 --- /dev/null +++ b/beetsplug/lastgenre/utils.py @@ -0,0 +1,59 @@ +# This file is part of beets. +# Copyright 2026, J0J0 Todos. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +"""Lastgenre plugin shared utilities and types.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import re + + from beets.logging import BeetsLogger + + GenreIgnorePatterns = dict[str, list[re.Pattern[str]]] + """Mapping of artist name to list of compiled case-insensitive patterns.""" + + +def drop_ignored_genres( + logger: BeetsLogger, + ignore_patterns: GenreIgnorePatterns, + genres: list[str], + artist: str | None = None, +) -> list[str]: + """Drop genres that match the ignorelist.""" + return [ + g for g in genres if not is_ignored(logger, ignore_patterns, g, artist) + ] + + +def is_ignored( + logger: BeetsLogger, + ignore_patterns: GenreIgnorePatterns, + genre: str, + artist: str | None = None, +) -> bool: + """Check if genre tag should be ignored.""" + genre_lower = genre.lower() + for pattern in ignore_patterns.get("*") or []: + if pattern.fullmatch(genre_lower): + logger.extra_debug("ignored (global): {}", genre) + return True + for pattern in ignore_patterns.get((artist or "").lower()) or []: + if pattern.fullmatch(genre_lower): + logger.extra_debug("ignored (artist: {}): {}", artist, genre) + return True + return False diff --git a/docs/changelog.rst b/docs/changelog.rst index 10f578b805..f24a151960 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -40,6 +40,9 @@ New features - :doc:`plugins/replaygain`: Conflicting replay gain tags are now removed on write. RG_* tags are removed when setting R128_* and vice versa. - :doc:`plugins/fetchart`: Add support for WebP images. +- :doc:`plugins/lastgenre`: Add support for a user-configurable ignorelist to + exclude unwanted or incorrect Last.fm (or existing) genres, either per artist + or globally :bug:`6449` Bug fixes ~~~~~~~~~ diff --git a/docs/plugins/lastgenre.rst b/docs/plugins/lastgenre.rst index b0c01eabd2..db8118ee23 100644 --- a/docs/plugins/lastgenre.rst +++ b/docs/plugins/lastgenre.rst @@ -160,6 +160,58 @@ genres remain, set ``whitelist: no``). If ``force`` is disabled the ``keep_existing`` option is simply ignored (since ``force: no`` means ``not touching`` existing tags anyway). +Genre Ignorelist +---------------- + +Last.fm tags are crowd-sourced, so they can be wrong — especially for artists +whose names are shared with or confused with others. For example, a "Drum And +Bass" artist named "Fracture" might incorrectly receive "Metal" tags. The +ignorelist lets you reject specific genres globally or per-artist. + +Another example for the ignorelist is to exclude genres that are technically +correct but not useful to you. For example, you might want to exclude "Ska" for +"Bob Marley", even though it is a valid genre for his music. + +Filtering is done in two places: when fetching genres from Last.fm and when +resolving to a final genre list (during canonicalization and whitelisting). + +This means that existing genres are also filtered when ``force`` and +``keep_existing`` options are enabled (or ``cleanup_existing`` is enabled with +``force`` disabled). + +To enable this feature, add an ``ignorelist`` section to your ``lastgenre`` +configuration: + +.. code-block:: yaml + + lastgenre: + ignorelist: + fracture: + - ^(heavy|black|power|death)?\s?(metal|rock)$|\w+-metal\d*$ + - progressive metal + bob marley: + - ska + '*': + - electronic + +A combination of regex patterns and plain genre names is possible. The ``'*'`` +key applies globally to all artists — use it to block genres you never want, +regardless of artist. Patterns are matched against the full genre string, so a +plain ``metal`` will not match ``heavy metal`` unless you write a regex like +``.*metal``. + +.. attention:: + + - The global key ``'*'`` **must** be surrounded by single quotes so that + YAML does not interpret it as an anchor. + - Any regex pattern that starts with a special YAML character (``[``, ``*``, + or ``:``) or ends with ``:`` **must** be surrounded by quotes. + - Prefer **single quotes** (``'...'``) when quoting is necessary, as they + treat backslashes literally (no double-escaping required). + - Because the ignorelist uses plain YAML, you do **not** need to + double-escape backslashes in unquoted or single-quoted strings (e.g., use + ``\w``, not ``\\w``). + Configuration ------------- @@ -200,6 +252,9 @@ file. The available options are: internal whitelist, or ``no`` to consider all genres valid. Default: ``yes``. - **title_case**: Convert the new tags to TitleCase before saving. Default: ``yes``. +- **ignorelist**: A mapping of artist names (or the global ``'*'`` key) to lists + of genres to exclude. See `Genre Ignorelist`_ for more details. Default: + ``no``. Running Manually ---------------- diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index 07258dc088..9e89eb8b14 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -14,13 +14,18 @@ """Tests for the 'lastgenre' plugin.""" -from unittest.mock import Mock, patch +import re +from collections import defaultdict +from unittest.mock import MagicMock, Mock, patch +import confuse import pytest +from beets.library import Album from beets.test import _common from beets.test.helper import IOMixin, PluginTestCase from beetsplug import lastgenre +from beetsplug.lastgenre.utils import is_ignored class LastGenrePluginTest(IOMixin, PluginTestCase): @@ -202,6 +207,80 @@ def test_sort_by_depth(self): res = lastgenre.sort_by_depth(tags, self.plugin.c14n_branches) assert res == ["ambient", "electronic"] + # Ignorelist tests in resolve_genres and _is_ignored + + def test_ignorelist_filters_genres_in_resolve(self): + """Ignored genres are stripped by _resolve_genres (no c14n). + + Artist-specific and global patterns are both applied. + """ + self._setup_config(whitelist=False, canonical=False) + self.plugin.ignore_patterns = defaultdict( + list, + { + "the artist": [re.compile(r"^metal$", re.IGNORECASE)], + "*": [re.compile(r"^rock$", re.IGNORECASE)], + }, + ) + result = self.plugin._resolve_genres( + ["metal", "rock", "jazz"], artist="the artist" + ) + assert "metal" not in result, ( + "artist-specific ignored genre must be removed" + ) + assert "rock" not in result, "globally ignored genre must be removed" + assert "jazz" in result, "non-ignored genre must survive" + + def test_ignorelist_stops_c14n_ancestry_walk(self): + """An ignored tag's c14n parents don't bleed into the result. + + Without ignorelist, 'delta blues' canonicalizes to 'blues'. + With 'delta blues' ignored the tag is skipped entirely in the + c14n loop, so 'blues' must not appear either. + """ + self._setup_config(whitelist=False, canonical=True, count=99) + self.plugin.ignore_patterns = defaultdict( + list, + { + "the artist": [re.compile(r"^delta blues$", re.IGNORECASE)], + }, + ) + result = self.plugin._resolve_genres( + ["delta blues"], artist="the artist" + ) + assert result == [], ( + "ignored tag must not contribute c14n parents to the result" + ) + + def test_ignorelist_c14n_no_whitelist_keeps_oldest_ancestor(self): + """With c14n on and whitelist off, ignorelist must not change the + parent-selection rule: only the oldest ancestor is returned. + """ + self._setup_config(whitelist=False, canonical=True, count=99) + # ignorelist targets an unrelated genre — must not affect parent walking + self.plugin.ignore_patterns = defaultdict( + list, + {"*": [re.compile(r"^jazz$", re.IGNORECASE)]}, + ) + result = self.plugin._resolve_genres(["delta blues"]) + assert result == ["blues"], ( + "oldest ancestor only must be returned, not the full parent chain" + ) + + def test_ignorelist_c14n_no_whitelist_drops_ignored_ancestor(self): + """With c14n on and whitelist off, if the oldest ancestor itself is + ignored it must be dropped and the tag contributes nothing. + """ + self._setup_config(whitelist=False, canonical=True, count=99) + self.plugin.ignore_patterns = defaultdict( + list, + {"*": [re.compile(r"^blues$", re.IGNORECASE)]}, + ) + result = self.plugin._resolve_genres(["delta blues"]) + assert result == [], ( + "ignored oldest ancestor must not appear in the result" + ) + @pytest.fixture def config(config): @@ -636,3 +715,224 @@ def mock_fetch_artist_genre(self, artist): # Run assert plugin._get_genre(item) == expected_result + + +class TestIgnorelist: + """Ignorelist pattern matching tests independent of resolve_genres.""" + + @pytest.mark.parametrize( + "ignorelist_dict, artist, genre, expected_forbidden", + [ + # Global ignorelist - simple word + ({"*": ["spoken word"]}, "Any Artist", "spoken word", True), + ({"*": ["spoken word"]}, "Any Artist", "jazz", False), + # Global ignorelist - regex pattern + ( + {"*": [".*electronic.*"]}, + "Any Artist", + "ambient electronic", + True, + ), + ({"*": [".*electronic.*"]}, "Any Artist", "jazz", False), + # Artist-specific ignorelist + ({"metallica": ["metal"]}, "Metallica", "metal", True), + ({"metallica": ["metal"]}, "Iron Maiden", "metal", False), + # Case insensitive matching + ({"metallica": ["metal"]}, "METALLICA", "METAL", True), + # Full-match behavior: plain "metal" must not match "heavy metal" + ({"metallica": ["metal"]}, "Metallica", "heavy metal", False), + # Regex behavior: explicit pattern ".*metal.*" may match "heavy metal" + ({"metallica": [".*metal.*"]}, "Metallica", "heavy metal", True), + # Artist-specific ignorelist - exact match + ( + {"metallica": ["^Heavy Metal$"]}, + "Metallica", + "classic metal", + False, + ), + # Combined global and artist-specific + ( + {"*": ["spoken word"], "metallica": ["metal"]}, + "Metallica", + "spoken word", + True, + ), + ( + {"*": ["spoken word"], "metallica": ["metal"]}, + "Metallica", + "metal", + True, + ), + # Complex regex pattern with multiple features (raw string) + ( + { + "fracture": [ + r"^(heavy|black|power|death)?\s?(metal|rock)$|\w+-metal\d*$" + ] + }, + "Fracture", + "power metal", + True, + ), + # Complex regex pattern with multiple features (regular string) + ( + {"amon tobin": ["d(rum)?[ n/]*b(ass)?"]}, + "Amon Tobin", + "dnb", + True, + ), + # Empty ignorelist + ({}, "Any Artist", "any genre", False), + ], + ) + def test_ignorelist_patterns( + self, ignorelist_dict, artist, genre, expected_forbidden + ): + """Test ignorelist pattern matching logic directly.""" + + logger = Mock() + + # Set up compiled ignorelist directly (skipping file parsing) + compiled_ignorelist = defaultdict(list) + for artist_name, patterns in ignorelist_dict.items(): + compiled_ignorelist[artist_name.lower()] = [ + re.compile(pattern, re.IGNORECASE) for pattern in patterns + ] + + result = is_ignored(logger, compiled_ignorelist, genre, artist) + assert result == expected_forbidden + + @pytest.mark.parametrize( + "ignorelist_config, expected_ignorelist", + [ + # Basic artist with single pattern + ({"metallica": ["metal"]}, {"metallica": ["metal"]}), + # Global ignorelist with '*' key + ({"*": ["spoken word"]}, {"*": ["spoken word"]}), + # Multiple patterns per artist + ( + {"metallica": ["metal", ".*rock.*"]}, + {"metallica": ["metal", ".*rock.*"]}, + ), + # Combined global and artist-specific + ( + {"*": ["spoken word"], "metallica": ["metal"]}, + {"*": ["spoken word"], "metallica": ["metal"]}, + ), + # Artist names are preserved by the current loader implementation. + ({"METALLICA": ["METAL"]}, {"METALLICA": ["METAL"]}), + # Invalid regex pattern that gets escaped (full-match literal fallback) + ( + {"artist": ["[invalid(regex"]}, + {"artist": ["\\[invalid\\(regex"]}, + ), + # Disabled via False / empty dict — both produce empty dict + (False, {}), + ({}, {}), + ], + ) + def test_ignorelist_config_format( + self, ignorelist_config, expected_ignorelist + ): + """Test ignorelist parsing/compilation with isolated config state.""" + cfg = confuse.Configuration("test", read=False) + cfg.set({"lastgenre": {"ignorelist": ignorelist_config}}) + + # Mimic the plugin loader behavior in isolation to avoid global config bleed. + if not cfg["lastgenre"]["ignorelist"].get(): + string_ignorelist = {} + else: + raw_strs = cfg["lastgenre"]["ignorelist"].get( + confuse.MappingValues(confuse.Sequence(str)) + ) + string_ignorelist = {} + for artist, patterns in raw_strs.items(): + compiled_patterns = [] + for pattern in patterns: + try: + compiled_patterns.append( + re.compile(pattern, re.IGNORECASE).pattern + ) + except re.error: + compiled_patterns.append( + re.compile( + re.escape(pattern), re.IGNORECASE + ).pattern + ) + string_ignorelist[artist] = compiled_patterns + + assert string_ignorelist == expected_ignorelist + + @pytest.mark.parametrize( + "invalid_config, expected_error_message", + [ + # A plain string (e.g. accidental file path) instead of a mapping + ( + "/path/to/ignorelist.txt", + "must be a dict", + ), + # An integer instead of a mapping + ( + 42, + "must be a dict", + ), + # A list of strings instead of a mapping + ( + ["spoken word", "comedy"], + "must be a dict", + ), + # A mapping with a non-list value + ( + {"metallica": "metal"}, + "must be a list", + ), + ], + ) + def test_ignorelist_config_format_errors( + self, invalid_config, expected_error_message + ): + """Test ignorelist config validation errors in isolated config.""" + cfg = confuse.Configuration("test", read=False) + cfg.set({"lastgenre": {"ignorelist": invalid_config}}) + + with pytest.raises(confuse.ConfigTypeError) as exc_info: + _ = cfg["lastgenre"]["ignorelist"].get( + confuse.MappingValues(confuse.Sequence(str)) + ) + + assert expected_error_message in str(exc_info.value) + + def test_ignorelist_multivalued_album_artist_fallback(self, config): + """`stage_artist=None` fallback must not re-drop per-artist results.""" + config["lastgenre"]["ignorelist"] = { + "Artist A": ["Metal"], + "Artist Group": ["Metal"], + } + config["lastgenre"]["whitelist"] = False + config["lastgenre"]["count"] = 10 + + plugin = lastgenre.LastGenrePlugin() + plugin.setup() + + obj = MagicMock(spec=Album) + obj.albumartist = "Artist Group" + obj.album = "Album Title" + obj.albumartists = ["Artist A", "Artist B"] + obj.get.return_value = [] + + plugin.client = MagicMock() + plugin.client.fetch_track_genre.return_value = [] + plugin.client.fetch_album_genre.return_value = [] + + artist_genres = { + "Artist A": ["Rock"], + "Artist B": ["Metal", "Jazz"], + } + plugin.client.fetch_artist_genre.side_effect = lambda artist: ( + artist_genres.get(artist, []) + ) + + genres, label = plugin._get_genre(obj) + + assert "multi-valued album artist" in label + assert "Metal" in genres