diff --git a/.gitignore b/.gitignore index 36b13f1..ba0430d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,176 +1 @@ -# ---> Python -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# UV -# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -#uv.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/latest/usage/project/#working-with-version-control -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - -# Ruff stuff: -.ruff_cache/ - -# PyPI configuration file -.pypirc - +__pycache__/ \ No newline at end of file diff --git a/README.md b/README.md index 7d976eb..05941cd 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,10 @@ -# picard-discogs-genre +# Discogs Genre & Style -Use genre & style information from Discogs for Picard \ No newline at end of file +Use genre & style information from Discogs for Picard + +## Getting Started + +1. Clone this repository to your Picard plugins folder +2. Enable the plugin +3. Turn off "Use genres from MusicBrainz" in Picard settings (Metadata > Genres) +4. Set the [personal access token](https://www.discogs.com/settings/developers) (optional, but recommended) diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..91a569a --- /dev/null +++ b/__init__.py @@ -0,0 +1,242 @@ +import re +import json +import urllib.parse +import functools +import unicodedata +from PyQt5 import QtWidgets + +from picard import log, config +from picard.webservice import ratecontrol +from picard.metadata import register_album_metadata_processor +from picard.ui.options import register_options_page, OptionsPage +from picard.album import Album +from picard.metadata import Metadata + +from .constants import * + +class DiscogsGenreOptionsPage(OptionsPage): + NAME = "discogs_genre_and_style" + TITLE = "Discogs Genre & Style" + PARENT = "plugins" + + options = CONFIG_OPTIONS + + def __init__(self, parent=None) -> None: + super().__init__(parent) + self.setup_ui() + + def setup_ui(self) -> None: + layout = QtWidgets.QVBoxLayout(self) + + options_group = QtWidgets.QGroupBox("Options", self) + options_group.setSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum) + options_layout = QtWidgets.QVBoxLayout(options_group) + + self.token_input = QtWidgets.QLineEdit(self) + self.token_input.setPlaceholderText("Account > Settings > Developers > Generate token") + + min_overlap_layout = QtWidgets.QHBoxLayout() + + min_overlap_label = QtWidgets.QLabel("Minimum Token Overlap", self) + min_overlap_label.setToolTip("Minimum percentage of token overlap required to consider a Discogs search result as a match (0-100%)") + min_overlap_label.setSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Fixed) + self.min_overlap_input = QtWidgets.QSpinBox(self) + self.min_overlap_input.setRange(0, 100) + self.min_overlap_input.setSuffix(" %") + self.min_overlap_input.setSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Preferred) + + min_overlap_layout.addWidget(min_overlap_label) + min_overlap_layout.addStretch() + min_overlap_layout.addWidget(self.min_overlap_input) + + options_layout.addWidget(QtWidgets.QLabel("Personal Access Token (recommended for higher rate limits)", self)) + options_layout.addWidget(self.token_input) + options_layout.addLayout(min_overlap_layout) + + layout.addWidget(options_group) + layout.addStretch() + + def load(self): + self.token_input.setText(config.setting["discogs_personal_access_token"] or "") + self.min_overlap_input.setValue(config.setting["discogs_minimum_token_overlap"] or 80) + + def save(self): + config.setting["discogs_personal_access_token"] = self.token_input.text().strip() + config.setting["discogs_minimum_token_overlap"] = self.min_overlap_input.value() + +class DiscogsGenreProcessor: + def __init__(self): + self.host = "api.discogs.com" + + def _normalize_text(self, text: str) -> str: + if not text: + return "" + + text = unicodedata.normalize("NFKC", text).casefold() + + text = unicodedata.normalize("NFKD", text) + text = "".join(ch for ch in text if unicodedata.category(ch) != "Mn") + + text = "".join(ch if ch.isalnum() else " " for ch in text) + return re.sub(r"\s+", " ", text).strip() + + def _token_overlap(self, left: str, right: str) -> float: + left_tokens = set(left.split()) + right_tokens = set(right.split()) + if not left_tokens or not right_tokens: + return 0.0 + return len(left_tokens & right_tokens) / len(left_tokens) + + def _apply_rate_limit(self, token): + # 60 req/min with token (1000ms), 25 req/min without (2400ms) + delay = 1000 if token else 2400 + ratecontrol.set_minimum_delay((self.host, 443), delay) + + def process_album(self, album: Album, metadata: Metadata, release: dict): + token = (config.setting["discogs_personal_access_token"] or "").strip() + self._apply_rate_limit(token) + + discogs_url = None + + for rel in release.get('relations', []): + if rel.get('type') == 'discogs': + discogs_url = rel.get('url', {}).get('resource') + break + + if not discogs_url: + for rel in release.get('release-group', {}).get('relations', []): + if rel.get('type') == 'discogs': + discogs_url = rel.get('url', {}).get('resource') + break + + if discogs_url: + match = re.search(r'/(release|master)/(\d+)', discogs_url) + if match: + entity_type = match.group(1) + entity_id = match.group(2) + + album._requests += 1 + self.fetch_discogs_tags(album, metadata, entity_type, entity_id, token) + return + + + rg_credits = release.get('release-group', {}).get('artist-credit', []) + credits = rg_credits or release.get('artist-credit', []) + artists = [ + c.get('name') or c.get('artist', {}).get('name', '') + for c in credits + if isinstance(c, dict) + ] + title = metadata.get('album') + + if artists and title: + album._requests += 1 + self.search_discogs(album, metadata, artists, title, token) + + def search_discogs(self, album: Album, metadata: Metadata, artists: list[str], title: str, token: str): + query_params = { + 'artist': self._normalize_text(artists[0]), + 'release_title': self._normalize_text(title), + 'type': 'master' + } + + if token: + query_params['token'] = token + + path = "/database/search?" + urllib.parse.urlencode(query_params) + full_url = f"https://{self.host}{path}" + + album.tagger.webservice.get_url( # type: ignore + url=full_url, + parse_response_type="json", + handler=functools.partial(self.handle_search_response, album, metadata, artists, title) + ) + + def handle_search_response(self, album: Album, metadata: Metadata, artists: list[str], title: str, response, reply, error): + try: + if error or not response: + log.error(f"Discogs Search API failed: {error}") + return + + results = response.get('results', []) + valid_result = self.validate_search_results(artists, title, results) + + if valid_result: + genres = valid_result.get('genre', []) + styles = valid_result.get('style', []) + + for genre in genres: + metadata.add('genre', genre) + for style in styles: + metadata.add('style', style) + finally: + album._requests -= 1 + if album._requests == 0: + album._finalize_loading(None) + + def validate_search_results(self, mb_artists: list[str], mb_title: str, results: list): + norm_mb_artists = [self._normalize_text(artist) for artist in mb_artists if artist] + norm_mb_title = self._normalize_text(mb_title) + + for result in results: + raw_title = result.get('title', '') or '' + norm_full_title = self._normalize_text(raw_title) + + # "Artist(s) - Release" in search results + parts = raw_title.split(' - ', 1) + norm_dc_artist = self._normalize_text(parts[0]) if len(parts) > 1 else norm_full_title + norm_dc_release = self._normalize_text(parts[1]) if len(parts) > 1 else norm_full_title + + title_match = ( + norm_mb_title == norm_dc_release + or self._token_overlap(norm_mb_title, norm_dc_release) >= (config.setting["discogs_minimum_token_overlap"] or 80) / 100 + or self._token_overlap(norm_mb_title, norm_full_title) >= (config.setting["discogs_minimum_token_overlap"] or 80) / 100 + ) + + artists_match = all( + (artist in norm_dc_artist) + or (artist in norm_full_title) + or (self._token_overlap(artist, norm_dc_artist) >= (config.setting["discogs_minimum_token_overlap"] or 80) / 100) + for artist in norm_mb_artists + ) + + if title_match and artists_match: + return result + + return None + + def fetch_discogs_tags(self, album: Album, metadata: Metadata, entity_type: str, entity_id: str, token: str): + path = f"/{entity_type}s/{entity_id}" + if token: + path += f"?token={token}" + + full_url = f"https://{self.host}{path}" + + album.tagger.webservice.get_url( # type: ignore + url=full_url, + parse_response_type="json", + priority=True, + handler=functools.partial(self.handle_tags_response, album, metadata) + ) + + def handle_tags_response(self, album: Album, metadata: Metadata, response, reply, error): + try: + if error or not response: + log.error(f"Discogs Tags API failed: {error}") + return + + genres = response.get('genres', []) + styles = response.get('styles', []) + + for genre in genres: + metadata.add('genre', genre) + for style in styles: + metadata.add('style', style) + + finally: + album._requests -= 1 + if album._requests == 0: + album._finalize_loading(None) + +register_options_page(DiscogsGenreOptionsPage) +register_album_metadata_processor(DiscogsGenreProcessor().process_album) \ No newline at end of file diff --git a/constants.py b/constants.py new file mode 100644 index 0000000..f79914d --- /dev/null +++ b/constants.py @@ -0,0 +1,16 @@ +from typing import List +from picard.config import TextOption, IntOption, Option + +PLUGIN_NAME = "Discogs Genre & Style" +PLUGIN_AUTHOR = "cy1der" +PLUGIN_DESCRIPTION = "Fetches genres and styles from Discogs" +PLUGIN_VERSION = "1.0.0" +PLUGIN_API_VERSIONS = ["2.0", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8", "2.9", "2.10", "2.11", "2.12", "2.13"] +PLUGIN_LICENSE = "GPL-2.0-or-later" +PLUGIN_LICENSE_URL = "https://www.gnu.org/licenses/gpl-2.0.html" +PLUGIN_USER_GUIDE_URL = "https://git.altaiar.dev/ahmed/picard-discogs-genre" + +CONFIG_OPTIONS: List[Option] = [ + TextOption("setting", "discogs_personal_access_token", ""), + IntOption("setting", "discogs_minimum_token_overlap", 80) +] \ No newline at end of file