Run binaries on song

commit 9c0b8f9ab5
2025-08-06 15:38:22 -04:00
35 changed files with 1788 additions and 0 deletions

2
.gitignore vendored Normal file

@@ -0,0 +1,2 @@
__pycache__/
cache/

493
__init__.py Normal file

@@ -0,0 +1,493 @@
import os
import subprocess
import hashlib
import threading
import concurrent.futures

from picard import config, log
from picard.ui.options import (
    OptionsPage,
    register_options_page,
)
from picard.ui.itemviews import (
    BaseAction,
    register_track_action,
    # register_album_action,
)
from picard.track import Track

from PyQt5 import QtWidgets

PLUGIN_NAME = "AcousticBrainz-ng"
PLUGIN_AUTHOR = "cy1der"
PLUGIN_DESCRIPTION = """
Analyze track acoustic characteristics using Essentia.
<br/>
This plugin is not affiliated with the <a href='https://acousticbrainz.org'>AcousticBrainz</a> project.<br/>
It is not a 1:1 recreation of the AcousticBrainz schema, but it provides most of the meaningful data.<br/>
External dependencies:
<ul>
    <li><a href='https://essentia.upf.edu'>Essentia</a> binaries compiled with TensorFlow and gaia2 support</li>
    <li>A few MusicNN models (see the user guide for details)</li>
</ul>
<strong>This plugin is CPU heavy!</strong>
"""
PLUGIN_VERSION = "1.0.0"
PLUGIN_API_VERSIONS = ["2.7", "2.8", "2.9", "2.10", "2.11"]
PLUGIN_LICENSE = "GPL-2.0-or-later"
PLUGIN_LICENSE_URL = "https://www.gnu.org/licenses/gpl-2.0.html"
PLUGIN_USER_GUIDE_URL = "https://example.com"  # TODO: Update with actual user guide URL

# (model file name, analysis output file name) pairs; the matching .pb files
# must exist in the configured models directory.
REQUIRED_MODELS: list[tuple[str, str]] = [
    ("msd-musicnn-1", "msd.json"),
    ("mood_acoustic-musicnn-mtt-2", "mood_acoustic.json"),
    ("mood_aggressive-musicnn-mtt-2", "mood_aggressive.json"),
    ("mood_electronic-musicnn-msd-2", "mood_electronic.json"),
    ("mood_happy-musicnn-msd-2", "mood_happy.json"),
    ("mood_party-musicnn-mtt-2", "mood_party.json"),
    ("mood_relaxed-musicnn-msd-2", "mood_relaxed.json"),
    ("mood_sad-musicnn-msd-2", "mood_sad.json"),
    ("danceability-musicnn-msd-2", "danceability.json"),
    ("gender-musicnn-msd-2", "gender.json"),
    ("tonal_atonal-musicnn-mtt-2", "tonality.json"),
    ("voice_instrumental-musicnn-msd-2", "voice_instrumental.json")
]
OPTIONAL_MODELS: list[tuple[str, str]] = [
    ("genre_electronic-musicnn-msd-2", "genre_electronic.json"),
    ("genre_rosamerica-musicnn-msd-2", "genre_rosamerica.json"),
    ("genre_tzanetakis-musicnn-msd-2", "genre_tzanetakis.json")
]
REQUIRED_BINARIES: list[str] = [
    "streaming_extractor_music",
    "streaming_musicnn_predict",
    "streaming_md5",
]

# Disable oneDNN optimizations so TensorFlow does not hog memory.
TF_ENABLE_ONEDNN_OPTS: int = 0
ENV = os.environ.copy()
ENV['TF_ENABLE_ONEDNN_OPTS'] = str(TF_ENABLE_ONEDNN_OPTS)

config.TextOption("setting", "acousticbrainz_ng_binaries_path", os.path.join(os.path.dirname(__file__), "bin"))
config.TextOption("setting", "acousticbrainz_ng_models_path", os.path.join(os.path.dirname(__file__), "models"))
config.TextOption("setting", "acousticbrainz_ng_cache_path", os.path.join(os.path.dirname(__file__), "cache"))
config.IntOption("setting", "acousticbrainz_ng_max_musicnn_workers", 4)
config.BoolOption("setting", "acousticbrainz_ng_autorun", False)
config.BoolOption("setting", "acousticbrainz_ng_analyze_optional", False)
config.BoolOption("setting", "acousticbrainz_ng_save_raw", False)

class AcousticBrainzNGOptionsPage(OptionsPage):
    NAME = "acousticbrainz_ng"
    TITLE = "AcousticBrainz-ng"
    PARENT = "plugins"

    def __init__(self, parent=None) -> None:
        super().__init__(parent)
        self.setup_ui()

    def _create_path_input_layout(self, line_edit: QtWidgets.QLineEdit, browse_callback, check_callback=None) -> QtWidgets.QHBoxLayout:
        layout = QtWidgets.QHBoxLayout()

        browse_button = QtWidgets.QPushButton("Browse", self)
        browse_button.clicked.connect(browse_callback)

        layout.addWidget(line_edit)
        layout.addWidget(browse_button)

        if check_callback:
            check_button = QtWidgets.QPushButton("Check", self)
            check_button.clicked.connect(check_callback)
            layout.addWidget(check_button)

        return layout

    def setup_ui(self) -> None:
        layout = QtWidgets.QVBoxLayout(self)

        options_group = QtWidgets.QGroupBox("Options", self)
        options_group.setSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum)
        options_layout = QtWidgets.QVBoxLayout(options_group)

        self.autorun_checkbox = QtWidgets.QCheckBox("Autorun analysis", self)
        self.autorun_checkbox.setToolTip("Automatically run analysis on new tracks")

        self.analyze_optional_checkbox = QtWidgets.QCheckBox("Analyze optional models", self)
        self.analyze_optional_checkbox.setToolTip("Include optional models in the analysis")

        self.save_raw_checkbox = QtWidgets.QCheckBox("Save raw values", self)
        self.save_raw_checkbox.setToolTip("Save raw MusicNN numbers in the metadata")

        musicnn_workers_layout = QtWidgets.QHBoxLayout()
        musicnn_workers_label = QtWidgets.QLabel("Max MusicNN workers:", self)
        musicnn_workers_label.setSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Preferred)

        self.musicnn_workers_input = QtWidgets.QSpinBox(self)
        self.musicnn_workers_input.setToolTip("Maximum number of concurrent MusicNN workers")
        self.musicnn_workers_input.setRange(1, max(len(REQUIRED_MODELS), len(OPTIONAL_MODELS)))
        self.musicnn_workers_input.setSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Preferred)

        musicnn_workers_layout.addWidget(musicnn_workers_label)
        musicnn_workers_layout.addStretch()
        musicnn_workers_layout.addWidget(self.musicnn_workers_input)

        options_layout.addWidget(self.autorun_checkbox)
        options_layout.addWidget(self.analyze_optional_checkbox)
        options_layout.addWidget(self.save_raw_checkbox)
        options_layout.addLayout(musicnn_workers_layout)

        layout.addWidget(options_group)

        paths_group = QtWidgets.QGroupBox("Paths", self)
        paths_group.setSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum)
        paths_layout = QtWidgets.QVBoxLayout(paths_group)

        # Binaries path
        self.binaries_path_input = QtWidgets.QLineEdit(self)
        self.binaries_path_input.setPlaceholderText("Path to Essentia binaries")
        binaries_layout = self._create_path_input_layout(
            self.binaries_path_input,
            lambda: self._browse_folder(self.binaries_path_input),
            lambda: self._check_binaries(show_success=True)
        )

        # Models path
        self.models_path_input = QtWidgets.QLineEdit(self)
        self.models_path_input.setPlaceholderText("Path to MusicNN models")
        models_layout = self._create_path_input_layout(
            self.models_path_input,
            lambda: self._browse_folder(self.models_path_input),
            lambda: self._check_models(show_success=True, check_optional=True)
        )

        # Cache path
        self.cache_path_input = QtWidgets.QLineEdit(self)
        self.cache_path_input.setPlaceholderText("Path to cache directory")
        cache_layout = self._create_path_input_layout(
            self.cache_path_input,
            lambda: self._browse_folder(self.cache_path_input)
        )

        paths_layout.addWidget(QtWidgets.QLabel("Binaries", self))
        paths_layout.addLayout(binaries_layout)
        paths_layout.addWidget(QtWidgets.QLabel("Models", self))
        paths_layout.addLayout(models_layout)
        paths_layout.addWidget(QtWidgets.QLabel("Cache", self))
        paths_layout.addLayout(cache_layout)

        layout.addWidget(paths_group)
        layout.addStretch()
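
    # UI-side validators: these mirror AcousticBrainzNG's runtime checks but
    # report their results through message boxes instead of the log.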
    def _check_binaries(self, show_success=False) -> bool:
        path = self.binaries_path_input.text()

        if not path or not os.path.exists(path):
            QtWidgets.QMessageBox.warning(self, "Binaries", "Invalid or empty path.")
            return False

        missing_binaries = []
        for binary in REQUIRED_BINARIES:
            binary_path = AcousticBrainzNG._get_binary_path(binary, path)
            if not os.path.exists(binary_path):
                missing_binaries.append(binary)

        if missing_binaries:
            message = "Missing binaries:\n" + "\n".join(missing_binaries)
            QtWidgets.QMessageBox.warning(self, "Binaries", message)
            return False

        if show_success:
            QtWidgets.QMessageBox.information(self, "Binaries", "All binaries found!")
        return True

    def _check_models(self, show_success=False, check_optional=True) -> bool:
        path = self.models_path_input.text()

        if not path or not os.path.exists(path):
            QtWidgets.QMessageBox.warning(self, "Models", "Invalid or empty path.")
            return False

        missing_required = []
        for model in REQUIRED_MODELS:
            model_path = os.path.join(path, f"{model[0]}.pb")
            if not os.path.exists(model_path):
                missing_required.append(model[0])

        missing_optional = []
        if check_optional:
            for model in OPTIONAL_MODELS:
                model_path = os.path.join(path, f"{model[0]}.pb")
                if not os.path.exists(model_path):
                    missing_optional.append(model[0])

        if missing_required:
            message = "Missing required models:\n" + "\n".join(f"{model}.pb" for model in missing_required)
            QtWidgets.QMessageBox.warning(self, "Models", message)
            return False

        if missing_optional and check_optional:
            message = "Missing optional models:\n" + "\n".join(f"{model}.pb" for model in missing_optional)
            QtWidgets.QMessageBox.information(self, "Models", message)

        if show_success:
            if missing_optional and check_optional:
                QtWidgets.QMessageBox.information(self, "Models", "All required models found! Some optional models are missing.")
            else:
                QtWidgets.QMessageBox.information(self, "Models", "All models found!")
        return True

    def _browse_folder(self, line_edit: QtWidgets.QLineEdit) -> None:
        folder = QtWidgets.QFileDialog.getExistingDirectory(
            self, "Select Folder",
            line_edit.text() or os.path.expanduser("~")
        )
        if folder:
            line_edit.setText(folder)

    def load(self):
        self.autorun_checkbox.setChecked(config.setting["acousticbrainz_ng_autorun"] or False)
        self.analyze_optional_checkbox.setChecked(config.setting["acousticbrainz_ng_analyze_optional"] or False)
        self.save_raw_checkbox.setChecked(config.setting["acousticbrainz_ng_save_raw"] or False)
        self.musicnn_workers_input.setValue(config.setting["acousticbrainz_ng_max_musicnn_workers"] or 4)
        self.binaries_path_input.setText(config.setting["acousticbrainz_ng_binaries_path"])
        self.models_path_input.setText(config.setting["acousticbrainz_ng_models_path"])
        self.cache_path_input.setText(config.setting["acousticbrainz_ng_cache_path"])

    def save(self):
        self._check_binaries()
        self._check_models(show_success=False, check_optional=False)

        config.setting["acousticbrainz_ng_autorun"] = self.autorun_checkbox.isChecked()
        config.setting["acousticbrainz_ng_analyze_optional"] = self.analyze_optional_checkbox.isChecked()
        config.setting["acousticbrainz_ng_save_raw"] = self.save_raw_checkbox.isChecked()

        # Clamp the worker count to the spin box's valid range.
        max_workers = max(1, min(self.musicnn_workers_input.value(), max(len(REQUIRED_MODELS), len(OPTIONAL_MODELS))))
        config.setting["acousticbrainz_ng_max_musicnn_workers"] = max_workers

        config.setting["acousticbrainz_ng_binaries_path"] = self.binaries_path_input.text()
        config.setting["acousticbrainz_ng_models_path"] = self.models_path_input.text()
        config.setting["acousticbrainz_ng_cache_path"] = self.cache_path_input.text()

class AcousticBrainzNG:
    @staticmethod
    def _get_binary_path(binary_name: str, binaries_path: str) -> str:
        binary_path = os.path.join(binaries_path, binary_name)
        if os.name == 'nt':  # Windows
            binary_path += '.exe'
        return binary_path

    def _get_binary_paths(self) -> tuple[str, str]:
        binaries_path = config.setting["acousticbrainz_ng_binaries_path"]
        if not binaries_path:
            raise ValueError("Binaries path not configured")

        musicnn_binary_path = self._get_binary_path("streaming_musicnn_predict", binaries_path)
        gaia_binary_path = self._get_binary_path("streaming_extractor_music", binaries_path)

        if not os.path.exists(musicnn_binary_path):
            raise FileNotFoundError(f"Binary {musicnn_binary_path} not found")
        if not os.path.exists(gaia_binary_path):
            raise FileNotFoundError(f"Binary {gaia_binary_path} not found")

        return musicnn_binary_path, gaia_binary_path
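
    # Each model invocation is a separate streaming_musicnn_predict process;
    # a thread pool caps concurrency, and existing outputs are skipped so a
    # re-run only does the missing work.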
    def _run_musicnn_models(self, models: list[tuple[str, str]], musicnn_binary_path: str, file: str, output_path: str) -> None:
        models_path = config.setting["acousticbrainz_ng_models_path"]
        if not models_path:
            raise ValueError("Models path not configured")

        def run_musicnn_model(model_info):
            model_name, output_file = model_info
            try:
                model_path = os.path.join(models_path, f"{model_name}.pb")
                if not os.path.exists(model_path):
                    raise FileNotFoundError(f"Model {model_name} not found at {model_path}")

                output_file_path = os.path.join(output_path, output_file)
                if os.path.exists(output_file_path):
                    log.debug(f"{output_file_path} already exists, skipping {model_name}")
                    return

                result = subprocess.run(
                    [musicnn_binary_path, model_path, file, output_file_path],
                    capture_output=True,
                    text=True,
                    env=ENV
                )
                if result.returncode != 0:
                    log.error(f"Prediction with {model_name} failed: {result.stderr}")
            except FileNotFoundError as e:
                log.error(f"Model {model_name} not found: {e}")
            except Exception as e:
                log.error(f"Error processing model {model_name}: {e}")

        max_workers = config.setting["acousticbrainz_ng_max_musicnn_workers"] or 4
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(run_musicnn_model, model) for model in models]
            concurrent.futures.wait(futures)
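
    # The music extractor is independent of the MusicNN models, so it runs on
    # its own thread while the worker pool handles REQUIRED_MODELS; both must
    # finish before the track counts as analyzed.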
    def analyze_required(self, metadata: dict, file: str) -> None:
        if not self._check_binaries():
            log.error("Essentia binaries not found")
            return
        if not self._check_required_models():
            log.error("Required models not found")
            return

        try:
            musicnn_binary_path, gaia_binary_path = self._get_binary_paths()
        except (ValueError, FileNotFoundError) as e:
            log.error(str(e))
            return

        output_path = self._generate_cache_folder(metadata, file)
        if not output_path:
            raise ValueError("Failed to generate cache folder path")

        def run_gaia():
            gaia_output_path = os.path.join(output_path, "gaia.json")
            if os.path.exists(gaia_output_path):
                log.debug(f"Gaia output already exists at {gaia_output_path}, skipping")
                return

            result = subprocess.run(
                [gaia_binary_path, file, gaia_output_path],
                capture_output=True,
                text=True,
                env=ENV
            )
            if result.returncode != 0:
                log.error(f"Gaia extraction failed: {result.stderr}")

        gaia_thread = threading.Thread(target=run_gaia)
        gaia_thread.start()

        self._run_musicnn_models(REQUIRED_MODELS, musicnn_binary_path, file, output_path)
        gaia_thread.join()

    def analyze_optional(self, metadata: dict, file: str) -> None:
        if not self._check_binaries():
            log.error("Essentia binaries not found")
            return
        if not self._check_optional_models():
            log.error("Optional models not found")
            return

        try:
            musicnn_binary_path, _ = self._get_binary_paths()
        except (ValueError, FileNotFoundError) as e:
            log.error(str(e))
            return

        output_path = self._generate_cache_folder(metadata, file)
        if not output_path:
            raise ValueError("Failed to generate cache folder path")

        self._run_musicnn_models(OPTIONAL_MODELS, musicnn_binary_path, file, output_path)
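
    # Cache layout: <cache>/<album artist MBID>/<release group MBID>/
    # <release MBID>/<recording MBID>, with an audio hash standing in for the
    # recording MBID when the track is untagged.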
    def _generate_cache_folder(self, metadata: dict, file_path: str) -> str:
        cache_base = config.setting["acousticbrainz_ng_cache_path"]
        if not cache_base:
            raise ValueError("Cache path not configured")

        release_artist_mbid = metadata.get('musicbrainz_albumartistid', 'NO_MBID')
        release_group_mbid = metadata.get('musicbrainz_releasegroupid', 'NO_MBID')
        release_mbid = metadata.get('musicbrainz_albumid', 'NO_MBID')

        recording_mbid = metadata.get('musicbrainz_recordingid')
        if not recording_mbid:
            recording_mbid = self._get_audio_hash(file_path)

        cache_folder = os.path.join(
            str(cache_base),
            str(release_artist_mbid),
            str(release_group_mbid),
            str(release_mbid),
            str(recording_mbid)
        )

        os.makedirs(cache_folder, exist_ok=True)
        return cache_folder
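
    # streaming_md5 hashes the audio stream, so the same recording in
    # different containers maps to the same cache entry; hashing the file path
    # is only a last resort.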
    def _get_audio_hash(self, file_path: str) -> str:
        try:
            binaries_path = config.setting["acousticbrainz_ng_binaries_path"]
            if not binaries_path:
                raise ValueError("Binaries path not configured")

            binary_path = self._get_binary_path("streaming_md5", binaries_path)

            result = subprocess.run(
                [binary_path, file_path],
                capture_output=True,
                text=True,
                env=ENV
            )

            if result.returncode == 0:
                for line in result.stdout.strip().split('\n'):
                    if line.startswith('MD5:'):
                        return line.split('MD5:')[1].strip()

            log.error(f"Failed to calculate audio hash: {result.stderr}")
        except Exception as e:
            log.error(f"Error calculating audio hash: {e}")

        return f"fallback_{hashlib.md5(file_path.encode('utf-8')).hexdigest()}"

    def _check_binaries(self) -> bool:
        path = config.setting["acousticbrainz_ng_binaries_path"]
        if not path or not os.path.exists(path):
            return False

        for binary in REQUIRED_BINARIES:
            binary_path = self._get_binary_path(binary, path)
            if not os.path.exists(binary_path):
                return False

        return True

    def _check_models(self, models: list[tuple[str, str]]) -> bool:
        path = config.setting["acousticbrainz_ng_models_path"]
        if not path or not os.path.exists(path):
            return False

        for model in models:
            model_path = os.path.join(path, f"{model[0]}.pb")
            if not os.path.exists(model_path):
                return False

        return True

    def _check_required_models(self) -> bool:
        return self._check_models(REQUIRED_MODELS)

    def _check_optional_models(self) -> bool:
        return self._check_models(OPTIONAL_MODELS)

acousticbrainz_ng = AcousticBrainzNG()

class AcousticBrainzNGTrackAction(BaseAction):
    NAME = f"Analyze with {PLUGIN_NAME}"

    def callback(self, objs):
        tracks = [o for o in objs if isinstance(o, Track)]
        for track in tracks:
            if not track.files:
                continue  # Nothing to analyze for tracks without a matched file
            acousticbrainz_ng.analyze_required(track.metadata, track.files[0].filename)
            if config.setting["acousticbrainz_ng_analyze_optional"]:
                acousticbrainz_ng.analyze_optional(track.metadata, track.files[0].filename)

register_options_page(AcousticBrainzNGOptionsPage)
register_track_action(AcousticBrainzNGTrackAction())

BIN
bin/streaming_extractor_music Executable file

Binary file not shown.

BIN
bin/streaming_md5 Executable file

Binary file not shown.

BIN
bin/streaming_musicnn_predict Executable file

Binary file not shown.

87
models/danceability-musicnn-msd-2.json Normal file

@@ -0,0 +1,87 @@
{
"name": "danceability",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/danceability/danceability-musicnn-msd-2.pb",
"version": "1",
"description": "classification of music by danceability",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"danceable",
"not_danceable"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "In-house MTG collection",
"size": "306 full tracks, 124/182 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.93
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
2
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [
1,
2
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
100
],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/danceability-musicnn-msd-2.pb Normal file

Binary file not shown.

67
models/gender-musicnn-msd-2.json Normal file

@@ -0,0 +1,67 @@
{
"name": "gender",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/gender/gender-musicnn-msd-2.pb",
"version": "1",
"description": "classification of vocal music by gender (male/female)",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": ["female", "male"],
"model_types": ["frozen_model"],
"dataset": {
"name": "In-house MTG collection",
"size": "3311 full tracks, 1508/1803 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.88
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [187, 96]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [1, 2],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [1, 2],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [1, 100],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [1, 200],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/gender-musicnn-msd-2.pb Normal file

Binary file not shown.

67
models/genre_electronic-musicnn-msd-2.json Normal file

@@ -0,0 +1,67 @@
{
"name": "genre electronic",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/genre_electronic/genre_electronic-musicnn-msd-2.pb",
"version": "1",
"description": "classification of electronic music by subgenres",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": ["ambient", "drum and bass", "house", "techno", "trance"],
"model_types": ["frozen_model"],
"dataset": {
"name": "In-house MTG collection",
"size": "250 track excerpts, 50 per genre",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.95
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [187, 96]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [1, 5],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [1, 5],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [1, 100],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [1, 200],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/genre_electronic-musicnn-msd-2.pb Normal file

Binary file not shown.

77
models/genre_rosamerica-musicnn-msd-2.json Normal file

@@ -0,0 +1,77 @@
{
"name": "genre Rosamerica",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/genre_rosamerica/genre_rosamerica-musicnn-msd-2.pb",
"version": "1",
"description": "classification of music by genre",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"classical",
"dance",
"hiphop",
"jazz",
"pop",
"rhythm and blues",
"rock",
"speech"
],
"model_types": ["frozen_model"],
"dataset": {
"name": "In-house MTG collection created by a musicologist",
"citation": "@phdthesis{i2009audio,\n title={Audio content processing for automatic music genre classification: descriptors, databases, and classifiers},\n author={i Termens, Enric Guaus},\n year={2009},\n school={PhD thesis, Universitat Pompeu Fabra, Barcelona, Spain}\n}",
"size": "400 tracks, 50 per genre",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.92
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [187, 96]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [1, 8],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [1, 8],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [1, 100],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [1, 200],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/genre_rosamerica-musicnn-msd-2.pb Normal file

Binary file not shown.

79
models/genre_tzanetakis-musicnn-msd-2.json Normal file

@@ -0,0 +1,79 @@
{
"name": "genre GTZAN",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/genre_tzanetakis/genre_tzanetakis-musicnn-msd-2.pb",
"version": "1",
"description": "classification of music by genre",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"blues",
"classical",
"country",
"disco",
"hiphop",
"jazz",
"metal",
"pop",
"reggae",
"rock"
],
"model_types": ["frozen_model"],
"dataset": {
"name": "the GTZAN Genre Collection",
"citation": "@article{tzanetakis2002musical,\n title={Musical genre classification of audio signals},\n author={Tzanetakis, George and Cook, Perry},\n journal={IEEE Transactions on speech and audio processing},\n volume={10},\n number={5},\n pages={293--302},\n year={2002},\n publisher={IEEE}\n}",
"size": "1000 track excerpts, 100 per genre",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.83
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [187, 96]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [1, 10],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [1, 10],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [1, 100],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [1, 200],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/genre_tzanetakis-musicnn-msd-2.pb Normal file

Binary file not shown.

88
models/mood_acoustic-musicnn-mtt-2.json Normal file

@@ -0,0 +1,88 @@
{
"name": "mood acoustic",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/mood_acoustic/mood_acoustic-musicnn-mtt-2.pb",
"version": "1",
"description": "classification of music by type of sound (acoustic/non-acoustic)",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"acoustic",
"non_acoustic"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "In-house MTG collection",
"citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
"size": "321 full tracks + excerpts, 193/128 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.93
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
2
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [
1,
2
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
100
],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/mood_acoustic-musicnn-mtt-2.pb Normal file

Binary file not shown.

88
models/mood_aggressive-musicnn-mtt-2.json Normal file

@@ -0,0 +1,88 @@
{
"name": "mood aggressive",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/mood_aggressive/mood_aggressive-musicnn-mtt-2.pb",
"version": "1",
"description": "classification of music by mood (aggressive/non-aggressive)",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"aggressive",
"not_aggressive"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "In-house MTG collection ",
"citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
"size": "280 full tracks + excerpts, 133/147 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.96
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
2
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [
1,
2
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
100
],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/mood_aggressive-musicnn-mtt-2.pb Normal file

Binary file not shown.

88
models/mood_electronic-musicnn-msd-2.json Normal file

@@ -0,0 +1,88 @@
{
"name": "mood electronic",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/mood_electronic/mood_electronic-musicnn-msd-2.pb",
"version": "1",
"description": "classification of music by type of sound (electronic/non-electronic)",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"electronic",
"non_electronic"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "In-house MTG collection",
"citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
"size": "332 full tracks + excerpts, 164/168 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.95
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
2
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [
1,
2
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
100
],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/mood_electronic-musicnn-msd-2.pb Normal file

Binary file not shown.

88
models/mood_happy-musicnn-msd-2.json Normal file

@@ -0,0 +1,88 @@
{
"name": "mood happy",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/mood_happy/mood_happy-musicnn-msd-2.pb",
"version": "1",
"description": "classification of music by mood (happy/non-happy)",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"happy",
"non_happy"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "In-house MTG collection",
"citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
"size": "302 full tracks + excerpts, 139/163 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.81
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
2
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [
1,
2
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
100
],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/mood_happy-musicnn-msd-2.pb Normal file

Binary file not shown.

88
models/mood_party-musicnn-mtt-2.json Normal file

@@ -0,0 +1,88 @@
{
"name": "mood party",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/mood_party/mood_party-musicnn-mtt-2.pb",
"version": "1",
"description": "classification of music by mood (party/non-party)",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"non_party",
"party"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "In-house MTG collection",
"citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
"size": "349 full tracks + excerpts, 198/151 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.92
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
2
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [
1,
2
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
100
],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/mood_party-musicnn-mtt-2.pb Normal file

Binary file not shown.

88
models/mood_relaxed-musicnn-msd-2.json Normal file

@@ -0,0 +1,88 @@
{
"name": "mood relaxed",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/mood_relaxed/mood_relaxed-musicnn-msd-2.pb",
"version": "1",
"description": "classification of music by mood (relaxed/non-relaxed)",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"non_relaxed",
"relaxed"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "In-house MTG collection",
"citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
"size": "446 full tracks + excerpts, 145/301 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.9
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
2
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [
1,
2
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
100
],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/mood_relaxed-musicnn-msd-2.pb Normal file

Binary file not shown.

88
models/mood_sad-musicnn-msd-2.json Normal file

@@ -0,0 +1,88 @@
{
"name": "mood sad",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/mood_sad/mood_sad-musicnn-msd-2.pb",
"version": "1",
"description": "classification of music by mood (sad/non-sad)",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"non_sad",
"sad"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "In-house MTG collection",
"citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
"size": "230 full tracks + excerpts, 96/134 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.86
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
2
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [
1,
2
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
100
],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/mood_sad-musicnn-msd-2.pb Normal file

Binary file not shown.

126
models/msd-musicnn-1.json Normal file

@@ -0,0 +1,126 @@
{
"name": "MSD MusiCNN",
"type": "auto-tagging",
"link": "https://essentia.upf.edu/models/feature-extractors/musicnn/msd-musicnn-1.pb",
"version": "1",
"description": "prediction of the top-50 tags in the dataset",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-03-31",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"rock",
"pop",
"alternative",
"indie",
"electronic",
"female vocalists",
"dance",
"00s",
"alternative rock",
"jazz",
"beautiful",
"metal",
"chillout",
"male vocalists",
"classic rock",
"soul",
"indie rock",
"Mellow",
"electronica",
"80s",
"folk",
"90s",
"chill",
"instrumental",
"punk",
"oldies",
"blues",
"hard rock",
"ambient",
"acoustic",
"experimental",
"female vocalist",
"guitar",
"Hip-Hop",
"70s",
"party",
"country",
"easy listening",
"sexy",
"catchy",
"funk",
"electro",
"heavy metal",
"Progressive rock",
"60s",
"rnb",
"indie pop",
"sad",
"House",
"happy"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "The Millon Song Dataset",
"citation": "http://millionsongdataset.com/",
"size": "200k up to two minutes audio previews",
"metrics": {
"ROC-AUC": 0.88,
"PR-AUC": 0.29
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
50
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
50
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/msd-musicnn-1.pb Normal file

Binary file not shown.

87
models/tonal_atonal-musicnn-mtt-2.json Normal file

@@ -0,0 +1,87 @@
{
"name": "tonal/atonal",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/tonal_atonal/tonal_atonal-musicnn-mtt-2.pb",
"version": "1",
"description": "classification of music by tonality (tonal/atonal)",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"atonal",
"tonal"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "In-house MTG collection",
"size": "345 track excerpts, 145/200 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.91
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
2
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [
1,
2
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
100
],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/tonal_atonal-musicnn-mtt-2.pb Normal file

Binary file not shown.

87
models/voice_instrumental-musicnn-msd-2.json Normal file

@@ -0,0 +1,87 @@
{
"name": "voice/instrumental classifiers",
"type": "multi-class classifier",
"link": "https://essentia.upf.edu/models/classifiers/voice_instrumental/voice_instrumental-musicnn-msd-2.pb",
"version": "1",
"description": "classification into music with voice/instrumental",
"author": "Pablo Alonso",
"email": "pablo.alonso@upf.edu",
"release_date": "2020-07-07",
"framework": "tensorflow",
"framework_version": "1.15.0",
"classes": [
"instrumental",
"voice"
],
"model_types": [
"frozen_model"
],
"dataset": {
"name": "In-house MTG collection",
"size": "1000 track excerpts, 500 per class",
"metrics": {
"5-fold_cross_validation_normalized_accuracy": 0.98
}
},
"schema": {
"inputs": [
{
"name": "model/Placeholder",
"type": "float",
"shape": [
187,
96
]
}
],
"outputs": [
{
"name": "model/Sigmoid",
"type": "float",
"shape": [
1,
2
],
"op": "Sigmoid",
"output_purpose": "predictions"
},
{
"name": "model/dense_2/BiasAdd",
"type": "float",
"shape": [
1,
2
],
"op": "fully connected",
"description": "logits",
"output_purpose": ""
},
{
"name": "model/dense_1/BiasAdd",
"type": "float",
"shape": [
1,
100
],
"op": "fully connected",
"description": "penultimate layer",
"output_purpose": ""
},
{
"name": "model/dense/BiasAdd",
"type": "float",
"shape": [
1,
200
],
"op": "fully connected",
"output_purpose": "embeddings"
}
]
},
"citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
"inference": {
"sample_rate": 16000,
"algorithm": "TensorflowPredictMusiCNN"
}
}

BIN
models/voice_instrumental-musicnn-msd-2.pb Normal file

Binary file not shown.