Run binaries on song
2 .gitignore vendored Normal file
@@ -0,0 +1,2 @@
__pycache__/
cache/
493 __init__.py Normal file
@@ -0,0 +1,493 @@
import os
import subprocess
import hashlib

from picard import config, log
from picard.ui.options import (
    OptionsPage,
    register_options_page,
)
from picard.ui.itemviews import (
    BaseAction,
    register_track_action,
    # register_album_action,
)
from picard.track import Track

from PyQt5 import QtWidgets
import threading
import concurrent.futures

PLUGIN_NAME = "AcousticBrainz-ng"
PLUGIN_AUTHOR = "cy1der"
PLUGIN_DESCRIPTION = """
Analyze track acoustic characteristics using Essentia
<br/>
This plugin is not affiliated with the <a href='https://acousticbrainz.org'>AcousticBrainz</a> project<br/>
This is not a 1:1 recreation of the AcousticBrainz schema, but it provides most of the meaningful data<br/>
External dependencies:
<ul>
    <li><a href='https://essentia.upf.edu'>Essentia</a> binaries compiled with TensorFlow and gaia2 support</li>
    <li>A few MusicNN models (see user guide for details)</li>
</ul>
<strong>This plugin is CPU heavy!</strong>
"""
PLUGIN_VERSION = "1.0.0"
PLUGIN_API_VERSIONS = ["2.7", "2.8", "2.9", "2.10", "2.11"]
PLUGIN_LICENSE = "GPL-2.0-or-later"
PLUGIN_LICENSE_URL = "https://www.gnu.org/licenses/gpl-2.0.html"
PLUGIN_USER_GUIDE_URL = "https://example.com"  # TODO: Update with actual user guide URL

# (model name, output file name) pairs; the .pb files are looked up in the models path
REQUIRED_MODELS: list[tuple[str, str]] = [
    ("msd-musicnn-1", "msd.json"),
    ("mood_acoustic-musicnn-mtt-2", "mood_acoustic.json"),
    ("mood_aggressive-musicnn-mtt-2", "mood_aggressive.json"),
    ("mood_electronic-musicnn-msd-2", "mood_electronic.json"),
    ("mood_happy-musicnn-msd-2", "mood_happy.json"),
    ("mood_party-musicnn-mtt-2", "mood_party.json"),
    ("mood_relaxed-musicnn-msd-2", "mood_relaxed.json"),
    ("mood_sad-musicnn-msd-2", "mood_sad.json"),
    ("danceability-musicnn-msd-2", "danceability.json"),
    ("gender-musicnn-msd-2", "gender.json"),
    ("tonal_atonal-musicnn-mtt-2", "tonality.json"),
    ("voice_instrumental-musicnn-msd-2", "voice_instrumental.json"),
]

OPTIONAL_MODELS: list[tuple[str, str]] = [
    ("genre_electronic-musicnn-msd-2", "genre_electronic.json"),
    ("genre_rosamerica-musicnn-msd-2", "genre_rosamerica.json"),
    ("genre_tzanetakis-musicnn-msd-2", "genre_tzanetakis.json"),
]

REQUIRED_BINARIES: list[str] = [
    "streaming_extractor_music",
    "streaming_musicnn_predict",
    "streaming_md5",
]

# Avoid memory hogging
TF_ENABLE_ONEDNN_OPTS: int = 0

ENV = os.environ.copy()
ENV['TF_ENABLE_ONEDNN_OPTS'] = str(TF_ENABLE_ONEDNN_OPTS)

config.TextOption("setting", "acousticbrainz_ng_binaries_path", os.path.join(os.path.dirname(__file__), "bin"))
config.TextOption("setting", "acousticbrainz_ng_models_path", os.path.join(os.path.dirname(__file__), "models"))
config.TextOption("setting", "acousticbrainz_ng_cache_path", os.path.join(os.path.dirname(__file__), "cache"))

config.IntOption("setting", "acousticbrainz_ng_max_musicnn_workers", 4)

config.BoolOption("setting", "acousticbrainz_ng_autorun", False)
config.BoolOption("setting", "acousticbrainz_ng_analyze_optional", False)
config.BoolOption("setting", "acousticbrainz_ng_save_raw", False)


class AcousticBrainzNGOptionsPage(OptionsPage):
    NAME = "acousticbrainz_ng"
    TITLE = "AcousticBrainz-ng"
    PARENT = "plugins"

    def __init__(self, parent=None) -> None:
        super().__init__(parent)
        self.setup_ui()

    def _create_path_input_layout(self, line_edit: QtWidgets.QLineEdit, browse_callback, check_callback=None) -> QtWidgets.QHBoxLayout:
        layout = QtWidgets.QHBoxLayout()

        browse_button = QtWidgets.QPushButton("Browse", self)
        browse_button.clicked.connect(browse_callback)
        layout.addWidget(line_edit)
        layout.addWidget(browse_button)

        if check_callback:
            check_button = QtWidgets.QPushButton("Check", self)
            check_button.clicked.connect(check_callback)
            layout.addWidget(check_button)

        return layout

    def setup_ui(self) -> None:
        layout = QtWidgets.QVBoxLayout(self)

        options_group = QtWidgets.QGroupBox("Options", self)
        options_group.setSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum)
        options_layout = QtWidgets.QVBoxLayout(options_group)

        self.autorun_checkbox = QtWidgets.QCheckBox("Autorun analysis", self)
        self.autorun_checkbox.setToolTip("Automatically run analysis on new tracks")

        self.analyze_optional_checkbox = QtWidgets.QCheckBox("Analyze optional models", self)
        self.analyze_optional_checkbox.setToolTip("Include optional models in the analysis")

        self.save_raw_checkbox = QtWidgets.QCheckBox("Save raw values", self)
        self.save_raw_checkbox.setToolTip("Save raw MusicNN numbers in the metadata")

        musicnn_workers_layout = QtWidgets.QHBoxLayout()

        musicnn_workers_label = QtWidgets.QLabel("Max MusicNN workers:", self)
        musicnn_workers_label.setSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Preferred)
        self.musicnn_workers_input = QtWidgets.QSpinBox(self)
        self.musicnn_workers_input.setToolTip("Maximum number of concurrent MusicNN workers")
        self.musicnn_workers_input.setRange(1, max(len(REQUIRED_MODELS), len(OPTIONAL_MODELS)))
        self.musicnn_workers_input.setSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Preferred)

        musicnn_workers_layout.addWidget(musicnn_workers_label)
        musicnn_workers_layout.addStretch()
        musicnn_workers_layout.addWidget(self.musicnn_workers_input)

        options_layout.addWidget(self.autorun_checkbox)
        options_layout.addWidget(self.analyze_optional_checkbox)
        options_layout.addWidget(self.save_raw_checkbox)
        options_layout.addLayout(musicnn_workers_layout)

        layout.addWidget(options_group)

        paths_group = QtWidgets.QGroupBox("Paths", self)
        paths_group.setSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum)
        paths_layout = QtWidgets.QVBoxLayout(paths_group)

        # Binaries path
        self.binaries_path_input = QtWidgets.QLineEdit(self)
        self.binaries_path_input.setPlaceholderText("Path to Essentia binaries")
        binaries_layout = self._create_path_input_layout(
            self.binaries_path_input,
            lambda: self._browse_folder(self.binaries_path_input),
            lambda: (self._check_binaries(show_success=True), None)[1]
        )

        # Models path
        self.models_path_input = QtWidgets.QLineEdit(self)
        self.models_path_input.setPlaceholderText("Path to MusicNN models")
        models_layout = self._create_path_input_layout(
            self.models_path_input,
            lambda: self._browse_folder(self.models_path_input),
            lambda: (self._check_models(show_success=True, check_optional=True), None)[1]
        )

        # Cache path
        self.cache_path_input = QtWidgets.QLineEdit(self)
        self.cache_path_input.setPlaceholderText("Path to cache directory")
        cache_layout = self._create_path_input_layout(
            self.cache_path_input,
            lambda: self._browse_folder(self.cache_path_input)
        )

        paths_layout.addWidget(QtWidgets.QLabel("Binaries", self))
        paths_layout.addLayout(binaries_layout)
        paths_layout.addWidget(QtWidgets.QLabel("Models", self))
        paths_layout.addLayout(models_layout)
        paths_layout.addWidget(QtWidgets.QLabel("Cache", self))
        paths_layout.addLayout(cache_layout)

        layout.addWidget(paths_group)

        layout.addStretch()

    def _check_binaries(self, show_success=False) -> bool:
        path = self.binaries_path_input.text()
        if not path or not os.path.exists(path):
            QtWidgets.QMessageBox.warning(self, "Binaries", "Invalid or empty path.")
            return False

        missing_binaries = []
        for binary in REQUIRED_BINARIES:
            binary_path = AcousticBrainzNG._get_binary_path(binary, path)
            if not os.path.exists(binary_path):
                missing_binaries.append(binary)

        if missing_binaries:
            message = "Missing binaries:\n" + "\n".join(f"• {binary}" for binary in missing_binaries)
            QtWidgets.QMessageBox.warning(self, "Binaries", message)
            return False
        else:
            if show_success:
                QtWidgets.QMessageBox.information(self, "Binaries", "All binaries found!")
            return True

    def _check_models(self, show_success=False, check_optional=True) -> bool:
        path = self.models_path_input.text()
        if not path or not os.path.exists(path):
            QtWidgets.QMessageBox.warning(self, "Models", "Invalid or empty path.")
            return False

        missing_required = []
        for model in REQUIRED_MODELS:
            model_path = os.path.join(path, f"{model[0]}.pb")
            if not os.path.exists(model_path):
                missing_required.append(model[0])

        missing_optional = []
        if check_optional:
            for model in OPTIONAL_MODELS:
                model_path = os.path.join(path, f"{model[0]}.pb")
                if not os.path.exists(model_path):
                    missing_optional.append(model[0])

        if missing_required:
            message = "Missing required models:\n" + "\n".join(f"• {model}.pb" for model in missing_required)
            QtWidgets.QMessageBox.warning(self, "Models", message)
            return False
        elif missing_optional and check_optional:
            message = "Missing optional models:\n" + "\n".join(f"• {model}.pb" for model in missing_optional)
            QtWidgets.QMessageBox.information(self, "Models", message)

        if show_success:
            if missing_optional and check_optional:
                QtWidgets.QMessageBox.information(self, "Models", "All required models found! Some optional models are missing.")
            else:
                QtWidgets.QMessageBox.information(self, "Models", "All models found!")

        return True

    def _browse_folder(self, line_edit: QtWidgets.QLineEdit) -> None:
        folder = QtWidgets.QFileDialog.getExistingDirectory(
            self, "Select Folder",
            line_edit.text() or os.path.expanduser("~")
        )
        if folder:
            line_edit.setText(folder)

    def load(self):
        self.autorun_checkbox.setChecked(config.setting["acousticbrainz_ng_autorun"] or False)
        self.analyze_optional_checkbox.setChecked(config.setting["acousticbrainz_ng_analyze_optional"] or False)
        self.save_raw_checkbox.setChecked(config.setting["acousticbrainz_ng_save_raw"] or False)

        self.musicnn_workers_input.setValue(config.setting["acousticbrainz_ng_max_musicnn_workers"] or 4)

        self.binaries_path_input.setText(config.setting["acousticbrainz_ng_binaries_path"])
        self.models_path_input.setText(config.setting["acousticbrainz_ng_models_path"])
        self.cache_path_input.setText(config.setting["acousticbrainz_ng_cache_path"])

    def save(self):
        self._check_binaries()
        self._check_models(show_success=False, check_optional=False)

        config.setting["acousticbrainz_ng_autorun"] = self.autorun_checkbox.isChecked()
        config.setting["acousticbrainz_ng_analyze_optional"] = self.analyze_optional_checkbox.isChecked()
        config.setting["acousticbrainz_ng_save_raw"] = self.save_raw_checkbox.isChecked()

        max_workers = max(1, min(self.musicnn_workers_input.value(), max(len(REQUIRED_MODELS), len(OPTIONAL_MODELS))))
        config.setting["acousticbrainz_ng_max_musicnn_workers"] = max_workers

        config.setting["acousticbrainz_ng_binaries_path"] = self.binaries_path_input.text()
        config.setting["acousticbrainz_ng_models_path"] = self.models_path_input.text()
        config.setting["acousticbrainz_ng_cache_path"] = self.cache_path_input.text()


class AcousticBrainzNG:
    def __init__(self):
        pass

    @staticmethod
    def _get_binary_path(binary_name: str, binaries_path: str) -> str:
        binary_path = os.path.join(binaries_path, binary_name)
        if os.name == 'nt':  # Windows
            binary_path += '.exe'
        return binary_path

    def _get_binary_paths(self) -> tuple[str, str]:
        binaries_path = config.setting["acousticbrainz_ng_binaries_path"]
        if not binaries_path:
            raise ValueError("Binaries path not configured")

        musicnn_binary_path = self._get_binary_path("streaming_musicnn_predict", binaries_path)
        gaia_binary_path = self._get_binary_path("streaming_extractor_music", binaries_path)

        if not os.path.exists(musicnn_binary_path):
            raise FileNotFoundError(f"Binary {musicnn_binary_path} not found")
        if not os.path.exists(gaia_binary_path):
            raise FileNotFoundError(f"Binary {gaia_binary_path} not found")

        return musicnn_binary_path, gaia_binary_path

    def _run_musicnn_models(self, models: list[tuple[str, str]], musicnn_binary_path: str, file: str, output_path: str) -> None:
        models_path = config.setting["acousticbrainz_ng_models_path"]
        if not models_path:
            raise ValueError("Models path not configured")

        def run_musicnn_model(model_info):
            model_name, output_file = model_info
            try:
                model_path = os.path.join(models_path, f"{model_name}.pb")

                if not os.path.exists(model_path):
                    raise FileNotFoundError(f"Model {model_name} not found at {model_path}")

                output_file_path = os.path.join(output_path, output_file)

                if os.path.exists(output_file_path):
                    log.debug(f"{output_file_path} already exists, skipping {model_name}")
                    return

                result = subprocess.run(
                    [musicnn_binary_path, model_path, file, output_file_path],
                    capture_output=True,
                    text=True,
                    env=ENV
                )
                if result.returncode != 0:
                    log.error(f"streaming_musicnn_predict failed for {model_name}: {result.stderr}")
            except FileNotFoundError as e:
                log.error(f"Model {model_name} not found: {e}")
            except Exception as e:
                log.error(f"Error processing model {model_name}: {e}")

        max_workers = config.setting["acousticbrainz_ng_max_musicnn_workers"] or 4
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(run_musicnn_model, model) for model in models]
            concurrent.futures.wait(futures)

    def analyze_required(self, metadata: dict, file: str) -> None:
        if not self._check_binaries():
            log.error("Essentia binaries not found")
            return

        if not self._check_required_models():
            log.error("Required models not found")
            return

        try:
            musicnn_binary_path, gaia_binary_path = self._get_binary_paths()
        except (ValueError, FileNotFoundError) as e:
            log.error(str(e))
            return

        output_path = self._generate_cache_folder(metadata, file)
        if not output_path:
            raise ValueError("Failed to generate cache folder path")

        def run_gaia():
            if os.path.exists(os.path.join(output_path, "gaia.json")):
                log.debug(f"Gaia output already exists at {os.path.join(output_path, 'gaia.json')}, skipping")
                return

            result = subprocess.run(
                [gaia_binary_path, file, os.path.join(output_path, "gaia.json")],
                capture_output=True,
                text=True,
                env=ENV
            )
            if result.returncode != 0:
                log.error(f"streaming_extractor_music failed: {result.stderr}")

        # The gaia extractor and the MusicNN models are independent, so run them in parallel
        gaia_thread = threading.Thread(target=run_gaia)
        gaia_thread.start()

        self._run_musicnn_models(REQUIRED_MODELS, musicnn_binary_path, file, output_path)
        gaia_thread.join()

    def analyze_optional(self, metadata: dict, file: str) -> None:
        if not self._check_binaries():
            log.error("Essentia binaries not found")
            return

        if not self._check_optional_models():
            log.error("Optional models not found")
            return

        try:
            musicnn_binary_path, _ = self._get_binary_paths()
        except (ValueError, FileNotFoundError) as e:
            log.error(str(e))
            return

        output_path = self._generate_cache_folder(metadata, file)
        if not output_path:
            raise ValueError("Failed to generate cache folder path")

        self._run_musicnn_models(OPTIONAL_MODELS, musicnn_binary_path, file, output_path)

    def _generate_cache_folder(self, metadata: dict, file_path: str) -> str:
        cache_base = config.setting["acousticbrainz_ng_cache_path"]
        if not cache_base:
            raise ValueError("Cache path not configured")

        release_artist_mbid = metadata.get('musicbrainz_albumartistid', 'NO_MBID')
        release_group_mbid = metadata.get('musicbrainz_releasegroupid', 'NO_MBID')
        release_mbid = metadata.get('musicbrainz_albumid', 'NO_MBID')
        recording_mbid = metadata.get('musicbrainz_recordingid')

        if not recording_mbid:
            recording_mbid = self._get_audio_hash(file_path)

        cache_folder = os.path.join(
            str(cache_base),
            str(release_artist_mbid),
            str(release_group_mbid),
            str(release_mbid),
            str(recording_mbid)
        )

        os.makedirs(cache_folder, exist_ok=True)

        return cache_folder

    def _get_audio_hash(self, file_path: str) -> str:
        try:
            binaries_path = config.setting["acousticbrainz_ng_binaries_path"]
            if not binaries_path:
                raise ValueError("Binaries path not configured")

            binary_path = self._get_binary_path("streaming_md5", binaries_path)

            result = subprocess.run(
                [binary_path, file_path],
                capture_output=True,
                text=True,
                env=ENV
            )

            if result.returncode == 0:
                # streaming_md5 prints a line of the form "MD5: <hash>"
                for line in result.stdout.strip().split('\n'):
                    if line.startswith('MD5:'):
                        return line.split('MD5:')[1].strip()

            log.error(f"Failed to calculate audio hash: {result.stderr}")

        except Exception as e:
            log.error(f"Error calculating audio hash: {e}")

        # Fall back to hashing the file path so the track still gets a stable cache folder
        return f"fallback_{hashlib.md5(file_path.encode('utf-8')).hexdigest()}"

    def _check_binaries(self) -> bool:
        path = config.setting["acousticbrainz_ng_binaries_path"]

        if not path or not os.path.exists(path):
            return False

        for binary in REQUIRED_BINARIES:
            binary_path = self._get_binary_path(binary, path)
            if not os.path.exists(binary_path):
                return False

        return True

    def _check_models(self, models: list[tuple[str, str]]) -> bool:
        path = config.setting["acousticbrainz_ng_models_path"]

        if not path or not os.path.exists(path):
            return False

        for model in models:
            model_path = os.path.join(path, f"{model[0]}.pb")
            if not os.path.exists(model_path):
                return False

        return True

    def _check_required_models(self) -> bool:
        return self._check_models(REQUIRED_MODELS)

    def _check_optional_models(self) -> bool:
        return self._check_models(OPTIONAL_MODELS)


acousticbrainz_ng = AcousticBrainzNG()


class AcousticBrainzNGTrackAction(BaseAction):
    NAME = f"Analyze with {PLUGIN_NAME}"

    def callback(self, objs):
        tracks = list(filter(lambda o: isinstance(o, Track), objs))

        for track in tracks:
            if not track.files:
                log.warning("No file attached to track, skipping")
                continue

            acousticbrainz_ng.analyze_required(track.metadata, track.files[0].filename)

            if config.setting["acousticbrainz_ng_analyze_optional"]:
                acousticbrainz_ng.analyze_optional(track.metadata, track.files[0].filename)


register_options_page(AcousticBrainzNGOptionsPage)
register_track_action(AcousticBrainzNGTrackAction())
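Note: for orientation, a minimal sketch of the per-track cache layout that _generate_cache_folder() above produces; the MBID path segments are hypothetical placeholders. analyze_required() and analyze_optional() then write gaia.json plus one JSON per model (msd.json, mood_happy.json, danceability.json, ...) into this folder.

import os

cache_base = "cache"  # default: <plugin dir>/cache, configurable in the options page
folder = os.path.join(
    cache_base,
    "RELEASE_ARTIST_MBID",  # musicbrainz_albumartistid, or "NO_MBID"
    "RELEASE_GROUP_MBID",   # musicbrainz_releasegroupid, or "NO_MBID"
    "RELEASE_MBID",         # musicbrainz_albumid, or "NO_MBID"
    "RECORDING_MBID",       # musicbrainz_recordingid, or an audio-hash fallback when missing
)
print(folder)  # cache/RELEASE_ARTIST_MBID/RELEASE_GROUP_MBID/RELEASE_MBID/RECORDING_MBID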
BIN bin/streaming_extractor_music Executable file
Binary file not shown.
BIN bin/streaming_md5 Executable file
Binary file not shown.
BIN bin/streaming_musicnn_predict Executable file
Binary file not shown.
87 models/danceability-musicnn-msd-2.json Normal file
@@ -0,0 +1,87 @@
{
  "name": "danceability",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/danceability/danceability-musicnn-msd-2.pb",
  "version": "1",
  "description": "classification of music by danceability",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["danceable", "not_danceable"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "size": "306 full tracks, 124/182 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.93}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
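Note: the model JSONs in this commit share one schema convention: the output whose output_purpose is "predictions" has shape [1, len(classes)], so a score vector can be zipped directly with the classes list. A minimal sketch under that assumption (the prediction vector below is hypothetical, not real model output):

import json

with open("models/danceability-musicnn-msd-2.json") as fh:
    meta = json.load(fh)

classes = meta["classes"]   # ["danceable", "not_danceable"]
predictions = [0.91, 0.09]  # hypothetical Sigmoid output, shape [1, 2] flattened
scores = dict(zip(classes, predictions))
print(max(scores, key=scores.get))  # -> danceable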
BIN models/danceability-musicnn-msd-2.pb Normal file
Binary file not shown.
67 models/gender-musicnn-msd-2.json Normal file
@@ -0,0 +1,67 @@
{
  "name": "gender",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/gender/gender-musicnn-msd-2.pb",
  "version": "1",
  "description": "classification of vocal music by gender (male/female)",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["female", "male"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "size": "3311 full tracks, 1508/1803 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.88}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/gender-musicnn-msd-2.pb Normal file
Binary file not shown.
67 models/genre_electronic-musicnn-msd-2.json Normal file
@@ -0,0 +1,67 @@
{
  "name": "genre electronic",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/genre_electronic/genre_electronic-musicnn-msd-2.pb",
  "version": "1",
  "description": "classification of electronic music by subgenres",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["ambient", "drum and bass", "house", "techno", "trance"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "size": "250 track excerpts, 50 per genre",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.95}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 5], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 5], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/genre_electronic-musicnn-msd-2.pb Normal file
Binary file not shown.
77 models/genre_rosamerica-musicnn-msd-2.json Normal file
@@ -0,0 +1,77 @@
{
  "name": "genre Rosamerica",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/genre_rosamerica/genre_rosamerica-musicnn-msd-2.pb",
  "version": "1",
  "description": "classification of music by genre",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["classical", "dance", "hiphop", "jazz", "pop", "rhythm and blues", "rock", "speech"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection created by a musicologist",
    "citation": "@phdthesis{i2009audio,\n title={Audio content processing for automatic music genre classification: descriptors, databases, and classifiers},\n author={i Termens, Enric Guaus},\n year={2009},\n school={PhD thesis, Universitat Pompeu Fabra, Barcelona, Spain}\n}",
    "size": "400 tracks, 50 per genre",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.92}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 8], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 8], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/genre_rosamerica-musicnn-msd-2.pb Normal file
Binary file not shown.
79 models/genre_tzanetakis-musicnn-msd-2.json Normal file
@@ -0,0 +1,79 @@
{
  "name": "genre GTZAN",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/genre_tzanetakis/genre_tzanetakis-musicnn-msd-2.pb",
  "version": "1",
  "description": "classification of music by genre",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "the GTZAN Genre Collection",
    "citation": "@article{tzanetakis2002musical,\n title={Musical genre classification of audio signals},\n author={Tzanetakis, George and Cook, Perry},\n journal={IEEE Transactions on speech and audio processing},\n volume={10},\n number={5},\n pages={293--302},\n year={2002},\n publisher={IEEE}\n}",
    "size": "1000 track excerpts, 100 per genre",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.83}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 10], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 10], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/genre_tzanetakis-musicnn-msd-2.pb Normal file
Binary file not shown.
88 models/mood_acoustic-musicnn-mtt-2.json Normal file
@@ -0,0 +1,88 @@
{
  "name": "mood acoustic",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/mood_acoustic/mood_acoustic-musicnn-mtt-2.pb",
  "version": "1",
  "description": "classification of music by type of sound (acoustic/non-acoustic)",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["acoustic", "non_acoustic"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
    "size": "321 full tracks + excerpts, 193/128 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.93}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/mood_acoustic-musicnn-mtt-2.pb Normal file
Binary file not shown.
88 models/mood_aggressive-musicnn-mtt-2.json Normal file
@@ -0,0 +1,88 @@
{
  "name": "mood aggressive",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/mood_aggressive/mood_aggressive-musicnn-mtt-2.pb",
  "version": "1",
  "description": "classification of music by mood (aggressive/non-aggressive)",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["aggressive", "not_aggressive"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
    "size": "280 full tracks + excerpts, 133/147 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.96}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/mood_aggressive-musicnn-mtt-2.pb Normal file
Binary file not shown.
88 models/mood_electronic-musicnn-msd-2.json Normal file
@@ -0,0 +1,88 @@
{
  "name": "mood electronic",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/mood_electronic/mood_electronic-musicnn-msd-2.pb",
  "version": "1",
  "description": "classification of music by type of sound (electronic/non-electronic)",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["electronic", "non_electronic"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
    "size": "332 full tracks + excerpts, 164/168 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.95}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/mood_electronic-musicnn-msd-2.pb Normal file
Binary file not shown.
88 models/mood_happy-musicnn-msd-2.json Normal file
@@ -0,0 +1,88 @@
{
  "name": "mood happy",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/mood_happy/mood_happy-musicnn-msd-2.pb",
  "version": "1",
  "description": "classification of music by mood (happy/non-happy)",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["happy", "non_happy"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
    "size": "302 full tracks + excerpts, 139/163 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.81}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/mood_happy-musicnn-msd-2.pb Normal file
Binary file not shown.
88 models/mood_party-musicnn-mtt-2.json Normal file
@@ -0,0 +1,88 @@
{
  "name": "mood party",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/mood_party/mood_party-musicnn-mtt-2.pb",
  "version": "1",
  "description": "classification of music by mood (party/non-party)",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["non_party", "party"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
    "size": "349 full tracks + excerpts, 198/151 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.92}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/mood_party-musicnn-mtt-2.pb Normal file
Binary file not shown.
88 models/mood_relaxed-musicnn-msd-2.json Normal file
@@ -0,0 +1,88 @@
{
  "name": "mood relaxed",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/mood_relaxed/mood_relaxed-musicnn-msd-2.pb",
  "version": "1",
  "description": "classification of music by mood (relaxed/non-relaxed)",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["non_relaxed", "relaxed"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
    "size": "446 full tracks + excerpts, 145/301 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.9}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/mood_relaxed-musicnn-msd-2.pb Normal file
Binary file not shown.
88 models/mood_sad-musicnn-msd-2.json Normal file
@@ -0,0 +1,88 @@
{
  "name": "mood sad",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/mood_sad/mood_sad-musicnn-msd-2.pb",
  "version": "1",
  "description": "classification of music by mood (sad/non-sad)",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["non_sad", "sad"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "citation": "@inproceedings{laurier2009music,\n title={Music mood annotator design and integration},\n author={Laurier, Cyril and Meyers, Owen and Serra, Joan and Blech, Martin and Herrera, Perfecto},\n booktitle={2009 Seventh International Workshop on Content-Based Multimedia Indexing},\n pages={156--161},\n year={2009},\n organization={IEEE}\n}",
    "size": "230 full tracks + excerpts, 96/134 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.86}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/mood_sad-musicnn-msd-2.pb Normal file
Binary file not shown.
126 models/msd-musicnn-1.json Normal file
@@ -0,0 +1,126 @@
{
  "name": "MSD MusiCNN",
  "type": "auto-tagging",
  "link": "https://essentia.upf.edu/models/feature-extractors/musicnn/msd-musicnn-1.pb",
  "version": "1",
  "description": "prediction of the top-50 tags in the dataset",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-03-31",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": [
    "rock", "pop", "alternative", "indie", "electronic", "female vocalists", "dance", "00s",
    "alternative rock", "jazz", "beautiful", "metal", "chillout", "male vocalists", "classic rock",
    "soul", "indie rock", "Mellow", "electronica", "80s", "folk", "90s", "chill", "instrumental",
    "punk", "oldies", "blues", "hard rock", "ambient", "acoustic", "experimental", "female vocalist",
    "guitar", "Hip-Hop", "70s", "party", "country", "easy listening", "sexy", "catchy", "funk",
    "electro", "heavy metal", "Progressive rock", "60s", "rnb", "indie pop", "sad", "House", "happy"
  ],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "The Million Song Dataset",
    "citation": "http://millionsongdataset.com/",
    "size": "200k up to two minutes audio previews",
    "metrics": {"ROC-AUC": 0.88, "PR-AUC": 0.29}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 50], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 50], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
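Note: unlike the binary classifiers above, msd-musicnn-1 is an auto-tagging model: its 50 sigmoid outputs are independent, so a track can carry several tags at once. A minimal sketch of ranking its prediction vector against the classes list (the scores here are random stand-ins, not real model output):

import json
import random

with open("models/msd-musicnn-1.json") as fh:
    meta = json.load(fh)

tags = meta["classes"]                         # the 50 MSD tags listed above
predictions = [random.random() for _ in tags]  # stand-in for the real [1, 50] Sigmoid output
top5 = sorted(zip(tags, predictions), key=lambda tp: tp[1], reverse=True)[:5]
print([tag for tag, score in top5])            # five highest-scoring tags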
BIN models/msd-musicnn-1.pb Normal file
Binary file not shown.
87 models/tonal_atonal-musicnn-mtt-2.json Normal file
@@ -0,0 +1,87 @@
{
  "name": "tonal/atonal",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/tonal_atonal/tonal_atonal-musicnn-mtt-2.pb",
  "version": "1",
  "description": "classification of music by tonality (tonal/atonal)",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["atonal", "tonal"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "size": "345 track excerpts, 145/200 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.91}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/tonal_atonal-musicnn-mtt-2.pb Normal file
Binary file not shown.
87 models/voice_instrumental-musicnn-msd-2.json Normal file
@@ -0,0 +1,87 @@
{
  "name": "voice/instrumental classifiers",
  "type": "multi-class classifier",
  "link": "https://essentia.upf.edu/models/classifiers/voice_instrumental/voice_instrumental-musicnn-msd-2.pb",
  "version": "1",
  "description": "classification into music with voice/instrumental",
  "author": "Pablo Alonso",
  "email": "pablo.alonso@upf.edu",
  "release_date": "2020-07-07",
  "framework": "tensorflow",
  "framework_version": "1.15.0",
  "classes": ["instrumental", "voice"],
  "model_types": ["frozen_model"],
  "dataset": {
    "name": "In-house MTG collection",
    "size": "1000 track excerpts, 500 per class",
    "metrics": {"5-fold_cross_validation_normalized_accuracy": 0.98}
  },
  "schema": {
    "inputs": [{"name": "model/Placeholder", "type": "float", "shape": [187, 96]}],
    "outputs": [
      {"name": "model/Sigmoid", "type": "float", "shape": [1, 2], "op": "Sigmoid", "output_purpose": "predictions"},
      {"name": "model/dense_2/BiasAdd", "type": "float", "shape": [1, 2], "op": "fully connected", "description": "logits", "output_purpose": ""},
      {"name": "model/dense_1/BiasAdd", "type": "float", "shape": [1, 100], "op": "fully connected", "description": "penultimate layer", "output_purpose": ""},
      {"name": "model/dense/BiasAdd", "type": "float", "shape": [1, 200], "op": "fully connected", "output_purpose": "embeddings"}
    ]
  },
  "citation": "@inproceedings{alonso2020tensorflow,\n title={Tensorflow Audio Models in Essentia},\n author={Alonso-Jim{\\'e}nez, Pablo and Bogdanov, Dmitry and Pons, Jordi and Serra, Xavier},\n booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n year={2020}\n}",
  "inference": {"sample_rate": 16000, "algorithm": "TensorflowPredictMusiCNN"}
}
BIN models/voice_instrumental-musicnn-msd-2.pb Normal file
Binary file not shown.