From df921014582391a6f23def49f53df0c7aaf5e55b Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 24 Dec 2023 12:39:53 +0100 Subject: [PATCH 01/39] Artist search feature implemented --- lb_content_resolver/artist_search.py | 114 +++++++++++++++++++++++++++ lb_content_resolver/lb_radio.py | 2 + resolve.py | 9 +++ 3 files changed, 125 insertions(+) create mode 100755 lb_content_resolver/artist_search.py diff --git a/lb_content_resolver/artist_search.py b/lb_content_resolver/artist_search.py new file mode 100755 index 0000000..dc7be25 --- /dev/null +++ b/lb_content_resolver/artist_search.py @@ -0,0 +1,114 @@ +import os +from collections import defaultdict +import datetime +import sys + +import peewee +import requests + +from lb_content_resolver.model.database import db +from lb_content_resolver.model.recording import Recording, RecordingMetadata +#from troi.recording_search_service import RecordingSearchByArtistService +from troi.splitter import plist + + +class LocalRecordingSearchByArtistService: #(RecordingSearchByArtistService): + ''' + Given the local database, search for artists that meet given tag criteria + ''' + + def __init__(self, db): +# RecordingSearchByTagService.__init__(self) + self.db = db + + def search(self, artist_mbids, begin_percent, end_percent, num_recordings): + """ + Perform an artist search. Parameters: + + tags - a list of artist_mbids for which to search recordings + begin_percent - if many recordings match the above parameters, return only + recordings that have a minimum popularity percent score + of begin_percent. + end_percent - if many recordings match the above parameters, return only + recordings that have a maximum popularity percent score + of end_percent. + num_recordings - ideally return these many recordings + + If only few recordings match, the begin_percent and end_percent are + ignored. 
+ """ + + print(artist_mbids) + + query = """SELECT popularity + , recording_mbid + , artist_mbid + , subsonic_id + FROM recording + JOIN recording_metadata + ON recording.id = recording_metadata.recording_id + JOIN recording_subsonic + ON recording.id = recording_subsonic.recording_id + WHERE artist_mbid in (%s) + ORDER BY artist_mbid + , popularity""" + + self.db.open_db() + placeholders = ",".join(("?", ) * len(artist_mbids)) + cursor = db.execute_sql(query % placeholders, params=tuple(artist_mbids)) + + artists = defaultdict(list) + for rec in cursor.fetchall(): + artists[rec[2]].append({"popularity": rec[0], "recording_mbid": rec[1], "artist_mbid": rec[2], "subsonic_id": rec[3]}) + + for artist in artists: + artists[artist] = self.fetch_and_select_on_popularity(artists[artist], begin_percent, end_percent, num_recordings) + + return artists + + + # TODO: use this in both tag and artist search classes + def fetch_and_select_on_popularity(self, recordings, begin_percent, end_percent, num_recordings): + """ + Break the data into over, matching and under (percent) groups + """ + + matching_recordings = [] + over_recordings = [] + under_recordings = [] + for rec in recordings: + if rec["popularity"] >= begin_percent: + if rec["popularity"] < end_percent: + matching_recordings.append(rec) + else: + over_recordings.append(rec) + else: + under_recordings.append(rec) + + # If we have enough recordings, we're done! + if len(matching_recordings) >= num_recordings: + return plist(matching_recordings) + + # We don't have enough recordings, see if we can pick the ones outside + # of our desired range in a best effort to make a playlist. 
+ # Keep adding the best matches until we (hopefully) get our desired number of recordings + while len(matching_recordings) < num_recordings: + if under_recordings: + under_diff = begin_percent - under_recordings[-1]["popularity"] + else: + under_diff = 1.0 + + if over_recordings: + over_diff = over_recordings[-1]["popularity"] - end_percent + else: + over_diff = 1.0 + + if over_diff == 1.0 and under_diff == 1.0: + break + + if under_diff < over_diff: + matching_recordings.insert(0, under_recordings.pop(-1)) + else: + matching_recordings.insert(len(matching_recordings), over_recordings.pop(0)) + + return plist(matching_recordings) diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index 23a9bf4..56840c6 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -6,6 +6,7 @@ from troi.splitter import plist from lb_content_resolver.tag_search import LocalRecordingSearchByTagService +from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService from lb_content_resolver.model.database import db import config @@ -51,6 +52,7 @@ def generate(self, mode, prompt): patch = LBRadioPatch({"mode": mode, "prompt": prompt, "echo": True, "debug": True, "min_recordings": 1}) patch.register_service(LocalRecordingSearchByTagService(self.db)) + patch.register_service(LocalRecordingSearchByArtistService(self.db)) # Now generate the playlist try: diff --git a/resolve.py b/resolve.py index acdd065..bfc902d 100755 --- a/resolve.py +++ b/resolve.py @@ -11,6 +11,7 @@ from lb_content_resolver.lb_radio import ListenBrainzRadioLocal from lb_content_resolver.utils import ask_yes_no_question from lb_content_resolver.top_tags import TopTags +from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService import config @@ -90,6 +91,13 @@ def top_tags(index_dir, count): tt.print_top_tags_tightly(count) +@click.command() +@click.argument('index_dir') +def artist_test(index_dir): + db = Database(index_dir) + 
s = LocalRecordingSearchByArtistService(db) + s.search(["8f6bd1e4-fbe1-4f50-aa9b-94c450ec0f11", "067102ea-9519-4622-9077-57ca4164cfbb"], .9, .6, 20) + cli.add_command(create) cli.add_command(scan) cli.add_command(playlist) @@ -98,6 +106,7 @@ def top_tags(index_dir, count): cli.add_command(subsonic) cli.add_command(lb_radio) cli.add_command(top_tags) +cli.add_command(artist_test) def usage(command): From e48b4e4dffc52f0b13a2e5b944a409264d277661 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 24 Dec 2023 23:05:15 +0100 Subject: [PATCH 02/39] Artist lb radio works! --- lb_content_resolver/artist_search.py | 60 +++++++++++++++------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/lb_content_resolver/artist_search.py b/lb_content_resolver/artist_search.py index dc7be25..a28f4d0 100755 --- a/lb_content_resolver/artist_search.py +++ b/lb_content_resolver/artist_search.py @@ -8,17 +8,18 @@ from lb_content_resolver.model.database import db from lb_content_resolver.model.recording import Recording, RecordingMetadata -#from troi.recording_search_service import RecordingSearchByArtistService +from troi.recording_search_service import RecordingSearchByArtistService from troi.splitter import plist +from troi import Recording as TroiRecording -class LocalRecordingSearchByArtistService: #(RecordingSearchByArtistService): +class LocalRecordingSearchByArtistService(RecordingSearchByArtistService): ''' Given the local database, search for artists that meet given tag criteria ''' def __init__(self, db): -# RecordingSearchByTagService.__init__(self) + RecordingSearchByArtistService.__init__(self) self.db = db def search(self, artist_mbids, begin_percent, end_percent, num_recordings): @@ -38,8 +39,6 @@ def search(self, artist_mbids, begin_percent, end_percent, num_recordings): ignored. 
""" - print(artist_mbids) - query = """SELECT popularity , recording_mbid , artist_mbid @@ -85,30 +84,37 @@ def fetch_and_select_on_popularity(self, recordings, begin_percent, end_percent, else: under_recordings.append(rec) - # If we have enough recordings, we're done! - if len(matching_recordings) >= num_recordings: - return plist(matching_recordings) + # If we have enough recordings, skip the extending part + if len(matching_recordings) < num_recordings: + # We don't have enough recordings, see if we can pick the ones outside + # of our desired range in a best effort to make a playlist. + # Keep adding the best matches until we (hopefully) get our desired number of recordings + while len(matching_recordings) < num_recordings: + if under_recordings: + under_diff = begin_percent - under_recordings[-1]["popularity"] + else: + under_diff = 1.0 - # We don't have enough recordings, see if we can pick the ones outside - # of our desired range in a best effort to make a playlist. - # Keep adding the best matches until we (hopefully) get our desired number of recordings - while len(matching_recordings) < num_recordings: - if under_recordings: - under_diff = begin_percent - under_recordings[-1]["popularity"] - else: - under_diff = 1.0 + if over_recordings: + over_diff = over_recordings[-1]["popularity"] - end_percent + else: + over_diff = 1.0 - if over_recordings: - over_diff = over_recordings[-1]["popularity"] - end_percent - else: - over_diff = 1.0 + if over_diff == 1.0 and under_diff == 1.0: + break + + if under_diff < over_diff: + matching_recordings.insert(0, under_recordings.pop(-1)) + else: + matching_recordings.insert(len(matching_recordings), over_recordings.pop(0)) - if over_diff == 1.0 and under_diff == 1.0: - break + # Convert results into recordings + results = plist() + for rec in matching_recordings: + r = TroiRecording(mbid=rec["recording_mbid"]) + if "subsonic_id" in rec: + r.musicbrainz={"subsonic_id": rec["subsonic_id"]} - if under_diff < over_diff: - 
matching_recordings.insert(0, under_recordings.pop(-1)) - else: - matching_recordings.insert(len(matching_recordings), over_recordings.pop(0)) + results.append(r) - return plist(matching_recordings) + return results From ce81ee576f7a546ab1777133a31cf920eeb063a1 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 25 Dec 2023 12:47:05 +0100 Subject: [PATCH 03/39] Refactor class to select recordings so it can be used by more than one class --- lb_content_resolver/artist_search.py | 58 +------------------------ lb_content_resolver/tag_search.py | 50 +++------------------- lb_content_resolver/utils.py | 64 ++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 101 deletions(-) diff --git a/lb_content_resolver/artist_search.py b/lb_content_resolver/artist_search.py index a28f4d0..113a09e 100755 --- a/lb_content_resolver/artist_search.py +++ b/lb_content_resolver/artist_search.py @@ -8,9 +8,9 @@ from lb_content_resolver.model.database import db from lb_content_resolver.model.recording import Recording, RecordingMetadata +from lb_content_resolver.utils import select_recordings_on_popularity from troi.recording_search_service import RecordingSearchByArtistService from troi.splitter import plist -from troi import Recording as TroiRecording class LocalRecordingSearchByArtistService(RecordingSearchByArtistService): @@ -61,60 +61,6 @@ def search(self, artist_mbids, begin_percent, end_percent, num_recordings): artists[rec[2]].append({"popularity": rec[0], "recording_mbid": rec[1], "artist_mbid": rec[2], "subsonic_id": rec[3]}) for artist in artists: - artists[artist] = self.fetch_and_select_on_popularity(artists[artist], begin_percent, end_percent, num_recordings) + artists[artist] = select_recordings_on_popularity(artists[artist], begin_percent, end_percent, num_recordings) return artists - - - # TODO: use this in both tag and artist search classes - def fetch_and_select_on_popularity(self, recordings, begin_percent, end_percent, num_recordings): - """ - Break the 
data into over, matching and under (percent) groups - """ - - matching_recordings = [] - over_recordings = [] - under_recordings = [] - for rec in recordings: - if rec["popularity"] >= begin_percent: - if rec["popularity"] < end_percent: - matching_recordings.append(rec) - else: - over_recordings.append(rec) - else: - under_recordings.append(rec) - - # If we have enough recordings, skip the extending part - if len(matching_recordings) < num_recordings: - # We don't have enough recordings, see if we can pick the ones outside - # of our desired range in a best effort to make a playlist. - # Keep adding the best matches until we (hopefully) get our desired number of recordings - while len(matching_recordings) < num_recordings: - if under_recordings: - under_diff = begin_percent - under_recordings[-1]["popularity"] - else: - under_diff = 1.0 - - if over_recordings: - over_diff = over_recordings[-1]["popularity"] - end_percent - else: - over_diff = 1.0 - - if over_diff == 1.0 and under_diff == 1.0: - break - - if under_diff < over_diff: - matching_recordings.insert(0, under_recordings.pop(-1)) - else: - matching_recordings.insert(len(matching_recordings), over_recordings.pop(0)) - - # Convert results into recordings - results = plist() - for rec in matching_recordings: - r = TroiRecording(mbid=rec["recording_mbid"]) - if "subsonic_id" in rec: - r.musicbrainz={"subsonic_id": rec["subsonic_id"]} - - results.append(r) - - return results diff --git a/lb_content_resolver/tag_search.py b/lb_content_resolver/tag_search.py index 06d600a..4aa6c28 100755 --- a/lb_content_resolver/tag_search.py +++ b/lb_content_resolver/tag_search.py @@ -8,6 +8,7 @@ from lb_content_resolver.model.database import db from lb_content_resolver.model.recording import Recording, RecordingMetadata +from lb_content_resolver.utils import select_recordings_on_popularity from troi.recording_search_service import RecordingSearchByTagService from troi.splitter import plist @@ -51,52 +52,11 @@ def search(self, 
tags, operator, begin_percent, end_percent, num_recordings): placeholders = ",".join(("?", ) * len(tags)) cursor = db.execute_sql(query % (placeholders, pop_clause), params) - # Break the data into over, matching and under (percent) groups - matching_recordings = [] - over_recordings = [] - under_recordings = [] + recordings = [] for rec in cursor.fetchall(): - recording = { - "recording_mbid": rec[0], - "percent": rec[1], - "subsonic_id": rec[2] - } - - if rec[1] >= begin_percent: - if rec[1] < end_percent: - matching_recordings.append(recording) - else: - over_recordings.append(recording) - else: - under_recordings.append(recording) - - # If we have enough recordings, we're done! - if len(matching_recordings) >= num_recordings: - return plist(matching_recordings) - - # We don't have enough recordings, see if we can pick the ones outside - # of our desired range in a best effort to make a playlist. - # Keep adding the best matches until we (hopefully) get our desired number of recordings - while len(matching_recordings) < num_recordings: - if under_recordings: - under_diff = begin_percent - under_recordings[-1]["percent"] - else: - under_diff = 1.0 - - if over_recordings: - over_diff = over_recordings[-1]["percent"] - end_percent - else: - over_diff = 1.0 - - if over_diff == 1.0 and under_diff == 1.0: - break - - if under_diff < over_diff: - matching_recordings.insert(0, under_recordings.pop(-1)) - else: - matching_recordings.insert(len(matching_recordings), over_recordings.pop(0)) - - return plist(matching_recordings) + recordings.append({"recording_mbid": rec[0], "popularity": rec[1], "subsonic_id": rec[2]}) + + return select_recordings_on_popularity(recordings, begin_percent, end_percent, num_recordings) def or_search(self, tags, min_popularity=None, max_popularity=None): """ diff --git a/lb_content_resolver/utils.py b/lb_content_resolver/utils.py index 5c5bdd3..3a792f9 100755 --- a/lb_content_resolver/utils.py +++ b/lb_content_resolver/utils.py @@ -1,3 +1,7 @@ 
+from troi.splitter import plist +from troi import Recording as TroiRecording + + def ask_yes_no_question(prompt): while True: @@ -12,3 +16,63 @@ def ask_yes_no_question(prompt): return False else: print("eh? try again.") + + +def select_recordings_on_popularity(recordings, begin_percent, end_percent, num_recordings): + """ + Given dicts of recording data, select up to num_recordings recordings randomly + from the recordings that ideally lie in popularity between begin_percent and end_percent. + + If too little data is found in the percent range, select recordings that are the closest + to the disired range. + """ + + matching_recordings = [] + over_recordings = [] + under_recordings = [] + for rec in recordings: + if rec["popularity"] >= begin_percent: + if rec["popularity"] < end_percent: + matching_recordings.append(rec) + else: + over_recordings.append(rec) + else: + under_recordings.append(rec) + + # If we have enough recordings, skip the extending part + if len(matching_recordings) < num_recordings: + # We don't have enough recordings, see if we can pick the ones outside + # of our desired range in a best effort to make a playlist. 
+ # Keep adding the best matches until we (hopefully) get our desired number of recordings + while len(matching_recordings) < num_recordings: + if under_recordings: + under_diff = begin_percent - under_recordings[-1]["popularity"] + else: + under_diff = None + + if over_recordings: + over_diff = over_recordings[-1]["popularity"] - end_percent + else: + over_diff = None + + if over_diff == None and under_diff == None: + break + + if over_diff is not None and under_diff is not None and under_diff < over_diff: + matching_recordings.insert(0, under_recordings.pop(-1)) + else: + if under_diff is not None: + matching_recordings.insert(len(matching_recordings), under_recordings.pop(-1)) + else: + matching_recordings.insert(len(matching_recordings), over_recordings.pop(0)) + + # Convert results into recordings + results = plist() + for rec in matching_recordings: + r = TroiRecording(mbid=rec["recording_mbid"]) + if "subsonic_id" in rec: + r.musicbrainz = {"subsonic_id": rec["subsonic_id"]} + + results.append(r) + + return results From 0081aed9daf29f2c1d963152b6501e0721d8591a Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sat, 30 Dec 2023 18:33:23 +0100 Subject: [PATCH 04/39] non-local playlists now resolve to local files. 
--- lb_content_resolver/content_resolver.py | 35 ++++++++++++++----------- lb_content_resolver/lb_radio.py | 28 ++++++++++++++++++++ lb_content_resolver/playlist.py | 4 +-- resolve.py | 3 +++ 4 files changed, 52 insertions(+), 18 deletions(-) diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index 38c7312..208af3f 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -9,7 +9,7 @@ from lb_content_resolver.model.recording import Recording from lb_content_resolver.fuzzy_index import FuzzyIndex from lb_matching_tools.cleaner import MetadataCleaner -from lb_content_resolver.playlist import read_jspf_playlist, generate_m3u_playlist +from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist SUPPORTED_FORMATS = ["flac", "ogg", "mp3", "m4a", "wma"] @@ -84,20 +84,26 @@ def resolve_recordings(self, query_data, match_threshold): return resolved_recordings - def resolve_playlist(self, jspf_playlist, m3u_playlist, match_threshold): + def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None): """ - Given a JSPF playlist, resolve tracks and write the m3u file. Print output to console for now. + Given a JSPF playlist or a list of troi recordings, resolve tracks and return a list of resolved recordings. + threshold is a value between 0 and 1.0 for the percentage score required before a track is matched. 
""" + + if recordings is None and jspf_playlist is None: + raise ValueError("Either recordings or jspf_playlist must be passed.") + self.db.open_db() self.build_index() - jspf = read_jspf_playlist(jspf_playlist) - - title = jspf["playlist"]["title"] - recordings = [] artist_recording_data = [] - for i, track in enumerate(jspf["playlist"]["track"]): - artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"]}) + if jspf_playlist is not None: + jspf = read_jspf_playlist(jspf_playlist) + for i, track in enumerate(jspf["playlist"]["track"]): + artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"]}) + else: + for rec in recordings: + artist_recording_data.append({"artist_name": rec.artist.name, "recording_name": rec.name }) hits = self.resolve_recordings(artist_recording_data, match_threshold) hit_index = {hit["index"]: hit for hit in hits} @@ -106,7 +112,7 @@ def resolve_playlist(self, jspf_playlist, m3u_playlist, match_threshold): recordings = Recording.select().where(Recording.id.in_(recording_ids)) rec_index = {r.id: r for r in recordings} - results = [] + results = [None] * len(artist_recording_data) for i, artist_recording in enumerate(artist_recording_data): if i not in hit_index: print("FAIL %s - %s not resolved." 
% (artist_recording["artist_name"], artist_recording["recording_name"])) @@ -114,16 +120,13 @@ def resolve_playlist(self, jspf_playlist, m3u_playlist, match_threshold): hit = hit_index[i] rec = rec_index[hit["recording_id"]] - hit["file_path"] = rec.file_path - hit["artist_name"] = rec.artist_name - hit["recording_name"] = rec.recording_name - results.append(hit) + results[hit["index"]] = rec print("OK %s - %s resolved: %s" % (rec.artist_name, rec.recording_name, os.path.basename(rec.file_path))) if len(results) == 0: print("Sorry, but no tracks could be resolved, no playlist generated.") return - print(f'\n{len(recordings)} recordings resolved, {len(jspf["playlist"]["track"]) - len(recordings)} not resolved.') + print(f'\n{len(recordings)} recordings resolved, {len(artist_recording_data) - len(recordings)} not resolved.') - generate_m3u_playlist(m3u_playlist, title, recordings) + return results diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index 56840c6..8965898 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -8,6 +8,7 @@ from lb_content_resolver.tag_search import LocalRecordingSearchByTagService from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService from lb_content_resolver.model.database import db +from lb_content_resolver.content_resolver import ContentResolver import config @@ -16,6 +17,9 @@ class ListenBrainzRadioLocal: Generate local playlists against a music collection available via subsonic. 
''' + # TODO: Make this an argument + MATCH_THRESHOLD = .8 + def __init__(self, db): self.db = db @@ -65,4 +69,28 @@ def generate(self, mode, prompt): print("Your prompt generated an empty playlist.") self.sanity_check() + # Resolve any tracks that have not been resolved to a subsonic_id or a local file + self.resolve_recordings(playlist) + return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}} + + def resolve_recordings(self, playlist): + + recordings = [] + for recording in playlist.playlists[0].recordings: + if "subsonic_id" in recording.musicbrainz or "filename" in recording.musicbrainz: + continue + + recordings.append(recording) + + cr = ContentResolver(self.db) + resolved = cr.resolve_playlist(self.MATCH_THRESHOLD, recordings) + + for i, t_recording in enumerate(playlist.playlists[0].recordings): + if resolved[i] is not None: + # TODO make this work for subsonic_ids +# if "subsonic_id" in resolved.musicbrainz: +# recording.musicbrainz["subsonic_id"] = resolved._id + + if resolved[i].file_path != "": + t_recording.musicbrainz["filename"] = resolved[i].file_path diff --git a/lb_content_resolver/playlist.py b/lb_content_resolver/playlist.py index 3ddd0bf..8c88f17 100644 --- a/lb_content_resolver/playlist.py +++ b/lb_content_resolver/playlist.py @@ -12,9 +12,9 @@ def read_jspf_playlist(jspf_file): return json.loads(js) -def generate_m3u_playlist(file_name, playlist_title, recordings): +def write_m3u_playlist(file_name, playlist_title, hits): """ - Given a list of Recording objects, write a m3u playlist. + Given a list of Recordings, write a m3u playlist. 
""" with open(file_name, "w") as m3u: diff --git a/resolve.py b/resolve.py index bfc902d..7d12426 100755 --- a/resolve.py +++ b/resolve.py @@ -12,6 +12,7 @@ from lb_content_resolver.utils import ask_yes_no_question from lb_content_resolver.top_tags import TopTags from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService +from lb_content_resolver.playlist import write_m3u_playlist import config @@ -65,7 +66,9 @@ def subsonic(index_dir): def playlist(index_dir, jspf_playlist, m3u_playlist, threshold): db = Database(index_dir) cr = ContentResolver(db) + title, recordings = cr.resolve_playlist(jspf_playlist, threshold) cr.resolve_playlist(jspf_playlist, m3u_playlist, threshold) + write_m3u_playlist(write_m3u_playlist, title, recordings) @click.command() @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True) From 7922977a474e24c343d8c17daa06a5092978e44c Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sat, 30 Dec 2023 21:23:03 +0100 Subject: [PATCH 05/39] Fix location issue and store full pathname in the DB --- lb_content_resolver/content_resolver.py | 34 ++++++++++++++-------- lb_content_resolver/database.py | 2 +- lb_content_resolver/lb_radio.py | 13 +++++---- lb_content_resolver/model/subsonic.py | 2 +- lb_content_resolver/playlist.py | 21 ++++++++++++-- lb_content_resolver/utils.py | 16 +++++++++++ requirements.txt | 1 + resolve.py | 38 ++++++++++++++++--------- 8 files changed, 91 insertions(+), 36 deletions(-) diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index 208af3f..ae4e336 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -7,13 +7,14 @@ from lb_content_resolver.model.database import db, setup_db from lb_content_resolver.model.recording import Recording +from lb_content_resolver.model.subsonic import RecordingSubsonic from lb_content_resolver.fuzzy_index import FuzzyIndex from lb_matching_tools.cleaner import 
MetadataCleaner -from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist +from lb_content_resolver.playlist import read_jspf_playlist +from lb_content_resolver.utils import bcolors SUPPORTED_FORMATS = ["flac", "ogg", "mp3", "m4a", "wma"] - class ContentResolver: ''' Scan a given path and enter/update the metadata in the search index @@ -70,12 +71,10 @@ def resolve_recordings(self, query_data, match_threshold): for data in next_query_data: recording_name = mc.clean_recording(data["recording_name"]) if recording_name != data["recording_name"]: - print(f'RETRY recording {data["recording_name"]} => {recording_name}') query_data.append({"artist_name": artist_name, "recording_name": recording_name, "index": data["index"]}) artist_name = mc.clean_artist(data["artist_name"]) if artist_name != data["artist_name"]: - print(f'RETRY artist {data["artist_name"]} => {artist_name}') query_data.append({"artist_name": artist_name, "recording_name": recording_name, "index": data["index"]}) # If nothing got cleaned, we can finish now @@ -93,35 +92,46 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) if recordings is None and jspf_playlist is None: raise ValueError("Either recordings or jspf_playlist must be passed.") + print("\nResolve recordings to local files or subsonic ids") + self.db.open_db() self.build_index() artist_recording_data = [] if jspf_playlist is not None: - jspf = read_jspf_playlist(jspf_playlist) - for i, track in enumerate(jspf["playlist"]["track"]): + for i, track in enumerate(jspf_playlist["playlist"]["track"]): artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"]}) else: for rec in recordings: - artist_recording_data.append({"artist_name": rec.artist.name, "recording_name": rec.name }) + artist_recording_data.append({"artist_name": rec.artist.name, "recording_name": rec.name}) hits = self.resolve_recordings(artist_recording_data, match_threshold) hit_index = 
{hit["index"]: hit for hit in hits} recording_ids = [r["recording_id"] for r in hits] - recordings = Recording.select().where(Recording.id.in_(recording_ids)) - rec_index = {r.id: r for r in recordings} - + recordings = Recording \ + .select(Recording, RecordingSubsonic.subsonic_id) \ + .join(RecordingSubsonic, peewee.JOIN.LEFT_OUTER, on=(Recording.id == RecordingSubsonic.recording_id)) \ + .where(Recording.id.in_(recording_ids)) \ + .dicts() + rec_index = {r["id"]: r for r in recordings} + + print(" %-40s %-40s %-40s" % ("ARTIST", "RECORDING", "RELEASE")) results = [None] * len(artist_recording_data) for i, artist_recording in enumerate(artist_recording_data): if i not in hit_index: - print("FAIL %s - %s not resolved." % (artist_recording["artist_name"], artist_recording["recording_name"])) + print(bcolors.FAIL + "FAIL" + bcolors.ENDC + " %-40s - %-40s" % (artist_recording["artist_name"][:39], + artist_recording["recording_name"][:39])) continue hit = hit_index[i] rec = rec_index[hit["recording_id"]] results[hit["index"]] = rec - print("OK %s - %s resolved: %s" % (rec.artist_name, rec.recording_name, os.path.basename(rec.file_path))) + print(bcolors.OKGREEN + "OK" + bcolors.ENDC + " %-40s %-40s" % (artist_recording["artist_name"][:39], + artist_recording["recording_name"][:39])) + print(" %-40s %-40s %-40s" % (rec["artist_name"][:39], + rec["recording_name"][:39], + rec["release_name"][:39])) if len(results) == 0: print("Sorry, but no tracks could be resolved, no playlist generated.") diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index 8016787..af41f71 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -207,7 +207,7 @@ def read_metadata_and_add(self, relative_path, format, mtime, update): # really isn't for you anyway. heh. 
if mdata is not None: mdata["mtime"] = mtime - mdata["file_path"] = relative_path + mdata["file_path"] = file_path mdata["artist_mbid"] = self.convert_to_uuid(mdata["artist_mbid"]) mdata["release_mbid"] = self.convert_to_uuid(mdata["release_mbid"]) diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index 8965898..130c8c3 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -63,7 +63,7 @@ def generate(self, mode, prompt): playlist = patch.generate_playlist() except RuntimeError as err: print(f"LB Radio generation failed: {err}") - return + return None if playlist == None: print("Your prompt generated an empty playlist.") @@ -88,9 +88,10 @@ def resolve_recordings(self, playlist): for i, t_recording in enumerate(playlist.playlists[0].recordings): if resolved[i] is not None: - # TODO make this work for subsonic_ids -# if "subsonic_id" in resolved.musicbrainz: -# recording.musicbrainz["subsonic_id"] = resolved._id + if resolved[i]["subsonic_id"] != "": + t_recording.musicbrainz["subsonic_id"] = resolved[i]["subsonic_id"] - if resolved[i].file_path != "": - t_recording.musicbrainz["filename"] = resolved[i].file_path + if resolved[i]["file_path"] != "": + t_recording.musicbrainz["filename"] = resolved[i]["file_path"] + + t_recording.duration = resolved[i]["duration"] diff --git a/lb_content_resolver/model/subsonic.py b/lb_content_resolver/model/subsonic.py index 728592d..59601df 100644 --- a/lb_content_resolver/model/subsonic.py +++ b/lb_content_resolver/model/subsonic.py @@ -14,7 +14,7 @@ class Meta: table_name = "recording_subsonic" id = AutoField() - recording = ForeignKeyField(Recording, backref="metadata") + recording = ForeignKeyField(Recording, backref="subsonic") subsonic_id = TextField() last_updated = DateTimeField(null=False, default=datetime.datetime.now) diff --git a/lb_content_resolver/playlist.py b/lb_content_resolver/playlist.py index 8c88f17..5b28634 100644 --- a/lb_content_resolver/playlist.py 
+++ b/lb_content_resolver/playlist.py @@ -12,9 +12,9 @@ def read_jspf_playlist(jspf_file): return json.loads(js) -def write_m3u_playlist(file_name, playlist_title, hits): +def write_m3u_playlist_from_results(file_name, playlist_title, hits): """ - Given a list of Recordings, write a m3u playlist. + Given a list of Recordings, write an m3u playlist. """ with open(file_name, "w") as m3u: @@ -24,3 +24,20 @@ def write_m3u_playlist(file_name, playlist_title, hits): for rec in recordings: m3u.write("#EXTINF %d,%s\n" % (rec.duration / 1000, rec.recording_name)) m3u.write(rec.file_path + "\n") + + +def write_m3u_playlist_from_jspf(file_name, jspf): + """ + Given a jspf playlist, write an m3u playlist. + """ + + with open(file_name, "w") as m3u: + m3u.write("#EXTM3U\n") + m3u.write("#EXTENC: UTF-8\n") + m3u.write("#PLAYLIST %s\n" % jspf["playlist"]["title"]) + for track in jspf["playlist"]["track"]: + if "location" not in track: + continue + + m3u.write("#EXTINF %d,%s\n" % (track["duration"] / 1000, track["title"])) + m3u.write(track["location"] + "\n") diff --git a/lb_content_resolver/utils.py b/lb_content_resolver/utils.py index 3a792f9..30a09eb 100755 --- a/lb_content_resolver/utils.py +++ b/lb_content_resolver/utils.py @@ -76,3 +76,19 @@ def select_recordings_on_popularity(recordings, begin_percent, end_percent, num_ results.append(r) return results + + +class bcolors: + """ Basic ASCII color codes """ + + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + + diff --git a/requirements.txt b/requirements.txt index 81723cd..901d241 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ requests py-sonic@git+https://github.com/mayhem/py-sonic.git@int-vs-string tqdm troi@git+https://github.com/metabrainz/troi-recommendation-playground.git@lb-local +icecream diff --git a/resolve.py b/resolve.py index 7d12426..c527976 
100755 --- a/resolve.py +++ b/resolve.py @@ -12,7 +12,7 @@ from lb_content_resolver.utils import ask_yes_no_question from lb_content_resolver.top_tags import TopTags from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService -from lb_content_resolver.playlist import write_m3u_playlist +from lb_content_resolver.playlist import write_m3u_playlist_from_results, write_m3u_playlist_from_jspf import config @@ -25,6 +25,7 @@ def cli(): @click.command() @click.argument('index_dir') def create(index_dir): + """Create a new index directory to track a music collection""" db = Database(index_dir) db.create() @@ -33,6 +34,7 @@ def create(index_dir): @click.argument('index_dir') @click.argument('music_dir') def scan(index_dir, music_dir): + """Scan a directory and its subdirectories for music files to add to the collection""" db = Database(index_dir) db.scan(music_dir) @@ -40,6 +42,7 @@ def scan(index_dir, music_dir): @click.command() @click.argument('index_dir') def cleanup(index_dir): + """Perform a database cleanup. 
Check that files exist and if they don't remove from the index""" db = Database(index_dir) db.database_cleanup() @@ -47,6 +50,7 @@ def cleanup(index_dir): @click.command() @click.argument('index_dir') def metadata(index_dir): + """Lookup metadata (popularity and tags) for recordings""" db = Database(index_dir) lookup = MetadataLookup(db) lookup.lookup() @@ -55,6 +59,7 @@ def metadata(index_dir): @click.command() @click.argument('index_dir') def subsonic(index_dir): + """Scan a remote subsonic music collection""" db = SubsonicDatabase(index_dir) db.sync() @@ -64,43 +69,49 @@ def subsonic(index_dir): @click.argument('m3u_playlist') @click.option('-t', '--threshold', default=.80) def playlist(index_dir, jspf_playlist, m3u_playlist, threshold): + """ Resolve a JSPF file with MusicBrainz recording MBIDs to files in the local collection""" db = Database(index_dir) cr = ContentResolver(db) - title, recordings = cr.resolve_playlist(jspf_playlist, threshold) - cr.resolve_playlist(jspf_playlist, m3u_playlist, threshold) - write_m3u_playlist(write_m3u_playlist, title, recordings) + jspf = read_jspf_playlist(jspf_playlist) + results = cr.resolve_playlist(threshold, jspf_playlist=jspf_playlist) + write_m3u_playlist_from_results(m3u_playlist, results, jspf["playlist"]["title"]) @click.command() @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True) +@click.option('-p', '--save-to-playlist', required=False) +@click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file") @click.argument('index_dir') @click.argument('mode') @click.argument('prompt') -def lb_radio(upload_to_subsonic, index_dir, mode, prompt): +def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, prompt): + """Use the ListenBrainz Radio engine to create a playlist from a prompt, using a local music collection""" db = SubsonicDatabase(index_dir) r = ListenBrainzRadioLocal(db) jspf = r.generate(mode, prompt) + if jspf is None: + return if 
upload_to_subsonic and len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "": - if ask_yes_no_question("Upload via subsonic? (Y/n)"): + if dont_ask or ask_yes_no_question("Upload via subsonic? (Y/n)"): print("uploading playlist") db.upload_playlist(jspf) + elif save_to_playlist is not None and len(jspf["playlist"]["track"]) > 0: + if dont_ask or ask_yes_no_question(f"Save to '{save_to_playlist}'? (Y/n)"): + print("saving playlist") + write_m3u_playlist_from_jspf(save_to_playlist, jspf) + else: + print("Playlist displayed, but not saved. Use -p or -u options to save/upload playlists.") @click.command() @click.argument('index_dir') @click.argument('count', required=False, default=250) def top_tags(index_dir, count): + "Display the top most used tags in the music collection. Useful for writing LB Radio tag prompts""" db = Database(index_dir) tt = TopTags(db) tt.print_top_tags_tightly(count) -@click.command() -@click.argument('index_dir') -def artist_test(index_dir): - db = Database(index_dir) - s = LocalRecordingSearchByArtistService(db) - s.search(["8f6bd1e4-fbe1-4f50-aa9b-94c450ec0f11", "067102ea-9519-4622-9077-57ca4164cfbb"], .9, .6, 20) - cli.add_command(create) cli.add_command(scan) cli.add_command(playlist) @@ -109,7 +120,6 @@ def artist_test(index_dir): cli.add_command(subsonic) cli.add_command(lb_radio) cli.add_command(top_tags) -cli.add_command(artist_test) def usage(command): From 7805d026ce179e78b5d311f32ac3757628d4fbff Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sat, 30 Dec 2023 21:37:11 +0100 Subject: [PATCH 06/39] Add progress bar to scan collection --- lb_content_resolver/content_resolver.py | 10 +++++----- lb_content_resolver/database.py | 23 +++++++++++++++-------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index ae4e336..db37430 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ 
-120,8 +120,8 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) results = [None] * len(artist_recording_data) for i, artist_recording in enumerate(artist_recording_data): if i not in hit_index: - print(bcolors.FAIL + "FAIL" + bcolors.ENDC + " %-40s - %-40s" % (artist_recording["artist_name"][:39], - artist_recording["recording_name"][:39])) + print(bcolors.FAIL + "FAIL" + bcolors.ENDC + " %-40s - %-40s" % (artist_recording["recording_name"][:39], + artist_recording["artist_name"][:39])) continue hit = hit_index[i] @@ -129,9 +129,9 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) results[hit["index"]] = rec print(bcolors.OKGREEN + "OK" + bcolors.ENDC + " %-40s %-40s" % (artist_recording["artist_name"][:39], artist_recording["recording_name"][:39])) - print(" %-40s %-40s %-40s" % (rec["artist_name"][:39], - rec["recording_name"][:39], - rec["release_name"][:39])) + print(" %-40s %-40s %-40s" % (rec["recording_name"][:39], + rec["release_name"][:39], + rec["artist_name"][:39])) if len(results) == 0: print("Sorry, but no tracks could be resolved, no playlist generated.") diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index af41f71..c5b8047 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -7,6 +7,7 @@ from unidecode import unidecode import peewee +from tqdm import tqdm from lb_content_resolver.model.database import db, setup_db from lb_content_resolver.model.recording import Recording, RecordingMetadata @@ -74,7 +75,9 @@ def scan(self, music_dir): self.audio_file_count = self.track_count_estimate print("Found %s audio files" % self.audio_file_count) - self.traverse("") + with tqdm(total=self.track_count_estimate) as self.progress_bar: + self.traverse("") + self.close_db() print("Checked %s tracks:" % self.total) @@ -146,9 +149,9 @@ def add_or_update_recording(self, mdata): with db.atomic() as transaction: if mdata is not None: details 
= " %d%% " % (100 * self.total / self.audio_file_count) - details += " %-30s %-30s %-30s" % ((mdata.get("artist_name", "") or "")[:29], + details += " %-30s %-30s %-30s" % ((mdata.get("recording_name", "") or "")[:29], (mdata.get("release_name", "") or "")[:29], - (mdata.get("recording_name", "") or "")[:29]) + (mdata.get("artist_name", "") or "")[:29]) else: details = "" @@ -245,11 +248,14 @@ def add(self, relative_path): stats = os.stat(fullpath) ts = datetime.datetime.fromtimestamp(stats[8]) + # update the progress bar + self.progress_bar.update(1) + base, ext = os.path.splitext(relative_path) ext = ext.lower()[1:] base = os.path.basename(relative_path) if ext not in SUPPORTED_FORMATS: - print(" unknown %s" % base) + self.progress_bar.write(" unknown %s" % base) self.skipped += 1 return @@ -263,7 +269,7 @@ def add(self, relative_path): exists = True if recording.mtime == ts: self.not_changed += 1 - print("unchanged %s" % base) + self.progress_bar.write("unchanged %s" % base) return # read the file's last modified time to avoid re-reading tags @@ -272,14 +278,15 @@ def add(self, relative_path): status, details = self.read_metadata_and_add(relative_path, ext, ts, exists) if status == "updated": - print(" update %s" % details) + self.progress_bar.write(" update %s" % details) self.updated += 1 elif status == "added": - print(" add %s" % details) + self.progress_bar.write(" add %s" % details) self.added += 1 else: self.error += 1 - print(" error %s" % details) + self.progress_bar.write(" error %s" % details) + def database_cleanup(self): ''' From e425baec0557efc511c4419c5c94594a4e0ea0e6 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sat, 30 Dec 2023 22:14:35 +0100 Subject: [PATCH 07/39] writing and uploading resolved playlists now works! 
--- lb_content_resolver/content_resolver.py | 2 +- lb_content_resolver/subsonic.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index db37430..03733ac 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -120,7 +120,7 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) results = [None] * len(artist_recording_data) for i, artist_recording in enumerate(artist_recording_data): if i not in hit_index: - print(bcolors.FAIL + "FAIL" + bcolors.ENDC + " %-40s - %-40s" % (artist_recording["recording_name"][:39], + print(bcolors.FAIL + "FAIL" + bcolors.ENDC + " %-40s %-40s" % (artist_recording["recording_name"][:39], artist_recording["artist_name"][:39])) continue diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py index ad7d41d..a6eab8e 100755 --- a/lb_content_resolver/subsonic.py +++ b/lb_content_resolver/subsonic.py @@ -124,9 +124,12 @@ def upload_playlist(self, jspf): conn = libsonic.Connection(config.SUBSONIC_HOST, config.SUBSONIC_USER, config.SUBSONIC_PASSWORD, config.SUBSONIC_PORT) - song_ids = [ - track["extension"]["https://musicbrainz.org/doc/jspf#track"]["additional_metadata"]["subsonic_identifier"][33:] - for track in jspf["playlist"]["track"] - ] + song_ids = [] + for track in jspf["playlist"]["track"]: + try: + song_ids.append(track["extension"]["https://musicbrainz.org/doc/jspf#track"]["additional_metadata"]["subsonic_identifier"][33:]) + except KeyError: + continue + name = jspf["playlist"]["title"] conn.createPlaylist(name=name, songIds=song_ids) From f977dda22492108e71af38999d90fe71699a3b5e Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sat, 30 Dec 2023 22:42:09 +0100 Subject: [PATCH 08/39] artist, tag and stats elements now play cleanly together! 
--- lb_content_resolver/content_resolver.py | 8 ++++---- lb_content_resolver/lb_radio.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index 03733ac..cf23e46 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -116,19 +116,19 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) .dicts() rec_index = {r["id"]: r for r in recordings} - print(" %-40s %-40s %-40s" % ("ARTIST", "RECORDING", "RELEASE")) + print(" %-40s %-40s %-40s" % ("RECORDING", "RELEASE", "ARTIST")) results = [None] * len(artist_recording_data) for i, artist_recording in enumerate(artist_recording_data): if i not in hit_index: - print(bcolors.FAIL + "FAIL" + bcolors.ENDC + " %-40s %-40s" % (artist_recording["recording_name"][:39], + print(bcolors.FAIL + "FAIL" + bcolors.ENDC + " %-40s %-40s %-40s" % (artist_recording["recording_name"][:39], "", artist_recording["artist_name"][:39])) continue hit = hit_index[i] rec = rec_index[hit["recording_id"]] results[hit["index"]] = rec - print(bcolors.OKGREEN + "OK" + bcolors.ENDC + " %-40s %-40s" % (artist_recording["artist_name"][:39], - artist_recording["recording_name"][:39])) + print(bcolors.OKGREEN + "OK" + bcolors.ENDC + " %-40s %-40s %-40s" % (artist_recording["recording_name"][:39], "", + artist_recording["artist_name"][:39])) print(" %-40s %-40s %-40s" % (rec["recording_name"][:39], rec["release_name"][:39], rec["artist_name"][:39])) diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index 130c8c3..ca897d1 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -86,7 +86,7 @@ def resolve_recordings(self, playlist): cr = ContentResolver(self.db) resolved = cr.resolve_playlist(self.MATCH_THRESHOLD, recordings) - for i, t_recording in enumerate(playlist.playlists[0].recordings): + for i, t_recording in 
enumerate(recordings): if resolved[i] is not None: if resolved[i]["subsonic_id"] != "": t_recording.musicbrainz["subsonic_id"] = resolved[i]["subsonic_id"] From 349f806d3e568ae5c23c90b7ba125c0dcd28c796 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sat, 30 Dec 2023 23:33:38 +0100 Subject: [PATCH 09/39] Add duplicate funcion to show duplicates in the collection --- lb_content_resolver/utils.py | 2 -- resolve.py | 10 ++++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lb_content_resolver/utils.py b/lb_content_resolver/utils.py index 30a09eb..042842e 100755 --- a/lb_content_resolver/utils.py +++ b/lb_content_resolver/utils.py @@ -90,5 +90,3 @@ class bcolors: ENDC = '\033[0m' BOLD = '\033[1m' UNDERLINE = '\033[4m' - - diff --git a/resolve.py b/resolve.py index c527976..8d1432a 100755 --- a/resolve.py +++ b/resolve.py @@ -11,6 +11,7 @@ from lb_content_resolver.lb_radio import ListenBrainzRadioLocal from lb_content_resolver.utils import ask_yes_no_question from lb_content_resolver.top_tags import TopTags +from lb_content_resolver.duplicates import FindDuplicates from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService from lb_content_resolver.playlist import write_m3u_playlist_from_results, write_m3u_playlist_from_jspf import config @@ -111,6 +112,14 @@ def top_tags(index_dir, count): tt = TopTags(db) tt.print_top_tags_tightly(count) +@click.command() +@click.argument('index_dir') +def duplicates(index_dir): + "Print all the tracks in the DB that are duplciated as per recording_mbid""" + db = Database(index_dir) + fd = FindDuplicates(db) + fd.print_duplicate_recordings() + cli.add_command(create) cli.add_command(scan) @@ -120,6 +129,7 @@ def top_tags(index_dir, count): cli.add_command(subsonic) cli.add_command(lb_radio) cli.add_command(top_tags) +cli.add_command(duplicates) def usage(command): From b07533edd7dd5d7fd1649c6d700ef0bd80d6378a Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sat, 30 Dec 2023 23:48:25 +0100 
Subject: [PATCH 10/39] Add missing file --- lb_content_resolver/duplicates.py | 57 +++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100755 lb_content_resolver/duplicates.py diff --git a/lb_content_resolver/duplicates.py b/lb_content_resolver/duplicates.py new file mode 100755 index 0000000..bbbc54e --- /dev/null +++ b/lb_content_resolver/duplicates.py @@ -0,0 +1,57 @@ +import os +import json +from collections import defaultdict +import datetime +import sys + +import peewee +import requests + +from lb_content_resolver.model.database import db +from lb_content_resolver.model.recording import Recording, RecordingMetadata +from troi.recording_search_service import RecordingSearchByTagService +from troi.splitter import plist + + +class FindDuplicates: + ''' + Class to fetch recordings that are duplicate in the database. + ''' + + def __init__(self, db): + self.db = db + + def get_duplicate_recordings(self): + """ + Return a list of (recording_name + """ + + query = """SELECT recording_name + , release_name + , artist_name + , recording_mbid + , json_group_array(file_path) AS file_paths + , COUNT(*) AS cnt + FROM recording + GROUP BY recording_mbid + HAVING cnt > 1 + ORDER BY cnt DESC, artist_name, recording_name""" + + self.db.open_db() + + return [ (r[0], r[1], r[2], r[3], json.loads(r[4]), r[5]) for r in db.execute_sql(query).fetchall() ] + + + def print_duplicate_recordings(self): + + total = 0 + dups = self.get_duplicate_recordings() + for dup in dups: + print("%d duplicates of '%s' by '%s'" % (dup[5], dup[0], dup[2])) + for f in dup[4]: + print(" %s" % f) + total += 1 + print() + + print() + print("%d recordings had a total of %d duplicates." 
% (len(dups), total)) From 8084a8b1fc3b13fa4b1d192689ae0f7d5ead500d Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 00:03:52 +0100 Subject: [PATCH 11/39] Improve the cleanup function --- lb_content_resolver/database.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index c5b8047..76ba110 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -295,8 +295,13 @@ def database_cleanup(self): self.open_db() query = Recording.select() + recording_ids = [] for recording in query: if not os.path.exists(recording.file_path): - print("DEL %s" % recording.file_path) - recording.delete() + print("UNLINK %s" % recording.file_path) + recording_ids.append(recording.id) + + placeholders = ",".join(("?", ) * len(recording_ids)) + db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % (placeholders, (recording_ids,))) + self.close_db() From e4d5c17b6065f3d5fca5fe36896c0f71a7059a54 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 00:07:12 +0100 Subject: [PATCH 12/39] Fix delete --- lb_content_resolver/database.py | 2 +- lb_content_resolver/metadata_lookup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index 76ba110..4cbb71e 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -302,6 +302,6 @@ def database_cleanup(self): recording_ids.append(recording.id) placeholders = ",".join(("?", ) * len(recording_ids)) - db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % (placeholders, (recording_ids,))) + db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % placeholders, tuple(recording_ids)) self.close_db() diff --git a/lb_content_resolver/metadata_lookup.py b/lb_content_resolver/metadata_lookup.py index 2162985..09cfaad 100755 --- 
a/lb_content_resolver/metadata_lookup.py +++ b/lb_content_resolver/metadata_lookup.py @@ -104,6 +104,7 @@ def lookup_chunk(self, args, mbid_to_id_index): # insert new recording tags tag_ids = {} for tag in tags: + print(tag) cursor = db.execute_sql("""INSERT INTO tag (name) VALUES (?) ON CONFLICT DO UPDATE SET name = ? RETURNING id""", (tag,tag)) From 1d86d5ffa7d444d54aeb0090023a00622ec738e2 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 11:06:38 +0100 Subject: [PATCH 13/39] Improve dups --- lb_content_resolver/duplicates.py | 39 +++++++++++++++++--------- lb_content_resolver/metadata_lookup.py | 1 - lb_content_resolver/subsonic.py | 2 +- resolve.py | 5 ++-- 4 files changed, 30 insertions(+), 17 deletions(-) diff --git a/lb_content_resolver/duplicates.py b/lb_content_resolver/duplicates.py index bbbc54e..5fa892f 100755 --- a/lb_content_resolver/duplicates.py +++ b/lb_content_resolver/duplicates.py @@ -21,31 +21,44 @@ class FindDuplicates: def __init__(self, db): self.db = db - def get_duplicate_recordings(self): + def get_duplicate_recordings(self, exclude_different_releases): """ Return a list of (recording_name """ - query = """SELECT recording_name - , release_name - , artist_name - , recording_mbid - , json_group_array(file_path) AS file_paths - , COUNT(*) AS cnt - FROM recording - GROUP BY recording_mbid - HAVING cnt > 1 - ORDER BY cnt DESC, artist_name, recording_name""" + if exclude_different_releases: + query = """SELECT recording_name + , release_name + , artist_name + , recording_mbid + , json_group_array(file_path) AS file_paths + , COUNT(*) AS cnt + FROM recording + GROUP BY recording_mbid + HAVING cnt > 1 + ORDER BY cnt DESC, artist_name, recording_name""" + else: + query = """SELECT recording_name + , release_name + , artist_name + , recording_mbid + , json_group_array(file_path) AS file_paths + , COUNT(*) AS cnt + FROM recording + GROUP BY recording_mbid + , release_mbid + HAVING cnt > 1 + ORDER BY cnt DESC, artist_name, 
recording_name""" self.db.open_db() return [ (r[0], r[1], r[2], r[3], json.loads(r[4]), r[5]) for r in db.execute_sql(query).fetchall() ] - def print_duplicate_recordings(self): + def print_duplicate_recordings(self, exclude_different_releases=True): total = 0 - dups = self.get_duplicate_recordings() + dups = self.get_duplicate_recordings(exclude_different_releases) for dup in dups: print("%d duplicates of '%s' by '%s'" % (dup[5], dup[0], dup[2])) for f in dup[4]: diff --git a/lb_content_resolver/metadata_lookup.py b/lb_content_resolver/metadata_lookup.py index 09cfaad..2162985 100755 --- a/lb_content_resolver/metadata_lookup.py +++ b/lb_content_resolver/metadata_lookup.py @@ -104,7 +104,6 @@ def lookup_chunk(self, args, mbid_to_id_index): # insert new recording tags tag_ids = {} for tag in tags: - print(tag) cursor = db.execute_sql("""INSERT INTO tag (name) VALUES (?) ON CONFLICT DO UPDATE SET name = ? RETURNING id""", (tag,tag)) diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py index a6eab8e..e5ac190 100755 --- a/lb_content_resolver/subsonic.py +++ b/lb_content_resolver/subsonic.py @@ -81,7 +81,7 @@ def run_sync(self): if len(release_tracks) == 0: print("For album %s" % album_mbid) - print("loaded %d of %d expected tracks from DB." % (len(release_tracks), len(album_info["album"]["song"]))) + print("loaded %d of %d expected tracks from DB." 
% (len(release_tracks), len(album_info["album"].get("song", [])))) print("album '%s' by '%s'" % (album["album"], album["artist"])) if "song" not in album_info["album"]: diff --git a/resolve.py b/resolve.py index 8d1432a..fb5cb3e 100755 --- a/resolve.py +++ b/resolve.py @@ -114,11 +114,12 @@ def top_tags(index_dir, count): @click.command() @click.argument('index_dir') -def duplicates(index_dir): +@click.option('-e', '--exclude-different-release', required=False, is_flag=True) +def duplicates(exclude_different_release, index_dir): "Print all the tracks in the DB that are duplciated as per recording_mbid""" db = Database(index_dir) fd = FindDuplicates(db) - fd.print_duplicate_recordings() + fd.print_duplicate_recordings(exclude_different_release) cli.add_command(create) From 9c7492377bbdb750bfa4d677a5ddc60fce6d1e7e Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 11:52:37 +0100 Subject: [PATCH 14/39] Finished the duplicate recording detetction feature --- lb_content_resolver/duplicates.py | 10 +++++----- resolve.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lb_content_resolver/duplicates.py b/lb_content_resolver/duplicates.py index 5fa892f..7715963 100755 --- a/lb_content_resolver/duplicates.py +++ b/lb_content_resolver/duplicates.py @@ -21,12 +21,12 @@ class FindDuplicates: def __init__(self, db): self.db = db - def get_duplicate_recordings(self, exclude_different_releases): + def get_duplicate_recordings(self, include_different_releases): """ Return a list of (recording_name """ - if exclude_different_releases: + if include_different_releases: query = """SELECT recording_name , release_name , artist_name @@ -35,6 +35,7 @@ def get_duplicate_recordings(self, exclude_different_releases): , COUNT(*) AS cnt FROM recording GROUP BY recording_mbid + , release_mbid HAVING cnt > 1 ORDER BY cnt DESC, artist_name, recording_name""" else: @@ -46,7 +47,6 @@ def get_duplicate_recordings(self, exclude_different_releases): , COUNT(*) AS 
cnt FROM recording GROUP BY recording_mbid - , release_mbid HAVING cnt > 1 ORDER BY cnt DESC, artist_name, recording_name""" @@ -55,10 +55,10 @@ def get_duplicate_recordings(self, exclude_different_releases): return [ (r[0], r[1], r[2], r[3], json.loads(r[4]), r[5]) for r in db.execute_sql(query).fetchall() ] - def print_duplicate_recordings(self, exclude_different_releases=True): + def print_duplicate_recordings(self, include_different_releases=True): total = 0 - dups = self.get_duplicate_recordings(exclude_different_releases) + dups = self.get_duplicate_recordings(include_different_releases) for dup in dups: print("%d duplicates of '%s' by '%s'" % (dup[5], dup[0], dup[2])) for f in dup[4]: diff --git a/resolve.py b/resolve.py index fb5cb3e..3ef1275 100755 --- a/resolve.py +++ b/resolve.py @@ -114,7 +114,7 @@ def top_tags(index_dir, count): @click.command() @click.argument('index_dir') -@click.option('-e', '--exclude-different-release', required=False, is_flag=True) +@click.option('-e', '--exclude-different-release', required=False, default=False, is_flag=True) def duplicates(exclude_different_release, index_dir): "Print all the tracks in the DB that are duplciated as per recording_mbid""" db = Database(index_dir) From ea8505ebd7250ddf26729b739fb7449cfed1b683 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 13:41:48 +0100 Subject: [PATCH 15/39] Improve the status update of the subsonic scan and make it faster --- lb_content_resolver/subsonic.py | 160 ++++++++++++++++++-------------- 1 file changed, 92 insertions(+), 68 deletions(-) diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py index e5ac190..a3f1bbb 100755 --- a/lb_content_resolver/subsonic.py +++ b/lb_content_resolver/subsonic.py @@ -1,11 +1,14 @@ import datetime import os +import sys from uuid import UUID import libsonic +from tqdm import tqdm from lb_content_resolver.database import Database from lb_content_resolver.model.database import db +from 
lb_content_resolver.utils import bcolors import config @@ -13,8 +16,9 @@ class SubsonicDatabase(Database): ''' Add subsonic sync capabilities to the Database ''' - - MAX_ALBUMS_PER_CALL = 500 + + # Determined by the number of albums we can fetch in one go + BATCH_SIZE = 500 def __init__(self, index_dir): Database.__init__(self, index_dir) @@ -26,81 +30,99 @@ def sync(self): # Keep some stats self.total = 0 - self.added = 0 - self.removed = 0 - self.updated = 0 + self.matched = 0 + self.error = 0 self.open_db() self.run_sync() self.close_db() - print("Checked %s tracks:" % self.total) - print(" %5d tracks added" % self.added) - print(" %5d tracks updated" % self.updated) - print(" %5d tracks removed" % self.removed) + print("Checked %s albums:" % self.total) + print(" %5d albums matched" % self.matched) + print(" %5d albums with errors" % self.error) def run_sync(self): """ Perform the sync between the local collection and the subsonic one. """ - print("Connect to subsonic..") + print("[ connect to subsonic ]") conn = libsonic.Connection(config.SUBSONIC_HOST, config.SUBSONIC_USER, config.SUBSONIC_PASSWORD, config.SUBSONIC_PORT) - cursor = db.connection().cursor() - print("Fetch recordings") - album_count = 0 + print("[ load albums ]") + album_ids = set() + albums = [] + offset = 0 while True: - recordings = [] - albums_this_batch = 0 - albums = conn.getAlbumList(ltype="alphabeticalByArtist", size=self.MAX_ALBUMS_PER_CALL, offset=album_count) - - for album in albums["albumList"]["album"]: - album_count += 1 - albums_this_batch += 1 - - album_info = conn.getAlbumInfo2(id=album["id"]) - try: - album_mbid = album_info["albumInfo"]["musicBrainzId"] - except KeyError: - print("subsonic album '%s' by '%s' has no MBID" % (album["album"], album["artist"])) - continue - - cursor.execute( - """SELECT recording.id - , track_num - , COALESCE(disc_num, 1) - FROM recording - WHERE release_mbid = ?""", (album_mbid, )) - - # create index on (track_num, disc_num) - release_tracks = 
{(row[1], row[2]): row[0] for row in cursor.fetchall()} - - album_info = conn.getAlbum(id=album["id"]) - - if len(release_tracks) == 0: - print("For album %s" % album_mbid) - print("loaded %d of %d expected tracks from DB." % (len(release_tracks), len(album_info["album"].get("song", [])))) - - print("album '%s' by '%s'" % (album["album"], album["artist"])) - if "song" not in album_info["album"]: - print("No songs returned") - else: - for song in album_info["album"]["song"]: - - if (song["track"], song.get("discNumber", 1)) in release_tracks: - recordings.append((release_tracks[(song["track"], song["discNumber"])], song["id"])) - else: - print("Song not matched: ", song["title"]) - continue - - self.update_recordings(recordings) - - print("fetched %d releases" % albums_this_batch) - if albums_this_batch < self.MAX_ALBUMS_PER_CALL: + results = conn.getAlbumList(ltype="alphabeticalByArtist", size=self.BATCH_SIZE, offset=offset) + albums.extend(results["albumList"]["album"]) + album_ids.update([r["id"] for r in results["albumList"]["album"] ]) + + album_count = len(results["albumList"]["album"]) + offset += album_count + if album_count < self.BATCH_SIZE: break + print("[ loaded %d albums ]" % len(album_ids)) + + pbar = tqdm(total=len(album_ids)) + recordings = [] + + for album in albums: + album_info = conn.getAlbumInfo2(id=album["id"]) + try: + album_mbid = album_info["albumInfo"]["musicBrainzId"] + except KeyError: + pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "subsonic album '%s' by '%s' has no MBID" % + (album["album"], album["artist"])) + continue + + cursor.execute( + """SELECT recording.id + , track_num + , COALESCE(disc_num, 1) + FROM recording + WHERE release_mbid = ?""", (album_mbid, )) + + # create index on (track_num, disc_num) + release_tracks = {(row[1], row[2]): row[0] for row in cursor.fetchall()} + + album_info = conn.getAlbum(id=album["id"]) + + if len(release_tracks) == 0: + pbar.write("For album %s" % album_mbid) + pbar.write("loaded %d of %d 
expected tracks from DB." % + (len(release_tracks), len(album_info["album"].get("song", [])))) + + msg = "" + if "song" not in album_info["album"]: + msg += " No songs returned\n" + else: + for song in album_info["album"]["song"]: + if (song["track"], song.get("discNumber", 1)) in release_tracks: + recordings.append((release_tracks[(song["track"], song["discNumber"])], song["id"])) + else: + msg += " Song not matched: '%s'\n" % song["title"] + continue + if msg == "": + pbar.write(bcolors.OKGREEN + "OK " + bcolors.ENDC + "album %-50s %-50s" % + (album["album"][:49], album["artist"][:49])) + self.matched += 1 + else: + pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "album %-50s %-50s" % + (album["album"][:49], album["artist"][:49])) + pbar.write(msg) + self.error += 1 + + if len(recordings) >= self.BATCH_SIZE: + self.update_recordings(recordings) + recordings = [] + + self.total += 1 + pbar.update(1) + + def update_recordings(self, recordings): """ Given a list of recording_subsonic records, update the DB. @@ -110,12 +132,13 @@ def update_recordings(self, recordings): recordings = [(r[0], r[1], datetime.datetime.now()) for r in recordings] cursor = db.connection().cursor() - cursor.executemany( - """INSERT INTO recording_subsonic (recording_id, subsonic_id, last_updated) - VALUES (?, ?, ?) - ON CONFLICT DO UPDATE SET recording_id = excluded.recording_id - , subsonic_id = excluded.subsonic_id - , last_updated = excluded.last_updated""", recordings) + with db.atomic() as transaction: + cursor.executemany( + """INSERT INTO recording_subsonic (recording_id, subsonic_id, last_updated) + VALUES (?, ?, ?) 
+ ON CONFLICT DO UPDATE SET recording_id = excluded.recording_id + , subsonic_id = excluded.subsonic_id + , last_updated = excluded.last_updated""", recordings) def upload_playlist(self, jspf): """ @@ -127,9 +150,10 @@ def upload_playlist(self, jspf): song_ids = [] for track in jspf["playlist"]["track"]: try: - song_ids.append(track["extension"]["https://musicbrainz.org/doc/jspf#track"]["additional_metadata"]["subsonic_identifier"][33:]) + song_ids.append( + track["extension"]["https://musicbrainz.org/doc/jspf#track"]["additional_metadata"]["subsonic_identifier"][33:]) except KeyError: continue - + name = jspf["playlist"]["title"] conn.createPlaylist(name=name, songIds=song_ids) From 9e0325b961d67315e017bc5e976ab72d63fb9dd3 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 13:46:08 +0100 Subject: [PATCH 16/39] Minor cleanup --- lb_content_resolver/subsonic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py index a3f1bbb..b6a699d 100755 --- a/lb_content_resolver/subsonic.py +++ b/lb_content_resolver/subsonic.py @@ -76,6 +76,7 @@ def run_sync(self): except KeyError: pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "subsonic album '%s' by '%s' has no MBID" % (album["album"], album["artist"])) + self.error += 1 continue cursor.execute( @@ -122,6 +123,9 @@ def run_sync(self): self.total += 1 pbar.update(1) + if len(recordings) >= self.BATCH_SIZE: + self.update_recordings(recordings) + def update_recordings(self, recordings): """ From 907cc99a7398bebbd2fc63372676b0a4118777a3 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sun, 31 Dec 2023 14:45:35 +0100 Subject: [PATCH 17/39] subsonic: use getAlbumList2 --- lb_content_resolver/subsonic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py index b6a699d..59b34e1 100755 --- a/lb_content_resolver/subsonic.py +++ 
b/lb_content_resolver/subsonic.py @@ -13,10 +13,10 @@ class SubsonicDatabase(Database): - ''' + ''' Add subsonic sync capabilities to the Database ''' - + # Determined by the number of albums we can fetch in one go BATCH_SIZE = 500 @@ -55,11 +55,11 @@ def run_sync(self): albums = [] offset = 0 while True: - results = conn.getAlbumList(ltype="alphabeticalByArtist", size=self.BATCH_SIZE, offset=offset) - albums.extend(results["albumList"]["album"]) - album_ids.update([r["id"] for r in results["albumList"]["album"] ]) + results = conn.getAlbumList2(ltype="alphabeticalByArtist", size=self.BATCH_SIZE, offset=offset) + albums.extend(results["albumList2"]["album"]) + album_ids.update([r["id"] for r in results["albumList2"]["album"] ]) - album_count = len(results["albumList"]["album"]) + album_count = len(results["albumList2"]["album"]) offset += album_count if album_count < self.BATCH_SIZE: break From 5521f49235e0678f96d0d69b8a298380dd18aaa8 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sun, 31 Dec 2023 15:00:37 +0100 Subject: [PATCH 18/39] subsonic: avoid call to getAlbumInfo2 if MBID is already present This adds compatibility with clients not supporting getAlbumInfo2 --- lb_content_resolver/subsonic.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py index 59b34e1..60f1f8b 100755 --- a/lb_content_resolver/subsonic.py +++ b/lb_content_resolver/subsonic.py @@ -70,14 +70,19 @@ def run_sync(self): recordings = [] for album in albums: - album_info = conn.getAlbumInfo2(id=album["id"]) - try: - album_mbid = album_info["albumInfo"]["musicBrainzId"] - except KeyError: - pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "subsonic album '%s' by '%s' has no MBID" % - (album["album"], album["artist"])) - self.error += 1 - continue + album_info = conn.getAlbum(id=album["id"]) + + # Some servers might already include the MBID in the list or album response + 
album_mbid = album_info.get("musicBrainzId", album.get("musicBrainzId")) + if not album_mbid: + album_info2 = conn.getAlbumInfo2(id=album["id"]) + try: + album_mbid = album_info2["albumInfo"]["musicBrainzId"] + except KeyError: + pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "subsonic album '%s' by '%s' has no MBID" % + (album_info["name"], album_info["artist"])) + self.error += 1 + continue cursor.execute( """SELECT recording.id @@ -89,8 +94,6 @@ def run_sync(self): # create index on (track_num, disc_num) release_tracks = {(row[1], row[2]): row[0] for row in cursor.fetchall()} - album_info = conn.getAlbum(id=album["id"]) - if len(release_tracks) == 0: pbar.write("For album %s" % album_mbid) pbar.write("loaded %d of %d expected tracks from DB." % @@ -108,11 +111,11 @@ def run_sync(self): continue if msg == "": pbar.write(bcolors.OKGREEN + "OK " + bcolors.ENDC + "album %-50s %-50s" % - (album["album"][:49], album["artist"][:49])) + (album_info["name"][:49], album_info["artist"][:49])) self.matched += 1 else: pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "album %-50s %-50s" % - (album["album"][:49], album["artist"][:49])) + (album_info["name"][:49], album_info["artist"][:49])) pbar.write(msg) self.error += 1 From 8f419aa2f5ea07949f0c931d253969fd6a9cfdb1 Mon Sep 17 00:00:00 2001 From: Philipp Wolfer Date: Sun, 31 Dec 2023 15:44:54 +0100 Subject: [PATCH 19/39] subsonic: fix wrong variable use to read album name and artist --- lb_content_resolver/subsonic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py index 60f1f8b..ab4695f 100755 --- a/lb_content_resolver/subsonic.py +++ b/lb_content_resolver/subsonic.py @@ -80,7 +80,7 @@ def run_sync(self): album_mbid = album_info2["albumInfo"]["musicBrainzId"] except KeyError: pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "subsonic album '%s' by '%s' has no MBID" % - (album_info["name"], album_info["artist"])) + 
(album["name"], album["artist"])) self.error += 1 continue @@ -111,11 +111,11 @@ def run_sync(self): continue if msg == "": pbar.write(bcolors.OKGREEN + "OK " + bcolors.ENDC + "album %-50s %-50s" % - (album_info["name"][:49], album_info["artist"][:49])) + (album["name"][:49], album["artist"][:49])) self.matched += 1 else: pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "album %-50s %-50s" % - (album_info["name"][:49], album_info["artist"][:49])) + (album["name"][:49], album["artist"][:49])) pbar.write(msg) self.error += 1 From 042956b7e91f00d42a2ad5c4ca38a27f1e8dcadb Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 16:45:45 +0100 Subject: [PATCH 20/39] Make the metadata lookup suck less with proper progress bars --- lb_content_resolver/metadata_lookup.py | 41 ++++++++++++++++---------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/lb_content_resolver/metadata_lookup.py b/lb_content_resolver/metadata_lookup.py index 2162985..883ebe1 100755 --- a/lb_content_resolver/metadata_lookup.py +++ b/lb_content_resolver/metadata_lookup.py @@ -5,6 +5,7 @@ import peewee import requests +from tqdm import tqdm from lb_content_resolver.model.database import db from lb_content_resolver.model.recording import Recording, RecordingMetadata @@ -15,6 +16,8 @@ class MetadataLookup: Given the local database, lookup metadata from MusicBrainz to allow local playlist resolution. 
''' + BATCH_SIZE = 1000 + def __init__(self, db): self.db = db @@ -24,34 +27,38 @@ def lookup(self): """ self.db.open_db() - args = [] - mbid_to_id_index = {} - cursor = db.execute_sql("""SELECT recording.id, recording.recording_mbid, recording_metadata.id, popularity + cursor = db.execute_sql("""SELECT recording.id, recording.recording_mbid, recording_metadata.id FROM recording LEFT JOIN recording_metadata ON recording.id = recording_metadata.recording_id - WHERE recording.recording_mbid IS NOT NULL """) + WHERE recording_mbid IS NOT NULL + ORDER BY artist_name, release_name""") + recordings = [] for row in cursor.fetchall(): - mbid = str(row[1]) - args.append({ "[recording_mbid]": mbid }) - mbid_to_id_index[mbid] = row - if len(args) == 1000: - if not self.lookup_chunk(args, mbid_to_id_index): - return - args = [] - mbid_to_id_index = {} + recordings.append(row) + + print("[ %d recordings to lookup ]" % len(recordings)) - if len(args) > 0: - self.lookup_chunk(args, mbid_to_id_index) + offset = 0 + with tqdm(total=len(recordings)) as self.pbar: + while offset <= len(recordings): + self.process_recordings(recordings[offset:offset+self.BATCH_SIZE]) + offset += self.BATCH_SIZE - def lookup_chunk(self, args, mbid_to_id_index): + def process_recordings(self, recordings): """ This function carries out the actual lookup of the metadata and inserting the popularity and tags into the DB for the given chunk of recordings. 
""" + args = [] + mbid_to_id_index = {} + for rec in recordings: + mbid_to_id_index[ str(rec[1])] = rec + args.append({ "[recording_mbid]": str(rec[1]) }) + r = requests.post("https://labs.api.listenbrainz.org/bulk-tag-lookup/json", json=args) if r.status_code != 200: print("Fail: %d %s" % (r.status_code, r.text)) @@ -69,6 +76,8 @@ def lookup_chunk(self, args, mbid_to_id_index): recording_tags[mbid][row["source"]].append(row["tag"]) tags.add(row["tag"]) + self.pbar.update(len(recordings)) + tags = list(tags) with db.atomic(): @@ -81,7 +90,7 @@ def lookup_chunk(self, args, mbid_to_id_index): for mbid in list(set(mbids)): mbid = str(mbid) row = mbid_to_id_index[mbid] - if row[3] is None: + if row[2] is None: recording_metadata = RecordingMetadata.create(recording=row[0], popularity=recording_pop[mbid], last_updated=datetime.datetime.now()) From 1c594ac420b702c084f66a494c5c2fc0f5c019c3 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 16:53:46 +0100 Subject: [PATCH 21/39] Show top tags after metadata load --- lb_content_resolver/top_tags.py | 2 +- resolve.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lb_content_resolver/top_tags.py b/lb_content_resolver/top_tags.py index da909ce..6a7d5bf 100755 --- a/lb_content_resolver/top_tags.py +++ b/lb_content_resolver/top_tags.py @@ -55,4 +55,4 @@ def print_top_tags_tightly(self, limit=250): top_tags = self.get_top_tags(limit) - print("; ".join([ tt["tag"] for tt in top_tags ])) + print("; ".join([ "%s %s" % (tt["tag"], tt["count"]) for tt in top_tags ])) diff --git a/resolve.py b/resolve.py index 3ef1275..ebe92cb 100755 --- a/resolve.py +++ b/resolve.py @@ -56,6 +56,10 @@ def metadata(index_dir): lookup = MetadataLookup(db) lookup.lookup() + print("\nThese top tags describe your collection:") + tt = TopTags(db) + tt.print_top_tags_tightly(100) + @click.command() @click.argument('index_dir') From c8e82410f84427950ef8da141c751c15a404d706 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: 
Sun, 31 Dec 2023 21:15:00 +0100 Subject: [PATCH 22/39] Do not resolve playlists if no tracks are missing. Less crashy. --- lb_content_resolver/content_resolver.py | 12 ++++++++---- lb_content_resolver/lb_radio.py | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index cf23e46..f137f0f 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -94,17 +94,21 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) print("\nResolve recordings to local files or subsonic ids") - self.db.open_db() - self.build_index() - artist_recording_data = [] if jspf_playlist is not None: + if len(jspf_playlist["playlist"]["track"]) == 0: + return [] for i, track in enumerate(jspf_playlist["playlist"]["track"]): artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"]}) else: + if not recordings: + return [] for rec in recordings: artist_recording_data.append({"artist_name": rec.artist.name, "recording_name": rec.name}) + self.db.open_db() + self.build_index() + hits = self.resolve_recordings(artist_recording_data, match_threshold) hit_index = {hit["index"]: hit for hit in hits} @@ -135,7 +139,7 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) if len(results) == 0: print("Sorry, but no tracks could be resolved, no playlist generated.") - return + return [] print(f'\n{len(recordings)} recordings resolved, {len(artist_recording_data) - len(recordings)} not resolved.') diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index ca897d1..277b431 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -83,6 +83,9 @@ def resolve_recordings(self, playlist): recordings.append(recording) + if not recordings: + return + cr = ContentResolver(self.db) resolved = 
cr.resolve_playlist(self.MATCH_THRESHOLD, recordings) From a59fa0139b176e369aafe15c609e996fc35de661 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 21:43:14 +0100 Subject: [PATCH 23/39] Update readme for the new features on this branch --- README.md | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 16aa3ea..64346da 100644 --- a/README.md +++ b/README.md @@ -83,9 +83,38 @@ Finally, match your collection against the subsonic collection: ### Playlist generation -Currently only tag elements are supported for LB Local Radio. +Currently artist and tag elements are supported for LB Local Radio, +which means that playlists from these two elements are made from the local +collection and thus will not need to be resolved. All other elements +may generate playlists with tracks that are not availalble in your +collection. In this case, the fuzzy search will attempt to make the +missing tracks to your collection. -To generate a playlist: +For a complete reference to LB Radio, see: +[ListenBrainz Radio Docs](https://troi.readthedocs.io/en/latest/lb_radio.html) + +The playlist generator works with a given mode: "easy", "medium" +and "hard". An easy playlist will generate data that more closely +meets the prompt, which should translate into a playlist that should +be easier and pleasent to listen to. Medium goes further and includes +less popular and more far flung stuff, before hard digs at the bottom +of the barrel. + +This may not always feel very pronounced, especially if your collection +isn't very suited for the prompt that was given. + + +#### Artist Element + +``` +./resolve.py lb-radio music_index easy 'artist:(taylor swift, drakee)' +``` + +Generates a playlist with music from Taylor Swift and artists similar +to her and Drake, and artists similar to him. 
+ + +#### Tag Element ``` ./resolve.py lb-radio music_index easy 'tag:(downtempo, trip hop)' @@ -107,3 +136,14 @@ You can include more than on tag query in a prompt: ``` ./resolve.py lb-radio music_index medium 'tag:(downtempo, trip hop)::or tag:(punk, ska)' ``` + +#### Stats, Collections, Playlists and Rec + +There are more elements, but these are "global" elements that will need to +have their results resolved to the local collection. The resolution process is +always a bit tricky since its outcome heavily depends on the collection. The +generator will do its best to generate a fitting playlist, but that doesn't +always happen. + +For the other elements, please refer to the +[ListenBrainz Radio Docs](https://troi.readthedocs.io/en/latest/lb_radio.html) From 84ab20f6cb56e7808563a070630de1767ae2f791 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 21:50:26 +0100 Subject: [PATCH 24/39] Document new features --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index 64346da..8ebc93b 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,12 @@ Then prepare the index and scan a music collection. mp3, m4a, wma, OggVorbis, Og ./resolve.py scan music_index ``` +If you remove from tracks from your collection, use cleanup to remove refereces to those tracks: + +``` +./resolve.py cleanup music_index +``` + ## Resolve JSPF playlists to local collection Then make a JSPF playlist on LB: @@ -147,3 +153,23 @@ always happen. For the other elements, please refer to the [ListenBrainz Radio Docs](https://troi.readthedocs.io/en/latest/lb_radio.html) + +## Other features + +### Collection deduplication + +The "duplicates" command will print a report of duplicate recordings +in your collection, based on MusicBrainz Recording MBIDs. There are several +types of duplicates that this may find: + +1. Duplicated tracks with the same title, release and artist. +2. 
Duplicated tracks that live on different releases, but have the same name +3. Duplicated tracks that exist once on an album and again on a compilation. + +If you specify -e or --exclude-different-release, then case #3 will not be shown. + +### Top tags + +The top-tags command will print the top tags and the number of times they +have been used in your collection. This requires that the "metadata" +command was run before. From aad73e3930eeccf4036352e1ea11bc9908035c04 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 31 Dec 2023 21:55:03 +0100 Subject: [PATCH 25/39] Finish updating the README --- README.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8ebc93b..c5b4c95 100644 --- a/README.md +++ b/README.md @@ -58,9 +58,17 @@ Then open the m3u playlist with a local tool. ### Prerequisites NOTE: This feature only works if you music collection -is tagged with MusicBrainz tags. (We recommend Picard: -http://picard.musicbrainz.org ) and if your music -collection is also available via a Subsonic API. +is tagged with MusicBrainz tags. We recommend Picard: +http://picard.musicbrainz.org for tagging your collection. + +If you're unwilling to properly tag your collection, +then please do not contact us to request that we remove +this requirement. We can't. We won't. Please close this +tab and move on. + +If you have your collection hosted on an app like Funkwhale, +Navidrom or Gonic, who have a Subsonic API, you can generate +playlists directly the web application. ### Setup @@ -113,7 +121,7 @@ isn't very suited for the prompt that was given. 
#### Artist Element ``` -./resolve.py lb-radio music_index easy 'artist:(taylor swift, drakee)' +./resolve.py lb-radio music_index easy 'artist:(taylor swift, drake)' ``` Generates a playlist with music from Taylor Swift and artists similar From f91626af7b5b66b7627c68bfdbe18137ab9742e3 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 4 Jan 2024 23:46:25 +0100 Subject: [PATCH 26/39] First cut at periodic jams for lb local. Not a bad start! --- .gitignore | 3 + lb_content_resolver/content_resolver.py | 16 +++- lb_content_resolver/database.py | 21 ----- lb_content_resolver/fuzzy_index.py | 10 +-- lb_content_resolver/lb_radio.py | 13 +-- lb_content_resolver/troi/__init__.py | 0 lb_content_resolver/troi/patches/__init__.py | 0 .../troi/patches/periodic_jams.py | 79 +++++++++++++++++++ lb_content_resolver/troi/periodic_jams.py | 40 ++++++++++ .../troi/recording_resolver.py | 65 +++++++++++++++ resolve.py | 53 +++++++++---- 11 files changed, 249 insertions(+), 51 deletions(-) create mode 100644 lb_content_resolver/troi/__init__.py create mode 100644 lb_content_resolver/troi/patches/__init__.py create mode 100755 lb_content_resolver/troi/patches/periodic_jams.py create mode 100755 lb_content_resolver/troi/periodic_jams.py create mode 100644 lb_content_resolver/troi/recording_resolver.py diff --git a/.gitignore b/.gitignore index 0f33172..b4d8fff 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ mp3 /build/ /dist/ config.py +*.jspf +*.m3u +.eggs diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index f137f0f..79f8b80 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -24,16 +24,28 @@ def __init__(self, db): self.db = db self.fuzzy_index = None + def get_artist_recording_metadata(self): + """ + Fetch the metadata needed to build a fuzzy search index. 
+ """ + + artist_recording_data = [] + for recording in Recording.select(): + artist_recording_data.append((recording.artist_name, recording.recording_name, recording.id)) + + return artist_recording_data + + def build_index(self): """ Fetch the data from the DB and then build the fuzzy lookup index. """ - artist_recording_data = self.db.get_artist_recording_metadata() + artist_recording_data = self.get_artist_recording_metadata() for recording in Recording.select(): artist_recording_data.append((recording.artist_name, recording.recording_name, recording.id)) - self.fuzzy_index = FuzzyIndex(self.db.index_dir) + self.fuzzy_index = FuzzyIndex() self.fuzzy_index.build(artist_recording_data) def resolve_recordings(self, query_data, match_threshold): diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index 4cbb71e..5ee0e97 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -119,27 +119,6 @@ def traverse(self, relative_path, dry_run=False): return True - def get_artist_recording_metadata(self): - """ - Fetch the metadata needed to build a fuzzy search index. - """ - - artist_recording_data = [] - for recording in Recording.select(): - artist_recording_data.append((recording.artist_name, recording.recording_name, recording.id)) - - return artist_recording_data - - def encode_string(self, text): - """ - Remove unwanted crap from the query string and only keep essential information. - - 'This is the ultimate track !!' -> 'thisistheultimatetrack' - """ - if text is None: - return None - return unidecode(re.sub(" +", " ", re.sub(r'[^\w ]+', '', text)).strip().lower()) - def add_or_update_recording(self, mdata): """ Given a Recording, add it to the DB if it does not exist. 
If it does, diff --git a/lb_content_resolver/fuzzy_index.py b/lb_content_resolver/fuzzy_index.py index a524df5..548c794 100755 --- a/lb_content_resolver/fuzzy_index.py +++ b/lb_content_resolver/fuzzy_index.py @@ -26,18 +26,10 @@ class FuzzyIndex: be quick to rebuild this index. ''' - def __init__(self, index_dir): - self.index_dir = index_dir + def __init__(self): self.vectorizer = None self.index = None - def create(self): - try: - os.mkdir(self.index_dir) - except OSError as err: - print("Could not create index directory: %s (%s)" % (self.index_dir, err)) - return - def encode_string(self, text): if text is None: return None diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index 277b431..b916088 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -45,7 +45,7 @@ def sanity_check(self): "sanity check: You have not matched your collection against the collection in subsonic. Run the subsonic command.") elif num_subsonic < num_recordings // 2: print("sanity check: Only %d of your %d recordings have subsonic matches. Run the subsonic command." 
% - (num_subsonic, num_recordings)) + (num_subsonic, num_recordings)) def generate(self, mode, prompt): """ @@ -70,11 +70,11 @@ def generate(self, mode, prompt): self.sanity_check() # Resolve any tracks that have not been resolved to a subsonic_id or a local file - self.resolve_recordings(playlist) + self.resolve_playlist(self.MATCH_THRESHOLD, playlist) return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}} - def resolve_recordings(self, playlist): + def resolve_playlist(self, match_threshold, playlist): recordings = [] for recording in playlist.playlists[0].recordings: @@ -84,10 +84,13 @@ def resolve_recordings(self, playlist): recordings.append(recording) if not recordings: - return + return + return self.resolve_recordings(match_threshold, recordings) + + def resolve_recordings(self, match_threshold, recordings): cr = ContentResolver(self.db) - resolved = cr.resolve_playlist(self.MATCH_THRESHOLD, recordings) + resolved = cr.resolve_playlist(match_threshold, recordings) for i, t_recording in enumerate(recordings): if resolved[i] is not None: diff --git a/lb_content_resolver/troi/__init__.py b/lb_content_resolver/troi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lb_content_resolver/troi/patches/__init__.py b/lb_content_resolver/troi/patches/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py new file mode 100755 index 0000000..c33acd1 --- /dev/null +++ b/lb_content_resolver/troi/patches/periodic_jams.py @@ -0,0 +1,79 @@ +from datetime import datetime, timedelta + +import troi.listenbrainz.recs +import troi.musicbrainz.recording_lookup +from troi import Playlist +from troi.playlist import PlaylistMakerElement + +from lb_content_resolver.troi.recording_resolver import RecordingResolverElement +from lb_content_resolver.model.database import db + +DAYS_OF_RECENT_LISTENS_TO_EXCLUDE = 60 # Exclude 
tracks listened in last X days from the daily jams playlist +DAILY_JAMS_MIN_RECORDINGS = 25 # the minimum number of recordings we aspire to have in a daily jam, this is not a hard limit +BATCH_SIZE_RECS = 1000 # the number of recommendations fetched in 1 go +MAX_RECS_LIMIT = 1000 # the maximum of recommendations available in LB + +class LocalPeriodicJamsPatch(troi.patch.Patch): + """ + """ + + + def __init__(self, args, debug=False): + super().__init__(args, debug) + + @staticmethod + def inputs(): + """ + Generate a periodic playlist from the ListenBrainz recommended recordings. + + \b + USER_NAME is a MusicBrainz user name that has an account on ListenBrainz. + TYPE Must be one of "daily-jams", "weekly-jams" or "weekly-exploration". + JAM_DATE is the date for which the jam is created (this is needed to account for the fact different timezones + can be on different dates). Required formatting for the date is 'YYYY-MM-DD'. + """ + return [{ + "type": "argument", + "args": ["user_name"] + }, { + "type": "argument", + "args": ["type"], + "kwargs": { + "required": False + } + }] + + @staticmethod + def outputs(): + return [Playlist] + + @staticmethod + def slug(): + return "local-periodic-jams" + + @staticmethod + def description(): + return "Generate a localized periodic playlist from the ListenBrainz recommended recordings." 
+ + def create(self, inputs): + user_name = inputs['user_name'] + + recs = troi.listenbrainz.recs.UserRecordingRecommendationsElement(user_name, + "raw", + count=1000) + recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement() + recs_lookup.set_sources(recs) + + resolve = RecordingResolverElement(db, .8) + resolve.set_sources(recs_lookup) + + pl_maker = PlaylistMakerElement(name="Local Periodic Jams for %s" % (user_name), + desc="test playlist!", + patch_slug="periodic-jams", + max_num_recordings=50, + max_artist_occurrence=2, + shuffle=True, + expires_at=datetime.utcnow() + timedelta(weeks=2)) + pl_maker.set_sources(resolve) + + return pl_maker diff --git a/lb_content_resolver/troi/periodic_jams.py b/lb_content_resolver/troi/periodic_jams.py new file mode 100755 index 0000000..73b0f3b --- /dev/null +++ b/lb_content_resolver/troi/periodic_jams.py @@ -0,0 +1,40 @@ +from lb_content_resolver.lb_radio import ListenBrainzRadioLocal +from lb_content_resolver.troi.patches.periodic_jams import LocalPeriodicJamsPatch + + +class LocalPeriodicJams(ListenBrainzRadioLocal): + ''' + Generate local playlists against a music collection available via subsonic. 
+ ''' + + # TODO: Make this an argument + MATCH_THRESHOLD = .8 + + def __init__(self, db, user_name): + ListenBrainzRadioLocal.__init__(self, db) + self.user_name = user_name + + def generate(self): + """ + Generate a periodic jams playlist + """ + + self.db.open_db() + + patch = LocalPeriodicJamsPatch({"user_name": self.user_name, "echo": True, "debug": True, "min_recordings": 1}) + + # Now generate the playlist + try: + playlist = patch.generate_playlist() + except RuntimeError as err: + print(f"LB Radio generation failed: {err}") + return None + + if playlist == None: + print("Your prompt generated an empty playlist.") + self.sanity_check() + + # Resolve any tracks that have not been resolved to a subsonic_id or a local file + self.resolve_playlist(self.MATCH_THRESHOLD, playlist) + + return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}} diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py new file mode 100644 index 0000000..5b301b9 --- /dev/null +++ b/lb_content_resolver/troi/recording_resolver.py @@ -0,0 +1,65 @@ +#from troi.musicbrainz.recording_lookup import RecordingLookupElement +from troi import Element + +from lb_content_resolver.content_resolver import ContentResolver +from lb_content_resolver.model.subsonic import RecordingSubsonic +from lb_content_resolver.model.recording import Recording +from troi import Recording + + +class RecordingResolverElement(Element): + + def __init__(self, db, match_threshold): + Element.__init__(self) + self.db = db + self.match_threshold = match_threshold + self.resolve = ContentResolver(db) + + @staticmethod + def inputs(): + return [] + + @staticmethod + def outputs(): + return [Recording] + + def read(self, inputs): + + # TODO: Add a check to make sure that metadata is present. 
+ + # Build the fuzzy index + lookup_data = [] + for recording in inputs[0]: + lookup_data.append({"artist_name": recording.artist.name, "recording_name": recording.name}) + + self.resolve.build_index() + + # Resolve the recordings + resolved = self.resolve.resolve_recordings(lookup_data, self.match_threshold) + recording_ids = [result["recording_id"] for result in resolved] + + # Fetch the recordings to lookup subsonic ids + recordings = RecordingSubsonic \ + .select() \ + .where(RecordingSubsonic.recording_id.in_(recording_ids)) \ + .dicts() + + # Build a subsonic index + subsonic_index = {} + matched = [] + for recording in recordings: + matched.append(recording["recording"]) + subsonic_index[recording["recording"]] = recording["subsonic_id"] + + # Set the subsonic ids into the recordings and only return recordings with an ID + results = [] + for r in resolved: + try: + recording = inputs[0][r["index"]] + recording.musicbrainz["subsonic_id"] = subsonic_index[r["recording_id"]] + except KeyError: + continue + + results.append(recording) + + return results diff --git a/resolve.py b/resolve.py index ebe92cb..47eb84d 100755 --- a/resolve.py +++ b/resolve.py @@ -13,9 +13,27 @@ from lb_content_resolver.top_tags import TopTags from lb_content_resolver.duplicates import FindDuplicates from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService +from lb_content_resolver.troi.periodic_jams import LocalPeriodicJams from lb_content_resolver.playlist import write_m3u_playlist_from_results, write_m3u_playlist_from_jspf import config +# TODO: Make sure all functions work with subsonic and with local files + + +def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask): + if jspf is None: + return + + if upload_to_subsonic and len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "": + if dont_ask or ask_yes_no_question("Upload via subsonic? 
(Y/n)"): + print("uploading playlist") + db.upload_playlist(jspf) + elif save_to_playlist is not None and len(jspf["playlist"]["track"]) > 0: + if dont_ask or ask_yes_no_question(f"Save to '{save_to_playlist}'? (Y/n)"): + print("saving playlist") + write_m3u_playlist_from_jspf(save_to_playlist, jspf) + else: + print("Playlist displayed, but not saved. Use -p or -u options to save/upload playlists.") @click.group() @@ -68,6 +86,7 @@ def subsonic(index_dir): db = SubsonicDatabase(index_dir) db.sync() + @click.command() @click.argument('index_dir') @click.argument('jspf_playlist') @@ -81,6 +100,7 @@ def playlist(index_dir, jspf_playlist, m3u_playlist, threshold): results = cr.resolve_playlist(threshold, jspf_playlist=jspf_playlist) write_m3u_playlist_from_results(m3u_playlist, results, jspf["playlist"]["title"]) + @click.command() @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True) @click.option('-p', '--save-to-playlist', required=False) @@ -93,39 +113,43 @@ def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, pr db = SubsonicDatabase(index_dir) r = ListenBrainzRadioLocal(db) jspf = r.generate(mode, prompt) - if jspf is None: - return + output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask) - if upload_to_subsonic and len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "": - if dont_ask or ask_yes_no_question("Upload via subsonic? (Y/n)"): - print("uploading playlist") - db.upload_playlist(jspf) - elif save_to_playlist is not None and len(jspf["playlist"]["track"]) > 0: - if dont_ask or ask_yes_no_question(f"Save to '{save_to_playlist}'? (Y/n)"): - print("saving playlist") - write_m3u_playlist_from_jspf(save_to_playlist, jspf) - else: - print("Playlist displayed, but not saved. 
Use -p or -u options to save/upload playlists.") @click.command() @click.argument('index_dir') @click.argument('count', required=False, default=250) def top_tags(index_dir, count): - "Display the top most used tags in the music collection. Useful for writing LB Radio tag prompts""" + "Display the top most used tags in the music collection. Useful for writing LB Radio tag prompts" "" db = Database(index_dir) tt = TopTags(db) tt.print_top_tags_tightly(count) + @click.command() @click.argument('index_dir') @click.option('-e', '--exclude-different-release', required=False, default=False, is_flag=True) def duplicates(exclude_different_release, index_dir): - "Print all the tracks in the DB that are duplciated as per recording_mbid""" + "Print all the tracks in the DB that are duplciated as per recording_mbid" "" db = Database(index_dir) fd = FindDuplicates(db) fd.print_duplicate_recordings(exclude_different_release) +@click.command() +@click.option('-u', '--upload-to-subsonic', required=False, is_flag=True) +@click.option('-p', '--save-to-playlist', required=False) +@click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file") +@click.argument('index_dir') +@click.argument('user_name') +def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, user_name): + "Generate a periodic jams playlist" + db = SubsonicDatabase(index_dir) + pj = LocalPeriodicJams(db, user_name) + jspf = pj.generate() + output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask) + + cli.add_command(create) cli.add_command(scan) cli.add_command(playlist) @@ -135,6 +159,7 @@ def duplicates(exclude_different_release, index_dir): cli.add_command(lb_radio) cli.add_command(top_tags) cli.add_command(duplicates) +cli.add_command(periodic_jams) def usage(command): From 0e272c0f308074fb6c34bb4446b573e64e3249b9 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 5 Jan 2024 23:54:44 +0100 Subject: [PATCH 27/39] Add the recent listens filter, 
which is really critical --- lb_content_resolver/troi/patches/periodic_jams.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py index c33acd1..fad2e44 100755 --- a/lb_content_resolver/troi/patches/periodic_jams.py +++ b/lb_content_resolver/troi/patches/periodic_jams.py @@ -61,8 +61,12 @@ def create(self, inputs): recs = troi.listenbrainz.recs.UserRecordingRecommendationsElement(user_name, "raw", count=1000) + + latest_filter = troi.filters.LatestListenedAtFilterElement(DAYS_OF_RECENT_LISTENS_TO_EXCLUDE) + latest_filter.set_sources(recs) + recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement() - recs_lookup.set_sources(recs) + recs_lookup.set_sources(latest_filter) resolve = RecordingResolverElement(db, .8) resolve.set_sources(recs_lookup) From 8b80e216a8cb2a06228d721c4ce2dae92f728a59 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sat, 6 Jan 2024 14:11:58 +0100 Subject: [PATCH 28/39] Start tracking recordings that went unresolved --- lb_content_resolver/content_resolver.py | 31 +++++++++++++---- lb_content_resolver/database.py | 15 ++++---- lb_content_resolver/model/recording.py | 2 +- .../model/unresolved_recording.py | 24 +++++++++++++ lb_content_resolver/playlist.py | 8 +++-- .../troi/recording_resolver.py | 14 +++++--- lb_content_resolver/unresolved_recording.py | 34 +++++++++++++++++++ resolve.py | 7 ++-- 8 files changed, 112 insertions(+), 23 deletions(-) create mode 100644 lb_content_resolver/model/unresolved_recording.py create mode 100755 lb_content_resolver/unresolved_recording.py diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index 79f8b80..ac4c5c9 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -8,6 +8,7 @@ from lb_content_resolver.model.database import db, setup_db from lb_content_resolver.model.recording import 
Recording from lb_content_resolver.model.subsonic import RecordingSubsonic +from lb_content_resolver.unresolved_recording import UnresolvedRecordingTracker from lb_content_resolver.fuzzy_index import FuzzyIndex from lb_matching_tools.cleaner import MetadataCleaner from lb_content_resolver.playlist import read_jspf_playlist @@ -50,11 +51,12 @@ def build_index(self): def resolve_recordings(self, query_data, match_threshold): """ - Given a list of dicts with artist_name and recording_name in query data and a matching threshold, - attempt to match recordings by looking them up in the fuzzy index. + Given a list of dicts with artist_name, recording_name, recording_mbid in query data and + a matching threshold, attempt to match recordings by looking them up in the fuzzy index. """ resolved_recordings = [] + unresolved_recording_mbids = [] # Set indexes in the data so we can correlate matches for i, data in enumerate(query_data): @@ -67,10 +69,12 @@ def resolve_recordings(self, query_data, match_threshold): for hit, data in zip(hits, query_data): if hit["confidence"] < match_threshold: next_query_data.append(data) + unresolved_recording_mbids.append(data["recording_mbid"]) else: resolved_recordings.append({ "artist_name": data["artist_name"], "recording_name": data["recording_name"], + "recording_mbid": data["recording_mbid"], "recording_id": hit["recording_id"], "confidence": hit["confidence"], "index": data["index"], @@ -83,16 +87,25 @@ def resolve_recordings(self, query_data, match_threshold): for data in next_query_data: recording_name = mc.clean_recording(data["recording_name"]) if recording_name != data["recording_name"]: - query_data.append({"artist_name": artist_name, "recording_name": recording_name, "index": data["index"]}) + query_data.append({"artist_name": artist_name, + "recording_name": recording_name, + "recording_mbid": data["recording_mbid"], + "index": data["index"]}) artist_name = mc.clean_artist(data["artist_name"]) if artist_name != 
data["artist_name"]: - query_data.append({"artist_name": artist_name, "recording_name": recording_name, "index": data["index"]}) + query_data.append({"artist_name": artist_name, + "recording_name": recording_name, + "recording_mbid": data["recording_mbid"], + "index": data["index"]}) # If nothing got cleaned, we can finish now if len(query_data) == 0: break + ur = UnresolvedRecordingTracker() + ur.add(unresolved_recording_mbids) + return resolved_recordings def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None): @@ -111,12 +124,16 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) if len(jspf_playlist["playlist"]["track"]) == 0: return [] for i, track in enumerate(jspf_playlist["playlist"]["track"]): - artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"]}) + artist_recording_data.append({"artist_name": track["creator"], + "recording_name": track["title"], + "recording_mbid": track["identifier"][35:]}) else: if not recordings: return [] for rec in recordings: - artist_recording_data.append({"artist_name": rec.artist.name, "recording_name": rec.name}) + artist_recording_data.append({"artist_name": rec.artist.name, + "recording_name": rec.name, + "recording_mbid": rec.mbid}) self.db.open_db() self.build_index() @@ -134,10 +151,12 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) print(" %-40s %-40s %-40s" % ("RECORDING", "RELEASE", "ARTIST")) results = [None] * len(artist_recording_data) + unresolved_recordings = [] for i, artist_recording in enumerate(artist_recording_data): if i not in hit_index: print(bcolors.FAIL + "FAIL" + bcolors.ENDC + " %-40s %-40s %-40s" % (artist_recording["recording_name"][:39], "", artist_recording["artist_name"][:39])) + unresolved_recordings.append(artist_recording["recording_mbid"]) continue hit = hit_index[i] diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index 
5ee0e97..d82164a 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -11,6 +11,7 @@ from lb_content_resolver.model.database import db, setup_db from lb_content_resolver.model.recording import Recording, RecordingMetadata +from lb_content_resolver.model.unresolved_recording import UnresolvedRecording from lb_content_resolver.model.subsonic import RecordingSubsonic from lb_content_resolver.model.tag import Tag, RecordingTag from lb_content_resolver.formats import mp3, m4a, flac, ogg_opus, ogg_vorbis, wma @@ -32,15 +33,17 @@ def create(self): Create the index directory for the data. Currently it contains only the sqlite dir, but in the future we may serialize the fuzzy index here as well. """ - try: - os.mkdir(self.index_dir) - except OSError as err: - print("Could not create index directory: %s (%s)" % (self.index_dir, err)) - return + + if not os.path.exists(self.index_dir): + try: + os.mkdir(self.index_dir) + except OSError as err: + print("Could not create index directory: %s (%s)" % (self.index_dir, err)) + return setup_db(self.db_file) db.connect() - db.create_tables([Recording, RecordingMetadata, Tag, RecordingTag, RecordingSubsonic]) + db.create_tables([Recording, RecordingMetadata, Tag, RecordingTag, RecordingSubsonic, UnresolvedRecording]) def open_db(self): """ diff --git a/lb_content_resolver/model/recording.py b/lb_content_resolver/model/recording.py index 4852945..49c2433 100644 --- a/lb_content_resolver/model/recording.py +++ b/lb_content_resolver/model/recording.py @@ -34,7 +34,7 @@ def __repr__(self): class RecordingMetadata(Model): """ - Additional metadata for recorings: popularity. In future additional fields + Additional metadata for recordings: popularity. In future additional fields like release date and release country could be added to this table. 
""" diff --git a/lb_content_resolver/model/unresolved_recording.py b/lb_content_resolver/model/unresolved_recording.py new file mode 100644 index 0000000..c60f0ef --- /dev/null +++ b/lb_content_resolver/model/unresolved_recording.py @@ -0,0 +1,24 @@ +import datetime +from peewee import * +from lb_content_resolver.model.database import db + + +class UnresolvedRecording(Model): + """ + Table used to track which recordings where resolving failed. This can be used both + for debugging purposes and to provide the user with a list of 'if you had this + album, you'd resolve more music' kind of report. + """ + + class Meta: + database = db + table_name = "unresolved_recording" + + id = AutoField() + # Not using the UUIDField here, since it annoyingly removes '-' from the UUID. + recording_mbid = TextField(null=True, index=True, unique=True) + lookup_count = IntegerField(null=False, default=1) + last_updated = DateTimeField(null=False, default=datetime.datetime.now) + + def __repr__(self): + return "" % (self.recording_mbid, self.count) diff --git a/lb_content_resolver/playlist.py b/lb_content_resolver/playlist.py index 5b28634..8f15110 100644 --- a/lb_content_resolver/playlist.py +++ b/lb_content_resolver/playlist.py @@ -21,9 +21,11 @@ def write_m3u_playlist_from_results(file_name, playlist_title, hits): m3u.write("#EXTM3U\n") m3u.write("#EXTENC: UTF-8\n") m3u.write("#PLAYLIST %s\n" % playlist_title) - for rec in recordings: - m3u.write("#EXTINF %d,%s\n" % (rec.duration / 1000, rec.recording_name)) - m3u.write(rec.file_path + "\n") + for rec in hits: + if rec is None: + continue + m3u.write("#EXTINF %d,%s\n" % (rec["duration"] / 1000, rec["recording_name"])) + m3u.write(rec["file_path"] + "\n") def write_m3u_playlist_from_jspf(file_name, jspf): diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py index 5b301b9..acc4ce0 100644 --- a/lb_content_resolver/troi/recording_resolver.py +++ 
b/lb_content_resolver/troi/recording_resolver.py @@ -1,4 +1,3 @@ -#from troi.musicbrainz.recording_lookup import RecordingLookupElement from troi import Element from lb_content_resolver.content_resolver import ContentResolver @@ -8,6 +7,10 @@ class RecordingResolverElement(Element): + """ + This Troi element takes in a list of recordings, which *must* have artist name and recording + name set and resolves them to a local collection by using the ContentResolver class + """ def __init__(self, db, match_threshold): Element.__init__(self) @@ -25,12 +28,15 @@ def outputs(): def read(self, inputs): - # TODO: Add a check to make sure that metadata is present. - # Build the fuzzy index lookup_data = [] for recording in inputs[0]: - lookup_data.append({"artist_name": recording.artist.name, "recording_name": recording.name}) + if recording.artist is None or recording.artist.name is None or recording.name is None: + raise RuntimeError("artist name and recording name are needed for RecordingResolverElement.") + + lookup_data.append({"artist_name": recording.artist.name, + "recording_name": recording.name, + "recording_mbid": recording.mbid}) self.resolve.build_index() diff --git a/lb_content_resolver/unresolved_recording.py b/lb_content_resolver/unresolved_recording.py new file mode 100755 index 0000000..e23e0ae --- /dev/null +++ b/lb_content_resolver/unresolved_recording.py @@ -0,0 +1,34 @@ +import os +import datetime +import sys + +import peewee + +from lb_content_resolver.model.database import db +from lb_content_resolver.model.unresolved_recording import UnresolvedRecording + + +class UnresolvedRecordingTracker: + ''' + This class keeps track of recordings that were not resolved when + a playlist was resolved. This will allow us to give recommendations + on which albums to add to their collection to resolve more recordings. + ''' + + def __init__(self): + pass + + def add(self, recording_mbids): + """ + Add one or more recording MBIDs to the unresolved recordings track. 
If this has + previously been unresolved, increment the count for the number + of times it has been unresolved. + """ + + query = """INSERT INTO unresolved_recording (recording_mbid, last_updated, lookup_count) + VALUES (?, ?, 1) + ON CONFLICT DO UPDATE SET lookup_count = EXCLUDED.lookup_count + 1""" + + with db.atomic() as transaction: + for mbid in recording_mbids: + db.execute_sql(query, (mbid, datetime.datetime.now())) diff --git a/resolve.py b/resolve.py index 47eb84d..a7a9902 100755 --- a/resolve.py +++ b/resolve.py @@ -14,7 +14,7 @@ from lb_content_resolver.duplicates import FindDuplicates from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService from lb_content_resolver.troi.periodic_jams import LocalPeriodicJams -from lb_content_resolver.playlist import write_m3u_playlist_from_results, write_m3u_playlist_from_jspf +from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist_from_results, write_m3u_playlist_from_jspf import config # TODO: Make sure all functions work with subsonic and with local files @@ -32,6 +32,7 @@ def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask): if dont_ask or ask_yes_no_question(f"Save to '{save_to_playlist}'? (Y/n)"): print("saving playlist") write_m3u_playlist_from_jspf(save_to_playlist, jspf) + else: print("Playlist displayed, but not saved. 
Use -p or -u options to save/upload playlists.") @@ -97,8 +98,8 @@ def playlist(index_dir, jspf_playlist, m3u_playlist, threshold): db = Database(index_dir) cr = ContentResolver(db) jspf = read_jspf_playlist(jspf_playlist) - results = cr.resolve_playlist(threshold, jspf_playlist=jspf_playlist) - write_m3u_playlist_from_results(m3u_playlist, results, jspf["playlist"]["title"]) + results = cr.resolve_playlist(threshold, jspf_playlist=jspf) + write_m3u_playlist_from_results(m3u_playlist, jspf["playlist"]["title"], results) @click.command() From 0aacd34650104e13d7c1b6720fcbd2b22b737769 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sat, 6 Jan 2024 15:36:27 +0100 Subject: [PATCH 29/39] Very simple unresolved recordings report is in place --- lb_content_resolver/content_resolver.py | 2 +- lb_content_resolver/unresolved_recording.py | 64 +++++++++++++++++++++ resolve.py | 17 ++++++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index ac4c5c9..3559839 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -126,7 +126,7 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) for i, track in enumerate(jspf_playlist["playlist"]["track"]): artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"], - "recording_mbid": track["identifier"][35:]}) + "recording_mbid": track["identifier"][34:]}) else: if not recordings: return [] diff --git a/lb_content_resolver/unresolved_recording.py b/lb_content_resolver/unresolved_recording.py index e23e0ae..29f1f7e 100755 --- a/lb_content_resolver/unresolved_recording.py +++ b/lb_content_resolver/unresolved_recording.py @@ -1,6 +1,8 @@ import os import datetime +import requests import sys +from math import ceil import peewee @@ -15,9 +17,17 @@ class UnresolvedRecordingTracker: on which albums to add to their collection to resolve more 
recordings. ''' + LOOKUP_BATCH_SIZE = 50 + def __init__(self): pass + @staticmethod + def chunks(lst, n): + """Yield successive n-sized chunks from lst.""" + for i in range(0, len(lst), n): + yield lst[i:i + n] + def add(self, recording_mbids): """ Add one or more recording MBIDs to the unresolved recordings track. If this has @@ -32,3 +42,57 @@ def add(self, recording_mbids): with db.atomic() as transaction: for mbid in recording_mbids: db.execute_sql(query, (mbid, datetime.datetime.now())) + + def get(self, num_items, lookup_count): + + if lookup_count is not None: + where_clause = f"WHERE lookup_count >= {lookup_count}" + else: + where_clause = "" + + query = f"""SELECT recording_mbid + , lookup_count + FROM unresolved_recording + {where_clause} + ORDER BY lookup_count DESC""" + + cursor = db.execute_sql(query) + recording_mbids = [] + lookup_counts = {} + for row in cursor.fetchall(): + recording_mbids.append(row[0]) + lookup_counts[row[0]] = row[1] + + recording_data = {} + for chunk in self.chunks(recording_mbids, self.LOOKUP_BATCH_SIZE): + args = ",".join(chunk) + + params = { "recording_mbids": args, "inc": "artist release" } + while True: + r = requests.get("https://api.listenbrainz.org/1/metadata/recording", params=params) + if r.status_code != 200: + print("Failed to fetch metadata for recordings: ", r.text) + return [] + + if r.status_code == 429: + sleep(1) + continue + + break + recording_data.update(dict(r.json())) + + results = [] + for mbid in recording_mbids: + rec = recording_data[mbid] + results.append({ + "artist_name": rec["artist"]["name"], + "artists": rec["artist"]["artists"], + "release_name": rec["release"]["name"], + "release_mbid": rec["release"]["mbid"], + "release_group_mbid": rec["release"]["release_group_mbid"], + "recording_name": "Contact", + "recording_mbid": mbid, + "lookup_count": lookup_counts[mbid] + }) + + return results diff --git a/resolve.py b/resolve.py index a7a9902..d71ceba 100755 --- a/resolve.py +++ b/resolve.py @@ 
-15,9 +15,11 @@ from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService from lb_content_resolver.troi.periodic_jams import LocalPeriodicJams from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist_from_results, write_m3u_playlist_from_jspf +from lb_content_resolver.unresolved_recording import UnresolvedRecordingTracker import config # TODO: Make sure all functions work with subsonic and with local files +# TODO: avoid passing in db to objects and just open the db def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask): @@ -150,6 +152,20 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use jspf = pj.generate() output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask) +@click.command() +@click.option('-c', '--count', required=False, default=25) +@click.option('-l', '--lookup-count', required=False, default=3) +@click.argument('index_dir') +def unresolved_releases(count, lookup_count, index_dir): + "Show the top unresolved releases" + + db = SubsonicDatabase(index_dir) + db.open_db() + urt = UnresolvedRecordingTracker() + recordings = urt.get(num_items=count, lookup_count=lookup_count) + from icecream import ic + ic(recordings) + cli.add_command(create) cli.add_command(scan) @@ -161,6 +177,7 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use cli.add_command(top_tags) cli.add_command(duplicates) cli.add_command(periodic_jams) +cli.add_command(unresolved_releases) def usage(command): From 22fe98cae66ed64e4135ec70788d55c6667c4c97 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sat, 6 Jan 2024 16:09:42 +0100 Subject: [PATCH 30/39] Unresolved albums report is now done --- lb_content_resolver/unresolved_recording.py | 30 ++++++++++++++------- resolve.py | 5 ++-- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/lb_content_resolver/unresolved_recording.py b/lb_content_resolver/unresolved_recording.py index 
29f1f7e..1bed543 100755 --- a/lb_content_resolver/unresolved_recording.py +++ b/lb_content_resolver/unresolved_recording.py @@ -1,8 +1,7 @@ -import os +from collections import defaultdict import datetime -import requests -import sys from math import ceil +import requests import peewee @@ -43,7 +42,10 @@ def add(self, recording_mbids): for mbid in recording_mbids: db.execute_sql(query, (mbid, datetime.datetime.now())) - def get(self, num_items, lookup_count): + def get_releases(self, num_items, lookup_count): + """ + Organize the unresolved recordings into releases with a list of recordings. + """ if lookup_count is not None: where_clause = f"WHERE lookup_count >= {lookup_count}" @@ -67,7 +69,7 @@ def get(self, num_items, lookup_count): for chunk in self.chunks(recording_mbids, self.LOOKUP_BATCH_SIZE): args = ",".join(chunk) - params = { "recording_mbids": args, "inc": "artist release" } + params = {"recording_mbids": args, "inc": "artist release"} while True: r = requests.get("https://api.listenbrainz.org/1/metadata/recording", params=params) if r.status_code != 200: @@ -81,18 +83,28 @@ def get(self, num_items, lookup_count): break recording_data.update(dict(r.json())) - results = [] + releases = defaultdict(list) for mbid in recording_mbids: rec = recording_data[mbid] - results.append({ + releases[rec["release"]["mbid"]].append({ "artist_name": rec["artist"]["name"], "artists": rec["artist"]["artists"], "release_name": rec["release"]["name"], "release_mbid": rec["release"]["mbid"], "release_group_mbid": rec["release"]["release_group_mbid"], - "recording_name": "Contact", + "recording_name": rec["recording"]["name"], "recording_mbid": mbid, "lookup_count": lookup_counts[mbid] }) - return results + return releases + + def print_releases(self, releases): + + print("%-50s %-50s" % ("RELEASE", "ARTIST")) + for release_mbid in sorted(releases.keys(), key=lambda a: releases[a][0]["release_name"]): + rel = releases[release_mbid] + print("%-60s %-50s" % 
(rel[0]["release_name"][:59], rel[0]["artist_name"][:49])) + for rec in rel: + print(" %-57s %d lookups" % (rec["recording_name"][:56], rec["lookup_count"])) + print() diff --git a/resolve.py b/resolve.py index d71ceba..b77a7f2 100755 --- a/resolve.py +++ b/resolve.py @@ -162,9 +162,8 @@ def unresolved_releases(count, lookup_count, index_dir): db = SubsonicDatabase(index_dir) db.open_db() urt = UnresolvedRecordingTracker() - recordings = urt.get(num_items=count, lookup_count=lookup_count) - from icecream import ic - ic(recordings) + releases = urt.get_releases(num_items=count, lookup_count=lookup_count) + urt.print_releases(releases) cli.add_command(create) From 877d800d71924a99f953323de0aa6800f6d1b143 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Sun, 7 Jan 2024 22:45:21 +0100 Subject: [PATCH 31/39] Filter recent listens too --- lb_content_resolver/troi/patches/periodic_jams.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py index fad2e44..7cd59e3 100755 --- a/lb_content_resolver/troi/patches/periodic_jams.py +++ b/lb_content_resolver/troi/patches/periodic_jams.py @@ -62,8 +62,12 @@ def create(self, inputs): "raw", count=1000) + recent_listens_lookup = troi.listenbrainz.listens.RecentListensTimestampLookup(user_name, + days=2) + recent_listens_lookup.set_sources(recs) + latest_filter = troi.filters.LatestListenedAtFilterElement(DAYS_OF_RECENT_LISTENS_TO_EXCLUDE) - latest_filter.set_sources(recs) + latest_filter.set_sources(recent_listens_lookup) recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement() recs_lookup.set_sources(latest_filter) From 44844b1891632c9c0149452de270b80bef023b73 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 8 Jan 2024 21:23:41 +0100 Subject: [PATCH 32/39] Improve the unresolved recordings function --- lb_content_resolver/content_resolver.py | 2 +- lb_content_resolver/unresolved_recording.py | 
44 ++++++++++++++------- resolve.py | 5 +-- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index 3559839..fe19531 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -86,13 +86,13 @@ def resolve_recordings(self, query_data, match_threshold): query_data = [] for data in next_query_data: recording_name = mc.clean_recording(data["recording_name"]) + artist_name = mc.clean_artist(data["artist_name"]) if recording_name != data["recording_name"]: query_data.append({"artist_name": artist_name, "recording_name": recording_name, "recording_mbid": data["recording_mbid"], "index": data["index"]}) - artist_name = mc.clean_artist(data["artist_name"]) if artist_name != data["artist_name"]: query_data.append({"artist_name": artist_name, "recording_name": recording_name, diff --git a/lb_content_resolver/unresolved_recording.py b/lb_content_resolver/unresolved_recording.py index 1bed543..1bcd95b 100755 --- a/lb_content_resolver/unresolved_recording.py +++ b/lb_content_resolver/unresolved_recording.py @@ -1,6 +1,7 @@ from collections import defaultdict import datetime from math import ceil +from operator import itemgetter import requests import peewee @@ -27,6 +28,13 @@ def chunks(lst, n): for i in range(0, len(lst), n): yield lst[i:i + n] + @staticmethod + def multisort(xs, specs): + """ Multiple key sort helper """ + for key, reverse in reversed(specs): + xs.sort(key=itemgetter(key), reverse=reverse) + return xs + def add(self, recording_mbids): """ Add one or more recording MBIDs to the unresolved recordings track. If this has @@ -42,21 +50,15 @@ def add(self, recording_mbids): for mbid in recording_mbids: db.execute_sql(query, (mbid, datetime.datetime.now())) - def get_releases(self, num_items, lookup_count): + def get_releases(self, num_items): """ Organize the unresolved recordings into releases with a list of recordings. 
+ Return up to num_item releases. """ - if lookup_count is not None: - where_clause = f"WHERE lookup_count >= {lookup_count}" - else: - where_clause = "" - query = f"""SELECT recording_mbid , lookup_count - FROM unresolved_recording - {where_clause} - ORDER BY lookup_count DESC""" + FROM unresolved_recording""" cursor = db.execute_sql(query) recording_mbids = [] @@ -97,14 +99,26 @@ def get_releases(self, num_items, lookup_count): "lookup_count": lookup_counts[mbid] }) - return releases + release_list = [] + for mbid in releases: + release = releases[mbid] + total_count = sum([rec["lookup_count"] for rec in release]) + release_list.append({ + "mbid": release[0]["release_mbid"], + "release_name": release[0]["release_name"], + "artist_name": release[0]["artist_name"], + "lookup_count": total_count, + "recordings": release + }) + + return self.multisort(release_list, (("lookup_count", True), ("artist_name", False), ("release_name", False)))[:num_items] def print_releases(self, releases): + """ Neatly print all the release/recordings returned from the get_releases function """ - print("%-50s %-50s" % ("RELEASE", "ARTIST")) - for release_mbid in sorted(releases.keys(), key=lambda a: releases[a][0]["release_name"]): - rel = releases[release_mbid] - print("%-60s %-50s" % (rel[0]["release_name"][:59], rel[0]["artist_name"][:49])) - for rec in rel: + print("%-60s %-50s" % ("RELEASE", "ARTIST")) + for release in releases: + print("%-60s %-50s" % (release["release_name"][:59], release["artist_name"][:49])) + for rec in release["recordings"]: print(" %-57s %d lookups" % (rec["recording_name"][:56], rec["lookup_count"])) print() diff --git a/resolve.py b/resolve.py index b77a7f2..19edb89 100755 --- a/resolve.py +++ b/resolve.py @@ -154,15 +154,14 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use @click.command() @click.option('-c', '--count', required=False, default=25) -@click.option('-l', '--lookup-count', required=False, default=3) 
@click.argument('index_dir') -def unresolved_releases(count, lookup_count, index_dir): +def unresolved_releases(count, index_dir): "Show the top unresolved releases" db = SubsonicDatabase(index_dir) db.open_db() urt = UnresolvedRecordingTracker() - releases = urt.get_releases(num_items=count, lookup_count=lookup_count) + releases = urt.get_releases(num_items=count) urt.print_releases(releases) From 3b29b227a2ff639a2b28e69c30ffe36f1bc82d02 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 8 Jan 2024 21:54:08 +0100 Subject: [PATCH 33/39] db open cleanup --- lb_content_resolver/artist_search.py | 4 +-- lb_content_resolver/content_resolver.py | 4 +-- lb_content_resolver/database.py | 20 +++++------ lb_content_resolver/duplicates.py | 2 -- lb_content_resolver/lb_radio.py | 13 ++----- lb_content_resolver/metadata_lookup.py | 2 -- lb_content_resolver/subsonic.py | 2 -- lb_content_resolver/tag_search.py | 4 +-- lb_content_resolver/top_tags.py | 4 --- .../troi/patches/periodic_jams.py | 2 +- lb_content_resolver/troi/periodic_jams.py | 6 ++-- .../troi/recording_resolver.py | 5 ++- lb_content_resolver/unresolved_recording.py | 4 +-- resolve.py | 36 +++++++++++-------- 14 files changed, 45 insertions(+), 63 deletions(-) diff --git a/lb_content_resolver/artist_search.py b/lb_content_resolver/artist_search.py index 113a09e..4aa30d5 100755 --- a/lb_content_resolver/artist_search.py +++ b/lb_content_resolver/artist_search.py @@ -18,9 +18,8 @@ class LocalRecordingSearchByArtistService(RecordingSearchByArtistService): Given the local database, search for artists that meet given tag criteria ''' - def __init__(self, db): + def __init__(self): RecordingSearchByArtistService.__init__(self) - self.db = db def search(self, artist_mbids, begin_percent, end_percent, num_recordings): """ @@ -52,7 +51,6 @@ def search(self, artist_mbids, begin_percent, end_percent, num_recordings): ORDER BY artist_mbid , popularity""" - self.db.open_db() placeholders = ",".join(("?", ) * 
len(artist_mbids)) cursor = db.execute_sql(query % placeholders, params=tuple(artist_mbids)) diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index fe19531..915a5c7 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -21,8 +21,7 @@ class ContentResolver: Scan a given path and enter/update the metadata in the search index ''' - def __init__(self, db): - self.db = db + def __init__(self): self.fuzzy_index = None def get_artist_recording_metadata(self): @@ -135,7 +134,6 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) "recording_name": rec.name, "recording_mbid": rec.mbid}) - self.db.open_db() self.build_index() hits = self.resolve_recordings(artist_recording_data, match_threshold) diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index d82164a..e368462 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -45,14 +45,14 @@ def create(self): db.connect() db.create_tables([Recording, RecordingMetadata, Tag, RecordingTag, RecordingSubsonic, UnresolvedRecording]) - def open_db(self): + def open(self): """ Open the database file and connect to the db. """ setup_db(self.db_file) db.connect() - def close_db(self): + def close(self): """ Close the db.""" db.close() @@ -72,7 +72,6 @@ def scan(self, music_dir): # Future improvement, commit to DB only every 1000 tracks or so. print("Check collection size...") - self.open_db() self.track_count_estimate = 0 self.traverse("", dry_run=True) self.audio_file_count = self.track_count_estimate @@ -270,20 +269,21 @@ def add(self, relative_path): self.progress_bar.write(" error %s" % details) - def database_cleanup(self): + def database_cleanup(self, dry_run): ''' Look for missing tracks and remove them from the DB. 
Then look for empty releases/artists and remove those too ''' - self.open_db() query = Recording.select() recording_ids = [] for recording in query: if not os.path.exists(recording.file_path): - print("UNLINK %s" % recording.file_path) + print("RM %s" % recording.file_path) recording_ids.append(recording.id) - placeholders = ",".join(("?", ) * len(recording_ids)) - db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % placeholders, tuple(recording_ids)) - - self.close_db() + if not dry_run: + placeholders = ",".join(("?", ) * len(recording_ids)) + db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % placeholders, tuple(recording_ids)) + print("Stale references removed") + else: + print("--delete not specified, no refeences removed") diff --git a/lb_content_resolver/duplicates.py b/lb_content_resolver/duplicates.py index 7715963..afc8925 100755 --- a/lb_content_resolver/duplicates.py +++ b/lb_content_resolver/duplicates.py @@ -50,8 +50,6 @@ def get_duplicate_recordings(self, include_different_releases): HAVING cnt > 1 ORDER BY cnt DESC, artist_name, recording_name""" - self.db.open_db() - return [ (r[0], r[1], r[2], r[3], json.loads(r[4]), r[5]) for r in db.execute_sql(query).fetchall() ] diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index b916088..1c6347c 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -20,16 +20,11 @@ class ListenBrainzRadioLocal: # TODO: Make this an argument MATCH_THRESHOLD = .8 - def __init__(self, db): - self.db = db - def sanity_check(self): """ Run a sanity check on the DB to see if data is missing that is required for LB Radio to work. 
""" - self.db.open_db() - num_recordings = db.execute_sql("SELECT COUNT(*) FROM recording").fetchone()[0] num_metadata = db.execute_sql("SELECT COUNT(*) FROM recording_metadata").fetchone()[0] num_subsonic = db.execute_sql("SELECT COUNT(*) FROM recording_subsonic").fetchone()[0] @@ -52,11 +47,9 @@ def generate(self, mode, prompt): Generate a playlist given the mode and prompt. """ - self.db.open_db() - patch = LBRadioPatch({"mode": mode, "prompt": prompt, "echo": True, "debug": True, "min_recordings": 1}) - patch.register_service(LocalRecordingSearchByTagService(self.db)) - patch.register_service(LocalRecordingSearchByArtistService(self.db)) + patch.register_service(LocalRecordingSearchByTagService()) + patch.register_service(LocalRecordingSearchByArtistService()) # Now generate the playlist try: @@ -89,7 +82,7 @@ def resolve_playlist(self, match_threshold, playlist): return self.resolve_recordings(match_threshold, recordings) def resolve_recordings(self, match_threshold, recordings): - cr = ContentResolver(self.db) + cr = ContentResolver() resolved = cr.resolve_playlist(match_threshold, recordings) for i, t_recording in enumerate(recordings): diff --git a/lb_content_resolver/metadata_lookup.py b/lb_content_resolver/metadata_lookup.py index 883ebe1..108abec 100755 --- a/lb_content_resolver/metadata_lookup.py +++ b/lb_content_resolver/metadata_lookup.py @@ -26,8 +26,6 @@ def lookup(self): Iterate over all recordings in the database and call lookup_chunk for chunks of recordings. 
""" - self.db.open_db() - cursor = db.execute_sql("""SELECT recording.id, recording.recording_mbid, recording_metadata.id FROM recording LEFT JOIN recording_metadata diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py index ab4695f..fcf3377 100755 --- a/lb_content_resolver/subsonic.py +++ b/lb_content_resolver/subsonic.py @@ -33,9 +33,7 @@ def sync(self): self.matched = 0 self.error = 0 - self.open_db() self.run_sync() - self.close_db() print("Checked %s albums:" % self.total) print(" %5d albums matched" % self.matched) diff --git a/lb_content_resolver/tag_search.py b/lb_content_resolver/tag_search.py index 4aa6c28..9343ca9 100755 --- a/lb_content_resolver/tag_search.py +++ b/lb_content_resolver/tag_search.py @@ -21,9 +21,8 @@ class LocalRecordingSearchByTagService(RecordingSearchByTagService): to make this work for tracks without subsonic ids. ''' - def __init__(self, db): + def __init__(self): RecordingSearchByTagService.__init__(self) - self.db = db def search(self, tags, operator, begin_percent, end_percent, num_recordings): """ @@ -48,7 +47,6 @@ def search(self, tags, operator, begin_percent, end_percent, num_recordings): else: query, params, pop_clause = self.and_search(tags) - self.db.open_db() placeholders = ",".join(("?", ) * len(tags)) cursor = db.execute_sql(query % (placeholders, pop_clause), params) diff --git a/lb_content_resolver/top_tags.py b/lb_content_resolver/top_tags.py index 6a7d5bf..10999d1 100755 --- a/lb_content_resolver/top_tags.py +++ b/lb_content_resolver/top_tags.py @@ -17,9 +17,6 @@ class TopTags: Class to fetch top tags ''' - def __init__(self, db): - self.db = db - def get_top_tags(self, limit=50): """ """ @@ -35,7 +32,6 @@ def get_top_tags(self, limit=50): ORDER BY cnt DESC LIMIT ?""" - self.db.open_db() cursor = db.execute_sql(query, (limit,)) top_tags = [] diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py index 7cd59e3..48abfcf 100755 --- 
a/lb_content_resolver/troi/patches/periodic_jams.py +++ b/lb_content_resolver/troi/patches/periodic_jams.py @@ -72,7 +72,7 @@ def create(self, inputs): recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement() recs_lookup.set_sources(latest_filter) - resolve = RecordingResolverElement(db, .8) + resolve = RecordingResolverElement(.8) resolve.set_sources(recs_lookup) pl_maker = PlaylistMakerElement(name="Local Periodic Jams for %s" % (user_name), diff --git a/lb_content_resolver/troi/periodic_jams.py b/lb_content_resolver/troi/periodic_jams.py index 73b0f3b..97318c9 100755 --- a/lb_content_resolver/troi/periodic_jams.py +++ b/lb_content_resolver/troi/periodic_jams.py @@ -10,8 +10,8 @@ class LocalPeriodicJams(ListenBrainzRadioLocal): # TODO: Make this an argument MATCH_THRESHOLD = .8 - def __init__(self, db, user_name): - ListenBrainzRadioLocal.__init__(self, db) + def __init__(self, user_name): + ListenBrainzRadioLocal.__init__(self) self.user_name = user_name def generate(self): @@ -19,8 +19,6 @@ def generate(self): Generate a periodic jams playlist """ - self.db.open_db() - patch = LocalPeriodicJamsPatch({"user_name": self.user_name, "echo": True, "debug": True, "min_recordings": 1}) # Now generate the playlist diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py index acc4ce0..58348c0 100644 --- a/lb_content_resolver/troi/recording_resolver.py +++ b/lb_content_resolver/troi/recording_resolver.py @@ -12,11 +12,10 @@ class RecordingResolverElement(Element): name set and resolves them to a local collection by using the ContentResolver class """ - def __init__(self, db, match_threshold): + def __init__(self, match_threshold): Element.__init__(self) - self.db = db self.match_threshold = match_threshold - self.resolve = ContentResolver(db) + self.resolve = ContentResolver() @staticmethod def inputs(): diff --git a/lb_content_resolver/unresolved_recording.py 
b/lb_content_resolver/unresolved_recording.py index 1bcd95b..4d0f518 100755 --- a/lb_content_resolver/unresolved_recording.py +++ b/lb_content_resolver/unresolved_recording.py @@ -50,7 +50,7 @@ def add(self, recording_mbids): for mbid in recording_mbids: db.execute_sql(query, (mbid, datetime.datetime.now())) - def get_releases(self, num_items): + def get_releases(self): """ Organize the unresolved recordings into releases with a list of recordings. Return up to num_item releases. @@ -111,7 +111,7 @@ def get_releases(self, num_items): "recordings": release }) - return self.multisort(release_list, (("lookup_count", True), ("artist_name", False), ("release_name", False)))[:num_items] + return self.multisort(release_list, (("lookup_count", True), ("artist_name", False), ("release_name", False))) def print_releases(self, releases): """ Neatly print all the release/recordings returned from the get_releases function """ diff --git a/resolve.py b/resolve.py index 19edb89..84eec3b 100755 --- a/resolve.py +++ b/resolve.py @@ -19,7 +19,6 @@ import config # TODO: Make sure all functions work with subsonic and with local files -# TODO: avoid passing in db to objects and just open the db def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask): @@ -58,15 +57,18 @@ def create(index_dir): def scan(index_dir, music_dir): """Scan a directory and its subdirectories for music files to add to the collection""" db = Database(index_dir) + db.open() db.scan(music_dir) @click.command() +@click.option('-d', '--delete', required=False, is_flag=True, default=True) @click.argument('index_dir') -def cleanup(index_dir): +def cleanup(delete, index_dir): """Perform a database cleanup. 
Check that files exist and if they don't remove from the index""" db = Database(index_dir) - db.database_cleanup() + db.open() + db.database_cleanup(delete) @click.command() @@ -74,6 +76,7 @@ def cleanup(index_dir): def metadata(index_dir): """Lookup metadata (popularity and tags) for recordings""" db = Database(index_dir) + db.open() lookup = MetadataLookup(db) lookup.lookup() @@ -87,6 +90,7 @@ def metadata(index_dir): def subsonic(index_dir): """Scan a remote subsonic music collection""" db = SubsonicDatabase(index_dir) + db.open() db.sync() @@ -98,7 +102,8 @@ def subsonic(index_dir): def playlist(index_dir, jspf_playlist, m3u_playlist, threshold): """ Resolve a JSPF file with MusicBrainz recording MBIDs to files in the local collection""" db = Database(index_dir) - cr = ContentResolver(db) + db.open() + cr = ContentResolver() jspf = read_jspf_playlist(jspf_playlist) results = cr.resolve_playlist(threshold, jspf_playlist=jspf) write_m3u_playlist_from_results(m3u_playlist, jspf["playlist"]["title"], results) @@ -114,7 +119,8 @@ def playlist(index_dir, jspf_playlist, m3u_playlist, threshold): def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, prompt): """Use the ListenBrainz Radio engine to create a playlist from a prompt, using a local music collection""" db = SubsonicDatabase(index_dir) - r = ListenBrainzRadioLocal(db) + db.open() + r = ListenBrainzRadioLocal() jspf = r.generate(mode, prompt) output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask) @@ -123,9 +129,10 @@ def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, pr @click.argument('index_dir') @click.argument('count', required=False, default=250) def top_tags(index_dir, count): - "Display the top most used tags in the music collection. Useful for writing LB Radio tag prompts" "" + "Display the top most used tags in the music collection. 
Useful for writing LB Radio tag prompts" db = Database(index_dir) - tt = TopTags(db) + db.open() + tt = TopTags() tt.print_top_tags_tightly(count) @@ -133,8 +140,9 @@ def top_tags(index_dir, count): @click.argument('index_dir') @click.option('-e', '--exclude-different-release', required=False, default=False, is_flag=True) def duplicates(exclude_different_release, index_dir): - "Print all the tracks in the DB that are duplciated as per recording_mbid" "" + "Print all the tracks in the DB that are duplciated as per recording_mbid" db = Database(index_dir) + db.open() fd = FindDuplicates(db) fd.print_duplicate_recordings(exclude_different_release) @@ -148,20 +156,20 @@ def duplicates(exclude_different_release, index_dir): def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, user_name): "Generate a periodic jams playlist" db = SubsonicDatabase(index_dir) - pj = LocalPeriodicJams(db, user_name) + db.open() + pj = LocalPeriodicJams(user_name) jspf = pj.generate() output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask) @click.command() -@click.option('-c', '--count', required=False, default=25) @click.argument('index_dir') -def unresolved_releases(count, index_dir): +def unresolved(index_dir): "Show the top unresolved releases" db = SubsonicDatabase(index_dir) - db.open_db() + db.open() urt = UnresolvedRecordingTracker() - releases = urt.get_releases(num_items=count) + releases = urt.get_releases() urt.print_releases(releases) @@ -175,7 +183,7 @@ def unresolved_releases(count, index_dir): cli.add_command(top_tags) cli.add_command(duplicates) cli.add_command(periodic_jams) -cli.add_command(unresolved_releases) +cli.add_command(unresolved) def usage(command): From 19ae013b50424afa7bf412c6950f3e704fc18fc2 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 9 Jan 2024 11:32:25 +0100 Subject: [PATCH 34/39] Huh. 
I'm stuck --- README.md | 37 +++++++++- lb_content_resolver/artist_search.py | 2 +- lb_content_resolver/database.py | 23 +++++++ lb_content_resolver/lb_radio.py | 24 +------ lb_content_resolver/metadata_lookup.py | 3 - lb_content_resolver/tag_search.py | 10 +-- .../troi/patches/periodic_jams.py | 11 ++- lb_content_resolver/troi/periodic_jams.py | 13 +++- .../troi/recording_resolver.py | 67 ++++++++++++++----- lb_content_resolver/utils.py | 4 +- resolve.py | 25 +++++-- 11 files changed, 150 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index c5b4c95..a1c7cec 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,34 @@ The ListenBrainz Content Resolver resolves global JSPF playlists to a local collection of music, using the resolve function. -ListenBrainz Local Radio allows you to generate tag radio playlists that -can be uploaded to your favorite subsonic API enabled music system. +The features of this package include: -## Quick Start +1. ListenBrainz Radio Local: allows you to generate radio-style playlists that +that are created using only the files in the local collection, or if that is not +possible, a global playlist with MBIDS will be resolved to a local file collection +as best as possible. + +2. Periodic-jams: ListenBrainz periodic-jams, but fully resolved against your own +local collection. This is optimized for local and gives better results than +the global troi patch by the same name. + +3. Metadata fetchgin: Several of the features here require metadata to be downloaded +from ListenBrainz in order to power the LB Radio Local. + +4. Scan local file collections. MP3, Ogg Vorbis, Ogg Opus, WMA, M4A and FLAC file are supported. + +5. Scan a remote subsonic API collection. We've tested Navidrome, Funkwhale and Gonic. + +6. Print a report of duplicate files in the collection + +7. Print a list of top tags for the collection + +8. Print a list of tracks that failed to resolve and print the list of albums that they +belong to. 
This gives the user feedback about tracks that could be added to the collection +to improve the local matching. + + +## Installation To install the package: @@ -18,6 +42,8 @@ pip install -r requirements.txt ## Scanning your collection +### Scan a collection on the local filesystem + Then prepare the index and scan a music collection. mp3, m4a, wma, OggVorbis, OggOpus and flac files are supported. ``` @@ -31,6 +57,11 @@ If you remove from tracks from your collection, use cleanup to remove refereces ./resolve.py cleanup music_index ``` +### Scan a Subsonic collection + +Finish me + + ## Resolve JSPF playlists to local collection Then make a JSPF playlist on LB: diff --git a/lb_content_resolver/artist_search.py b/lb_content_resolver/artist_search.py index 4aa30d5..6f694ba 100755 --- a/lb_content_resolver/artist_search.py +++ b/lb_content_resolver/artist_search.py @@ -45,7 +45,7 @@ def search(self, artist_mbids, begin_percent, end_percent, num_recordings): FROM recording JOIN recording_metadata ON recording.id = recording_metadata.recording_id - JOIN recording_subsonic + LEFT JOIN recording_subsonic ON recording.id = recording_subsonic.recording_id WHERE artist_mbid in (%s) ORDER BY artist_mbid diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index e368462..1c2bef8 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -287,3 +287,26 @@ def database_cleanup(self, dry_run): print("Stale references removed") else: print("--delete not specified, no refeences removed") + + def metadata_sanity_check(self, include_subsonic=False): + """ + Run a sanity check on the DB to see if data is missing that is required for LB Radio to work. 
+ """ + + num_recordings = db.execute_sql("SELECT COUNT(*) FROM recording").fetchone()[0] + num_metadata = db.execute_sql("SELECT COUNT(*) FROM recording_metadata").fetchone()[0] + num_subsonic = db.execute_sql("SELECT COUNT(*) FROM recording_subsonic").fetchone()[0] + + if num_metadata == 0: + print("sanity check: You have not downloaded metadata for your collection. Run the metadata command.") + elif num_metadata < num_recordings // 2: + print("sanity check: Only %d of your %d recordings have metadata information available. Run the metdata command." % + (num_metadata, num_recordings)) + + if include_subsonic: + if num_subsonic == 0 and include_subsonic: + print( + "sanity check: You have not matched your collection against the collection in subsonic. Run the subsonic command.") + elif num_subsonic < num_recordings // 2: + print("sanity check: Only %d of your %d recordings have subsonic matches. Run the subsonic command." % + (num_subsonic, num_recordings)) diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index 1c6347c..61c3d92 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -20,28 +20,6 @@ class ListenBrainzRadioLocal: # TODO: Make this an argument MATCH_THRESHOLD = .8 - def sanity_check(self): - """ - Run a sanity check on the DB to see if data is missing that is required for LB Radio to work. - """ - - num_recordings = db.execute_sql("SELECT COUNT(*) FROM recording").fetchone()[0] - num_metadata = db.execute_sql("SELECT COUNT(*) FROM recording_metadata").fetchone()[0] - num_subsonic = db.execute_sql("SELECT COUNT(*) FROM recording_subsonic").fetchone()[0] - - if num_metadata == 0: - print("sanity check: You have not downloaded metadata for your collection. Run the metadata command.") - elif num_metadata < num_recordings // 2: - print("sanity check: Only %d of your %d recordings have metadata information available. Run the metdata command." 
% - (num_metadata, num_recordings)) - - if num_subsonic == 0: - print( - "sanity check: You have not matched your collection against the collection in subsonic. Run the subsonic command.") - elif num_subsonic < num_recordings // 2: - print("sanity check: Only %d of your %d recordings have subsonic matches. Run the subsonic command." % - (num_subsonic, num_recordings)) - def generate(self, mode, prompt): """ Generate a playlist given the mode and prompt. @@ -60,7 +38,7 @@ def generate(self, mode, prompt): if playlist == None: print("Your prompt generated an empty playlist.") - self.sanity_check() + return {"playlist": {"track": []}} # Resolve any tracks that have not been resolved to a subsonic_id or a local file self.resolve_playlist(self.MATCH_THRESHOLD, playlist) diff --git a/lb_content_resolver/metadata_lookup.py b/lb_content_resolver/metadata_lookup.py index 108abec..cfba09a 100755 --- a/lb_content_resolver/metadata_lookup.py +++ b/lb_content_resolver/metadata_lookup.py @@ -18,9 +18,6 @@ class MetadataLookup: BATCH_SIZE = 1000 - def __init__(self, db): - self.db = db - def lookup(self): """ Iterate over all recordings in the database and call lookup_chunk for chunks of recordings. 
diff --git a/lb_content_resolver/tag_search.py b/lb_content_resolver/tag_search.py index 9343ca9..7398fea 100755 --- a/lb_content_resolver/tag_search.py +++ b/lb_content_resolver/tag_search.py @@ -52,7 +52,7 @@ def search(self, tags, operator, begin_percent, end_percent, num_recordings): recordings = [] for rec in cursor.fetchall(): - recordings.append({"recording_mbid": rec[0], "popularity": rec[1], "subsonic_id": rec[2]}) + recordings.append({"recording_mbid": rec[0], "popularity": rec[1], "subsonic_id": rec[2], "file_path": rec[3]}) return select_recordings_on_popularity(recordings, begin_percent, end_percent, num_recordings) @@ -73,14 +73,13 @@ def or_search(self, tags, min_popularity=None, max_popularity=None): SELECT recording_mbid , popularity AS percent , subsonic_id - , recording_name - , artist_name + , file_path FROM recording JOIN recording_ids ON recording.id = recording_ids.recording_id JOIN recording_metadata ON recording.id = recording_metadata.recording_id - JOIN recording_subsonic + LEFT JOIN recording_subsonic ON recording.id = recording_subsonic.recording_id %s ORDER BY popularity DESC""" @@ -118,12 +117,13 @@ def and_search(self, tags, min_popularity=None, max_popularity=None): SELECT recording_mbid , popularity AS percent , subsonic_id + , file_path FROM recording JOIN recording_ids ON recording.id = recording_ids.recording_id JOIN recording_metadata ON recording.id = recording_metadata.recording_id - JOIN recording_subsonic + LEFT JOIN recording_subsonic ON recording.id = recording_subsonic.recording_id %s ORDER BY popularity DESC""" diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py index 48abfcf..37b0d0d 100755 --- a/lb_content_resolver/troi/patches/periodic_jams.py +++ b/lb_content_resolver/troi/patches/periodic_jams.py @@ -18,7 +18,7 @@ class LocalPeriodicJamsPatch(troi.patch.Patch): """ - def __init__(self, args, debug=False): + def __init__(self, args, debug=False): 
super().__init__(args, debug) @staticmethod @@ -37,10 +37,8 @@ def inputs(): "args": ["user_name"] }, { "type": "argument", - "args": ["type"], - "kwargs": { - "required": False - } + "args": ["target"], + }] @staticmethod @@ -57,6 +55,7 @@ def description(): def create(self, inputs): user_name = inputs['user_name'] + target = inputs['target'] recs = troi.listenbrainz.recs.UserRecordingRecommendationsElement(user_name, "raw", @@ -72,7 +71,7 @@ def create(self, inputs): recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement() recs_lookup.set_sources(latest_filter) - resolve = RecordingResolverElement(.8) + resolve = RecordingResolverElement(.8, target) resolve.set_sources(recs_lookup) pl_maker = PlaylistMakerElement(name="Local Periodic Jams for %s" % (user_name), diff --git a/lb_content_resolver/troi/periodic_jams.py b/lb_content_resolver/troi/periodic_jams.py index 97318c9..7b6fb5a 100755 --- a/lb_content_resolver/troi/periodic_jams.py +++ b/lb_content_resolver/troi/periodic_jams.py @@ -10,16 +10,23 @@ class LocalPeriodicJams(ListenBrainzRadioLocal): # TODO: Make this an argument MATCH_THRESHOLD = .8 - def __init__(self, user_name): + def __init__(self, user_name, target): ListenBrainzRadioLocal.__init__(self) self.user_name = user_name + self.target = target def generate(self): """ Generate a periodic jams playlist """ - patch = LocalPeriodicJamsPatch({"user_name": self.user_name, "echo": True, "debug": True, "min_recordings": 1}) + patch = LocalPeriodicJamsPatch({ + "user_name": self.user_name, + "echo": True, + "debug": True, + "target": self.target, + "min_recordings": 1 + }) # Now generate the playlist try: @@ -30,7 +37,7 @@ def generate(self): if playlist == None: print("Your prompt generated an empty playlist.") - self.sanity_check() + return {"playlist": {"track": []}} # Resolve any tracks that have not been resolved to a subsonic_id or a local file self.resolve_playlist(self.MATCH_THRESHOLD, playlist) diff --git 
a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py index 58348c0..990f923 100644 --- a/lb_content_resolver/troi/recording_resolver.py +++ b/lb_content_resolver/troi/recording_resolver.py @@ -3,6 +3,7 @@ from lb_content_resolver.content_resolver import ContentResolver from lb_content_resolver.model.subsonic import RecordingSubsonic from lb_content_resolver.model.recording import Recording +from lb_content_resolver.model.database import db from troi import Recording @@ -12,10 +13,14 @@ class RecordingResolverElement(Element): name set and resolves them to a local collection by using the ContentResolver class """ - def __init__(self, match_threshold): + def __init__(self, match_threshold, target="filesystem"): + """ Match threshold: The value from 0 to 1.0 on how sure a match must be to be accepted. + target: Either "filesystem" or "subsonic", the audio file source we're working with. + """ Element.__init__(self) self.match_threshold = match_threshold self.resolve = ContentResolver() + self.target = target @staticmethod def inputs(): @@ -44,27 +49,53 @@ def read(self, inputs): recording_ids = [result["recording_id"] for result in resolved] # Fetch the recordings to lookup subsonic ids - recordings = RecordingSubsonic \ - .select() \ - .where(RecordingSubsonic.recording_id.in_(recording_ids)) \ - .dicts() - - # Build a subsonic index + query = """SELECT recording_mbid + , file_path + , subsonic_id + FROM recording + LEFT JOIN recording_subsonic + ON recording_subsonic.recording_id = recording.id + WHERE recording.id IN (%s)""" + + placeholders = ",".join(("?", ) * len(recording_ids)) + print(query % placeholders) + cursor = db.execute_sql(query % placeholders, params=tuple(recording_ids)) + recordings = [] + for row in cursor.fetchall(): + print("row ", row) + recordings.append({ "recording_mbid": row[0], + "file_path": row[1], + "subsonic_id": row[2] }) + print(recordings) + print(recording_ids) + + # Build a indexes 
subsonic_index = {} - matched = [] + file_index = {} for recording in recordings: - matched.append(recording["recording"]) - subsonic_index[recording["recording"]] = recording["subsonic_id"] + if "subsonic_id" in recording: + subsonic_index[recording["recording_mbid"]] = recording["subsonic_id"] + if "file_path" in recording: + subsonic_index[recording["recording_mbid"]] = recording["file_path"] - # Set the subsonic ids into the recordings and only return recordings with an ID + # Set the ids into the recordings and only return recordings with an ID, depending on target results = [] for r in resolved: - try: - recording = inputs[0][r["index"]] - recording.musicbrainz["subsonic_id"] = subsonic_index[r["recording_id"]] - except KeyError: - continue - - results.append(recording) + recording = inputs[0][r["index"]] + if self.target == "subsonic": + try: + recording.musicbrainz["subsonic_id"] = subsonic_index[r["recording_id"]] + except KeyError: + continue + + results.append(recording) + + if self.target == "filesystem": + try: + recording.musicbrainz["filename"] = file_index[r["recording_id"]] + except KeyError: + continue + + results.append(recording) return results diff --git a/lb_content_resolver/utils.py b/lb_content_resolver/utils.py index 042842e..f55d9de 100755 --- a/lb_content_resolver/utils.py +++ b/lb_content_resolver/utils.py @@ -70,8 +70,10 @@ def select_recordings_on_popularity(recordings, begin_percent, end_percent, num_ results = plist() for rec in matching_recordings: r = TroiRecording(mbid=rec["recording_mbid"]) - if "subsonic_id" in rec: + if "subsonic_id" in rec and rec["subsonic_id"]: r.musicbrainz = {"subsonic_id": rec["subsonic_id"]} + if "file_path" in rec and rec["file_path"]: + r.musicbrainz = {"filename": rec["file_path"]} results.append(r) diff --git a/resolve.py b/resolve.py index 84eec3b..7fa31cd 100755 --- a/resolve.py +++ b/resolve.py @@ -19,6 +19,8 @@ import config # TODO: Make sure all functions work with subsonic and with local files 
+# TODO: Make sure config.py is only needed for subsonic functions +# TODO: Think up a better way to specify the DB location def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask): @@ -77,11 +79,11 @@ def metadata(index_dir): """Lookup metadata (popularity and tags) for recordings""" db = Database(index_dir) db.open() - lookup = MetadataLookup(db) + lookup = MetadataLookup() lookup.lookup() print("\nThese top tags describe your collection:") - tt = TopTags(db) + tt = TopTags() tt.print_top_tags_tightly(100) @@ -122,6 +124,11 @@ def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, pr db.open() r = ListenBrainzRadioLocal() jspf = r.generate(mode, prompt) + if len(jspf["playlist"]["track"]) == 0: + print(upload_to_subsonic) + db.metadata_sanity_check(include_subsonic=upload_to_subsonic) + return + output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask) @@ -148,17 +155,23 @@ def duplicates(exclude_different_release, index_dir): @click.command() -@click.option('-u', '--upload-to-subsonic', required=False, is_flag=True) +@click.option('-u', '--upload-to-subsonic', required=False, is_flag=True, default=False) @click.option('-p', '--save-to-playlist', required=False) @click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file") @click.argument('index_dir') @click.argument('user_name') def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, user_name): "Generate a periodic jams playlist" - db = SubsonicDatabase(index_dir) + db = Database(index_dir) db.open() - pj = LocalPeriodicJams(user_name) + + target = "subsonic" if upload_to_subsonic else "filesystem" + pj = LocalPeriodicJams(user_name, target) jspf = pj.generate() + if len(jspf["playlist"]["track"]) == 0: + db.metadata_sanity_check(include_subsonic=upload_to_subsonic) + return + output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask) @click.command() @@ -166,7 +179,7 @@ def 
periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use def unresolved(index_dir): "Show the top unresolved releases" - db = SubsonicDatabase(index_dir) + db = Database(index_dir) db.open() urt = UnresolvedRecordingTracker() releases = urt.get_releases() From b16af12eb60e7bc7ca77d5369b5ded36952008be Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 9 Jan 2024 21:02:14 +0100 Subject: [PATCH 35/39] Interim checkin --- lb_content_resolver/troi/recording_resolver.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py index 990f923..10572e2 100644 --- a/lb_content_resolver/troi/recording_resolver.py +++ b/lb_content_resolver/troi/recording_resolver.py @@ -46,7 +46,7 @@ def read(self, inputs): # Resolve the recordings resolved = self.resolve.resolve_recordings(lookup_data, self.match_threshold) - recording_ids = [result["recording_id"] for result in resolved] + recording_ids = tuple([result["recording_id"] for result in resolved]) # Fetch the recordings to lookup subsonic ids query = """SELECT recording_mbid @@ -58,16 +58,13 @@ def read(self, inputs): WHERE recording.id IN (%s)""" placeholders = ",".join(("?", ) * len(recording_ids)) - print(query % placeholders) - cursor = db.execute_sql(query % placeholders, params=tuple(recording_ids)) + cursor = db.execute_sql(query % placeholders, params=recording_ids) recordings = [] for row in cursor.fetchall(): - print("row ", row) recordings.append({ "recording_mbid": row[0], "file_path": row[1], "subsonic_id": row[2] }) - print(recordings) - print(recording_ids) + print(len(recordings)) # Build a indexes subsonic_index = {} From 74f7a3412c81dd9404f5b20d301e5c24c959ffb7 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 9 Jan 2024 23:17:52 +0100 Subject: [PATCH 36/39] All features now work with filename or subsonic_id --- lb_content_resolver/database.py | 8 +++++-- 
lb_content_resolver/lb_radio.py | 16 ++++++++++++- lb_content_resolver/tag_search.py | 3 --- .../troi/recording_resolver.py | 24 ++++++++++++------- resolve.py | 20 ++++++++++++++-- 5 files changed, 55 insertions(+), 16 deletions(-) diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index 1c2bef8..75eee1b 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -49,8 +49,12 @@ def open(self): """ Open the database file and connect to the db. """ - setup_db(self.db_file) - db.connect() + try: + setup_db(self.db_file) + db.connect() + except peewee.OperationalError: + print("Cannot open database index file: '%s'" % self.db_file) + sys.exit(-1) def close(self): """ Close the db.""" diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index 61c3d92..206e5b4 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -46,7 +46,9 @@ def generate(self, mode, prompt): return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}} def resolve_playlist(self, match_threshold, playlist): + """ Attempt to resolve any tracks without local ids to local ids """ + # Find recordings that are missing local ids recordings = [] for recording in playlist.playlists[0].recordings: if "subsonic_id" in recording.musicbrainz or "filename" in recording.musicbrainz: @@ -57,9 +59,21 @@ def resolve_playlist(self, match_threshold, playlist): if not recordings: return - return self.resolve_recordings(match_threshold, recordings) + # Use the content resolver to resolve the recordings + self.resolve_recordings(match_threshold, recordings) + + # Now filter out the tracks that were not matched + filtered = [] + for rec in playlist.playlists[0].recordings: + if "subsonic_id" in rec.musicbrainz or "fileame" in rec.musicbrainz: + filtered.append(rec) + + playlist.playlists[0].recordings = filtered + def resolve_recordings(self, match_threshold, recordings): + """ Use the 
content resolver to resolve the given recordings """ + cr = ContentResolver() resolved = cr.resolve_playlist(match_threshold, recordings) diff --git a/lb_content_resolver/tag_search.py b/lb_content_resolver/tag_search.py index 7398fea..c266e95 100755 --- a/lb_content_resolver/tag_search.py +++ b/lb_content_resolver/tag_search.py @@ -16,9 +16,6 @@ class LocalRecordingSearchByTagService(RecordingSearchByTagService): ''' Given the local database, search for recordings that meet given tag criteria - - NOTE: Right now this only works for subsonic tracks -- at some point we may need - to make this work for tracks without subsonic ids. ''' def __init__(self): diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py index 10572e2..3bb8f0a 100644 --- a/lb_content_resolver/troi/recording_resolver.py +++ b/lb_content_resolver/troi/recording_resolver.py @@ -48,32 +48,40 @@ def read(self, inputs): resolved = self.resolve.resolve_recordings(lookup_data, self.match_threshold) recording_ids = tuple([result["recording_id"] for result in resolved]) + # Could also be done with: + # Recording.select().join(RecordingSubsonic, JOIN.LEFT_OUTER).where(Recording.id.in_(recording_ids)) + # Fetch the recordings to lookup subsonic ids - query = """SELECT recording_mbid + query = """SELECT recording.id , file_path , subsonic_id FROM recording LEFT JOIN recording_subsonic ON recording_subsonic.recording_id = recording.id - WHERE recording.id IN (%s)""" + WHERE """ + + where_clause_elements = [] + for id in recording_ids: + where_clause_elements.append("recording.id = %d" % id) + + where_clause = " or ".join(where_clause_elements) + query += where_clause - placeholders = ",".join(("?", ) * len(recording_ids)) - cursor = db.execute_sql(query % placeholders, params=recording_ids) + cursor = db.execute_sql(query) recordings = [] for row in cursor.fetchall(): - recordings.append({ "recording_mbid": row[0], + recordings.append({ "recording_id": 
row[0], "file_path": row[1], "subsonic_id": row[2] }) - print(len(recordings)) # Build a indexes subsonic_index = {} file_index = {} for recording in recordings: if "subsonic_id" in recording: - subsonic_index[recording["recording_mbid"]] = recording["subsonic_id"] + subsonic_index[recording["recording_id"]] = recording["subsonic_id"] if "file_path" in recording: - subsonic_index[recording["recording_mbid"]] = recording["file_path"] + file_index[recording["recording_id"]] = recording["file_path"] # Set the ids into the recordings and only return recordings with an ID, depending on target results = [] diff --git a/resolve.py b/resolve.py index 7fa31cd..c239c91 100755 --- a/resolve.py +++ b/resolve.py @@ -16,9 +16,9 @@ from lb_content_resolver.troi.periodic_jams import LocalPeriodicJams from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist_from_results, write_m3u_playlist_from_jspf from lb_content_resolver.unresolved_recording import UnresolvedRecordingTracker +from troi.playlist import PLAYLIST_TRACK_EXTENSION_URI import config -# TODO: Make sure all functions work with subsonic and with local files # TODO: Make sure config.py is only needed for subsonic functions # TODO: Think up a better way to specify the DB location @@ -28,10 +28,23 @@ def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask): return if upload_to_subsonic and len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "": + try: + _ = jspf["playlist"]["track"][0]["extension"][PLAYLIST_TRACK_EXTENSION_URI] \ + ["additional_metadata"]["subsonic_identifier"] + except KeyError: + print("Playlist does not appear to contain subsonic ids. Can't upload to subsonic.") + return + if dont_ask or ask_yes_no_question("Upload via subsonic? 
(Y/n)"): print("uploading playlist") db.upload_playlist(jspf) + elif save_to_playlist is not None and len(jspf["playlist"]["track"]) > 0: + try: + _ = jspf["playlist"]["track"][0]["location"] + except KeyError: + print("Playlist does not appear to contain file paths. Can't write a local playlist.") + return if dont_ask or ask_yes_no_question(f"Save to '{save_to_playlist}'? (Y/n)"): print("saving playlist") write_m3u_playlist_from_jspf(save_to_playlist, jspf) @@ -162,9 +175,11 @@ def duplicates(exclude_different_release, index_dir): @click.argument('user_name') def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, user_name): "Generate a periodic jams playlist" - db = Database(index_dir) + db = SubsonicDatabase(index_dir) db.open() + # TODO: ensure that we catch upload to subsonic when we have a FS playlist + target = "subsonic" if upload_to_subsonic else "filesystem" pj = LocalPeriodicJams(user_name, target) jspf = pj.generate() @@ -174,6 +189,7 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask) + @click.command() @click.argument('index_dir') def unresolved(index_dir): From f866b0d4007c2675430a5e9d235d85f4d77078d2 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 9 Jan 2024 23:30:48 +0100 Subject: [PATCH 37/39] All functions now work without config.py if you dont use subsonic --- README.md | 25 ++++++++++++++++++++++++- lb_content_resolver/lb_radio.py | 1 - lb_content_resolver/subsonic.py | 4 +++- resolve.py | 25 +++++++++++++------------ 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index a1c7cec..fb415de 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,9 @@ pip install -r requirements.txt ## Scanning your collection +Note: Soon we will eliminate the requirement to do a filesystem scan before also doing a subsonic +scan (if you plan to use subsonic). 
For now, do the file system scan, then the subsonic scan. + ### Scan a collection on the local filesystem Then prepare the index and scan a music collection. mp3, m4a, wma, OggVorbis, OggOpus and flac files are supported. @@ -59,7 +62,19 @@ If you remove from tracks from your collection, use cleanup to remove refereces ### Scan a Subsonic collection -Finish me +To enable support you need to create a config.py file from config.py.sample: + +``` +cp config.py.sample config.py +``` + +Then edit the file and add your subsonic configuration. + +``` +./resolve.py subsonic music_index +``` + +This will match your collection to the remove subsonic API collection. ## Resolve JSPF playlists to local collection @@ -212,3 +227,11 @@ If you specify -e or --exclude-different-release, then case #3 will not be shown The top-tags command will print the top tags and the number of times they have been used in your collection. This requires that the "metadata" command was run before. + +### Unresolved Releases + +Any tracks that fail to resolve to a local collection will have their +recording_mbid saved in the database. This enables the unresolved releases +report which specifies a list of releases that you might consider adding to your +collection, because in the past they failed to resolve to your local collection.
+ diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index 206e5b4..d6d947a 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -9,7 +9,6 @@ from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService from lb_content_resolver.model.database import db from lb_content_resolver.content_resolver import ContentResolver -import config class ListenBrainzRadioLocal: diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py index fcf3377..50c3ce5 100755 --- a/lb_content_resolver/subsonic.py +++ b/lb_content_resolver/subsonic.py @@ -9,7 +9,6 @@ from lb_content_resolver.database import Database from lb_content_resolver.model.database import db from lb_content_resolver.utils import bcolors -import config class SubsonicDatabase(Database): @@ -45,6 +44,8 @@ def run_sync(self): """ print("[ connect to subsonic ]") + + import config conn = libsonic.Connection(config.SUBSONIC_HOST, config.SUBSONIC_USER, config.SUBSONIC_PASSWORD, config.SUBSONIC_PORT) cursor = db.connection().cursor() @@ -150,6 +151,7 @@ def upload_playlist(self, jspf): Given a JSPF playlist, upload the playlist to the subsonic API. 
""" + import config conn = libsonic.Connection(config.SUBSONIC_HOST, config.SUBSONIC_USER, config.SUBSONIC_PASSWORD, config.SUBSONIC_PORT) song_ids = [] diff --git a/resolve.py b/resolve.py index c239c91..e6b6837 100755 --- a/resolve.py +++ b/resolve.py @@ -17,9 +17,7 @@ from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist_from_results, write_m3u_playlist_from_jspf from lb_content_resolver.unresolved_recording import UnresolvedRecordingTracker from troi.playlist import PLAYLIST_TRACK_EXTENSION_URI -import config -# TODO: Make sure config.py is only needed for subsonic functions # TODO: Think up a better way to specify the DB location @@ -27,17 +25,20 @@ def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask): if jspf is None: return - if upload_to_subsonic and len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "": - try: - _ = jspf["playlist"]["track"][0]["extension"][PLAYLIST_TRACK_EXTENSION_URI] \ - ["additional_metadata"]["subsonic_identifier"] - except KeyError: - print("Playlist does not appear to contain subsonic ids. Can't upload to subsonic.") - return + if upload_to_subsonic: + import config + + if len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "": + try: + _ = jspf["playlist"]["track"][0]["extension"][PLAYLIST_TRACK_EXTENSION_URI] \ + ["additional_metadata"]["subsonic_identifier"] + except KeyError: + print("Playlist does not appear to contain subsonic ids. Can't upload to subsonic.") + return - if dont_ask or ask_yes_no_question("Upload via subsonic? (Y/n)"): - print("uploading playlist") - db.upload_playlist(jspf) + if dont_ask or ask_yes_no_question("Upload via subsonic? 
(Y/n)"): + print("uploading playlist") + db.upload_playlist(jspf) elif save_to_playlist is not None and len(jspf["playlist"]["track"]) > 0: try: From 9b31507a805f1a9faf436a548f9492a268d81049 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 10 Jan 2024 12:12:56 +0100 Subject: [PATCH 38/39] Rework the index_dir and use db_files instead. --- README.md | 49 +++++------ config.py.sample | 3 + lb_content_resolver/content_resolver.py | 2 - lb_content_resolver/database.py | 24 +++--- lb_content_resolver/fuzzy_index.py | 7 +- resolve.py | 110 +++++++++++++++--------- 6 files changed, 111 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index fb415de..e0e3b0a 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,21 @@ source .virtualenv/bin/activate pip install -r requirements.txt ``` +### Setting up config.py + +While it isn't strictly necessary to set up config.py, it makes using the resolver easier: + +``` +cp config.py.sample config.py +``` + +Then edit config.py and set the location of where you're going to store your resolver database file +into DATABASE_FILE. If you plan to use a Subsonic API, then fill out the Subsonic section as well. + +If you decide not to use the config.py file, make sure to pass the path to the DB file with -d to each +command. All further examples in this file assume you added the config file and will therefore omit +the -d option. + ## Scanning your collection Note: Soon we will eliminate the requirement to do a filesystem scan before also doing a subsonic @@ -50,14 +65,14 @@ scan (if you plan to use subsonic). For now, do the file system scan, then the s Then prepare the index and scan a music collection. mp3, m4a, wma, OggVorbis, OggOpus and flac files are supported.
``` -./resolve.py create music_index -./resolve.py scan music_index +./resolve.py create +./resolve.py scan ``` If you remove from tracks from your collection, use cleanup to remove refereces to those tracks: ``` -./resolve.py cleanup music_index +./resolve.py cleanup ``` ### Scan a Subsonic collection @@ -71,7 +86,7 @@ cp config.py.sample config.py Then edit the file and add your subsonic configuration. ``` -./resolve.py subsonic music_index +./resolve.py subsonic ``` This will match your collection to the remove subsonic API collection. @@ -94,7 +109,7 @@ curl "https://api.listenbrainz.org/1/playlist/" > test.jspf Finally, resolve the playlist to local files: ``` -./resolve.py playlist music_index input.jspf output.m3u +./resolve.py playlist input.jspf output.m3u ``` Then open the m3u playlist with a local tool. @@ -124,21 +139,7 @@ to download more data for your MusicBrainz tagged music collection. First, download tag and popularity data: ``` -./resolve.py metadata music_index -``` - -Then, copy config.py.sample to config.py and then edit config.py: - -``` -cp config.py.sample config.py -edit config.py -``` - -Fill out the values for your subsonic server API and save the file. -Finally, match your collection against the subsonic collection: - -``` -./resolve.py subsonic music_index +./resolve.py metadata ``` ### Playlist generation @@ -167,7 +168,7 @@ isn't very suited for the prompt that was given. #### Artist Element ``` -./resolve.py lb-radio music_index easy 'artist:(taylor swift, drake)' +./resolve.py lb-radio easy 'artist:(taylor swift, drake)' ``` Generates a playlist with music from Taylor Swift and artists similar @@ -177,14 +178,14 @@ to her and Drake, and artists similar to him. #### Tag Element ``` -./resolve.py lb-radio music_index easy 'tag:(downtempo, trip hop)' +./resolve.py lb-radio easy 'tag:(downtempo, trip hop)' ``` This will generate a playlist on easy mode for recordings that are tagged with "downtempo" AND "trip hop". 
``` -./resolve.py lb-radio music_index medium 'tag:(downtempo, trip hop)::or' +./resolve.py lb-radio medium 'tag:(downtempo, trip hop)::or' ``` This will generate a playlist on medium mode for recordings that are @@ -194,7 +195,7 @@ at the end of the prompt. You can include more than on tag query in a prompt: ``` -./resolve.py lb-radio music_index medium 'tag:(downtempo, trip hop)::or tag:(punk, ska)' +./resolve.py lb-radio medium 'tag:(downtempo, trip hop)::or tag:(punk, ska)' ``` #### Stats, Collections, Playlists and Rec diff --git a/config.py.sample b/config.py.sample index 2a22996..fa007f4 100644 --- a/config.py.sample +++ b/config.py.sample @@ -1,3 +1,6 @@ +# Where to find the database file +DATABASE_FILE = "" + # To connect to a subsonic API SUBSONIC_HOST = "" # include http:// or https:// SUBSONIC_USER = "" diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py index 915a5c7..d5b2418 100755 --- a/lb_content_resolver/content_resolver.py +++ b/lb_content_resolver/content_resolver.py @@ -116,8 +116,6 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None) if recordings is None and jspf_playlist is None: raise ValueError("Either recordings or jspf_playlist must be passed.") - print("\nResolve recordings to local files or subsonic ids") - artist_recording_data = [] if jspf_playlist is not None: if len(jspf_playlist["playlist"]["track"]) == 0: diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index 75eee1b..33d2973 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -23,24 +23,16 @@ class Database: ''' Keep a database with metadata for a collection of local music files. ''' - def __init__(self, index_dir): - self.index_dir = index_dir - self.db_file = os.path.join(index_dir, "lb_resolve.db") + def __init__(self, db_file): + self.db_file = db_file self.fuzzy_index = None def create(self): """ - Create the index directory for the data. 
Currently it contains only - the sqlite dir, but in the future we may serialize the fuzzy index here as well. + Create the database. Can be run again to create tables that have been recently added to the code, + but don't exist in the DB yet. """ - if not os.path.exists(self.index_dir): - try: - os.mkdir(self.index_dir) - except OSError as err: - print("Could not create index directory: %s (%s)" % (self.index_dir, err)) - return - setup_db(self.db_file) db.connect() db.create_tables([Recording, RecordingMetadata, Tag, RecordingTag, RecordingSubsonic, UnresolvedRecording]) @@ -84,7 +76,7 @@ def scan(self, music_dir): with tqdm(total=self.track_count_estimate) as self.progress_bar: self.traverse("") - self.close_db() + self.close() print("Checked %s tracks:" % self.total) print(" %5d tracks not changed since last run" % self.not_changed) @@ -285,12 +277,16 @@ def database_cleanup(self, dry_run): print("RM %s" % recording.file_path) recording_ids.append(recording.id) + if not recording_ids: + print("No cleanup needed, all recordings found") + return + if not dry_run: placeholders = ",".join(("?", ) * len(recording_ids)) db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % placeholders, tuple(recording_ids)) print("Stale references removed") else: - print("--delete not specified, no refeences removed") + print("--delete not specified, no refences removed") def metadata_sanity_check(self, include_subsonic=False): """ diff --git a/lb_content_resolver/fuzzy_index.py b/lb_content_resolver/fuzzy_index.py index 548c794..8142326 100755 --- a/lb_content_resolver/fuzzy_index.py +++ b/lb_content_resolver/fuzzy_index.py @@ -71,7 +71,10 @@ def search(self, query_data): output = [] for i, result in enumerate(results): - output.append({ "confidence": fabs(result[1][0]), - "recording_id": result[0][0] }) + if len(result[0]): + output.append({ "confidence": fabs(result[1][0]), + "recording_id": result[0][0] }) + else: + output.append({ "confidence": 0.0, 
"recording_id": 0 }) return output diff --git a/resolve.py b/resolve.py index e6b6837..cd9b51d 100755 --- a/resolve.py +++ b/resolve.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os +import sys import click @@ -18,8 +19,6 @@ from lb_content_resolver.unresolved_recording import UnresolvedRecordingTracker from troi.playlist import PLAYLIST_TRACK_EXTENSION_URI -# TODO: Think up a better way to specify the DB location - def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask): if jspf is None: @@ -54,44 +53,67 @@ def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask): print("Playlist displayed, but not saved. Use -p or -u options to save/upload playlists.") +def db_file_check(db_file): + """ Check the db_file argument and give useful user feedback. """ + + if not db_file: + try: + import config + except ModuleNotFoundError: + print("Database file not specified with -d (--db_file) argument. Consider adding it to config.py for ease of use.") + sys.exit(-1) + + if not config.DATABASE_FILE: + print("config.py found, but DATABASE_FILE is empty. 
Please add it or use -d option to specify it.") + sys.exit(-1) + + return config.DATABASE_FILE + else: + return db_file + + @click.group() def cli(): pass @click.command() -@click.argument('index_dir') -def create(index_dir): - """Create a new index directory to track a music collection""" - db = Database(index_dir) +@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) +def create(db_file): + """Create a new database to track a music collection""" + db_file = db_file_check(db_file) + db = Database(db_file) db.create() @click.command() -@click.argument('index_dir') +@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) @click.argument('music_dir') -def scan(index_dir, music_dir): +def scan(db_file, music_dir): """Scan a directory and its subdirectories for music files to add to the collection""" - db = Database(index_dir) + db_file = db_file_check(db_file) + db = Database(db_file) db.open() db.scan(music_dir) @click.command() -@click.option('-d', '--delete', required=False, is_flag=True, default=True) -@click.argument('index_dir') -def cleanup(delete, index_dir): +@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) +@click.option("-r", "--remove", required=False, is_flag=True, default=True) +def cleanup(db_file, remove): """Perform a database cleanup. 
Check that files exist and if they don't remove from the index""" - db = Database(index_dir) + db_file = db_file_check(db_file) + db = Database(db_file) db.open() - db.database_cleanup(delete) + db.database_cleanup(remove) @click.command() -@click.argument('index_dir') -def metadata(index_dir): +@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) +def metadata(db_file): """Lookup metadata (popularity and tags) for recordings""" - db = Database(index_dir) + db_file = db_file_check(db_file) + db = Database(db_file) db.open() lookup = MetadataLookup() lookup.lookup() @@ -102,22 +124,24 @@ def metadata(index_dir): @click.command() -@click.argument('index_dir') -def subsonic(index_dir): +@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) +def subsonic(db_file): """Scan a remote subsonic music collection""" - db = SubsonicDatabase(index_dir) + db_file = db_file_check(db_file) + db = SubsonicDatabase(db_file) db.open() db.sync() @click.command() -@click.argument('index_dir') +@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) +@click.option('-t', '--threshold', default=.80) @click.argument('jspf_playlist') @click.argument('m3u_playlist') -@click.option('-t', '--threshold', default=.80) -def playlist(index_dir, jspf_playlist, m3u_playlist, threshold): +def playlist(db_file, threshold, jspf_playlist, m3u_playlist): """ Resolve a JSPF file with MusicBrainz recording MBIDs to files in the local collection""" - db = Database(index_dir) + db_file = db_file_check(db_file) + db = Database(db_file) db.open() cr = ContentResolver() jspf = read_jspf_playlist(jspf_playlist) @@ -126,15 +150,16 @@ def playlist(index_dir, jspf_playlist, m3u_playlist, threshold): @click.command() +@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) @click.option('-u', 
'--upload-to-subsonic', required=False, is_flag=True) @click.option('-p', '--save-to-playlist', required=False) @click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file") -@click.argument('index_dir') @click.argument('mode') @click.argument('prompt') -def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, prompt): +def lb_radio(db_file, upload_to_subsonic, save_to_playlist, dont_ask, mode, prompt): """Use the ListenBrainz Radio engine to create a playlist from a prompt, using a local music collection""" - db = SubsonicDatabase(index_dir) + db_file = db_file_check(db_file) + db = SubsonicDatabase(db_file) db.open() r = ListenBrainzRadioLocal() jspf = r.generate(mode, prompt) @@ -147,40 +172,41 @@ def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, pr @click.command() -@click.argument('index_dir') +@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) @click.argument('count', required=False, default=250) -def top_tags(index_dir, count): +def top_tags(db_file, count): "Display the top most used tags in the music collection. 
Useful for writing LB Radio tag prompts" - db = Database(index_dir) + db_file = db_file_check(db_file) + db = Database(db_file) db.open() tt = TopTags() tt.print_top_tags_tightly(count) @click.command() -@click.argument('index_dir') +@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) @click.option('-e', '--exclude-different-release', required=False, default=False, is_flag=True) -def duplicates(exclude_different_release, index_dir): +def duplicates(db_file, exclude_different_release): "Print all the tracks in the DB that are duplciated as per recording_mbid" - db = Database(index_dir) + db_file = db_file_check(db_file) + db = Database(db_file) db.open() fd = FindDuplicates(db) fd.print_duplicate_recordings(exclude_different_release) @click.command() +@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True, default=False) @click.option('-p', '--save-to-playlist', required=False) @click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file") -@click.argument('index_dir') @click.argument('user_name') -def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, user_name): +def periodic_jams(db_file, upload_to_subsonic, save_to_playlist, dont_ask, user_name): "Generate a periodic jams playlist" - db = SubsonicDatabase(index_dir) + db_file = db_file_check(db_file) + db = SubsonicDatabase(db_file) db.open() - # TODO: ensure that we catch upload to subsonic when we have a FS playlist - target = "subsonic" if upload_to_subsonic else "filesystem" pj = LocalPeriodicJams(user_name, target) jspf = pj.generate() @@ -192,11 +218,11 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use @click.command() -@click.argument('index_dir') -def unresolved(index_dir): +@click.option("-d", "--db_file", help="Database file 
for the local collection", required=False, is_flag=False) +def unresolved(db_file): "Show the top unresolved releases" - - db = Database(index_dir) + db_file = db_file_check(db_file) + db = Database(db_file) db.open() urt = UnresolvedRecordingTracker() releases = urt.get_releases() From f2dbf1aba9f5112dfa29b1f43198dd36293cc88e Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 10 Jan 2024 12:32:35 +0100 Subject: [PATCH 39/39] Make match threshold a command line arg --- lb_content_resolver/database.py | 2 +- lb_content_resolver/lb_radio.py | 11 +++++------ lb_content_resolver/troi/periodic_jams.py | 8 +++----- resolve.py | 10 ++++++---- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py index 33d2973..c72e840 100755 --- a/lb_content_resolver/database.py +++ b/lb_content_resolver/database.py @@ -182,7 +182,7 @@ def read_metadata_and_add(self, relative_path, format, mtime, update): elif format == "wma": mdata = wma.read(file_path) - # TODO: In the future we should attempt to read basic metadata from + # In the future we should attempt to read basic metadata from # the filename here. But, if you have untagged files, this tool # really isn't for you anyway. heh. if mdata is not None: diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py index d6d947a..4114eae 100755 --- a/lb_content_resolver/lb_radio.py +++ b/lb_content_resolver/lb_radio.py @@ -16,12 +16,11 @@ class ListenBrainzRadioLocal: Generate local playlists against a music collection available via subsonic. ''' - # TODO: Make this an argument - MATCH_THRESHOLD = .8 - - def generate(self, mode, prompt): + def generate(self, mode, prompt, match_threshold): """ - Generate a playlist given the mode and prompt. + Generate a playlist given the mode and prompt. Optional match_threshold, a value from + 0 to 1.0 allows the use to control how well local resolution tracks must match before + being considered a match. 
""" patch = LBRadioPatch({"mode": mode, "prompt": prompt, "echo": True, "debug": True, "min_recordings": 1}) @@ -40,7 +39,7 @@ def generate(self, mode, prompt): return {"playlist": {"track": []}} # Resolve any tracks that have not been resolved to a subsonic_id or a local file - self.resolve_playlist(self.MATCH_THRESHOLD, playlist) + self.resolve_playlist(match_threshold, playlist) return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}} diff --git a/lb_content_resolver/troi/periodic_jams.py b/lb_content_resolver/troi/periodic_jams.py index 7b6fb5a..e61812c 100755 --- a/lb_content_resolver/troi/periodic_jams.py +++ b/lb_content_resolver/troi/periodic_jams.py @@ -7,13 +7,11 @@ class LocalPeriodicJams(ListenBrainzRadioLocal): Generate local playlists against a music collection available via subsonic. ''' - # TODO: Make this an argument - MATCH_THRESHOLD = .8 - - def __init__(self, user_name, target): + def __init__(self, user_name, target, match_threshold): ListenBrainzRadioLocal.__init__(self) self.user_name = user_name self.target = target + self.match_threshold = match_threshold def generate(self): """ @@ -40,6 +38,6 @@ def generate(self): return {"playlist": {"track": []}} # Resolve any tracks that have not been resolved to a subsonic_id or a local file - self.resolve_playlist(self.MATCH_THRESHOLD, playlist) + self.resolve_playlist(self.match_threshold, playlist) return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}} diff --git a/resolve.py b/resolve.py index cd9b51d..1e55c64 100755 --- a/resolve.py +++ b/resolve.py @@ -151,18 +151,19 @@ def playlist(db_file, threshold, jspf_playlist, m3u_playlist): @click.command() @click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) +@click.option('-t', '--threshold', default=.80) @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True) @click.option('-p', '--save-to-playlist', required=False) 
@click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file") @click.argument('mode') @click.argument('prompt') -def lb_radio(db_file, upload_to_subsonic, save_to_playlist, dont_ask, mode, prompt): +def lb_radio(db_file, threshold, upload_to_subsonic, save_to_playlist, dont_ask, mode, prompt): """Use the ListenBrainz Radio engine to create a playlist from a prompt, using a local music collection""" db_file = db_file_check(db_file) db = SubsonicDatabase(db_file) db.open() r = ListenBrainzRadioLocal() - jspf = r.generate(mode, prompt) + jspf = r.generate(mode, prompt, threshold) if len(jspf["playlist"]["track"]) == 0: print(upload_to_subsonic) db.metadata_sanity_check(include_subsonic=upload_to_subsonic) @@ -197,18 +198,19 @@ def duplicates(db_file, exclude_different_release): @click.command() @click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False) +@click.option('-t', '--threshold', default=.80) @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True, default=False) @click.option('-p', '--save-to-playlist', required=False) @click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file") @click.argument('user_name') -def periodic_jams(db_file, upload_to_subsonic, save_to_playlist, dont_ask, user_name): +def periodic_jams(db_file, threshold, upload_to_subsonic, save_to_playlist, dont_ask, user_name): "Generate a periodic jams playlist" db_file = db_file_check(db_file) db = SubsonicDatabase(db_file) db.open() target = "subsonic" if upload_to_subsonic else "filesystem" - pj = LocalPeriodicJams(user_name, target) + pj = LocalPeriodicJams(user_name, target, threshold) jspf = pj.generate() if len(jspf["playlist"]["track"]) == 0: db.metadata_sanity_check(include_subsonic=upload_to_subsonic)