From df921014582391a6f23def49f53df0c7aaf5e55b Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 24 Dec 2023 12:39:53 +0100
Subject: [PATCH 01/39] Artist search feature implemented

---
 lb_content_resolver/artist_search.py | 114 +++++++++++++++++++++++++++
 lb_content_resolver/lb_radio.py      |   2 +
 resolve.py                           |   9 +++
 3 files changed, 125 insertions(+)
 create mode 100755 lb_content_resolver/artist_search.py

diff --git a/lb_content_resolver/artist_search.py b/lb_content_resolver/artist_search.py
new file mode 100755
index 0000000..dc7be25
--- /dev/null
+++ b/lb_content_resolver/artist_search.py
@@ -0,0 +1,114 @@
+import os
+from collections import defaultdict
+import datetime
+import sys
+
+import peewee
+import requests
+
+from lb_content_resolver.model.database import db
+from lb_content_resolver.model.recording import Recording, RecordingMetadata
+#from troi.recording_search_service import RecordingSearchByArtistService
+from troi.splitter import plist
+
+
+class LocalRecordingSearchByArtistService: #(RecordingSearchByArtistService):
+    ''' 
+    Given the local database, search for artists that meet given tag criteria
+    '''
+
+    def __init__(self, db):
+#        RecordingSearchByTagService.__init__(self)
+        self.db = db
+
+    def search(self, artist_mbids, begin_percent, end_percent, num_recordings):
+        """
+        Perform an artist search. Parameters:
+
+        artist_mbids - a list of artist MBIDs for which to search recordings
+        begin_percent - if many recordings match the above parameters, return only
+                        recordings that have a minimum popularity percent score 
+                        of begin_percent.
+        end_percent - if many recordings match the above parameters, return only
+                      recordings that have a maximum popularity percent score 
+                      of end_percent.
+        num_recordings - ideally return these many recordings
+
+        If only few recordings match, the begin_percent and end_percent are
+        ignored.
+        """
+
+        print(artist_mbids)
+
+        query = """SELECT popularity
+                        , recording_mbid
+                        , artist_mbid
+                        , subsonic_id
+                     FROM recording
+                     JOIN recording_metadata
+                       ON recording.id = recording_metadata.recording_id
+                     JOIN recording_subsonic
+                       ON recording.id = recording_subsonic.recording_id
+                    WHERE artist_mbid in (%s)
+                 ORDER BY artist_mbid
+                        , popularity"""
+
+        self.db.open_db()
+        placeholders = ",".join(("?", ) * len(artist_mbids))
+        cursor = db.execute_sql(query % placeholders, params=tuple(artist_mbids))
+
+        artists = defaultdict(list)
+        for rec in cursor.fetchall():
+            artists[rec[2]].append({"popularity": rec[0], "recording_mbid": rec[1], "artist_mbid": rec[2], "subsonic_id": rec[3]})
+
+        for artist in artists:
+            artists[artist] = self.fetch_and_select_on_popularity(artists[artist], begin_percent, end_percent, num_recordings)
+
+        return artists
+
+
+    # TODO: use this in both tag and artist search classes
+    def fetch_and_select_on_popularity(self, recordings, begin_percent, end_percent, num_recordings):
+        """
+            Break the data into over, matching and under (percent) groups
+        """
+
+        matching_recordings = []
+        over_recordings = []
+        under_recordings = []
+        for rec in recordings:
+            if rec["popularity"] >= begin_percent:
+                if rec["popularity"] < end_percent:
+                    matching_recordings.append(rec)
+                else:
+                    over_recordings.append(rec)
+            else:
+                under_recordings.append(rec)
+
+        # If we have enough recordings, we're done!
+        if len(matching_recordings) >= num_recordings:
+            return plist(matching_recordings)
+
+        # We don't have enough recordings, see if we can pick the ones outside
+        # of our desired range in a best effort to make a playlist.
+        # Keep adding the best matches until we (hopefully) get our desired number of recordings
+        while len(matching_recordings) < num_recordings:
+            if under_recordings:
+                under_diff = begin_percent - under_recordings[-1]["popularity"]
+            else:
+                under_diff = 1.0
+
+            if over_recordings:
+                over_diff = over_recordings[-1]["popularity"] - end_percent
+            else:
+                over_diff = 1.0
+
+            if over_diff == 1.0 and under_diff == 1.0:
+                break
+
+            if under_diff < over_diff:
+                matching_recordings.insert(0, under_recordings.pop(-1))
+            else:
+                matching_recordings.insert(len(matching_recordings), over_recordings.pop(0))
+
+        return plist(matching_recordings)
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index 23a9bf4..56840c6 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -6,6 +6,7 @@
 from troi.splitter import plist
 
 from lb_content_resolver.tag_search import LocalRecordingSearchByTagService
+from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService
 from lb_content_resolver.model.database import db
 import config
 
@@ -51,6 +52,7 @@ def generate(self, mode, prompt):
 
         patch = LBRadioPatch({"mode": mode, "prompt": prompt, "echo": True, "debug": True, "min_recordings": 1})
         patch.register_service(LocalRecordingSearchByTagService(self.db))
+        patch.register_service(LocalRecordingSearchByArtistService(self.db))
 
         # Now generate the playlist
         try:
diff --git a/resolve.py b/resolve.py
index acdd065..bfc902d 100755
--- a/resolve.py
+++ b/resolve.py
@@ -11,6 +11,7 @@
 from lb_content_resolver.lb_radio import ListenBrainzRadioLocal
 from lb_content_resolver.utils import ask_yes_no_question
 from lb_content_resolver.top_tags import TopTags
+from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService
 import config
 
 
@@ -90,6 +91,13 @@ def top_tags(index_dir, count):
     tt.print_top_tags_tightly(count)
 
 
+@click.command()
+@click.argument('index_dir')
+def artist_test(index_dir):
+    db = Database(index_dir)
+    s = LocalRecordingSearchByArtistService(db)
+    s.search(["8f6bd1e4-fbe1-4f50-aa9b-94c450ec0f11", "067102ea-9519-4622-9077-57ca4164cfbb"], .9, .6, 20)
+    
 cli.add_command(create)
 cli.add_command(scan)
 cli.add_command(playlist)
@@ -98,6 +106,7 @@ def top_tags(index_dir, count):
 cli.add_command(subsonic)
 cli.add_command(lb_radio)
 cli.add_command(top_tags)
+cli.add_command(artist_test)
 
 
 def usage(command):

From e48b4e4dffc52f0b13a2e5b944a409264d277661 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 24 Dec 2023 23:05:15 +0100
Subject: [PATCH 02/39] Artist lb radio works!

---
 lb_content_resolver/artist_search.py | 60 +++++++++++++++-------------
 1 file changed, 33 insertions(+), 27 deletions(-)

diff --git a/lb_content_resolver/artist_search.py b/lb_content_resolver/artist_search.py
index dc7be25..a28f4d0 100755
--- a/lb_content_resolver/artist_search.py
+++ b/lb_content_resolver/artist_search.py
@@ -8,17 +8,18 @@
 
 from lb_content_resolver.model.database import db
 from lb_content_resolver.model.recording import Recording, RecordingMetadata
-#from troi.recording_search_service import RecordingSearchByArtistService
+from troi.recording_search_service import RecordingSearchByArtistService
 from troi.splitter import plist
+from troi import Recording as TroiRecording
 
 
-class LocalRecordingSearchByArtistService: #(RecordingSearchByArtistService):
+class LocalRecordingSearchByArtistService(RecordingSearchByArtistService):
     ''' 
     Given the local database, search for artists that meet given tag criteria
     '''
 
     def __init__(self, db):
-#        RecordingSearchByTagService.__init__(self)
+        RecordingSearchByArtistService.__init__(self)
         self.db = db
 
     def search(self, artist_mbids, begin_percent, end_percent, num_recordings):
@@ -38,8 +39,6 @@ def search(self, artist_mbids, begin_percent, end_percent, num_recordings):
         ignored.
         """
 
-        print(artist_mbids)
-
         query = """SELECT popularity
                         , recording_mbid
                         , artist_mbid
@@ -85,30 +84,37 @@ def fetch_and_select_on_popularity(self, recordings, begin_percent, end_percent,
             else:
                 under_recordings.append(rec)
 
-        # If we have enough recordings, we're done!
-        if len(matching_recordings) >= num_recordings:
-            return plist(matching_recordings)
+        # If we have enough recordings, skip the extending part
+        if len(matching_recordings) < num_recordings:
+            # We don't have enough recordings, see if we can pick the ones outside
+            # of our desired range in a best effort to make a playlist.
+            # Keep adding the best matches until we (hopefully) get our desired number of recordings
+            while len(matching_recordings) < num_recordings:
+                if under_recordings:
+                    under_diff = begin_percent - under_recordings[-1]["popularity"]
+                else:
+                    under_diff = 1.0
 
-        # We don't have enough recordings, see if we can pick the ones outside
-        # of our desired range in a best effort to make a playlist.
-        # Keep adding the best matches until we (hopefully) get our desired number of recordings
-        while len(matching_recordings) < num_recordings:
-            if under_recordings:
-                under_diff = begin_percent - under_recordings[-1]["popularity"]
-            else:
-                under_diff = 1.0
+                if over_recordings:
+                    over_diff = over_recordings[-1]["popularity"] - end_percent
+                else:
+                    over_diff = 1.0
 
-            if over_recordings:
-                over_diff = over_recordings[-1]["popularity"] - end_percent
-            else:
-                over_diff = 1.0
+                if over_diff == 1.0 and under_diff == 1.0:
+                    break
+
+                if under_diff < over_diff:
+                    matching_recordings.insert(0, under_recordings.pop(-1))
+                else:
+                    matching_recordings.insert(len(matching_recordings), over_recordings.pop(0))
 
-            if over_diff == 1.0 and under_diff == 1.0:
-                break
+        # Convert results into recordings
+        results = plist()
+        for rec in matching_recordings:
+            r = TroiRecording(mbid=rec["recording_mbid"])
+            if "subsonic_id" in rec:
+                r.musicbrainz={"subsonic_id": rec["subsonic_id"]}
 
-            if under_diff < over_diff:
-                matching_recordings.insert(0, under_recordings.pop(-1))
-            else:
-                matching_recordings.insert(len(matching_recordings), over_recordings.pop(0))
+            results.append(r)
 
-        return plist(matching_recordings)
+        return results

From ce81ee576f7a546ab1777133a31cf920eeb063a1 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Mon, 25 Dec 2023 12:47:05 +0100
Subject: [PATCH 03/39] Refactor class to select recordings so it can be used
 by more than one class

---
 lb_content_resolver/artist_search.py | 58 +------------------------
 lb_content_resolver/tag_search.py    | 50 +++-------------------
 lb_content_resolver/utils.py         | 64 ++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 101 deletions(-)

diff --git a/lb_content_resolver/artist_search.py b/lb_content_resolver/artist_search.py
index a28f4d0..113a09e 100755
--- a/lb_content_resolver/artist_search.py
+++ b/lb_content_resolver/artist_search.py
@@ -8,9 +8,9 @@
 
 from lb_content_resolver.model.database import db
 from lb_content_resolver.model.recording import Recording, RecordingMetadata
+from lb_content_resolver.utils import select_recordings_on_popularity
 from troi.recording_search_service import RecordingSearchByArtistService
 from troi.splitter import plist
-from troi import Recording as TroiRecording
 
 
 class LocalRecordingSearchByArtistService(RecordingSearchByArtistService):
@@ -61,60 +61,6 @@ def search(self, artist_mbids, begin_percent, end_percent, num_recordings):
             artists[rec[2]].append({"popularity": rec[0], "recording_mbid": rec[1], "artist_mbid": rec[2], "subsonic_id": rec[3]})
 
         for artist in artists:
-            artists[artist] = self.fetch_and_select_on_popularity(artists[artist], begin_percent, end_percent, num_recordings)
+            artists[artist] = select_recordings_on_popularity(artists[artist], begin_percent, end_percent, num_recordings)
 
         return artists
-
-
-    # TODO: use this in both tag and artist search classes
-    def fetch_and_select_on_popularity(self, recordings, begin_percent, end_percent, num_recordings):
-        """
-            Break the data into over, matching and under (percent) groups
-        """
-
-        matching_recordings = []
-        over_recordings = []
-        under_recordings = []
-        for rec in recordings:
-            if rec["popularity"] >= begin_percent:
-                if rec["popularity"] < end_percent:
-                    matching_recordings.append(rec)
-                else:
-                    over_recordings.append(rec)
-            else:
-                under_recordings.append(rec)
-
-        # If we have enough recordings, skip the extending part
-        if len(matching_recordings) < num_recordings:
-            # We don't have enough recordings, see if we can pick the ones outside
-            # of our desired range in a best effort to make a playlist.
-            # Keep adding the best matches until we (hopefully) get our desired number of recordings
-            while len(matching_recordings) < num_recordings:
-                if under_recordings:
-                    under_diff = begin_percent - under_recordings[-1]["popularity"]
-                else:
-                    under_diff = 1.0
-
-                if over_recordings:
-                    over_diff = over_recordings[-1]["popularity"] - end_percent
-                else:
-                    over_diff = 1.0
-
-                if over_diff == 1.0 and under_diff == 1.0:
-                    break
-
-                if under_diff < over_diff:
-                    matching_recordings.insert(0, under_recordings.pop(-1))
-                else:
-                    matching_recordings.insert(len(matching_recordings), over_recordings.pop(0))
-
-        # Convert results into recordings
-        results = plist()
-        for rec in matching_recordings:
-            r = TroiRecording(mbid=rec["recording_mbid"])
-            if "subsonic_id" in rec:
-                r.musicbrainz={"subsonic_id": rec["subsonic_id"]}
-
-            results.append(r)
-
-        return results
diff --git a/lb_content_resolver/tag_search.py b/lb_content_resolver/tag_search.py
index 06d600a..4aa6c28 100755
--- a/lb_content_resolver/tag_search.py
+++ b/lb_content_resolver/tag_search.py
@@ -8,6 +8,7 @@
 
 from lb_content_resolver.model.database import db
 from lb_content_resolver.model.recording import Recording, RecordingMetadata
+from lb_content_resolver.utils import select_recordings_on_popularity
 from troi.recording_search_service import RecordingSearchByTagService
 from troi.splitter import plist
 
@@ -51,52 +52,11 @@ def search(self, tags, operator, begin_percent, end_percent, num_recordings):
         placeholders = ",".join(("?", ) * len(tags))
         cursor = db.execute_sql(query % (placeholders, pop_clause), params)
 
-        # Break the data into over, matching and under (percent) groups
-        matching_recordings = []
-        over_recordings = []
-        under_recordings = []
+        recordings = []
         for rec in cursor.fetchall():
-            recording = {
-                "recording_mbid": rec[0],
-                "percent": rec[1],
-                "subsonic_id": rec[2]
-            }
-
-            if rec[1] >= begin_percent:
-                if rec[1] < end_percent:
-                    matching_recordings.append(recording)
-                else:
-                    over_recordings.append(recording)
-            else:
-                under_recordings.append(recording)
-
-        # If we have enough recordings, we're done!
-        if len(matching_recordings) >= num_recordings:
-            return plist(matching_recordings)
-
-        # We don't have enough recordings, see if we can pick the ones outside
-        # of our desired range in a best effort to make a playlist.
-        # Keep adding the best matches until we (hopefully) get our desired number of recordings
-        while len(matching_recordings) < num_recordings:
-            if under_recordings:
-                under_diff = begin_percent - under_recordings[-1]["percent"]
-            else:
-                under_diff = 1.0
-
-            if over_recordings:
-                over_diff = over_recordings[-1]["percent"] - end_percent
-            else:
-                over_diff = 1.0
-
-            if over_diff == 1.0 and under_diff == 1.0:
-                break
-
-            if under_diff < over_diff:
-                matching_recordings.insert(0, under_recordings.pop(-1))
-            else:
-                matching_recordings.insert(len(matching_recordings), over_recordings.pop(0))
-
-        return plist(matching_recordings)
+            recordings.append({"recording_mbid": rec[0], "popularity": rec[1], "subsonic_id": rec[2]})
+
+        return select_recordings_on_popularity(recordings, begin_percent, end_percent, num_recordings)
 
     def or_search(self, tags, min_popularity=None, max_popularity=None):
         """
diff --git a/lb_content_resolver/utils.py b/lb_content_resolver/utils.py
index 5c5bdd3..3a792f9 100755
--- a/lb_content_resolver/utils.py
+++ b/lb_content_resolver/utils.py
@@ -1,3 +1,7 @@
+from troi.splitter import plist
+from troi import Recording as TroiRecording
+
+
 def ask_yes_no_question(prompt):
 
     while True:
@@ -12,3 +16,63 @@ def ask_yes_no_question(prompt):
             return False
         else:
             print("eh? try again.")
+
+
+def select_recordings_on_popularity(recordings, begin_percent, end_percent, num_recordings):
+    """
+       Given dicts of recording data, select the recordings whose popularity lies in the
+       range begin_percent <= popularity < end_percent.
+
+       If fewer than num_recordings recordings lie in that range, recordings closest to the
+       desired range are added until num_recordings is reached or no candidates remain.
+       NOTE: picking the closest ones assumes recordings is sorted by ascending popularity.
+       Returns a plist of troi Recording objects.
+    """
+
+    matching_recordings = []
+    over_recordings = []
+    under_recordings = []
+    for rec in recordings:
+        if rec["popularity"] < begin_percent:
+            under_recordings.append(rec)
+        elif rec["popularity"] < end_percent:
+            matching_recordings.append(rec)
+        else:
+            over_recordings.append(rec)
+
+    # We may not have enough recordings, see if we can pick the ones outside
+    # of our desired range in a best effort to make a playlist.
+    # Keep adding the best matches until we (hopefully) get our desired number of recordings
+    while len(matching_recordings) < num_recordings:
+        # With ascending input the nearest candidates are the last under recording
+        # and the first over recording, so measure the distance on exactly those.
+        if under_recordings:
+            under_diff = begin_percent - under_recordings[-1]["popularity"]
+        else:
+            under_diff = None
+
+        if over_recordings:
+            over_diff = over_recordings[0]["popularity"] - end_percent
+        else:
+            over_diff = None
+
+        if over_diff is None and under_diff is None:
+            break
+
+        # Take from the under side when it is the only side left or strictly closer,
+        # otherwise take from the over side. Prepending/appending keeps the order.
+        if under_diff is not None and (over_diff is None or under_diff < over_diff):
+            matching_recordings.insert(0, under_recordings.pop(-1))
+        else:
+            matching_recordings.append(over_recordings.pop(0))
+
+    # Convert results into recordings
+    results = plist()
+    for rec in matching_recordings:
+        r = TroiRecording(mbid=rec["recording_mbid"])
+        if "subsonic_id" in rec:
+            r.musicbrainz = {"subsonic_id": rec["subsonic_id"]}
+
+        results.append(r)
+
+    return results

From 0081aed9daf29f2c1d963152b6501e0721d8591a Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sat, 30 Dec 2023 18:33:23 +0100
Subject: [PATCH 04/39] non-local playlists now resolve to local files.

---
 lb_content_resolver/content_resolver.py | 35 ++++++++++++++-----------
 lb_content_resolver/lb_radio.py         | 28 ++++++++++++++++++++
 lb_content_resolver/playlist.py         |  4 +--
 resolve.py                              |  3 +++
 4 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index 38c7312..208af3f 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -9,7 +9,7 @@
 from lb_content_resolver.model.recording import Recording
 from lb_content_resolver.fuzzy_index import FuzzyIndex
 from lb_matching_tools.cleaner import MetadataCleaner
-from lb_content_resolver.playlist import read_jspf_playlist, generate_m3u_playlist
+from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist
 
 SUPPORTED_FORMATS = ["flac", "ogg", "mp3", "m4a", "wma"]
 
@@ -84,20 +84,26 @@ def resolve_recordings(self, query_data, match_threshold):
 
         return resolved_recordings
 
-    def resolve_playlist(self, jspf_playlist, m3u_playlist, match_threshold):
+    def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None):
         """ 
-            Given a JSPF playlist, resolve tracks and write the m3u file. Print output to console for now.
+            Given a JSPF playlist or a list of troi recordings, resolve tracks and return a list of resolved recordings.
+            match_threshold is a value between 0 and 1.0 for the match score required before a track is matched.
         """
+
+        if recordings is None and jspf_playlist is None:
+            raise ValueError("Either recordings or jspf_playlist must be passed.")
+
         self.db.open_db()
         self.build_index()
 
-        jspf = read_jspf_playlist(jspf_playlist)
-
-        title = jspf["playlist"]["title"]
-        recordings = []
         artist_recording_data = []
-        for i, track in enumerate(jspf["playlist"]["track"]):
-            artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"]})
+        if jspf_playlist is not None:
+            jspf = read_jspf_playlist(jspf_playlist)
+            for i, track in enumerate(jspf["playlist"]["track"]):
+                artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"]})
+        else:
+            for rec in recordings:
+                artist_recording_data.append({"artist_name": rec.artist.name, "recording_name": rec.name })
 
         hits = self.resolve_recordings(artist_recording_data, match_threshold)
         hit_index = {hit["index"]: hit for hit in hits}
@@ -106,7 +112,7 @@ def resolve_playlist(self, jspf_playlist, m3u_playlist, match_threshold):
         recordings = Recording.select().where(Recording.id.in_(recording_ids))
         rec_index = {r.id: r for r in recordings}
 
-        results = []
+        results = [None] * len(artist_recording_data)
         for i, artist_recording in enumerate(artist_recording_data):
             if i not in hit_index:
                 print("FAIL %s - %s not resolved." % (artist_recording["artist_name"], artist_recording["recording_name"]))
@@ -114,16 +120,13 @@ def resolve_playlist(self, jspf_playlist, m3u_playlist, match_threshold):
 
             hit = hit_index[i]
             rec = rec_index[hit["recording_id"]]
-            hit["file_path"] = rec.file_path
-            hit["artist_name"] = rec.artist_name
-            hit["recording_name"] = rec.recording_name
-            results.append(hit)
+            results[hit["index"]] = rec
             print("OK   %s - %s resolved: %s" % (rec.artist_name, rec.recording_name, os.path.basename(rec.file_path)))
 
         if len(results) == 0:
             print("Sorry, but no tracks could be resolved, no playlist generated.")
             return
 
-        print(f'\n{len(recordings)} recordings resolved, {len(jspf["playlist"]["track"]) - len(recordings)} not resolved.')
+        print(f'\n{len(recordings)} recordings resolved, {len(artist_recording_data) - len(recordings)} not resolved.')
 
-        generate_m3u_playlist(m3u_playlist, title, recordings)
+        return results
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index 56840c6..8965898 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -8,6 +8,7 @@
 from lb_content_resolver.tag_search import LocalRecordingSearchByTagService
 from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService
 from lb_content_resolver.model.database import db
+from lb_content_resolver.content_resolver import ContentResolver
 import config
 
 
@@ -16,6 +17,9 @@ class ListenBrainzRadioLocal:
        Generate local playlists against a music collection available via subsonic.
     '''
 
+    # TODO: Make this an argument
+    MATCH_THRESHOLD = .8
+
     def __init__(self, db):
         self.db = db
 
@@ -65,4 +69,28 @@ def generate(self, mode, prompt):
             print("Your prompt generated an empty playlist.")
             self.sanity_check()
 
+        # Resolve any tracks that have not been resolved to a subsonic_id or a local file
+        self.resolve_recordings(playlist)
+
         return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}}
+
+    def resolve_recordings(self, playlist):
+
+        recordings = []
+        for recording in playlist.playlists[0].recordings:
+            if "subsonic_id" in recording.musicbrainz or "filename" in recording.musicbrainz:
+                continue
+
+            recordings.append(recording)
+
+        cr = ContentResolver(self.db)
+        resolved = cr.resolve_playlist(self.MATCH_THRESHOLD, recordings)
+
+        for i, t_recording in enumerate(playlist.playlists[0].recordings):
+            if resolved[i] is not None:
+                # TODO make this work for subsonic_ids
+#                if "subsonic_id" in resolved.musicbrainz:
+#                    recording.musicbrainz["subsonic_id"] = resolved._id
+
+                if resolved[i].file_path != "":
+                    t_recording.musicbrainz["filename"] = resolved[i].file_path
diff --git a/lb_content_resolver/playlist.py b/lb_content_resolver/playlist.py
index 3ddd0bf..8c88f17 100644
--- a/lb_content_resolver/playlist.py
+++ b/lb_content_resolver/playlist.py
@@ -12,9 +12,9 @@ def read_jspf_playlist(jspf_file):
     return json.loads(js)
 
 
-def generate_m3u_playlist(file_name, playlist_title, recordings):
+def write_m3u_playlist(file_name, playlist_title, hits):
     """
-       Given a list of Recording objects, write a m3u playlist.
+       Given a list of Recordings, write a m3u playlist.
     """
 
     with open(file_name, "w") as m3u:
diff --git a/resolve.py b/resolve.py
index bfc902d..7d12426 100755
--- a/resolve.py
+++ b/resolve.py
@@ -12,6 +12,7 @@
 from lb_content_resolver.utils import ask_yes_no_question
 from lb_content_resolver.top_tags import TopTags
 from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService
+from lb_content_resolver.playlist import write_m3u_playlist
 import config
 
 
@@ -65,7 +66,9 @@ def subsonic(index_dir):
 def playlist(index_dir, jspf_playlist, m3u_playlist, threshold):
     db = Database(index_dir)
     cr = ContentResolver(db)
+    title, recordings = cr.resolve_playlist(jspf_playlist, threshold)
     cr.resolve_playlist(jspf_playlist, m3u_playlist, threshold)
+    write_m3u_playlist(write_m3u_playlist, title, recordings)
 
 @click.command()
 @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True)

From 7922977a474e24c343d8c17daa06a5092978e44c Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sat, 30 Dec 2023 21:23:03 +0100
Subject: [PATCH 05/39] Fix location issue and store full pathname in the DB

---
 lb_content_resolver/content_resolver.py | 34 ++++++++++++++--------
 lb_content_resolver/database.py         |  2 +-
 lb_content_resolver/lb_radio.py         | 13 +++++----
 lb_content_resolver/model/subsonic.py   |  2 +-
 lb_content_resolver/playlist.py         | 21 ++++++++++++--
 lb_content_resolver/utils.py            | 16 +++++++++++
 requirements.txt                        |  1 +
 resolve.py                              | 38 ++++++++++++++++---------
 8 files changed, 91 insertions(+), 36 deletions(-)

diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index 208af3f..ae4e336 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -7,13 +7,14 @@
 
 from lb_content_resolver.model.database import db, setup_db
 from lb_content_resolver.model.recording import Recording
+from lb_content_resolver.model.subsonic import RecordingSubsonic
 from lb_content_resolver.fuzzy_index import FuzzyIndex
 from lb_matching_tools.cleaner import MetadataCleaner
-from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist
+from lb_content_resolver.playlist import read_jspf_playlist
+from lb_content_resolver.utils import bcolors
 
 SUPPORTED_FORMATS = ["flac", "ogg", "mp3", "m4a", "wma"]
 
-
 class ContentResolver:
     ''' 
     Scan a given path and enter/update the metadata in the search index
@@ -70,12 +71,10 @@ def resolve_recordings(self, query_data, match_threshold):
             for data in next_query_data:
                 recording_name = mc.clean_recording(data["recording_name"])
                 if recording_name != data["recording_name"]:
-                    print(f'RETRY recording {data["recording_name"]} => {recording_name}')
                     query_data.append({"artist_name": artist_name, "recording_name": recording_name, "index": data["index"]})
 
                 artist_name = mc.clean_artist(data["artist_name"])
                 if artist_name != data["artist_name"]:
-                    print(f'RETRY artist {data["artist_name"]} => {artist_name}')
                     query_data.append({"artist_name": artist_name, "recording_name": recording_name, "index": data["index"]})
 
             # If nothing got cleaned, we can finish now
@@ -93,35 +92,46 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
         if recordings is None and jspf_playlist is None:
             raise ValueError("Either recordings or jspf_playlist must be passed.")
 
+        print("\nResolve recordings to local files or subsonic ids")
+
         self.db.open_db()
         self.build_index()
 
         artist_recording_data = []
         if jspf_playlist is not None:
-            jspf = read_jspf_playlist(jspf_playlist)
-            for i, track in enumerate(jspf["playlist"]["track"]):
+            for i, track in enumerate(jspf_playlist["playlist"]["track"]):
                 artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"]})
         else:
             for rec in recordings:
-                artist_recording_data.append({"artist_name": rec.artist.name, "recording_name": rec.name })
+                artist_recording_data.append({"artist_name": rec.artist.name, "recording_name": rec.name})
 
         hits = self.resolve_recordings(artist_recording_data, match_threshold)
         hit_index = {hit["index"]: hit for hit in hits}
 
         recording_ids = [r["recording_id"] for r in hits]
-        recordings = Recording.select().where(Recording.id.in_(recording_ids))
-        rec_index = {r.id: r for r in recordings}
-
+        recordings = Recording \
+                      .select(Recording, RecordingSubsonic.subsonic_id) \
+                      .join(RecordingSubsonic, peewee.JOIN.LEFT_OUTER, on=(Recording.id == RecordingSubsonic.recording_id)) \
+                      .where(Recording.id.in_(recording_ids)) \
+                      .dicts()
+        rec_index = {r["id"]: r for r in recordings}
+
+        print("     %-40s %-40s %-40s" % ("ARTIST", "RECORDING", "RELEASE"))
         results = [None] * len(artist_recording_data)
         for i, artist_recording in enumerate(artist_recording_data):
             if i not in hit_index:
-                print("FAIL %s - %s not resolved." % (artist_recording["artist_name"], artist_recording["recording_name"]))
+                print(bcolors.FAIL + "FAIL"  + bcolors.ENDC + " %-40s - %-40s" % (artist_recording["artist_name"][:39],
+                                              artist_recording["recording_name"][:39]))
                 continue
 
             hit = hit_index[i]
             rec = rec_index[hit["recording_id"]]
             results[hit["index"]] = rec
-            print("OK   %s - %s resolved: %s" % (rec.artist_name, rec.recording_name, os.path.basename(rec.file_path)))
+            print(bcolors.OKGREEN + "OK" + bcolors.ENDC + "   %-40s %-40s" % (artist_recording["artist_name"][:39],
+                                          artist_recording["recording_name"][:39]))
+            print("     %-40s %-40s %-40s" % (rec["artist_name"][:39],
+                                              rec["recording_name"][:39],
+                                              rec["release_name"][:39]))
 
         if len(results) == 0:
             print("Sorry, but no tracks could be resolved, no playlist generated.")
diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index 8016787..af41f71 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -207,7 +207,7 @@ def read_metadata_and_add(self, relative_path, format, mtime, update):
         # really isn't for you anyway. heh.
         if mdata is not None:
             mdata["mtime"] = mtime
-            mdata["file_path"] = relative_path
+            mdata["file_path"] = file_path
 
             mdata["artist_mbid"] = self.convert_to_uuid(mdata["artist_mbid"])
             mdata["release_mbid"] = self.convert_to_uuid(mdata["release_mbid"])
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index 8965898..130c8c3 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -63,7 +63,7 @@ def generate(self, mode, prompt):
             playlist = patch.generate_playlist()
         except RuntimeError as err:
             print(f"LB Radio generation failed: {err}")
-            return
+            return None
 
         if playlist == None:
             print("Your prompt generated an empty playlist.")
@@ -88,9 +88,10 @@ def resolve_recordings(self, playlist):
 
         for i, t_recording in enumerate(playlist.playlists[0].recordings):
             if resolved[i] is not None:
-                # TODO make this work for subsonic_ids
-#                if "subsonic_id" in resolved.musicbrainz:
-#                    recording.musicbrainz["subsonic_id"] = resolved._id
+                if resolved[i]["subsonic_id"] != "":
+                    t_recording.musicbrainz["subsonic_id"] = resolved[i]["subsonic_id"]
 
-                if resolved[i].file_path != "":
-                    t_recording.musicbrainz["filename"] = resolved[i].file_path
+                if resolved[i]["file_path"] != "":
+                    t_recording.musicbrainz["filename"] = resolved[i]["file_path"]
+
+                t_recording.duration = resolved[i]["duration"]
diff --git a/lb_content_resolver/model/subsonic.py b/lb_content_resolver/model/subsonic.py
index 728592d..59601df 100644
--- a/lb_content_resolver/model/subsonic.py
+++ b/lb_content_resolver/model/subsonic.py
@@ -14,7 +14,7 @@ class Meta:
         table_name = "recording_subsonic"
 
     id = AutoField()
-    recording = ForeignKeyField(Recording, backref="metadata")
+    recording = ForeignKeyField(Recording, backref="subsonic")
 
     subsonic_id = TextField()
     last_updated = DateTimeField(null=False, default=datetime.datetime.now)
diff --git a/lb_content_resolver/playlist.py b/lb_content_resolver/playlist.py
index 8c88f17..5b28634 100644
--- a/lb_content_resolver/playlist.py
+++ b/lb_content_resolver/playlist.py
@@ -12,9 +12,9 @@ def read_jspf_playlist(jspf_file):
     return json.loads(js)
 
 
-def write_m3u_playlist(file_name, playlist_title, hits):
+def write_m3u_playlist_from_results(file_name, playlist_title, hits):
     """
-       Given a list of Recordings, write a m3u playlist.
+       Given a list of Recordings, write an m3u playlist.
     """
 
     with open(file_name, "w") as m3u:
@@ -24,3 +24,20 @@ def write_m3u_playlist(file_name, playlist_title, hits):
         for rec in recordings:
             m3u.write("#EXTINF %d,%s\n" % (rec.duration / 1000, rec.recording_name))
             m3u.write(rec.file_path + "\n")
+
+
+def write_m3u_playlist_from_jspf(file_name, jspf):
+    """
+       Given a jspf playlist, write an m3u playlist.
+    """
+
+    with open(file_name, "w") as m3u:
+        m3u.write("#EXTM3U\n")
+        m3u.write("#EXTENC: UTF-8\n")
+        m3u.write("#PLAYLIST %s\n" % jspf["playlist"]["title"])
+        for track in jspf["playlist"]["track"]:
+            if "location" not in track:
+                continue
+
+            m3u.write("#EXTINF %d,%s\n" % (track["duration"] / 1000, track["title"]))
+            m3u.write(track["location"] + "\n")
diff --git a/lb_content_resolver/utils.py b/lb_content_resolver/utils.py
index 3a792f9..30a09eb 100755
--- a/lb_content_resolver/utils.py
+++ b/lb_content_resolver/utils.py
@@ -76,3 +76,19 @@ def select_recordings_on_popularity(recordings, begin_percent, end_percent, num_
         results.append(r)
 
     return results
+
+
+class bcolors:
+    """ Basic ASCII color codes """
+
+    HEADER = '\033[95m'
+    OKBLUE = '\033[94m'
+    OKCYAN = '\033[96m'
+    OKGREEN = '\033[92m'
+    WARNING = '\033[93m'
+    FAIL = '\033[91m'
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
+
diff --git a/requirements.txt b/requirements.txt
index 81723cd..901d241 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,4 @@ requests
 py-sonic@git+https://github.com/mayhem/py-sonic.git@int-vs-string
 tqdm
 troi@git+https://github.com/metabrainz/troi-recommendation-playground.git@lb-local
+icecream
diff --git a/resolve.py b/resolve.py
index 7d12426..c527976 100755
--- a/resolve.py
+++ b/resolve.py
@@ -12,7 +12,7 @@
 from lb_content_resolver.utils import ask_yes_no_question
 from lb_content_resolver.top_tags import TopTags
 from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService
-from lb_content_resolver.playlist import write_m3u_playlist
+from lb_content_resolver.playlist import write_m3u_playlist_from_results, write_m3u_playlist_from_jspf
 import config
 
 
@@ -25,6 +25,7 @@ def cli():
 @click.command()
 @click.argument('index_dir')
 def create(index_dir):
+    """Create a new index directory to track a music collection"""
     db = Database(index_dir)
     db.create()
 
@@ -33,6 +34,7 @@ def create(index_dir):
 @click.argument('index_dir')
 @click.argument('music_dir')
 def scan(index_dir, music_dir):
+    """Scan a directory and its subdirectories for music files to add to the collection"""
     db = Database(index_dir)
     db.scan(music_dir)
 
@@ -40,6 +42,7 @@ def scan(index_dir, music_dir):
 @click.command()
 @click.argument('index_dir')
 def cleanup(index_dir):
+    """Perform a database cleanup. Check that files exist and if they don't remove from the index"""
     db = Database(index_dir)
     db.database_cleanup()
 
@@ -47,6 +50,7 @@ def cleanup(index_dir):
 @click.command()
 @click.argument('index_dir')
 def metadata(index_dir):
+    """Lookup metadata (popularity and tags) for recordings"""
     db = Database(index_dir)
     lookup = MetadataLookup(db)
     lookup.lookup()
@@ -55,6 +59,7 @@ def metadata(index_dir):
 @click.command()
 @click.argument('index_dir')
 def subsonic(index_dir):
+    """Scan a remote subsonic music collection"""
     db = SubsonicDatabase(index_dir)
     db.sync()
 
@@ -64,43 +69,49 @@ def subsonic(index_dir):
 @click.argument('m3u_playlist')
 @click.option('-t', '--threshold', default=.80)
 def playlist(index_dir, jspf_playlist, m3u_playlist, threshold):
+    """ Resolve a JSPF file with MusicBrainz recording MBIDs to files in the local collection"""
     db = Database(index_dir)
     cr = ContentResolver(db)
-    title, recordings = cr.resolve_playlist(jspf_playlist, threshold)
-    cr.resolve_playlist(jspf_playlist, m3u_playlist, threshold)
-    write_m3u_playlist(write_m3u_playlist, title, recordings)
+    jspf = read_jspf_playlist(jspf_playlist)
+    results = cr.resolve_playlist(threshold, jspf_playlist=jspf_playlist)
+    write_m3u_playlist_from_results(m3u_playlist, results, jspf["playlist"]["title"])
 
 @click.command()
 @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True)
+@click.option('-p', '--save-to-playlist', required=False)
+@click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file")
 @click.argument('index_dir')
 @click.argument('mode')
 @click.argument('prompt')
-def lb_radio(upload_to_subsonic, index_dir, mode, prompt):
+def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, prompt):
+    """Use the ListenBrainz Radio engine to create a playlist from a prompt, using a local music collection"""
     db = SubsonicDatabase(index_dir)
     r = ListenBrainzRadioLocal(db)
     jspf = r.generate(mode, prompt)
+    if jspf is None:
+        return
 
     if upload_to_subsonic and len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "":
-        if ask_yes_no_question("Upload via subsonic? (Y/n)"):
+        if dont_ask or ask_yes_no_question("Upload via subsonic? (Y/n)"):
             print("uploading playlist")
             db.upload_playlist(jspf)
+    elif save_to_playlist is not None and len(jspf["playlist"]["track"]) > 0:
+        if dont_ask or ask_yes_no_question(f"Save to '{save_to_playlist}'? (Y/n)"):
+            print("saving playlist")
+            write_m3u_playlist_from_jspf(save_to_playlist, jspf)
+    else:
+        print("Playlist displayed, but not saved. Use -p or -u options to save/upload playlists.")
 
 @click.command()
 @click.argument('index_dir')
 @click.argument('count', required=False, default=250)
 def top_tags(index_dir, count):
+    "Display the top most used tags in the music collection. Useful for writing LB Radio tag prompts"""
     db = Database(index_dir)
     tt = TopTags(db)
     tt.print_top_tags_tightly(count)
 
 
-@click.command()
-@click.argument('index_dir')
-def artist_test(index_dir):
-    db = Database(index_dir)
-    s = LocalRecordingSearchByArtistService(db)
-    s.search(["8f6bd1e4-fbe1-4f50-aa9b-94c450ec0f11", "067102ea-9519-4622-9077-57ca4164cfbb"], .9, .6, 20)
-    
 cli.add_command(create)
 cli.add_command(scan)
 cli.add_command(playlist)
@@ -109,7 +120,6 @@ def artist_test(index_dir):
 cli.add_command(subsonic)
 cli.add_command(lb_radio)
 cli.add_command(top_tags)
-cli.add_command(artist_test)
 
 
 def usage(command):

From 7805d026ce179e78b5d311f32ac3757628d4fbff Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sat, 30 Dec 2023 21:37:11 +0100
Subject: [PATCH 06/39] Add progress bar to scan collection

---
 lb_content_resolver/content_resolver.py | 10 +++++-----
 lb_content_resolver/database.py         | 23 +++++++++++++++--------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index ae4e336..db37430 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -120,8 +120,8 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
         results = [None] * len(artist_recording_data)
         for i, artist_recording in enumerate(artist_recording_data):
             if i not in hit_index:
-                print(bcolors.FAIL + "FAIL"  + bcolors.ENDC + " %-40s - %-40s" % (artist_recording["artist_name"][:39],
-                                              artist_recording["recording_name"][:39]))
+                print(bcolors.FAIL + "FAIL"  + bcolors.ENDC + " %-40s - %-40s" % (artist_recording["recording_name"][:39],
+                                              artist_recording["artist_name"][:39]))
                 continue
 
             hit = hit_index[i]
@@ -129,9 +129,9 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
             results[hit["index"]] = rec
             print(bcolors.OKGREEN + "OK" + bcolors.ENDC + "   %-40s %-40s" % (artist_recording["artist_name"][:39],
                                           artist_recording["recording_name"][:39]))
-            print("     %-40s %-40s %-40s" % (rec["artist_name"][:39],
-                                              rec["recording_name"][:39],
-                                              rec["release_name"][:39]))
+            print("     %-40s %-40s %-40s" % (rec["recording_name"][:39],
+                                              rec["release_name"][:39],
+                                              rec["artist_name"][:39]))
 
         if len(results) == 0:
             print("Sorry, but no tracks could be resolved, no playlist generated.")
diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index af41f71..c5b8047 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -7,6 +7,7 @@
 
 from unidecode import unidecode
 import peewee
+from tqdm import tqdm
 
 from lb_content_resolver.model.database import db, setup_db
 from lb_content_resolver.model.recording import Recording, RecordingMetadata
@@ -74,7 +75,9 @@ def scan(self, music_dir):
         self.audio_file_count = self.track_count_estimate
         print("Found %s audio files" % self.audio_file_count)
 
-        self.traverse("")
+        with tqdm(total=self.track_count_estimate) as self.progress_bar:
+            self.traverse("")
+
         self.close_db()
 
         print("Checked %s tracks:" % self.total)
@@ -146,9 +149,9 @@ def add_or_update_recording(self, mdata):
         with db.atomic() as transaction:
             if mdata is not None:
                 details = " %d%% " % (100 * self.total / self.audio_file_count)
-                details += " %-30s %-30s %-30s" % ((mdata.get("artist_name", "") or "")[:29], 
+                details += " %-30s %-30s %-30s" % ((mdata.get("recording_name", "") or "")[:29], 
                                                    (mdata.get("release_name", "") or "")[:29],
-                                                   (mdata.get("recording_name", "") or "")[:29])
+                                                   (mdata.get("artist_name", "") or "")[:29])
             else:
                 details = ""
 
@@ -245,11 +248,14 @@ def add(self, relative_path):
         stats = os.stat(fullpath)
         ts = datetime.datetime.fromtimestamp(stats[8])
 
+        # update the progress bar
+        self.progress_bar.update(1)
+
         base, ext = os.path.splitext(relative_path)
         ext = ext.lower()[1:]
         base = os.path.basename(relative_path)
         if ext not in SUPPORTED_FORMATS:
-            print("  unknown %s" % base)
+            self.progress_bar.write("  unknown %s" % base)
             self.skipped += 1
             return
 
@@ -263,7 +269,7 @@ def add(self, relative_path):
             exists = True
             if recording.mtime == ts:
                 self.not_changed += 1
-                print("unchanged %s" % base)
+                self.progress_bar.write("unchanged %s" % base)
                 return
 
         # read the file's last modified time to avoid re-reading tags
@@ -272,14 +278,15 @@ def add(self, relative_path):
 
         status, details = self.read_metadata_and_add(relative_path, ext, ts, exists)
         if status == "updated":
-            print("   update %s" % details)
+            self.progress_bar.write("   update %s" % details)
             self.updated += 1
         elif status == "added":
-            print("      add %s" % details)
+            self.progress_bar.write("      add %s" % details)
             self.added += 1
         else:
             self.error += 1
-            print("    error %s" % details)
+            self.progress_bar.write("    error %s" % details)
+
 
     def database_cleanup(self):
         '''

From e425baec0557efc511c4419c5c94594a4e0ea0e6 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sat, 30 Dec 2023 22:14:35 +0100
Subject: [PATCH 07/39] writing and uploading resolved playlists now works!

---
 lb_content_resolver/content_resolver.py |  2 +-
 lb_content_resolver/subsonic.py         | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index db37430..03733ac 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -120,7 +120,7 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
         results = [None] * len(artist_recording_data)
         for i, artist_recording in enumerate(artist_recording_data):
             if i not in hit_index:
-                print(bcolors.FAIL + "FAIL"  + bcolors.ENDC + " %-40s - %-40s" % (artist_recording["recording_name"][:39],
+                print(bcolors.FAIL + "FAIL"  + bcolors.ENDC + " %-40s %-40s" % (artist_recording["recording_name"][:39],
                                               artist_recording["artist_name"][:39]))
                 continue
 
diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py
index ad7d41d..a6eab8e 100755
--- a/lb_content_resolver/subsonic.py
+++ b/lb_content_resolver/subsonic.py
@@ -124,9 +124,12 @@ def upload_playlist(self, jspf):
 
         conn = libsonic.Connection(config.SUBSONIC_HOST, config.SUBSONIC_USER, config.SUBSONIC_PASSWORD, config.SUBSONIC_PORT)
 
-        song_ids = [
-            track["extension"]["https://musicbrainz.org/doc/jspf#track"]["additional_metadata"]["subsonic_identifier"][33:]
-            for track in jspf["playlist"]["track"]
-        ]
+        song_ids = []
+        for track in jspf["playlist"]["track"]:
+            try:
+                song_ids.append(track["extension"]["https://musicbrainz.org/doc/jspf#track"]["additional_metadata"]["subsonic_identifier"][33:])
+            except KeyError:
+                continue
+        
         name = jspf["playlist"]["title"]
         conn.createPlaylist(name=name, songIds=song_ids)

From f977dda22492108e71af38999d90fe71699a3b5e Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sat, 30 Dec 2023 22:42:09 +0100
Subject: [PATCH 08/39] artist, tag and stats elements now play cleanly
 together!

---
 lb_content_resolver/content_resolver.py | 8 ++++----
 lb_content_resolver/lb_radio.py         | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index 03733ac..cf23e46 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -116,19 +116,19 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
                       .dicts()
         rec_index = {r["id"]: r for r in recordings}
 
-        print("     %-40s %-40s %-40s" % ("ARTIST", "RECORDING", "RELEASE"))
+        print("     %-40s %-40s %-40s" % ("RECORDING", "RELEASE", "ARTIST"))
         results = [None] * len(artist_recording_data)
         for i, artist_recording in enumerate(artist_recording_data):
             if i not in hit_index:
-                print(bcolors.FAIL + "FAIL"  + bcolors.ENDC + " %-40s %-40s" % (artist_recording["recording_name"][:39],
+                print(bcolors.FAIL + "FAIL"  + bcolors.ENDC + " %-40s %-40s %-40s" % (artist_recording["recording_name"][:39], "",
                                               artist_recording["artist_name"][:39]))
                 continue
 
             hit = hit_index[i]
             rec = rec_index[hit["recording_id"]]
             results[hit["index"]] = rec
-            print(bcolors.OKGREEN + "OK" + bcolors.ENDC + "   %-40s %-40s" % (artist_recording["artist_name"][:39],
-                                          artist_recording["recording_name"][:39]))
+            print(bcolors.OKGREEN + "OK" + bcolors.ENDC + "   %-40s %-40s %-40s" % (artist_recording["recording_name"][:39], "",
+                                          artist_recording["artist_name"][:39]))
             print("     %-40s %-40s %-40s" % (rec["recording_name"][:39],
                                               rec["release_name"][:39],
                                               rec["artist_name"][:39]))
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index 130c8c3..ca897d1 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -86,7 +86,7 @@ def resolve_recordings(self, playlist):
         cr = ContentResolver(self.db)
         resolved = cr.resolve_playlist(self.MATCH_THRESHOLD, recordings)
 
-        for i, t_recording in enumerate(playlist.playlists[0].recordings):
+        for i, t_recording in enumerate(recordings):
             if resolved[i] is not None:
                 if resolved[i]["subsonic_id"] != "":
                     t_recording.musicbrainz["subsonic_id"] = resolved[i]["subsonic_id"]

From 349f806d3e568ae5c23c90b7ba125c0dcd28c796 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sat, 30 Dec 2023 23:33:38 +0100
Subject: [PATCH 09/39] Add duplicate function to show duplicates in the
 collection

---
 lb_content_resolver/utils.py |  2 --
 resolve.py                   | 10 ++++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/lb_content_resolver/utils.py b/lb_content_resolver/utils.py
index 30a09eb..042842e 100755
--- a/lb_content_resolver/utils.py
+++ b/lb_content_resolver/utils.py
@@ -90,5 +90,3 @@ class bcolors:
     ENDC = '\033[0m'
     BOLD = '\033[1m'
     UNDERLINE = '\033[4m'
-
-
diff --git a/resolve.py b/resolve.py
index c527976..8d1432a 100755
--- a/resolve.py
+++ b/resolve.py
@@ -11,6 +11,7 @@
 from lb_content_resolver.lb_radio import ListenBrainzRadioLocal
 from lb_content_resolver.utils import ask_yes_no_question
 from lb_content_resolver.top_tags import TopTags
+from lb_content_resolver.duplicates import FindDuplicates
 from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService
 from lb_content_resolver.playlist import write_m3u_playlist_from_results, write_m3u_playlist_from_jspf
 import config
@@ -111,6 +112,14 @@ def top_tags(index_dir, count):
     tt = TopTags(db)
     tt.print_top_tags_tightly(count)
 
+@click.command()
+@click.argument('index_dir')
+def duplicates(index_dir):
+    "Print all the tracks in the DB that are duplciated as per recording_mbid"""
+    db = Database(index_dir)
+    fd = FindDuplicates(db)
+    fd.print_duplicate_recordings()
+
 
 cli.add_command(create)
 cli.add_command(scan)
@@ -120,6 +129,7 @@ def top_tags(index_dir, count):
 cli.add_command(subsonic)
 cli.add_command(lb_radio)
 cli.add_command(top_tags)
+cli.add_command(duplicates)
 
 
 def usage(command):

From b07533edd7dd5d7fd1649c6d700ef0bd80d6378a Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sat, 30 Dec 2023 23:48:25 +0100
Subject: [PATCH 10/39] Add missing file

---
 lb_content_resolver/duplicates.py | 57 +++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100755 lb_content_resolver/duplicates.py

diff --git a/lb_content_resolver/duplicates.py b/lb_content_resolver/duplicates.py
new file mode 100755
index 0000000..bbbc54e
--- /dev/null
+++ b/lb_content_resolver/duplicates.py
@@ -0,0 +1,57 @@
+import os
+import json
+from collections import defaultdict
+import datetime
+import sys
+
+import peewee
+import requests
+
+from lb_content_resolver.model.database import db
+from lb_content_resolver.model.recording import Recording, RecordingMetadata
+from troi.recording_search_service import RecordingSearchByTagService
+from troi.splitter import plist
+
+
+class FindDuplicates:
+    ''' 
+       Class to fetch recordings that are duplicate in the database.
+    '''
+
+    def __init__(self, db):
+        self.db = db
+
+    def get_duplicate_recordings(self):
+        """
+           Return a list of (recording_name
+        """
+
+        query = """SELECT recording_name
+                        , release_name
+                        , artist_name
+                        , recording_mbid
+                        , json_group_array(file_path) AS file_paths
+                        , COUNT(*) AS cnt
+                     FROM recording
+                 GROUP BY recording_mbid
+                   HAVING cnt > 1
+                 ORDER BY cnt DESC, artist_name, recording_name"""
+
+        self.db.open_db()
+
+        return [ (r[0], r[1], r[2], r[3], json.loads(r[4]), r[5]) for r in db.execute_sql(query).fetchall() ]
+
+    
+    def print_duplicate_recordings(self):
+
+        total = 0
+        dups = self.get_duplicate_recordings()
+        for dup in dups:
+            print("%d duplicates of '%s' by '%s'" % (dup[5], dup[0], dup[2]))
+            for f in dup[4]:
+                print("   %s" % f)
+                total += 1
+            print()
+
+        print()
+        print("%d recordings had a total of %d duplicates." % (len(dups), total))

From 8084a8b1fc3b13fa4b1d192689ae0f7d5ead500d Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 00:03:52 +0100
Subject: [PATCH 11/39] Improve the cleanup function

---
 lb_content_resolver/database.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index c5b8047..76ba110 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -295,8 +295,13 @@ def database_cleanup(self):
 
         self.open_db()
         query = Recording.select()
+        recording_ids = []
         for recording in query:
             if not os.path.exists(recording.file_path):
-                print("DEL %s" % recording.file_path)
-                recording.delete()
+                print("UNLINK %s" % recording.file_path)
+                recording_ids.append(recording.id)
+
+        placeholders = ",".join(("?", ) * len(recording_ids))
+        db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % (placeholders, (recording_ids,)))
+
         self.close_db()

From e4d5c17b6065f3d5fca5fe36896c0f71a7059a54 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 00:07:12 +0100
Subject: [PATCH 12/39] Fix delete

---
 lb_content_resolver/database.py        | 2 +-
 lb_content_resolver/metadata_lookup.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index 76ba110..4cbb71e 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -302,6 +302,6 @@ def database_cleanup(self):
                 recording_ids.append(recording.id)
 
         placeholders = ",".join(("?", ) * len(recording_ids))
-        db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % (placeholders, (recording_ids,)))
+        db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % placeholders, tuple(recording_ids))
 
         self.close_db()
diff --git a/lb_content_resolver/metadata_lookup.py b/lb_content_resolver/metadata_lookup.py
index 2162985..09cfaad 100755
--- a/lb_content_resolver/metadata_lookup.py
+++ b/lb_content_resolver/metadata_lookup.py
@@ -104,6 +104,7 @@ def lookup_chunk(self, args, mbid_to_id_index):
             # insert new recording tags
             tag_ids = {}
             for tag in tags:
+                print(tag)
                 cursor = db.execute_sql("""INSERT INTO tag (name)
                                                 VALUES (?)
                              ON CONFLICT DO UPDATE SET name = ? RETURNING id""", (tag,tag))

From 1d86d5ffa7d444d54aeb0090023a00622ec738e2 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 11:06:38 +0100
Subject: [PATCH 13/39] Improve dups

---
 lb_content_resolver/duplicates.py      | 39 +++++++++++++++++---------
 lb_content_resolver/metadata_lookup.py |  1 -
 lb_content_resolver/subsonic.py        |  2 +-
 resolve.py                             |  5 ++--
 4 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/lb_content_resolver/duplicates.py b/lb_content_resolver/duplicates.py
index bbbc54e..5fa892f 100755
--- a/lb_content_resolver/duplicates.py
+++ b/lb_content_resolver/duplicates.py
@@ -21,31 +21,44 @@ class FindDuplicates:
     def __init__(self, db):
         self.db = db
 
-    def get_duplicate_recordings(self):
+    def get_duplicate_recordings(self, exclude_different_releases):
         """
            Return a list of (recording_name
         """
 
-        query = """SELECT recording_name
-                        , release_name
-                        , artist_name
-                        , recording_mbid
-                        , json_group_array(file_path) AS file_paths
-                        , COUNT(*) AS cnt
-                     FROM recording
-                 GROUP BY recording_mbid
-                   HAVING cnt > 1
-                 ORDER BY cnt DESC, artist_name, recording_name"""
+        if exclude_different_releases:
+            query = """SELECT recording_name
+                            , release_name
+                            , artist_name
+                            , recording_mbid
+                            , json_group_array(file_path) AS file_paths
+                            , COUNT(*) AS cnt
+                         FROM recording
+                     GROUP BY recording_mbid
+                       HAVING cnt > 1 
+                     ORDER BY cnt DESC, artist_name, recording_name"""
+        else:
+            query = """SELECT recording_name
+                            , release_name
+                            , artist_name
+                            , recording_mbid
+                            , json_group_array(file_path) AS file_paths
+                            , COUNT(*) AS cnt
+                         FROM recording
+                     GROUP BY recording_mbid
+                            , release_mbid
+                       HAVING cnt > 1 
+                     ORDER BY cnt DESC, artist_name, recording_name"""
 
         self.db.open_db()
 
         return [ (r[0], r[1], r[2], r[3], json.loads(r[4]), r[5]) for r in db.execute_sql(query).fetchall() ]
 
     
-    def print_duplicate_recordings(self):
+    def print_duplicate_recordings(self, exclude_different_releases=True):
 
         total = 0
-        dups = self.get_duplicate_recordings()
+        dups = self.get_duplicate_recordings(exclude_different_releases)
         for dup in dups:
             print("%d duplicates of '%s' by '%s'" % (dup[5], dup[0], dup[2]))
             for f in dup[4]:
diff --git a/lb_content_resolver/metadata_lookup.py b/lb_content_resolver/metadata_lookup.py
index 09cfaad..2162985 100755
--- a/lb_content_resolver/metadata_lookup.py
+++ b/lb_content_resolver/metadata_lookup.py
@@ -104,7 +104,6 @@ def lookup_chunk(self, args, mbid_to_id_index):
             # insert new recording tags
             tag_ids = {}
             for tag in tags:
-                print(tag)
                 cursor = db.execute_sql("""INSERT INTO tag (name)
                                                 VALUES (?)
                              ON CONFLICT DO UPDATE SET name = ? RETURNING id""", (tag,tag))
diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py
index a6eab8e..e5ac190 100755
--- a/lb_content_resolver/subsonic.py
+++ b/lb_content_resolver/subsonic.py
@@ -81,7 +81,7 @@ def run_sync(self):
 
                 if len(release_tracks) == 0:
                     print("For album %s" % album_mbid)
-                    print("loaded %d of %d expected tracks from DB." % (len(release_tracks), len(album_info["album"]["song"])))
+                    print("loaded %d of %d expected tracks from DB." % (len(release_tracks), len(album_info["album"].get("song", []))))
 
                 print("album '%s' by '%s'" % (album["album"], album["artist"]))
                 if "song" not in album_info["album"]:
diff --git a/resolve.py b/resolve.py
index 8d1432a..fb5cb3e 100755
--- a/resolve.py
+++ b/resolve.py
@@ -114,11 +114,12 @@ def top_tags(index_dir, count):
 
 @click.command()
 @click.argument('index_dir')
-def duplicates(index_dir):
+@click.option('-e', '--exclude-different-release', required=False, is_flag=True)
+def duplicates(exclude_different_release, index_dir):
     "Print all the tracks in the DB that are duplciated as per recording_mbid"""
     db = Database(index_dir)
     fd = FindDuplicates(db)
-    fd.print_duplicate_recordings()
+    fd.print_duplicate_recordings(exclude_different_release)
 
 
 cli.add_command(create)

From 9c7492377bbdb750bfa4d677a5ddc60fce6d1e7e Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 11:52:37 +0100
Subject: [PATCH 14/39] Finished the duplicate recording detection feature

---
 lb_content_resolver/duplicates.py | 10 +++++-----
 resolve.py                        |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lb_content_resolver/duplicates.py b/lb_content_resolver/duplicates.py
index 5fa892f..7715963 100755
--- a/lb_content_resolver/duplicates.py
+++ b/lb_content_resolver/duplicates.py
@@ -21,12 +21,12 @@ class FindDuplicates:
     def __init__(self, db):
         self.db = db
 
-    def get_duplicate_recordings(self, exclude_different_releases):
+    def get_duplicate_recordings(self, include_different_releases):
         """
            Return a list of (recording_name
         """
 
-        if exclude_different_releases:
+        if include_different_releases:
             query = """SELECT recording_name
                             , release_name
                             , artist_name
@@ -35,6 +35,7 @@ def get_duplicate_recordings(self, exclude_different_releases):
                             , COUNT(*) AS cnt
                          FROM recording
                      GROUP BY recording_mbid
+                            , release_mbid
                        HAVING cnt > 1 
                      ORDER BY cnt DESC, artist_name, recording_name"""
         else:
@@ -46,7 +47,6 @@ def get_duplicate_recordings(self, exclude_different_releases):
                             , COUNT(*) AS cnt
                          FROM recording
                      GROUP BY recording_mbid
-                            , release_mbid
                        HAVING cnt > 1 
                      ORDER BY cnt DESC, artist_name, recording_name"""
 
@@ -55,10 +55,10 @@ def get_duplicate_recordings(self, exclude_different_releases):
         return [ (r[0], r[1], r[2], r[3], json.loads(r[4]), r[5]) for r in db.execute_sql(query).fetchall() ]
 
     
-    def print_duplicate_recordings(self, exclude_different_releases=True):
+    def print_duplicate_recordings(self, include_different_releases=True):
 
         total = 0
-        dups = self.get_duplicate_recordings(exclude_different_releases)
+        dups = self.get_duplicate_recordings(include_different_releases)
         for dup in dups:
             print("%d duplicates of '%s' by '%s'" % (dup[5], dup[0], dup[2]))
             for f in dup[4]:
diff --git a/resolve.py b/resolve.py
index fb5cb3e..3ef1275 100755
--- a/resolve.py
+++ b/resolve.py
@@ -114,7 +114,7 @@ def top_tags(index_dir, count):
 
 @click.command()
 @click.argument('index_dir')
-@click.option('-e', '--exclude-different-release', required=False, is_flag=True)
+@click.option('-e', '--exclude-different-release', required=False, default=False, is_flag=True)
 def duplicates(exclude_different_release, index_dir):
     "Print all the tracks in the DB that are duplciated as per recording_mbid"""
     db = Database(index_dir)

From ea8505ebd7250ddf26729b739fb7449cfed1b683 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 13:41:48 +0100
Subject: [PATCH 15/39] Improve the status update of the subsonic scan and make
 it faster

---
 lb_content_resolver/subsonic.py | 160 ++++++++++++++++++--------------
 1 file changed, 92 insertions(+), 68 deletions(-)

diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py
index e5ac190..a3f1bbb 100755
--- a/lb_content_resolver/subsonic.py
+++ b/lb_content_resolver/subsonic.py
@@ -1,11 +1,14 @@
 import datetime
 import os
+import sys
 from uuid import UUID
 
 import libsonic
+from tqdm import tqdm
 
 from lb_content_resolver.database import Database
 from lb_content_resolver.model.database import db
+from lb_content_resolver.utils import bcolors
 import config
 
 
@@ -13,8 +16,9 @@ class SubsonicDatabase(Database):
     ''' 
     Add subsonic sync capabilities to the Database
     '''
-
-    MAX_ALBUMS_PER_CALL = 500
+    
+    # Determined by the number of albums we can fetch in one go
+    BATCH_SIZE = 500
 
     def __init__(self, index_dir):
         Database.__init__(self, index_dir)
@@ -26,81 +30,99 @@ def sync(self):
 
         # Keep some stats
         self.total = 0
-        self.added = 0
-        self.removed = 0
-        self.updated = 0
+        self.matched = 0
+        self.error = 0
 
         self.open_db()
         self.run_sync()
         self.close_db()
 
-        print("Checked %s tracks:" % self.total)
-        print("  %5d tracks added" % self.added)
-        print("  %5d tracks updated" % self.updated)
-        print("  %5d tracks removed" % self.removed)
+        print("Checked %s albums:" % self.total)
+        print("  %5d albums matched" % self.matched)
+        print("  %5d albums with errors" % self.error)
 
     def run_sync(self):
         """
             Perform the sync between the local collection and the subsonic one.
         """
 
-        print("Connect to subsonic..")
+        print("[ connect to subsonic ]")
         conn = libsonic.Connection(config.SUBSONIC_HOST, config.SUBSONIC_USER, config.SUBSONIC_PASSWORD, config.SUBSONIC_PORT)
-
         cursor = db.connection().cursor()
 
-        print("Fetch recordings")
-        album_count = 0
+        print("[ load albums ]")
+        album_ids = set()
+        albums = []
+        offset = 0
         while True:
-            recordings = []
-            albums_this_batch = 0
-            albums = conn.getAlbumList(ltype="alphabeticalByArtist", size=self.MAX_ALBUMS_PER_CALL, offset=album_count)
-
-            for album in albums["albumList"]["album"]:
-                album_count += 1
-                albums_this_batch += 1
-
-                album_info = conn.getAlbumInfo2(id=album["id"])
-                try:
-                    album_mbid = album_info["albumInfo"]["musicBrainzId"]
-                except KeyError:
-                    print("subsonic album '%s' by '%s' has no MBID" % (album["album"], album["artist"]))
-                    continue
-
-                cursor.execute(
-                    """SELECT recording.id
-                                       , track_num
-                                       , COALESCE(disc_num, 1)
-                                    FROM recording
-                                   WHERE release_mbid = ?""", (album_mbid, ))
-
-                # create index on (track_num, disc_num)
-                release_tracks = {(row[1], row[2]): row[0] for row in cursor.fetchall()}
-
-                album_info = conn.getAlbum(id=album["id"])
-
-                if len(release_tracks) == 0:
-                    print("For album %s" % album_mbid)
-                    print("loaded %d of %d expected tracks from DB." % (len(release_tracks), len(album_info["album"].get("song", []))))
-
-                print("album '%s' by '%s'" % (album["album"], album["artist"]))
-                if "song" not in album_info["album"]:
-                    print("No songs returned")
-                else:
-                    for song in album_info["album"]["song"]:
-
-                        if (song["track"], song.get("discNumber", 1)) in release_tracks:
-                            recordings.append((release_tracks[(song["track"], song["discNumber"])], song["id"]))
-                        else:
-                            print("Song not matched: ", song["title"])
-                            continue
-
-            self.update_recordings(recordings)
-
-            print("fetched %d releases" % albums_this_batch)
-            if albums_this_batch < self.MAX_ALBUMS_PER_CALL:
+            results = conn.getAlbumList(ltype="alphabeticalByArtist", size=self.BATCH_SIZE, offset=offset)
+            albums.extend(results["albumList"]["album"])
+            album_ids.update([r["id"] for r in results["albumList"]["album"] ])
+
+            album_count = len(results["albumList"]["album"])
+            offset += album_count
+            if album_count < self.BATCH_SIZE:
                 break
 
+        print("[ loaded %d albums ]" % len(album_ids))
+
+        pbar = tqdm(total=len(album_ids))
+        recordings = []
+
+        for album in albums:
+            album_info = conn.getAlbumInfo2(id=album["id"])
+            try:
+                album_mbid = album_info["albumInfo"]["musicBrainzId"]
+            except KeyError:
+                pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "subsonic album '%s' by '%s' has no MBID" %
+                           (album["album"], album["artist"]))
+                continue
+
+            cursor.execute(
+                """SELECT recording.id
+                                   , track_num
+                                   , COALESCE(disc_num, 1)
+                                FROM recording
+                               WHERE release_mbid = ?""", (album_mbid, ))
+
+            # create index on (track_num, disc_num)
+            release_tracks = {(row[1], row[2]): row[0] for row in cursor.fetchall()}
+
+            album_info = conn.getAlbum(id=album["id"])
+
+            if len(release_tracks) == 0:
+                pbar.write("For album %s" % album_mbid)
+                pbar.write("loaded %d of %d expected tracks from DB." %
+                           (len(release_tracks), len(album_info["album"].get("song", []))))
+
+            msg = ""
+            if "song" not in album_info["album"]:
+                msg += "   No songs returned\n"
+            else:
+                for song in album_info["album"]["song"]:
+                    if (song["track"], song.get("discNumber", 1)) in release_tracks:
+                        recordings.append((release_tracks[(song["track"], song["discNumber"])], song["id"]))
+                    else:
+                        msg += "   Song not matched: '%s'\n" % song["title"]
+                        continue
+            if msg == "":
+                pbar.write(bcolors.OKGREEN + "OK   " + bcolors.ENDC + "album %-50s %-50s" %
+                           (album["album"][:49], album["artist"][:49]))
+                self.matched += 1
+            else:
+                pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "album %-50s %-50s" %
+                           (album["album"][:49], album["artist"][:49]))
+                pbar.write(msg)
+                self.error += 1
+
+            if len(recordings) >= self.BATCH_SIZE:
+                self.update_recordings(recordings)
+                recordings = []
+
+            self.total += 1
+            pbar.update(1)
+
+
     def update_recordings(self, recordings):
         """
             Given a list of recording_subsonic records, update the DB.
@@ -110,12 +132,13 @@ def update_recordings(self, recordings):
         recordings = [(r[0], r[1], datetime.datetime.now()) for r in recordings]
 
         cursor = db.connection().cursor()
-        cursor.executemany(
-            """INSERT INTO recording_subsonic (recording_id, subsonic_id, last_updated)
-                                    VALUES (?, ?, ?)
-                 ON CONFLICT DO UPDATE SET recording_id = excluded.recording_id
-                                         , subsonic_id = excluded.subsonic_id
-                                         , last_updated = excluded.last_updated""", recordings)
+        with db.atomic() as transaction:
+            cursor.executemany(
+                """INSERT INTO recording_subsonic (recording_id, subsonic_id, last_updated)
+                                        VALUES (?, ?, ?)
+                     ON CONFLICT DO UPDATE SET recording_id = excluded.recording_id
+                                             , subsonic_id = excluded.subsonic_id
+                                             , last_updated = excluded.last_updated""", recordings)
 
     def upload_playlist(self, jspf):
         """
@@ -127,9 +150,10 @@ def upload_playlist(self, jspf):
         song_ids = []
         for track in jspf["playlist"]["track"]:
             try:
-                song_ids.append(track["extension"]["https://musicbrainz.org/doc/jspf#track"]["additional_metadata"]["subsonic_identifier"][33:])
+                song_ids.append(
+                    track["extension"]["https://musicbrainz.org/doc/jspf#track"]["additional_metadata"]["subsonic_identifier"][33:])
             except KeyError:
                 continue
-        
+
         name = jspf["playlist"]["title"]
         conn.createPlaylist(name=name, songIds=song_ids)

From 9e0325b961d67315e017bc5e976ab72d63fb9dd3 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 13:46:08 +0100
Subject: [PATCH 16/39] Minor cleanup

---
 lb_content_resolver/subsonic.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py
index a3f1bbb..b6a699d 100755
--- a/lb_content_resolver/subsonic.py
+++ b/lb_content_resolver/subsonic.py
@@ -76,6 +76,7 @@ def run_sync(self):
             except KeyError:
                 pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "subsonic album '%s' by '%s' has no MBID" %
                            (album["album"], album["artist"]))
+                self.error += 1
                 continue
 
             cursor.execute(
@@ -122,6 +123,9 @@ def run_sync(self):
             self.total += 1
             pbar.update(1)
 
+        if len(recordings) >= self.BATCH_SIZE:
+            self.update_recordings(recordings)
+
 
     def update_recordings(self, recordings):
         """

From 907cc99a7398bebbd2fc63372676b0a4118777a3 Mon Sep 17 00:00:00 2001
From: Philipp Wolfer <ph.wolfer@gmail.com>
Date: Sun, 31 Dec 2023 14:45:35 +0100
Subject: [PATCH 17/39] subsonic: use getAlbumList2

---
 lb_content_resolver/subsonic.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py
index b6a699d..59b34e1 100755
--- a/lb_content_resolver/subsonic.py
+++ b/lb_content_resolver/subsonic.py
@@ -13,10 +13,10 @@
 
 
 class SubsonicDatabase(Database):
-    ''' 
+    '''
     Add subsonic sync capabilities to the Database
     '''
-    
+
     # Determined by the number of albums we can fetch in one go
     BATCH_SIZE = 500
 
@@ -55,11 +55,11 @@ def run_sync(self):
         albums = []
         offset = 0
         while True:
-            results = conn.getAlbumList(ltype="alphabeticalByArtist", size=self.BATCH_SIZE, offset=offset)
-            albums.extend(results["albumList"]["album"])
-            album_ids.update([r["id"] for r in results["albumList"]["album"] ])
+            results = conn.getAlbumList2(ltype="alphabeticalByArtist", size=self.BATCH_SIZE, offset=offset)
+            albums.extend(results["albumList2"]["album"])
+            album_ids.update([r["id"] for r in results["albumList2"]["album"] ])
 
-            album_count = len(results["albumList"]["album"])
+            album_count = len(results["albumList2"]["album"])
             offset += album_count
             if album_count < self.BATCH_SIZE:
                 break

From 5521f49235e0678f96d0d69b8a298380dd18aaa8 Mon Sep 17 00:00:00 2001
From: Philipp Wolfer <ph.wolfer@gmail.com>
Date: Sun, 31 Dec 2023 15:00:37 +0100
Subject: [PATCH 18/39] subsonic: avoid call to getAlbumInfo2 if MBID is
 already present

This adds compatibility with clients not supporting getAlbumInfo2
---
 lb_content_resolver/subsonic.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py
index 59b34e1..60f1f8b 100755
--- a/lb_content_resolver/subsonic.py
+++ b/lb_content_resolver/subsonic.py
@@ -70,14 +70,19 @@ def run_sync(self):
         recordings = []
 
         for album in albums:
-            album_info = conn.getAlbumInfo2(id=album["id"])
-            try:
-                album_mbid = album_info["albumInfo"]["musicBrainzId"]
-            except KeyError:
-                pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "subsonic album '%s' by '%s' has no MBID" %
-                           (album["album"], album["artist"]))
-                self.error += 1
-                continue
+            album_info = conn.getAlbum(id=album["id"])
+
+            # Some servers might already include the MBID in the list or album response
+            album_mbid = album_info.get("musicBrainzId", album.get("musicBrainzId"))
+            if not album_mbid:
+                album_info2 = conn.getAlbumInfo2(id=album["id"])
+                try:
+                    album_mbid = album_info2["albumInfo"]["musicBrainzId"]
+                except KeyError:
+                    pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "subsonic album '%s' by '%s' has no MBID" %
+                            (album_info["name"], album_info["artist"]))
+                    self.error += 1
+                    continue
 
             cursor.execute(
                 """SELECT recording.id
@@ -89,8 +94,6 @@ def run_sync(self):
             # create index on (track_num, disc_num)
             release_tracks = {(row[1], row[2]): row[0] for row in cursor.fetchall()}
 
-            album_info = conn.getAlbum(id=album["id"])
-
             if len(release_tracks) == 0:
                 pbar.write("For album %s" % album_mbid)
                 pbar.write("loaded %d of %d expected tracks from DB." %
@@ -108,11 +111,11 @@ def run_sync(self):
                         continue
             if msg == "":
                 pbar.write(bcolors.OKGREEN + "OK   " + bcolors.ENDC + "album %-50s %-50s" %
-                           (album["album"][:49], album["artist"][:49]))
+                           (album_info["name"][:49], album_info["artist"][:49]))
                 self.matched += 1
             else:
                 pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "album %-50s %-50s" %
-                           (album["album"][:49], album["artist"][:49]))
+                           (album_info["name"][:49], album_info["artist"][:49]))
                 pbar.write(msg)
                 self.error += 1
 

From 8f419aa2f5ea07949f0c931d253969fd6a9cfdb1 Mon Sep 17 00:00:00 2001
From: Philipp Wolfer <ph.wolfer@gmail.com>
Date: Sun, 31 Dec 2023 15:44:54 +0100
Subject: [PATCH 19/39] subsonic: fix wrong variable use to read album name and
 artist

---
 lb_content_resolver/subsonic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py
index 60f1f8b..ab4695f 100755
--- a/lb_content_resolver/subsonic.py
+++ b/lb_content_resolver/subsonic.py
@@ -80,7 +80,7 @@ def run_sync(self):
                     album_mbid = album_info2["albumInfo"]["musicBrainzId"]
                 except KeyError:
                     pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "subsonic album '%s' by '%s' has no MBID" %
-                            (album_info["name"], album_info["artist"]))
+                            (album["name"], album["artist"]))
                     self.error += 1
                     continue
 
@@ -111,11 +111,11 @@ def run_sync(self):
                         continue
             if msg == "":
                 pbar.write(bcolors.OKGREEN + "OK   " + bcolors.ENDC + "album %-50s %-50s" %
-                           (album_info["name"][:49], album_info["artist"][:49]))
+                           (album["name"][:49], album["artist"][:49]))
                 self.matched += 1
             else:
                 pbar.write(bcolors.FAIL + "FAIL " + bcolors.ENDC + "album %-50s %-50s" %
-                           (album_info["name"][:49], album_info["artist"][:49]))
+                           (album["name"][:49], album["artist"][:49]))
                 pbar.write(msg)
                 self.error += 1
 

From 042956b7e91f00d42a2ad5c4ca38a27f1e8dcadb Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 16:45:45 +0100
Subject: [PATCH 20/39] Make the metadata lookup suck less with proper progress
 bars

---
 lb_content_resolver/metadata_lookup.py | 41 ++++++++++++++++----------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/lb_content_resolver/metadata_lookup.py b/lb_content_resolver/metadata_lookup.py
index 2162985..883ebe1 100755
--- a/lb_content_resolver/metadata_lookup.py
+++ b/lb_content_resolver/metadata_lookup.py
@@ -5,6 +5,7 @@
 
 import peewee
 import requests
+from tqdm import tqdm
 
 from lb_content_resolver.model.database import db
 from lb_content_resolver.model.recording import Recording, RecordingMetadata
@@ -15,6 +16,8 @@ class MetadataLookup:
     Given the local database, lookup metadata from MusicBrainz to allow local playlist resolution.
     '''
 
+    BATCH_SIZE = 1000
+
     def __init__(self, db):
         self.db = db
 
@@ -24,34 +27,38 @@ def lookup(self):
         """
 
         self.db.open_db()
-        args = []
-        mbid_to_id_index = {}
 
-        cursor = db.execute_sql("""SELECT recording.id, recording.recording_mbid, recording_metadata.id, popularity
+        cursor = db.execute_sql("""SELECT recording.id, recording.recording_mbid, recording_metadata.id
                                      FROM recording 
                                 LEFT JOIN recording_metadata
                                        ON recording.id = recording_metadata.recording_id
-                                    WHERE recording.recording_mbid IS NOT NULL """)
+                                    WHERE recording_mbid IS NOT NULL
+                                 ORDER BY artist_name, release_name""")
+        recordings = []
         for row in cursor.fetchall():
-            mbid = str(row[1])
-            args.append({ "[recording_mbid]": mbid })
-            mbid_to_id_index[mbid] = row
-            if len(args) == 1000:
-                if not self.lookup_chunk(args, mbid_to_id_index):
-                    return
-                args = []
-                mbid_to_id_index = {}
+            recordings.append(row)
+
+        print("[ %d recordings to lookup ]" % len(recordings))
 
-        if len(args) > 0:
-            self.lookup_chunk(args, mbid_to_id_index)
+        offset = 0
+        with tqdm(total=len(recordings)) as self.pbar:
+            while offset <= len(recordings):
+                self.process_recordings(recordings[offset:offset+self.BATCH_SIZE])
+                offset += self.BATCH_SIZE
 
 
-    def lookup_chunk(self, args, mbid_to_id_index):
+    def process_recordings(self, recordings):
         """
             This function carries out the actual lookup of the metadata and inserting the
             popularity and tags into the DB for the given chunk of recordings.
         """
 
+        args = []
+        mbid_to_id_index = {}
+        for rec in recordings:
+            mbid_to_id_index[ str(rec[1])] = rec
+            args.append({ "[recording_mbid]": str(rec[1]) })
+
         r = requests.post("https://labs.api.listenbrainz.org/bulk-tag-lookup/json", json=args)
         if r.status_code != 200:
             print("Fail: %d %s" % (r.status_code, r.text))
@@ -69,6 +76,8 @@ def lookup_chunk(self, args, mbid_to_id_index):
             recording_tags[mbid][row["source"]].append(row["tag"])
             tags.add(row["tag"])
 
+        self.pbar.update(len(recordings))
+
         tags = list(tags)
         with db.atomic():
 
@@ -81,7 +90,7 @@ def lookup_chunk(self, args, mbid_to_id_index):
             for mbid in list(set(mbids)):
                 mbid = str(mbid)
                 row = mbid_to_id_index[mbid]
-                if row[3] is None:
+                if row[2] is None:
                     recording_metadata = RecordingMetadata.create(recording=row[0],
                                                                   popularity=recording_pop[mbid],
                                                                   last_updated=datetime.datetime.now())

From 1c594ac420b702c084f66a494c5c2fc0f5c019c3 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 16:53:46 +0100
Subject: [PATCH 21/39] Show top tags after metadata load

---
 lb_content_resolver/top_tags.py | 2 +-
 resolve.py                      | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/lb_content_resolver/top_tags.py b/lb_content_resolver/top_tags.py
index da909ce..6a7d5bf 100755
--- a/lb_content_resolver/top_tags.py
+++ b/lb_content_resolver/top_tags.py
@@ -55,4 +55,4 @@ def print_top_tags_tightly(self, limit=250):
 
         top_tags = self.get_top_tags(limit)
 
-        print("; ".join([ tt["tag"] for tt in top_tags ]))
+        print("; ".join([ "%s %s" % (tt["tag"], tt["count"]) for tt in top_tags ]))
diff --git a/resolve.py b/resolve.py
index 3ef1275..ebe92cb 100755
--- a/resolve.py
+++ b/resolve.py
@@ -56,6 +56,10 @@ def metadata(index_dir):
     lookup = MetadataLookup(db)
     lookup.lookup()
 
+    print("\nThese top tags describe your collection:")
+    tt = TopTags(db)
+    tt.print_top_tags_tightly(100)
+
 
 @click.command()
 @click.argument('index_dir')

From c8e82410f84427950ef8da141c751c15a404d706 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 21:15:00 +0100
Subject: [PATCH 22/39] Do not resolve playlists if no tracks are missing. Less
 crashy.

---
 lb_content_resolver/content_resolver.py | 12 ++++++++----
 lb_content_resolver/lb_radio.py         |  3 +++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index cf23e46..f137f0f 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -94,17 +94,21 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
 
         print("\nResolve recordings to local files or subsonic ids")
 
-        self.db.open_db()
-        self.build_index()
-
         artist_recording_data = []
         if jspf_playlist is not None:
+            if len(jspf_playlist["playlist"]["track"]) == 0:
+                return []
             for i, track in enumerate(jspf_playlist["playlist"]["track"]):
                 artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"]})
         else:
+            if not recordings:
+                return []
             for rec in recordings:
                 artist_recording_data.append({"artist_name": rec.artist.name, "recording_name": rec.name})
 
+        self.db.open_db()
+        self.build_index()
+
         hits = self.resolve_recordings(artist_recording_data, match_threshold)
         hit_index = {hit["index"]: hit for hit in hits}
 
@@ -135,7 +139,7 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
 
         if len(results) == 0:
             print("Sorry, but no tracks could be resolved, no playlist generated.")
-            return
+            return []
 
         print(f'\n{len(recordings)} recordings resolved, {len(artist_recording_data) - len(recordings)} not resolved.')
 
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index ca897d1..277b431 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -83,6 +83,9 @@ def resolve_recordings(self, playlist):
 
             recordings.append(recording)
 
+        if not recordings:
+            return 
+
         cr = ContentResolver(self.db)
         resolved = cr.resolve_playlist(self.MATCH_THRESHOLD, recordings)
 

From a59fa0139b176e369aafe15c609e996fc35de661 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 21:43:14 +0100
Subject: [PATCH 23/39] Update readme for the new features on this branch

---
 README.md | 44 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 16aa3ea..64346da 100644
--- a/README.md
+++ b/README.md
@@ -83,9 +83,38 @@ Finally, match your collection against the subsonic collection:
 
 ### Playlist generation
 
-Currently only tag elements are supported for LB Local Radio.
+Currently artist and tag elements are supported for LB Local Radio,
+which means that playlists from these two elements are made from the local 
+collection and thus will not need to be resolved. All other elements
+may generate playlists with tracks that are not available in your
+collection. In this case, the fuzzy search will attempt to match the
+missing tracks to your collection.
 
-To generate a playlist:
+For a complete reference to LB Radio, see:
+[ListenBrainz Radio Docs](https://troi.readthedocs.io/en/latest/lb_radio.html)
+
+The playlist generator works with a given mode: "easy", "medium"
+and "hard". An easy playlist will generate data that more closely
+meets the prompt, which should translate into a playlist that should
+be easier and more pleasant to listen to. Medium goes further and includes
+less popular and more far flung stuff, before hard digs at the bottom
+of the barrel. 
+
+This may not always feel very pronounced, especially if your collection
+isn't very suited for the prompt that was given.
+
+
+#### Artist Element
+
+```
+./resolve.py lb-radio music_index easy 'artist:(taylor swift, drakee)'
+```
+
+Generates a playlist with music from Taylor Swift and artists similar
+to her and Drake, and artists similar to him.
+
+
+#### Tag Element
 
 ```
 ./resolve.py lb-radio music_index easy 'tag:(downtempo, trip hop)'
@@ -107,3 +136,14 @@ You can include more than on tag query in a prompt:
 ```
 ./resolve.py lb-radio music_index medium 'tag:(downtempo, trip hop)::or tag:(punk, ska)'
 ```
+
+#### Stats, Collections, Playlists and Rec
+
+There are more elements, but these are "global" elements that will need to 
+have their results resolved to the local collection. The resolution process is
+always a bit tricky since its outcome heavily depends on the collection. The
+generator will do its best to generate a fitting playlist, but that doesn't
+always happen. 
+
+For the other elements, please refer to the 
+[ListenBrainz Radio Docs](https://troi.readthedocs.io/en/latest/lb_radio.html)

From 84ab20f6cb56e7808563a070630de1767ae2f791 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 21:50:26 +0100
Subject: [PATCH 24/39] Document new features

---
 README.md | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/README.md b/README.md
index 64346da..8ebc93b 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,12 @@ Then prepare the index and scan a music collection. mp3, m4a, wma, OggVorbis, Og
 ./resolve.py scan music_index <path to mp3/flac files>
 ```
 
+If you remove tracks from your collection, use cleanup to remove references to those tracks:
+
+```
+./resolve.py cleanup music_index
+```
+
 ## Resolve JSPF playlists to local collection
 
 Then make a JSPF playlist on LB:
@@ -147,3 +153,23 @@ always happen.
 
 For the other elements, please refer to the 
 [ListenBrainz Radio Docs](https://troi.readthedocs.io/en/latest/lb_radio.html)
+
+## Other features
+
+### Collection deduplication
+
+The "duplicates" command will print a report of duplicate recordings
+in your collection, based on MusicBrainz Recording MBIDs. There are several
+types of duplicates that this may find:
+
+1. Duplicated tracks with the same title, release and artist.
+2. Duplicated tracks that live on different releases, but have the same name
+3. Duplicated tracks that exist once on an album and again on a compilation.
+
+If you specify -e or --exclude-different-release, then case #3 will not be shown.
+
+### Top tags
+
+The top-tags command will print the top tags and the number of times they
+have been used in your collection. This requires that the "metadata"
+command was run before.

From aad73e3930eeccf4036352e1ea11bc9908035c04 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 31 Dec 2023 21:55:03 +0100
Subject: [PATCH 25/39] Finish updating the README

---
 README.md | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 8ebc93b..c5b4c95 100644
--- a/README.md
+++ b/README.md
@@ -58,9 +58,17 @@ Then open the m3u playlist with a local tool.
 ### Prerequisites
 
 NOTE: This feature only works if you music collection 
-is tagged with MusicBrainz tags. (We recommend Picard:
-http://picard.musicbrainz.org ) and if your music
-collection is also available via a Subsonic API.
+is tagged with MusicBrainz tags. We recommend Picard:
+http://picard.musicbrainz.org for tagging your collection.
+
+If you're unwilling to properly tag your collection,
+then please do not contact us to request that we remove
+this requirement. We can't. We won't. Please close this 
+tab and move on.
+
+If you have your collection hosted on an app like Funkwhale,
+Navidrome or Gonic, which have a Subsonic API, you can generate
+playlists directly in the web application.
 
 ### Setup
 
@@ -113,7 +121,7 @@ isn't very suited for the prompt that was given.
 #### Artist Element
 
 ```
-./resolve.py lb-radio music_index easy 'artist:(taylor swift, drakee)'
+./resolve.py lb-radio music_index easy 'artist:(taylor swift, drake)'
 ```
 
 Generates a playlist with music from Taylor Swift and artists similar

From f91626af7b5b66b7627c68bfdbe18137ab9742e3 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Thu, 4 Jan 2024 23:46:25 +0100
Subject: [PATCH 26/39] First cut at periodic jams for lb local. Not a bad
 start!

---
 .gitignore                                    |  3 +
 lb_content_resolver/content_resolver.py       | 16 +++-
 lb_content_resolver/database.py               | 21 -----
 lb_content_resolver/fuzzy_index.py            | 10 +--
 lb_content_resolver/lb_radio.py               | 13 +--
 lb_content_resolver/troi/__init__.py          |  0
 lb_content_resolver/troi/patches/__init__.py  |  0
 .../troi/patches/periodic_jams.py             | 79 +++++++++++++++++++
 lb_content_resolver/troi/periodic_jams.py     | 40 ++++++++++
 .../troi/recording_resolver.py                | 65 +++++++++++++++
 resolve.py                                    | 53 +++++++++----
 11 files changed, 249 insertions(+), 51 deletions(-)
 create mode 100644 lb_content_resolver/troi/__init__.py
 create mode 100644 lb_content_resolver/troi/patches/__init__.py
 create mode 100755 lb_content_resolver/troi/patches/periodic_jams.py
 create mode 100755 lb_content_resolver/troi/periodic_jams.py
 create mode 100644 lb_content_resolver/troi/recording_resolver.py

diff --git a/.gitignore b/.gitignore
index 0f33172..b4d8fff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,6 @@ mp3
 /build/
 /dist/
 config.py
+*.jspf
+*.m3u
+.eggs
diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index f137f0f..79f8b80 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -24,16 +24,28 @@ def __init__(self, db):
         self.db = db
         self.fuzzy_index = None
 
+    def get_artist_recording_metadata(self):
+        """
+            Fetch the metadata needed to build a fuzzy search index.
+        """
+
+        artist_recording_data = []
+        for recording in Recording.select():
+            artist_recording_data.append((recording.artist_name, recording.recording_name, recording.id))
+
+        return artist_recording_data
+
+
     def build_index(self):
         """
             Fetch the data from the DB and then build the fuzzy lookup index.
         """
 
-        artist_recording_data = self.db.get_artist_recording_metadata()
+        artist_recording_data = self.get_artist_recording_metadata()
         for recording in Recording.select():
             artist_recording_data.append((recording.artist_name, recording.recording_name, recording.id))
 
-        self.fuzzy_index = FuzzyIndex(self.db.index_dir)
+        self.fuzzy_index = FuzzyIndex()
         self.fuzzy_index.build(artist_recording_data)
 
     def resolve_recordings(self, query_data, match_threshold):
diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index 4cbb71e..5ee0e97 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -119,27 +119,6 @@ def traverse(self, relative_path, dry_run=False):
 
         return True
 
-    def get_artist_recording_metadata(self):
-        """
-            Fetch the metadata needed to build a fuzzy search index.
-        """
-
-        artist_recording_data = []
-        for recording in Recording.select():
-            artist_recording_data.append((recording.artist_name, recording.recording_name, recording.id))
-
-        return artist_recording_data
-
-    def encode_string(self, text):
-        """ 
-            Remove unwanted crap from the query string and only keep essential information.
-
-            'This is the ultimate track !!' -> 'thisistheultimatetrack'
-        """
-        if text is None:
-            return None
-        return unidecode(re.sub(" +", " ", re.sub(r'[^\w ]+', '', text)).strip().lower())
-
     def add_or_update_recording(self, mdata):
         """ 
             Given a Recording, add it to the DB if it does not exist. If it does,
diff --git a/lb_content_resolver/fuzzy_index.py b/lb_content_resolver/fuzzy_index.py
index a524df5..548c794 100755
--- a/lb_content_resolver/fuzzy_index.py
+++ b/lb_content_resolver/fuzzy_index.py
@@ -26,18 +26,10 @@ class FuzzyIndex:
        be quick to rebuild this index.
     '''
 
-    def __init__(self, index_dir):
-        self.index_dir = index_dir
+    def __init__(self):
         self.vectorizer = None
         self.index = None
 
-    def create(self):
-        try:
-            os.mkdir(self.index_dir)
-        except OSError as err:
-            print("Could not create index directory: %s (%s)" % (self.index_dir, err))
-            return
-
     def encode_string(self, text):
         if text is None:
             return None
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index 277b431..b916088 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -45,7 +45,7 @@ def sanity_check(self):
                 "sanity check: You have not matched your collection against the collection in subsonic. Run the subsonic command.")
         elif num_subsonic < num_recordings // 2:
             print("sanity check: Only %d of your %d recordings have subsonic matches. Run the subsonic command." %
-                (num_subsonic, num_recordings))
+                  (num_subsonic, num_recordings))
 
     def generate(self, mode, prompt):
         """
@@ -70,11 +70,11 @@ def generate(self, mode, prompt):
             self.sanity_check()
 
         # Resolve any tracks that have not been resolved to a subsonic_id or a local file
-        self.resolve_recordings(playlist)
+        self.resolve_playlist(self.MATCH_THRESHOLD, playlist)
 
         return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}}
 
-    def resolve_recordings(self, playlist):
+    def resolve_playlist(self, match_threshold, playlist):
 
         recordings = []
         for recording in playlist.playlists[0].recordings:
@@ -84,10 +84,13 @@ def resolve_recordings(self, playlist):
             recordings.append(recording)
 
         if not recordings:
-            return 
+            return
 
+        return self.resolve_recordings(match_threshold, recordings)
+
+    def resolve_recordings(self, match_threshold, recordings):
         cr = ContentResolver(self.db)
-        resolved = cr.resolve_playlist(self.MATCH_THRESHOLD, recordings)
+        resolved = cr.resolve_playlist(match_threshold, recordings)
 
         for i, t_recording in enumerate(recordings):
             if resolved[i] is not None:
diff --git a/lb_content_resolver/troi/__init__.py b/lb_content_resolver/troi/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lb_content_resolver/troi/patches/__init__.py b/lb_content_resolver/troi/patches/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py
new file mode 100755
index 0000000..c33acd1
--- /dev/null
+++ b/lb_content_resolver/troi/patches/periodic_jams.py
@@ -0,0 +1,79 @@
+from datetime import datetime, timedelta
+
+import troi.listenbrainz.recs
+import troi.musicbrainz.recording_lookup
+from troi import Playlist
+from troi.playlist import PlaylistMakerElement
+
+from lb_content_resolver.troi.recording_resolver import RecordingResolverElement
+from lb_content_resolver.model.database import db
+
+DAYS_OF_RECENT_LISTENS_TO_EXCLUDE = 60  # Exclude tracks listened in last X days from the daily jams playlist
+DAILY_JAMS_MIN_RECORDINGS = 25  # the minimum number of recordings we aspire to have in a daily jam, this is not a hard limit
+BATCH_SIZE_RECS = 1000  # the number of recommendations fetched in 1 go
+MAX_RECS_LIMIT = 1000  # the maximum of recommendations available in LB
+
+class LocalPeriodicJamsPatch(troi.patch.Patch):
+    """
+    """
+
+
+    def __init__(self, args, debug=False):
+        super().__init__(args, debug)
+
+    @staticmethod
+    def inputs():
+        """
+        Generate a periodic playlist from the ListenBrainz recommended recordings.
+
+        \b
+        USER_NAME is a MusicBrainz user name that has an account on ListenBrainz.
+        TYPE Must be one of "daily-jams", "weekly-jams" or "weekly-exploration".
+        JAM_DATE is the date for which the jam is created (this is needed to account for the fact different timezones
+        can be on different dates). Required formatting for the date is 'YYYY-MM-DD'.
+        """
+        return [{
+            "type": "argument",
+            "args": ["user_name"]
+        }, {
+            "type": "argument",
+            "args": ["type"],
+            "kwargs": {
+                "required": False
+            }
+        }]
+
+    @staticmethod
+    def outputs():
+        return [Playlist]
+
+    @staticmethod
+    def slug():
+        return "local-periodic-jams"
+
+    @staticmethod
+    def description():
+        return "Generate a localized periodic playlist from the ListenBrainz recommended recordings."
+
+    def create(self, inputs):
+        user_name = inputs['user_name']
+
+        recs = troi.listenbrainz.recs.UserRecordingRecommendationsElement(user_name,
+                                                                          "raw",
+                                                                          count=1000)
+        recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement()
+        recs_lookup.set_sources(recs)
+
+        resolve = RecordingResolverElement(db, .8)
+        resolve.set_sources(recs_lookup)
+
+        pl_maker = PlaylistMakerElement(name="Local Periodic Jams for %s" % (user_name),
+                                        desc="test playlist!",
+                                        patch_slug="periodic-jams",
+                                        max_num_recordings=50,
+                                        max_artist_occurrence=2,
+                                        shuffle=True,
+                                        expires_at=datetime.utcnow() + timedelta(weeks=2))
+        pl_maker.set_sources(resolve)
+
+        return pl_maker
diff --git a/lb_content_resolver/troi/periodic_jams.py b/lb_content_resolver/troi/periodic_jams.py
new file mode 100755
index 0000000..73b0f3b
--- /dev/null
+++ b/lb_content_resolver/troi/periodic_jams.py
@@ -0,0 +1,40 @@
+from lb_content_resolver.lb_radio import ListenBrainzRadioLocal
+from lb_content_resolver.troi.patches.periodic_jams import LocalPeriodicJamsPatch
+
+
+class LocalPeriodicJams(ListenBrainzRadioLocal):
+    '''
+       Generate periodic jams playlists for a user against a music collection available via subsonic.
+    '''
+
+    # TODO: Make this an argument
+    MATCH_THRESHOLD = .8
+
+    def __init__(self, db, user_name):
+        ListenBrainzRadioLocal.__init__(self, db)
+        self.user_name = user_name
+
+    def generate(self):
+        """
+           Generate a periodic jams playlist. Returns a JSPF dict (with an empty track list if no playlist was made) or None on error.
+        """
+
+        self.db.open_db()
+
+        patch = LocalPeriodicJamsPatch({"user_name": self.user_name, "echo": True, "debug": True, "min_recordings": 1})
+
+        # Now generate the playlist
+        try:
+            playlist = patch.generate_playlist()
+        except RuntimeError as err:
+            print(f"LB Radio generation failed: {err}")
+            return None
+
+        if playlist is None:
+            print("Your prompt generated an empty playlist.")
+            self.sanity_check()
+            return {"playlist": {"track": []}}  # resolve_playlist() dereferences playlist.playlists, so stop here
+
+        # Resolve any tracks that have not been resolved to a subsonic_id or a local file
+        self.resolve_playlist(self.MATCH_THRESHOLD, playlist)
+        return playlist.get_jspf()
diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py
new file mode 100644
index 0000000..5b301b9
--- /dev/null
+++ b/lb_content_resolver/troi/recording_resolver.py
@@ -0,0 +1,65 @@
+#from troi.musicbrainz.recording_lookup import RecordingLookupElement
+from troi import Element
+
+from lb_content_resolver.content_resolver import ContentResolver
+from lb_content_resolver.model.subsonic import RecordingSubsonic
+from lb_content_resolver.model.recording import Recording
+from troi import Recording
+
+
+class RecordingResolverElement(Element):
+
+    def __init__(self, db, match_threshold):
+        Element.__init__(self)
+        self.db = db
+        self.match_threshold = match_threshold
+        self.resolve = ContentResolver(db)
+
+    @staticmethod
+    def inputs():
+        return []
+
+    @staticmethod
+    def outputs():
+        return [Recording]
+
+    def read(self, inputs):
+
+        # TODO: Add a check to make sure that metadata is present.
+
+        # Build the fuzzy index
+        lookup_data = []
+        for recording in inputs[0]:
+            lookup_data.append({"artist_name": recording.artist.name, "recording_name": recording.name})
+
+        self.resolve.build_index()
+
+        # Resolve the recordings
+        resolved = self.resolve.resolve_recordings(lookup_data, self.match_threshold)
+        recording_ids = [result["recording_id"] for result in resolved]
+
+        # Fetch the recordings to lookup subsonic ids
+        recordings = RecordingSubsonic \
+                      .select() \
+                      .where(RecordingSubsonic.recording_id.in_(recording_ids)) \
+                      .dicts()
+
+        # Build a subsonic index
+        subsonic_index = {}
+        matched = []
+        for recording in recordings:
+            matched.append(recording["recording"])
+            subsonic_index[recording["recording"]] = recording["subsonic_id"]
+
+        # Set the subsonic ids into the recordings and only return recordings with an ID
+        results = []
+        for r in resolved:
+            try:
+                recording = inputs[0][r["index"]]
+                recording.musicbrainz["subsonic_id"] = subsonic_index[r["recording_id"]]
+            except KeyError:
+                continue
+
+            results.append(recording)
+
+        return results
diff --git a/resolve.py b/resolve.py
index ebe92cb..47eb84d 100755
--- a/resolve.py
+++ b/resolve.py
@@ -13,9 +13,27 @@
 from lb_content_resolver.top_tags import TopTags
 from lb_content_resolver.duplicates import FindDuplicates
 from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService
+from lb_content_resolver.troi.periodic_jams import LocalPeriodicJams
 from lb_content_resolver.playlist import write_m3u_playlist_from_results, write_m3u_playlist_from_jspf
 import config
 
+# TODO: Make sure all functions work with subsonic and with local files
+
+
+def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask):
+    if jspf is None:
+        return
+
+    if upload_to_subsonic and len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "":
+        if dont_ask or ask_yes_no_question("Upload via subsonic? (Y/n)"):
+            print("uploading playlist")
+            db.upload_playlist(jspf)
+    elif save_to_playlist is not None and len(jspf["playlist"]["track"]) > 0:
+        if dont_ask or ask_yes_no_question(f"Save to '{save_to_playlist}'? (Y/n)"):
+            print("saving playlist")
+            write_m3u_playlist_from_jspf(save_to_playlist, jspf)
+    else:
+        print("Playlist displayed, but not saved. Use -p or -u options to save/upload playlists.")
 
 
 @click.group()
@@ -68,6 +86,7 @@ def subsonic(index_dir):
     db = SubsonicDatabase(index_dir)
     db.sync()
 
+
 @click.command()
 @click.argument('index_dir')
 @click.argument('jspf_playlist')
@@ -81,6 +100,7 @@ def playlist(index_dir, jspf_playlist, m3u_playlist, threshold):
     results = cr.resolve_playlist(threshold, jspf_playlist=jspf_playlist)
     write_m3u_playlist_from_results(m3u_playlist, results, jspf["playlist"]["title"])
 
+
 @click.command()
 @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True)
 @click.option('-p', '--save-to-playlist', required=False)
@@ -93,39 +113,43 @@ def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, pr
     db = SubsonicDatabase(index_dir)
     r = ListenBrainzRadioLocal(db)
     jspf = r.generate(mode, prompt)
-    if jspf is None:
-        return
+    output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask)
 
-    if upload_to_subsonic and len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "":
-        if dont_ask or ask_yes_no_question("Upload via subsonic? (Y/n)"):
-            print("uploading playlist")
-            db.upload_playlist(jspf)
-    elif save_to_playlist is not None and len(jspf["playlist"]["track"]) > 0:
-        if dont_ask or ask_yes_no_question(f"Save to '{save_to_playlist}'? (Y/n)"):
-            print("saving playlist")
-            write_m3u_playlist_from_jspf(save_to_playlist, jspf)
-    else:
-        print("Playlist displayed, but not saved. Use -p or -u options to save/upload playlists.")
 
 @click.command()
 @click.argument('index_dir')
 @click.argument('count', required=False, default=250)
 def top_tags(index_dir, count):
-    "Display the top most used tags in the music collection. Useful for writing LB Radio tag prompts"""
+    "Display the top most used tags in the music collection. Useful for writing LB Radio tag prompts" ""
     db = Database(index_dir)
     tt = TopTags(db)
     tt.print_top_tags_tightly(count)
 
+
 @click.command()
 @click.argument('index_dir')
 @click.option('-e', '--exclude-different-release', required=False, default=False, is_flag=True)
 def duplicates(exclude_different_release, index_dir):
-    "Print all the tracks in the DB that are duplciated as per recording_mbid"""
+    "Print all the tracks in the DB that are duplciated as per recording_mbid" ""
     db = Database(index_dir)
     fd = FindDuplicates(db)
     fd.print_duplicate_recordings(exclude_different_release)
 
 
+@click.command()
+@click.option('-u', '--upload-to-subsonic', required=False, is_flag=True)
+@click.option('-p', '--save-to-playlist', required=False)
+@click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file")
+@click.argument('index_dir')
+@click.argument('user_name')
+def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, user_name):
+    "Generate a periodic jams playlist"
+    db = SubsonicDatabase(index_dir)
+    pj = LocalPeriodicJams(db, user_name)
+    jspf = pj.generate()
+    output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask)
+
+
 cli.add_command(create)
 cli.add_command(scan)
 cli.add_command(playlist)
@@ -135,6 +159,7 @@ def duplicates(exclude_different_release, index_dir):
 cli.add_command(lb_radio)
 cli.add_command(top_tags)
 cli.add_command(duplicates)
+cli.add_command(periodic_jams)
 
 
 def usage(command):

From 0e272c0f308074fb6c34bb4446b573e64e3249b9 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Fri, 5 Jan 2024 23:54:44 +0100
Subject: [PATCH 27/39] Add the recent listens filter, which is really critical

---
 lb_content_resolver/troi/patches/periodic_jams.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py
index c33acd1..fad2e44 100755
--- a/lb_content_resolver/troi/patches/periodic_jams.py
+++ b/lb_content_resolver/troi/patches/periodic_jams.py
@@ -61,8 +61,12 @@ def create(self, inputs):
         recs = troi.listenbrainz.recs.UserRecordingRecommendationsElement(user_name,
                                                                           "raw",
                                                                           count=1000)
+
+        latest_filter = troi.filters.LatestListenedAtFilterElement(DAYS_OF_RECENT_LISTENS_TO_EXCLUDE)
+        latest_filter.set_sources(recs)
+
         recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement()
-        recs_lookup.set_sources(recs)
+        recs_lookup.set_sources(latest_filter)
 
         resolve = RecordingResolverElement(db, .8)
         resolve.set_sources(recs_lookup)

From 8b80e216a8cb2a06228d721c4ce2dae92f728a59 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sat, 6 Jan 2024 14:11:58 +0100
Subject: [PATCH 28/39] Start tracking recordings that went unresolved

---
 lb_content_resolver/content_resolver.py       | 31 +++++++++++++----
 lb_content_resolver/database.py               | 15 ++++----
 lb_content_resolver/model/recording.py        |  2 +-
 .../model/unresolved_recording.py             | 24 +++++++++++++
 lb_content_resolver/playlist.py               |  8 +++--
 .../troi/recording_resolver.py                | 14 +++++---
 lb_content_resolver/unresolved_recording.py   | 34 +++++++++++++++++++
 resolve.py                                    |  7 ++--
 8 files changed, 112 insertions(+), 23 deletions(-)
 create mode 100644 lb_content_resolver/model/unresolved_recording.py
 create mode 100755 lb_content_resolver/unresolved_recording.py

diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index 79f8b80..ac4c5c9 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -8,6 +8,7 @@
 from lb_content_resolver.model.database import db, setup_db
 from lb_content_resolver.model.recording import Recording
 from lb_content_resolver.model.subsonic import RecordingSubsonic
+from lb_content_resolver.unresolved_recording import UnresolvedRecordingTracker
 from lb_content_resolver.fuzzy_index import FuzzyIndex
 from lb_matching_tools.cleaner import MetadataCleaner
 from lb_content_resolver.playlist import read_jspf_playlist
@@ -50,11 +51,12 @@ def build_index(self):
 
     def resolve_recordings(self, query_data, match_threshold):
         """
-        Given a list of dicts with artist_name and recording_name in query data and a matching threshold,
-        attempt to match recordings by looking them up in the fuzzy index.
+        Given a list of dicts with artist_name, recording_name, recording_mbid in query data and
+        a matching threshold, attempt to match recordings by looking them up in the fuzzy index.
         """
 
         resolved_recordings = []
+        unresolved_recording_mbids = []
 
         # Set indexes in the data so we can correlate matches
         for i, data in enumerate(query_data):
@@ -67,10 +69,12 @@ def resolve_recordings(self, query_data, match_threshold):
             for hit, data in zip(hits, query_data):
                 if hit["confidence"] < match_threshold:
                     next_query_data.append(data)
+                    unresolved_recording_mbids.append(data["recording_mbid"])
                 else:
                     resolved_recordings.append({
                         "artist_name": data["artist_name"],
                         "recording_name": data["recording_name"],
+                        "recording_mbid": data["recording_mbid"],
                         "recording_id": hit["recording_id"],
                         "confidence": hit["confidence"],
                         "index": data["index"],
@@ -83,16 +87,25 @@ def resolve_recordings(self, query_data, match_threshold):
             for data in next_query_data:
                 recording_name = mc.clean_recording(data["recording_name"])
                 if recording_name != data["recording_name"]:
-                    query_data.append({"artist_name": artist_name, "recording_name": recording_name, "index": data["index"]})
+                    query_data.append({"artist_name": artist_name,
+                                       "recording_name": recording_name,
+                                       "recording_mbid": data["recording_mbid"],
+                                       "index": data["index"]})
 
                 artist_name = mc.clean_artist(data["artist_name"])
                 if artist_name != data["artist_name"]:
-                    query_data.append({"artist_name": artist_name, "recording_name": recording_name, "index": data["index"]})
+                    query_data.append({"artist_name": artist_name,
+                                       "recording_name": recording_name,
+                                       "recording_mbid": data["recording_mbid"],
+                                       "index": data["index"]})
 
             # If nothing got cleaned, we can finish now
             if len(query_data) == 0:
                 break
 
+        ur = UnresolvedRecordingTracker()
+        ur.add(unresolved_recording_mbids)
+
         return resolved_recordings
 
     def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None):
@@ -111,12 +124,16 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
             if len(jspf_playlist["playlist"]["track"]) == 0:
                 return []
             for i, track in enumerate(jspf_playlist["playlist"]["track"]):
-                artist_recording_data.append({"artist_name": track["creator"], "recording_name": track["title"]})
+                artist_recording_data.append({"artist_name": track["creator"],
+                                              "recording_name": track["title"],
+                                              "recording_mbid": track["identifier"][35:]})
         else:
             if not recordings:
                 return []
             for rec in recordings:
-                artist_recording_data.append({"artist_name": rec.artist.name, "recording_name": rec.name})
+                artist_recording_data.append({"artist_name": rec.artist.name,
+                                              "recording_name": rec.name,
+                                              "recording_mbid": rec.mbid})
 
         self.db.open_db()
         self.build_index()
@@ -134,10 +151,12 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
 
         print("     %-40s %-40s %-40s" % ("RECORDING", "RELEASE", "ARTIST"))
         results = [None] * len(artist_recording_data)
+        unresolved_recordings = []
         for i, artist_recording in enumerate(artist_recording_data):
             if i not in hit_index:
                 print(bcolors.FAIL + "FAIL"  + bcolors.ENDC + " %-40s %-40s %-40s" % (artist_recording["recording_name"][:39], "",
                                               artist_recording["artist_name"][:39]))
+                unresolved_recordings.append(artist_recording["recording_mbid"])
                 continue
 
             hit = hit_index[i]
diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index 5ee0e97..d82164a 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -11,6 +11,7 @@
 
 from lb_content_resolver.model.database import db, setup_db
 from lb_content_resolver.model.recording import Recording, RecordingMetadata
+from lb_content_resolver.model.unresolved_recording import UnresolvedRecording
 from lb_content_resolver.model.subsonic import RecordingSubsonic
 from lb_content_resolver.model.tag import Tag, RecordingTag
 from lb_content_resolver.formats import mp3, m4a, flac, ogg_opus, ogg_vorbis, wma
@@ -32,15 +33,17 @@ def create(self):
             Create the index directory for the data. Currently it contains only
             the sqlite dir, but in the future we may serialize the fuzzy index here as well.
         """
-        try:
-            os.mkdir(self.index_dir)
-        except OSError as err:
-            print("Could not create index directory: %s (%s)" % (self.index_dir, err))
-            return
+
+        if not os.path.exists(self.index_dir):
+            try:
+                os.mkdir(self.index_dir)
+            except OSError as err:
+                print("Could not create index directory: %s (%s)" % (self.index_dir, err))
+                return
 
         setup_db(self.db_file)
         db.connect()
-        db.create_tables([Recording, RecordingMetadata, Tag, RecordingTag, RecordingSubsonic])
+        db.create_tables([Recording, RecordingMetadata, Tag, RecordingTag, RecordingSubsonic, UnresolvedRecording])
 
     def open_db(self):
         """ 
diff --git a/lb_content_resolver/model/recording.py b/lb_content_resolver/model/recording.py
index 4852945..49c2433 100644
--- a/lb_content_resolver/model/recording.py
+++ b/lb_content_resolver/model/recording.py
@@ -34,7 +34,7 @@ def __repr__(self):
 
 class RecordingMetadata(Model):
     """
-    Additional metadata for recorings: popularity. In future additional fields
+    Additional metadata for recordings: popularity. In future additional fields
     like release date and release country could be added to this table.
     """
 
diff --git a/lb_content_resolver/model/unresolved_recording.py b/lb_content_resolver/model/unresolved_recording.py
new file mode 100644
index 0000000..c60f0ef
--- /dev/null
+++ b/lb_content_resolver/model/unresolved_recording.py
@@ -0,0 +1,24 @@
+import datetime
+from peewee import *
+from lb_content_resolver.model.database import db
+
+
+class UnresolvedRecording(Model):
+    """
+    Table used to track recordings for which resolving failed. This can be used both
+    for debugging purposes and to provide the user with a list of 'if you had this
+    album, you'd resolve more music' kind of report.
+    """
+
+    class Meta:
+        database = db
+        table_name = "unresolved_recording"
+
+    id = AutoField()
+    # Not using the UUIDField here, since it annoyingly removes '-' from the UUID.
+    recording_mbid = TextField(null=True, index=True, unique=True)
+    lookup_count = IntegerField(null=False, default=1)
+    last_updated = DateTimeField(null=False, default=datetime.datetime.now)
+
+    def __repr__(self):
+        return "<UnresolvedRecording('%s',%d)>" % (self.recording_mbid, self.lookup_count)
diff --git a/lb_content_resolver/playlist.py b/lb_content_resolver/playlist.py
index 5b28634..8f15110 100644
--- a/lb_content_resolver/playlist.py
+++ b/lb_content_resolver/playlist.py
@@ -21,9 +21,11 @@ def write_m3u_playlist_from_results(file_name, playlist_title, hits):
         m3u.write("#EXTM3U\n")
         m3u.write("#EXTENC: UTF-8\n")
         m3u.write("#PLAYLIST %s\n" % playlist_title)
-        for rec in recordings:
-            m3u.write("#EXTINF %d,%s\n" % (rec.duration / 1000, rec.recording_name))
-            m3u.write(rec.file_path + "\n")
+        for rec in hits:
+            if rec is None:
+                continue
+            m3u.write("#EXTINF %d,%s\n" % (rec["duration"] / 1000, rec["recording_name"]))
+            m3u.write(rec["file_path"] + "\n")
 
 
 def write_m3u_playlist_from_jspf(file_name, jspf):
diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py
index 5b301b9..acc4ce0 100644
--- a/lb_content_resolver/troi/recording_resolver.py
+++ b/lb_content_resolver/troi/recording_resolver.py
@@ -1,4 +1,3 @@
-#from troi.musicbrainz.recording_lookup import RecordingLookupElement
 from troi import Element
 
 from lb_content_resolver.content_resolver import ContentResolver
@@ -8,6 +7,10 @@
 
 
 class RecordingResolverElement(Element):
+    """
+        This Troi element takes in a list of recordings, which *must* have artist name and recording
+        name set and resolves them to a local collection by using the ContentResolver class
+    """
 
     def __init__(self, db, match_threshold):
         Element.__init__(self)
@@ -25,12 +28,15 @@ def outputs():
 
     def read(self, inputs):
 
-        # TODO: Add a check to make sure that metadata is present.
-
         # Build the fuzzy index
         lookup_data = []
         for recording in inputs[0]:
-            lookup_data.append({"artist_name": recording.artist.name, "recording_name": recording.name})
+            if recording.artist is None or recording.artist.name is None or recording.name is None:
+                raise RuntimeError("artist name and recording name are needed for RecordingResolverElement.")
+
+            lookup_data.append({"artist_name": recording.artist.name,
+                                "recording_name": recording.name,
+                                "recording_mbid": recording.mbid})
 
         self.resolve.build_index()
 
diff --git a/lb_content_resolver/unresolved_recording.py b/lb_content_resolver/unresolved_recording.py
new file mode 100755
index 0000000..e23e0ae
--- /dev/null
+++ b/lb_content_resolver/unresolved_recording.py
@@ -0,0 +1,34 @@
+import os
+import datetime
+import sys
+
+import peewee
+
+from lb_content_resolver.model.database import db
+from lb_content_resolver.model.unresolved_recording import UnresolvedRecording
+
+
+class UnresolvedRecordingTracker:
+    ''' 
+        This class keeps track of recordings that were not resolved when 
+        a playlist was resolved. This will allow us to give recommendations
+        on which albums to add to their collection to resolve more recordings.
+    '''
+
+    def __init__(self):
+        pass
+
+    def add(self, recording_mbids):
+        """
+            Add one or more recording MBIDs to the unresolved recordings track. If this has
+            previously been unresolved, increment its stored lookup count and refresh
+            its last_updated timestamp instead of inserting a duplicate row.
+        """
+        # The count must come from the stored row; EXCLUDED.lookup_count is always the inserted 1.
+        query = """INSERT INTO unresolved_recording (recording_mbid, last_updated, lookup_count)
+                        VALUES (?, ?, 1)
+         ON CONFLICT (recording_mbid) DO UPDATE SET lookup_count = lookup_count + 1, last_updated = EXCLUDED.last_updated"""
+
+        with db.atomic() as transaction:
+            for mbid in recording_mbids:
+                db.execute_sql(query, (mbid, datetime.datetime.now()))
diff --git a/resolve.py b/resolve.py
index 47eb84d..a7a9902 100755
--- a/resolve.py
+++ b/resolve.py
@@ -14,7 +14,7 @@
 from lb_content_resolver.duplicates import FindDuplicates
 from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService
 from lb_content_resolver.troi.periodic_jams import LocalPeriodicJams
-from lb_content_resolver.playlist import write_m3u_playlist_from_results, write_m3u_playlist_from_jspf
+from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist_from_results, write_m3u_playlist_from_jspf
 import config
 
 # TODO: Make sure all functions work with subsonic and with local files
@@ -32,6 +32,7 @@ def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask):
         if dont_ask or ask_yes_no_question(f"Save to '{save_to_playlist}'? (Y/n)"):
             print("saving playlist")
             write_m3u_playlist_from_jspf(save_to_playlist, jspf)
+
     else:
         print("Playlist displayed, but not saved. Use -p or -u options to save/upload playlists.")
 
@@ -97,8 +98,8 @@ def playlist(index_dir, jspf_playlist, m3u_playlist, threshold):
     db = Database(index_dir)
     cr = ContentResolver(db)
     jspf = read_jspf_playlist(jspf_playlist)
-    results = cr.resolve_playlist(threshold, jspf_playlist=jspf_playlist)
-    write_m3u_playlist_from_results(m3u_playlist, results, jspf["playlist"]["title"])
+    results = cr.resolve_playlist(threshold, jspf_playlist=jspf)
+    write_m3u_playlist_from_results(m3u_playlist, jspf["playlist"]["title"], results)
 
 
 @click.command()

From 0aacd34650104e13d7c1b6720fcbd2b22b737769 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sat, 6 Jan 2024 15:36:27 +0100
Subject: [PATCH 29/39] Very simple unresolved recordings report is in place

---
 lb_content_resolver/content_resolver.py     |  2 +-
 lb_content_resolver/unresolved_recording.py | 64 +++++++++++++++++++++
 resolve.py                                  | 17 ++++++
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index ac4c5c9..3559839 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -126,7 +126,7 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
             for i, track in enumerate(jspf_playlist["playlist"]["track"]):
                 artist_recording_data.append({"artist_name": track["creator"],
                                               "recording_name": track["title"],
-                                              "recording_mbid": track["identifier"][35:]})
+                                              "recording_mbid": track["identifier"][34:]})
         else:
             if not recordings:
                 return []
diff --git a/lb_content_resolver/unresolved_recording.py b/lb_content_resolver/unresolved_recording.py
index e23e0ae..29f1f7e 100755
--- a/lb_content_resolver/unresolved_recording.py
+++ b/lb_content_resolver/unresolved_recording.py
@@ -1,6 +1,8 @@
 import os
 import datetime
+import requests
 import sys
+from math import ceil
 
 import peewee
 
@@ -15,9 +17,17 @@ class UnresolvedRecordingTracker:
         on which albums to add to their collection to resolve more recordings.
     '''
 
+    LOOKUP_BATCH_SIZE = 50
+
     def __init__(self):
         pass
 
+    @staticmethod
+    def chunks(lst, n):
+        """Yield successive n-sized chunks from lst."""
+        for i in range(0, len(lst), n):
+            yield lst[i:i + n]
+
     def add(self, recording_mbids):
         """
             Add one or more recording MBIDs to the unresolved recordings track. If this has
@@ -32,3 +42,57 @@ def add(self, recording_mbids):
         with db.atomic() as transaction:
             for mbid in recording_mbids:
                 db.execute_sql(query, (mbid, datetime.datetime.now()))
+
+    def get(self, num_items, lookup_count):
+
+        if lookup_count is not None:
+            where_clause = f"WHERE lookup_count >= {lookup_count}"
+        else:
+            where_clause = ""
+
+        query = f"""SELECT recording_mbid
+                         , lookup_count
+                      FROM unresolved_recording
+                           {where_clause}
+                  ORDER BY lookup_count DESC"""
+
+        cursor = db.execute_sql(query)
+        recording_mbids = []
+        lookup_counts = {}
+        for row in cursor.fetchall():
+            recording_mbids.append(row[0])
+            lookup_counts[row[0]] = row[1]
+
+        recording_data = {}
+        for chunk in self.chunks(recording_mbids, self.LOOKUP_BATCH_SIZE):
+            args = ",".join(chunk)
+
+            params = { "recording_mbids": args, "inc": "artist release" }
+            while True:
+                r = requests.get("https://api.listenbrainz.org/1/metadata/recording", params=params)
+                if r.status_code != 200:
+                    if r.status_code == 429:
+                        from time import sleep  # rate limited: wait, then retry this chunk
+                        sleep(1)
+                        continue
+                    print("Failed to fetch metadata for recordings: ", r.text)
+                    return []
+
+                break
+            recording_data.update(dict(r.json()))
+
+        results = []
+        for mbid in recording_mbids:
+            rec = recording_data[mbid]
+            results.append({
+                "artist_name": rec["artist"]["name"],
+                "artists": rec["artist"]["artists"],
+                "release_name": rec["release"]["name"],
+                "release_mbid": rec["release"]["mbid"],
+                "release_group_mbid": rec["release"]["release_group_mbid"],
+                "recording_name": "Contact",
+                "recording_mbid": mbid,
+                "lookup_count": lookup_counts[mbid]
+            })
+
+        return results
diff --git a/resolve.py b/resolve.py
index a7a9902..d71ceba 100755
--- a/resolve.py
+++ b/resolve.py
@@ -15,9 +15,11 @@
 from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService
 from lb_content_resolver.troi.periodic_jams import LocalPeriodicJams
 from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist_from_results, write_m3u_playlist_from_jspf
+from lb_content_resolver.unresolved_recording import UnresolvedRecordingTracker
 import config
 
 # TODO: Make sure all functions work with subsonic and with local files
+# TODO: avoid passing in db to objects and just open the db
 
 
 def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask):
@@ -150,6 +152,20 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use
     jspf = pj.generate()
     output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask)
 
+@click.command()
+@click.option('-c', '--count', required=False, default=25)
+@click.option('-l', '--lookup-count', required=False, default=3)
+@click.argument('index_dir')
+def unresolved_releases(count, lookup_count, index_dir):
+    "Show the top unresolved releases"
+
+    db = SubsonicDatabase(index_dir)
+    db.open_db()
+    urt = UnresolvedRecordingTracker()
+    recordings = urt.get(num_items=count, lookup_count=lookup_count)
+    from icecream import ic
+    ic(recordings)
+
 
 cli.add_command(create)
 cli.add_command(scan)
@@ -161,6 +177,7 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use
 cli.add_command(top_tags)
 cli.add_command(duplicates)
 cli.add_command(periodic_jams)
+cli.add_command(unresolved_releases)
 
 
 def usage(command):

From 22fe98cae66ed64e4135ec70788d55c6667c4c97 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sat, 6 Jan 2024 16:09:42 +0100
Subject: [PATCH 30/39] Unresolved albums report is now done

---
 lb_content_resolver/unresolved_recording.py | 30 ++++++++++++++-------
 resolve.py                                  |  5 ++--
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/lb_content_resolver/unresolved_recording.py b/lb_content_resolver/unresolved_recording.py
index 29f1f7e..1bed543 100755
--- a/lb_content_resolver/unresolved_recording.py
+++ b/lb_content_resolver/unresolved_recording.py
@@ -1,8 +1,7 @@
-import os
+from collections import defaultdict
 import datetime
-import requests
-import sys
 from math import ceil
+import requests
 
 import peewee
 
@@ -43,7 +42,10 @@ def add(self, recording_mbids):
             for mbid in recording_mbids:
                 db.execute_sql(query, (mbid, datetime.datetime.now()))
 
-    def get(self, num_items, lookup_count):
+    def get_releases(self, num_items, lookup_count):
+        """
+            Organize the unresolved recordings into releases with a list of recordings.
+        """
 
         if lookup_count is not None:
             where_clause = f"WHERE lookup_count >= {lookup_count}"
@@ -67,7 +69,7 @@ def get(self, num_items, lookup_count):
         for chunk in self.chunks(recording_mbids, self.LOOKUP_BATCH_SIZE):
             args = ",".join(chunk)
 
-            params = { "recording_mbids": args, "inc": "artist release" }
+            params = {"recording_mbids": args, "inc": "artist release"}
             while True:
                 r = requests.get("https://api.listenbrainz.org/1/metadata/recording", params=params)
                 if r.status_code != 200:
@@ -81,18 +83,28 @@ def get(self, num_items, lookup_count):
                 break
             recording_data.update(dict(r.json()))
 
-        results = []
+        releases = defaultdict(list)
         for mbid in recording_mbids:
             rec = recording_data[mbid]
-            results.append({
+            releases[rec["release"]["mbid"]].append({
                 "artist_name": rec["artist"]["name"],
                 "artists": rec["artist"]["artists"],
                 "release_name": rec["release"]["name"],
                 "release_mbid": rec["release"]["mbid"],
                 "release_group_mbid": rec["release"]["release_group_mbid"],
-                "recording_name": "Contact",
+                "recording_name": rec["recording"]["name"],
                 "recording_mbid": mbid,
                 "lookup_count": lookup_counts[mbid]
             })
 
-        return results
+        return releases
+
+    def print_releases(self, releases):
+
+        print("%-50s %-50s" % ("RELEASE", "ARTIST"))
+        for release_mbid in sorted(releases.keys(), key=lambda a: releases[a][0]["release_name"]):
+            rel = releases[release_mbid]
+            print("%-60s %-50s" % (rel[0]["release_name"][:59], rel[0]["artist_name"][:49]))
+            for rec in rel:
+                print("   %-57s %d lookups" % (rec["recording_name"][:56], rec["lookup_count"]))
+            print()
diff --git a/resolve.py b/resolve.py
index d71ceba..b77a7f2 100755
--- a/resolve.py
+++ b/resolve.py
@@ -162,9 +162,8 @@ def unresolved_releases(count, lookup_count, index_dir):
     db = SubsonicDatabase(index_dir)
     db.open_db()
     urt = UnresolvedRecordingTracker()
-    recordings = urt.get(num_items=count, lookup_count=lookup_count)
-    from icecream import ic
-    ic(recordings)
+    releases = urt.get_releases(num_items=count, lookup_count=lookup_count)
+    urt.print_releases(releases)
 
 
 cli.add_command(create)

From 877d800d71924a99f953323de0aa6800f6d1b143 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Sun, 7 Jan 2024 22:45:21 +0100
Subject: [PATCH 31/39] Filter recent listens too

---
 lb_content_resolver/troi/patches/periodic_jams.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py
index fad2e44..7cd59e3 100755
--- a/lb_content_resolver/troi/patches/periodic_jams.py
+++ b/lb_content_resolver/troi/patches/periodic_jams.py
@@ -62,8 +62,12 @@ def create(self, inputs):
                                                                           "raw",
                                                                           count=1000)
 
+        recent_listens_lookup = troi.listenbrainz.listens.RecentListensTimestampLookup(user_name,
+                                                                                       days=2)
+        recent_listens_lookup.set_sources(recs)
+
         latest_filter = troi.filters.LatestListenedAtFilterElement(DAYS_OF_RECENT_LISTENS_TO_EXCLUDE)
-        latest_filter.set_sources(recs)
+        latest_filter.set_sources(recent_listens_lookup)
 
         recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement()
         recs_lookup.set_sources(latest_filter)

From 44844b1891632c9c0149452de270b80bef023b73 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Mon, 8 Jan 2024 21:23:41 +0100
Subject: [PATCH 32/39] Improve the unresolved recordings function

---
 lb_content_resolver/content_resolver.py     |  2 +-
 lb_content_resolver/unresolved_recording.py | 44 ++++++++++++++-------
 resolve.py                                  |  5 +--
 3 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index 3559839..fe19531 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -86,13 +86,13 @@ def resolve_recordings(self, query_data, match_threshold):
             query_data = []
             for data in next_query_data:
                 recording_name = mc.clean_recording(data["recording_name"])
+                artist_name = mc.clean_artist(data["artist_name"])
                 if recording_name != data["recording_name"]:
                     query_data.append({"artist_name": artist_name,
                                        "recording_name": recording_name,
                                        "recording_mbid": data["recording_mbid"],
                                        "index": data["index"]})
 
-                artist_name = mc.clean_artist(data["artist_name"])
                 if artist_name != data["artist_name"]:
                     query_data.append({"artist_name": artist_name,
                                        "recording_name": recording_name,
diff --git a/lb_content_resolver/unresolved_recording.py b/lb_content_resolver/unresolved_recording.py
index 1bed543..1bcd95b 100755
--- a/lb_content_resolver/unresolved_recording.py
+++ b/lb_content_resolver/unresolved_recording.py
@@ -1,6 +1,7 @@
 from collections import defaultdict
 import datetime
 from math import ceil
+from operator import itemgetter
 import requests
 
 import peewee
@@ -27,6 +28,13 @@ def chunks(lst, n):
         for i in range(0, len(lst), n):
             yield lst[i:i + n]
 
+    @staticmethod
+    def multisort(xs, specs):
+        """ Multiple key sort helper """
+        for key, reverse in reversed(specs):
+            xs.sort(key=itemgetter(key), reverse=reverse)
+        return xs
+
     def add(self, recording_mbids):
         """
             Add one or more recording MBIDs to the unresolved recordings track. If this has
@@ -42,21 +50,15 @@ def add(self, recording_mbids):
             for mbid in recording_mbids:
                 db.execute_sql(query, (mbid, datetime.datetime.now()))
 
-    def get_releases(self, num_items, lookup_count):
+    def get_releases(self, num_items):
         """
             Organize the unresolved recordings into releases with a list of recordings.
+            Return up to num_item releases.
         """
 
-        if lookup_count is not None:
-            where_clause = f"WHERE lookup_count >= {lookup_count}"
-        else:
-            where_clause = ""
-
         query = f"""SELECT recording_mbid
                          , lookup_count
-                      FROM unresolved_recording
-                           {where_clause}
-                  ORDER BY lookup_count DESC"""
+                      FROM unresolved_recording"""
 
         cursor = db.execute_sql(query)
         recording_mbids = []
@@ -97,14 +99,26 @@ def get_releases(self, num_items, lookup_count):
                 "lookup_count": lookup_counts[mbid]
             })
 
-        return releases
+        release_list = []
+        for mbid in releases:
+            release = releases[mbid]
+            total_count = sum([rec["lookup_count"] for rec in release])
+            release_list.append({
+                "mbid": release[0]["release_mbid"],
+                "release_name": release[0]["release_name"],
+                "artist_name": release[0]["artist_name"],
+                "lookup_count": total_count,
+                "recordings": release
+            })
+
+        return self.multisort(release_list, (("lookup_count", True), ("artist_name", False), ("release_name", False)))[:num_items]
 
     def print_releases(self, releases):
+        """ Neatly print all the release/recordings returned from the get_releases function """
 
-        print("%-50s %-50s" % ("RELEASE", "ARTIST"))
-        for release_mbid in sorted(releases.keys(), key=lambda a: releases[a][0]["release_name"]):
-            rel = releases[release_mbid]
-            print("%-60s %-50s" % (rel[0]["release_name"][:59], rel[0]["artist_name"][:49]))
-            for rec in rel:
+        print("%-60s %-50s" % ("RELEASE", "ARTIST"))
+        for release in releases:
+            print("%-60s %-50s" % (release["release_name"][:59], release["artist_name"][:49]))
+            for rec in release["recordings"]:
                 print("   %-57s %d lookups" % (rec["recording_name"][:56], rec["lookup_count"]))
             print()
diff --git a/resolve.py b/resolve.py
index b77a7f2..19edb89 100755
--- a/resolve.py
+++ b/resolve.py
@@ -154,15 +154,14 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use
 
 @click.command()
 @click.option('-c', '--count', required=False, default=25)
-@click.option('-l', '--lookup-count', required=False, default=3)
 @click.argument('index_dir')
-def unresolved_releases(count, lookup_count, index_dir):
+def unresolved_releases(count, index_dir):
     "Show the top unresolved releases"
 
     db = SubsonicDatabase(index_dir)
     db.open_db()
     urt = UnresolvedRecordingTracker()
-    releases = urt.get_releases(num_items=count, lookup_count=lookup_count)
+    releases = urt.get_releases(num_items=count)
     urt.print_releases(releases)
 
 

From 3b29b227a2ff639a2b28e69c30ffe36f1bc82d02 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Mon, 8 Jan 2024 21:54:08 +0100
Subject: [PATCH 33/39] db open cleanup

---
 lb_content_resolver/artist_search.py          |  4 +--
 lb_content_resolver/content_resolver.py       |  4 +--
 lb_content_resolver/database.py               | 20 +++++------
 lb_content_resolver/duplicates.py             |  2 --
 lb_content_resolver/lb_radio.py               | 13 ++-----
 lb_content_resolver/metadata_lookup.py        |  2 --
 lb_content_resolver/subsonic.py               |  2 --
 lb_content_resolver/tag_search.py             |  4 +--
 lb_content_resolver/top_tags.py               |  4 ---
 .../troi/patches/periodic_jams.py             |  2 +-
 lb_content_resolver/troi/periodic_jams.py     |  6 ++--
 .../troi/recording_resolver.py                |  5 ++-
 lb_content_resolver/unresolved_recording.py   |  4 +--
 resolve.py                                    | 36 +++++++++++--------
 14 files changed, 45 insertions(+), 63 deletions(-)

diff --git a/lb_content_resolver/artist_search.py b/lb_content_resolver/artist_search.py
index 113a09e..4aa30d5 100755
--- a/lb_content_resolver/artist_search.py
+++ b/lb_content_resolver/artist_search.py
@@ -18,9 +18,8 @@ class LocalRecordingSearchByArtistService(RecordingSearchByArtistService):
     Given the local database, search for artists that meet given tag criteria
     '''
 
-    def __init__(self, db):
+    def __init__(self):
         RecordingSearchByArtistService.__init__(self)
-        self.db = db
 
     def search(self, artist_mbids, begin_percent, end_percent, num_recordings):
         """
@@ -52,7 +51,6 @@ def search(self, artist_mbids, begin_percent, end_percent, num_recordings):
                  ORDER BY artist_mbid
                         , popularity"""
 
-        self.db.open_db()
         placeholders = ",".join(("?", ) * len(artist_mbids))
         cursor = db.execute_sql(query % placeholders, params=tuple(artist_mbids))
 
diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index fe19531..915a5c7 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -21,8 +21,7 @@ class ContentResolver:
     Scan a given path and enter/update the metadata in the search index
     '''
 
-    def __init__(self, db):
-        self.db = db
+    def __init__(self):
         self.fuzzy_index = None
 
     def get_artist_recording_metadata(self):
@@ -135,7 +134,6 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
                                               "recording_name": rec.name,
                                               "recording_mbid": rec.mbid})
 
-        self.db.open_db()
         self.build_index()
 
         hits = self.resolve_recordings(artist_recording_data, match_threshold)
diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index d82164a..e368462 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -45,14 +45,14 @@ def create(self):
         db.connect()
         db.create_tables([Recording, RecordingMetadata, Tag, RecordingTag, RecordingSubsonic, UnresolvedRecording])
 
-    def open_db(self):
+    def open(self):
         """ 
             Open the database file and connect to the db.
         """
         setup_db(self.db_file)
         db.connect()
 
-    def close_db(self):
+    def close(self):
         """ Close the db."""
         db.close()
 
@@ -72,7 +72,6 @@ def scan(self, music_dir):
 
         # Future improvement, commit to DB only every 1000 tracks or so.
         print("Check collection size...")
-        self.open_db()
         self.track_count_estimate = 0
         self.traverse("", dry_run=True)
         self.audio_file_count = self.track_count_estimate
@@ -270,20 +269,21 @@ def add(self, relative_path):
             self.progress_bar.write("    error %s" % details)
 
 
-    def database_cleanup(self):
+    def database_cleanup(self, dry_run):
         '''
         Look for missing tracks and remove them from the DB. Then look for empty releases/artists and remove those too
         '''
 
-        self.open_db()
         query = Recording.select()
         recording_ids = []
         for recording in query:
             if not os.path.exists(recording.file_path):
-                print("UNLINK %s" % recording.file_path)
+                print("RM %s" % recording.file_path)
                 recording_ids.append(recording.id)
 
-        placeholders = ",".join(("?", ) * len(recording_ids))
-        db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % placeholders, tuple(recording_ids))
-
-        self.close_db()
+        if not dry_run:
+            placeholders = ",".join(("?", ) * len(recording_ids))
+            db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % placeholders, tuple(recording_ids))
+            print("Stale references removed")
+        else:
+            print("--delete not specified, no refeences removed")
diff --git a/lb_content_resolver/duplicates.py b/lb_content_resolver/duplicates.py
index 7715963..afc8925 100755
--- a/lb_content_resolver/duplicates.py
+++ b/lb_content_resolver/duplicates.py
@@ -50,8 +50,6 @@ def get_duplicate_recordings(self, include_different_releases):
                        HAVING cnt > 1 
                      ORDER BY cnt DESC, artist_name, recording_name"""
 
-        self.db.open_db()
-
         return [ (r[0], r[1], r[2], r[3], json.loads(r[4]), r[5]) for r in db.execute_sql(query).fetchall() ]
 
     
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index b916088..1c6347c 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -20,16 +20,11 @@ class ListenBrainzRadioLocal:
     # TODO: Make this an argument
     MATCH_THRESHOLD = .8
 
-    def __init__(self, db):
-        self.db = db
-
     def sanity_check(self):
         """
         Run a sanity check on the DB to see if data is missing that is required for LB Radio to work.
         """
 
-        self.db.open_db()
-
         num_recordings = db.execute_sql("SELECT COUNT(*) FROM recording").fetchone()[0]
         num_metadata = db.execute_sql("SELECT COUNT(*) FROM recording_metadata").fetchone()[0]
         num_subsonic = db.execute_sql("SELECT COUNT(*) FROM recording_subsonic").fetchone()[0]
@@ -52,11 +47,9 @@ def generate(self, mode, prompt):
            Generate a playlist given the mode and prompt.
         """
 
-        self.db.open_db()
-
         patch = LBRadioPatch({"mode": mode, "prompt": prompt, "echo": True, "debug": True, "min_recordings": 1})
-        patch.register_service(LocalRecordingSearchByTagService(self.db))
-        patch.register_service(LocalRecordingSearchByArtistService(self.db))
+        patch.register_service(LocalRecordingSearchByTagService())
+        patch.register_service(LocalRecordingSearchByArtistService())
 
         # Now generate the playlist
         try:
@@ -89,7 +82,7 @@ def resolve_playlist(self, match_threshold, playlist):
         return self.resolve_recordings(match_threshold, recordings)
 
     def resolve_recordings(self, match_threshold, recordings):
-        cr = ContentResolver(self.db)
+        cr = ContentResolver()
         resolved = cr.resolve_playlist(match_threshold, recordings)
 
         for i, t_recording in enumerate(recordings):
diff --git a/lb_content_resolver/metadata_lookup.py b/lb_content_resolver/metadata_lookup.py
index 883ebe1..108abec 100755
--- a/lb_content_resolver/metadata_lookup.py
+++ b/lb_content_resolver/metadata_lookup.py
@@ -26,8 +26,6 @@ def lookup(self):
         Iterate over all recordings in the database and call lookup_chunk for chunks of recordings.
         """
 
-        self.db.open_db()
-
         cursor = db.execute_sql("""SELECT recording.id, recording.recording_mbid, recording_metadata.id
                                      FROM recording 
                                 LEFT JOIN recording_metadata
diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py
index ab4695f..fcf3377 100755
--- a/lb_content_resolver/subsonic.py
+++ b/lb_content_resolver/subsonic.py
@@ -33,9 +33,7 @@ def sync(self):
         self.matched = 0
         self.error = 0
 
-        self.open_db()
         self.run_sync()
-        self.close_db()
 
         print("Checked %s albums:" % self.total)
         print("  %5d albums matched" % self.matched)
diff --git a/lb_content_resolver/tag_search.py b/lb_content_resolver/tag_search.py
index 4aa6c28..9343ca9 100755
--- a/lb_content_resolver/tag_search.py
+++ b/lb_content_resolver/tag_search.py
@@ -21,9 +21,8 @@ class LocalRecordingSearchByTagService(RecordingSearchByTagService):
     to make this work for tracks without subsonic ids.
     '''
 
-    def __init__(self, db):
+    def __init__(self):
         RecordingSearchByTagService.__init__(self)
-        self.db = db
 
     def search(self, tags, operator, begin_percent, end_percent, num_recordings):
         """
@@ -48,7 +47,6 @@ def search(self, tags, operator, begin_percent, end_percent, num_recordings):
         else:
             query, params, pop_clause = self.and_search(tags)
 
-        self.db.open_db()
         placeholders = ",".join(("?", ) * len(tags))
         cursor = db.execute_sql(query % (placeholders, pop_clause), params)
 
diff --git a/lb_content_resolver/top_tags.py b/lb_content_resolver/top_tags.py
index 6a7d5bf..10999d1 100755
--- a/lb_content_resolver/top_tags.py
+++ b/lb_content_resolver/top_tags.py
@@ -17,9 +17,6 @@ class TopTags:
        Class to fetch top tags
     '''
 
-    def __init__(self, db):
-        self.db = db
-
     def get_top_tags(self, limit=50):
         """
         """
@@ -35,7 +32,6 @@ def get_top_tags(self, limit=50):
                  ORDER BY cnt DESC
                     LIMIT ?"""
 
-        self.db.open_db()
         cursor = db.execute_sql(query, (limit,))
 
         top_tags = []
diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py
index 7cd59e3..48abfcf 100755
--- a/lb_content_resolver/troi/patches/periodic_jams.py
+++ b/lb_content_resolver/troi/patches/periodic_jams.py
@@ -72,7 +72,7 @@ def create(self, inputs):
         recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement()
         recs_lookup.set_sources(latest_filter)
 
-        resolve = RecordingResolverElement(db, .8)
+        resolve = RecordingResolverElement(.8)
         resolve.set_sources(recs_lookup)
 
         pl_maker = PlaylistMakerElement(name="Local Periodic Jams for %s" % (user_name),
diff --git a/lb_content_resolver/troi/periodic_jams.py b/lb_content_resolver/troi/periodic_jams.py
index 73b0f3b..97318c9 100755
--- a/lb_content_resolver/troi/periodic_jams.py
+++ b/lb_content_resolver/troi/periodic_jams.py
@@ -10,8 +10,8 @@ class LocalPeriodicJams(ListenBrainzRadioLocal):
     # TODO: Make this an argument
     MATCH_THRESHOLD = .8
 
-    def __init__(self, db, user_name):
-        ListenBrainzRadioLocal.__init__(self, db)
+    def __init__(self, user_name):
+        ListenBrainzRadioLocal.__init__(self)
         self.user_name = user_name
 
     def generate(self):
@@ -19,8 +19,6 @@ def generate(self):
            Generate a periodic jams playlist
         """
 
-        self.db.open_db()
-
         patch = LocalPeriodicJamsPatch({"user_name": self.user_name, "echo": True, "debug": True, "min_recordings": 1})
 
         # Now generate the playlist
diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py
index acc4ce0..58348c0 100644
--- a/lb_content_resolver/troi/recording_resolver.py
+++ b/lb_content_resolver/troi/recording_resolver.py
@@ -12,11 +12,10 @@ class RecordingResolverElement(Element):
         name set and resolves them to a local collection by using the ContentResolver class
     """
 
-    def __init__(self, db, match_threshold):
+    def __init__(self, match_threshold):
         Element.__init__(self)
-        self.db = db
         self.match_threshold = match_threshold
-        self.resolve = ContentResolver(db)
+        self.resolve = ContentResolver()
 
     @staticmethod
     def inputs():
diff --git a/lb_content_resolver/unresolved_recording.py b/lb_content_resolver/unresolved_recording.py
index 1bcd95b..4d0f518 100755
--- a/lb_content_resolver/unresolved_recording.py
+++ b/lb_content_resolver/unresolved_recording.py
@@ -50,7 +50,7 @@ def add(self, recording_mbids):
             for mbid in recording_mbids:
                 db.execute_sql(query, (mbid, datetime.datetime.now()))
 
-    def get_releases(self, num_items):
+    def get_releases(self):
         """
             Organize the unresolved recordings into releases with a list of recordings.
             Return up to num_item releases.
@@ -111,7 +111,7 @@ def get_releases(self, num_items):
                 "recordings": release
             })
 
-        return self.multisort(release_list, (("lookup_count", True), ("artist_name", False), ("release_name", False)))[:num_items]
+        return self.multisort(release_list, (("lookup_count", True), ("artist_name", False), ("release_name", False)))
 
     def print_releases(self, releases):
         """ Neatly print all the release/recordings returned from the get_releases function """
diff --git a/resolve.py b/resolve.py
index 19edb89..84eec3b 100755
--- a/resolve.py
+++ b/resolve.py
@@ -19,7 +19,6 @@
 import config
 
 # TODO: Make sure all functions work with subsonic and with local files
-# TODO: avoid passing in db to objects and just open the db
 
 
 def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask):
@@ -58,15 +57,18 @@ def create(index_dir):
 def scan(index_dir, music_dir):
     """Scan a directory and its subdirectories for music files to add to the collection"""
     db = Database(index_dir)
+    db.open()
     db.scan(music_dir)
 
 
 @click.command()
+@click.option('-d', '--delete', required=False, is_flag=True, default=True)
 @click.argument('index_dir')
-def cleanup(index_dir):
+def cleanup(delete, index_dir):
     """Perform a database cleanup. Check that files exist and if they don't remove from the index"""
     db = Database(index_dir)
-    db.database_cleanup()
+    db.open()
+    db.database_cleanup(delete)
 
 
 @click.command()
@@ -74,6 +76,7 @@ def cleanup(index_dir):
 def metadata(index_dir):
     """Lookup metadata (popularity and tags) for recordings"""
     db = Database(index_dir)
+    db.open()
     lookup = MetadataLookup(db)
     lookup.lookup()
 
@@ -87,6 +90,7 @@ def metadata(index_dir):
 def subsonic(index_dir):
     """Scan a remote subsonic music collection"""
     db = SubsonicDatabase(index_dir)
+    db.open()
     db.sync()
 
 
@@ -98,7 +102,8 @@ def subsonic(index_dir):
 def playlist(index_dir, jspf_playlist, m3u_playlist, threshold):
     """ Resolve a JSPF file with MusicBrainz recording MBIDs to files in the local collection"""
     db = Database(index_dir)
-    cr = ContentResolver(db)
+    db.open()
+    cr = ContentResolver()
     jspf = read_jspf_playlist(jspf_playlist)
     results = cr.resolve_playlist(threshold, jspf_playlist=jspf)
     write_m3u_playlist_from_results(m3u_playlist, jspf["playlist"]["title"], results)
@@ -114,7 +119,8 @@ def playlist(index_dir, jspf_playlist, m3u_playlist, threshold):
 def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, prompt):
     """Use the ListenBrainz Radio engine to create a playlist from a prompt, using a local music collection"""
     db = SubsonicDatabase(index_dir)
-    r = ListenBrainzRadioLocal(db)
+    db.open()
+    r = ListenBrainzRadioLocal()
     jspf = r.generate(mode, prompt)
     output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask)
 
@@ -123,9 +129,10 @@ def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, pr
 @click.argument('index_dir')
 @click.argument('count', required=False, default=250)
 def top_tags(index_dir, count):
-    "Display the top most used tags in the music collection. Useful for writing LB Radio tag prompts" ""
+    "Display the top most used tags in the music collection. Useful for writing LB Radio tag prompts"
     db = Database(index_dir)
-    tt = TopTags(db)
+    db.open()
+    tt = TopTags()
     tt.print_top_tags_tightly(count)
 
 
@@ -133,8 +140,9 @@ def top_tags(index_dir, count):
 @click.argument('index_dir')
 @click.option('-e', '--exclude-different-release', required=False, default=False, is_flag=True)
 def duplicates(exclude_different_release, index_dir):
-    "Print all the tracks in the DB that are duplciated as per recording_mbid" ""
+    "Print all the tracks in the DB that are duplciated as per recording_mbid"
     db = Database(index_dir)
+    db.open()
     fd = FindDuplicates(db)
     fd.print_duplicate_recordings(exclude_different_release)
 
@@ -148,20 +156,20 @@ def duplicates(exclude_different_release, index_dir):
 def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, user_name):
     "Generate a periodic jams playlist"
     db = SubsonicDatabase(index_dir)
-    pj = LocalPeriodicJams(db, user_name)
+    db.open()
+    pj = LocalPeriodicJams(user_name)
     jspf = pj.generate()
     output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask)
 
 @click.command()
-@click.option('-c', '--count', required=False, default=25)
 @click.argument('index_dir')
-def unresolved_releases(count, index_dir):
+def unresolved(index_dir):
     "Show the top unresolved releases"
 
     db = SubsonicDatabase(index_dir)
-    db.open_db()
+    db.open()
     urt = UnresolvedRecordingTracker()
-    releases = urt.get_releases(num_items=count)
+    releases = urt.get_releases()
     urt.print_releases(releases)
 
 
@@ -175,7 +183,7 @@ def unresolved_releases(count, index_dir):
 cli.add_command(top_tags)
 cli.add_command(duplicates)
 cli.add_command(periodic_jams)
-cli.add_command(unresolved_releases)
+cli.add_command(unresolved)
 
 
 def usage(command):

From 19ae013b50424afa7bf412c6950f3e704fc18fc2 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Tue, 9 Jan 2024 11:32:25 +0100
Subject: [PATCH 34/39] Huh. I'm stuck

---
 README.md                                     | 37 +++++++++-
 lb_content_resolver/artist_search.py          |  2 +-
 lb_content_resolver/database.py               | 23 +++++++
 lb_content_resolver/lb_radio.py               | 24 +------
 lb_content_resolver/metadata_lookup.py        |  3 -
 lb_content_resolver/tag_search.py             | 10 +--
 .../troi/patches/periodic_jams.py             | 11 ++-
 lb_content_resolver/troi/periodic_jams.py     | 13 +++-
 .../troi/recording_resolver.py                | 67 ++++++++++++++-----
 lb_content_resolver/utils.py                  |  4 +-
 resolve.py                                    | 25 +++++--
 11 files changed, 150 insertions(+), 69 deletions(-)

diff --git a/README.md b/README.md
index c5b4c95..a1c7cec 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,34 @@
 The ListenBrainz Content Resolver resolves global JSPF playlists to
 a local collection of music, using the resolve function.
 
-ListenBrainz Local Radio allows you to generate tag radio playlists that
-can be uploaded to your favorite subsonic API enabled music system.
+The features of this package include:
 
-## Quick Start
+1. ListenBrainz Radio Local: allows you to generate radio-style playlists that
+that are created using only the files in the local collection, or if that is not
+possible, a global playlist with MBIDS will be resolved to a local file collection
+as best as possible.
+
+2. Periodic-jams: ListenBrainz periodic-jams, but fully resolved against your own
+local collection. This is optimized for local and gives better results than
+the global troi patch by the same name.
+
+3. Metadata fetchgin: Several of the features here require metadata to be downloaded
+from ListenBrainz in order to power the LB Radio Local.
+
+4. Scan local file collections. MP3, Ogg Vorbis, Ogg Opus, WMA, M4A and FLAC file are supported.
+
+5. Scan a remote subsonic API collection. We've tested Navidrome, Funkwhale and Gonic.
+
+6. Print a report of duplicate files in the collection
+
+7. Print a list of top tags for the collection
+
+8. Print a list of tracks that failed to resolve and print the list of albums that they
+belong to. This gives the user feedback about tracks that could be added to the collection
+to improve the local matching.
+
+
+## Installation
 
 To install the package:
 
@@ -18,6 +42,8 @@ pip install -r requirements.txt
 
 ## Scanning your collection
 
+### Scan a collection on the local filesystem
+
 Then prepare the index and scan a music collection. mp3, m4a, wma, OggVorbis, OggOpus and flac files are supported.
 
 ```
@@ -31,6 +57,11 @@ If you remove from tracks from your collection, use cleanup to remove refereces
 ./resolve.py cleanup music_index
 ```
 
+### Scan a Subsonic collection
+
+Finish me
+
+
 ## Resolve JSPF playlists to local collection
 
 Then make a JSPF playlist on LB:
diff --git a/lb_content_resolver/artist_search.py b/lb_content_resolver/artist_search.py
index 4aa30d5..6f694ba 100755
--- a/lb_content_resolver/artist_search.py
+++ b/lb_content_resolver/artist_search.py
@@ -45,7 +45,7 @@ def search(self, artist_mbids, begin_percent, end_percent, num_recordings):
                      FROM recording
                      JOIN recording_metadata
                        ON recording.id = recording_metadata.recording_id
-                     JOIN recording_subsonic
+                LEFT JOIN recording_subsonic
                        ON recording.id = recording_subsonic.recording_id
                     WHERE artist_mbid in (%s)
                  ORDER BY artist_mbid
diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index e368462..1c2bef8 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -287,3 +287,26 @@ def database_cleanup(self, dry_run):
             print("Stale references removed")
         else:
             print("--delete not specified, no refeences removed")
+
+    def metadata_sanity_check(self, include_subsonic=False):
+        """
+        Run a sanity check on the DB to see if data is missing that is required for LB Radio to work.
+        """
+
+        num_recordings = db.execute_sql("SELECT COUNT(*) FROM recording").fetchone()[0]
+        num_metadata = db.execute_sql("SELECT COUNT(*) FROM recording_metadata").fetchone()[0]
+        num_subsonic = db.execute_sql("SELECT COUNT(*) FROM recording_subsonic").fetchone()[0]
+
+        if num_metadata == 0:
+            print("sanity check: You have not downloaded metadata for your collection. Run the metadata command.")
+        elif num_metadata < num_recordings // 2:
+            print("sanity check: Only %d of your %d recordings have metadata information available. Run the metdata command." %
+                  (num_metadata, num_recordings))
+
+        if include_subsonic:
+            if num_subsonic == 0 and include_subsonic:
+                print(
+                    "sanity check: You have not matched your collection against the collection in subsonic. Run the subsonic command.")
+            elif num_subsonic < num_recordings // 2:
+                print("sanity check: Only %d of your %d recordings have subsonic matches. Run the subsonic command." %
+                      (num_subsonic, num_recordings))
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index 1c6347c..61c3d92 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -20,28 +20,6 @@ class ListenBrainzRadioLocal:
     # TODO: Make this an argument
     MATCH_THRESHOLD = .8
 
-    def sanity_check(self):
-        """
-        Run a sanity check on the DB to see if data is missing that is required for LB Radio to work.
-        """
-
-        num_recordings = db.execute_sql("SELECT COUNT(*) FROM recording").fetchone()[0]
-        num_metadata = db.execute_sql("SELECT COUNT(*) FROM recording_metadata").fetchone()[0]
-        num_subsonic = db.execute_sql("SELECT COUNT(*) FROM recording_subsonic").fetchone()[0]
-
-        if num_metadata == 0:
-            print("sanity check: You have not downloaded metadata for your collection. Run the metadata command.")
-        elif num_metadata < num_recordings // 2:
-            print("sanity check: Only %d of your %d recordings have metadata information available. Run the metdata command." %
-                  (num_metadata, num_recordings))
-
-        if num_subsonic == 0:
-            print(
-                "sanity check: You have not matched your collection against the collection in subsonic. Run the subsonic command.")
-        elif num_subsonic < num_recordings // 2:
-            print("sanity check: Only %d of your %d recordings have subsonic matches. Run the subsonic command." %
-                  (num_subsonic, num_recordings))
-
     def generate(self, mode, prompt):
         """
            Generate a playlist given the mode and prompt.
@@ -60,7 +38,7 @@ def generate(self, mode, prompt):
 
         if playlist == None:
             print("Your prompt generated an empty playlist.")
-            self.sanity_check()
+            return {"playlist": {"track": []}}
 
         # Resolve any tracks that have not been resolved to a subsonic_id or a local file
         self.resolve_playlist(self.MATCH_THRESHOLD, playlist)
diff --git a/lb_content_resolver/metadata_lookup.py b/lb_content_resolver/metadata_lookup.py
index 108abec..cfba09a 100755
--- a/lb_content_resolver/metadata_lookup.py
+++ b/lb_content_resolver/metadata_lookup.py
@@ -18,9 +18,6 @@ class MetadataLookup:
 
     BATCH_SIZE = 1000
 
-    def __init__(self, db):
-        self.db = db
-
     def lookup(self):
         """
         Iterate over all recordings in the database and call lookup_chunk for chunks of recordings.
diff --git a/lb_content_resolver/tag_search.py b/lb_content_resolver/tag_search.py
index 9343ca9..7398fea 100755
--- a/lb_content_resolver/tag_search.py
+++ b/lb_content_resolver/tag_search.py
@@ -52,7 +52,7 @@ def search(self, tags, operator, begin_percent, end_percent, num_recordings):
 
         recordings = []
         for rec in cursor.fetchall():
-            recordings.append({"recording_mbid": rec[0], "popularity": rec[1], "subsonic_id": rec[2]})
+            recordings.append({"recording_mbid": rec[0], "popularity": rec[1], "subsonic_id": rec[2], "file_path": rec[3]})
 
         return select_recordings_on_popularity(recordings, begin_percent, end_percent, num_recordings)
 
@@ -73,14 +73,13 @@ def or_search(self, tags, min_popularity=None, max_popularity=None):
                        SELECT recording_mbid
                             , popularity AS percent
                             , subsonic_id
-                            , recording_name
-                            , artist_name
+                            , file_path
                          FROM recording
                          JOIN recording_ids
                            ON recording.id = recording_ids.recording_id
                          JOIN recording_metadata
                            ON recording.id = recording_metadata.recording_id
-                         JOIN recording_subsonic
+                    LEFT JOIN recording_subsonic
                            ON recording.id = recording_subsonic.recording_id
                            %s
                      ORDER BY popularity DESC"""
@@ -118,12 +117,13 @@ def and_search(self, tags, min_popularity=None, max_popularity=None):
                        SELECT recording_mbid
                             , popularity AS percent
                             , subsonic_id
+                            , file_path
                          FROM recording
                          JOIN recording_ids
                            ON recording.id = recording_ids.recording_id
                          JOIN recording_metadata
                            ON recording.id = recording_metadata.recording_id
-                         JOIN recording_subsonic
+                    LEFT JOIN recording_subsonic
                            ON recording.id = recording_subsonic.recording_id
                            %s
                      ORDER BY popularity DESC"""
diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py
index 48abfcf..37b0d0d 100755
--- a/lb_content_resolver/troi/patches/periodic_jams.py
+++ b/lb_content_resolver/troi/patches/periodic_jams.py
@@ -18,7 +18,7 @@ class LocalPeriodicJamsPatch(troi.patch.Patch):
     """
 
 
-    def __init__(self, args, debug=False):
+    def __init__(self, args,  debug=False):
         super().__init__(args, debug)
 
     @staticmethod
@@ -37,10 +37,8 @@ def inputs():
             "args": ["user_name"]
         }, {
             "type": "argument",
-            "args": ["type"],
-            "kwargs": {
-                "required": False
-            }
+            "args": ["target"],
+            
         }]
 
     @staticmethod
@@ -57,6 +55,7 @@ def description():
 
     def create(self, inputs):
         user_name = inputs['user_name']
+        target = inputs['target']
 
         recs = troi.listenbrainz.recs.UserRecordingRecommendationsElement(user_name,
                                                                           "raw",
@@ -72,7 +71,7 @@ def create(self, inputs):
         recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement()
         recs_lookup.set_sources(latest_filter)
 
-        resolve = RecordingResolverElement(.8)
+        resolve = RecordingResolverElement(.8, target)
         resolve.set_sources(recs_lookup)
 
         pl_maker = PlaylistMakerElement(name="Local Periodic Jams for %s" % (user_name),
diff --git a/lb_content_resolver/troi/periodic_jams.py b/lb_content_resolver/troi/periodic_jams.py
index 97318c9..7b6fb5a 100755
--- a/lb_content_resolver/troi/periodic_jams.py
+++ b/lb_content_resolver/troi/periodic_jams.py
@@ -10,16 +10,23 @@ class LocalPeriodicJams(ListenBrainzRadioLocal):
     # TODO: Make this an argument
     MATCH_THRESHOLD = .8
 
-    def __init__(self, user_name):
+    def __init__(self, user_name, target):
         ListenBrainzRadioLocal.__init__(self)
         self.user_name = user_name
+        self.target = target
 
     def generate(self):
         """
            Generate a periodic jams playlist
         """
 
-        patch = LocalPeriodicJamsPatch({"user_name": self.user_name, "echo": True, "debug": True, "min_recordings": 1})
+        patch = LocalPeriodicJamsPatch({
+            "user_name": self.user_name,
+            "echo": True,
+            "debug": True,
+            "target": self.target,
+            "min_recordings": 1
+        })
 
         # Now generate the playlist
         try:
@@ -30,7 +37,7 @@ def generate(self):
 
         if playlist == None:
             print("Your prompt generated an empty playlist.")
-            self.sanity_check()
+            return {"playlist": {"track": []}}
 
         # Resolve any tracks that have not been resolved to a subsonic_id or a local file
         self.resolve_playlist(self.MATCH_THRESHOLD, playlist)
diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py
index 58348c0..990f923 100644
--- a/lb_content_resolver/troi/recording_resolver.py
+++ b/lb_content_resolver/troi/recording_resolver.py
@@ -3,6 +3,7 @@
 from lb_content_resolver.content_resolver import ContentResolver
 from lb_content_resolver.model.subsonic import RecordingSubsonic
 from lb_content_resolver.model.recording import Recording
+from lb_content_resolver.model.database import db
 from troi import Recording
 
 
@@ -12,10 +13,14 @@ class RecordingResolverElement(Element):
         name set and resolves them to a local collection by using the ContentResolver class
     """
 
-    def __init__(self, match_threshold):
+    def __init__(self, match_threshold, target="filesystem"):
+        """ Match threshold: The value from 0 to 1.0 on how sure a match must be to be accepted.
+            target: Either "filesystem" or "subsonic", the audio file source we're working with.
+        """
         Element.__init__(self)
         self.match_threshold = match_threshold
         self.resolve = ContentResolver()
+        self.target = target
 
     @staticmethod
     def inputs():
@@ -44,27 +49,53 @@ def read(self, inputs):
         recording_ids = [result["recording_id"] for result in resolved]
 
         # Fetch the recordings to lookup subsonic ids
-        recordings = RecordingSubsonic \
-                      .select() \
-                      .where(RecordingSubsonic.recording_id.in_(recording_ids)) \
-                      .dicts()
-
-        # Build a subsonic index
+        query = """SELECT recording_mbid
+                        , file_path
+                        , subsonic_id
+                     FROM recording
+                LEFT JOIN recording_subsonic
+                       ON recording_subsonic.recording_id = recording.id
+                    WHERE recording.id IN (%s)"""
+
+        placeholders = ",".join(("?", ) * len(recording_ids))
+        print(query % placeholders)
+        cursor = db.execute_sql(query % placeholders, params=tuple(recording_ids))
+        recordings = []
+        for row in cursor.fetchall():
+            print("row ", row)
+            recordings.append({ "recording_mbid": row[0],
+                                "file_path": row[1],
+                                "subsonic_id": row[2] })
+        print(recordings)
+        print(recording_ids)
+
+        # Build a indexes
         subsonic_index = {}
-        matched = []
+        file_index = {}
         for recording in recordings:
-            matched.append(recording["recording"])
-            subsonic_index[recording["recording"]] = recording["subsonic_id"]
+            if "subsonic_id" in recording:
+                subsonic_index[recording["recording_mbid"]] = recording["subsonic_id"]
+            if "file_path" in recording:
+                subsonic_index[recording["recording_mbid"]] = recording["file_path"]
 
-        # Set the subsonic ids into the recordings and only return recordings with an ID
+        # Set the ids into the recordings and only return recordings with an ID, depending on target
         results = []
         for r in resolved:
-            try:
-                recording = inputs[0][r["index"]]
-                recording.musicbrainz["subsonic_id"] = subsonic_index[r["recording_id"]]
-            except KeyError:
-                continue
-
-            results.append(recording)
+            recording = inputs[0][r["index"]]
+            if self.target == "subsonic":
+                try:
+                    recording.musicbrainz["subsonic_id"] = subsonic_index[r["recording_id"]]
+                except KeyError:
+                    continue
+
+                results.append(recording)
+
+            if self.target == "filesystem":
+                try:
+                    recording.musicbrainz["filename"] = file_index[r["recording_id"]]
+                except KeyError:
+                    continue
+
+                results.append(recording)
 
         return results
diff --git a/lb_content_resolver/utils.py b/lb_content_resolver/utils.py
index 042842e..f55d9de 100755
--- a/lb_content_resolver/utils.py
+++ b/lb_content_resolver/utils.py
@@ -70,8 +70,10 @@ def select_recordings_on_popularity(recordings, begin_percent, end_percent, num_
     results = plist()
     for rec in matching_recordings:
         r = TroiRecording(mbid=rec["recording_mbid"])
-        if "subsonic_id" in rec:
+        if "subsonic_id" in rec and rec["subsonic_id"]:
             r.musicbrainz = {"subsonic_id": rec["subsonic_id"]}
+        if "file_path" in rec and rec["file_path"]:
+            r.musicbrainz = {"filename": rec["file_path"]}
 
         results.append(r)
 
diff --git a/resolve.py b/resolve.py
index 84eec3b..7fa31cd 100755
--- a/resolve.py
+++ b/resolve.py
@@ -19,6 +19,8 @@
 import config
 
 # TODO: Make sure all functions work with subsonic and with local files
+# TODO: Make sure config.py is only needed for subsonic functions
+# TODO: Think up a better way to specify the DB location
 
 
 def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask):
@@ -77,11 +79,11 @@ def metadata(index_dir):
     """Lookup metadata (popularity and tags) for recordings"""
     db = Database(index_dir)
     db.open()
-    lookup = MetadataLookup(db)
+    lookup = MetadataLookup()
     lookup.lookup()
 
     print("\nThese top tags describe your collection:")
-    tt = TopTags(db)
+    tt = TopTags()
     tt.print_top_tags_tightly(100)
 
 
@@ -122,6 +124,11 @@ def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, pr
     db.open()
     r = ListenBrainzRadioLocal()
     jspf = r.generate(mode, prompt)
+    if len(jspf["playlist"]["track"]) == 0:
+        print(upload_to_subsonic)
+        db.metadata_sanity_check(include_subsonic=upload_to_subsonic)
+        return
+
     output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask)
 
 
@@ -148,17 +155,23 @@ def duplicates(exclude_different_release, index_dir):
 
 
 @click.command()
-@click.option('-u', '--upload-to-subsonic', required=False, is_flag=True)
+@click.option('-u', '--upload-to-subsonic', required=False, is_flag=True, default=False)
 @click.option('-p', '--save-to-playlist', required=False)
 @click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file")
 @click.argument('index_dir')
 @click.argument('user_name')
 def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, user_name):
     "Generate a periodic jams playlist"
-    db = SubsonicDatabase(index_dir)
+    db = Database(index_dir)
     db.open()
-    pj = LocalPeriodicJams(user_name)
+
+    target = "subsonic" if upload_to_subsonic else "filesystem"
+    pj = LocalPeriodicJams(user_name, target)
     jspf = pj.generate()
+    if len(jspf["playlist"]["track"]) == 0:
+        db.metadata_sanity_check(include_subsonic=upload_to_subsonic)
+        return
+
     output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask)
 
 @click.command()
@@ -166,7 +179,7 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use
 def unresolved(index_dir):
     "Show the top unresolved releases"
 
-    db = SubsonicDatabase(index_dir)
+    db = Database(index_dir)
     db.open()
     urt = UnresolvedRecordingTracker()
     releases = urt.get_releases()

From b16af12eb60e7bc7ca77d5369b5ded36952008be Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Tue, 9 Jan 2024 21:02:14 +0100
Subject: [PATCH 35/39] Interim checkin

---
 lb_content_resolver/troi/recording_resolver.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py
index 990f923..10572e2 100644
--- a/lb_content_resolver/troi/recording_resolver.py
+++ b/lb_content_resolver/troi/recording_resolver.py
@@ -46,7 +46,7 @@ def read(self, inputs):
 
         # Resolve the recordings
         resolved = self.resolve.resolve_recordings(lookup_data, self.match_threshold)
-        recording_ids = [result["recording_id"] for result in resolved]
+        recording_ids = tuple([result["recording_id"] for result in resolved])
 
         # Fetch the recordings to lookup subsonic ids
         query = """SELECT recording_mbid
@@ -58,16 +58,13 @@ def read(self, inputs):
                     WHERE recording.id IN (%s)"""
 
         placeholders = ",".join(("?", ) * len(recording_ids))
-        print(query % placeholders)
-        cursor = db.execute_sql(query % placeholders, params=tuple(recording_ids))
+        cursor = db.execute_sql(query % placeholders, params=recording_ids)
         recordings = []
         for row in cursor.fetchall():
-            print("row ", row)
             recordings.append({ "recording_mbid": row[0],
                                 "file_path": row[1],
                                 "subsonic_id": row[2] })
-        print(recordings)
-        print(recording_ids)
+        print(len(recordings))
 
         # Build a indexes
         subsonic_index = {}

From 74f7a3412c81dd9404f5b20d301e5c24c959ffb7 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Tue, 9 Jan 2024 23:17:52 +0100
Subject: [PATCH 36/39] All features now work with filename or subsonic_id

---
 lb_content_resolver/database.py               |  8 +++++--
 lb_content_resolver/lb_radio.py               | 16 ++++++++++++-
 lb_content_resolver/tag_search.py             |  3 ---
 .../troi/recording_resolver.py                | 24 ++++++++++++-------
 resolve.py                                    | 20 ++++++++++++++--
 5 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index 1c2bef8..75eee1b 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -49,8 +49,12 @@ def open(self):
         """ 
             Open the database file and connect to the db.
         """
-        setup_db(self.db_file)
-        db.connect()
+        try:
+            setup_db(self.db_file)
+            db.connect()
+        except peewee.OperationalError:
+            print("Cannot open database index file: '%s'" % self.db_file)
+            sys.exit(-1)
 
     def close(self):
         """ Close the db."""
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index 61c3d92..206e5b4 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -46,7 +46,9 @@ def generate(self, mode, prompt):
         return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}}
 
     def resolve_playlist(self, match_threshold, playlist):
+        """ Attempt to resolve any tracks without local ids to local ids """
 
+        # Find recordings that are missing local ids
         recordings = []
         for recording in playlist.playlists[0].recordings:
             if "subsonic_id" in recording.musicbrainz or "filename" in recording.musicbrainz:
@@ -57,9 +59,21 @@ def resolve_playlist(self, match_threshold, playlist):
         if not recordings:
             return
 
-        return self.resolve_recordings(match_threshold, recordings)
+        # Use the content resolver to resolve the recordings
+        self.resolve_recordings(match_threshold, recordings)
+
+        # Now filter out the tracks that were not matched
+        filtered = []
+        for rec in playlist.playlists[0].recordings:
+            if "subsonic_id" in rec.musicbrainz or "fileame" in rec.musicbrainz:
+                filtered.append(rec)
+
+        playlist.playlists[0].recordings = filtered
+
 
     def resolve_recordings(self, match_threshold, recordings):
+        """ Use the content resolver to resolve the given recordings """
+
         cr = ContentResolver()
         resolved = cr.resolve_playlist(match_threshold, recordings)
 
diff --git a/lb_content_resolver/tag_search.py b/lb_content_resolver/tag_search.py
index 7398fea..c266e95 100755
--- a/lb_content_resolver/tag_search.py
+++ b/lb_content_resolver/tag_search.py
@@ -16,9 +16,6 @@
 class LocalRecordingSearchByTagService(RecordingSearchByTagService):
     ''' 
     Given the local database, search for recordings that meet given tag criteria
-
-    NOTE: Right now this only works for subsonic tracks -- at some point we may need
-    to make this work for tracks without subsonic ids.
     '''
 
     def __init__(self):
diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py
index 10572e2..3bb8f0a 100644
--- a/lb_content_resolver/troi/recording_resolver.py
+++ b/lb_content_resolver/troi/recording_resolver.py
@@ -48,32 +48,40 @@ def read(self, inputs):
         resolved = self.resolve.resolve_recordings(lookup_data, self.match_threshold)
         recording_ids = tuple([result["recording_id"] for result in resolved])
 
+        # Could also be done with: 
+        # Recording.select().join(RecordingSubsonic, JOIN.LEFT_OUTER).where(Recording.id.in_(recording_ids))
+
         # Fetch the recordings to lookup subsonic ids
-        query = """SELECT recording_mbid
+        query = """SELECT recording.id
                         , file_path
                         , subsonic_id
                      FROM recording
                 LEFT JOIN recording_subsonic
                        ON recording_subsonic.recording_id = recording.id
-                    WHERE recording.id IN (%s)"""
+                    WHERE """
+
+        where_clause_elements = []
+        for id in recording_ids:
+            where_clause_elements.append("recording.id = %d" % id)
+            
+        where_clause = " or ".join(where_clause_elements)
+        query += where_clause
 
-        placeholders = ",".join(("?", ) * len(recording_ids))
-        cursor = db.execute_sql(query % placeholders, params=recording_ids)
+        cursor = db.execute_sql(query)
         recordings = []
         for row in cursor.fetchall():
-            recordings.append({ "recording_mbid": row[0],
+            recordings.append({ "recording_id": row[0],
                                 "file_path": row[1],
                                 "subsonic_id": row[2] })
-        print(len(recordings))
 
         # Build a indexes
         subsonic_index = {}
         file_index = {}
         for recording in recordings:
             if "subsonic_id" in recording:
-                subsonic_index[recording["recording_mbid"]] = recording["subsonic_id"]
+                subsonic_index[recording["recording_id"]] = recording["subsonic_id"]
             if "file_path" in recording:
-                subsonic_index[recording["recording_mbid"]] = recording["file_path"]
+                file_index[recording["recording_id"]] = recording["file_path"]
 
         # Set the ids into the recordings and only return recordings with an ID, depending on target
         results = []
diff --git a/resolve.py b/resolve.py
index 7fa31cd..c239c91 100755
--- a/resolve.py
+++ b/resolve.py
@@ -16,9 +16,9 @@
 from lb_content_resolver.troi.periodic_jams import LocalPeriodicJams
 from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist_from_results, write_m3u_playlist_from_jspf
 from lb_content_resolver.unresolved_recording import UnresolvedRecordingTracker
+from troi.playlist import PLAYLIST_TRACK_EXTENSION_URI
 import config
 
-# TODO: Make sure all functions work with subsonic and with local files
 # TODO: Make sure config.py is only needed for subsonic functions
 # TODO: Think up a better way to specify the DB location
 
@@ -28,10 +28,23 @@ def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask):
         return
 
     if upload_to_subsonic and len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "":
+        try:
+            _ = jspf["playlist"]["track"][0]["extension"][PLAYLIST_TRACK_EXTENSION_URI] \
+                    ["additional_metadata"]["subsonic_identifier"]
+        except KeyError:
+            print("Playlist does not appear to contain subsonic ids. Can't upload to subsonic.")
+            return
+
         if dont_ask or ask_yes_no_question("Upload via subsonic? (Y/n)"):
             print("uploading playlist")
             db.upload_playlist(jspf)
+
     elif save_to_playlist is not None and len(jspf["playlist"]["track"]) > 0:
+        try:
+            _ = jspf["playlist"]["track"][0]["location"]
+        except KeyError:
+            print("Playlist does not appear to contain file paths. Can't write a local playlist.")
+            return
         if dont_ask or ask_yes_no_question(f"Save to '{save_to_playlist}'? (Y/n)"):
             print("saving playlist")
             write_m3u_playlist_from_jspf(save_to_playlist, jspf)
@@ -162,9 +175,11 @@ def duplicates(exclude_different_release, index_dir):
 @click.argument('user_name')
 def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, user_name):
     "Generate a periodic jams playlist"
-    db = Database(index_dir)
+    db = SubsonicDatabase(index_dir)
     db.open()
 
+    # TODO: ensure that we catch upload to subsonic when we have a FS playlist
+
     target = "subsonic" if upload_to_subsonic else "filesystem"
     pj = LocalPeriodicJams(user_name, target)
     jspf = pj.generate()
@@ -174,6 +189,7 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use
 
     output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask)
 
+
 @click.command()
 @click.argument('index_dir')
 def unresolved(index_dir):

From f866b0d4007c2675430a5e9d235d85f4d77078d2 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Tue, 9 Jan 2024 23:30:48 +0100
Subject: [PATCH 37/39] All functions now work without config.py if you dont
 use subsonic

---
 README.md                       | 25 ++++++++++++++++++++++++-
 lb_content_resolver/lb_radio.py |  1 -
 lb_content_resolver/subsonic.py |  4 +++-
 resolve.py                      | 25 +++++++++++++------------
 4 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index a1c7cec..fb415de 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,9 @@ pip install -r requirements.txt
 
 ## Scanning your collection
 
+Note: Soon we will eliminate the requirement to do a filesystem scan before also doing a subsonic
+scan (if you plan to use subsonic). For now, do the file system scan, then the subsonic scan.
+
 ### Scan a collection on the local filesystem
 
 Then prepare the index and scan a music collection. mp3, m4a, wma, OggVorbis, OggOpus and flac files are supported.
@@ -59,7 +62,19 @@ If you remove from tracks from your collection, use cleanup to remove refereces
 
 ### Scan a Subsonic collection
 
-Finish me
+To enable Subsonic support you need to create a config.py file from config.py.sample:
+
+```
+cp config.py.sample config.py
+```
+
+Then edit the file and add your subsonic configuration.
+
+```
+./resolve.py subsonic music_index
+```
+
+This will match your collection to the remove subsonic API collection.
 
 
 ## Resolve JSPF playlists to local collection
@@ -212,3 +227,11 @@ If you specify -e or --exclude-different-release, then case #3 will not be shown
 The top-tags command will print the top tags and the number of times they
 have been used in your collection. This requires that the "metadata"
 command was run before.
+
+### Unresolved Releases
+
+Any tracks that fail to resolve to a local collection will have their
+recording_mbid saved in the database. This enables the unresolved releases
+report which specifies a list of releases that you might consider adding to your
+collection, because in the past they failed to resolve to your local collection.
+
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index 206e5b4..d6d947a 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -9,7 +9,6 @@
 from lb_content_resolver.artist_search import LocalRecordingSearchByArtistService
 from lb_content_resolver.model.database import db
 from lb_content_resolver.content_resolver import ContentResolver
-import config
 
 
 class ListenBrainzRadioLocal:
diff --git a/lb_content_resolver/subsonic.py b/lb_content_resolver/subsonic.py
index fcf3377..50c3ce5 100755
--- a/lb_content_resolver/subsonic.py
+++ b/lb_content_resolver/subsonic.py
@@ -9,7 +9,6 @@
 from lb_content_resolver.database import Database
 from lb_content_resolver.model.database import db
 from lb_content_resolver.utils import bcolors
-import config
 
 
 class SubsonicDatabase(Database):
@@ -45,6 +44,8 @@ def run_sync(self):
         """
 
         print("[ connect to subsonic ]")
+
+        import config
         conn = libsonic.Connection(config.SUBSONIC_HOST, config.SUBSONIC_USER, config.SUBSONIC_PASSWORD, config.SUBSONIC_PORT)
         cursor = db.connection().cursor()
 
@@ -150,6 +151,7 @@ def upload_playlist(self, jspf):
             Given a JSPF playlist, upload the playlist to the subsonic API.
         """
 
+        import config
         conn = libsonic.Connection(config.SUBSONIC_HOST, config.SUBSONIC_USER, config.SUBSONIC_PASSWORD, config.SUBSONIC_PORT)
 
         song_ids = []
diff --git a/resolve.py b/resolve.py
index c239c91..e6b6837 100755
--- a/resolve.py
+++ b/resolve.py
@@ -17,9 +17,7 @@
 from lb_content_resolver.playlist import read_jspf_playlist, write_m3u_playlist_from_results, write_m3u_playlist_from_jspf
 from lb_content_resolver.unresolved_recording import UnresolvedRecordingTracker
 from troi.playlist import PLAYLIST_TRACK_EXTENSION_URI
-import config
 
-# TODO: Make sure config.py is only needed for subsonic functions
 # TODO: Think up a better way to specify the DB location
 
 
@@ -27,17 +25,20 @@ def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask):
     if jspf is None:
         return
 
-    if upload_to_subsonic and len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "":
-        try:
-            _ = jspf["playlist"]["track"][0]["extension"][PLAYLIST_TRACK_EXTENSION_URI] \
-                    ["additional_metadata"]["subsonic_identifier"]
-        except KeyError:
-            print("Playlist does not appear to contain subsonic ids. Can't upload to subsonic.")
-            return
+    if upload_to_subsonic:
+        import config
+
+        if len(jspf["playlist"]["track"]) > 0 and config.SUBSONIC_HOST != "":
+            try:
+                _ = jspf["playlist"]["track"][0]["extension"][PLAYLIST_TRACK_EXTENSION_URI] \
+                        ["additional_metadata"]["subsonic_identifier"]
+            except KeyError:
+                print("Playlist does not appear to contain subsonic ids. Can't upload to subsonic.")
+                return
 
-        if dont_ask or ask_yes_no_question("Upload via subsonic? (Y/n)"):
-            print("uploading playlist")
-            db.upload_playlist(jspf)
+            if dont_ask or ask_yes_no_question("Upload via subsonic? (Y/n)"):
+                print("uploading playlist")
+                db.upload_playlist(jspf)
 
     elif save_to_playlist is not None and len(jspf["playlist"]["track"]) > 0:
         try:

From 9b31507a805f1a9faf436a548f9492a268d81049 Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Wed, 10 Jan 2024 12:12:56 +0100
Subject: [PATCH 38/39] Rework the index_dir and use db_files instead.

---
 README.md                               |  49 +++++------
 config.py.sample                        |   3 +
 lb_content_resolver/content_resolver.py |   2 -
 lb_content_resolver/database.py         |  24 +++---
 lb_content_resolver/fuzzy_index.py      |   7 +-
 resolve.py                              | 110 +++++++++++++++---------
 6 files changed, 111 insertions(+), 84 deletions(-)

diff --git a/README.md b/README.md
index fb415de..e0e3b0a 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,21 @@ source .virtualenv/bin/activate
 pip install -r requirements.txt
 ```
 
+### Setting up config.py
+
+While it isn't strictly necessary to set up config.py, it makes using the resolver easier:
+
+```
+cp config.py.sample config.py
+```
+
+Then edit config.py and set the location of where you're going to store your resolver database file
+into DATABASE_FILE. If you plan to use a Subsonic API, then fill out the Subsonic section as well.
+
+If you decide not to use the config.py file, make sure to pass the path to the DB file with -d to each
+command. All further examples in this file assume you added the config file and will therefore omit
+the -d option.
+
 ## Scanning your collection
 
 Note: Soon we will eliminate the requirement to do a filesystem scan before also doing a subsonic
@@ -50,14 +65,14 @@ scan (if you plan to use subsonic). For now, do the file system scan, then the s
 Then prepare the index and scan a music collection. mp3, m4a, wma, OggVorbis, OggOpus and flac files are supported.
 
 ```
-./resolve.py create music_index
-./resolve.py scan music_index <path to mp3/flac files>
+./resolve.py create
+./resolve.py scan <path to mp3/flac files>
 ```
 
 If you remove from tracks from your collection, use cleanup to remove refereces to those tracks:
 
 ```
-./resolve.py cleanup music_index
+./resolve.py cleanup
 ```
 
 ### Scan a Subsonic collection
@@ -71,7 +86,7 @@ cp config.py.sample config.py
 Then edit the file and add your subsonic configuration.
 
 ```
-./resolve.py subsonic music_index
+./resolve.py subsonic
 ```
 
 This will match your collection to the remove subsonic API collection.
@@ -94,7 +109,7 @@ curl "https://api.listenbrainz.org/1/playlist/<playlist MBID>" > test.jspf
 Finally, resolve the playlist to local files:
 
 ```
-./resolve.py playlist music_index input.jspf output.m3u
+./resolve.py playlist input.jspf output.m3u
 ```
 
 Then open the m3u playlist with a local tool.
@@ -124,21 +139,7 @@ to download more data for your MusicBrainz tagged music collection.
 First, download tag and popularity data:
 
 ```
-./resolve.py metadata music_index
-```
-
-Then, copy config.py.sample to config.py and then edit config.py:
-
-```
-cp config.py.sample config.py
-edit config.py
-```
-
-Fill out the values for your subsonic server API and save the file.
-Finally, match your collection against the subsonic collection:
-
-```
-./resolve.py subsonic music_index
+./resolve.py metadata
 ```
 
 ### Playlist generation
@@ -167,7 +168,7 @@ isn't very suited for the prompt that was given.
 #### Artist Element
 
 ```
-./resolve.py lb-radio music_index easy 'artist:(taylor swift, drake)'
+./resolve.py lb-radio easy 'artist:(taylor swift, drake)'
 ```
 
 Generates a playlist with music from Taylor Swift and artists similar
@@ -177,14 +178,14 @@ to her and Drake, and artists similar to him.
 #### Tag Element
 
 ```
-./resolve.py lb-radio music_index easy 'tag:(downtempo, trip hop)'
+./resolve.py lb-radio easy 'tag:(downtempo, trip hop)'
 ```
 
 This will generate a playlist on easy mode for recordings that are
 tagged with "downtempo" AND "trip hop".
 
 ```
-./resolve.py lb-radio music_index medium 'tag:(downtempo, trip hop)::or'
+./resolve.py lb-radio medium 'tag:(downtempo, trip hop)::or'
 ```
 
 This will generate a playlist on medium mode for recordings that are
@@ -194,7 +195,7 @@ at the end of the prompt.
 You can include more than on tag query in a prompt:
 
 ```
-./resolve.py lb-radio music_index medium 'tag:(downtempo, trip hop)::or tag:(punk, ska)'
+./resolve.py lb-radio medium 'tag:(downtempo, trip hop)::or tag:(punk, ska)'
 ```
 
 #### Stats, Collections, Playlists and Rec
diff --git a/config.py.sample b/config.py.sample
index 2a22996..fa007f4 100644
--- a/config.py.sample
+++ b/config.py.sample
@@ -1,3 +1,6 @@
+# Where to find the database file
+DATABASE_FILE = ""
+
 # To connect to a subsonic API
 SUBSONIC_HOST = ""  # include http:// or https://
 SUBSONIC_USER = ""
diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
index 915a5c7..d5b2418 100755
--- a/lb_content_resolver/content_resolver.py
+++ b/lb_content_resolver/content_resolver.py
@@ -116,8 +116,6 @@ def resolve_playlist(self, match_threshold, recordings=None, jspf_playlist=None)
         if recordings is None and jspf_playlist is None:
             raise ValueError("Either recordings or jspf_playlist must be passed.")
 
-        print("\nResolve recordings to local files or subsonic ids")
-
         artist_recording_data = []
         if jspf_playlist is not None:
             if len(jspf_playlist["playlist"]["track"]) == 0:
diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index 75eee1b..33d2973 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -23,24 +23,16 @@ class Database:
     ''' 
     Keep a database with metadata for a collection of local music files.
     '''
-    def __init__(self, index_dir):
-        self.index_dir = index_dir
-        self.db_file = os.path.join(index_dir, "lb_resolve.db")
+    def __init__(self, db_file):
+        self.db_file = db_file
         self.fuzzy_index = None
 
     def create(self):
         """ 
-            Create the index directory for the data. Currently it contains only
-            the sqlite dir, but in the future we may serialize the fuzzy index here as well.
+            Create the database. Can be run again to create tables that have been recently added to the code,
+            but don't exist in the DB yet.
         """
 
-        if not os.path.exists(self.index_dir):
-            try:
-                os.mkdir(self.index_dir)
-            except OSError as err:
-                print("Could not create index directory: %s (%s)" % (self.index_dir, err))
-                return
-
         setup_db(self.db_file)
         db.connect()
         db.create_tables([Recording, RecordingMetadata, Tag, RecordingTag, RecordingSubsonic, UnresolvedRecording])
@@ -84,7 +76,7 @@ def scan(self, music_dir):
         with tqdm(total=self.track_count_estimate) as self.progress_bar:
             self.traverse("")
 
-        self.close_db()
+        self.close()
 
         print("Checked %s tracks:" % self.total)
         print("  %5d tracks not changed since last run" % self.not_changed)
@@ -285,12 +277,16 @@ def database_cleanup(self, dry_run):
                 print("RM %s" % recording.file_path)
                 recording_ids.append(recording.id)
 
+        if not recording_ids:
+            print("No cleanup needed, all recordings found")
+            return
+
         if not dry_run:
             placeholders = ",".join(("?", ) * len(recording_ids))
             db.execute_sql("""DELETE FROM recording WHERE recording.id IN (%s)""" % placeholders, tuple(recording_ids))
             print("Stale references removed")
         else:
-            print("--delete not specified, no refeences removed")
+            print("--remove not specified, no references removed")
 
     def metadata_sanity_check(self, include_subsonic=False):
         """
diff --git a/lb_content_resolver/fuzzy_index.py b/lb_content_resolver/fuzzy_index.py
index 548c794..8142326 100755
--- a/lb_content_resolver/fuzzy_index.py
+++ b/lb_content_resolver/fuzzy_index.py
@@ -71,7 +71,10 @@ def search(self, query_data):
 
         output = []
         for i, result in enumerate(results):
-            output.append({ "confidence": fabs(result[1][0]),
-                            "recording_id": result[0][0] })
+            if len(result[0]):
+                output.append({ "confidence": fabs(result[1][0]),
+                                "recording_id": result[0][0] })
+            else:
+                output.append({ "confidence": 0.0, "recording_id": 0 })
 
         return output
diff --git a/resolve.py b/resolve.py
index e6b6837..cd9b51d 100755
--- a/resolve.py
+++ b/resolve.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import os
+import sys
 
 import click
 
@@ -18,8 +19,6 @@
 from lb_content_resolver.unresolved_recording import UnresolvedRecordingTracker
 from troi.playlist import PLAYLIST_TRACK_EXTENSION_URI
 
-# TODO: Think up a better way to specify the DB location
-
 
 def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask):
     if jspf is None:
@@ -54,44 +53,67 @@ def output_playlist(db, jspf, upload_to_subsonic, save_to_playlist, dont_ask):
         print("Playlist displayed, but not saved. Use -p or -u options to save/upload playlists.")
 
 
+def db_file_check(db_file):
+    """ Return db_file if given; otherwise return config.DATABASE_FILE, printing a hint and exiting with -1 if neither is available. """
+
+    if not db_file:
+        try:
+            import config  # imported lazily: config.py is only needed when -d was not given
+        except ModuleNotFoundError:
+            print("Database file not specified with -d (--db_file) argument. Consider adding it to config.py for ease of use.")
+            sys.exit(-1)
+
+        if not config.DATABASE_FILE:
+            print("config.py found, but DATABASE_FILE is empty. Please add it or use -d option to specify it.")
+            sys.exit(-1)
+
+        return config.DATABASE_FILE
+    else:
+        return db_file
+
+
 @click.group()
 def cli():
     pass
 
 
 @click.command()
-@click.argument('index_dir')
-def create(index_dir):
-    """Create a new index directory to track a music collection"""
-    db = Database(index_dir)
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
+def create(db_file):
+    """Create a new database to track a music collection"""
+    db_file = db_file_check(db_file)
+    db = Database(db_file)
     db.create()
 
 
 @click.command()
-@click.argument('index_dir')
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
 @click.argument('music_dir')
-def scan(index_dir, music_dir):
+def scan(db_file, music_dir):
     """Scan a directory and its subdirectories for music files to add to the collection"""
-    db = Database(index_dir)
+    db_file = db_file_check(db_file)
+    db = Database(db_file)
     db.open()
     db.scan(music_dir)
 
 
 @click.command()
-@click.option('-d', '--delete', required=False, is_flag=True, default=True)
-@click.argument('index_dir')
-def cleanup(delete, index_dir):
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
+@click.option("-r", "--remove", required=False, is_flag=True, default=True)
+def cleanup(db_file, remove):
     """Perform a database cleanup. Check that files exist and if they don't remove from the index"""
-    db = Database(index_dir)
+    db_file = db_file_check(db_file)
+    db = Database(db_file)
     db.open()
-    db.database_cleanup(delete)
+    db.database_cleanup(remove)
 
 
 @click.command()
-@click.argument('index_dir')
-def metadata(index_dir):
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
+def metadata(db_file):
     """Lookup metadata (popularity and tags) for recordings"""
-    db = Database(index_dir)
+    db_file = db_file_check(db_file)
+    db = Database(db_file)
     db.open()
     lookup = MetadataLookup()
     lookup.lookup()
@@ -102,22 +124,24 @@ def metadata(index_dir):
 
 
 @click.command()
-@click.argument('index_dir')
-def subsonic(index_dir):
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
+def subsonic(db_file):
     """Scan a remote subsonic music collection"""
-    db = SubsonicDatabase(index_dir)
+    db_file = db_file_check(db_file)
+    db = SubsonicDatabase(db_file)
     db.open()
     db.sync()
 
 
 @click.command()
-@click.argument('index_dir')
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
+@click.option('-t', '--threshold', default=.80)
 @click.argument('jspf_playlist')
 @click.argument('m3u_playlist')
-@click.option('-t', '--threshold', default=.80)
-def playlist(index_dir, jspf_playlist, m3u_playlist, threshold):
+def playlist(db_file, threshold, jspf_playlist, m3u_playlist):
     """ Resolve a JSPF file with MusicBrainz recording MBIDs to files in the local collection"""
-    db = Database(index_dir)
+    db_file = db_file_check(db_file)
+    db = Database(db_file)
     db.open()
     cr = ContentResolver()
     jspf = read_jspf_playlist(jspf_playlist)
@@ -126,15 +150,16 @@ def playlist(index_dir, jspf_playlist, m3u_playlist, threshold):
 
 
 @click.command()
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
 @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True)
 @click.option('-p', '--save-to-playlist', required=False)
 @click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file")
-@click.argument('index_dir')
 @click.argument('mode')
 @click.argument('prompt')
-def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, prompt):
+def lb_radio(db_file, upload_to_subsonic, save_to_playlist, dont_ask, mode, prompt):
     """Use the ListenBrainz Radio engine to create a playlist from a prompt, using a local music collection"""
-    db = SubsonicDatabase(index_dir)
+    db_file = db_file_check(db_file)
+    db = SubsonicDatabase(db_file)
     db.open()
     r = ListenBrainzRadioLocal()
     jspf = r.generate(mode, prompt)
@@ -147,40 +172,41 @@ def lb_radio(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, mode, pr
 
 
 @click.command()
-@click.argument('index_dir')
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
 @click.argument('count', required=False, default=250)
-def top_tags(index_dir, count):
+def top_tags(db_file, count):
     "Display the top most used tags in the music collection. Useful for writing LB Radio tag prompts"
-    db = Database(index_dir)
+    db_file = db_file_check(db_file)
+    db = Database(db_file)
     db.open()
     tt = TopTags()
     tt.print_top_tags_tightly(count)
 
 
 @click.command()
-@click.argument('index_dir')
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
 @click.option('-e', '--exclude-different-release', required=False, default=False, is_flag=True)
-def duplicates(exclude_different_release, index_dir):
+def duplicates(db_file, exclude_different_release):
     "Print all the tracks in the DB that are duplciated as per recording_mbid"
-    db = Database(index_dir)
+    db_file = db_file_check(db_file)
+    db = Database(db_file)
     db.open()
     fd = FindDuplicates(db)
     fd.print_duplicate_recordings(exclude_different_release)
 
 
 @click.command()
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
 @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True, default=False)
 @click.option('-p', '--save-to-playlist', required=False)
 @click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file")
-@click.argument('index_dir')
 @click.argument('user_name')
-def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, user_name):
+def periodic_jams(db_file, upload_to_subsonic, save_to_playlist, dont_ask, user_name):
     "Generate a periodic jams playlist"
-    db = SubsonicDatabase(index_dir)
+    db_file = db_file_check(db_file)
+    db = SubsonicDatabase(db_file)
     db.open()
 
-    # TODO: ensure that we catch upload to subsonic when we have a FS playlist
-
     target = "subsonic" if upload_to_subsonic else "filesystem"
     pj = LocalPeriodicJams(user_name, target)
     jspf = pj.generate()
@@ -192,11 +218,11 @@ def periodic_jams(upload_to_subsonic, save_to_playlist, dont_ask, index_dir, use
 
 
 @click.command()
-@click.argument('index_dir')
-def unresolved(index_dir):
+@click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
+def unresolved(db_file):
     "Show the top unresolved releases"
-
-    db = Database(index_dir)
+    db_file = db_file_check(db_file)
+    db = Database(db_file)
     db.open()
     urt = UnresolvedRecordingTracker()
     releases = urt.get_releases()

From f2dbf1aba9f5112dfa29b1f43198dd36293cc88e Mon Sep 17 00:00:00 2001
From: Robert Kaye <rob@musicbrainz.org>
Date: Wed, 10 Jan 2024 12:32:35 +0100
Subject: [PATCH 39/39] Make match threshold a command line arg

---
 lb_content_resolver/database.py           |  2 +-
 lb_content_resolver/lb_radio.py           | 11 +++++------
 lb_content_resolver/troi/periodic_jams.py |  8 +++-----
 resolve.py                                | 10 ++++++----
 4 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
index 33d2973..c72e840 100755
--- a/lb_content_resolver/database.py
+++ b/lb_content_resolver/database.py
@@ -182,7 +182,7 @@ def read_metadata_and_add(self, relative_path, format, mtime, update):
         elif format == "wma":
             mdata = wma.read(file_path)
 
-        # TODO: In the future we should attempt to read basic metadata from
+        # In the future we should attempt to read basic metadata from
         # the filename here. But, if you have untagged files, this tool
         # really isn't for you anyway. heh.
         if mdata is not None:
diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
index d6d947a..4114eae 100755
--- a/lb_content_resolver/lb_radio.py
+++ b/lb_content_resolver/lb_radio.py
@@ -16,12 +16,11 @@ class ListenBrainzRadioLocal:
        Generate local playlists against a music collection available via subsonic.
     '''
 
-    # TODO: Make this an argument
-    MATCH_THRESHOLD = .8
-
-    def generate(self, mode, prompt):
+    def generate(self, mode, prompt, match_threshold):
         """
-           Generate a playlist given the mode and prompt.
+           Generate a playlist given the mode and prompt. match_threshold, a value from
+           0 to 1.0, lets the user control how closely local tracks must match before
+           being considered a match.
         """
 
         patch = LBRadioPatch({"mode": mode, "prompt": prompt, "echo": True, "debug": True, "min_recordings": 1})
@@ -40,7 +39,7 @@ def generate(self, mode, prompt):
             return {"playlist": {"track": []}}
 
         # Resolve any tracks that have not been resolved to a subsonic_id or a local file
-        self.resolve_playlist(self.MATCH_THRESHOLD, playlist)
+        self.resolve_playlist(match_threshold, playlist)
 
         return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}}
 
diff --git a/lb_content_resolver/troi/periodic_jams.py b/lb_content_resolver/troi/periodic_jams.py
index 7b6fb5a..e61812c 100755
--- a/lb_content_resolver/troi/periodic_jams.py
+++ b/lb_content_resolver/troi/periodic_jams.py
@@ -7,13 +7,11 @@ class LocalPeriodicJams(ListenBrainzRadioLocal):
        Generate local playlists against a music collection available via subsonic.
     '''
 
-    # TODO: Make this an argument
-    MATCH_THRESHOLD = .8
-
-    def __init__(self, user_name, target):
+    def __init__(self, user_name, target, match_threshold):
         ListenBrainzRadioLocal.__init__(self)
         self.user_name = user_name
         self.target = target
+        self.match_threshold = match_threshold
 
     def generate(self):
         """
@@ -40,6 +38,6 @@ def generate(self):
             return {"playlist": {"track": []}}
 
         # Resolve any tracks that have not been resolved to a subsonic_id or a local file
-        self.resolve_playlist(self.MATCH_THRESHOLD, playlist)
+        self.resolve_playlist(self.match_threshold, playlist)
 
         return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}}
diff --git a/resolve.py b/resolve.py
index cd9b51d..1e55c64 100755
--- a/resolve.py
+++ b/resolve.py
@@ -151,18 +151,19 @@ def playlist(db_file, threshold, jspf_playlist, m3u_playlist):
 
 @click.command()
 @click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
+@click.option('-t', '--threshold', default=.80)
 @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True)
 @click.option('-p', '--save-to-playlist', required=False)
 @click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file")
 @click.argument('mode')
 @click.argument('prompt')
-def lb_radio(db_file, upload_to_subsonic, save_to_playlist, dont_ask, mode, prompt):
+def lb_radio(db_file, threshold, upload_to_subsonic, save_to_playlist, dont_ask, mode, prompt):
     """Use the ListenBrainz Radio engine to create a playlist from a prompt, using a local music collection"""
     db_file = db_file_check(db_file)
     db = SubsonicDatabase(db_file)
     db.open()
     r = ListenBrainzRadioLocal()
-    jspf = r.generate(mode, prompt)
+    jspf = r.generate(mode, prompt, threshold)
     if len(jspf["playlist"]["track"]) == 0:
         print(upload_to_subsonic)
         db.metadata_sanity_check(include_subsonic=upload_to_subsonic)
@@ -197,18 +198,19 @@ def duplicates(db_file, exclude_different_release):
 
 @click.command()
 @click.option("-d", "--db_file", help="Database file for the local collection", required=False, is_flag=False)
+@click.option('-t', '--threshold', default=.80)
 @click.option('-u', '--upload-to-subsonic', required=False, is_flag=True, default=False)
 @click.option('-p', '--save-to-playlist', required=False)
 @click.option('-y', '--dont-ask', required=False, is_flag=True, help="write playlist to m3u file")
 @click.argument('user_name')
-def periodic_jams(db_file, upload_to_subsonic, save_to_playlist, dont_ask, user_name):
+def periodic_jams(db_file, threshold, upload_to_subsonic, save_to_playlist, dont_ask, user_name):
     "Generate a periodic jams playlist"
     db_file = db_file_check(db_file)
     db = SubsonicDatabase(db_file)
     db.open()
 
     target = "subsonic" if upload_to_subsonic else "filesystem"
-    pj = LocalPeriodicJams(user_name, target)
+    pj = LocalPeriodicJams(user_name, target, threshold)
     jspf = pj.generate()
     if len(jspf["playlist"]["track"]) == 0:
         db.metadata_sanity_check(include_subsonic=upload_to_subsonic)