First cut at periodic jams for lb local. Not a bad start!

metabrainz · Jan 4, 2024 · f91626a · f91626a
1 parent aad73e3
commit f91626a
Show file tree

Hide file tree

Showing 11 changed files with 249 additions and 51 deletions.
diff --git a/.gitignore b/.gitignore
@@ -15,3 +15,6 @@ mp3
 /build/
 /dist/
 config.py
+*.jspf
+*.m3u
+.eggs
diff --git a/lb_content_resolver/content_resolver.py b/lb_content_resolver/content_resolver.py
@@ -24,16 +24,27 @@ def __init__(self, db):
         self.db = db
         self.fuzzy_index = None
 
+    def get_artist_recording_metadata(self):
+        """
+            Fetch the metadata needed to build a fuzzy search index.
+
+            Returns a list of (artist_name, recording_name, id) tuples, one
+            for each row returned by Recording.select().
+        """
+
+        return [(recording.artist_name, recording.recording_name, recording.id)
+                for recording in Recording.select()]
+
     def build_index(self):
         """
             Fetch the data from the DB and then build the fuzzy lookup index.
         """
 
-        artist_recording_data = self.db.get_artist_recording_metadata()
-        for recording in Recording.select():
-            artist_recording_data.append((recording.artist_name, recording.recording_name, recording.id))
+        # get_artist_recording_metadata() already returns every recording;
+        # looping over Recording.select() again would index each one twice.
+        artist_recording_data = self.get_artist_recording_metadata()
 
-        self.fuzzy_index = FuzzyIndex(self.db.index_dir)
+        self.fuzzy_index = FuzzyIndex()
         self.fuzzy_index.build(artist_recording_data)
 
     def resolve_recordings(self, query_data, match_threshold):

diff --git a/lb_content_resolver/database.py b/lb_content_resolver/database.py
@@ -119,27 +119,6 @@ def traverse(self, relative_path, dry_run=False):
 
         return True
 
-    def get_artist_recording_metadata(self):
-        """
-            Fetch the metadata needed to build a fuzzy search index.
-        """
-
-        artist_recording_data = []
-        for recording in Recording.select():
-            artist_recording_data.append((recording.artist_name, recording.recording_name, recording.id))
-
-        return artist_recording_data
-
-    def encode_string(self, text):
-        """ 
-            Remove unwanted crap from the query string and only keep essential information.
-
-            'This is the ultimate track !!' -> 'thisistheultimatetrack'
-        """
-        if text is None:
-            return None
-        return unidecode(re.sub(" +", " ", re.sub(r'[^\w ]+', '', text)).strip().lower())
-
     def add_or_update_recording(self, mdata):
         """ 
             Given a Recording, add it to the DB if it does not exist. If it does,

diff --git a/lb_content_resolver/fuzzy_index.py b/lb_content_resolver/fuzzy_index.py
@@ -26,18 +26,10 @@ class FuzzyIndex:
        be quick to rebuild this index.
     '''
 
-    def __init__(self, index_dir):
-        self.index_dir = index_dir
+    def __init__(self):
         self.vectorizer = None
         self.index = None
 
-    def create(self):
-        try:
-            os.mkdir(self.index_dir)
-        except OSError as err:
-            print("Could not create index directory: %s (%s)" % (self.index_dir, err))
-            return
-
     def encode_string(self, text):
         if text is None:
             return None

diff --git a/lb_content_resolver/lb_radio.py b/lb_content_resolver/lb_radio.py
@@ -45,7 +45,7 @@ def sanity_check(self):
                 "sanity check: You have not matched your collection against the collection in subsonic. Run the subsonic command.")
         elif num_subsonic < num_recordings // 2:
             print("sanity check: Only %d of your %d recordings have subsonic matches. Run the subsonic command." %
-                (num_subsonic, num_recordings))
+                  (num_subsonic, num_recordings))
 
     def generate(self, mode, prompt):
         """
@@ -70,11 +70,11 @@ def generate(self, mode, prompt):
             self.sanity_check()
 
         # Resolve any tracks that have not been resolved to a subsonic_id or a local file
-        self.resolve_recordings(playlist)
+        self.resolve_playlist(self.MATCH_THRESHOLD, playlist)
 
         return playlist.get_jspf() if playlist is not None else {"playlist": {"track": []}}
 
-    def resolve_recordings(self, playlist):
+    def resolve_playlist(self, match_threshold, playlist):
 
         recordings = []
         for recording in playlist.playlists[0].recordings:
@@ -84,10 +84,13 @@ def resolve_recordings(self, playlist):
             recordings.append(recording)
 
         if not recordings:
-            return 
+            return
 
+        return self.resolve_recordings(match_threshold, recordings)
+
+    def resolve_recordings(self, match_threshold, recordings):
         cr = ContentResolver(self.db)
-        resolved = cr.resolve_playlist(self.MATCH_THRESHOLD, recordings)
+        resolved = cr.resolve_playlist(match_threshold, recordings)
 
         for i, t_recording in enumerate(recordings):
             if resolved[i] is not None:

diff --git a/lb_content_resolver/troi/__init__.py b/lb_content_resolver/troi/__init__.py
diff --git a/lb_content_resolver/troi/patches/__init__.py b/lb_content_resolver/troi/patches/__init__.py
diff --git a/lb_content_resolver/troi/patches/periodic_jams.py b/lb_content_resolver/troi/patches/periodic_jams.py
@@ -0,0 +1,91 @@
+from datetime import datetime, timedelta
+
+import troi.listenbrainz.recs
+import troi.musicbrainz.recording_lookup
+from troi import Playlist
+from troi.playlist import PlaylistMakerElement
+
+from lb_content_resolver.troi.recording_resolver import RecordingResolverElement
+from lb_content_resolver.model.database import db
+
+DAYS_OF_RECENT_LISTENS_TO_EXCLUDE = 60  # Exclude tracks listened in last X days from the daily jams playlist
+DAILY_JAMS_MIN_RECORDINGS = 25  # the minimum number of recordings we aspire to have in a daily jam, this is not a hard limit
+BATCH_SIZE_RECS = 1000  # the number of recommendations fetched in 1 go
+MAX_RECS_LIMIT = 1000  # the maximum of recommendations available in LB
+
+
+class LocalPeriodicJamsPatch(troi.patch.Patch):
+    """
+    Troi patch that generates a periodic jams playlist resolved through
+    RecordingResolverElement.
+
+    create() chains: the user's raw ListenBrainz recording recommendations ->
+    MusicBrainz recording lookup -> RecordingResolverElement ->
+    PlaylistMakerElement.
+    """
+
+    def __init__(self, args, debug=False):
+        super().__init__(args, debug)
+
+    @staticmethod
+    def inputs():
+        """
+        Generate a periodic playlist from the ListenBrainz recommended recordings.
+
+        \b
+        USER_NAME is a MusicBrainz user name that has an account on ListenBrainz.
+        TYPE Must be one of "daily-jams", "weekly-jams" or "weekly-exploration".
+        """
+        return [{
+            "type": "argument",
+            "args": ["user_name"]
+        }, {
+            "type": "argument",
+            "args": ["type"],
+            "kwargs": {
+                "required": False
+            }
+        }]
+
+    @staticmethod
+    def outputs():
+        return [Playlist]
+
+    @staticmethod
+    def slug():
+        return "local-periodic-jams"
+
+    @staticmethod
+    def description():
+        return "Generate a localized periodic playlist from the ListenBrainz recommended recordings."
+
+    def create(self, inputs):
+        """
+        Assemble the element pipeline for this patch.
+
+        Only inputs['user_name'] is read. Returns the PlaylistMakerElement
+        that terminates the pipeline.
+        """
+        user_name = inputs['user_name']
+
+        recs = troi.listenbrainz.recs.UserRecordingRecommendationsElement(user_name,
+                                                                          "raw",
+                                                                          count=BATCH_SIZE_RECS)
+        recs_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement()
+        recs_lookup.set_sources(recs)
+
+        resolve = RecordingResolverElement(db, .8)
+        resolve.set_sources(recs_lookup)
+
+        # NOTE(review): slug() returns "local-periodic-jams" while patch_slug
+        # below is "periodic-jams" -- confirm the mismatch is intentional.
+        pl_maker = PlaylistMakerElement(name="Local Periodic Jams for %s" % (user_name),
+                                        desc="test playlist!",
+                                        patch_slug="periodic-jams",
+                                        max_num_recordings=50,
+                                        max_artist_occurrence=2,
+                                        shuffle=True,
+                                        expires_at=datetime.utcnow() + timedelta(weeks=2))
+        pl_maker.set_sources(resolve)
+
+        return pl_maker
diff --git a/lb_content_resolver/troi/periodic_jams.py b/lb_content_resolver/troi/periodic_jams.py
@@ -0,0 +1,45 @@
+from lb_content_resolver.lb_radio import ListenBrainzRadioLocal
+from lb_content_resolver.troi.patches.periodic_jams import LocalPeriodicJamsPatch
+
+
+class LocalPeriodicJams(ListenBrainzRadioLocal):
+    '''
+       Generate local playlists against a music collection available via subsonic.
+    '''
+
+    # TODO: Make this an argument
+    MATCH_THRESHOLD = .8
+
+    def __init__(self, db, user_name):
+        ListenBrainzRadioLocal.__init__(self, db)
+        self.user_name = user_name
+
+    def generate(self):
+        """
+           Generate a periodic jams playlist.
+
+           Returns the result of playlist.get_jspf(), an empty playlist dict if
+           no playlist was generated, or None if generation raised RuntimeError.
+        """
+
+        self.db.open_db()
+
+        patch = LocalPeriodicJamsPatch({"user_name": self.user_name, "echo": True, "debug": True, "min_recordings": 1})
+
+        # Now generate the playlist
+        try:
+            playlist = patch.generate_playlist()
+        except RuntimeError as err:
+            print(f"LB Radio generation failed: {err}")
+            return None
+
+        # Without a playlist there is nothing to resolve; stop before resolve_playlist() dereferences it.
+        if playlist is None:
+            print("Your prompt generated an empty playlist.")
+            self.sanity_check()
+            return {"playlist": {"track": []}}
+
+        # Resolve any tracks that have not been resolved to a subsonic_id or a local file
+        self.resolve_playlist(self.MATCH_THRESHOLD, playlist)
+
+        return playlist.get_jspf()
diff --git a/lb_content_resolver/troi/recording_resolver.py b/lb_content_resolver/troi/recording_resolver.py
@@ -0,0 +1,78 @@
+#from troi.musicbrainz.recording_lookup import RecordingLookupElement
+from troi import Element
+
+from lb_content_resolver.content_resolver import ContentResolver
+from lb_content_resolver.model.subsonic import RecordingSubsonic
+from lb_content_resolver.model.recording import Recording
+# NOTE: the next import rebinds Recording to troi's class, shadowing the model import above.
+from troi import Recording
+
+
+class RecordingResolverElement(Element):
+    """
+        Troi element that resolves incoming recordings through ContentResolver
+        and passes on only those that have a subsonic id.
+    """
+
+    def __init__(self, db, match_threshold):
+        Element.__init__(self)
+        self.db = db
+        self.match_threshold = match_threshold
+        self.resolve = ContentResolver(db)
+
+    @staticmethod
+    def inputs():
+        return []
+
+    @staticmethod
+    def outputs():
+        return [Recording]
+
+    def read(self, inputs):
+        """
+            Resolve the recordings in inputs[0] and return those with a subsonic id.
+
+            The returned items are the input recording objects themselves, with
+            musicbrainz["subsonic_id"] filled in.
+        """
+
+        # TODO: Add a check to make sure that metadata is present.
+
+        lookup_data = [{
+            "artist_name": recording.artist.name,
+            "recording_name": recording.name
+        } for recording in inputs[0]]
+
+        # Nothing to resolve, so do not pay for building the fuzzy index.
+        if not lookup_data:
+            return []
+
+        # Build the fuzzy index
+        self.resolve.build_index()
+
+        # Resolve the recordings
+        resolved = self.resolve.resolve_recordings(lookup_data, self.match_threshold)
+        recording_ids = [result["recording_id"] for result in resolved]
+
+        # Fetch the recordings to lookup subsonic ids
+        recordings = RecordingSubsonic \
+                      .select() \
+                      .where(RecordingSubsonic.recording_id.in_(recording_ids)) \
+                      .dicts()
+
+        # Build a subsonic index
+        subsonic_index = {row["recording"]: row["subsonic_id"] for row in recordings}
+
+        # Set the subsonic ids into the recordings and only return recordings with an ID
+        results = []
+        for r in resolved:
+            try:
+                recording = inputs[0][r["index"]]
+                recording.musicbrainz["subsonic_id"] = subsonic_index[r["recording_id"]]
+            except KeyError:
+                # No entry in the subsonic index for this recording: leave it out.
+                continue
+
+            results.append(recording)
+
+        return results