class MetadataLookup:
    """
    Given the local database, look up metadata from MusicBrainz to allow local playlist resolution.
    """

    # ListenBrainz labs endpoint queried for tag/popularity rows.
    LOOKUP_URL = "https://labs.api.listenbrainz.org/bulk-tag-lookup/json"

    # Maximum number of recordings sent in a single request.
    BATCH_SIZE = 1000

    def __init__(self, db):
        """
        Args:
            db: the project's database wrapper; only its open_db() method is used here.
        """
        self.db = db

    @staticmethod
    def _popularity_by_mbid(rows):
        """
        Collapse the API rows into a {recording_mbid: percent} dict.

        The API returns one row per (recording, tag, source); every row carries the
        recording's "percent" value, so the last row seen for an MBID wins.
        """
        return {str(row["recording_mbid"]): row["percent"] for row in rows}

    def lookup(self):
        """
        Fetch popularity data for up to BATCH_SIZE recordings and store it in
        RecordingMetadata.

        Recordings are taken in order of RecordingMetadata.last_updated, one POST
        is made to LOOKUP_URL, and for each MBID in the response the recording's
        metadata row is created or updated inside a single transaction.

        Returns None. On a non-200 response the failure is printed and nothing
        is written.
        """

        self.db.open_db()

        request_args = []
        recordings_by_mbid = {}
        batch_query = Recording.select() \
                               .join(RecordingMetadata, peewee.JOIN.LEFT_OUTER) \
                               .order_by(RecordingMetadata.last_updated)
        for recording in batch_query:
            # A recording without an MBID cannot be looked up; sending the
            # string "None" would only produce an invalid request entry.
            if recording.recording_mbid is None:
                continue

            mbid = str(recording.recording_mbid)
            # The outer join can yield the same recording more than once;
            # only ask the API about each MBID once.
            if mbid not in recordings_by_mbid:
                request_args.append({ "[recording_mbid]": mbid })
            recordings_by_mbid[mbid] = recording
            if len(request_args) == self.BATCH_SIZE:
                break

        if not request_args:
            print("No recordings with MBIDs found, nothing to look up.")
            return

        r = requests.post(self.LOOKUP_URL, json=request_args)
        if r.status_code != 200:
            print("Fail: %d %s" % (r.status_code, r.text))
            return

        # Parse the response body once and reuse it.
        rows = r.json()
        for row in rows:
            # Tag rows are only printed for now; they are not persisted yet.
            print("%s, %s, %s" % (row["recording_mbid"], row["tag"], row["source"]))

        popularity_by_mbid = self._popularity_by_mbid(rows)
        print(f"{len(request_args)} db rows, {len(rows)} api rows")

        # `db` here is the module-level peewee database, not self.db.
        with db.atomic():
            for mbid, popularity in popularity_by_mbid.items():
                recording = recordings_by_mbid.get(mbid)
                if recording is None:
                    # The API returned an MBID we did not ask about; skip it.
                    continue

                print(f"update {mbid}")
                # Query the metadata row explicitly: the "metadata" backref on
                # Recording is a query, not a single row.
                metadata = RecordingMetadata.get_or_none(RecordingMetadata.recording == recording.id)
                if metadata is None:
                    print("create new")
                    RecordingMetadata.create(recording=recording.id,
                                             popularity=popularity,
                                             last_updated=datetime.datetime.now())
                else:
                    print("update existing")
                    metadata.popularity = popularity
                    metadata.last_updated = datetime.datetime.now()
                    # Save the metadata row itself, not the recording.
                    metadata.save()
# --- lb_content_resolver/model/recording.py (follows the Recording model) ---


class RecordingMetadata(Model):
    """
    Additional metadata for recordings: popularity. In future additional fields
    like release date and release country could be added to this table.
    """

    class Meta:
        database = db

    id = AutoField()
    # Owning recording; exposes the reverse accessor Recording.metadata.
    recording = ForeignKeyField(Recording, backref="metadata")

    popularity = FloatField()
    # Defaults to the row's creation time.
    last_updated = DateTimeField(null=False, default=datetime.datetime.now)

    def __repr__(self):
        # The format string needs one placeholder per argument; an empty
        # string raises TypeError when formatted with arguments.
        return "<RecordingMetadata('%s','%s')>" % (self.recording or 0, self.popularity)


# --- lb_content_resolver/model/tag.py ---

# RecordingTag declares a foreign key to Recording, so the model has to be
# imported here; without it the module fails with NameError on import.
from lb_content_resolver.model.recording import Recording


class Tag(Model):
    """
    Represents a tag that could be joined to an entity
    """

    class Meta:
        database = db

    id = AutoField()
    # Tag text; unique across the table.
    name = TextField(null=False, unique=True)

    def __repr__(self):
        return "<Tag('%s')>" % (self.name or "")


class RecordingTag(Model):
    """
    A tag connected to a recording
    """

    class Meta:
        database = db

    id = AutoField()
    recording = ForeignKeyField(Recording)
    tag = ForeignKeyField(Tag)
    last_updated = DateTimeField(null=False, default=datetime.datetime.now)
    # NOTE(review): presumably the entity the tag came from (artist,
    # release-group or recording) -- confirm against the code that fills it.
    entity = TextField(null=False)

    def __repr__(self):
        return "<RecordingTag('%s','%s')>" % (self.tag.name or "", self.recording)
# CLI command: look up metadata for the recordings in the index at index_dir.
# (Documented with comments rather than a docstring so that the help text
# click prints for this command stays unchanged.)
@click.command()
@click.argument('index_dir')
def lookup(index_dir):
    # Wrap the index directory in a Database and run one metadata lookup pass.
    MetadataLookup(Database(index_dir)).lookup()