From 8099df69dce4996c01a0fcad54620307629168d0 Mon Sep 17 00:00:00 2001 From: Believethehype <1097224+believethehype@users.noreply.github.com> Date: Tue, 26 Nov 2024 19:52:41 +0100 Subject: [PATCH] delete db if over certain limit on startup --- .../content_discovery_currently_popular.py | 2 ++ .../tasks/content_discovery_update_db_only.py | 7 ++++- nostr_dvm/utils/database_utils.py | 30 ++++++++++++++++++- tests/discovery.py | 15 +++++----- 4 files changed, 45 insertions(+), 9 deletions(-) diff --git a/nostr_dvm/tasks/content_discovery_currently_popular.py b/nostr_dvm/tasks/content_discovery_currently_popular.py index d939c90..c69bd37 100644 --- a/nostr_dvm/tasks/content_discovery_currently_popular.py +++ b/nostr_dvm/tasks/content_discovery_currently_popular.py @@ -9,6 +9,7 @@ from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface, process_venv from nostr_dvm.utils import definitions from nostr_dvm.utils.admin_utils import AdminConfig +from nostr_dvm.utils.database_utils import init_db from nostr_dvm.utils.definitions import EventDefinitions from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config from nostr_dvm.utils.nip88_utils import NIP88Config, check_and_set_d_tag_nip88, check_and_set_tiereventid_nip88 @@ -171,6 +172,7 @@ async def sync_db(self): try: sk = SecretKey.from_hex(self.dvm_config.PRIVATE_KEY) keys = Keys.parse(sk.to_hex()) + database = NostrDatabase.lmdb(self.db_name) cli = ClientBuilder().signer(NostrSigner.keys(keys)).database(database).build() diff --git a/nostr_dvm/tasks/content_discovery_update_db_only.py b/nostr_dvm/tasks/content_discovery_update_db_only.py index f2f8f86..74ddf4e 100644 --- a/nostr_dvm/tasks/content_discovery_update_db_only.py +++ b/nostr_dvm/tasks/content_discovery_update_db_only.py @@ -11,6 +11,7 @@ from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface, process_venv from nostr_dvm.utils import definitions from nostr_dvm.utils.admin_utils import AdminConfig +from nostr_dvm.utils.database_utils import init_db from nostr_dvm.utils.definitions import EventDefinitions from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config from nostr_dvm.utils.nip88_utils import NIP88Config, check_and_set_d_tag_nip88, check_and_set_tiereventid_nip88 @@ -43,6 +44,7 @@ class DicoverContentDBUpdateScheduler(DVMTaskInterface): result = "" database = None wot_counter = 0 + max_db_size = 280 async def init_dvm(self, name, dvm_config: DVMConfig, nip89config: NIP89Config, nip88config: NIP88Config = None, admin_config: AdminConfig = None, options=None): @@ -63,6 +65,8 @@ async def init_dvm(self, name, dvm_config: DVMConfig, nip89config: NIP89Config, self.db_name = self.options.get("db_name") if self.options.get("db_since"): self.db_since = int(self.options.get("db_since")) + if self.options.get("max_db_size"): + self.max_db_size = int(self.options.get("max_db_size")) use_logger = False if use_logger: @@ -136,7 +140,8 @@ async def sync_db(self): sk = SecretKey.from_hex(self.dvm_config.PRIVATE_KEY) keys = Keys.parse(sk.to_hex()) if self.database is None: - self.database = NostrDatabase.lmdb(self.db_name) + self.database = await init_db(self.db_name, True, self.max_db_size) + #self.database = NostrDatabase.lmdb(self.db_name) cli = ClientBuilder().signer(NostrSigner.keys(keys)).database(self.database).opts(opts).build() diff --git a/nostr_dvm/utils/database_utils.py b/nostr_dvm/utils/database_utils.py index 9ef2c6b..6ce7ba9 100644 --- a/nostr_dvm/utils/database_utils.py +++ b/nostr_dvm/utils/database_utils.py @@ -1,11 +1,15 @@ # DATABASE LOGIC import json +import os +import pathlib +import shutil import sqlite3 from dataclasses import dataclass from logging import Filter from sqlite3 import Error +from tkinter.filedialog import Directory -from nostr_sdk import Timestamp, Keys, PublicKey, Filter, Kind, make_private_msg, NostrSigner +from nostr_sdk import Timestamp, Keys, PublicKey, Filter, Kind, make_private_msg, NostrSigner, NostrDatabase from nostr_dvm.utils.definitions import relay_timeout from nostr_dvm.utils.nostr_utils import send_nip04_dm @@ -246,6 +250,30 @@ async def get_or_add_user(db, npub, client, config, update=False, skip_meta=Fals return user +async def init_db(database, wipe=False, limit=1000, print_filesize=True): + # LMDB can't grow smaller, so by using this function we can wipe the database on init to avoid + # it growing too big. If wipe is set to true, the database will be deleted once the size is above the limit param. + database_content = database + "/data.mdb" + if os.path.isfile(database_content): + file_stats = os.stat(database_content) + sizeinmb = file_stats.st_size / (1024 * 1024) + if print_filesize: + print("Filesize of database \"" + database + "\": " + str(sizeinmb) + " Mb.") + + if wipe and sizeinmb > limit: + try: + shutil.rmtree(database) + print("Removed database due to large file size. Waiting for resync") + except OSError as e: + print("Error: %s - %s." % (e.filename, e.strerror)) + else: + print("Creating database: " + database) + + + return NostrDatabase.lmdb(database) + + + async def fetch_user_metadata(npub, client): name = "" nip05 = "" diff --git a/tests/discovery.py b/tests/discovery.py index f9670e4..93f8b70 100644 --- a/tests/discovery.py +++ b/tests/discovery.py @@ -1,6 +1,7 @@ import asyncio import json import os +import shutil import threading from pathlib import Path @@ -22,6 +23,7 @@ from nostr_dvm.tasks.content_discovery_update_db_only import DicoverContentDBUpdateScheduler from nostr_dvm.tasks.discovery_trending_notes_nostrband import TrendingNotesNostrBand from nostr_dvm.utils.admin_utils import AdminConfig +from nostr_dvm.utils.database_utils import init_db from nostr_dvm.utils.dvmconfig import build_default_config, DVMConfig from nostr_dvm.utils.nip88_utils import NIP88Config, check_and_set_d_tag_nip88, check_and_set_tiereventid_nip88 from nostr_dvm.utils.nip89_utils import create_amount_tag, NIP89Config, check_and_set_d_tag @@ -606,25 +608,24 @@ def build_example_oneperfollow(name, identifier, admin_config, options, image, c admin_config=admin_config, options=options) -async def init_db(database): - return NostrDatabase.lmdb(database) - - def playground(): main_db = "db/nostr_recent_notes.db" - DATABASE = asyncio.run(init_db(main_db)) + main_db_limit = 1024 # in mb + + DATABASE = asyncio.run(init_db(main_db, wipe=True, limit=main_db_limit, print_filesize=True)) # DB Scheduler, do not announce, just use it to update the DB for the other DVMs. admin_config_db_scheduler = AdminConfig() - options_animal = { + options_db = { "db_name": main_db, "db_since": max_sync_duration_in_h * 60 * 60, # 48h since gmt, "personalized": False, + "max_db_size" : main_db_limit, "logger": False} image = "" about = "I just update the Database based on my schedule" db_scheduler = build_db_scheduler("DB Scheduler", "db_scheduler", - admin_config_db_scheduler, options_animal, + admin_config_db_scheduler, options_db, image=image, description=about, update_rate=global_update_rate,