From c5fe1d31c8d6cc36004a11cb4e0d87594b076abc Mon Sep 17 00:00:00 2001 From: Matheus Xavier Date: Thu, 13 May 2021 08:28:38 -0300 Subject: [PATCH 1/4] Move storage format over to binary, this change allows the addition of: Checksums, File format versioning and feature flags i.e: encryption, this change also makes the db files way more compact. Other changes: tighten except clauses in Util class, change some names to snake case --- elara/elara.py | 10 ++-- elara/elarautil.py | 110 +++++++++++++++++++++++++++++++------------- elara/exceptions.py | 9 ++++ elara/shared.py | 8 ++-- requirements.txt | 1 + setup.py | 65 +++++++++++++------------- 6 files changed, 129 insertions(+), 74 deletions(-) diff --git a/elara/elara.py b/elara/elara.py index 9b8f5c8..91a39b6 100644 --- a/elara/elara.py +++ b/elara/elara.py @@ -42,6 +42,8 @@ def __init__(self, path, commitdb, key_path=None): self.path = os.path.expanduser(path) self.commitdb = commitdb self.lru = LRU() + # this is in place to prevent opening incompatible databases between versions of the storage format + self.db_format_version = 0x0001 # Since key file is generated first, invalid token error for pre existing open dbs @@ -65,15 +67,15 @@ def __init__(self, path, commitdb, key_path=None): def _load(self): if self.key: - self.db = Util.readAndDecrypt(self) + self.db = Util.read_and_decrypt(self) else: - self.db = Util.readJSON(self) + self.db = Util.read_plain_db(self) def _dump(self): if self.key: - Util.encryptAndStore(self) # Enclose in try-catch + Util.encrypt_and_store(self) # Enclose in try-catch else: - Util.storeJSON(self) + Util.store_plain_db(self) def _autocommit(self): if self.commitdb: diff --git a/elara/elarautil.py b/elara/elarautil.py index 94ee3be..89d10e2 100644 --- a/elara/elarautil.py +++ b/elara/elarautil.py @@ -4,62 +4,108 @@ This source code is licensed under the BSD-style license found in the LICENSE file in the root directory of this source tree. """ +from typing import Dict -from cryptography.fernet import Fernet -import json +import msgpack import os -from .exceptions import FileAccessError, FileKeyError +from zlib import crc32 + +from cryptography.fernet import Fernet + +from .exceptions import FileAccessError, FileKeyError, LoadChecksumError, LoadIncompatibleDB class Util: @staticmethod - def readJSON(obj): - try: - curr_db = json.load(open(obj.path, "rb")) - except Exception: - # print("Read JSON error. File might be encrypted. Run in secure mode.") - raise FileAccessError( - "Read JSON error. File might be encrypted. Run in secure mode with key path." - ) - return curr_db + def check_mag(mag): + return mag == b"ELDB" @staticmethod - def storeJSON(obj): - try: - json.dump(obj.db, open(obj.path, "wt"), indent=4) - except Exception: - raise FileAccessError( - "Store JSON error. File might be encrypted. Run in secure mode with key path." - ) + def check_encrypted(version): + # if msb of version number is set the db is encrypted + return (version & (1 << 15)) != 0 + + @staticmethod + def read_plain_db(obj) -> Dict: + with open(obj.path, "rb") as fctx: + if not Util.check_mag(fctx.read(4)): + raise FileAccessError("File magic number not known") + version = int.from_bytes(fctx.read(2), "little", signed=False) + # check for encryption before trying anything + if Util.check_encrypted(version): + raise FileAccessError("This file is encrypted, run in secure mode") + checksum = int.from_bytes(fctx.read(4), "little", signed=False) + data = fctx.read() + calculated_checksum = crc32(data) + if calculated_checksum != checksum: + raise LoadChecksumError( + f"calculated checksum: {calculated_checksum} is different from stored checksum {checksum}") + elif version != obj.db_format_version: + raise LoadIncompatibleDB(f"db format version {version} is incompatible with {obj.db_format_version}") + try: + curr_db = msgpack.unpackb(data) + except FileNotFoundError: + raise FileAccessError( + "File not found" + ) + return curr_db + + @staticmethod + def store_plain_db(obj): + with open(obj.path, "wb") as fctx: + try: + data = msgpack.packb(obj.db) + buffer = b"ELDB" + buffer += obj.db_format_version.to_bytes(2, "little") + buffer += (crc32(data)).to_bytes(4, "little") + buffer += data + fctx.write(buffer) + except FileExistsError: + raise FileAccessError( + "File already exists" + ) @staticmethod - def readAndDecrypt(obj): + def read_and_decrypt(obj): if obj.key: fernet = Fernet(obj.key) - encrypted_data = None try: - with open(obj.path, "rb") as file: - encrypted_data = file.read() - except Exception: + with open(obj.path, "rb") as fctx: + if not Util.check_mag(fctx.read(4)): + raise FileAccessError("File magic number not known") + version = int.from_bytes(fctx.read(2), "little") + if not Util.check_encrypted(version): + raise FileAccessError("File is marked not encrypted, you might have a corrupt db") + checksum = int.from_bytes(fctx.read(4), "little") + encrypted_data = fctx.read() + calculated_checksum = crc32(encrypted_data) & 0xFFFFFFFF + if calculated_checksum != checksum: + raise LoadChecksumError( + f"calculated checksum: {calculated_checksum} is different from stored checksum {checksum}") + except FileNotFoundError: raise FileAccessError("File open & read error") decrypted_data = fernet.decrypt(encrypted_data) - return json.loads(decrypted_data.decode("utf-8")) + return msgpack.unpackb(decrypted_data) else: return None @staticmethod - def encryptAndStore(obj): + def encrypt_and_store(obj): if obj.key: fernet = Fernet(obj.key) - db_snapshot = json.dumps(obj.db) - db_byte = db_snapshot.encode("utf-8") - encrypted_data = fernet.encrypt(db_byte) + db_snapshot = msgpack.packb(obj.db) + buffer = b"ELDB" + # set version msb + buffer += (obj.db_format_version | 1 << 15).to_bytes(2, "little") + encrypted_data = fernet.encrypt(db_snapshot) + buffer += crc32(encrypted_data).to_bytes(4, "little") + buffer += encrypted_data try: with open(obj.path, "wb") as file: - file.write(encrypted_data) + file.write(buffer) return True - except Exception: - raise FileAccessError("File open & write error") + except FileExistsError: + raise FileAccessError("File exists") else: return False diff --git a/elara/exceptions.py b/elara/exceptions.py index fc648a8..540e6c9 100644 --- a/elara/exceptions.py +++ b/elara/exceptions.py @@ -5,6 +5,7 @@ This source code is licensed under the BSD-style license found in the LICENSE file in the root directory of this source tree. """ + # Add all custom exception classes here @@ -24,3 +25,11 @@ def __init__(self, message): def __str__(self): return f"Error -> {self.message}" + + +class LoadChecksumError(Exception): + pass + + +class LoadIncompatibleDB(Exception): + pass diff --git a/elara/shared.py b/elara/shared.py index a10339c..30e94fc 100644 --- a/elara/shared.py +++ b/elara/shared.py @@ -15,9 +15,9 @@ def retdb(self): if self.key: - return Util.readAndDecrypt(self) + return Util.read_and_decrypt(self) else: - return Util.readJSON(self) + return Util.read_plain_db(self) def retmem(self): @@ -83,7 +83,7 @@ def securedb(self, key_path=None): Util.keygen(new_key_path) self.key = Util.readkey(new_key_path) - Util.encryptAndStore(self) + Util.encrypt_and_store(self) return True @@ -110,7 +110,7 @@ def updatekey(self, key_path=None): f.truncate(0) f.close self.key = Util.readkey(new_key_path) - Util.encryptAndStore(self) + Util.encrypt_and_store(self) else: raise FileKeyError("Update key Failed") diff --git a/requirements.txt b/requirements.txt index 0041d01..cf55205 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ cryptography==3.4.7 +msgpack>=1.0.0 diff --git a/setup.py b/setup.py index 7f4728a..322ca2b 100644 --- a/setup.py +++ b/setup.py @@ -1,39 +1,36 @@ from distutils.core import setup -with open('README.rst') as f: +with open("README.rst") as f: long_description = f.read() - + setup( - name = 'elara', - packages = ['elara'], - version = '0.3.0', - license='three-clause BSD', - description = 'Elara DB is an easy to use, lightweight NoSQL database written for python that can also be used as a fast in-memory cache for JSON-serializable data. Includes various methods to manipulate data structures in-memory, secure database files and export data.', - long_description = long_description, - author = 'Saurabh Pujari', - author_email = 'saurabhpuj99@gmail.com', - url = 'https://github.com/saurabh0719/elara', - keywords = [ - 'database', - 'key-value', - 'storage', - 'file storage', - 'json storage', - 'json database', - 'key-value database' , - 'nosql', - 'nosql database' - 'cache', - 'file cache' - ], - install_requires=[ - 'cryptography' + name="elara", + packages=["elara"], + version="0.3.0", + license="three-clause BSD", + description="Elara DB is an easy to use, lightweight NoSQL database written for python that can also be used as a fast in-memory cache for JSON-serializable data. Includes various methods to manipulate data structures in-memory, secure database files and export data.", + long_description=long_description, + author="Saurabh Pujari", + author_email="saurabhpuj99@gmail.com", + url="https://github.com/saurabh0719/elara", + keywords=[ + "database", + "key-value", + "storage", + "file storage", + "json storage", + "json database", + "key-value database", + "nosql", + "nosql database" "cache", + "file cache", + ], + install_requires=["cryptography", "msgpack"], + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Topic :: Database", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python", ], - classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'Topic :: Database', - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python' - ], -) \ No newline at end of file +) From 0bf923f085e0323717aa81f089354e6510caa9c5 Mon Sep 17 00:00:00 2001 From: Matheus Xavier Date: Thu, 13 May 2021 12:58:31 -0300 Subject: [PATCH 2/4] Fix leftover 0xFFFFFFFF --- elara/elarautil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elara/elarautil.py b/elara/elarautil.py index 89d10e2..947919e 100644 --- a/elara/elarautil.py +++ b/elara/elarautil.py @@ -78,7 +78,7 @@ def read_and_decrypt(obj): raise FileAccessError("File is marked not encrypted, you might have a corrupt db") checksum = int.from_bytes(fctx.read(4), "little") encrypted_data = fctx.read() - calculated_checksum = crc32(encrypted_data) & 0xFFFFFFFF + calculated_checksum = crc32(encrypted_data) if calculated_checksum != checksum: raise LoadChecksumError( f"calculated checksum: {calculated_checksum} is different from stored checksum {checksum}") From 09b2cf577b18aee9f01bbd3e40cada8af8ee3eb0 Mon Sep 17 00:00:00 2001 From: Matheus Xavier Date: Thu, 13 May 2021 21:40:10 -0300 Subject: [PATCH 3/4] Add store_restore loop minimal test --- test/test_1.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/test_1.py b/test/test_1.py index 08ab1a5..a21971e 100644 --- a/test/test_1.py +++ b/test/test_1.py @@ -12,6 +12,15 @@ def test_exe(self): res = elara.exe("test.db", False) assert res is not None + def test_store_restore_data(self): + db = elara.exe("test.db") + db.set("test_key", "test_data:\"ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]\"") + db.commit() + db_load = elara.exe("test.db") + recov_data = db.get("test_key") + self.assertEqual(recov_data, + "test_data:\"ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]\"") + def test_get(self): self.db.db["key"] = "test" res = self.db.db["key"] From f241547b583a535f5665cf1bdb7eb068ab8c51c4 Mon Sep 17 00:00:00 2001 From: Matheus Xavier Date: Thu, 13 May 2021 21:47:19 -0300 Subject: [PATCH 4/4] Add store_restore with encryption loop test --- test/test_1.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/test_1.py b/test/test_1.py index a21971e..abb94c1 100644 --- a/test/test_1.py +++ b/test/test_1.py @@ -21,6 +21,15 @@ def test_store_restore_data(self): self.assertEqual(recov_data, "test_data:\"ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]\"") + def test_store_restore_data_secure(self): + db = elara.exe_secure("test_enc.db") + db.set("test_key", "test_data:\"ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]\"") + db.commit() + db_load = elara.exe_secure("test_enc.db") + recov_data = db.get("test_key") + self.assertEqual(recov_data, + "test_data:\"ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]\"") + def test_get(self): self.db.db["key"] = "test" res = self.db.db["key"]