diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 66a6f06ab..e4e3db512 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -115,6 +115,7 @@ library exposed-modules: Data.Arena Data.Map.Range + Database.LSMTree Database.LSMTree.Common Database.LSMTree.Internal Database.LSMTree.Internal.Assertions diff --git a/src/Database/LSMTree.hs b/src/Database/LSMTree.hs new file mode 100644 index 000000000..b1bd99faf --- /dev/null +++ b/src/Database/LSMTree.hs @@ -0,0 +1,551 @@ +-- TODO: document this module +module Database.LSMTree ( + -- * Exceptions + Common.LSMTreeError (..) + + -- * Tracing + , Common.LSMTreeTrace (..) + , Common.TableTrace (..) + , Common.MergeTrace (..) + + -- * Table sessions + , Session + , withSession + , openSession + , closeSession + + -- * Table + , Table + , Common.TableConfig (..) + , Common.defaultTableConfig + , Common.SizeRatio (..) + , Common.MergePolicy (..) + , Common.WriteBufferAlloc (..) + , Common.NumEntries (..) + , Common.BloomFilterAlloc (..) + , Common.defaultBloomFilterAlloc + , Common.FencePointerIndex (..) + , Common.DiskCachePolicy (..) + , Common.MergeSchedule (..) + , Common.defaultMergeSchedule + , withTable + , new + , close + + -- * Table queries and updates + -- ** Queries + , lookups + , LookupResult (..) + , rangeLookup + , Range (..) + , QueryResult (..) + -- ** Cursor + , Cursor + , withCursor + , withCursorAtOffset + , newCursor + , newCursorAtOffset + , closeCursor + , readCursor + -- ** Updates + , inserts + , deletes + , mupserts + , updates + , Update (..) + -- ** Blobs + , BlobRef + , retrieveBlobs + + -- * Durability (snapshots) + , SnapshotName + , Common.mkSnapshotName + , Common.Labellable (..) + , createSnapshot + , openSnapshot + , Common.TableConfigOverride + , Common.configNoOverride + , Common.configOverrideDiskCachePolicy + , deleteSnapshot + , listSnapshots + + -- * Persistence + , duplicate + + -- * Table union + , union + + -- * Serialisation + , SerialiseKey + , SerialiseValue + + -- * Monoidal value resolution + , ResolveValue (..) + , resolveDeserialised + -- ** Properties + , resolveValueValidOutput + , resolveValueAssociativity + -- ** DerivingVia wrappers + , ResolveAsFirst (..) + + -- * Utility types + , IOLike + ) where + +import Control.DeepSeq +import Control.Exception (throw) +import Control.Monad +import Data.Bifunctor (Bifunctor (..)) +import Data.Coerce (coerce) +import Data.Kind (Type) +import Data.Typeable (Proxy (..), eqT, type (:~:) (Refl)) +import qualified Data.Vector as V +import Database.LSMTree.Common (BlobRef (BlobRef), IOLike, Range (..), + SerialiseKey, SerialiseValue, Session, SnapshotName, + closeSession, deleteSnapshot, listSnapshots, openSession, + withSession) +import qualified Database.LSMTree.Common as Common +import qualified Database.LSMTree.Internal as Internal +import qualified Database.LSMTree.Internal.BlobRef as Internal +import qualified Database.LSMTree.Internal.Entry as Entry +import qualified Database.LSMTree.Internal.RawBytes as RB +import qualified Database.LSMTree.Internal.Serialise as Internal +import qualified Database.LSMTree.Internal.Snapshot as Internal +import qualified Database.LSMTree.Internal.Vector as V +import Database.LSMTree.Monoidal (ResolveValue (..), + resolveDeserialised, resolveValueAssociativity, + resolveValueValidOutput) + +{------------------------------------------------------------------------------- + Tables +-------------------------------------------------------------------------------} + +type Table = Internal.Table' + +{-# SPECIALISE withTable :: + Session IO + -> Common.TableConfig + -> (Table IO k v b -> IO a) + -> IO a #-} +withTable :: + IOLike m + => Session m + -> Common.TableConfig + -> (Table m k v b -> m a) + -> m a +withTable (Internal.Session' sesh) conf action = + Internal.withTable sesh conf (action . Internal.Table') + +{-# SPECIALISE new :: + Session IO + -> Common.TableConfig + -> IO (Table IO k v b) #-} +new :: + IOLike m + => Session m + -> Common.TableConfig + -> m (Table m k v b) +new (Internal.Session' sesh) conf = Internal.Table' <$> Internal.new sesh conf + +{-# SPECIALISE close :: + Table IO k v b + -> IO () #-} +close :: + IOLike m + => Table m k v b + -> m () +close (Internal.Table' t) = Internal.close t + +{------------------------------------------------------------------------------- + Table queries +-------------------------------------------------------------------------------} + +-- | Result of a single point lookup. +data LookupResult v b = + NotFound + | Found !v + | FoundWithBlob !v !b + deriving stock (Eq, Show, Functor, Foldable, Traversable) + +instance Bifunctor LookupResult where + first f = \case + NotFound -> NotFound + Found v -> Found (f v) + FoundWithBlob v b -> FoundWithBlob (f v) b + + second g = \case + NotFound -> NotFound + Found v -> Found v + FoundWithBlob v b -> FoundWithBlob v (g b) + +{-# SPECIALISE lookups :: + (SerialiseKey k, SerialiseValue v, ResolveValue v) + => V.Vector k + -> Table IO k v b + -> IO (V.Vector (LookupResult v (BlobRef IO b))) #-} +{-# INLINEABLE lookups #-} +lookups :: + forall m k v b. ( + IOLike m + , SerialiseKey k + , SerialiseValue v + , ResolveValue v + ) + => V.Vector k + -> Table m k v b + -> m (V.Vector (LookupResult v (BlobRef m b))) +lookups ks (Internal.Table' t) = + V.map toLookupResult <$> + Internal.lookups (resolve @v Proxy) (V.map Internal.serialiseKey ks) t + where + toLookupResult (Just e) = case e of + Entry.Insert v -> Found (Internal.deserialiseValue v) + Entry.InsertWithBlob v br -> FoundWithBlob (Internal.deserialiseValue v) + (BlobRef br) + Entry.Mupdate v -> Found (Internal.deserialiseValue v) + Entry.Delete -> NotFound + toLookupResult Nothing = NotFound + +data QueryResult k v b = + FoundInQuery !k !v + | FoundInQueryWithBlob !k !v !b + deriving stock (Eq, Show, Functor, Foldable, Traversable) + +instance Bifunctor (QueryResult k) where + bimap f g = \case + FoundInQuery k v -> FoundInQuery k (f v) + FoundInQueryWithBlob k v b -> FoundInQueryWithBlob k (f v) (g b) + +{-# SPECIALISE rangeLookup :: + (SerialiseKey k, SerialiseValue v, ResolveValue v) + => Range k + -> Table IO k v b + -> IO (V.Vector (QueryResult k v (BlobRef IO b))) #-} +rangeLookup :: + forall m k v b. ( + IOLike m + , SerialiseKey k + , SerialiseValue v + , ResolveValue v + ) + => Range k + -> Table m k v b + -> m (V.Vector (QueryResult k v (BlobRef m b))) +rangeLookup range (Internal.Table' t) = + Internal.rangeLookup (resolve @v Proxy) (Internal.serialiseKey <$> range) t $ \k v mblob -> + toNormalQueryResult + (Internal.deserialiseKey k) + (Internal.deserialiseValue v) + (BlobRef <$> mblob) + +{------------------------------------------------------------------------------- + Cursor +-------------------------------------------------------------------------------} + +type Cursor :: (Type -> Type) -> Type -> Type -> Type -> Type +type Cursor = Internal.Cursor' + +{-# SPECIALISE withCursor :: + Table IO k v b + -> (Cursor IO k v b -> IO a) + -> IO a #-} +withCursor :: + IOLike m + => Table m k v b + -> (Cursor m k v b -> m a) + -> m a +withCursor (Internal.Table' t) action = + Internal.withCursor Internal.NoOffsetKey t (action . Internal.Cursor') + +{-# SPECIALISE withCursorAtOffset :: + SerialiseKey k + => k + -> Table IO k v b + -> (Cursor IO k v b -> IO a) + -> IO a #-} +withCursorAtOffset :: + ( IOLike m + , SerialiseKey k + ) + => k + -> Table m k v b + -> (Cursor m k v b -> m a) + -> m a +withCursorAtOffset offset (Internal.Table' t) action = + Internal.withCursor (Internal.OffsetKey (Internal.serialiseKey offset)) t $ + action . Internal.Cursor' + +{-# SPECIALISE newCursor :: + Table IO k v b + -> IO (Cursor IO k v b) #-} +newCursor :: + IOLike m + => Table m k v b + -> m (Cursor m k v b) +newCursor (Internal.Table' t) = + Internal.Cursor' <$!> Internal.newCursor Internal.NoOffsetKey t + +{-# SPECIALISE newCursorAtOffset :: + SerialiseKey k + => k + -> Table IO k v b + -> IO (Cursor IO k v b) #-} +newCursorAtOffset :: + ( IOLike m + , SerialiseKey k + ) + => k + -> Table m k v b + -> m (Cursor m k v b) +newCursorAtOffset offset (Internal.Table' t) = + Internal.Cursor' <$!> + Internal.newCursor (Internal.OffsetKey (Internal.serialiseKey offset)) t + +{-# SPECIALISE closeCursor :: + Cursor IO k v b + -> IO () #-} +closeCursor :: + IOLike m + => Cursor m k v b + -> m () +closeCursor (Internal.Cursor' c) = Internal.closeCursor c + +{-# SPECIALISE readCursor :: + (SerialiseKey k, SerialiseValue v, ResolveValue v) + => Int + -> Cursor IO k v b + -> IO (V.Vector (QueryResult k v (BlobRef IO b))) #-} +readCursor :: + forall m k v b. ( + IOLike m + , SerialiseKey k + , SerialiseValue v + , ResolveValue v + ) + => Int + -> Cursor m k v b + -> m (V.Vector (QueryResult k v (BlobRef m b))) +readCursor n (Internal.Cursor' c) = + Internal.readCursor (resolve (Proxy @v)) n c $ \k v mblob -> + toNormalQueryResult + (Internal.deserialiseKey k) + (Internal.deserialiseValue v) + (BlobRef <$> mblob) + +toNormalQueryResult :: k -> v -> Maybe b -> QueryResult k v b +toNormalQueryResult k v = \case + Nothing -> FoundInQuery k v + Just b -> FoundInQueryWithBlob k v b + +{------------------------------------------------------------------------------- + Table updates +-------------------------------------------------------------------------------} + +data Update v b = + Insert !v !(Maybe b) + | Delete + | Mupsert !v + deriving stock (Show, Eq) + +instance (NFData v, NFData b) => NFData (Update v b) where + rnf Delete = () + rnf (Insert v b) = rnf v `seq` rnf b + rnf (Mupsert v) = rnf v + +{-# SPECIALISE updates :: + (SerialiseKey k, SerialiseValue v, SerialiseValue b, ResolveValue v) + => V.Vector (k, Update v b) + -> Table IO k v b + -> IO () #-} +updates :: + forall m k v b. ( + IOLike m + , SerialiseKey k + , SerialiseValue v + , SerialiseValue b + , ResolveValue v + ) + => V.Vector (k, Update v b) + -> Table m k v b + -> m () +updates es (Internal.Table' t) = do + Internal.updates (resolve @v Proxy) (V.mapStrict serialiseEntry es) t + where + serialiseEntry = bimap Internal.serialiseKey serialiseOp + serialiseOp = bimap Internal.serialiseValue Internal.serialiseBlob + . updateToEntry + + updateToEntry :: Update v b -> Entry.Entry v b + updateToEntry = \case + Insert v Nothing -> Entry.Insert v + Insert v (Just b) -> Entry.InsertWithBlob v b + Delete -> Entry.Delete + Mupsert v -> Entry.Mupdate v + +{-# SPECIALISE inserts :: + (SerialiseKey k, SerialiseValue v, SerialiseValue b, ResolveValue v) + => V.Vector (k, v, Maybe b) + -> Table IO k v b + -> IO () #-} +inserts :: + ( IOLike m + , SerialiseKey k + , SerialiseValue v + , SerialiseValue b + , ResolveValue v + ) + => V.Vector (k, v, Maybe b) + -> Table m k v b + -> m () +inserts = updates . fmap (\(k, v, b) -> (k, Insert v b)) + +{-# SPECIALISE deletes :: + (SerialiseKey k, SerialiseValue v, SerialiseValue b, ResolveValue v) + => V.Vector k + -> Table IO k v b + -> IO () #-} +deletes :: + ( IOLike m + , SerialiseKey k + , SerialiseValue v + , SerialiseValue b + , ResolveValue v + ) + => V.Vector k + -> Table m k v b + -> m () +deletes = updates . fmap (,Delete) + +{-# SPECIALISE mupserts :: + (SerialiseKey k, SerialiseValue v, SerialiseValue b, ResolveValue v) + => V.Vector (k, v) + -> Table IO k v b + -> IO () #-} +mupserts :: + ( IOLike m + , SerialiseKey k + , SerialiseValue v + , SerialiseValue b + , ResolveValue v + ) + => V.Vector (k, v) + -> Table m k v b + -> m () +mupserts = updates . fmap (second Mupsert) + +{-# SPECIALISE retrieveBlobs :: + SerialiseValue b + => Session IO + -> V.Vector (BlobRef IO b) + -> IO (V.Vector b) #-} +retrieveBlobs :: + ( IOLike m + , SerialiseValue b + ) + => Session m + -> V.Vector (BlobRef m b) + -> m (V.Vector b) +retrieveBlobs (Internal.Session' (sesh :: Internal.Session m h)) refs = + V.map Internal.deserialiseBlob <$> + Internal.retrieveBlobs sesh (V.imap checkBlobRefType refs) + where + checkBlobRefType _ (BlobRef (ref :: Internal.WeakBlobRef m h')) + | Just Refl <- eqT @h @h' = ref + checkBlobRefType i _ = throw (Internal.ErrBlobRefInvalid i) + +{------------------------------------------------------------------------------- + Snapshots +-------------------------------------------------------------------------------} + +{-# SPECIALISE createSnapshot :: + (Common.Labellable (k, v, b), ResolveValue v) + => SnapshotName + -> Table IO k v b + -> IO () #-} +createSnapshot :: forall m k v b. + ( IOLike m + , Common.Labellable (k, v, b) + , ResolveValue v + ) + => SnapshotName + -> Table m k v b + -> m () +createSnapshot snap (Internal.Table' t) = + void $ Internal.createSnapshot (resolve (Proxy @v)) snap label Internal.SnapFullTable t + where + label = Internal.SnapshotLabel $ Common.makeSnapshotLabel (Proxy @(k, v, b)) + +{-# SPECIALISE openSnapshot :: + (Common.Labellable (k, v, b), ResolveValue v) + => Session IO + -> Common.TableConfigOverride + -> SnapshotName + -> IO (Table IO k v b ) #-} +openSnapshot :: forall m k v b. + ( IOLike m + , Common.Labellable (k, v, b) + , ResolveValue v + ) + => Session m + -> Common.TableConfigOverride -- ^ Optional config override + -> SnapshotName + -> m (Table m k v b) +openSnapshot (Internal.Session' sesh) override snap = + Internal.Table' <$!> Internal.openSnapshot sesh label Internal.SnapFullTable override snap (resolve (Proxy @v)) + where + label = Internal.SnapshotLabel $ Common.makeSnapshotLabel (Proxy @(k, v, b)) + +{------------------------------------------------------------------------------- + Mutiple writable tables +-------------------------------------------------------------------------------} + +{-# SPECIALISE duplicate :: + Table IO k v b + -> IO (Table IO k v b) #-} +duplicate :: + IOLike m + => Table m k v b + -> m (Table m k v b) +duplicate (Internal.Table' t) = Internal.Table' <$!> Internal.duplicate t + +{------------------------------------------------------------------------------- + Table union +-------------------------------------------------------------------------------} + +{-# SPECIALISE union :: + ResolveValue v + => Table IO k v b + -> Table IO k v b + -> IO (Table IO k v b) #-} +union :: forall m k v b. + ( IOLike m + , ResolveValue v + ) + => Table m k v b + -> Table m k v b + -> m (Table m k v b) +union = error "union: not yet implemented" $ union @m @k @v @b + +{------------------------------------------------------------------------------- + Monoidal value resolution +-------------------------------------------------------------------------------} + +resolve :: forall v. ResolveValue v => Proxy v -> Internal.ResolveSerialisedValue +resolve = coerce . resolveValue + +-- | Newtype wrapper for use with deriving via, which resolves values as a +-- 'Data.Semigroup.First' semigroup. +-- +-- The instance makes 'Insert's isomorphic to 'Mupsert's. This can be a sensible +-- default if there is no intent to use 'Mupsert's. +newtype ResolveAsFirst v = ResolveAsFirst v + deriving stock (Show, Eq, Ord) + deriving newtype SerialiseValue + +instance ResolveValue (ResolveAsFirst v) where + resolveValue :: + Proxy (ResolveAsFirst v) + -> RB.RawBytes + -> RB.RawBytes + -> RB.RawBytes + resolveValue _ x _ = x diff --git a/src/Database/LSMTree/Internal.hs b/src/Database/LSMTree/Internal.hs index 438ef43f7..ce398934b 100644 --- a/src/Database/LSMTree/Internal.hs +++ b/src/Database/LSMTree/Internal.hs @@ -14,6 +14,8 @@ module Database.LSMTree.Internal ( -- * Existentials Session' (..) + , Table' (..) + , Cursor' (..) , NormalTable (..) , NormalCursor (..) , MonoidalTable (..) @@ -129,6 +131,18 @@ data Session' m = forall h. Typeable h => Session' !(Session m h) instance NFData (Session' m) where rnf (Session' s) = rnf s +type Table' :: (Type -> Type) -> Type -> Type -> Type -> Type +data Table' m k v b = forall h. Typeable h => Table' (Table m h) + +instance NFData (Table' m k v b) where + rnf (Table' t) = rnf t + +type Cursor' :: (Type -> Type) -> Type -> Type -> Type -> Type +data Cursor' m k v b = forall h. Typeable h => Cursor' (Cursor m h) + +instance NFData (Cursor' m k v b) where + rnf (Cursor' t) = rnf t + type NormalTable :: (Type -> Type) -> Type -> Type -> Type -> Type data NormalTable m k v b = forall h. Typeable h => NormalTable !(Table m h) diff --git a/src/Database/LSMTree/Internal/Snapshot.hs b/src/Database/LSMTree/Internal/Snapshot.hs index e46c8ed6f..31b0bfe15 100644 --- a/src/Database/LSMTree/Internal/Snapshot.hs +++ b/src/Database/LSMTree/Internal/Snapshot.hs @@ -61,7 +61,7 @@ import System.FS.BlockIO.API (HasBlockIO) newtype SnapshotLabel = SnapshotLabel Text deriving stock (Show, Eq) -data SnapshotTableType = SnapNormalTable | SnapMonoidalTable +data SnapshotTableType = SnapNormalTable | SnapMonoidalTable | SnapFullTable deriving stock (Show, Eq) data SnapshotMetaData = SnapshotMetaData { diff --git a/src/Database/LSMTree/Internal/Snapshot/Codec.hs b/src/Database/LSMTree/Internal/Snapshot/Codec.hs index 0fc30b816..8922d3a23 100644 --- a/src/Database/LSMTree/Internal/Snapshot/Codec.hs +++ b/src/Database/LSMTree/Internal/Snapshot/Codec.hs @@ -227,6 +227,7 @@ instance DecodeVersioned SnapshotLabel where instance Encode SnapshotTableType where encode SnapNormalTable = encodeWord 0 encode SnapMonoidalTable = encodeWord 1 + encode SnapFullTable = encodeWord 2 instance DecodeVersioned SnapshotTableType where decodeVersioned V0 = do @@ -234,6 +235,7 @@ instance DecodeVersioned SnapshotTableType where case tag of 0 -> pure SnapNormalTable 1 -> pure SnapMonoidalTable + 2 -> pure SnapFullTable _ -> fail ("[SnapshotTableType] Unexpected tag: " <> show tag) {-------------------------------------------------------------------------------