Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[omm] Interfaces for storage #1353

Merged
merged 1 commit into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
25 changes: 25 additions & 0 deletions open-media-match/src/OpenMediaMatch/storage/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

"""
The default store for accessing persistent data on OMM.
"""

from OpenMediaMatch.storage import interface
from OpenMediaMatch.storage import mocked

class DefaultOMMStore(mocked.MockedUnifiedStore, interface.IUnifiedStore):
    """
    Default persistent-data store for OMM.

    This class defines no methods of its own: every store method is
    currently inherited from mocked.MockedUnifiedStore. The mocked
    pieces are expected to be replaced as the real implementation
    progresses.

    When writing code that needs storage, depend on the storage
    interfaces rather than on DefaultOMMStore itself, so that future
    authors can extend or swap the store more easily.

    Data is stored in a combination of:
      * Static config set by deployment (e.g. installed SignalTypes)
      * Postgres-backed tables (e.g. info downloaded from external APIs)
      * Blobstore (e.g. built indices)
    """
89 changes: 89 additions & 0 deletions open-media-match/src/OpenMediaMatch/storage/interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

"""
Abstraction layer for fetching information needed to run OMM.

To try and separate concerns for extension, this file provides
access to all the persistent data needed to run OMM. In the
default implementation, we have a unified implementation that
implements all interfaces, but different implementations may
prefer to store different data in different places.

For implementations, see storage.mocked.MockedUnifiedStore, which provides
plausible defaults for all of these interfaces, useful for testing,
or storage.default.DefaultOMMStore, which uses a combination of
static configuration and postgres.
"""

import abc
from dataclasses import dataclass
import typing as t
from threatexchange.content_type.content_base import ContentType
from threatexchange.signal_type.signal_base import SignalType
from threatexchange.exchanges.signal_exchange_api import (
TSignalExchangeAPICls,
)

@dataclass
class ContentTypeConfig:
    """
    Holder for ContentType configuration.

    Pairs a ContentType class with a flag saying whether it may be used.
    """

    # Content types that are not enabled should not be used in hashing/matching
    enabled: bool
    # The ContentType class itself (not an instance).
    # NOTE(review): despite its name, this field holds a content type; the
    # name is kept as-is so existing positional/keyword callers keep working.
    signal_type: t.Type[ContentType]


class IContentTypeConfigStore(metaclass=abc.ABCMeta):
    """
    Interface for accessing ContentType configuration.

    Concrete stores must override get_content_type_configs().
    """

    @abc.abstractmethod
    def get_content_type_configs(self) -> t.Mapping[str, ContentTypeConfig]:
        """Return all installed content types as a str-keyed mapping of configs."""


@dataclass
class SignalTypeConfig:
    """
    Holder for SignalType configuration.

    Pairs a SignalType class with a flag saying whether it may be used.
    """

    # Signal types that are not enabled should not be used in hashing/matching
    enabled: bool
    # The SignalType class itself (not an instance)
    signal_type: t.Type[SignalType]


class ISignalTypeConfigStore(metaclass=abc.ABCMeta):
    """
    Interface for accessing SignalType configuration.

    Concrete stores must override get_signal_type_configs().
    """

    @abc.abstractmethod
    def get_signal_type_configs(self) -> t.Mapping[str, SignalTypeConfig]:
        """Return all installed signal types as a str-keyed mapping of configs."""


class ISignalExchangeConfigStore(metaclass=abc.ABCMeta):
    """
    Interface for accessing SignalExchange configuration.

    Concrete stores must override get_exchange_type_configs().
    """

    @abc.abstractmethod
    def get_exchange_type_configs(self) -> t.Mapping[str, TSignalExchangeAPICls]:
        """Return all installed SignalExchange types as a str-keyed mapping."""

# TODO - index, collaborations, banks, OMM-specific

class IUnifiedStore(
    IContentTypeConfigStore,
    ISignalTypeConfigStore,
    ISignalExchangeConfigStore,
    metaclass=abc.ABCMeta,
):
    """
    All the store interfaces combined into a single interface.

    It adds no methods of its own; it only inherits the abstract methods
    of the content type, signal type, and signal exchange config stores.

    This is probably the most common way to consume the stores,
    especially early on in development. Passing one of the narrower
    interfaces instead is mostly helpful for typing.
    """

37 changes: 37 additions & 0 deletions open-media-match/src/OpenMediaMatch/storage/mocked.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

import typing as t

from threatexchange.content_type.photo import PhotoContent
from threatexchange.content_type.video import VideoContent
from threatexchange.exchanges.signal_exchange_api import TSignalExchangeAPICls
from threatexchange.exchanges.impl.static_sample import StaticSampleSignalExchangeAPI
from threatexchange.signal_type.pdq.signal import PdqSignal
from threatexchange.signal_type.md5 import VideoMD5Signal

from OpenMediaMatch.storage import interface
from OpenMediaMatch.storage.interface import SignalTypeConfig


class MockedUnifiedStore(interface.IUnifiedStore):
    """
    Hard-coded implementation of every store interface.

    Each getter returns a freshly built mapping of plausible default
    values, keyed by the name each type reports via get_name().
    """

    def get_content_type_configs(self) -> t.Mapping[str, interface.ContentTypeConfig]:
        """Return enabled configs for photo and video content, keyed by name."""
        configs = {}
        for content_cls in (PhotoContent, VideoContent):
            configs[content_cls.get_name()] = interface.ContentTypeConfig(
                True, content_cls
            )
        return configs

    def get_exchange_type_configs(self) -> t.Mapping[str, TSignalExchangeAPICls]:
        """Return the static sample exchange API class, keyed by its name."""
        exchange_cls = StaticSampleSignalExchangeAPI
        return {exchange_cls.get_name(): exchange_cls}

    def get_signal_type_configs(self) -> t.Mapping[str, SignalTypeConfig]:
        """Return enabled configs for the PDQ and video MD5 signals, keyed by name."""
        return dict(
            (signal_cls.get_name(), interface.SignalTypeConfig(True, signal_cls))
            for signal_cls in (PdqSignal, VideoMD5Signal)
        )