Skip to content

Commit

Permalink
HJ-24 Add DataHub integration config (#5401)
Browse files Browse the repository at this point in the history
  • Loading branch information
erosselli authored Oct 24, 2024
1 parent c940b82 commit 4bec008
Show file tree
Hide file tree
Showing 18 changed files with 440 additions and 2 deletions.
4 changes: 3 additions & 1 deletion .fides/db_dataset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,8 @@ dataset:
data_categories: [system.operations]
- name: last_test_timestamp
data_categories: [system.operations]
- name: last_run_timestamp
data_categories: [system.operations]
- name: name
data_categories: [system.operations]
- name: saas_config
Expand Down Expand Up @@ -2334,4 +2336,4 @@ dataset:
- name: created_at
data_categories: [system.operations]
- name: updated_at
data_categories: [system.operations]
data_categories: [system.operations]
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ The types of changes are:

## [Unreleased](https://github.com/ethyca/fidesplus/compare/2.48.0...main)

### Added
- Added DataHub integration config [#5401](https://github.com/ethyca/fides/pull/5401)

### Fixed
- Fixed Snowflake DSR integration failing with syntax error [#5417](https://github.com/ethyca/fides/pull/5417)

Expand Down
1 change: 1 addition & 0 deletions clients/admin-ui/public/images/connector-logos/datahub.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export const CONNECTOR_LOGOS_PATH = "/images/connector-logos/";
export const CONNECTION_TYPE_LOGO_MAP = new Map<ConnectionType, string>([
[ConnectionType.ATTENTIVE_EMAIL, "attentive.svg"],
[ConnectionType.BIGQUERY, "bigquery.svg"],
[ConnectionType.DATAHUB, "datahub.svg"],
[ConnectionType.DYNAMODB, "dynamodb.svg"],
[ConnectionType.GENERIC_CONSENT_EMAIL, "ethyca.svg"],
[ConnectionType.GENERIC_ERASURE_EMAIL, "ethyca.svg"],
Expand Down
1 change: 1 addition & 0 deletions clients/admin-ui/src/types/api/models/ConnectionType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
export enum ConnectionType {
ATTENTIVE_EMAIL = "attentive_email",
BIGQUERY = "bigquery",
DATAHUB = "datahub",
DYNAMODB = "dynamodb",
FIDES = "fides",
GENERIC_CONSENT_EMAIL = "generic_consent_email",
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
acryl-datahub==0.14.1
alembic==1.8.1
anyascii==0.3.2
anyio==3.7.1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Add last_run_timestamp to ConnectionConfig
Revision ID: 5a4859f74832
Revises: c9a22b284afa
Create Date: 2024-10-22 14:58:09.174708
"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "5a4859f74832"
down_revision = "c9a22b284afa"
branch_labels = None
depends_on = None


def upgrade():
    """Add the ``last_run_timestamp`` column to the ``connectionconfig`` table."""
    # Timezone-aware and nullable: the column is optional on every row.
    last_run_column = sa.Column(
        "last_run_timestamp",
        sa.DateTime(timezone=True),
        nullable=True,
    )
    op.add_column("connectionconfig", last_run_column)


def downgrade():
    """Drop the ``last_run_timestamp`` column from the ``connectionconfig`` table."""
    table_name = "connectionconfig"
    column_name = "last_run_timestamp"
    op.drop_column(table_name, column_name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""Add Datahub connection type
Revision ID: c9a22b284afa
Revises: 1c8dfd6a1bc6
Create Date: 2024-10-21 13:40:31.313754
"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "c9a22b284afa"
down_revision = "1c8dfd6a1bc6"
branch_labels = None
depends_on = None


def upgrade():
    """Recreate the ``connectiontype`` enum with ``'datahub'`` added.

    Steps, in order: rename the existing type to ``connectiontype_old``,
    create a new ``connectiontype`` holding the full value list, convert the
    ``connectionconfig.connection_type`` column to the new type via a text
    cast, then drop the old type.
    """
    # Add 'datahub' to ConnectionType enum
    # Move the current type out of the way so the name can be reused.
    op.execute("ALTER TYPE connectiontype RENAME TO connectiontype_old")
    # New type: every previously listed value plus 'datahub' at the end.
    op.execute(
        """
CREATE TYPE connectiontype AS ENUM (
'mongodb',
'mysql',
'https',
'snowflake',
'redshift',
'mssql',
'mariadb',
'bigquery',
'saas',
'manual',
'manual_webhook',
'timescale',
'fides',
'sovrn',
'attentive_email',
'dynamodb',
'postgres',
'generic_consent_email',
'generic_erasure_email',
'scylla',
's3',
'google_cloud_sql_mysql',
'google_cloud_sql_postgres',
'dynamic_erasure_email',
'rds_mysql',
'rds_postgres',
'datahub'
)
"""
    )
    # Re-point the column at the new type, casting through text because the
    # old and new enum types are distinct.
    op.execute(
        """
ALTER TABLE connectionconfig ALTER COLUMN connection_type TYPE connectiontype USING
connection_type::text::connectiontype
"""
    )
    # The column has been converted, so the renamed type can be removed.
    op.execute("DROP TYPE connectiontype_old")


def downgrade():
    """Recreate the ``connectiontype`` enum without ``'datahub'``.

    WARNING: this is destructive. Every ``connectionconfig`` row whose
    ``connection_type`` is ``'datahub'`` is deleted before the enum is
    rebuilt.
    """
    # Remove 'datahub' from ConnectionType enum
    # Rows using the value being removed are deleted first -- presumably
    # because the cast to the narrower enum below would fail on them.
    op.execute("DELETE FROM connectionconfig WHERE connection_type IN ('datahub')")
    # Move the current type out of the way so the name can be reused.
    op.execute("ALTER TYPE connectiontype RENAME TO connectiontype_old")
    # Same value list as the upgrade, minus 'datahub'.
    op.execute(
        """
CREATE TYPE connectiontype AS ENUM (
'mongodb',
'mysql',
'https',
'snowflake',
'redshift',
'mssql',
'mariadb',
'bigquery',
'saas',
'manual',
'manual_webhook',
'timescale',
'fides',
'sovrn',
'attentive_email',
'dynamodb',
'postgres',
'generic_consent_email',
'generic_erasure_email',
'scylla',
's3',
'google_cloud_sql_mysql',
'google_cloud_sql_postgres',
'dynamic_erasure_email',
'rds_mysql',
'rds_postgres'
)
"""
    )
    # Re-point the column at the new type, casting through text because the
    # old and new enum types are distinct.
    op.execute(
        """
ALTER TABLE connectionconfig ALTER COLUMN connection_type TYPE connectiontype USING
connection_type::text::connectiontype
"""
    )
    # The column has been converted, so the renamed type can be removed.
    op.execute("DROP TYPE connectiontype_old")
6 changes: 6 additions & 0 deletions src/fides/api/models/connectionconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class ConnectionType(enum.Enum):

attentive_email = "attentive_email"
bigquery = "bigquery"
datahub = "datahub"
dynamodb = "dynamodb"
fides = "fides"
generic_consent_email = "generic_consent_email" # Run after the traversal
Expand Down Expand Up @@ -70,6 +71,7 @@ def human_readable(self) -> str:
readable_mapping: Dict[str, str] = {
ConnectionType.attentive_email.value: "Attentive Email",
ConnectionType.bigquery.value: "BigQuery",
ConnectionType.datahub.value: "DataHub",
ConnectionType.dynamic_erasure_email.value: "Dynamic Erasure Email",
ConnectionType.dynamodb.value: "DynamoDB",
ConnectionType.fides.value: "Fides Connector",
Expand Down Expand Up @@ -141,6 +143,10 @@ class ConnectionConfig(Base):
disabled = Column(Boolean, server_default="f", default=False)
disabled_at = Column(DateTime(timezone=True))

# Optional column to store the last time the connection was "run"
# Each integration can determine the semantics of what "being run" is
last_run_timestamp = Column(DateTime(timezone=True), nullable=True)

# only applicable to ConnectionConfigs of connection type saas
saas_config = Column(
MutableDict.as_mutable(JSONB), index=False, unique=False, nullable=True
Expand Down
8 changes: 8 additions & 0 deletions src/fides/api/schemas/connection_configuration/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
from fides.api.schemas.connection_configuration.connection_secrets_bigquery import (
BigQuerySchema as BigQuerySchema,
)
from fides.api.schemas.connection_configuration.connection_secrets_datahub import (
DatahubDocsSchema as DatahubDocsSchema,
)
from fides.api.schemas.connection_configuration.connection_secrets_datahub import (
DatahubSchema as DatahubSchema,
)
from fides.api.schemas.connection_configuration.connection_secrets_dynamic_erasure_email import (
DynamicErasureEmailDocsSchema as DynamicErasureEmailDocsSchema,
)
Expand Down Expand Up @@ -150,6 +156,7 @@
secrets_schemas: Dict[str, Any] = {
ConnectionType.attentive_email.value: AttentiveSchema,
ConnectionType.bigquery.value: BigQuerySchema,
ConnectionType.datahub.value: DatahubSchema,
ConnectionType.dynamic_erasure_email.value: DynamicErasureEmailSchema,
ConnectionType.dynamodb.value: DynamoDBSchema,
ConnectionType.fides.value: FidesConnectorSchema,
Expand Down Expand Up @@ -208,6 +215,7 @@ def get_connection_secrets_schema(
# what type of schema we should validate against.
connection_secrets_schemas = Union[
BigQueryDocsSchema,
DatahubDocsSchema,
DynamicErasureEmailDocsSchema,
DynamoDBDocsSchema,
EmailDocsSchema,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from enum import Enum
from typing import ClassVar, List

from pydantic import Field

from fides.api.custom_types import AnyHttpUrlStringRemovesSlash
from fides.api.schemas.base_class import NoValidationSchema
from fides.api.schemas.connection_configuration.connection_secrets import (
ConnectionConfigSecretsSchema,
)


class PeriodicIntegrationFrequency(Enum):
    """Enum for periodic integration frequency"""

    # Each member's value is the lowercase string matching its name.
    daily = "daily"
    weekly = "weekly"
    monthly = "monthly"


# Secrets schema for a DataHub connection: where the server lives, how to
# authenticate against it, and how often the integration should run.
class DatahubSchema(ConnectionConfigSecretsSchema):
    # Address of the DataHub server. NOTE(review): the type name suggests a
    # trailing slash is stripped -- confirm in fides.api.custom_types.
    datahub_server_url: AnyHttpUrlStringRemovesSlash = Field(
        description="The URL of your DataHub server.",
        title="DataHub Server URL",
    )
    # Authentication token; flagged as sensitive in the JSON schema extras.
    datahub_token: str = Field(
        description="The token used to authenticate with your DataHub server.",
        title="DataHub Token",
        json_schema_extra={"sensitive": True},
    )
    # Run cadence; falls back to daily when the caller does not supply one.
    frequency: PeriodicIntegrationFrequency = Field(
        default=PeriodicIntegrationFrequency.daily,
        description="The frequency at which the integration should run. Defaults to daily.",
        title="Frequency",
    )

    # Names of the secrets listed as required; `frequency` is not among them.
    _required_components: ClassVar[List[str]] = [
        "datahub_server_url",
        "datahub_token",
    ]


# Docs-facing variant of DatahubSchema: same fields, with NoValidationSchema
# mixed in. NOTE(review): presumably this disables validation for API docs --
# confirm against fides.api.schemas.base_class.
class DatahubDocsSchema(DatahubSchema, NoValidationSchema):
    """
    Datahub Schema for API docs.
    """
4 changes: 4 additions & 0 deletions src/fides/api/service/connectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
from fides.api.service.connectors.consent_email_connector import (
GenericConsentEmailConnector,
)
from fides.api.service.connectors.datahub_connector import (
DatahubConnector as DatahubConnector,
)
from fides.api.service.connectors.dynamic_erasure_email_connector import (
DynamicErasureEmailConnector,
)
Expand Down Expand Up @@ -76,6 +79,7 @@
supported_connectors: Dict[str, Any] = {
ConnectionType.attentive_email.value: AttentiveConnector,
ConnectionType.bigquery.value: BigQueryConnector,
ConnectionType.datahub.value: DatahubConnector,
ConnectionType.dynamic_erasure_email.value: DynamicErasureEmailConnector,
ConnectionType.dynamodb.value: DynamoDBConnector,
ConnectionType.fides.value: FidesConnector,
Expand Down
35 changes: 35 additions & 0 deletions src/fides/api/service/connectors/datahub_connector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from typing import Generic, Optional

from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig
from loguru import logger

from fides.api.models.connectionconfig import ConnectionConfig, ConnectionTestStatus
from fides.api.schemas.connection_configuration.connection_secrets_datahub import (
DatahubSchema,
)
from fides.api.service.connectors.base_connector import DB_CONNECTOR_TYPE


class DatahubConnector(Generic[DB_CONNECTOR_TYPE]):
    """Connector that reaches a DataHub server through a ``DataHubGraph`` client.

    The client is configured from the secrets stored on the given
    ``ConnectionConfig``, validated through ``DatahubSchema``.
    """

    def __init__(self, configuration: ConnectionConfig):
        """Validate the stored secrets and build the DataHub client.

        Args:
            configuration: Connection config whose ``secrets`` carry the
                DataHub server URL and token. Missing secrets are treated as
                an empty dict and left for ``DatahubSchema`` to validate.
        """
        self.configuration = configuration
        self.config = DatahubSchema(**(configuration.secrets or {}))
        self.datahub_client = DataHubGraph(
            DataHubGraphConfig(
                server=str(self.config.datahub_server_url),
                # Pass the configured token so servers with authentication
                # enabled accept the client's requests; previously the token
                # was collected as a required secret but never used.
                token=self.config.datahub_token,
            )
        )

    def test_connection(self) -> Optional[ConnectionTestStatus]:
        """Check that the DataHub server can be reached with this client.

        Returns:
            ``ConnectionTestStatus.succeeded`` when the client's own
            ``test_connection`` call completes, ``ConnectionTestStatus.failed``
            when it raises. Errors are logged rather than propagated.
        """
        logger.info(f"Testing DataHub connection for {self.configuration.key}...")
        try:
            self.datahub_client.test_connection()
            logger.info(
                f"DataHub connection test for {self.configuration.key} succeeded."
            )
            return ConnectionTestStatus.succeeded
        except Exception as e:
            # Any failure is reported to the caller as a failed test status.
            logger.error(
                f"DataHub connection test for {self.configuration.key} failed: {e}"
            )
            return ConnectionTestStatus.failed
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from fides.config.config_proxy import ConfigProxy
from tests.fixtures.application_fixtures import *
from tests.fixtures.bigquery_fixtures import *
from tests.fixtures.datahub_fixtures import *
from tests.fixtures.dynamodb_fixtures import *
from tests.fixtures.email_fixtures import *
from tests.fixtures.fides_connector_example_fixtures import *
Expand Down
28 changes: 28 additions & 0 deletions tests/fixtures/datahub_fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import Generator

import pytest
from datahub.ingestion.graph.client import DataHubGraphConfig
from sqlalchemy.orm import Session

from fides.api.models.connectionconfig import (
AccessLevel,
ConnectionConfig,
ConnectionType,
)


@pytest.fixture(scope="function")
def datahub_connection_config_no_secrets(
    db: Session,
) -> Generator[ConnectionConfig, None, None]:
    """Yield a DataHub ``ConnectionConfig`` that has no secrets set.

    The record is created before the test and deleted once the test is done.
    """
    config_data = {
        "name": "DataHub Config",
        "key": "my_datahub_config",
        "connection_type": ConnectionType.datahub,
        "access": AccessLevel.write,
    }
    datahub_config = ConnectionConfig.create(db=db, data=config_data)
    yield datahub_config
    # Teardown: remove the record created above.
    datahub_config.delete(db)
Loading

0 comments on commit 4bec008

Please sign in to comment.