Skip to content

Commit

Permalink
Merge pull request #7 from databrickslabs/feature/dais-demo
Browse files Browse the repository at this point in the history
Adding DAIS 2023 Demo
  • Loading branch information
ravi-databricks authored Jul 10, 2023
2 parents 49499b4 + 4d9b0de commit 8b4c190
Show file tree
Hide file tree
Showing 42 changed files with 4,245 additions and 0 deletions.
34 changes: 34 additions & 0 deletions dlt-meta-demo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# DAIS 2023 [DLT-META](https://github.com/databrickslabs/dlt-meta) DEMO

1. Launch Terminal/Command prompt

2. Install [Databricks CLI](https://docs.databricks.com/dev-tools/cli/index.html)

3. ```git clone https://github.com/databrickslabs/dlt-meta.git ```

4. ```cd dlt-meta/dlt-meta-demo```

5. Get DATABRICKS_HOST:
- Enter your workspace URL, with the format `https://<instance-name>.cloud.databricks.com`. To get your workspace URL, see [Workspace instance names, URLs, and IDs](https://docs.databricks.com/workspace/workspace-details.html).

6. Generate DATABRICKS_TOKEN:
- In your Databricks workspace, click your Databricks username in the top bar, and then select User Settings from the drop-down menu.

- On the Access tokens tab, click Generate new token.

- (Optional) Enter a comment that helps you to identify this token in the future, and change the token’s default lifetime of 90 days. To create a token with no lifetime (not recommended), leave the Lifetime (days) box empty (blank).

- Click Generate.

- Copy the displayed token

7. Set the environment variables in your terminal
```
export DATABRICKS_HOST=<DATABRICKS HOST> # Paste from Step#5
export DATABRICKS_TOKEN=<DATABRICKS TOKEN> # Paste Token here from Step#6, Account needs permission to create clusters/dlt pipelines.
```

8. Run the command ```python launch_demo.py --cloud_provider_name=aws --dbr_version=12.2.x-scala2.12 --dbfs_path=dbfs:/dais-dlt-meta-demo-automated/```
- cloud_provider_name : aws or azure or gcp
- dbr_version : Databricks Runtime Version
- dbfs_path : Path on your Databricks workspace where demo will be copied for launching DLT-META Pipelines
9 changes: 9 additions & 0 deletions dlt-meta-demo/demo/conf/dqe/customers.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"expect_or_drop": {
"no_rescued_data": "_rescued_data IS NULL",
"valid_customer_id": "customer_id IS NOT NULL"
},
"expect_or_quarantine": {
"quarantine_rule": "_rescued_data IS NOT NULL OR customer_id IS NULL"
}
}
9 changes: 9 additions & 0 deletions dlt-meta-demo/demo/conf/dqe/products.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"expect_or_drop": {
"no_rescued_data": "_rescued_data IS NULL",
"valid_product_id": "product_id IS NOT NULL"
},
"expect_or_quarantine": {
"quarantine_rule": "_rescued_data IS NOT NULL OR product_id IS NULL"
}
}
9 changes: 9 additions & 0 deletions dlt-meta-demo/demo/conf/dqe/stores.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"expect_or_drop": {
"no_rescued_data": "_rescued_data IS NULL",
"valid_store_id": "store_id IS NOT NULL"
},
"expect_or_quarantine": {
"quarantine_rule": "_rescued_data IS NOT NULL OR store_id IS NULL"
}
}
10 changes: 10 additions & 0 deletions dlt-meta-demo/demo/conf/dqe/transactions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"expect_or_drop": {
"no_rescued_data": "_rescued_data IS NULL",
"valid_transaction_id": "transaction_id IS NOT NULL",
"valid_customer_id": "customer_id IS NOT NULL"
},
"expect_or_quarantine": {
"quarantine_rule": "_rescued_data IS NOT NULL OR transaction_id IS NULL OR customer_id IS NULL"
}
}
166 changes: 166 additions & 0 deletions dlt-meta-demo/demo/conf/onboarding.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
[
{
"data_flow_id": "100",
"data_flow_group": "A1",
"source_system": "mysql",
"source_format": "cloudFiles",
"source_details": {
"source_database": "customers",
"source_table": "customers",
"source_path_prod": "dbfs:/dais-dlt-meta-demo/demo/resources/data/customers",
"source_schema_path": "dbfs:/dais-dlt-meta-demo/demo/resources/ddl/customers.ddl"
},
"bronze_database_prod": "dlt_meta_bronze",
"bronze_table": "customers",
"bronze_reader_options": {
"cloudFiles.format": "csv",
"cloudFiles.rescuedDataColumn": "_rescued_data",
"header": "true"
},
"bronze_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/bronze/customers",
"bronze_data_quality_expectations_json_prod": "dbfs:/dais-dlt-meta-demo/demo/conf/dqe/customers.json",
"bronze_database_quarantine_prod": "dlt_meta_bronze",
"bronze_quarantine_table": "customers_quarantine",
"bronze_quarantine_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/bronze/customers_quarantine",
"silver_database_prod": "dlt_meta_silver",
"silver_table": "customers",
"silver_cdc_apply_changes": {
"keys": [
"customer_id"
],
"sequence_by": "dmsTimestamp",
"scd_type": "2",
"apply_as_deletes": "Op = 'D'",
"except_column_list": [
"Op",
"dmsTimestamp",
"_rescued_data"
]
},
"silver_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/silver/customers",
"silver_transformation_json_prod": "dbfs:/dais-dlt-meta-demo/demo/conf/silver_transformations.json"
},
{
"data_flow_id": "101",
"data_flow_group": "A1",
"source_system": "mysql",
"source_format": "cloudFiles",
"source_details": {
"source_database": "transactions",
"source_table": "transactions",
"source_path_prod": "dbfs:/dais-dlt-meta-demo/demo/resources/data/transactions",
"source_schema_path": "dbfs:/dais-dlt-meta-demo/demo/resources/ddl/transactions.ddl"
},
"bronze_database_prod": "dlt_meta_bronze",
"bronze_table": "transactions",
"bronze_reader_options": {
"cloudFiles.format": "csv",
"cloudFiles.rescuedDataColumn": "_rescued_data",
"header": "true"
},
"bronze_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/bronze/transactions",
"bronze_data_quality_expectations_json_prod": "dbfs:/dais-dlt-meta-demo/demo/conf/dqe/transactions.json",
"bronze_database_quarantine_prod": "dlt_meta_bronze",
"bronze_quarantine_table": "transactions_quarantine",
"bronze_quarantine_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/bronze/transactions_quarantine",
"silver_database_prod": "dlt_meta_silver",
"silver_table": "transactions",
"silver_cdc_apply_changes": {
"keys": [
"transaction_id"
],
"sequence_by": "dmsTimestamp",
"scd_type": "2",
"apply_as_deletes": "Op = 'D'",
"except_column_list": [
"Op",
"dmsTimestamp",
"_rescued_data"
]
},
"silver_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/silver/transactions",
"silver_transformation_json_prod": "dbfs:/dais-dlt-meta-demo/demo/conf/silver_transformations.json"
},
{
"data_flow_id": "103",
"data_flow_group": "A1",
"source_system": "mysql",
"source_format": "cloudFiles",
"source_details": {
"source_database": "products",
"source_table": "products",
"source_path_prod": "dbfs:/dais-dlt-meta-demo/demo/resources/data/products",
"source_schema_path": "dbfs:/dais-dlt-meta-demo/demo/resources/ddl/products.ddl"
},
"bronze_database_prod": "dlt_meta_bronze",
"bronze_table": "products",
"bronze_reader_options": {
"cloudFiles.format": "csv",
"cloudFiles.rescuedDataColumn": "_rescued_data",
"header": "true"
},
"bronze_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/bronze/products",
"bronze_data_quality_expectations_json_prod": "dbfs:/dais-dlt-meta-demo/demo/conf/dqe/products.json",
"bronze_database_quarantine_prod": "dlt_meta_quarantine",
"bronze_quarantine_table": "products_quarantine",
"bronze_quarantine_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/bronze/products_quarantine",
"silver_database_prod": "dlt_meta_silver",
"silver_table": "products",
"silver_cdc_apply_changes": {
"keys": [
"product_id"
],
"sequence_by": "dmsTimestamp",
"scd_type": "2",
"apply_as_deletes": "Op = 'D'",
"except_column_list": [
"Op",
"dmsTimestamp",
"_rescued_data"
]
},
"silver_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/silver/products",
"silver_transformation_json_prod": "dbfs:/dais-dlt-meta-demo/demo/conf/silver_transformations.json"
},
{
"data_flow_id": "104",
"data_flow_group": "A1",
"source_system": "mysql",
"source_format": "cloudFiles",
"source_details": {
"source_database": "stores",
"source_table": "stores",
"source_path_prod": "dbfs:/dais-dlt-meta-demo/demo/resources/data/stores",
"source_schema_path": "dbfs:/dais-dlt-meta-demo/demo/resources/ddl/stores.ddl"
},
"bronze_database_prod": "dlt_meta_bronze",
"bronze_table": "stores",
"bronze_reader_options": {
"cloudFiles.format": "csv",
"cloudFiles.rescuedDataColumn": "_rescued_data",
"header": "true"
},
"bronze_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/bronze/stores",
"bronze_data_quality_expectations_json_prod": "dbfs:/dais-dlt-meta-demo/demo/conf/dqe/stores.json",
"bronze_database_quarantine_prod": "dlt_meta_bronze",
"bronze_quarantine_table": "stores_quarantine",
"bronze_quarantine_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/bronze/stores_quarantine",
"silver_database_prod": "dlt_meta_silver",
"silver_table": "stores",
"silver_cdc_apply_changes": {
"keys": [
"store_id"
],
"sequence_by": "dmsTimestamp",
"scd_type": "2",
"apply_as_deletes": "Op = 'D'",
"except_column_list": [
"Op",
"dmsTimestamp",
"_rescued_data"
]
},
"silver_table_path_prod": "dbfs:/dais-dlt-meta-demo/demo/data/silver/stores",
"silver_transformation_json_prod": "dbfs:/dais-dlt-meta-demo/demo/conf/silver_transformations.json"
}
]
Loading

0 comments on commit 8b4c190

Please sign in to comment.