From 53bcfc10f223419574aa6a0380b8e002527a4d08 Mon Sep 17 00:00:00 2001 From: Dominik Hoffmann Date: Wed, 27 Nov 2024 15:29:57 +0100 Subject: [PATCH] #80: Refactoring project structure according to feedback. Fixed imports and black-linted Signed-off-by: Dominik Hoffmann --- .../pipelines/data_quality/__init__.py | 2 +- .../data_manipulation/interfaces.py | 2 +- .../spark/data_quality/duplicate_detection.py | 5 ++++- .../spark/data_quality/interval_filtering.py | 7 +++++-- .../data_quality/k_sigma_anomaly_detection.py | 5 ++++- .../data_quality/missing_value_imputation.py | 5 ++++- .../normalization/denormalization.py | 9 +++++++-- .../normalization/normalization.py | 9 +++++++-- .../spark/data_quality/prediction/arima.py | 5 ++++- .../data_quality/monitoring/interfaces.py | 2 +- .../monitoring/spark/check_value_ranges.py | 9 +++++++-- .../monitoring/spark/flatline_detection.py | 9 +++++++-- .../spark/great_expectations_data_quality.py | 9 +++++++-- .../spark/identify_missing_data_interval.py | 13 +++++++++--- .../spark/identify_missing_data_pattern.py | 13 +++++++++--- .../pipelines/data_wranglers/__init__.py | 13 ------------ .../pipelines/data_quality/__init__.py | 2 +- .../data_manipulation/__init__.py | 2 +- .../data_manipulation/spark/test_arima.py | 4 +++- .../spark/test_k_sigma_anomaly_detection.py | 5 +++-- .../spark/test_normalization.py | 20 +++++++++++-------- .../data_quality/monitoring/spark/__init__.py | 13 ++++++++++++ .../spark/test_check_value_ranges.py | 2 +- .../spark/test_flatline_detection.py | 2 +- .../test_great_expectations_data_quality.py | 2 +- .../test_identify_missing_data_interval.py | 2 +- .../test_identify_missing_data_pattern.py | 2 +- .../spark/test_linear_regression.py | 12 +++++------ 28 files changed, 123 insertions(+), 62 deletions(-) delete mode 100644 src/sdk/python/rtdip_sdk/pipelines/data_wranglers/__init__.py diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/__init__.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/__init__.py index 0b3d67993..5305a429e 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/__init__.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/__init__.py @@ -10,4 +10,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/interfaces.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/interfaces.py index 007a09fb0..2ad1a2811 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/interfaces.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/interfaces.py @@ -16,7 +16,7 @@ from pyspark.sql import DataFrame -from rtdip_sdk.pipelines.interfaces import PipelineComponentBaseInterface +from src.sdk.python.rtdip_sdk.pipelines.interfaces import PipelineComponentBaseInterface class DataManipulationBaseInterface(PipelineComponentBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/duplicate_detection.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/duplicate_detection.py index 52ed2407a..89b98729e 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/duplicate_detection.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/duplicate_detection.py @@ -15,7 +15,10 @@ from pyspark.sql import DataFrame as PySparkDataFrame from ...interfaces import DataManipulationBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) class DuplicateDetection(DataManipulationBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/interval_filtering.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/interval_filtering.py index 3b192b90c..2727fd8c2 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/interval_filtering.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/interval_filtering.py @@ -18,11 +18,14 @@ from pyspark.sql import SparkSession from pyspark.sql import DataFrame -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) from ...interfaces import DataManipulationBaseInterface -class IntervalFiltering (DataManipulationBaseInterface): +class IntervalFiltering(DataManipulationBaseInterface): """ Cleanses a DataFrame by removing rows outside a specified interval window. Example: diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/k_sigma_anomaly_detection.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/k_sigma_anomaly_detection.py index 591f5d081..417898cc4 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/k_sigma_anomaly_detection.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/k_sigma_anomaly_detection.py @@ -15,7 +15,10 @@ from pyspark.sql import DataFrame, SparkSession from pyspark.sql.functions import mean, stddev, abs, col from ...interfaces import DataManipulationBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) class KSigmaAnomalyDetection(DataManipulationBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/missing_value_imputation.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/missing_value_imputation.py index ed54021e7..081068b41 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/missing_value_imputation.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/missing_value_imputation.py @@ -21,7 +21,10 @@ from datetime import timedelta from typing import List from ...interfaces import DataManipulationBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) class MissingValueImputation(DataManipulationBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/normalization/denormalization.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/normalization/denormalization.py index a2079c8ce..e63bb6b03 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/normalization/denormalization.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/normalization/denormalization.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. from pyspark.sql import DataFrame as PySparkDataFrame -from rtdip_sdk.pipelines.data_quality.data_manipulation.interfaces import DataManipulationBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType +from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.interfaces import ( + DataManipulationBaseInterface, +) +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) from .normalization import ( NormalizationBaseClass, ) diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/normalization/normalization.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/normalization/normalization.py index c9226d9ee..fbe94b0ba 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/normalization/normalization.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/normalization/normalization.py @@ -14,8 +14,13 @@ from abc import abstractmethod from pyspark.sql import DataFrame as PySparkDataFrame from typing import List -from rtdip_sdk.pipelines.data_quality.data_manipulation.interfaces import DataManipulationBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType +from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.interfaces import ( + DataManipulationBaseInterface, +) +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) class NormalizationBaseClass(DataManipulationBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/prediction/arima.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/prediction/arima.py index 32d05a3df..839409473 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/prediction/arima.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/data_quality/prediction/arima.py @@ -19,7 +19,10 @@ from statsmodels.tsa.arima.model import ARIMA, ARIMAResults from ....interfaces import DataManipulationBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) class ArimaPrediction(DataManipulationBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/interfaces.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/interfaces.py index aaf7a4cab..05a0df443 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/interfaces.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/interfaces.py @@ -16,7 +16,7 @@ from pyspark.sql import DataFrame -from rtdip_sdk.pipelines.interfaces import PipelineComponentBaseInterface +from src.sdk.python.rtdip_sdk.pipelines.interfaces import PipelineComponentBaseInterface class MonitoringBaseInterface(PipelineComponentBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/check_value_ranges.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/check_value_ranges.py index 47db5a554..45d1ee195 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/check_value_ranges.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/check_value_ranges.py @@ -4,8 +4,13 @@ from functools import reduce from operator import or_ -from rtdip_sdk.pipelines.data_quality.monitoring.interfaces import MonitoringBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType +from src.sdk.python.rtdip_sdk.pipelines.data_quality.monitoring.interfaces import ( + MonitoringBaseInterface, +) +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) class CheckValueRanges(MonitoringBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/flatline_detection.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/flatline_detection.py index f68ca543f..dc31d13c2 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/flatline_detection.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/flatline_detection.py @@ -17,8 +17,13 @@ from pyspark.sql.functions import col, when, lag, count, sum from pyspark.sql.window import Window -from rtdip_sdk.pipelines.data_quality.monitoring.interfaces import MonitoringBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType +from src.sdk.python.rtdip_sdk.pipelines.data_quality.monitoring.interfaces import ( + MonitoringBaseInterface, +) +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) class FlatlineDetection(MonitoringBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/great_expectations_data_quality.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/great_expectations_data_quality.py index 36bc93593..31e6e9be2 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/great_expectations_data_quality.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/great_expectations_data_quality.py @@ -14,8 +14,13 @@ import great_expectations as gx from pyspark.sql import DataFrame, SparkSession -from rtdip_sdk.pipelines.data_quality.monitoring.interfaces import MonitoringBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType +from src.sdk.python.rtdip_sdk.pipelines.data_quality.monitoring.interfaces import ( + MonitoringBaseInterface, +) +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) from great_expectations.checkpoint import ( Checkpoint, ) diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/identify_missing_data_interval.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/identify_missing_data_interval.py index 437cd0bd3..6b3a8dd19 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/identify_missing_data_interval.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/identify_missing_data_interval.py @@ -18,9 +18,16 @@ from pyspark.sql import functions as F from pyspark.sql.window import Window -from rtdip_sdk.pipelines.data_quality.monitoring.interfaces import MonitoringBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType -from rtdip_sdk.pipelines.utilities.spark.time_string_parsing import parse_time_string_to_ms +from src.sdk.python.rtdip_sdk.pipelines.data_quality.monitoring.interfaces import ( + MonitoringBaseInterface, +) +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) +from src.sdk.python.rtdip_sdk.pipelines.utilities.spark.time_string_parsing import ( + parse_time_string_to_ms, +) class IdentifyMissingDataInterval(MonitoringBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/identify_missing_data_pattern.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/identify_missing_data_pattern.py index 19be9bc44..97bb84107 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/identify_missing_data_pattern.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/identify_missing_data_pattern.py @@ -4,9 +4,16 @@ from pyspark.sql import DataFrame as PySparkDataFrame from pyspark.sql import functions as F -from rtdip_sdk.pipelines.data_quality.monitoring.interfaces import MonitoringBaseInterface -from rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType -from rtdip_sdk.pipelines.utilities.spark.time_string_parsing import parse_time_string_to_ms +from src.sdk.python.rtdip_sdk.pipelines.data_quality.monitoring.interfaces import ( + MonitoringBaseInterface, +) +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) +from src.sdk.python.rtdip_sdk.pipelines.utilities.spark.time_string_parsing import ( + parse_time_string_to_ms, +) class IdentifyMissingDataPattern(MonitoringBaseInterface): diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_wranglers/__init__.py b/src/sdk/python/rtdip_sdk/pipelines/data_wranglers/__init__.py deleted file mode 100644 index 5305a429e..000000000 --- a/src/sdk/python/rtdip_sdk/pipelines/data_wranglers/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2022 RTDIP -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/__init__.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/__init__.py index 16c2bbafa..74ed9fc2f 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/__init__.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/__init__.py @@ -10,4 +10,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/__init__.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/__init__.py index 16c2bbafa..74ed9fc2f 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/__init__.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/__init__.py @@ -10,4 +10,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_arima.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_arima.py index e7e9bc4f2..da6f4bd31 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_arima.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_arima.py @@ -19,7 +19,9 @@ from pyspark.sql import SparkSession from pyspark.sql.dataframe import DataFrame -from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.prediction.arima import ArimaPrediction +from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.prediction.arima import ( + ArimaPrediction, +) @pytest.fixture(scope="session") diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_k_sigma_anomaly_detection.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_k_sigma_anomaly_detection.py index 427ae906a..b7a5ab860 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_k_sigma_anomaly_detection.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_k_sigma_anomaly_detection.py @@ -1,7 +1,8 @@ from pyspark.sql import SparkSession -from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.k_sigma_anomaly_detection import \ - KSigmaAnomalyDetection +from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.k_sigma_anomaly_detection import ( + KSigmaAnomalyDetection, +) # Normal data mean=10 stddev=5 + 3 anomalies # fmt: off diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_normalization.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_normalization.py index 2ddd8eb8c..a994792f9 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_normalization.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/test_normalization.py @@ -17,14 +17,18 @@ from pyspark.sql import SparkSession from pyspark.sql.dataframe import DataFrame -from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.normalization.denormalization import \ - Denormalization -from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.normalization.normalization import \ - NormalizationBaseClass -from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.normalization.normalization_mean import \ - NormalizationMean -from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.normalization.normalization_minmax import \ - NormalizationMinMax +from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.normalization.denormalization import ( + Denormalization, +) +from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.normalization.normalization import ( + NormalizationBaseClass, +) +from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.normalization.normalization_mean import ( + NormalizationMean, +) +from src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.data_quality.normalization.normalization_minmax import ( + NormalizationMinMax, +) @pytest.fixture(scope="session") diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/__init__.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/__init__.py index e69de29bb..74ed9fc2f 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/__init__.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 RTDIP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_check_value_ranges.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_check_value_ranges.py index 78deb6ece..f14690861 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_check_value_ranges.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_check_value_ranges.py @@ -3,7 +3,7 @@ from io import StringIO import logging -from rtdip_sdk.pipelines.data_quality.monitoring.spark.check_value_ranges import ( +from src.sdk.python.rtdip_sdk.pipelines.data_quality.monitoring.spark.check_value_ranges import ( CheckValueRanges, ) diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_flatline_detection.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_flatline_detection.py index fdc816e04..ed5eb688c 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_flatline_detection.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_flatline_detection.py @@ -1,6 +1,6 @@ import pytest from pyspark.sql import SparkSession -from rtdip_sdk.pipelines.data_quality.monitoring.spark.flatline_detection import ( +from src.sdk.python.rtdip_sdk.pipelines.data_quality.monitoring.spark.flatline_detection import ( FlatlineDetection, ) diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_great_expectations_data_quality.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_great_expectations_data_quality.py index 7f4e20598..51584ba4b 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_great_expectations_data_quality.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_great_expectations_data_quality.py @@ -1,7 +1,7 @@ from pytest_mock import MockerFixture from pyspark.sql import SparkSession -from rtdip_sdk.pipelines.data_quality.monitoring.spark.great_expectations_data_quality import ( +from src.sdk.python.rtdip_sdk.pipelines.data_quality.monitoring.spark.great_expectations_data_quality import ( GreatExpectationsDataQuality, ) diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_identify_missing_data_interval.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_identify_missing_data_interval.py index d96a941ca..26fc2a375 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_identify_missing_data_interval.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_identify_missing_data_interval.py @@ -1,6 +1,6 @@ import pytest from pyspark.sql import SparkSession -from rtdip_sdk.pipelines.data_quality.monitoring.spark.identify_missing_data_interval import ( +from src.sdk.python.rtdip_sdk.pipelines.data_quality.monitoring.spark.identify_missing_data_interval import ( IdentifyMissingDataInterval, ) diff --git a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_identify_missing_data_pattern.py b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_identify_missing_data_pattern.py index 9dbce0495..fa170dafc 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_identify_missing_data_pattern.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/data_quality/monitoring/spark/test_identify_missing_data_pattern.py @@ -8,7 +8,7 @@ from pyspark.sql.types import StructType, StructField, StringType -from rtdip_sdk.pipelines.data_quality.monitoring.spark.identify_missing_data_pattern import ( +from src.sdk.python.rtdip_sdk.pipelines.data_quality.monitoring.spark.identify_missing_data_pattern import ( IdentifyMissingDataPattern, ) diff --git a/tests/sdk/python/rtdip_sdk/pipelines/machine_learning/spark/test_linear_regression.py b/tests/sdk/python/rtdip_sdk/pipelines/machine_learning/spark/test_linear_regression.py index f58ebcd36..3bae9691c 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/machine_learning/spark/test_linear_regression.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/machine_learning/spark/test_linear_regression.py @@ -170,8 +170,8 @@ def sample_data(spark): # Test cases def test_cols_to_vector(sample_data): df = sample_data - # Pass the DataFrame to ColsToVector - cols_to_vector = ColsToVector(df=df, input_cols=["Value"], output_col="features") + # Pass the DataFrame to ColumnsToVector + cols_to_vector = ColumnsToVector(df=df, input_cols=["Value"], output_col="features") transformed_df = cols_to_vector.transform() assert "features" in transformed_df.columns @@ -180,8 +180,8 @@ def test_cols_to_vector(sample_data): def test_polynomial_features(sample_data): df = sample_data - # Convert 'Value' to a vector using ColsToVector - cols_to_vector = ColsToVector(df=df, input_cols=["Value"], output_col="features") + # Convert 'Value' to a vector using ColumnsToVector + cols_to_vector = ColumnsToVector(df=df, input_cols=["Value"], output_col="features") vectorized_df = cols_to_vector.transform() polynomial_features = PolynomialFeatures( @@ -197,8 +197,8 @@ def test_polynomial_features(sample_data): def test_linear_regression(sample_data): df = sample_data - # Use ColsToVector to assemble features into a single vector column - cols_to_vector = ColsToVector(df=df, input_cols=["Value"], output_col="features") + # Use ColumnsToVector to assemble features into a single vector column + cols_to_vector = ColumnsToVector(df=df, input_cols=["Value"], output_col="features") df = cols_to_vector.transform() linear_regression = LinearRegression( df, features_col="features", label_col="Value", prediction_col="prediction"