From ad3418123f10ba5c832437e4b35be70a188f186f Mon Sep 17 00:00:00 2001 From: Oliver Schnell Date: Fri, 22 Nov 2024 23:50:59 +0100 Subject: [PATCH 01/13] Start implementing the simple rule-based analyzer Signed-off-by: Oliver Schnell --- apps/analyzer/metadata_analyzer/analyzer.py | 8 +++- apps/analyzer/metadata_analyzer/main.py | 8 +++- .../simple_rule_based_analyzer.py | 45 +++++++++++++++++++ 3 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py diff --git a/apps/analyzer/metadata_analyzer/analyzer.py b/apps/analyzer/metadata_analyzer/analyzer.py index fbc7a56..f090843 100644 --- a/apps/analyzer/metadata_analyzer/analyzer.py +++ b/apps/analyzer/metadata_analyzer/analyzer.py @@ -1,8 +1,9 @@ class Analyzer: - def init(database, backend, simple_analyzer): + def init(database, backend, simple_analyzer, simple_rule_based_analyzer): Analyzer.database = database Analyzer.backend = backend Analyzer.simple_analyzer = simple_analyzer + Analyzer.simple_rule_based_analyzer = simple_rule_based_analyzer def analyze(): data = list(Analyzer.database.get_results()) @@ -53,4 +54,7 @@ def update_data(): return {"count": count} - + def simple_rule_based_analysis(): + data = list(Analyzer.database.get_results()) + result = Analyzer.simple_rule_based_analyzer.analyze(data) + return result diff --git a/apps/analyzer/metadata_analyzer/main.py b/apps/analyzer/metadata_analyzer/main.py index 3cbd76a..8ca15cd 100644 --- a/apps/analyzer/metadata_analyzer/main.py +++ b/apps/analyzer/metadata_analyzer/main.py @@ -2,6 +2,7 @@ from dotenv import load_dotenv from metadata_analyzer.database import Database from metadata_analyzer.simple_analyzer import SimpleAnalyzer +from metadata_analyzer.simple_rule_based_analyzer import SimpleRuleBasedAnalyzer from metadata_analyzer.analyzer import Analyzer from metadata_analyzer.backend import Backend from flasgger import Swagger @@ -142,12 +143,17 @@ def update_data(): """ return jsonify(Analyzer.update_data()) +@app.route("/simpleRuleBasedAnalysis", methods=["POST"]) +def simple_rule_based_analysis(): + return jsonify(Analyzer.simple_rule_based_analysis()) + def main(): database = Database() backend = Backend(os.getenv("BACKEND_URL")) simple_analyzer = SimpleAnalyzer() - Analyzer.init(database, backend, simple_analyzer) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer() + Analyzer.init(database, backend, simple_analyzer, simple_rule_based_analyzer) new_port = os.getenv("FLASK_RUN_PORT") int_port = int(new_port or 5000) diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py new file mode 100644 index 0000000..cff9e9b --- /dev/null +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -0,0 +1,45 @@ +import sys +from collections import defaultdict + +class SimpleRuleBasedAnalyzer: + def __init__(self): + pass + + # For now only search for size changes and trigger corresponding alerts + def analyze(self, data): + print(len(data), file=sys.stderr) + + # Group the 'full' results by their task + groups = defaultdict(list) + for result in data: + if (result.task == "" + or result.fdi_type != 'F' + or result.data_size is None + or result.start_time is None): + continue + groups[result.task].append(result) + + print(len(groups), file=sys.stderr) + + count = 0 + # Iterate through each group to find drastic size changes + for task, unordered_results in groups.items(): + results = sorted(unordered_results, key=lambda result: result.start_time) + print(task, len(results), file=sys.stderr) + # Iterate through each pair of consecutive results and compare their sizes + for result1, result2 in zip(results[:-1], results[1:]): + # Handle results with a data_size of zero + if result1.data_size != 0: + relative_change = abs(result1.data_size - result2.data_size) / result1.data_size + else: + relative_change = 0 if result2.data_size == 0 else float("inf") + + if relative_change > 0.2: + count += 1 + print(result1.data_size, result2.data_size) + + return { + "sizeChangeAlerts": { + "count": count + } + } From d39b9970b4e60909d6f1358e7d8f7305f3bb6a7f Mon Sep 17 00:00:00 2001 From: Oliver Schnell Date: Sat, 23 Nov 2024 19:57:25 +0100 Subject: [PATCH 02/13] Adhere to naming convention, fix tests, refactor SimpleRuleBasedAnalyzer Signed-off-by: Oliver Schnell --- apps/analyzer/metadata_analyzer/analyzer.py | 4 +-- apps/analyzer/metadata_analyzer/backend.py | 6 +++- .../simple_rule_based_analyzer.py | 35 +++++++++---------- apps/analyzer/tests/test_main.py | 5 ++- 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/apps/analyzer/metadata_analyzer/analyzer.py b/apps/analyzer/metadata_analyzer/analyzer.py index f090843..f0528d3 100644 --- a/apps/analyzer/metadata_analyzer/analyzer.py +++ b/apps/analyzer/metadata_analyzer/analyzer.py @@ -45,12 +45,12 @@ def update_data(): # Send a full batch if len(batch) == 100: - Analyzer.backend.sendBackupDataBatched(batch) + Analyzer.backend.send_backup_data_batched(batch) batch = [] # Send the remaining results if len(batch) > 0: - Analyzer.backend.sendBackupDataBatched(batch) + Analyzer.backend.send_backup_data_batched(batch) return {"count": count} diff --git a/apps/analyzer/metadata_analyzer/backend.py b/apps/analyzer/metadata_analyzer/backend.py index a3dd427..f0e055a 100644 --- a/apps/analyzer/metadata_analyzer/backend.py +++ b/apps/analyzer/metadata_analyzer/backend.py @@ -4,6 +4,10 @@ class Backend: def __init__(self, backend_url): self.backend_url = backend_url - def sendBackupDataBatched(self, batch): + def send_backup_data_batched(self, batch): url = self.backend_url + "backupData/batched" r = requests.post(url, json=batch) + + def create_alert(self, alert): + url = self.backend_url + "alerting" + r = requests.post(url, json=alert) diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py index cff9e9b..f71b63b 100644 --- a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -2,8 +2,20 @@ from collections import defaultdict class SimpleRuleBasedAnalyzer: - def __init__(self): - pass + def __init__(self, size_alert_percentage): + self.size_alert_percentage = size_alert_percentage + + # Analyze a pair of consecutive results + def _analyze_pair(self, result1, result2) + # Handle results with a data_size of zero + if result1.data_size != 0: + relative_change = (result1.data_size - result2.data_size) / result1.data_size + else: + relative_change = 0 if result2.data_size == 0 else float("inf") + + if relative_change > : + count += 1 + print(result1.data_size, result2.data_size) # For now only search for size changes and trigger corresponding alerts def analyze(self, data): @@ -21,25 +33,12 @@ def analyze(self, data): print(len(groups), file=sys.stderr) - count = 0 # Iterate through each group to find drastic size changes for task, unordered_results in groups.items(): results = sorted(unordered_results, key=lambda result: result.start_time) print(task, len(results), file=sys.stderr) # Iterate through each pair of consecutive results and compare their sizes for result1, result2 in zip(results[:-1], results[1:]): - # Handle results with a data_size of zero - if result1.data_size != 0: - relative_change = abs(result1.data_size - result2.data_size) / result1.data_size - else: - relative_change = 0 if result2.data_size == 0 else float("inf") - - if relative_change > 0.2: - count += 1 - print(result1.data_size, result2.data_size) - - return { - "sizeChangeAlerts": { - "count": count - } - } + self._analyze_pair(result1, result2) + + return {} diff --git a/apps/analyzer/tests/test_main.py b/apps/analyzer/tests/test_main.py index c534583..70a448b 100644 --- a/apps/analyzer/tests/test_main.py +++ b/apps/analyzer/tests/test_main.py @@ -16,7 +16,7 @@ class MockBackend: def __init__(self): self.backups = [] - def sendBackupDataBatched(self, batch): + def send_backup_data_batched(self, batch): self.backups += batch def test_hello_world(): @@ -32,8 +32,7 @@ def test_update_data(): database = MockDatabase([mock_result]) backend = MockBackend() - simple_analyzer = None - Analyzer.init(database, backend, simple_analyzer) + Analyzer.init(database, backend, None, None) Analyzer.update_data() assert backend.backups == [Analyzer._convert_result(mock_result)] From 15777764a1c5aca32a2c80262702aa4595783d94 Mon Sep 17 00:00:00 2001 From: Oliver Schnell Date: Sat, 23 Nov 2024 22:45:22 +0100 Subject: [PATCH 03/13] Update SimpleRuleBasedAnalyzer to send alerts to the backend Signed-off-by: Oliver Schnell --- apps/analyzer/metadata_analyzer/main.py | 2 +- .../simple_rule_based_analyzer.py | 50 +++++++++++++------ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/apps/analyzer/metadata_analyzer/main.py b/apps/analyzer/metadata_analyzer/main.py index 8ca15cd..4dc99b0 100644 --- a/apps/analyzer/metadata_analyzer/main.py +++ b/apps/analyzer/metadata_analyzer/main.py @@ -152,7 +152,7 @@ def main(): database = Database() backend = Backend(os.getenv("BACKEND_URL")) simple_analyzer = SimpleAnalyzer() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) Analyzer.init(database, backend, simple_analyzer, simple_rule_based_analyzer) new_port = os.getenv("FLASK_RUN_PORT") diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py index f71b63b..ec01952 100644 --- a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -1,25 +1,40 @@ import sys from collections import defaultdict +import metadata_analyzer.backend class SimpleRuleBasedAnalyzer: - def __init__(self, size_alert_percentage): + def __init__(self, backend, size_alert_percentage): + self.backend = backend self.size_alert_percentage = size_alert_percentage - # Analyze a pair of consecutive results - def _analyze_pair(self, result1, result2) + # Analyze a pair of consecutive results, returns a list of created alerts + def _analyze_pair(self, result1, result2): # Handle results with a data_size of zero - if result1.data_size != 0: - relative_change = (result1.data_size - result2.data_size) / result1.data_size + if result1.data_size == 0 and result2.data_size == 0: + relative_change = 0 + elif result1.data_size == 0: + relative_change = float("inf") + elif result2.data_size == 0: + relative_change = -float("inf") else: - relative_change = 0 if result2.data_size == 0 else float("inf") + relative_change = (result2.data_size - result1.data_size) / result1.data_size - if relative_change > : - count += 1 - print(result1.data_size, result2.data_size) + # Skip pairs of results with changes inside the bounds + if -self.size_alert_percentage <= relative_change <= self.size_alert_percentage: + return [] + + alert = { + "type": 0 if relative_change > 0 else 1, + "value": result2.data_size // 1_000_000, + "referenceValue": result1.data_size // 1_000_000, + "backupId": result2.uuid, + } + + return [alert] # For now only search for size changes and trigger corresponding alerts def analyze(self, data): - print(len(data), file=sys.stderr) + #print(len(data), file=sys.stderr) # Group the 'full' results by their task groups = defaultdict(list) @@ -31,14 +46,21 @@ def analyze(self, data): continue groups[result.task].append(result) - print(len(groups), file=sys.stderr) + #print(len(groups), file=sys.stderr) + alerts = [] # Iterate through each group to find drastic size changes for task, unordered_results in groups.items(): results = sorted(unordered_results, key=lambda result: result.start_time) - print(task, len(results), file=sys.stderr) # Iterate through each pair of consecutive results and compare their sizes for result1, result2 in zip(results[:-1], results[1:]): - self._analyze_pair(result1, result2) + new_alerts = self._analyze_pair(result1, result2) + alerts += new_alerts + + # Send the alerts to the backend + for alert in alerts: + self.backend.create_alert(alert) - return {} + return { + "count": len(alerts) + } From c7205eb83706baddf0820f9778eaf08f9e5396b4 Mon Sep 17 00:00:00 2001 From: Oliver Schnell Date: Sat, 23 Nov 2024 23:03:11 +0100 Subject: [PATCH 04/13] Add basic test for the simpleRuleBasedAnalyzer Signed-off-by: Oliver Schnell --- apps/analyzer/tests/test_main.py | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/apps/analyzer/tests/test_main.py b/apps/analyzer/tests/test_main.py index 70a448b..42ccdb8 100644 --- a/apps/analyzer/tests/test_main.py +++ b/apps/analyzer/tests/test_main.py @@ -3,6 +3,7 @@ from metadata_analyzer.main import hello_world, update_data from metadata_analyzer.analyzer import Analyzer from metadata_analyzer.models import Result +from metadata_analyzer.simple_rule_based_analyzer import SimpleRuleBasedAnalyzer from datetime import datetime class MockDatabase: @@ -15,10 +16,14 @@ def get_results(self): class MockBackend: def __init__(self): self.backups = [] + self.alerts = [] def send_backup_data_batched(self, batch): self.backups += batch + def create_alert(self, alert): + self.alerts.append(alert) + def test_hello_world(): """Test the hello_world function.""" assert hello_world() == "Hello, world!" @@ -36,3 +41,31 @@ def test_update_data(): Analyzer.update_data() assert backend.backups == [Analyzer._convert_result(mock_result)] + +def test_simple_rule_based_analyzer(): + mock_result1 = Result() + mock_result1.task = "foo" + mock_result1.uuid = "1" + mock_result1.fdi_type = "F" + mock_result1.data_size = 100_000_000 + mock_result1.start_time = datetime.fromisoformat('2000-01-01') + + mock_result2 = Result() + mock_result2.task = "foo" + mock_result2.uuid = "2" + mock_result2.fdi_type = "F" + mock_result2.data_size = 121_000_000 + mock_result2.start_time = datetime.fromisoformat('2000-01-02') + + database = MockDatabase([mock_result1, mock_result2]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis() + + assert backend.alerts == [{ + "type": 0, + "value": mock_result2.data_size // 1_000_000, + "referenceValue": mock_result1.data_size // 1_000_000, + "backupId": mock_result2.uuid + }] From 2590222d2c144b01595e6e07725a06c422859168 Mon Sep 17 00:00:00 2001 From: Oliver Schnell Date: Sun, 24 Nov 2024 22:57:01 +0100 Subject: [PATCH 05/13] Refactor and add more tests Signed-off-by: Oliver Schnell --- apps/analyzer/metadata_analyzer/analyzer.py | 2 +- .../simple_rule_based_analyzer.py | 4 +- apps/analyzer/tests/mock_backend.py | 10 +++ apps/analyzer/tests/mock_database.py | 6 ++ apps/analyzer/tests/test_main.py | 47 +--------- .../tests/test_simple_rule_based_analyzer.py | 85 +++++++++++++++++++ 6 files changed, 106 insertions(+), 48 deletions(-) create mode 100644 apps/analyzer/tests/mock_backend.py create mode 100644 apps/analyzer/tests/mock_database.py create mode 100644 apps/analyzer/tests/test_simple_rule_based_analyzer.py diff --git a/apps/analyzer/metadata_analyzer/analyzer.py b/apps/analyzer/metadata_analyzer/analyzer.py index f0528d3..c5be0fd 100644 --- a/apps/analyzer/metadata_analyzer/analyzer.py +++ b/apps/analyzer/metadata_analyzer/analyzer.py @@ -21,7 +21,7 @@ def analyze(): def _convert_result(result): return { "id": result.uuid, - "sizeMB": result.data_size // 1_000_000, + "sizeMB": result.data_size / 1_000_000, "creationDate": result.start_time.isoformat(), } diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py index ec01952..bec9841 100644 --- a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -25,8 +25,8 @@ def _analyze_pair(self, result1, result2): alert = { "type": 0 if relative_change > 0 else 1, - "value": result2.data_size // 1_000_000, - "referenceValue": result1.data_size // 1_000_000, + "value": result2.data_size / 1_000_000, + "referenceValue": result1.data_size / 1_000_000, "backupId": result2.uuid, } diff --git a/apps/analyzer/tests/mock_backend.py b/apps/analyzer/tests/mock_backend.py new file mode 100644 index 0000000..bc4b8e5 --- /dev/null +++ b/apps/analyzer/tests/mock_backend.py @@ -0,0 +1,10 @@ +class MockBackend: + def __init__(self): + self.backups = [] + self.alerts = [] + + def send_backup_data_batched(self, batch): + self.backups += batch + + def create_alert(self, alert): + self.alerts.append(alert) diff --git a/apps/analyzer/tests/mock_database.py b/apps/analyzer/tests/mock_database.py new file mode 100644 index 0000000..3839999 --- /dev/null +++ b/apps/analyzer/tests/mock_database.py @@ -0,0 +1,6 @@ +class MockDatabase: + def __init__(self, results): + self.results = results + + def get_results(self): + return iter(self.results) diff --git a/apps/analyzer/tests/test_main.py b/apps/analyzer/tests/test_main.py index 42ccdb8..c0a3b8b 100644 --- a/apps/analyzer/tests/test_main.py +++ b/apps/analyzer/tests/test_main.py @@ -5,24 +5,8 @@ from metadata_analyzer.models import Result from metadata_analyzer.simple_rule_based_analyzer import SimpleRuleBasedAnalyzer from datetime import datetime - -class MockDatabase: - def __init__(self, results): - self.results = results - - def get_results(self): - return iter(self.results) - -class MockBackend: - def __init__(self): - self.backups = [] - self.alerts = [] - - def send_backup_data_batched(self, batch): - self.backups += batch - - def create_alert(self, alert): - self.alerts.append(alert) +from tests.mock_backend import MockBackend +from tests.mock_database import MockDatabase def test_hello_world(): """Test the hello_world function.""" @@ -42,30 +26,3 @@ def test_update_data(): assert backend.backups == [Analyzer._convert_result(mock_result)] -def test_simple_rule_based_analyzer(): - mock_result1 = Result() - mock_result1.task = "foo" - mock_result1.uuid = "1" - mock_result1.fdi_type = "F" - mock_result1.data_size = 100_000_000 - mock_result1.start_time = datetime.fromisoformat('2000-01-01') - - mock_result2 = Result() - mock_result2.task = "foo" - mock_result2.uuid = "2" - mock_result2.fdi_type = "F" - mock_result2.data_size = 121_000_000 - mock_result2.start_time = datetime.fromisoformat('2000-01-02') - - database = MockDatabase([mock_result1, mock_result2]) - backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) - Analyzer.init(database, backend, None, simple_rule_based_analyzer) - Analyzer.simple_rule_based_analysis() - - assert backend.alerts == [{ - "type": 0, - "value": mock_result2.data_size // 1_000_000, - "referenceValue": mock_result1.data_size // 1_000_000, - "backupId": mock_result2.uuid - }] diff --git a/apps/analyzer/tests/test_simple_rule_based_analyzer.py b/apps/analyzer/tests/test_simple_rule_based_analyzer.py new file mode 100644 index 0000000..e8a5d21 --- /dev/null +++ b/apps/analyzer/tests/test_simple_rule_based_analyzer.py @@ -0,0 +1,85 @@ +from metadata_analyzer.simple_rule_based_analyzer import SimpleRuleBasedAnalyzer +from metadata_analyzer.analyzer import Analyzer +from metadata_analyzer.models import Result +from tests.mock_backend import MockBackend +from tests.mock_database import MockDatabase +from datetime import datetime + +def _create_mock_result(task, uuid, fdi_type, data_size, start_time): + mock_result = Result() + mock_result.task = task + mock_result.uuid = uuid + mock_result.fdi_type = fdi_type + mock_result.data_size = data_size + mock_result.start_time = start_time + return mock_result + +def test_alert(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "F", 121_000_000, datetime.fromisoformat("2000-01-02")) + + database = MockDatabase([mock_result1, mock_result2]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis() + + assert backend.alerts == [{ + "type": 0, + "value": mock_result2.data_size / 1_000_000, + "referenceValue": mock_result1.data_size / 1_000_000, + "backupId": mock_result2.uuid + }] + +def test_alert_backup_size_zero(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "F", 0, datetime.fromisoformat("2000-01-02")) + + database = MockDatabase([mock_result1, mock_result2]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis() + + assert backend.alerts == [{ + "type": 1, + "value": mock_result2.data_size / 1_000_000, + "referenceValue": mock_result1.data_size / 1_000_000, + "backupId": mock_result2.uuid + }] + +def test_no_alert_size_diff_too_small(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "F", 120_000_000, datetime.fromisoformat("2000-01-02")) + + database = MockDatabase([mock_result1, mock_result2]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis() + + assert backend.alerts == [] + +def test_no_alert_wrong_type(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "D", 121_000_000, datetime.fromisoformat("2000-01-02")) + + database = MockDatabase([mock_result1, mock_result2]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis() + + assert backend.alerts == [] + +def test_no_alert_different_tasks(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("bar", "2", "F", 121_000_000, datetime.fromisoformat("2000-01-02")) + + database = MockDatabase([mock_result1, mock_result2]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis() + + assert backend.alerts == [] From d3af9d972f6e1d0fe200579ca95c10e126847972 Mon Sep 17 00:00:00 2001 From: Oliver Schnell Date: Sun, 24 Nov 2024 23:21:50 +0100 Subject: [PATCH 06/13] Raise exceptions when backend API call fails Signed-off-by: Oliver Schnell --- apps/analyzer/metadata_analyzer/backend.py | 2 ++ apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/apps/analyzer/metadata_analyzer/backend.py b/apps/analyzer/metadata_analyzer/backend.py index f0e055a..c13a42c 100644 --- a/apps/analyzer/metadata_analyzer/backend.py +++ b/apps/analyzer/metadata_analyzer/backend.py @@ -7,7 +7,9 @@ def __init__(self, backend_url): def send_backup_data_batched(self, batch): url = self.backend_url + "backupData/batched" r = requests.post(url, json=batch) + r.raise_for_status() def create_alert(self, alert): url = self.backend_url + "alerting" r = requests.post(url, json=alert) + r.raise_for_status() diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py index bec9841..c6096da 100644 --- a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -34,8 +34,6 @@ def _analyze_pair(self, result1, result2): # For now only search for size changes and trigger corresponding alerts def analyze(self, data): - #print(len(data), file=sys.stderr) - # Group the 'full' results by their task groups = defaultdict(list) for result in data: @@ -46,8 +44,6 @@ def analyze(self, data): continue groups[result.task].append(result) - #print(len(groups), file=sys.stderr) - alerts = [] # Iterate through each group to find drastic size changes for task, unordered_results in groups.items(): From 888bf8a9dd3407a1abcdd08ef7b9779efe8bf146 Mon Sep 17 00:00:00 2001 From: Oliver Schnell Date: Tue, 26 Nov 2024 11:01:47 +0100 Subject: [PATCH 07/13] Add limit option to the API to limit the number of triggered alerts, add more tests Signed-off-by: Oliver Schnell --- apps/analyzer/metadata_analyzer/analyzer.py | 4 +- apps/analyzer/metadata_analyzer/main.py | 4 +- .../simple_rule_based_analyzer.py | 8 ++-- .../tests/test_simple_rule_based_analyzer.py | 47 +++++++++++++++++-- 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/apps/analyzer/metadata_analyzer/analyzer.py b/apps/analyzer/metadata_analyzer/analyzer.py index c5be0fd..ad8a2da 100644 --- a/apps/analyzer/metadata_analyzer/analyzer.py +++ b/apps/analyzer/metadata_analyzer/analyzer.py @@ -54,7 +54,7 @@ def update_data(): return {"count": count} - def simple_rule_based_analysis(): + def simple_rule_based_analysis(alert_limit): data = list(Analyzer.database.get_results()) - result = Analyzer.simple_rule_based_analyzer.analyze(data) + result = Analyzer.simple_rule_based_analyzer.analyze(data, alert_limit) return result diff --git a/apps/analyzer/metadata_analyzer/main.py b/apps/analyzer/metadata_analyzer/main.py index 4dc99b0..a18f259 100644 --- a/apps/analyzer/metadata_analyzer/main.py +++ b/apps/analyzer/metadata_analyzer/main.py @@ -145,7 +145,9 @@ def update_data(): @app.route("/simpleRuleBasedAnalysis", methods=["POST"]) def simple_rule_based_analysis(): - return jsonify(Analyzer.simple_rule_based_analysis()) + json = request.get_json() + alert_limit = json["alertLimit"] + return jsonify(Analyzer.simple_rule_based_analysis(alert_limit)) def main(): diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py index c6096da..219fe51 100644 --- a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -33,7 +33,7 @@ def _analyze_pair(self, result1, result2): return [alert] # For now only search for size changes and trigger corresponding alerts - def analyze(self, data): + def analyze(self, data, alert_limit): # Group the 'full' results by their task groups = defaultdict(list) for result in data: @@ -53,10 +53,12 @@ def analyze(self, data): new_alerts = self._analyze_pair(result1, result2) alerts += new_alerts + # Only send a maximum of alert_limit alerts or all alerts if alert_limit is -1 + count = len(alerts) if alert_limit == -1 else min(alert_limit, len(alerts)) # Send the alerts to the backend - for alert in alerts: + for alert in alerts[:count]: self.backend.create_alert(alert) return { - "count": len(alerts) + "count": count } diff --git a/apps/analyzer/tests/test_simple_rule_based_analyzer.py b/apps/analyzer/tests/test_simple_rule_based_analyzer.py index e8a5d21..97f92eb 100644 --- a/apps/analyzer/tests/test_simple_rule_based_analyzer.py +++ b/apps/analyzer/tests/test_simple_rule_based_analyzer.py @@ -22,7 +22,7 @@ def test_alert(): backend = MockBackend() simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) - Analyzer.simple_rule_based_analysis() + Analyzer.simple_rule_based_analysis(-1) assert backend.alerts == [{ "type": 0, @@ -31,6 +31,30 @@ def test_alert(): "backupId": mock_result2.uuid }] +def test_alerts_different_tasks(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "F", 121_000_000, datetime.fromisoformat("2000-01-02")) + mock_result3 = _create_mock_result("bar", "1", "F", 200_000_000, datetime.fromisoformat("2000-01-01")) + mock_result4 = _create_mock_result("bar", "2", "F", 100_000_000, datetime.fromisoformat("2000-01-02")) + + database = MockDatabase([mock_result1, mock_result2, mock_result3, mock_result4]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis(-1) + + assert backend.alerts == [{ + "type": 0, + "value": mock_result2.data_size / 1_000_000, + "referenceValue": mock_result1.data_size / 1_000_000, + "backupId": mock_result2.uuid + }, { + "type": 1, + "value": mock_result4.data_size / 1_000_000, + "referenceValue": mock_result3.data_size / 1_000_000, + "backupId": mock_result4.uuid + }] + def test_alert_backup_size_zero(): mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) mock_result2 = _create_mock_result("foo", "2", "F", 0, datetime.fromisoformat("2000-01-02")) @@ -39,7 +63,7 @@ def test_alert_backup_size_zero(): backend = MockBackend() simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) - Analyzer.simple_rule_based_analysis() + Analyzer.simple_rule_based_analysis(-1) assert backend.alerts == [{ "type": 1, @@ -56,7 +80,7 @@ def test_no_alert_size_diff_too_small(): backend = MockBackend() simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) - Analyzer.simple_rule_based_analysis() + Analyzer.simple_rule_based_analysis(-1) assert backend.alerts == [] @@ -68,7 +92,7 @@ def test_no_alert_wrong_type(): backend = MockBackend() simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) - Analyzer.simple_rule_based_analysis() + Analyzer.simple_rule_based_analysis(-1) assert backend.alerts == [] @@ -80,6 +104,19 @@ def test_no_alert_different_tasks(): backend = MockBackend() simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) - Analyzer.simple_rule_based_analysis() + Analyzer.simple_rule_based_analysis(-1) assert backend.alerts == [] + +def test_alert_limit(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "F", 150_000_000, datetime.fromisoformat("2000-01-02")) + mock_result3 = _create_mock_result("foo", "3", "F", 200_000_000, datetime.fromisoformat("2000-01-03")) + + database = MockDatabase([mock_result1, mock_result2, mock_result3]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis(1) + + assert len(backend.alerts) == 1 From 89049301fb53c60096f3b2e4fb5324975e57cacc Mon Sep 17 00:00:00 2001 From: Amelie Regl Date: Tue, 26 Nov 2024 14:42:36 +0100 Subject: [PATCH 08/13] added rule based analysis of diff backups Signed-off-by: Amelie Regl --- apps/analyzer/metadata_analyzer/analyzer.py | 10 +++ apps/analyzer/metadata_analyzer/main.py | 13 +++- .../simple_rule_based_analyzer.py | 74 ++++++++++++++++++- .../tests/test_simple_rule_based_analyzer.py | 15 ++-- 4 files changed, 101 insertions(+), 11 deletions(-) diff --git a/apps/analyzer/metadata_analyzer/analyzer.py b/apps/analyzer/metadata_analyzer/analyzer.py index ad8a2da..9f8686b 100644 --- a/apps/analyzer/metadata_analyzer/analyzer.py +++ b/apps/analyzer/metadata_analyzer/analyzer.py @@ -58,3 +58,13 @@ def simple_rule_based_analysis(alert_limit): data = list(Analyzer.database.get_results()) result = Analyzer.simple_rule_based_analyzer.analyze(data, alert_limit) return result + + def simple_rule_based_analysis_diff(alert_limit): + data = list(Analyzer.database.get_results()) + result = Analyzer.simple_rule_based_analyzer.analyze_diff(data,alert_limit) + return result + + def simple_rule_based_analysis_inc(alert_limit): + data = list(Analyzer.database.get_results()) + result = Analyzer.simple_rule_based_analyzer.analyze_inc(data,alert_limit) + return result diff --git a/apps/analyzer/metadata_analyzer/main.py b/apps/analyzer/metadata_analyzer/main.py index a18f259..65a376b 100644 --- a/apps/analyzer/metadata_analyzer/main.py +++ b/apps/analyzer/metadata_analyzer/main.py @@ -149,12 +149,23 @@ def simple_rule_based_analysis(): alert_limit = json["alertLimit"] return jsonify(Analyzer.simple_rule_based_analysis(alert_limit)) +@app.route("/simpleRuleBasedAnalysisDiff", methods=["POST"]) +def simple_rule_based_analysis_diff(): + json = request.get_json() + alert_limit = json["alertLimit"] + return jsonify(Analyzer.simple_rule_based_analysis_diff(alert_limit)) + +@app.route("/simpleRuleBasedAnalysisInc", methods=["POST"]) +def simple_rule_based_analysis_inc(): + json = request.get_json() + alert_limit = json["alertLimit"] + return jsonify(Analyzer.simple_rule_based_analysis_inc(alert_limit)) def main(): database = Database() backend = Backend(os.getenv("BACKEND_URL")) simple_analyzer = SimpleAnalyzer() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) Analyzer.init(database, backend, simple_analyzer, simple_rule_based_analyzer) new_port = os.getenv("FLASK_RUN_PORT") diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py index 219fe51..9dac2dc 100644 --- a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -3,12 +3,31 @@ import metadata_analyzer.backend class SimpleRuleBasedAnalyzer: - def __init__(self, backend, size_alert_percentage): + def __init__(self, backend, size_alert_percentage, inc_percentage, diff_percentage): self.backend = backend self.size_alert_percentage = size_alert_percentage + self.inc_size = 0 + self.inc_percentage = inc_percentage + self.diff_percentage = diff_percentage # Analyze a pair of consecutive results, returns a list of created alerts def _analyze_pair(self, result1, result2): + relative_change = self.handle_zero(result1, result2) + + # Skip pairs of results with changes inside the bounds + if -self.size_alert_percentage <= relative_change <= self.size_alert_percentage: + return [] + + alert = { + "type": 0 if relative_change > 0 else 1, + "value": result2.data_size / 1_000_000, + "referenceValue": result1.data_size / 1_000_000, + "backupId": result2.uuid, + } + + return [alert] + + def handle_zero(self,result1, result2): # Handle results with a data_size of zero if result1.data_size == 0 and result2.data_size == 0: relative_change = 0 @@ -18,9 +37,17 @@ def _analyze_pair(self, result1, result2): relative_change = -float("inf") else: relative_change = (result2.data_size - result1.data_size) / result1.data_size + return relative_change + + # Analyze a pair of consecutive results, returns a list of created alerts + def _analyze_pair_diff(self, result1, result2): + relative_change = self.handle_zero(result1, result2) + print("pair is") + print(str(result1.data_size) + " and " + str(result2.data_size)) - # Skip pairs of results with changes inside the bounds - if -self.size_alert_percentage <= relative_change <= self.size_alert_percentage: + # Skip pairs of results with changes inside the bounds that increase + if relative_change > 0 and relative_change <= self.diff_percentage: + print("relative change is 0, skipping") return [] alert = { @@ -29,6 +56,7 @@ def _analyze_pair(self, result1, result2): "referenceValue": result1.data_size / 1_000_000, "backupId": result2.uuid, } + print("relative change is " + str(relative_change) + ", not skipping") return [alert] @@ -62,3 +90,43 @@ def analyze(self, data, alert_limit): return { "count": count } + + # Searches for size increases in diffs and trigger corresponding alerts if not applicable + def analyze_diff(self, data, alert_limit): + # Group the 'full' and 'diff results by their task + groups = defaultdict(list) + groupNum = 0 + for result in data: + if (result.task == "" + or (result.fdi_type != 'F' and result.fdi_type != 'D') + or result.data_size is None + or result.start_time is None): + print("is not f or d, skipping") + continue + if (result.fdi_type == 'F'): + print("f found") + groupNum += 1 + continue + groups[groupNum].append(result) + print("number of groups:") + print(groupNum) + print(groups) + + alerts = [] + # Iterates through groups to ensure size increases except when a full backup was done + for task, unordered_results in groups.items(): + results = sorted(unordered_results, key=lambda result: result.start_time) + # Iterate through each pair of consecutive results and compare their sizes + for result1, result2 in zip(results[:-1], results[1:]): + new_alerts = self._analyze_pair_diff(result1, result2) + alerts += new_alerts + + # Only send a maximum of alert_limit alerts or all alerts if alert_limit is -1 + count = len(alerts) if alert_limit == -1 else min(alert_limit, len(alerts)) + # Send the alerts to the backend + for alert in alerts[:count]: + self.backend.create_alert(alert) + + return { + "count": count + } diff --git a/apps/analyzer/tests/test_simple_rule_based_analyzer.py b/apps/analyzer/tests/test_simple_rule_based_analyzer.py index 97f92eb..a6ee6c6 100644 --- a/apps/analyzer/tests/test_simple_rule_based_analyzer.py +++ b/apps/analyzer/tests/test_simple_rule_based_analyzer.py @@ -20,7 +20,7 @@ def test_alert(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -39,7 +39,7 @@ def test_alerts_different_tasks(): database = MockDatabase([mock_result1, mock_result2, mock_result3, mock_result4]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -61,7 +61,7 @@ def test_alert_backup_size_zero(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -78,7 +78,7 @@ def test_no_alert_size_diff_too_small(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -90,7 +90,7 @@ def test_no_alert_wrong_type(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -102,7 +102,7 @@ def test_no_alert_different_tasks(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -115,8 +115,9 @@ def test_alert_limit(): database = MockDatabase([mock_result1, mock_result2, mock_result3]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(1) assert len(backend.alerts) == 1 + From 44d882d62cda973f81bb07dd3be7081c60e35722 Mon Sep 17 00:00:00 2001 From: Amelie Regl Date: Tue, 26 Nov 2024 14:43:34 +0100 Subject: [PATCH 09/13] added tests for diff analysis Signed-off-by: Amelie Regl --- .../tests/test_simple_rule_based_analyzer.py | 139 ++++++++++++++++++ 1 file changed, 139 insertions(+) diff --git a/apps/analyzer/tests/test_simple_rule_based_analyzer.py b/apps/analyzer/tests/test_simple_rule_based_analyzer.py index a6ee6c6..c595ca1 100644 --- a/apps/analyzer/tests/test_simple_rule_based_analyzer.py +++ b/apps/analyzer/tests/test_simple_rule_based_analyzer.py @@ -121,3 +121,142 @@ def test_alert_limit(): assert len(backend.alerts) == 1 +# extremely large difference +def test_alert_backup_size_zero_diff(): + mock_result1 = _create_mock_result("foo", "1", "D", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "D", 0, datetime.fromisoformat("2000-01-02")) + + database = MockDatabase([mock_result1, mock_result2]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_diff(1) + + assert backend.alerts == [{ + "type": 1, + "value": mock_result2.data_size / 1_000_000, + "referenceValue": mock_result1.data_size / 1_000_000, + "backupId": mock_result2.uuid + }] + +# two decreasing diff backups (in the accepted range) with different full backups as base +def test_alert_backup_size_decrease_ok_diff(): + mock_result1 = _create_mock_result("foo", "1", "D", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "F", 100_000_000, datetime.fromisoformat("2000-01-02")) + mock_result3 = _create_mock_result("foo", "3", "D", 99_000_000, datetime.fromisoformat("2000-01-03")) + + database = MockDatabase([mock_result1, mock_result2, mock_result3]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_diff(1) + + assert backend.alerts == [] + +# two decreasing diff backups (in the accepted range) with same full backup as base +def test_alert_backup_size_decrease_nok_diff(): + mock_result1 = _create_mock_result("foo", "1", "D", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "D", 99_000_000, datetime.fromisoformat("2000-01-03")) + + database = MockDatabase([mock_result1, mock_result2]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_diff(1) + + assert backend.alerts == [{ + "type": 1, + "value": mock_result2.data_size / 1_000_000, + "referenceValue": mock_result1.data_size / 1_000_000, + "backupId": mock_result2.uuid + }] + + # two decreasing diff backups (not in the accepted range) with same full backup as base +def test_alert_backup_size_decrease_large_nok_diff(): + mock_result1 = _create_mock_result("foo", "1", "D", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "D", 1_000_000, datetime.fromisoformat("2000-01-03")) + + database = MockDatabase([mock_result1, mock_result2]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_diff(1) + + assert backend.alerts == [{ + "type": 1, + "value": mock_result2.data_size / 1_000_000, + "referenceValue": mock_result1.data_size / 1_000_000, + "backupId": mock_result2.uuid + }] + +# two decreasing diff backups (not in the accepted range) with different full backups as base +def test_alert_backup_size_decrease_large_ok_diff(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "D", 1_000_000, datetime.fromisoformat("2000-01-02")) + mock_result3 = _create_mock_result("foo", "3", "F", 100_000_000, datetime.fromisoformat("2000-01-03")) + mock_result4 = _create_mock_result("foo", "4", "D", 1_000_000, datetime.fromisoformat("2000-01-04")) + + database = MockDatabase([mock_result1, mock_result2, mock_result3, mock_result4]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_diff(1) + + assert backend.alerts == [] + +# two increasing diff backups (not in the accepted range) with same full backups as base +def test_alert_backup_size_increase_large_nok_diff(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "D", 1_000_000, datetime.fromisoformat("2000-01-02")) + mock_result3 = _create_mock_result("foo", "3", "D", 100_000_000, datetime.fromisoformat("2000-01-03")) + + database = MockDatabase([mock_result1, mock_result2, mock_result3]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_diff(1) + + assert backend.alerts == [{ + "type": 0, + "value": mock_result3.data_size / 1_000_000, + "referenceValue": mock_result2.data_size / 1_000_000, + "backupId": mock_result3.uuid + }] + +# two increasing diff backups (not in the accepted range) with different full backups as base +def test_alert_backup_size_increase_large_ok_diff(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "D", 1_000_000, datetime.fromisoformat("2000-01-02")) + mock_result3 = _create_mock_result("foo", "3", "F", 100_000_000, datetime.fromisoformat("2000-01-03")) + mock_result4 = _create_mock_result("foo", "4", "D", 1_000_000, datetime.fromisoformat("2000-01-04")) + + database = MockDatabase([mock_result1, mock_result2, mock_result3, mock_result4]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_diff(1) + + assert backend.alerts == [] + + # multiple decreasing diff backups (not in the accepted range) with same full backups as base +def test_alert_backup_size_complex_nok_diff(): + mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "D", 1_000_000, datetime.fromisoformat("2000-01-02")) + mock_result3 = _create_mock_result("foo", "3", "F", 100_000_000, datetime.fromisoformat("2000-01-03")) + mock_result4 = _create_mock_result("foo", "4", "F", 1_000_000, datetime.fromisoformat("2000-01-04")) + mock_result5 = _create_mock_result("foo", "5", "D", 100_000_000, datetime.fromisoformat("2000-01-05")) + mock_result6 = _create_mock_result("foo", "6", "D", 101_000_000, datetime.fromisoformat("2000-01-06")) + mock_result7 = _create_mock_result("foo", "7", "D", 1_000_000, datetime.fromisoformat("2000-01-07")) + + database = MockDatabase([mock_result1, mock_result2, mock_result3, mock_result4, mock_result5, mock_result6, mock_result7]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_diff(1) + + assert backend.alerts == [{ + "type": 1, + "value": mock_result7.data_size / 1_000_000, + "referenceValue": mock_result6.data_size / 1_000_000, + "backupId": mock_result7.uuid + }] From 78e928a8e9a0bbbd9654f26137884292c46f44f7 Mon Sep 17 00:00:00 2001 From: Amelie Regl Date: Tue, 26 Nov 2024 14:47:46 +0100 Subject: [PATCH 10/13] removed debug prints Signed-off-by: Amelie Regl --- apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py index 9dac2dc..24c4d0e 100644 --- a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -91,7 +91,7 @@ def analyze(self, data, alert_limit): "count": count } - # Searches for size increases in diffs and trigger corresponding alerts if not applicable + # Searches for size increases in diffs and trigger corresponding alerts if not applicable def analyze_diff(self, data, alert_limit): # Group the 'full' and 'diff results by their task groups = defaultdict(list) @@ -130,3 +130,5 @@ def analyze_diff(self, data, alert_limit): return { "count": count } + + From c087a4a8946b8aa2c8c1f034aaa874270bf5ee11 Mon Sep 17 00:00:00 2001 From: Amelie Regl Date: Tue, 26 Nov 2024 15:44:16 +0100 Subject: [PATCH 11/13] added basic rulebased analysis to inc backups Signed-off-by: Amelie Regl --- apps/analyzer/metadata_analyzer/main.py | 2 +- .../simple_rule_based_analyzer.py | 85 +++++++++++++++---- .../tests/test_simple_rule_based_analyzer.py | 31 +++---- 3 files changed, 87 insertions(+), 31 deletions(-) diff --git a/apps/analyzer/metadata_analyzer/main.py b/apps/analyzer/metadata_analyzer/main.py index 65a376b..f5a3c72 100644 --- a/apps/analyzer/metadata_analyzer/main.py +++ b/apps/analyzer/metadata_analyzer/main.py @@ -165,7 +165,7 @@ def main(): database = Database() backend = Backend(os.getenv("BACKEND_URL")) simple_analyzer = SimpleAnalyzer() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, simple_analyzer, simple_rule_based_analyzer) new_port = os.getenv("FLASK_RUN_PORT") diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py index 24c4d0e..3907aad 100644 --- a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -1,21 +1,22 @@ import sys from collections import defaultdict import metadata_analyzer.backend +from datetime import datetime, timedelta class SimpleRuleBasedAnalyzer: - def __init__(self, backend, size_alert_percentage, inc_percentage, diff_percentage): + def __init__(self, backend, size_alert_percentage, inc_percentage, inc_date_percentage, diff_percentage): self.backend = backend self.size_alert_percentage = size_alert_percentage - self.inc_size = 0 - self.inc_percentage = inc_percentage + self.inc_data_percentage = inc_percentage + self.inc_date_percentage = inc_date_percentage self.diff_percentage = diff_percentage # Analyze a pair of consecutive results, returns a list of created alerts - def _analyze_pair(self, result1, result2): + def _analyze_pair(self, result1, result2, bound): relative_change = self.handle_zero(result1, result2) # Skip pairs of results with changes inside the bounds - if -self.size_alert_percentage <= relative_change <= self.size_alert_percentage: + if -bound <= relative_change <= bound: return [] alert = { @@ -42,12 +43,9 @@ def handle_zero(self,result1, result2): # Analyze a pair of consecutive results, returns a list of created alerts def _analyze_pair_diff(self, result1, result2): relative_change = self.handle_zero(result1, result2) - print("pair is") - print(str(result1.data_size) + " and " + str(result2.data_size)) # Skip pairs of results with changes inside the bounds that increase if relative_change > 0 and relative_change <= self.diff_percentage: - print("relative change is 0, skipping") return [] alert = { @@ -56,7 +54,6 @@ def _analyze_pair_diff(self, result1, result2): "referenceValue": result1.data_size / 1_000_000, "backupId": result2.uuid, } - print("relative change is " + str(relative_change) + ", not skipping") return [alert] @@ -78,7 +75,7 @@ def analyze(self, data, alert_limit): results = sorted(unordered_results, key=lambda result: result.start_time) # Iterate through each pair of consecutive results and compare their sizes for result1, result2 in zip(results[:-1], results[1:]): - new_alerts = self._analyze_pair(result1, result2) + new_alerts = self._analyze_pair(result1, result2, self.size_alert_percentage) alerts += new_alerts # Only send a maximum of alert_limit alerts or all alerts if alert_limit is -1 @@ -101,16 +98,11 @@ def analyze_diff(self, data, alert_limit): or (result.fdi_type != 'F' and result.fdi_type != 'D') or result.data_size is None or result.start_time is None): - print("is not f or d, skipping") continue if (result.fdi_type == 'F'): - print("f found") groupNum += 1 continue groups[groupNum].append(result) - print("number of groups:") - print(groupNum) - print(groups) alerts = [] # Iterates through groups to ensure size increases except when a full backup was done @@ -130,5 +122,68 @@ def analyze_diff(self, data, alert_limit): return { "count": count } + +# Searches for size changes in incs and triggers corresponding alerts if not applicable + def analyze_inc(self, data, alert_limit): + + groups = defaultdict(list) + for result in data: + if (result.task == "" + or result.fdi_type != 'I' + or result.data_size is None + or result.start_time is None): + continue + groups[result.task].append(result) + print(groups) + + alerts = [] + # Iterates through groups to ensure size increases except when a full backup was done + for task, unordered_results in groups.items(): + results = sorted(unordered_results, key=lambda result: result.start_time) + + # For now assumes that average size of incs is base value from which to judge all incs, may be subject to change + # Iterate through each results get an average value + avg_size = 0 + prev_time = results[0].start_time + avg_time = timedelta(0) + + print("start time is " + str(avg_time)) + + for result in results: + avg_size += result.data_size + avg_time += result.start_time - prev_time + print("current delta is " + str(result.start_time - prev_time)) + + avg_size = avg_size/(len(results)) + print("avg size is " + str(avg_size)) + avg_time = avg_time/(len(results)-1) + print("avg time is " + str(avg_time)) + + #if(True): # so times are regular in margin and data sizes are same in margin + + for prev, current in zip(results[:-1], results[1:]): + + interval = current.start_time - prev.start_time + print("current interval is " + str(interval)) + # only compares if incs happened at quasi-regular intervals + if(interval >= avg_time * (1 - self.inc_date_percentage) and interval <= avg_time * (1 + self.inc_date_percentage)): + print("analyzing pair of avg size " + str(avg_size) + " and " + str(current.data_size)) + # converts prev to a result with the average size + prev.data_size = avg_size + new_alerts = self._analyze_pair(prev, current, self.inc_data_percentage) + alerts += new_alerts + else: + print("current interval was not in set bounds") + + # Only send a maximum of alert_limit alerts or all alerts if alert_limit is -1 + count = len(alerts) if alert_limit == -1 else min(alert_limit, len(alerts)) + # Send the alerts to the backend + for alert in alerts[:count]: + self.backend.create_alert(alert) + + return { + "count": count + } + diff --git a/apps/analyzer/tests/test_simple_rule_based_analyzer.py b/apps/analyzer/tests/test_simple_rule_based_analyzer.py index c595ca1..ebf85fa 100644 --- a/apps/analyzer/tests/test_simple_rule_based_analyzer.py +++ b/apps/analyzer/tests/test_simple_rule_based_analyzer.py @@ -20,7 +20,7 @@ def test_alert(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -39,7 +39,7 @@ def test_alerts_different_tasks(): database = MockDatabase([mock_result1, mock_result2, mock_result3, mock_result4]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -61,7 +61,7 @@ def test_alert_backup_size_zero(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -78,7 +78,7 @@ def test_no_alert_size_diff_too_small(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -90,7 +90,7 @@ def test_no_alert_wrong_type(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -102,7 +102,7 @@ def test_no_alert_different_tasks(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(-1) @@ -115,7 +115,7 @@ def test_alert_limit(): database = MockDatabase([mock_result1, mock_result2, mock_result3]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis(1) @@ -128,7 +128,7 @@ def test_alert_backup_size_zero_diff(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis_diff(1) @@ -147,7 +147,7 @@ def test_alert_backup_size_decrease_ok_diff(): database = MockDatabase([mock_result1, mock_result2, mock_result3]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis_diff(1) @@ -160,7 +160,7 @@ def test_alert_backup_size_decrease_nok_diff(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis_diff(1) @@ -178,7 +178,7 @@ def test_alert_backup_size_decrease_large_nok_diff(): database = MockDatabase([mock_result1, mock_result2]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis_diff(1) @@ -198,7 +198,7 @@ def test_alert_backup_size_decrease_large_ok_diff(): database = MockDatabase([mock_result1, mock_result2, mock_result3, mock_result4]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis_diff(1) @@ -212,7 +212,7 @@ def test_alert_backup_size_increase_large_nok_diff(): database = MockDatabase([mock_result1, mock_result2, mock_result3]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis_diff(1) @@ -232,7 +232,7 @@ def test_alert_backup_size_increase_large_ok_diff(): database = MockDatabase([mock_result1, mock_result2, mock_result3, mock_result4]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis_diff(1) @@ -250,7 +250,7 @@ def test_alert_backup_size_complex_nok_diff(): database = MockDatabase([mock_result1, mock_result2, mock_result3, mock_result4, mock_result5, mock_result6, mock_result7]) backend = MockBackend() - simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2) + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) Analyzer.init(database, backend, None, simple_rule_based_analyzer) Analyzer.simple_rule_based_analysis_diff(1) @@ -260,3 +260,4 @@ def test_alert_backup_size_complex_nok_diff(): "referenceValue": mock_result6.data_size / 1_000_000, "backupId": mock_result7.uuid }] + From b6bf53002b1e37d46b8dd91d316f151804476ba6 Mon Sep 17 00:00:00 2001 From: Amelie Regl Date: Tue, 26 Nov 2024 16:08:41 +0100 Subject: [PATCH 12/13] small fix and removal of debug prints Signed-off-by: Amelie Regl --- .../simple_rule_based_analyzer.py | 12 +---- .../tests/test_simple_rule_based_analyzer.py | 53 +++++++++++++++++++ 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py index 3907aad..091d320 100644 --- a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -14,7 +14,6 @@ def __init__(self, backend, size_alert_percentage, inc_percentage, inc_date_perc # Analyze a pair of consecutive results, returns a list of created alerts def _analyze_pair(self, result1, result2, bound): relative_change = self.handle_zero(result1, result2) - # Skip pairs of results with changes inside the bounds if -bound <= relative_change <= bound: return [] @@ -25,7 +24,6 @@ def _analyze_pair(self, result1, result2, bound): "referenceValue": result1.data_size / 1_000_000, "backupId": result2.uuid, } - return [alert] def handle_zero(self,result1, result2): @@ -134,7 +132,6 @@ def analyze_inc(self, data, alert_limit): or result.start_time is None): continue groups[result.task].append(result) - print(groups) alerts = [] # Iterates through groups to ensure size increases except when a full backup was done @@ -147,33 +144,26 @@ def analyze_inc(self, data, alert_limit): prev_time = results[0].start_time avg_time = timedelta(0) - print("start time is " + str(avg_time)) for result in results: avg_size += result.data_size avg_time += result.start_time - prev_time - print("current delta is " + str(result.start_time - prev_time)) + prev_time = result.start_time avg_size = avg_size/(len(results)) - print("avg size is " + str(avg_size)) avg_time = avg_time/(len(results)-1) - print("avg time is " + str(avg_time)) #if(True): # so times are regular in margin and data sizes are same in margin for prev, current in zip(results[:-1], results[1:]): interval = current.start_time - prev.start_time - print("current interval is " + str(interval)) # only compares if incs happened at quasi-regular intervals if(interval >= avg_time * (1 - self.inc_date_percentage) and interval <= avg_time * (1 + self.inc_date_percentage)): - print("analyzing pair of avg size " + str(avg_size) + " and " + str(current.data_size)) # converts prev to a result with the average size prev.data_size = avg_size new_alerts = self._analyze_pair(prev, current, self.inc_data_percentage) alerts += new_alerts - else: - print("current interval was not in set bounds") # Only send a maximum of alert_limit alerts or all alerts if alert_limit is -1 count = len(alerts) if alert_limit == -1 else min(alert_limit, len(alerts)) diff --git a/apps/analyzer/tests/test_simple_rule_based_analyzer.py b/apps/analyzer/tests/test_simple_rule_based_analyzer.py index ebf85fa..f3cc00e 100644 --- a/apps/analyzer/tests/test_simple_rule_based_analyzer.py +++ b/apps/analyzer/tests/test_simple_rule_based_analyzer.py @@ -261,3 +261,56 @@ def test_alert_backup_size_complex_nok_diff(): "backupId": mock_result7.uuid }] +# large increase of inc size +def test_alert_backup_size_zero_inc(): + mock_result1 = _create_mock_result("foo", "1", "I", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "I", 0, datetime.fromisoformat("2000-01-02")) + + database = MockDatabase([mock_result1, mock_result2]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_inc(1) + + assert backend.alerts == [{ + "type": 1, + "value": mock_result2.data_size / 1_000_000, + "referenceValue": mock_result1.data_size / 1_000_000, + "backupId": mock_result2.uuid + }] + +# irregular backup times that should not be alerted +def test_alert_backup_size_irregular_inc(): + mock_result1 = _create_mock_result("foo", "1", "I", 100_000_000, datetime.fromisoformat("2000-01-01")) + mock_result2 = _create_mock_result("foo", "2", "I", 0, datetime.fromisoformat("2000-01-08")) + mock_result3 = _create_mock_result("foo", "3", "I", 100_000_000, datetime.fromisoformat("2000-01-09")) + mock_result4 = _create_mock_result("foo", "4", "I", 100_000_000, datetime.fromisoformat("2000-01-10")) + + database = MockDatabase([mock_result1, mock_result2, mock_result3,mock_result4]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_inc(1) + + assert backend.alerts == [] + +# irregular backup sizes +def test_alert_backup_size_irregularSize_inc(): + mock_result1 = _create_mock_result("foo", "1", "I", 100_000_000, datetime.fromisoformat("2000-01-07")) + mock_result2 = _create_mock_result("foo", "2", "I", 100_000_000, datetime.fromisoformat("2000-01-08")) + mock_result3 = _create_mock_result("foo", "3", "I", 72_000_000, datetime.fromisoformat("2000-01-09")) + mock_result4 = _create_mock_result("foo", "4", "I", 100_000_000, datetime.fromisoformat("2000-01-10")) + avg = (mock_result1.data_size + mock_result2.data_size + mock_result3.data_size + mock_result4.data_size)/4 + + database = MockDatabase([mock_result1, mock_result2, mock_result3, mock_result4]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_inc(1) + + assert backend.alerts == [{ + "type": 1, + "value":72, + "referenceValue": avg / 1_000_000, + "backupId": mock_result3.uuid + }] \ No newline at end of file From 1e439ee518ec383df30d5357f817c2be64fe68e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Obernd=C3=B6rfer?= Date: Tue, 26 Nov 2024 19:44:52 +0100 Subject: [PATCH 13/13] Workaround for floats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Florian Oberndörfer --- apps/backend/src/app/alerting/alerting.service.ts | 5 +++-- .../src/app/backupData/backupData.service.ts | 14 +++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/apps/backend/src/app/alerting/alerting.service.ts b/apps/backend/src/app/alerting/alerting.service.ts index ba6a7f7..c5f3ef8 100644 --- a/apps/backend/src/app/alerting/alerting.service.ts +++ b/apps/backend/src/app/alerting/alerting.service.ts @@ -19,12 +19,13 @@ export class AlertingService { const alert = new AlertEntity(); alert.type = createAlertDto.type; - alert.value = createAlertDto.value; - alert.referenceValue = createAlertDto.referenceValue; + alert.value = Math.floor(createAlertDto.value); + alert.referenceValue = Math.floor(createAlertDto.referenceValue); const backupDataEntity = await this.backupDataService.findOneById( createAlertDto.backupId ); if (!backupDataEntity) { + console.log(`Backup with id ${createAlertDto.backupId} not found`); throw new NotFoundException( `Backup with id ${createAlertDto.backupId} not found` ); diff --git a/apps/backend/src/app/backupData/backupData.service.ts b/apps/backend/src/app/backupData/backupData.service.ts index 7338d3b..810273b 100644 --- a/apps/backend/src/app/backupData/backupData.service.ts +++ b/apps/backend/src/app/backupData/backupData.service.ts @@ -56,7 +56,10 @@ export class BackupDataService extends PaginationService { async create( createBackupDataDto: CreateBackupDataDto ): Promise { - return await this.backupDataRepository.save(createBackupDataDto); + return await this.backupDataRepository.save({ + ...createBackupDataDto, + sizeMB: Math.floor(createBackupDataDto.sizeMB), + }); } /** @@ -66,7 +69,12 @@ export class BackupDataService extends PaginationService { async createBatched( createBackupDataDtos: CreateBackupDataDto[] ): Promise { - await this.backupDataRepository.save(createBackupDataDtos); + await this.backupDataRepository.save( + createBackupDataDtos.map((dto) => ({ + ...dto, + sizeMB: Math.floor(dto.sizeMB), + })) + ); } /** @@ -95,7 +103,7 @@ export class BackupDataService extends PaginationService { from.setHours(0); from.setMinutes(0); from.setSeconds(0); - from.setMilliseconds(0) + from.setMilliseconds(0); console.log(from); } if (backupDataFilterDto.toDate) {