From a566e8763fdd4e39e2a71a45ea7af70217252e28 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 8 Nov 2024 22:04:27 +0530 Subject: [PATCH 1/4] Add on_failure to handle cleanup during pipeline failure - Resolves https://github.com/aboutcode-org/vulnerablecode/issues/1639 Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 53 ++++++++++++++++++++ vulnerabilities/pipelines/gitlab_importer.py | 3 ++ vulnerabilities/pipelines/npm_importer.py | 3 ++ vulnerabilities/pipelines/pypa_importer.py | 3 ++ 4 files changed, 62 insertions(+) diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 0d3589b67..ff6c41631 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -10,11 +10,13 @@ import logging from datetime import datetime from datetime import timezone +from timeit import default_timer as timer from traceback import format_exc as traceback_format_exc from typing import Iterable from aboutcode.pipeline import BasePipeline from aboutcode.pipeline import LoopProgress +from aboutcode.pipeline import humanize_time from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE @@ -29,6 +31,57 @@ class VulnerableCodePipeline(BasePipeline): pipeline_id = None # Unique Pipeline ID + def on_failure(self): + """ + Tasks to run in the event that pipeline execution fails. + + Implement cleanup or other tasks that need to be performed + on pipeline failure, such as: + - Removing cloned repositories. + - Deleting downloaded archives. + """ + pass + + def execute(self): + """Execute each steps in the order defined on this pipeline class.""" + self.log(f"Pipeline [{self.pipeline_name}] starting") + + steps = self.pipeline_class.get_steps(groups=self.selected_groups) + steps_count = len(steps) + pipeline_start_time = timer() + + for current_index, step in enumerate(steps, start=1): + step_name = step.__name__ + + if self.selected_steps and step_name not in self.selected_steps: + self.log(f"Step [{step_name}] skipped") + continue + + self.set_current_step(f"{current_index}/{steps_count} {step_name}") + self.log(f"Step [{step_name}] starting") + step_start_time = timer() + + try: + step(self) + except Exception as exception: + self.log("Pipeline failed") + on_failure_start_time = timer() + self.log(f"Running [on_failure] tasks") + self.on_failure() + on_failure_run_time = timer() - on_failure_start_time + self.log(f"Completed [on_failure] tasks in {humanize_time(on_failure_run_time)}") + + return 1, self.output_from_exception(exception) + + step_run_time = timer() - step_start_time + self.log(f"Step [{step_name}] completed in {humanize_time(step_run_time)}") + + self.set_current_step("") # Reset the `current_step` field on completion + pipeline_run_time = timer() - pipeline_start_time + self.log(f"Pipeline completed in {humanize_time(pipeline_run_time)}") + + return 0, "" + def log(self, message, level=logging.INFO): """Log the given `message` to the current module logger and execution_log.""" now_local = datetime.now(timezone.utc).astimezone() diff --git a/vulnerabilities/pipelines/gitlab_importer.py b/vulnerabilities/pipelines/gitlab_importer.py index 87fef15d0..4f25c4d94 100644 --- a/vulnerabilities/pipelines/gitlab_importer.py +++ b/vulnerabilities/pipelines/gitlab_importer.py @@ -106,6 +106,9 @@ def clean_downloads(self): self.log(f"Removing cloned repository") self.vcs_response.delete() + def on_failure(self): + self.clean_downloads() + def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]: """ diff --git a/vulnerabilities/pipelines/npm_importer.py b/vulnerabilities/pipelines/npm_importer.py index 60a1d109c..7b6d3aba2 100644 --- a/vulnerabilities/pipelines/npm_importer.py +++ b/vulnerabilities/pipelines/npm_importer.py @@ -163,3 +163,6 @@ def clean_downloads(self): if self.vcs_response: self.log(f"Removing cloned repository") self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() diff --git a/vulnerabilities/pipelines/pypa_importer.py b/vulnerabilities/pipelines/pypa_importer.py index bdda50c94..aebafacf4 100644 --- a/vulnerabilities/pipelines/pypa_importer.py +++ b/vulnerabilities/pipelines/pypa_importer.py @@ -68,3 +68,6 @@ def clean_downloads(self): if self.vcs_response: self.log(f"Removing cloned repository") self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() From f757e6064997e7c07b61a52ac46069befa9413c6 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 8 Nov 2024 22:16:33 +0530 Subject: [PATCH 2/4] Add docs for on_failure Signed-off-by: Keshav Priyadarshi --- docs/source/tutorial_add_importer_pipeline.rst | 6 +++++- docs/source/tutorial_add_improver_pipeline.rst | 5 +++++ vulnerabilities/pipelines/__init__.py | 4 ++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/source/tutorial_add_importer_pipeline.rst b/docs/source/tutorial_add_importer_pipeline.rst index 8b9bf0e89..3f5b6b785 100644 --- a/docs/source/tutorial_add_importer_pipeline.rst +++ b/docs/source/tutorial_add_importer_pipeline.rst @@ -298,10 +298,14 @@ version management from `univers `_. **advisories_count** should never be directly added in steps. +.. attention:: + + Implement ``on_failure`` to handle cleanup in case of pipeline failure. + Cleanup of downloaded archives or cloned repos is necessary to avoid potential resource leakage. .. note:: - | Use ``make valid`` to format your code using black and isort automatically. + | Use ``make valid`` to format your new code using black and isort automatically. | Use ``make check`` to check for formatting errors. Register the Importer Pipeline diff --git a/docs/source/tutorial_add_improver_pipeline.rst b/docs/source/tutorial_add_improver_pipeline.rst index 6c8d90d25..feed3556b 100644 --- a/docs/source/tutorial_add_improver_pipeline.rst +++ b/docs/source/tutorial_add_improver_pipeline.rst @@ -187,6 +187,11 @@ methods. self.log(f"Successfully flagged {ghost_package_count:,d} ghost Packages") +.. attention:: + + Implement ``on_failure`` to handle cleanup in case of pipeline failure. + Cleanup of downloaded archives or cloned repos is necessary to avoid potential resource leakage. + .. note:: | Use ``make valid`` to format your new code using black and isort automatically. diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index ff6c41631..d74db9f35 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -104,8 +104,8 @@ class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline): Base importer pipeline for importing advisories. Uses: - Subclass this Pipeline and implement ``advisories_count`` and ``collect_advisories`` method. - Also override the ``steps`` and ``advisory_confidence`` as needed. + Subclass this Pipeline and implement ``advisories_count`` and ``collect_advisories`` + method. Also override the ``steps`` and ``advisory_confidence`` as needed. """ pipeline_id = None # Unique Pipeline ID, this should be the name of pipeline module. From 4d9a9765671a301412c92ef1ca42ebaecd95eff7 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 14 Nov 2024 15:29:24 +0530 Subject: [PATCH 3/4] Update how to contribute link Signed-off-by: Keshav Priyadarshi --- docs/source/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 5b58c19ed..ba634ecc8 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -89,7 +89,7 @@ Helpful Resources - Review our `comprehensive guide `_ for more details on how to add quality contributions to our codebase and documentation -- Check this free resource on `how to contribute to an open source project on github `_ +- Check this free resource on `How to contribute to an open source project on github `_ - Follow `this wiki page `_ on how to write good commit messages - `Pro Git book `_ From 8f47de3acb01bd0cd56035d558f099c23dbbee68 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 14 Nov 2024 16:05:00 +0530 Subject: [PATCH 4/4] Add test for on_failure mechanism Signed-off-by: Keshav Priyadarshi --- .../tests/pipelines/test_base_pipeline.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/vulnerabilities/tests/pipelines/test_base_pipeline.py b/vulnerabilities/tests/pipelines/test_base_pipeline.py index aaf01a7f9..02e8c6b09 100644 --- a/vulnerabilities/tests/pipelines/test_base_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_base_pipeline.py @@ -19,6 +19,8 @@ from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Reference from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.tests.pipelines import TestLogger advisory_data1 = AdvisoryData( aliases=["CVE-2020-13371337"], @@ -47,6 +49,33 @@ def get_advisory1(created_by="test_pipeline"): ) +class TestVulnerableCodePipeline(TestCase): + def test_on_failure(self): + class TestPipeline(VulnerableCodePipeline): + def __init__(self, test_logger): + super().__init__() + self.log = test_logger.write + + @classmethod + def steps(cls): + return (cls.step1,) + + def step1(self): + raise Exception("Something went wrong!") + + def on_failure(self): + self.log("Doing cleanup.") + + logger = TestLogger() + pipeline = TestPipeline(test_logger=logger) + + pipeline.execute() + log_result = logger.getvalue() + + self.assertIn("Pipeline failed", log_result) + self.assertIn("Running [on_failure] tasks", log_result) + + class TestVulnerableCodeBaseImporterPipeline(TestCase): @patch.object( VulnerableCodeBaseImporterPipeline,