From d9e7f281b6c8b7ce60b3a8b9db40b2e9acb0578b Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 21:30:09 -0400 Subject: [PATCH 01/19] detectoronly harness --- garak/_config.py | 2 +- garak/attempt.py | 27 +++++++++++++++++++ garak/cli.py | 37 +++++++++++++++++++++++++- garak/command.py | 14 +++++++++- garak/harnesses/base.py | 47 +++++++++++++++++---------------- garak/harnesses/detectoronly.py | 36 +++++++++++++++++++++++++ garak/resources/garak.core.yaml | 5 ++-- garak/test.py | 4 +++ 8 files changed, 144 insertions(+), 28 deletions(-) create mode 100644 garak/harnesses/detectoronly.py create mode 100644 garak/test.py diff --git a/garak/_config.py b/garak/_config.py index c2648c8fd..c8c96e9f6 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -28,7 +28,7 @@ system_params = ( "verbose narrow_output parallel_requests parallel_attempts skip_unknown".split() ) -run_params = "seed deprefix eval_threshold generations probe_tags interactive".split() +run_params = "seed deprefix eval_threshold generations probe_tags interactive probed_report_path".split() plugins_params = "model_type model_name extended_detectors".split() reporting_params = "taxonomy report_prefix".split() project_dir_name = "garak" diff --git a/garak/attempt.py b/garak/attempt.py index 08ba64418..24844d390 100644 --- a/garak/attempt.py +++ b/garak/attempt.py @@ -105,6 +105,24 @@ def as_dict(self) -> dict: "messages": self.messages, } + @classmethod + def from_dict(cls, dicti): + """Initializes an attempt object from dictionary""" + attempt_obj = cls() + attempt_obj.uuid = dicti['uuid'] + attempt_obj.seq = dicti['seq'] + attempt_obj.status = dicti['status'] + attempt_obj.probe_classname = dicti['probe_classname'] + attempt_obj.probe_params = dicti['probe_params'] + attempt_obj.targets = dicti['targets'] + attempt_obj.prompt = dicti['prompt'] + attempt_obj.outputs = dicti['outputs'] + attempt_obj.detector_results = dicti['detector_results'] + attempt_obj.notes = dicti['notes'] + attempt_obj.goal = dicti['goal'] + attempt_obj.messages = dicti['messages'] + return attempt_obj + def __getattribute__(self, name: str) -> Any: """override prompt and outputs access to take from history""" if name == "prompt": @@ -260,3 +278,12 @@ def _add_turn(self, role: str, contents: List[str]) -> None: "Conversation turn role must be one of '%s', got '%s'" % ("'/'".join(roles), role) ) + +if __name__ == "__main__": + import json + with open('test/test.report.jsonl') as f: + data = [json.loads(line) for line in f] + + data = [d for d in data if d["entry_type"] == "attempt" and d["status"] == 1] + attempt_object = Attempt.from_dict(data[0]) + print(attempt_object.as_dict()) \ No newline at end of file diff --git a/garak/cli.py b/garak/cli.py index c6df1af70..503d94703 100644 --- a/garak/cli.py +++ b/garak/cli.py @@ -3,7 +3,7 @@ """Flow for invoking garak from the command line""" -command_options = "list_detectors list_probes list_generators list_buffs list_config plugin_info interactive report version".split() +command_options = "list_detectors list_probes list_generators list_buffs list_config plugin_info interactive report version detector_only".split() def main(arguments=None) -> None: @@ -107,6 +107,12 @@ def main(arguments=None) -> None: parser.add_argument( "--config", type=str, default=None, help="YAML config file for this run" ) + parser.add_argument( + "--probed_report_path", + type=str, + default=None, + help="Path to jsonl report that stores the generators responses" + ) ## PLUGINS # generator @@ -247,6 +253,11 @@ def main(arguments=None) -> None: action="store_true", help="Launch garak in interactive.py mode", ) + parser.add_argument( + "--detector_only", + action="store_true", + help="run detector on jsonl report" + ) logging.debug("args - raw argument string received: %s", arguments) @@ -512,6 +523,30 @@ def main(arguments=None) -> None: ) command.end_run() + + elif args.detector_only: + # Run detector only detection + if not _config.plugins.detector_spec: + logging.error("Detector(s) not specified. Use --detectors") + raise ValueError("use --detectors to specify some detectors") + + if not _config.run.probed_report_path: + logging.error("report path not specified") + raise ValueError("Specify jsonl report path using --probed_report_path") + + evaluator = garak.evaluators.ThresholdEvaluator(_config.run.eval_threshold) + print(_config.plugins.detector_spec.split(",")) + + detector_names, detector_rejected = _config.parse_plugin_spec( + getattr(_config.plugins, "detector_spec", ""), + "detectors", + getattr(_config.run, "detector_tags", "") + ) + + command.start_run() + command.detector_only_run(_config.run.probed_report_path, detector_names, evaluator) + command.end_run() + else: print("nothing to do 🤷 try --help") if _config.plugins.model_name and not _config.plugins.model_type: diff --git a/garak/command.py b/garak/command.py index ec61b3ba0..82ec391b8 100644 --- a/garak/command.py +++ b/garak/command.py @@ -6,7 +6,6 @@ import logging import json - def start_logging(): from garak import _config @@ -255,3 +254,16 @@ def write_report_digest(report_filename, digest_filename): digest = report_digest.compile_digest(report_filename) with open(digest_filename, "w", encoding="utf-8") as f: f.write(digest) + +def detector_only_run(report_filename, detectors, evaluator): + import garak.harnesses.detectoronly + import garak.attempt + + with open(report_filename) as f: + data = [json.loads(line) for line in f] + + data = [d for d in data if d["entry_type"] == "attempt" and d["status"] == 1] + attempts = [garak.attempt.Attempt.from_dict(d) for d in data] + + detector_only_h = garak.harnesses.detectoronly.DetectorOnly() + detector_only_h.run(attempts, detectors, evaluator) diff --git a/garak/harnesses/base.py b/garak/harnesses/base.py index 42a36f082..71baad82f 100644 --- a/garak/harnesses/base.py +++ b/garak/harnesses/base.py @@ -109,28 +109,29 @@ def run(self, model, probes, detectors, evaluator, announce_probe=True) -> None: assert isinstance( attempt_results, (list, types.GeneratorType) ), "probing should always return an ordered iterable" - - for d in detectors: - logging.debug("harness: run detector %s", d.detectorname) - attempt_iterator = tqdm.tqdm(attempt_results, leave=False) - detector_probe_name = d.detectorname.replace("garak.detectors.", "") - attempt_iterator.set_description("detectors." + detector_probe_name) - for attempt in attempt_iterator: - attempt.detector_results[detector_probe_name] = list( - d.detect(attempt) - ) - - for attempt in attempt_results: - attempt.status = garak.attempt.ATTEMPT_COMPLETE - _config.transient.reportfile.write(json.dumps(attempt.as_dict()) + "\n") - - if len(attempt_results) == 0: - logging.warning( - "zero attempt results: probe %s, detector %s", - probe.probename, - detector_probe_name, - ) - else: - evaluator.evaluate(attempt_results) + self.run_detectors(detectors, attempt_results, probe, evaluator) logging.debug("harness: probe list iteration completed") + + def run_detectors(self, detectors, attempt_results, evaluator, probe=None): + for d in detectors: + logging.debug("harness: run detector %s", d.detectorname) + attempt_iterator = tqdm.tqdm(attempt_results, leave=False) + detector_probe_name = d.detectorname.replace("garak.detectors.", "") + attempt_iterator.set_description("detectors." + detector_probe_name) + for attempt in attempt_iterator: + attempt.detector_results[detector_probe_name] = list( + d.detect(attempt) + ) + + for attempt in attempt_results: + attempt.status = garak.attempt.ATTEMPT_COMPLETE + _config.transient.reportfile.write(json.dumps(attempt.as_dict()) + "\n") + + if len(attempt_results) == 0: + logging.warning( + "zero attempt results: probe %s", + probe.probename + ) + else: + evaluator.evaluate(attempt_results) diff --git a/garak/harnesses/detectoronly.py b/garak/harnesses/detectoronly.py new file mode 100644 index 000000000..72c9a17f9 --- /dev/null +++ b/garak/harnesses/detectoronly.py @@ -0,0 +1,36 @@ +import logging + +from garak import _config, _plugins +from garak.harnesses import Harness +from garak.detectors import Detector + +class DetectorOnly(Harness): + def __init__(self, config_root=_config): + super().__init__(config_root) + + def _load_detector(self, detector_name: str) -> Detector: + detector = _plugins.load_plugin( + detector_name, break_on_fail=False + ) + if detector: + return detector + else: + print(f" detector load failed: {detector_name}, skipping >>") + logging.error(f" detector load failed: {detector_name}, skipping >>") + return False + + def run(self, attempts, detector_names, evaluator): + detectors = [] + for detector in sorted(detector_names): + d = self._load_detector(detector) + if d: + detectors.append(d) + + if len(detectors) == 0: + msg = "No detectors, nothing to do" + logging.warning(msg) + if hasattr(_config.system, "verbose") and _config.system.verbose >= 2: + print(msg) + raise ValueError(msg) + + super().run_detectors(detectors, attempts, evaluator) # The probe is None, but hopefully no errors occur with probe. \ No newline at end of file diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index 02d765308..a7df40fd2 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -12,6 +12,7 @@ run: eval_threshold: 0.5 generations: 10 probe_tags: + probed_report_path: plugins: model_type: @@ -32,7 +33,7 @@ plugins: - default reporting: - report_prefix: + report_prefix: test taxonomy: - report_dir: garak_runs + report_dir: /Users/vidushi/desktop/garak/test show_100_pass_modules: true \ No newline at end of file diff --git a/garak/test.py b/garak/test.py new file mode 100644 index 000000000..020ffbec3 --- /dev/null +++ b/garak/test.py @@ -0,0 +1,4 @@ +import garak.attempt + +if __name__ == "__main__": + print(garak.attempt.Attempt._init_parameters) \ No newline at end of file From c8b7e7718335838dddfa659f6b3123f1747d89a6 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 22:24:09 -0400 Subject: [PATCH 02/19] Update attempt.py Signed-off-by: Vidushi Maheshwari --- garak/attempt.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/garak/attempt.py b/garak/attempt.py index 24844d390..b707bb32e 100644 --- a/garak/attempt.py +++ b/garak/attempt.py @@ -278,12 +278,3 @@ def _add_turn(self, role: str, contents: List[str]) -> None: "Conversation turn role must be one of '%s', got '%s'" % ("'/'".join(roles), role) ) - -if __name__ == "__main__": - import json - with open('test/test.report.jsonl') as f: - data = [json.loads(line) for line in f] - - data = [d for d in data if d["entry_type"] == "attempt" and d["status"] == 1] - attempt_object = Attempt.from_dict(data[0]) - print(attempt_object.as_dict()) \ No newline at end of file From 13c89fe1eb8546711dc13f7c3abec85541af3353 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 22:26:12 -0400 Subject: [PATCH 03/19] change --- garak/resources/garak.core.yaml | 4 ++-- garak/test.py | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) delete mode 100644 garak/test.py diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index a7df40fd2..e5ed83708 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -33,7 +33,7 @@ plugins: - default reporting: - report_prefix: test + report_prefix: taxonomy: - report_dir: /Users/vidushi/desktop/garak/test + report_dir: garak_runs show_100_pass_modules: true \ No newline at end of file diff --git a/garak/test.py b/garak/test.py deleted file mode 100644 index 020ffbec3..000000000 --- a/garak/test.py +++ /dev/null @@ -1,4 +0,0 @@ -import garak.attempt - -if __name__ == "__main__": - print(garak.attempt.Attempt._init_parameters) \ No newline at end of file From df107c26ce7ca62035fc6a7ef94dd119d6c9964c Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 22:28:04 -0400 Subject: [PATCH 04/19] Update garak.core.yaml Signed-off-by: Vidushi Maheshwari --- garak/resources/garak.core.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index e5ed83708..8cd8ce939 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -33,7 +33,7 @@ plugins: - default reporting: - report_prefix: + report_prefix: taxonomy: report_dir: garak_runs - show_100_pass_modules: true \ No newline at end of file + show_100_pass_modules: true From d9d43a6180cc990b979c35bbad4a37162c4355cf Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 22:28:42 -0400 Subject: [PATCH 05/19] Update garak.core.yaml Signed-off-by: Vidushi Maheshwari --- garak/resources/garak.core.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index 8cd8ce939..292be8c25 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -36,4 +36,3 @@ reporting: report_prefix: taxonomy: report_dir: garak_runs - show_100_pass_modules: true From 662fbf046eb7e75955b823bc02cb3ce4da6a104d Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 22:29:31 -0400 Subject: [PATCH 06/19] 100_pass_mod --- garak/resources/garak.core.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index 292be8c25..cf714a185 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -36,3 +36,4 @@ reporting: report_prefix: taxonomy: report_dir: garak_runs + show_100_pass_modules: true \ No newline at end of file From 15c1097b94f3ca70c8872826bd9c68b26a9da4de Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 22:26:12 -0400 Subject: [PATCH 07/19] change Signed-off-by: Vidushi Maheshwari --- garak/resources/garak.core.yaml | 4 ++-- garak/test.py | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) delete mode 100644 garak/test.py diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index a7df40fd2..e5ed83708 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -33,7 +33,7 @@ plugins: - default reporting: - report_prefix: test + report_prefix: taxonomy: - report_dir: /Users/vidushi/desktop/garak/test + report_dir: garak_runs show_100_pass_modules: true \ No newline at end of file diff --git a/garak/test.py b/garak/test.py deleted file mode 100644 index 020ffbec3..000000000 --- a/garak/test.py +++ /dev/null @@ -1,4 +0,0 @@ -import garak.attempt - -if __name__ == "__main__": - print(garak.attempt.Attempt._init_parameters) \ No newline at end of file From 3f8e26346000540e52bedf6ad36e16c7259dd2f6 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 22:24:09 -0400 Subject: [PATCH 08/19] Update attempt.py Signed-off-by: Vidushi Maheshwari --- garak/attempt.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/garak/attempt.py b/garak/attempt.py index 24844d390..b707bb32e 100644 --- a/garak/attempt.py +++ b/garak/attempt.py @@ -278,12 +278,3 @@ def _add_turn(self, role: str, contents: List[str]) -> None: "Conversation turn role must be one of '%s', got '%s'" % ("'/'".join(roles), role) ) - -if __name__ == "__main__": - import json - with open('test/test.report.jsonl') as f: - data = [json.loads(line) for line in f] - - data = [d for d in data if d["entry_type"] == "attempt" and d["status"] == 1] - attempt_object = Attempt.from_dict(data[0]) - print(attempt_object.as_dict()) \ No newline at end of file From 4714757a2dbc02d00a134f99a61ca8a3e59ffd02 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 22:28:04 -0400 Subject: [PATCH 09/19] Update garak.core.yaml Signed-off-by: Vidushi Maheshwari --- garak/resources/garak.core.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index e5ed83708..8cd8ce939 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -33,7 +33,7 @@ plugins: - default reporting: - report_prefix: + report_prefix: taxonomy: report_dir: garak_runs - show_100_pass_modules: true \ No newline at end of file + show_100_pass_modules: true From 9f63ab1d907a317b74d78f4e7341c08cff385239 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 22:28:42 -0400 Subject: [PATCH 10/19] Update garak.core.yaml Signed-off-by: Vidushi Maheshwari --- garak/resources/garak.core.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index 8cd8ce939..292be8c25 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -36,4 +36,3 @@ reporting: report_prefix: taxonomy: report_dir: garak_runs - show_100_pass_modules: true From 1b5aa46318b90435c40c3bc6af1ef5004544563a Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Wed, 14 Aug 2024 22:29:31 -0400 Subject: [PATCH 11/19] 100_pass_mod Signed-off-by: Vidushi Maheshwari --- garak/resources/garak.core.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index 292be8c25..cf714a185 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -36,3 +36,4 @@ reporting: report_prefix: taxonomy: report_dir: garak_runs + show_100_pass_modules: true \ No newline at end of file From 239cfc8719fdbfaf46de9394443ed954bebefef8 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Mon, 19 Aug 2024 11:08:15 -0400 Subject: [PATCH 12/19] docs --- docs/source/garak.harnesses.detectoronly.rst | 8 ++++++++ docs/source/harnesses.rst | 1 + garak/harnesses/base.py | 2 +- garak/harnesses/detectoronly.py | 8 ++++++++ 4 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 docs/source/garak.harnesses.detectoronly.rst diff --git a/docs/source/garak.harnesses.detectoronly.rst b/docs/source/garak.harnesses.detectoronly.rst new file mode 100644 index 000000000..191da1529 --- /dev/null +++ b/docs/source/garak.harnesses.detectoronly.rst @@ -0,0 +1,8 @@ +garak.harnesses.detectoronly +==================== + +.. automodule:: garak.harnesses.detectoronly + :members: + :undoc-members: + :show-inheritance: + diff --git a/docs/source/harnesses.rst b/docs/source/harnesses.rst index 45bb49751..be8b1660d 100644 --- a/docs/source/harnesses.rst +++ b/docs/source/harnesses.rst @@ -6,5 +6,6 @@ garak.harnesses garak.harnesses garak.harnesses.base + garak.harnesses.detectoronly garak.harnesses.probewise garak.harnesses.pxd diff --git a/garak/harnesses/base.py b/garak/harnesses/base.py index 71baad82f..22032d505 100644 --- a/garak/harnesses/base.py +++ b/garak/harnesses/base.py @@ -109,7 +109,7 @@ def run(self, model, probes, detectors, evaluator, announce_probe=True) -> None: assert isinstance( attempt_results, (list, types.GeneratorType) ), "probing should always return an ordered iterable" - self.run_detectors(detectors, attempt_results, probe, evaluator) + self.run_detectors(detectors, attempt_results, evaluator, probe) logging.debug("harness: probe list iteration completed") diff --git a/garak/harnesses/detectoronly.py b/garak/harnesses/detectoronly.py index 72c9a17f9..4bd1b9ce1 100644 --- a/garak/harnesses/detectoronly.py +++ b/garak/harnesses/detectoronly.py @@ -1,3 +1,11 @@ +# SPDX-FileCopyrightText: Portions Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Detector only harness + +Runs specified detectors on already existing prompt-response pairs from parsing a report.jsonl file. +""" + import logging from garak import _config, _plugins From e9eb742cc11c48d710a8474dfe0f861f77d1e2aa Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Mon, 19 Aug 2024 17:20:31 -0400 Subject: [PATCH 13/19] harness config options and files --- garak/_config.py | 2 +- garak/cli.py | 67 +++++++++++++++------------------ garak/command.py | 33 ++++++++++++++-- garak/resources/garak.core.yaml | 2 +- 4 files changed, 63 insertions(+), 41 deletions(-) diff --git a/garak/_config.py b/garak/_config.py index c8c96e9f6..c2648c8fd 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -28,7 +28,7 @@ system_params = ( "verbose narrow_output parallel_requests parallel_attempts skip_unknown".split() ) -run_params = "seed deprefix eval_threshold generations probe_tags interactive probed_report_path".split() +run_params = "seed deprefix eval_threshold generations probe_tags interactive".split() plugins_params = "model_type model_name extended_detectors".split() reporting_params = "taxonomy report_prefix".split() project_dir_name = "garak" diff --git a/garak/cli.py b/garak/cli.py index 503d94703..2c3b4e252 100644 --- a/garak/cli.py +++ b/garak/cli.py @@ -3,7 +3,7 @@ """Flow for invoking garak from the command line""" -command_options = "list_detectors list_probes list_generators list_buffs list_config plugin_info interactive report version detector_only".split() +command_options = "list_detectors list_probes list_generators list_buffs list_config plugin_info interactive report version".split() def main(arguments=None) -> None: @@ -107,12 +107,6 @@ def main(arguments=None) -> None: parser.add_argument( "--config", type=str, default=None, help="YAML config file for this run" ) - parser.add_argument( - "--probed_report_path", - type=str, - default=None, - help="Path to jsonl report that stores the generators responses" - ) ## PLUGINS # generator @@ -180,6 +174,21 @@ def main(arguments=None) -> None: action="store_true", help="If detectors aren't specified on the command line, should we run all detectors? (default is just the primary detector, if given, else everything)", ) + + # harness + parser.add_argument( + "--harness_options", + type=str, + help="Type of harness to use. Default is probewise." + ) + + parser.add_argument( + "--harness_option_file", + "-H", + type=str, + help="path to JSON file containing information about harnesses" + ) + # buffs parser.add_argument( "--buffs", @@ -253,11 +262,6 @@ def main(arguments=None) -> None: action="store_true", help="Launch garak in interactive.py mode", ) - parser.add_argument( - "--detector_only", - action="store_true", - help="run detector on jsonl report" - ) logging.debug("args - raw argument string received: %s", arguments) @@ -348,7 +352,7 @@ def main(arguments=None) -> None: import garak.evaluators try: - plugin_types = ["probe", "generator"] + plugin_types = ["probe", "generator", "harness"] # do a special thing for CLI probe options, generator options for plugin_type in plugin_types: opts_arg = f"{plugin_type}_options" @@ -379,7 +383,10 @@ def main(arguments=None) -> None: ) raise e - config_plugin_type = getattr(_config.plugins, f"{plugin_type}s") + if plugin_type.endswith('s'): + config_plugin_type = getattr(_config.plugins, f"{plugin_type}es") + else: + config_plugin_type = getattr(_config.plugins, f"{plugin_type}s") config_plugin_type = _config._combine_into( opts_cli_config, config_plugin_type @@ -508,43 +515,31 @@ def main(arguments=None) -> None: command.start_run() # start the run now that all config validation is complete print(f"📜 reporting to {_config.transient.report_filename}") - - if parsed_specs["detector"] == []: + + if "Probewise" in _config.plugins.harnesses: command.probewise_run( generator, parsed_specs["probe"], evaluator, parsed_specs["buff"] ) - else: + + if "Pxd" in _config.plugins.harnesses: command.pxd_run( generator, parsed_specs["probe"], parsed_specs["detector"], evaluator, - parsed_specs["buff"], + parsed_specs["buff"] ) command.end_run() - - elif args.detector_only: - # Run detector only detection - if not _config.plugins.detector_spec: - logging.error("Detector(s) not specified. Use --detectors") - raise ValueError("use --detectors to specify some detectors") - - if not _config.run.probed_report_path: - logging.error("report path not specified") - raise ValueError("Specify jsonl report path using --probed_report_path") - evaluator = garak.evaluators.ThresholdEvaluator(_config.run.eval_threshold) - print(_config.plugins.detector_spec.split(",")) + elif "DetectorOnly" in _config.plugins.harnesses: - detector_names, detector_rejected = _config.parse_plugin_spec( - getattr(_config.plugins, "detector_spec", ""), - "detectors", - getattr(_config.run, "detector_tags", "") - ) + if "report_path" not in _config.plugins.harnesses.DetectorOnly: + logging.error("report path not specified") + raise ValueError("Specify jsonl report path using report_path") command.start_run() - command.detector_only_run(_config.run.probed_report_path, detector_names, evaluator) + command.detector_only_run() command.end_run() else: diff --git a/garak/command.py b/garak/command.py index 82ec391b8..e1cb2cd0b 100644 --- a/garak/command.py +++ b/garak/command.py @@ -91,6 +91,7 @@ def start_run(): list, set, type(None), + float, # Without float eval_threshold was not being stored ): setup_dict[f"{subset}.{k}"] = v @@ -255,15 +256,41 @@ def write_report_digest(report_filename, digest_filename): with open(digest_filename, "w", encoding="utf-8") as f: f.write(digest) -def detector_only_run(report_filename, detectors, evaluator): +def detector_only_run(detectors, evaluator): import garak.harnesses.detectoronly import garak.attempt + import _config - with open(report_filename) as f: + config = _config.plugins.harnesses.DetectorOnly + + with open(config.report_path) as f: data = [json.loads(line) for line in f] + + ## Get detectors and evaluator from report if not specified by the user + if "detectors" not in config or 'eval_threshold' not in config: + try: + for d in data: + if 'entry_type' in d and d['entry_type'] == 'start_run setup': + entry_line = d + break + except: + raise ValueError("Unexpected start_run setup line in report.jsonl") + + if "detectors" not in config: + detectors = entry_line['plugins.detector_spec'].splut(',') + setattr(_config.plugins.harnesses.DetectorOnly, "detectors", detectors) + + if "eval_threshold" not in config: + eval_threshold = _config.run.eval_threshold + if "run.eval_threshold" in entry_line: + eval_threshold = entry_line["run.eval_threshold"] + setattr(_config.plugins.harnesses.DetectorOnly, "eval_threshold", eval_threshold) data = [d for d in data if d["entry_type"] == "attempt" and d["status"] == 1] attempts = [garak.attempt.Attempt.from_dict(d) for d in data] detector_only_h = garak.harnesses.detectoronly.DetectorOnly() - detector_only_h.run(attempts, detectors, evaluator) + config = _config.plugins.harnesses.DetectorOnly + evaluator = garak.evaluators.ThresholdEvaluator(config.eval_threshold) + + detector_only_h.run(attempts, config.detectors, evaluator) diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index cf714a185..058e1823b 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -26,7 +26,7 @@ plugins: detectors: {} generators: {} buffs: {} - harnesses: {} + harnesses: {"Probewise": {}} probes: encoding: payloads: From a35cb5a6391654fbdd8e8e5588e6bf437a791d12 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Tue, 20 Aug 2024 10:22:36 -0400 Subject: [PATCH 14/19] Probewise harness is a dictionary instead of attributed class --- garak/cli.py | 9 ++++++++- garak/command.py | 28 +++++++++++++++++----------- garak/harnesses/detectoronly.py | 2 +- garak/resources/garak.core.yaml | 2 +- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/garak/cli.py b/garak/cli.py index 2c3b4e252..bc5171ecf 100644 --- a/garak/cli.py +++ b/garak/cli.py @@ -515,6 +515,13 @@ def main(arguments=None) -> None: command.start_run() # start the run now that all config validation is complete print(f"📜 reporting to {_config.transient.report_filename}") + + if not _config.plugins.harnesses: + # Set the _config.plugins.harnesses + if parsed_specs["detector"] == []: + _config.plugins.harnesses["Probewise"] = {} + else: + _config.plugins.harnesses["Pxd"] = {} if "Probewise" in _config.plugins.harnesses: command.probewise_run( @@ -534,7 +541,7 @@ def main(arguments=None) -> None: elif "DetectorOnly" in _config.plugins.harnesses: - if "report_path" not in _config.plugins.harnesses.DetectorOnly: + if "report_path" not in _config.plugins.harnesses["DetectorOnly"]: logging.error("report path not specified") raise ValueError("Specify jsonl report path using report_path") diff --git a/garak/command.py b/garak/command.py index e1cb2cd0b..804f6e6e7 100644 --- a/garak/command.py +++ b/garak/command.py @@ -256,18 +256,18 @@ def write_report_digest(report_filename, digest_filename): with open(digest_filename, "w", encoding="utf-8") as f: f.write(digest) -def detector_only_run(detectors, evaluator): +def detector_only_run(): import garak.harnesses.detectoronly import garak.attempt - import _config + from garak import _config - config = _config.plugins.harnesses.DetectorOnly + config = _config.plugins.harnesses["DetectorOnly"] - with open(config.report_path) as f: + with open(config["report_path"]) as f: data = [json.loads(line) for line in f] ## Get detectors and evaluator from report if not specified by the user - if "detectors" not in config or 'eval_threshold' not in config: + if "detectors" not in config or "eval_threshold" not in config: try: for d in data: if 'entry_type' in d and d['entry_type'] == 'start_run setup': @@ -277,20 +277,26 @@ def detector_only_run(detectors, evaluator): raise ValueError("Unexpected start_run setup line in report.jsonl") if "detectors" not in config: - detectors = entry_line['plugins.detector_spec'].splut(',') - setattr(_config.plugins.harnesses.DetectorOnly, "detectors", detectors) + detectors = entry_line['plugins.detector_spec'].split(',') + # setattr(_config.plugins.harnesses.DetectorOnly, "detectors", detectors) + _config.plugins.harnesses["DetectorOnly"]["detectors"] = detectors if "eval_threshold" not in config: eval_threshold = _config.run.eval_threshold if "run.eval_threshold" in entry_line: eval_threshold = entry_line["run.eval_threshold"] - setattr(_config.plugins.harnesses.DetectorOnly, "eval_threshold", eval_threshold) + _config.plugins.harnesses["DetectorOnly"]["eval_threshold"] = eval_threshold data = [d for d in data if d["entry_type"] == "attempt" and d["status"] == 1] attempts = [garak.attempt.Attempt.from_dict(d) for d in data] + if len(attempts) == 0: + raise ValueError("No attempts found in report.jsonl") + detector_only_h = garak.harnesses.detectoronly.DetectorOnly() - config = _config.plugins.harnesses.DetectorOnly - evaluator = garak.evaluators.ThresholdEvaluator(config.eval_threshold) + config = _config.plugins.harnesses["DetectorOnly"] + evaluator = garak.evaluators.ThresholdEvaluator(config["eval_threshold"]) + + print(config["detectors"]) - detector_only_h.run(attempts, config.detectors, evaluator) + detector_only_h.run(attempts, config["detectors"], evaluator) diff --git a/garak/harnesses/detectoronly.py b/garak/harnesses/detectoronly.py index 4bd1b9ce1..c5c5cb4d6 100644 --- a/garak/harnesses/detectoronly.py +++ b/garak/harnesses/detectoronly.py @@ -18,7 +18,7 @@ def __init__(self, config_root=_config): def _load_detector(self, detector_name: str) -> Detector: detector = _plugins.load_plugin( - detector_name, break_on_fail=False + "detectors." + detector_name, break_on_fail=False ) if detector: return detector diff --git a/garak/resources/garak.core.yaml b/garak/resources/garak.core.yaml index 058e1823b..cf714a185 100644 --- a/garak/resources/garak.core.yaml +++ b/garak/resources/garak.core.yaml @@ -26,7 +26,7 @@ plugins: detectors: {} generators: {} buffs: {} - harnesses: {"Probewise": {}} + harnesses: {} probes: encoding: payloads: From eab5c677dd0a25f2e170f2716cedf10948bbf7fd Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Thu, 22 Aug 2024 19:41:36 -0400 Subject: [PATCH 15/19] Update garak/cli.py Co-authored-by: Leon Derczynski Signed-off-by: Vidushi Maheshwari --- garak/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/garak/cli.py b/garak/cli.py index bc5171ecf..8413e389d 100644 --- a/garak/cli.py +++ b/garak/cli.py @@ -179,7 +179,7 @@ def main(arguments=None) -> None: parser.add_argument( "--harness_options", type=str, - help="Type of harness to use. Default is probewise." + help="Type of harness to use." ) parser.add_argument( From c3d33d4b8193145a2b8a4140984040a8b3510580 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Thu, 22 Aug 2024 19:42:22 -0400 Subject: [PATCH 16/19] Update garak/cli.py Co-authored-by: Leon Derczynski Signed-off-by: Vidushi Maheshwari --- garak/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/garak/cli.py b/garak/cli.py index 8413e389d..c64cfcc4a 100644 --- a/garak/cli.py +++ b/garak/cli.py @@ -186,7 +186,7 @@ def main(arguments=None) -> None: "--harness_option_file", "-H", type=str, - help="path to JSON file containing information about harnesses" + help="path to JSON file containing information harness options" ) # buffs From 153ff2e08eddc712cbf5f6efe72413db5725d928 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Thu, 22 Aug 2024 19:50:12 -0400 Subject: [PATCH 17/19] Update garak/harnesses/base.py Co-authored-by: Leon Derczynski Signed-off-by: Vidushi Maheshwari --- garak/harnesses/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/garak/harnesses/base.py b/garak/harnesses/base.py index 22032d505..07f0b1145 100644 --- a/garak/harnesses/base.py +++ b/garak/harnesses/base.py @@ -129,9 +129,9 @@ def run_detectors(self, detectors, attempt_results, evaluator, probe=None): _config.transient.reportfile.write(json.dumps(attempt.as_dict()) + "\n") if len(attempt_results) == 0: - logging.warning( - "zero attempt results: probe %s", - probe.probename - ) + msg = "zero attempt results: attempt %s" % attempt.uuid() + if probe is not None: + msg += ", probe %s" % probe.probename + logging.warning(msg) else: evaluator.evaluate(attempt_results) From 5086e807b6ef673ee59cc11146ba4babee4716f8 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Thu, 22 Aug 2024 19:52:24 -0400 Subject: [PATCH 18/19] Update garak/cli.py Co-authored-by: Jeffrey Martin Signed-off-by: Vidushi Maheshwari --- garak/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/garak/cli.py b/garak/cli.py index c64cfcc4a..5695bcc2a 100644 --- a/garak/cli.py +++ b/garak/cli.py @@ -539,7 +539,7 @@ def main(arguments=None) -> None: command.end_run() - elif "DetectorOnly" in _config.plugins.harnesses: + elif "detectoronly" in _config.plugins.harnesses: if "report_path" not in _config.plugins.harnesses["DetectorOnly"]: logging.error("report path not specified") From dbe916af7de1049c76c31ab985b23f589db070e1 Mon Sep 17 00:00:00 2001 From: Vidushi Maheshwari Date: Tue, 24 Sep 2024 09:06:09 -0400 Subject: [PATCH 19/19] decouple harness only run from execution --- garak/cli.py | 44 ++++++++++++++++++++++++++++---------------- garak/command.py | 31 +++---------------------------- 2 files changed, 31 insertions(+), 44 deletions(-) diff --git a/garak/cli.py b/garak/cli.py index 5695bcc2a..1e27e55ac 100644 --- a/garak/cli.py +++ b/garak/cli.py @@ -516,19 +516,9 @@ def main(arguments=None) -> None: command.start_run() # start the run now that all config validation is complete print(f"📜 reporting to {_config.transient.report_filename}") - if not _config.plugins.harnesses: - # Set the _config.plugins.harnesses - if parsed_specs["detector"] == []: - _config.plugins.harnesses["Probewise"] = {} - else: - _config.plugins.harnesses["Pxd"] = {} - - if "Probewise" in _config.plugins.harnesses: - command.probewise_run( - generator, parsed_specs["probe"], evaluator, parsed_specs["buff"] - ) - - if "Pxd" in _config.plugins.harnesses: + if parsed_specs["detector"] == []: + command.probewise_run(generator, parsed_specs["probe"], evaluator, parsed_specs["buff"]) + else: command.pxd_run( generator, parsed_specs["probe"], @@ -537,7 +527,8 @@ def main(arguments=None) -> None: parsed_specs["buff"] ) - command.end_run() + if "detectoronly" not in _config.plugins.harnesses: + command.end_run() elif "detectoronly" in _config.plugins.harnesses: @@ -545,8 +536,29 @@ def main(arguments=None) -> None: logging.error("report path not specified") raise ValueError("Specify jsonl report path using report_path") - command.start_run() - command.detector_only_run() + if not parsed_specs["detector"]: # If the user doesn't specify any detectors, repeat the same as the reoport's + # read from the report + logging.info("Using detectors from the report file") + f = open(_config.plugins.harnesses["DetectorOnly"]["report_path"], "r") + entry_line = None + while True: + line = f.readline() + line = json.loads(line) + if 'entry_type' in line and line['entry_type'] == 'start_run setup': + entry_line = line + break + if not line: + break # EOF + + if not entry_line: + raise ValueError("Report file missing setup entry") + + parsed_specs["detector"] = entry_line['plugins.detector_spec'].split(',') + + if not _config.plugins.model_type: # Have not initialized the file yet + command.start_run() + + command.detector_only_run(parsed_specs["detector"], evaluator) command.end_run() else: diff --git a/garak/command.py b/garak/command.py index 804f6e6e7..e6cee47c4 100644 --- a/garak/command.py +++ b/garak/command.py @@ -6,6 +6,7 @@ import logging import json + def start_logging(): from garak import _config @@ -256,7 +257,7 @@ def write_report_digest(report_filename, digest_filename): with open(digest_filename, "w", encoding="utf-8") as f: f.write(digest) -def detector_only_run(): +def detector_only_run(detectors, evaluator): import garak.harnesses.detectoronly import garak.attempt from garak import _config @@ -265,27 +266,6 @@ def detector_only_run(): with open(config["report_path"]) as f: data = [json.loads(line) for line in f] - - ## Get detectors and evaluator from report if not specified by the user - if "detectors" not in config or "eval_threshold" not in config: - try: - for d in data: - if 'entry_type' in d and d['entry_type'] == 'start_run setup': - entry_line = d - break - except: - raise ValueError("Unexpected start_run setup line in report.jsonl") - - if "detectors" not in config: - detectors = entry_line['plugins.detector_spec'].split(',') - # setattr(_config.plugins.harnesses.DetectorOnly, "detectors", detectors) - _config.plugins.harnesses["DetectorOnly"]["detectors"] = detectors - - if "eval_threshold" not in config: - eval_threshold = _config.run.eval_threshold - if "run.eval_threshold" in entry_line: - eval_threshold = entry_line["run.eval_threshold"] - _config.plugins.harnesses["DetectorOnly"]["eval_threshold"] = eval_threshold data = [d for d in data if d["entry_type"] == "attempt" and d["status"] == 1] attempts = [garak.attempt.Attempt.from_dict(d) for d in data] @@ -294,9 +274,4 @@ def detector_only_run(): raise ValueError("No attempts found in report.jsonl") detector_only_h = garak.harnesses.detectoronly.DetectorOnly() - config = _config.plugins.harnesses["DetectorOnly"] - evaluator = garak.evaluators.ThresholdEvaluator(config["eval_threshold"]) - - print(config["detectors"]) - - detector_only_h.run(attempts, config["detectors"], evaluator) + detector_only_h.run(attempts, detectors, evaluator)