@classmethod
def from_dict(cls, dicti: dict):
    """Build an attempt object from a dictionary of serialized fields.

    A fresh instance is created with ``cls()`` and every expected key is
    copied from ``dicti`` onto the attribute of the same name.

    Args:
        dicti: Mapping that must contain every key listed in
            ``field_order`` below.

    Returns:
        The newly created instance with all listed attributes assigned.

    Raises:
        KeyError: If one or more expected keys are absent. All missing keys
            are reported together and no instance is created in that case.
    """
    # Assignment order is kept exactly as the original implementation had it.
    # NOTE(review): this class overrides attribute access for ``prompt`` and
    # ``outputs``, so the order may be significant -- confirm before changing.
    field_order = (
        "uuid",
        "seq",
        "status",
        "probe_classname",
        "probe_params",
        "targets",
        "prompt",
        "outputs",
        "detector_results",
        "notes",
        "goal",
        "messages",
    )

    # Validate first so a malformed record fails with one complete message
    # instead of after a half-populated object has already been built.
    missing = [name for name in field_order if name not in dicti]
    if missing:
        raise KeyError(
            f"attempt dict is missing required keys: {', '.join(missing)}"
        )

    attempt_obj = cls()
    for name in field_order:
        setattr(attempt_obj, name, dicti[name])
    return attempt_obj
(default is just the primary detector, if given, else everything)", ) + + # harness + parser.add_argument( + "--harness_options", + type=str, + help="Type of harness to use." + ) + + parser.add_argument( + "--harness_option_file", + "-H", + type=str, + help="path to JSON file containing information harness options" + ) + # buffs parser.add_argument( "--buffs", @@ -337,7 +352,7 @@ def main(arguments=None) -> None: import garak.evaluators try: - plugin_types = ["probe", "generator"] + plugin_types = ["probe", "generator", "harness"] # do a special thing for CLI probe options, generator options for plugin_type in plugin_types: opts_arg = f"{plugin_type}_options" @@ -368,7 +383,10 @@ def main(arguments=None) -> None: ) raise e - config_plugin_type = getattr(_config.plugins, f"{plugin_type}s") + if plugin_type.endswith('s'): + config_plugin_type = getattr(_config.plugins, f"{plugin_type}es") + else: + config_plugin_type = getattr(_config.plugins, f"{plugin_type}s") config_plugin_type = _config._combine_into( opts_cli_config, config_plugin_type @@ -499,19 +517,50 @@ def main(arguments=None) -> None: print(f"📜 reporting to {_config.transient.report_filename}") if parsed_specs["detector"] == []: - command.probewise_run( - generator, parsed_specs["probe"], evaluator, parsed_specs["buff"] - ) + command.probewise_run(generator, parsed_specs["probe"], evaluator, parsed_specs["buff"]) else: command.pxd_run( generator, parsed_specs["probe"], parsed_specs["detector"], evaluator, - parsed_specs["buff"], + parsed_specs["buff"] ) + if "detectoronly" not in _config.plugins.harnesses: + command.end_run() + + elif "detectoronly" in _config.plugins.harnesses: + + if "report_path" not in _config.plugins.harnesses["DetectorOnly"]: + logging.error("report path not specified") + raise ValueError("Specify jsonl report path using report_path") + + if not parsed_specs["detector"]: # If the user doesn't specify any detectors, repeat the same as the reoport's + # read from the report + 
def detector_only_run(detectors, evaluator):
    """Re-run detectors over attempts stored in an existing JSONL report.

    The report path is read from the ``DetectorOnly`` harness configuration
    (``report_path`` key). Every record whose ``entry_type`` is ``"attempt"``
    and whose ``status`` equals 1 is rebuilt into an attempt object and handed
    to the detector-only harness together with ``detectors`` and ``evaluator``.

    Args:
        detectors: Detector names forwarded unchanged to the harness ``run``.
        evaluator: Evaluator forwarded unchanged to the harness ``run``.

    Raises:
        ValueError: If the report contains no matching attempt records.
    """
    import garak.harnesses.detectoronly
    import garak.attempt
    from garak import _config

    config = _config.plugins.harnesses["DetectorOnly"]

    attempts = []
    # Stream the report line by line instead of materialising it twice, and
    # read it as UTF-8 explicitly rather than with the platform default.
    with open(config["report_path"], encoding="utf-8") as report_file:
        for line in report_file:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) are not valid JSON.
                continue
            record = json.loads(line)
            # .get() tolerates records that lack one of the two keys.
            # NOTE(review): status 1 is presumably the "generated but not yet
            # detected" state from garak.attempt -- confirm against constants.
            if record.get("entry_type") == "attempt" and record.get("status") == 1:
                attempts.append(garak.attempt.Attempt.from_dict(record))

    if not attempts:
        raise ValueError("No attempts found in report.jsonl")

    detector_only_h = garak.harnesses.detectoronly.DetectorOnly()
    detector_only_h.run(attempts, detectors, evaluator)
def run_detectors(self, detectors, attempt_results, evaluator, probe=None):
    """Apply detectors to attempts, write them to the report, then evaluate.

    For each detector, its results are stored on every attempt under the
    detector name with the ``garak.detectors.`` prefix removed. Afterwards
    each attempt is marked complete and written as one JSON line to the
    transient report file. Non-empty results are passed to the evaluator;
    empty results only produce a warning.

    Args:
        detectors: Iterable of detector objects exposing ``detectorname`` and
            ``detect(attempt)``.
        attempt_results: List (or generator) of attempts to process.
        evaluator: Object whose ``evaluate`` receives the processed attempts.
        probe: Optional probe that produced the attempts; only used to name
            the probe in the zero-results warning.
    """
    # The caller accepts generators as well as lists. A generator would be
    # exhausted by the first detector and cannot be measured with len(), so
    # materialise anything that is not already a list.
    if not isinstance(attempt_results, list):
        attempt_results = list(attempt_results)

    for d in detectors:
        logging.debug("harness: run detector %s", d.detectorname)
        attempt_iterator = tqdm.tqdm(attempt_results, leave=False)
        detector_probe_name = d.detectorname.replace("garak.detectors.", "")
        attempt_iterator.set_description("detectors." + detector_probe_name)
        for attempt in attempt_iterator:
            attempt.detector_results[detector_probe_name] = list(d.detect(attempt))

    for attempt in attempt_results:
        attempt.status = garak.attempt.ATTEMPT_COMPLETE
        _config.transient.reportfile.write(json.dumps(attempt.as_dict()) + "\n")

    if not attempt_results:
        # With no attempts there is no attempt object to name in the message;
        # report the probe when one was supplied.
        if probe is not None:
            logging.warning("zero attempt results: probe %s", probe.probename)
        else:
            logging.warning("zero attempt results")
        return

    evaluator.evaluate(attempt_results)
def run(self, attempts, detector_names, evaluator):
    """Load the named detectors and run them over existing attempts.

    Names are normalised (surrounding whitespace removed, blanks and
    duplicates dropped) and processed in sorted order. Detectors that fail to
    load are skipped. The loaded detectors are then passed to
    ``run_detectors`` with no probe.

    Args:
        attempts: Attempts to run the detectors on.
        detector_names: Iterable of detector name strings.
        evaluator: Evaluator forwarded to ``run_detectors``.

    Raises:
        ValueError: If no detector could be loaded.
    """
    # Names may be padded, empty or repeated when they originate from a
    # split comma-separated spec; clean them so each detector loads once.
    wanted = sorted({name.strip() for name in detector_names if name.strip()})

    # _load_detector returns a falsy value on failure; keep only real ones.
    loaded = (self._load_detector(name) for name in wanted)
    detectors = [d for d in loaded if d]

    if not detectors:
        msg = "No detectors, nothing to do"
        logging.warning(msg)
        if getattr(_config.system, "verbose", 0) >= 2:
            print(msg)
        raise ValueError(msg)

    # No probe is involved in a detector-only run, so the optional probe
    # argument of run_detectors is left at its default.
    self.run_detectors(detectors, attempts, evaluator)