diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh index 43930b4..ccf7487 100755 --- a/.devcontainer/post-create.sh +++ b/.devcontainer/post-create.sh @@ -16,8 +16,20 @@ if uname -a | grep -q 'aarch64'; then $GHIDRA_INSTALL_DIR/support/buildNatives fi -# install local workspace -pip install -e . +# install local workspace and test requirements +pip install -e ".[testing]" + +# git clone test data if dir doesn't exist +TEST_DATA_PATH="tests/data" + +if [ -z "$(ls -A $TEST_DATA_PATH)" ]; then + git clone https://github.com/clearbluejar/ghidriff-test-data.git tests/data + pushd $TEST_DATA_PATH + git remote set-url origin git@github.com:clearbluejar/ghidriff-test-data.git + popd +fi + + # Setup Ghidra Dev for Reference # git clone https://github.com/NationalSecurityAgency/ghidra.git ~/ghidra-master diff --git a/.github/workflows/lint-python-package.yml b/.github/workflows/lint-python-package.yml new file mode 100644 index 0000000..e7b33f9 --- /dev/null +++ b/.github/workflows/lint-python-package.yml @@ -0,0 +1,45 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Lint Python Package + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + workflow_dispatch: + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.11"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install autopep8 flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Install package + run: | + python -m pip install -e . 
+ - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=130 --statistics --ignore F405,F401,F403 + - name: Lint with autopep8 + run: | + # print a diff of the formatting changes autopep8 would make (report only, no files are modified) + autopep8 -r . -d \ No newline at end of file diff --git a/.github/workflows/pytest-devcontainer.yml b/.github/workflows/pytest-devcontainer.yml new file mode 100644 index 0000000..8eee882 --- /dev/null +++ b/.github/workflows/pytest-devcontainer.yml @@ -0,0 +1,37 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Pytest Python Package In Devcontainer + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + workflow_dispatch: + +jobs: + test: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + image: ["latest", "10.1.5ghidra3.11python-bookworm"] + + steps: + - uses: actions/checkout@v3 + - name: Test with pytest on devcontainer + uses: devcontainers/ci@v0.3 + with: + imageName: ghcr.io/clearbluejar/ghidra-python + cacheFrom: ghcr.io/clearbluejar/ghidra-python + imageTag: ${{matrix.image}} + push: never + runCmd: | + pip install --upgrade pip + # install package and testing + pip install -e ".[testing]" + # download data to shared test data + git clone https://github.com/clearbluejar/ghidriff-test-data.git tests/data + pytest -rA \ No newline at end of file diff --git a/.gitignore b/.gitignore index 05e1676..e2b5907 100644 --- a/.gitignore +++ b/.gitignore @@ -132,4 +132,7 @@ dmypy.json .ghidra_projects*/ .ghidra_bridge*/ .symbols*/ -.ghidriffs/ +ghidriffs/ + +# pytest data 
(pulled from https://github.com/clearbluejar/ghidriff-test-data) +tests/data diff --git a/ghidriff/__init__.py b/ghidriff/__init__.py index efd6bd6..de029cc 100644 --- a/ghidriff/__init__.py +++ b/ghidriff/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.3.0' +__version__ = '0.4.0' __author__ = 'clearbluejar' # Expose API @@ -6,7 +6,8 @@ from .version_tracking_diff import VersionTrackingDiff from .simple_diff import SimpleDiff from .structural_graph_diff import StructualGraphDiff +from .__main__ import get_parser, get_engine_classes __all__ = [ - "GhidraDiffEngine", "SimpleDiff", "StructualGraphDiff", "VersionTrackingDiff" + "GhidraDiffEngine", "SimpleDiff", "StructualGraphDiff", "VersionTrackingDiff", "get_parser", "get_engine_classes" ] diff --git a/ghidriff/__main__.py b/ghidriff/__main__.py index db0e823..3a95674 100644 --- a/ghidriff/__main__.py +++ b/ghidriff/__main__.py @@ -16,14 +16,13 @@ def get_engine_classes() -> dict: return engines - -def main(): +def get_parser() -> argparse.ArgumentParser: """ - ghidriff - GhidraDiffEngine module main function + Build main ghidriff parser """ parser = argparse.ArgumentParser(description='ghidriff - A Command Line Ghidra Binary Diffing Engine', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('old', nargs=1, help="Path to old version of binary '/somewhere/bin.old'") parser.add_argument('new', action='append', nargs='+', @@ -34,9 +33,19 @@ def main(): parser.add_argument('--engine', help='The diff implementation to use.', default='VersionTrackingDiff', choices=engines.keys()) - parser.add_argument('-o', '--output-path', help='Output path for resulting diffs', default='.ghidriffs') + parser.add_argument('-o', '--output-path', help='Output path for resulting diffs', default='ghidriffs') parser.add_argument('--summary', help='Add a summary diff if more than two bins are provided', default=False) + return parser + + +def main(): + """ + 
ghidriff - GhidraDiffEngine module main function + """ + + parser = get_parser() + GhidraDiffEngine.add_ghidra_args_to_parser(parser) args = parser.parse_args() @@ -61,6 +70,7 @@ def main(): project_name = f'{args.project_name}-{binary_paths[0].name}-{binary_paths[-1].name}' + engines = get_engine_classes() DiffEngine: GhidraDiffEngine = engines[args.engine] d: GhidraDiffEngine = DiffEngine(args=args, diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 576a1e3..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -ghidra-stubs -pyhidra -mdutils \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index fdaf9e2..cde2b5b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,14 +3,14 @@ name = ghidriff author = clearbluejar author_email = clearbluejar@clearbluejar.com version = attr:ghidriff.__version__ -description = A binary diffing engine leveraging Ghidra and its FlatProgramAPI +description = Ghidra Binary Diffing Engine long_description_content_type = text/markdown long_description = file:README.md license = GPL-3.0 license license_files = LICENSE url = https://github.com/clearbluejar/ghidriff -keywords = patchdiff, binaries, bindiff, ghidra +keywords = patchdiff, binaries, bindiff, ghidra, ghidriff platform = any classifiers = Development Status :: 3 - Alpha @@ -35,7 +35,19 @@ install_requires = console_scripts = ghidriff = ghidriff.__main__:main +[options.extras_require] +testing = + pytest + requests + pytest-datadir + [tool:pytest] testpaths = tests +required_plugins = + pytest-datadir + addopts = - -p no:faulthandler \ No newline at end of file + -p no:faulthandler + +[pycodestyle] +max_line_length = 130 \ No newline at end of file diff --git a/tests/test_diff.py b/tests/test_diff.py new file mode 100644 index 0000000..d547aad --- /dev/null +++ b/tests/test_diff.py @@ -0,0 +1,127 @@ +from pathlib import Path + +from ghidriff.ghidra_diff_engine import GhidraDiffEngine + +from ghidriff import get_parser, get_engine_classes, 
VersionTrackingDiff + +import requests +import json +import os + +SYMBOLS_DIR = 'symbols' +BINS_DIR = 'bins' + +def get_chrome_headers() -> dict: + + headers = { + "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "accept-language": "en-US,en;q=0.9", + "cache-control": "no-cache", + "pragma": "no-cache", + "sec-ch-ua": '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"macOS"', + "sec-fetch-dest": "document", + "sec-fetch-mode": "navigate", + "sec-fetch-site": "none", + "sec-fetch-user": "?1", + "upgrade-insecure-requests": "1" + } + + return headers + + + +def test_diff_afd_cve_2023_21768(shared_datadir: Path): + """ + Tests end to end diff of CVE + """ + + test_name = 'cve-2023-21768' + output_path = shared_datadir / test_name + output_path.mkdir(exist_ok=True, parents=True) + symbols_path = shared_datadir / SYMBOLS_DIR + bins_path = shared_datadir / BINS_DIR + + + # setup bins + + old_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1028' + new_bin_path = bins_path / 'afd.sys.x64.10.0.22621.1415' + + # TODO figure out why these downloads are unreliable + # for now just git clone ghidriff-test-data + # old_bin_path = shared_datadir / 'afd.sys.x64.10.0.22621.1028' + # old_url = 'https://msdl.microsoft.com/download/symbols/afd.sys/0C5C6994A8000/afd.sys' + # new_bin_path = shared_datadir / 'afd.sys.x64.10.0.22621.1415' + # new_url = 'https://msdl.microsoft.com/download/symbols/afd.sys/50989142A9000/afd.sys' + + # download binaries + # download is unreliable + # headers = get_chrome_headers() + # old_bin_path.write_bytes(requests.get(old_url,headers=headers).content) + # new_bin_path.write_bytes(requests.get(new_url,headers=headers).content) + + assert old_bin_path.exists() + assert new_bin_path.exists() + + parser = get_parser() + + GhidraDiffEngine.add_ghidra_args_to_parser(parser) + + args = 
parser.parse_args(['-s', str(symbols_path), str(old_bin_path.absolute()),str(new_bin_path.absolute())]) + + engine_log_path = output_path / parser.get_default('log_path') + + binary_paths = args.old + [bin for sublist in args.new for bin in sublist] + + binary_paths = [Path(path) for path in binary_paths] + + if any([not path.exists() for path in binary_paths]): + missing_bins = [f'{path.name}' for path in binary_paths if not path.exists()] + raise FileNotFoundError(f"Missing Bins: {' '.join(missing_bins)}") + + project_name = f'{args.project_name}-{binary_paths[0].name}-{binary_paths[-1].name}' + + + DiffEngine: GhidraDiffEngine = VersionTrackingDiff + + d: GhidraDiffEngine = DiffEngine(args=args, + verbose=True, + threaded=args.threaded, + max_ram_percent=args.max_ram_percent, + print_jvm_flags=args.print_flags, + jvm_args=args.jvm_args, + force_analysis=args.force_analysis, + force_diff=args.force_diff, + verbose_analysis=args.va, + no_symbols=args.no_symbols, + engine_log_path=engine_log_path, + engine_log_level=args.log_level, + engine_file_log_level=args.file_log_level, + ) + + d.setup_project(binary_paths, args.project_location, project_name, args.symbols_path) + + d.analyze_project() + + pdiff = d.diff_bins(old_bin_path, new_bin_path) + pdiff_json = json.dumps(pdiff) + + d.validate_diff_json(pdiff_json) + + diff_name = f"{old_bin_path.name}-{new_bin_path.name}_diff" + + d.dump_pdiff_to_path(diff_name, + pdiff, + output_path, + side_by_side=args.side_by_side, + max_section_funcs=args.max_section_funcs, + md_title=args.md_title) + + assert len(pdiff['functions']['modified']) == 10 + assert len(pdiff['functions']['added']) == 28 + assert len(pdiff['functions']['deleted']) == 0 + + func_name = "AfdNotifyRemoveIoCompletion" + assert any([func_name in func['old']['name'] or func_name in func['new']['name'] for func in pdiff['functions']['modified'] ]) is True \ No newline at end of file diff --git a/tests/test_main.py b/tests/test_main.py deleted file mode 100644 
index e69de29..0000000 diff --git a/tests/test_startup.py b/tests/test_startup.py index 894e5fe..49a9cda 100644 --- a/tests/test_startup.py +++ b/tests/test_startup.py @@ -1,12 +1,63 @@ -from pathlib import Path -import ghidriff -import os -import pytest import pyhidra +def test_pyhidra_start(): + pyhidra.start(verbose=True) -def test_bogus_ghidra_install_dir(monkeypatch): +def test_ghidra_install_dir(): + import os + install_dir = os.getenv("GHIDRA_INSTALL_DIR") + assert install_dir == "/ghidra" - monkeypatch.setenv("GHIDRA_INSTALL_DIR", "/somebogusplace") - print(os.getenv('GHIDRA_INSTALL_DIR')) - pyhidra.start() +#from ghidriff import GhidraDiffEngine +# from pytest import MonkeyPatch + +# @pytest.fixture +# def setup_bogus_env(mon): +# from mock import patch + +# @patch('pyhidra.GHIDRA_INSTALL_DIR', "/someboguspath"): +# def test_bogus_ghidra_install_dira(): +# with pytest.raises(FileNotFoundError): +# import pyhidra as err_pyhidra +# err_pyhidra.start(verbose=True) + + +# def test_ghidra_install_dir(): +# # import sys +# # sys.modules.pop('pyhidra') +# with MonkeyPatch.context() as mp: +# mp.delenv("GHIDRA_INSTALL_DIR") + +# import pyhidra +# # print(os.getenv("GHIDRA_INSTALL_DIR")) +# with pytest.raises(SystemExit) as pytest_wrapped_e: +# launcher = pyhidra.start(verbose=True) +# assert pytest_wrapped_e.type == SystemExit +# #assert pytest_wrapped_e.value.code == 42 +# import os +# print(os.getenv("GHIDRA_INSTALL_DIR")) +# # from importlib import reload +# # reload(pyhidra) +# import sys +# del sys.modules['pyhidra'] + + + +# def test_bogus_ghidra_install_dir(): +# #monkeypatch.setenv("GHIDRA_INSTALL_DIR", '/someboguspath') +# import os +# print(os.getenv("GHIDRA_INSTALL_DIR")) +# with MonkeyPatch.context() as mp: +# with pytest.raises(FileNotFoundError): +# import pyhidra +# #mp.setattr(pyhidra.constants,'GHIDRA_INSTALL_DIR', '/someboguspath') +# pyhidra.start(verbose=True) + + + + + + # launcher = pyhidra.start(verbose=True) + # 
print(launcher.check_ghidra_version()) + +#det test_file_not_exist(): \ No newline at end of file