diff --git a/docs/cases_import.rst b/docs/cases_import.rst
new file mode 100644
index 0000000..b0713ea
--- /dev/null
+++ b/docs/cases_import.rst
@@ -0,0 +1,84 @@
+.. _main-cases_import:
+
+===========
+Case Import
+===========
+
+-------------------------
+Phenopacket Bootstrapping
+-------------------------
+
+.. note::
+    Currently, only PED and VCF files are supported for bootstrapping phenopackets.
+
+You must have loaded the project configuration via ``projects project-load-config`` so that the client knows the server location and the credentials of the raw data store.
+
+The ``cases-import bootstrap-phenopackets`` command will then go over each file, incorporate it into the phenopackets file, and write out the phenopackets YAML.
+
+The files given on the command line are handled as follows.
+Absolute paths are assumed to be on the local file system whereas relative paths are assumed to be relative to the project import data store.
+Note that absolute paths are also written to the phenopackets YAML file as-is and will not resolve during the server-side import.
+
+``*.ped``
+    PED/pedigree file from which the sample information is derived.
+    You can specify at most one PED file; it will overwrite existing pedigree information.
+
+``*.bam``, ``*.bam.bai``
+    The header of sequence alignment files is read and the sample name is used to match the file to the pedigree.
+    Note that the samples in the BAM file and the PED file must match.
+    BAM files must be indexed.
+
+``*.vcf.gz``, ``*.vcf.gz.tbi``
+    The header of variant call files is read, as well as the first ten records.
+    This is used to differentiate between sequence and structural variant files.
+    You can currently give at most one sequence variant file but any number of structural variant files.
+    VCF files must be indexed.
+
+``$FILE.md5``
+    Assumed to be the MD5 checksum file of ``$FILE`` and stored as the checksum attribute for it.
+
+``*.csv``, ``*.txt``, ...
+    Information related to quality control from pipelines.
+    The command will try to detect the file types and register them in the phenopackets YAML file appropriately.
+
+The ``--target-region`` argument can be given multiple times and specifies the target regions of the sequencing kit used.
+Supported target regions must be configured on the server.
+They are given as pseudo S3 URLs into the internal storage where the server administrator must configure them.
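+
+A typical session could look as follows (a sketch only; the project UUID and file names are placeholders, not real data):
+
+.. code-block:: console
+
+    $ varfish-cli projects project-load-config 00000000-0000-0000-0000-000000000001
+    $ varfish-cli cases-import bootstrap-phenopackets \
+        --target-region s3://varfish-server/seqmeta/target-regions/GRCh38/twist-core-exome.bed.gz \
+        00000000-0000-0000-0000-000000000001 \
+        family.phenopackets.yaml \
+        family.ped index.vcf.gz index.vcf.gz.tbi index.bam index.bam.bai
+
+The following target regions are available by default (for ``$RELEASE`` being one of ``GRCh37`` or ``GRCh38``) on a VarFish server installation.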
+
+whole genome
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/whole-genome.bed.gz``
+
+Agilent SureSelect Human All Exon V4
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v4.bed.gz``
+
+Agilent SureSelect Human All Exon V5
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v5.bed.gz``
+
+Agilent SureSelect Human All Exon V6
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v6.bed.gz``
+
+Agilent SureSelect Human All Exon V7
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v7.bed.gz``
+
+Agilent SureSelect Human All Exon V8
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v8.bed.gz``
+
+IDT xGen Exome Research Panel v1
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/idt-xgen-exome-research-panel-v1.bed.gz``
+
+IDT xGen Exome Research Panel v2
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/idt-xgen-exome-research-panel-v2.bed.gz``
+
+Twist Comprehensive Exome
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-comprehensive-exome.bed.gz``
+
+Twist Core Exome
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-core-exome.bed.gz``
+
+Twist Exome V2.0
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-exome-v2_0.bed.gz``
+
+Twist RefSeq Exome
+    ``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-refseq-exome.bed.gz``
diff --git a/docs/index.rst b/docs/index.rst
index 41d8ed3..74f277d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -20,6 +20,7 @@ This documentation will be expanded over time.
    :maxdepth: 1
 
    installation
+   cases_import
 
 .. toctree::
    :caption: Project Info
diff --git a/requirements/base.txt b/requirements/base.txt
index 70d4651..0976316 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -6,7 +6,7 @@ python-dateutil >=2.8.1,<3.0
 # pydantic: typed models and validation
 pydantic >=2,<3
 
-# toml parsing if python <3.11
+# toml parsing and writing
 toml >=0.10.2,<0.11
 
 # typer: typed command line interfaces.
@@ -38,3 +38,12 @@ jsonschema >=4.4.0,<4.20
 
 # Type checking
 typeguard >=2.13.3,<3.0
+
+# Phenopackets parsing
+phenopackets >=2.0,<3.0
+
+# Parsing of YAML files
+pyyaml >=6,<7
+
+# Access to VCF files.
+vcfpy >=0.13.6,<0.14
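The documentation above says that sequence and structural variant VCFs are told apart by reading the header and the first ten records. A rough sketch of such a check with vcfpy (whose requirement is added here) could look as follows; this is an illustration under assumed conventions (``SVTYPE`` INFO entries, symbolic/breakend ALT alleles), not the code added by this PR:

```python
import vcfpy


def looks_like_sv_vcf(path: str, num_records: int = 10) -> bool:
    """Guess whether the VCF file at ``path`` contains structural variants.

    Reads up to ``num_records`` records; an ``SVTYPE`` INFO entry or a
    symbolic/breakend ALT allele is taken as evidence for SVs.
    """
    reader = vcfpy.Reader.from_path(path)
    try:
        for i, record in enumerate(reader):
            if i >= num_records:
                break
            if "SVTYPE" in record.INFO:
                return True
            if any(isinstance(alt, (vcfpy.SymbolicAllele, vcfpy.BreakEnd)) for alt in record.ALT):
                return True
    finally:
        reader.close()
    return False
```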
diff --git a/tests/cli/snapshots/test_projects/test_project_load_config_raw_func_call/result_output b/tests/cli/snapshots/test_projects/test_project_load_config_raw_func_call/result_output
new file mode 100644
index 0000000..754ccd0
--- /dev/null
+++ b/tests/cli/snapshots/test_projects/test_project_load_config_raw_func_call/result_output
@@ -0,0 +1,11 @@
+[[projects]]
+uuid = "16251f30-1168-41c9-8af6-07c8f40f6860"
+import_data_protocol = "http"
+import_data_host = "http-host.example.com"
+import_data_path = "http-prefix/"
+import_data_user = "http-user"
+import_data_password = "http-password"
+
+[global]
+varfish_server_url = "http://varfish.example.com:8080"
+varfish_api_token = "faKeTOKeN"
diff --git a/tests/cli/snapshots/test_varannos/test_varannoset_create/result_output b/tests/cli/snapshots/test_varannos/test_varannoset_create/result_output
index c419f72..7138f18 100644
--- a/tests/cli/snapshots/test_varannos/test_varannoset_create/result_output
+++ b/tests/cli/snapshots/test_varannos/test_varannoset_create/result_output
@@ -1,4 +1,3 @@
-{'sodar_uuid': 'e211747f-2a50-4a65-b192-c96bc2e111fa', 'date_created': '2023-10-31T08:15:15+01:00', 'date_modified': '2023-10-31T08:15:15+01:00', 'project': '062b8838-453f-4cf3-817d-a5ec76546462', 'title': 'my title', 'description': 'None', 'release': 'GRCh37', 'fields': ['pathogenicity', 'notes']}
 {
   "sodar_uuid": "e211747f-2a50-4a65-b192-c96bc2e111fa",
   "date_created": "2023-10-31T08:15:15+01:00",
diff --git a/tests/cli/test_projects.py b/tests/cli/test_projects.py
index 226e856..372859c 100644
--- a/tests/cli/test_projects.py
+++ b/tests/cli/test_projects.py
@@ -1,6 +1,7 @@
 """Test CLI for projects API."""
 
 import json
+import types
 import typing
 import uuid
 
@@ -12,6 +13,8 @@ from tests.conftest import FakeFs
 from varfish_cli.cli import app
+from varfish_cli.cli.projects import cli_project_load_config
+from varfish_cli.config import CommonOptions
 
 
 @pytest.fixture
@@ -110,3 +113,74 @@ def test_project_retrieve(
     assert result.exit_code == 0, result.output
 
     snapshot.assert_match(result.output, "result_output")
+
+
+def test_project_load_config_raw_func_call(
+    fake_fs_configured: FakeFs,
+    requests_mock: RequestsMocker,
+    fake_conn: typing.Tuple[str, str],
+    snapshot: Snapshot,
+    mocker: MockerFixture,
+):
+    mocker.patch("varfish_cli.config.open", fake_fs_configured.open_, create=True)
+    mocker.patch("varfish_cli.config.os", fake_fs_configured.os)
+    mocker.patch("varfish_cli.cli.projects.open", fake_fs_configured.open_, create=True)
+    mocker.patch("varfish_cli.cli.projects.os", fake_fs_configured.os, create=True)
+
+    responses = {
+        "import_data_host": ("STRING", "http-host.example.com"),
+        "import_data_password": ("STRING", "http-password"),
+        "import_data_path": ("STRING", "http-prefix/"),
+        "import_data_port": ("INTEGER", 80),
+        "import_data_protocol": ("STRING", "http"),
+        "import_data_user": ("STRING", "http-user"),
+    }
+
+    project_uuid = "16251f30-1168-41c9-8af6-07c8f40f6860"
+    host, token = fake_conn
+    req_mocks = []
+    for setting_name, (setting_type, setting_value) in responses.items():
+        req_mocks.append(
+            requests_mock.get(
+                (
+                    f"{host}/project/api/settings/retrieve/{project_uuid}?app_name=cases_import"
+                    f"&setting_name={setting_name}"
+                ),
+                request_headers={"Authorization": f"Token {token}"},
+                json={
+                    "project": project_uuid,
+                    "user": None,
+                    "name": setting_name,
+                    "type": setting_type,
+                    "value": setting_value,
+                    "user_modifiable": True,
+                    "app_name": "cases_import",
+                },
+            )
+        )
+
+    ctx = types.SimpleNamespace(
+        obj=CommonOptions(
+            verbose=True,
+            verify_ssl=False,
+            config=None,
+            varfish_server_url=host,
+            varfish_api_token=token,
+        )
+    )
+    cli_project_load_config(
+        ctx,
+        project_uuid=project_uuid,
+        config_path=fake_fs_configured.os.path.expanduser("~/.varfishrc.toml"),
+    )
+
+    rc_path = fake_fs_configured.os.path.expanduser("~/.varfishrc.toml")
+    with fake_fs_configured.open_(rc_path, "rt") as inputf:
+        fcontents = inputf.read()
+
+    mocker.stopall()
+
+    for req_mock in req_mocks:
+        assert req_mock.called_once, req_mock._netloc
+
+    snapshot.assert_match(fcontents, "result_output")
diff --git a/tests/conftest.py b/tests/conftest.py
index 6ad9b25..04a9716 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -46,7 +46,7 @@ def fake_conn() -> typing.Tuple[str, str]:
 
 @pytest.fixture
 def fake_fs_configured(fake_fs: FakeFs, fake_conn: typing.Tuple[str, str]) -> FakeFs:
-    """Fake file system with filled ``~/.varfishrc.toml``"""
+    """Fake file system with minimal ``~/.varfishrc.toml``"""
     host, token = fake_conn
     conf_file_path = fake_fs.os.path.expanduser("~/.varfishrc.toml")
     fake_fs.fs.create_file(
@@ -61,3 +61,32 @@ def fake_fs_configured(fake_fs: FakeFs, fake_conn: typing.Tuple[str, str]) -> FakeFs:
         + "\n",
     )
     return fake_fs
+
+
+@pytest.fixture
+def fake_fs_configured_projects(fake_fs: FakeFs, fake_conn: typing.Tuple[str, str]) -> FakeFs:
+    """Fake file system with ``~/.varfishrc.toml`` that also has project config"""
+    host, token = fake_conn
+    conf_file_path = fake_fs.os.path.expanduser("~/.varfishrc.toml")
+    fake_fs.fs.create_file(
+        conf_file_path,
+        contents="\n".join(
+            [
+                "[global]",
+                f'varfish_server_url = "{host}"',
+                f'varfish_api_token = "{token}"',
+                "",
+                "[[projects]]",
+                'title = "S3 Example"',
+                'uuid = "00000000-0000-0000-0000-000000000001"',
+                'import_data_protocol = "s3"',
+                'import_data_host = "s3-server.example.net"',
+                "import_data_port = 443",
+                'import_data_path = "bucket-name"',
+                'import_data_user = "s3-user"',
+                'import_data_password = "s3-key"',
+            ]
+        )
+        + "\n",
+    )
+    return fake_fs
diff --git a/tests/data/config/varfishrc.projects.toml b/tests/data/config/varfishrc.projects.toml
new file mode 100644
index 0000000..018aac4
--- /dev/null
+++ b/tests/data/config/varfishrc.projects.toml
@@ -0,0 +1,37 @@
+[global]
+varfish_server_url = "https://varfish.example.com/"
+varfish_api_token = "39c01db5-a808-4262-8b4d-7fd712389b59"
+
+[[projects]]
+title = "S3 Example"
+uuid = "00000000-0000-0000-0000-000000000001"
+import_data_protocol = "s3"
+import_data_host = "s3-server.example.net"
+import_data_port = 443
+import_data_path = "bucket-name"
+import_data_user = "s3-user"
+import_data_password = "s3-key"
+
+[[projects]]
+title = "HTTP Example"
+uuid = "00000000-0000-0000-0000-000000000002"
+import_data_protocol = "http"
+import_data_host = "http-server.example.net"
+import_data_path = "/http-prefix"
+import_data_user = "http-user"
+import_data_password = "http-password"
+
+[[projects]]
+title = "HTTPS Example"
+uuid = "00000000-0000-0000-0000-000000000003"
+import_data_protocol = "https"
+import_data_host = "https-server.example.net"
+import_data_path = "/https-prefix"
+import_data_user = "https-user"
+import_data_password = "https-password"
+
+[[projects]]
+title = "File Example"
+uuid = "00000000-0000-0000-0000-000000000004"
+import_data_protocol = "file"
+import_data_path = "/path/prefix"
diff --git a/tests/snapshots/test_common/test_load_project_config/configuration b/tests/snapshots/test_common/test_load_project_config/configuration
new file mode 100644
index 0000000..d18aba2
--- /dev/null
+++ b/tests/snapshots/test_common/test_load_project_config/configuration
@@ -0,0 +1,9 @@
+{
+  "title": "S3 Example",
+  "uuid": "00000000-0000-0000-0000-000000000001",
+  "import_data_protocol": "s3",
+  "import_data_host": "s3-server.example.net",
+  "import_data_path": "bucket-name",
+  "import_data_user": "s3-user",
+  "import_data_password": "s3-key"
+}
\ No newline at end of file
diff --git a/tests/snapshots/test_config/test_load_projects/result b/tests/snapshots/test_config/test_load_projects/result
new file mode 100644
index 0000000..1efa663
--- /dev/null
+++ b/tests/snapshots/test_config/test_load_projects/result
@@ -0,0 +1,38 @@
+[
+  {
+    "title": "S3 Example",
+    "uuid": "00000000-0000-0000-0000-000000000001",
+    "import_data_protocol": "s3",
+    "import_data_host": "s3-server.example.net",
+    "import_data_path": "bucket-name",
+    "import_data_user": "s3-user",
+    "import_data_password": "s3-key"
+  },
+  {
+    "title": "HTTP Example",
+    "uuid": "00000000-0000-0000-0000-000000000002",
+    "import_data_protocol": "http",
+    "import_data_host": "http-server.example.net",
+    "import_data_path": "/http-prefix",
+    "import_data_user": "http-user",
+    "import_data_password": "http-password"
+  },
+  {
+    "title": "HTTPS Example",
+    "uuid": "00000000-0000-0000-0000-000000000003",
+    "import_data_protocol": "https",
+    "import_data_host": "https-server.example.net",
+    "import_data_path": "/https-prefix",
+    "import_data_user": "https-user",
+    "import_data_password": "https-password"
+  },
+  {
+    "title": "File Example",
+    "uuid": "00000000-0000-0000-0000-000000000004",
+    "import_data_protocol": "file",
+    "import_data_host": null,
+    "import_data_path": "/path/prefix",
+    "import_data_user": null,
+    "import_data_password": null
+  }
+]
\ No newline at end of file
diff --git a/tests/test_common.py b/tests/test_common.py
new file mode 100644
index 0000000..15ee7e2
--- /dev/null
+++ b/tests/test_common.py
@@ -0,0 +1,31 @@
+import json
+import uuid
+
+from pytest_mock import MockerFixture
+from pytest_snapshot.plugin import Snapshot
+
+from tests.conftest import FakeFs
+from varfish_cli import common
+
+
+def test_load_project_config(
+    fake_fs_configured_projects: FakeFs,
+    mocker: MockerFixture,
+    snapshot: Snapshot,
+):
+    mocker.patch("varfish_cli.common.open", fake_fs_configured_projects.open_, create=True)
+    mocker.patch("varfish_cli.common.os", fake_fs_configured_projects.os)
+
+    config_null = common.load_project_config(uuid.UUID("00000000-0000-0000-0000-000000000000"))
+    config_some = common.load_project_config(uuid.UUID("00000000-0000-0000-0000-000000000001"))
+
+    mocker.stopall()
+
+    assert config_null is None
+    snapshot.assert_match(
+        json.dumps(config_some.model_dump(mode="json"), indent=2), "configuration"
+    )
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 0000000..ac5450d
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,17 @@
+import json
+
+from pytest_snapshot.plugin import Snapshot
+
+from varfish_cli import config
+
+
+def test_load_config():
+    result = config.load_config("tests/data/config/varfishrc.toml")
+    assert result == ("https://varfish.example.com/", "39c01db5-a808-4262-8b4d-7fd712389b59")
+
+
+def test_load_projects(snapshot: Snapshot):
+    result = config.load_projects("tests/data/config/varfishrc.projects.toml")
+    assert len(result) == 4
+    result_str = json.dumps([obj.model_dump(mode="json") for obj in result.values()], indent=2)
+    snapshot.assert_match(result_str, "result")
diff --git a/tests/test_ftypes.py b/tests/test_ftypes.py
new file mode 100644
index 0000000..62d8ee2
--- /dev/null
+++ b/tests/test_ftypes.py
@@ -0,0 +1,7 @@
+from varfish_cli.ftypes import FileType, guess_by_path
+
+
+def test_guess_by_path():
+    assert guess_by_path("file.md5") == FileType.MD5
+    assert guess_by_path("file.bam") == FileType.BAM
+    assert guess_by_path("file.xyz") == FileType.UNKNOWN
diff --git a/varfish_cli/api/models.py b/varfish_cli/api/models.py
index af5203b..aa6fbed 100644
--- a/varfish_cli/api/models.py
+++ b/varfish_cli/api/models.py
@@ -341,3 +341,22 @@ class VarAnnoSetEntryV1(pydantic.BaseModel):
     alternative: str
     #: Data, the set's fields are the keys.
     payload: typing.Dict[str, str]
+
+
+class SettingsEntry(pydantic.BaseModel):
+    """Configuration entry from server"""
+
+    #: Project UUID.
+    project: typing.Optional[uuid.UUID]
+    #: User UUID, if any.
+    user: typing.Optional[uuid.UUID]
+    #: Name of the app.
+    app_name: str
+    #: Name of the setting.
+    name: str
+    #: Type of the setting.
+    type: typing.Literal["STRING", "INTEGER", "BOOLEAN"]
+    #: Value of the setting.
+    value: typing.Any
+    #: Whether the user can modify the setting.
+    user_modifiable: bool
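The ``SettingsEntry`` model above is what the settings endpoint's JSON payload is validated into. A minimal sketch of that validation, mirroring the ``pydantic.TypeAdapter`` call used in ``varfish_cli/api/project.py`` below (the payload values are made up, copied from the mocked test data):

```python
import pydantic

from varfish_cli.api.models import SettingsEntry

# Example payload as returned by the settings endpoint (illustrative values).
payload = {
    "project": "16251f30-1168-41c9-8af6-07c8f40f6860",
    "user": None,
    "app_name": "cases_import",
    "name": "import_data_port",
    "type": "INTEGER",
    "value": 80,
    "user_modifiable": True,
}

entry = pydantic.TypeAdapter(SettingsEntry).validate_python(payload)
assert entry.value == 80
```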
diff --git a/varfish_cli/api/project.py b/varfish_cli/api/project.py
index 04b664c..16f04a0 100644
--- a/varfish_cli/api/project.py
+++ b/varfish_cli/api/project.py
@@ -8,7 +8,7 @@ import requests
 
 from varfish_cli.api.common import raise_for_status
-from varfish_cli.api.models import Project
+from varfish_cli.api.models import Project, SettingsEntry
 from varfish_cli.common import strip_trailing_slash
 
 ACCEPT_API_VARFISH = ""
@@ -17,6 +17,8 @@ ENDPOINT_PROJECT_LIST = "/project/api/list"
 #: End point for retrieving projects.
 ENDPOINT_PROJECT_RETRIEVE = "/project/api/retrieve/{project_uuid}"
+#: End point for retrieving project settings.
+ENDPOINT_PROJECT_SETTING_RETRIEVE = "/project/api/settings/retrieve/{project_uuid}"
 
 
 def project_list(
@@ -48,3 +50,30 @@ def project_retrieve(
     result = requests.get(endpoint, headers=headers, verify=verify_ssl)
     raise_for_status(result)
     return pydantic.TypeAdapter(Project).validate_python(result.json())
+
+
+def project_settings_retrieve(
+    server_url: str,
+    api_token: str,
+    project_uuid: typing.Union[str, uuid.UUID],
+    app_name: typing.Optional[str],
+    setting_name: typing.Optional[str],
+    verify_ssl: bool = True,
+) -> SettingsEntry:
+    """Retrieve a single app setting for the given project from the server."""
+    server_url = strip_trailing_slash(server_url)
+    queries = []
+    if app_name:
+        queries.append(f"app_name={app_name}")
+    if setting_name:
+        queries.append(f"setting_name={setting_name}")
+    query = "&".join(queries)
+    if query:
+        query = f"?{query}"
+    endpoint = f"{server_url}{ENDPOINT_PROJECT_SETTING_RETRIEVE}{query}".format(
+        project_uuid=project_uuid
+    )
+    logger.debug("Sending GET request to end point %s", endpoint)
+    headers = {"Authorization": "Token %s" % api_token}
+    result = requests.get(endpoint, headers=headers, verify=verify_ssl)
+    raise_for_status(result)
+    return pydantic.TypeAdapter(SettingsEntry).validate_python(result.json())
diff --git a/varfish_cli/api/varannos.py b/varfish_cli/api/varannos.py
index bd4cee4..aa2cf18 100644
--- a/varfish_cli/api/varannos.py
+++ b/varfish_cli/api/varannos.py
@@ -64,7 +64,6 @@ def varannoset_create(
         endpoint, headers=headers, data=payload.model_dump(mode="json"), verify=verify_ssl
     )
     raise_for_status(result)
-    print(result.json())
     return pydantic.TypeAdapter(VarAnnoSetV1).validate_python(result.json())
diff --git a/varfish_cli/cli/__init__.py b/varfish_cli/cli/__init__.py
index 38a0434..4195f99 100644
--- a/varfish_cli/cli/__init__.py
+++ b/varfish_cli/cli/__init__.py
@@ -7,13 +7,11 @@ import typer
 
 from varfish_cli import __version__
-from varfish_cli.cli import cases, importer, projects, varannos
+from varfish_cli.cli import cases, cases_import, importer, projects, varannos
+from varfish_cli.common import DEFAULT_PATH_VARFISHRC
 from varfish_cli.config import CommonOptions, load_config
 from varfish_cli.exceptions import InvalidConfiguration
 
-#: Paths to search the global configuration in.
-DEFAULT_PATH_VARFISHRC = "~/.varfishrc.toml"
-
 
 def version_callback(value: bool):
     """Callback when called with 'version' or '--version'"""
@@ -29,6 +27,7 @@
 app.add_typer(varannos.app, name="varannos", help="Subcommands for 'varannos' API")
 app.add_typer(projects.app, name="projects", help="Subcommands for 'project' API")
 app.add_typer(cases.app, name="cases", help="Subcommands for 'cases' API")
+app.add_typer(cases_import.app, name="cases-import", help="Subcommands for 'cases-import' API")
 app.add_typer(importer.app, name="importer", help="Subcommands for 'importer' API")
diff --git a/varfish_cli/cli/cases_import.py b/varfish_cli/cli/cases_import.py
new file mode 100644
index 0000000..1cfedf8
--- /dev/null
+++ b/varfish_cli/cli/cases_import.py
@@ -0,0 +1,134 @@
+"""Implementation of varfish-cli subcommand "cases-import *"."""
+
+import datetime
+import os
+import typing
+import uuid
+
+from google.protobuf.json_format import MessageToDict, ParseDict, ParseError
+from logzero import logger
+from phenopackets import Family
+import typer
+import yaml
+
+from varfish_cli import common, ftypes
+from varfish_cli.common import DEFAULT_PATH_VARFISHRC
+from varfish_cli.parse_ped import Donor, parse_ped
+
+#: The ``Typer`` instance to use for the ``cases-import`` sub command.
+app = typer.Typer(no_args_is_help=True)
+
+
+def sync_family_with_donors(family: Family, donors: typing.Dict[str, Donor]) -> Family:
+    """Sync the given family with the given donors.
+
+    The first donor is assumed to be the index.
+    """
+    # Make a copy of the family so the input message is not modified; merging
+    # the donor records into the copy happens on this copy.
+    result = Family()
+    result.CopyFrom(family)
+    return result
+
+
+@app.command("bootstrap-phenopackets")
+def cli_bootstrap_phenopackets(
+    ctx: typer.Context,
+    project_uuid: typing.Annotated[uuid.UUID, typer.Argument(..., help="UUID of project")],
+    phenopacket_file: typing.Annotated[
+        str, typer.Argument(..., help="Path of phenopacket file to bootstrap")
+    ],
+    other_files: typing.Annotated[
+        typing.List[str], typer.Argument(..., help="Paths of files to incorporate")
+    ],
+    target_region: typing.Annotated[
+        typing.List[str], typer.Option("--target-region", help="Target region specification")
+    ],
+    config_path: typing.Annotated[
+        str,
+        typer.Option("--config-path", help="Path to configuration file", envvar="VARFISH_RC_PATH"),
+    ] = DEFAULT_PATH_VARFISHRC,
+):
+    """Bootstrap a new or existing phenopackets YAML file"""
+    # load configuration for the selected project
+    logger.info("Loading configuration for project %s from %s", project_uuid, config_path)
+    project_config = common.load_project_config(project_uuid, config_path=config_path)
+    if not project_config:
+        logger.error("No configuration found for project %s", project_uuid)
+        raise typer.Exit(1)
+
+    # split files by file type
+    files_by_type = {}
+    for other_file in other_files:
+        file_type = ftypes.guess_by_path(other_file)
+        files_by_type.setdefault(file_type, []).append(other_file)
+    for file_type, files in sorted(files_by_type.items(), key=lambda item: item[0].value):
+        if file_type is ftypes.FileType.UNKNOWN:
+            logger.warning("could not determine file type for %d files", len(files))
+        else:
+            logger.info("%d files of type %s", len(files), file_type.value)
+            for file_ in files:
+                logger.info("  - %s", file_)
+
+    # if we do not have a phenopacket file, ensure that we have a PED file
+    num_peds = len(files_by_type.get(ftypes.FileType.PED, []))
+    if not os.path.exists(phenopacket_file) and num_peds == 0:
+        logger.error("Phenopacket file does not exist and no PED file was given")
+        raise typer.Exit(1)
+    if num_peds > 1:
+        logger.error("More than one PED file given")
+        raise typer.Exit(1)
+
+    # load phenopacket file or create new one
+    family: Family
+    create_output: bool
+    if os.path.exists(phenopacket_file):
+        create_output = False
+        with open(phenopacket_file, "rt") as inputf:
+            try:
+                yaml_dict = yaml.safe_load(inputf)
+            except yaml.parser.ParserError as e:
+                logger.error("Could not parse phenopacket YAML file: %s", e)
+                raise typer.Exit(1)
+        if "family" not in yaml_dict:  # pragma: no cover
+            logger.error("No 'family' section found at top of phenopacket YAML file")
+            raise typer.Exit(1)
+        try:
+            family = ParseDict(js_dict=yaml_dict["family"], message=Family())
+        except ParseError as e:  # pragma: no cover
+            logger.error("Could not load phenopacket data: %s", e)
+            raise typer.Exit(1)
+    else:
+        create_output = True
+        family = Family()
+
+    # load pedigree and sync members in PED and phenopackets file
+    if num_peds == 1:
+        path_ped = files_by_type[ftypes.FileType.PED][0]
+        if not os.path.exists(path_ped):
+            logger.error("PED file %s does not exist", path_ped)
+            raise typer.Exit(1)
+        with open(path_ped, "rt") as inputf:
+            donors = parse_ped(inputf)
+        donors_by_ped = {donor.name: donor for donor in donors}
+        family = sync_family_with_donors(family, donors_by_ped)
+
+    # write out phenopackets file
+    if create_output:
+        logger.info("Creating new phenopacket file %s", phenopacket_file)
+    else:
+        timestamp = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
+        path_bak = f"{phenopacket_file}.bak~{timestamp}"
+        logger.info("Move file %s => %s", phenopacket_file, path_bak)
+        os.rename(phenopacket_file, path_bak)
+        logger.info("Re-creating phenopacket file %s", phenopacket_file)
+    with open(phenopacket_file, "wt") as outputf:
+        yaml.dump({"family": MessageToDict(family)}, outputf)
+
+    logger.info("All done. Have a nice day!")
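For reference, the ``MessageToDict``/``ParseDict`` round trip that ``bootstrap-phenopackets`` relies on can be exercised in isolation. The following is a minimal sketch with made-up identifiers, not part of the change itself:

```python
from google.protobuf.json_format import MessageToDict, ParseDict
from phenopackets import Family
import yaml

# Build a tiny family with just a proband ID (illustrative value only).
family = Family()
family.proband.subject.id = "index"

# Serialize to the {"family": ...} YAML layout used by the command ...
yaml_text = yaml.dump({"family": MessageToDict(family)})

# ... and parse it back into a protobuf message.
loaded = ParseDict(js_dict=yaml.safe_load(yaml_text)["family"], message=Family())
assert loaded.proband.subject.id == "index"
```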
diff --git a/varfish_cli/cli/projects.py b/varfish_cli/cli/projects.py
index bce2c27..9b989d3 100644
--- a/varfish_cli/cli/projects.py
+++ b/varfish_cli/cli/projects.py
@@ -1,13 +1,17 @@
 """Implementation of varfish-cli subcommand "projects *"."""
 
+import os
 import typing
 import uuid
 
+from logzero import logger
+import toml
 import typer
 
 from varfish_cli import api, common
 from varfish_cli.cli.common import ListObjects, RetrieveObject
-from varfish_cli.common import OutputFormat
+from varfish_cli.common import DEFAULT_PATH_VARFISHRC, OutputFormat
+from varfish_cli.config import CommonOptions, ProjectConfig
 
 #: Default fields for projects.
 DEFAULT_FIELDS_PROJECT: typing.Dict[OutputFormat, typing.Optional[typing.Tuple[str, ...]]] = {
@@ -76,3 +80,72 @@ def cli_project_retrieve(
         object_uuid=object_uuid,
         output_file=output_file,
     )
+
+
+@app.command("project-load-config")
+def cli_project_load_config(
+    ctx: typer.Context,
+    project_uuid: typing.Annotated[
+        uuid.UUID, typer.Argument(..., help="UUID of the project to load the configuration for")
+    ],
+    config_path: typing.Annotated[
+        str,
+        typer.Option("--config-path", help="Path to configuration file", envvar="VARFISH_RC_PATH"),
+    ] = DEFAULT_PATH_VARFISHRC,
+):
+    """Load project configuration for import and store in ``~/.varfishrc.toml``"""
+    common_options: CommonOptions = ctx.obj
+    logger.info("Retrieving project configuration...")
+
+    fields_types = {
+        "import_data_host": str,
+        "import_data_password": str,
+        "import_data_path": str,
+        "import_data_port": int,
+        "import_data_protocol": str,
+        "import_data_user": str,
+    }
+
+    kwargs = {"uuid": project_uuid}
+    for field_name, field_type in fields_types.items():
+        logger.debug(" - retrieving %s", field_name)
+        setting_entry = api.project_settings_retrieve(
+            server_url=common_options.varfish_server_url,
+            api_token=common_options.varfish_api_token.get_secret_value(),
+            project_uuid=project_uuid,
+            app_name="cases_import",
+            setting_name=field_name,
+            verify_ssl=common_options.verify_ssl,
+        )
+        if setting_entry.value:
+            kwargs[field_name] = field_type(setting_entry.value)
+    project_config = ProjectConfig(**kwargs).model_dump(mode="json")
+
+    logger.info("... all data retrieved, updating config...")
+    logger.debug(" - project_config: %s", project_config)
+
+    config_path = os.path.expanduser(config_path)
+    if os.path.exists(config_path):
+        with open(config_path, "rt") as tomlf:
+            try:
+                config_toml = toml.loads(tomlf.read())
+            except toml.TomlDecodeError as e:
+                logger.error("could not parse configuration file %s: %s", config_path, e)
+                raise typer.Exit(1)
+    else:
+        config_toml = {}
+
+    config_toml.setdefault("projects", [])
+    match_idx = None
+    for idx, project in enumerate(config_toml["projects"]):
+        if project["uuid"] == str(project_config["uuid"]):
+            match_idx = idx
+            break
+    else:
+        config_toml["projects"].append(project_config)
+    if match_idx is not None:
+        config_toml["projects"][match_idx] = project_config
+
+    with open(config_path, "wt") as outputf:
+        outputf.write(toml.dumps(config_toml))
+    logger.info("All done. Have a nice day!")
diff --git a/varfish_cli/common.py b/varfish_cli/common.py
index af0bcc1..f72a48a 100644
--- a/varfish_cli/common.py
+++ b/varfish_cli/common.py
@@ -5,10 +5,17 @@
 from enum import Enum, unique
 import io
 import json
+import os
 import typing
 import uuid
 
 from tabulate import tabulate
+import toml
+
+from varfish_cli.config import ProjectConfig
+
+#: Paths to search the global configuration in.
+DEFAULT_PATH_VARFISHRC = "~/.varfishrc.toml"
 
 
 class CustomEncoder(json.JSONEncoder):
@@ -89,3 +96,16 @@ def load_json(path_or_payload: str) -> typing.Any:
             return json.load(inputf)
     else:
         return json.loads(path_or_payload)
+
+
+def load_project_config(
+    project_uuid: uuid.UUID, *, config_path: str = DEFAULT_PATH_VARFISHRC
+) -> typing.Optional[ProjectConfig]:
+    """Load project configuration from file."""
+    config_path = os.path.expanduser(config_path)
+    with open(config_path, "rt") as inputf:
+        config = toml.loads(inputf.read())
+    for project in config.get("projects", []):
+        if project["uuid"] == str(project_uuid):
+            return ProjectConfig(**project)
+    return None
diff --git a/varfish_cli/config.py b/varfish_cli/config.py
index 363f987..ec197c9 100644
--- a/varfish_cli/config.py
+++ b/varfish_cli/config.py
@@ -1,18 +1,13 @@
 """Common configuration code."""
 
+import enum
 import os
-
-try:
-    import tomllib
-    from tomllib import TOMLDecodeError
-except ImportError:
-    import toml as tomllib
-    from toml import TomlDecodeError as TOMLDecodeError
-
 import typing
+import uuid
 
 from logzero import logger
 import pydantic
+import toml
 import typer
 
@@ -47,8 +42,8 @@ def load_config(config_path: str) -> typing.Tuple[typing.Optional[str], typing.O
     logger.debug("loading configuration from %s", config_path)
     with open(config_path, "rt") as tomlf:
         try:
-            config_toml = tomllib.loads(tomlf.read())
-        except TOMLDecodeError as e:
+            config_toml = toml.loads(tomlf.read())
+        except toml.TomlDecodeError as e:
             logger.error("could not parse configuration file %s: %s", config_path, e)
             raise typer.Exit(1)
     toml_varfish_server_url = config_toml.get("global", {}).get("varfish_server_url")
@@ -58,8 +53,55 @@ def load_config(config_path: str) -> typing.Tuple[typing.Optional[str], typing.O
         logger.debug("global/varfish_server_url not set in %s", config_path)
     toml_varfish_api_token = config_toml.get("global", {}).get("varfish_api_token")
     if toml_varfish_api_token:
-        logger.debug("using global/varfish_server_url from %s", config_path)
+        logger.debug("using global/varfish_api_token from %s", config_path)
     else:
         logger.debug("global/varfish_api_token not set in %s", config_path)
 
     return toml_varfish_server_url, toml_varfish_api_token
+
+
+@enum.unique
+class ImportDataProtocol(enum.Enum):
+    """Protocol for importing data."""
+
+    S3 = "s3"
+    HTTP = "http"
+    HTTPS = "https"
+    FILE = "file"
+
+
+class ProjectConfig(pydantic.BaseModel):
+    """Configuration for one project in ``~/.varfishrc.toml``."""
+
+    #: Human-readable name of the project.
+    title: typing.Optional[str] = None
+    #: UUID of project.
+    uuid: uuid.UUID
+    #: Protocol to use.
+    import_data_protocol: ImportDataProtocol
+    #: Host name to use.
+    import_data_host: typing.Optional[str] = None
+    #: Path prefix to use.
+    import_data_path: str
+    #: User name to use for connecting, if any.
+    import_data_user: typing.Optional[str] = None
+    #: Password to use for connecting, if any.
+    import_data_password: typing.Optional[pydantic.SecretStr] = None
+
+    @pydantic.field_serializer("import_data_password", when_used="json")
+    def dump_secret(self, v: typing.Optional[pydantic.SecretStr]):
+        if v:
+            return v.get_secret_value()
+        else:
+            return v
+
+
+def load_projects(config_path: str) -> typing.Dict[uuid.UUID, ProjectConfig]:
+    """Load projects from configuration TOML file at ``config_path``"""
+
+    with open(config_path, "rt") as inputf:
+        fcontents = inputf.read()
+        toml_dict = toml.loads(fcontents)
+
+    projects = list(map(ProjectConfig.model_validate, toml_dict.get("projects", [])))
+    return {p.uuid: p for p in projects}
diff --git a/varfish_cli/ftypes.py b/varfish_cli/ftypes.py
new file mode 100644
index 0000000..ac008a3
--- /dev/null
+++ b/varfish_cli/ftypes.py
@@ -0,0 +1,66 @@
+"""Support for file types (for upload)."""
+
+import enum
+import pathlib
+import typing
+
+
+class UnsupportedFileTypeWarning(UserWarning):
+    """Warning for unsupported file types."""
+
+
+@enum.unique
+class FileType(enum.Enum):
+    """Enumeration of supported file types."""
+
+    #: Unknown file type.
+    UNKNOWN = "UNKNOWN"
+
+    #: MD5 checksum file.
+    MD5 = "MD5"
+    #: SHA1 checksum file.
+    SHA1 = "SHA1"
+    #: SHA256 checksum file.
+    SHA256 = "SHA256"
+
+    #: BAM file.
+    BAM = "BAM"
+    #: BAM index file.
+    BAM_BAI = "BAM_BAI"
+
+    #: VCF file.
+    VCF = "VCF"
+    #: VCF tabix index file.
+    VCF_TBI = "VCF_TBI"
+
+    #: PLINK pedigree file.
+    PED = "PED"
+
+    @property
+    def is_checksum(self):
+        """Return whether this is a checksum file."""
+        return self in (self.MD5, self.SHA1, self.SHA256)
+
+
+#: Map from file suffixes to file types.
+SUFFIX_MAP = {
+    ".md5": FileType.MD5,
+    ".sha1": FileType.SHA1,
+    ".sha256": FileType.SHA256,
+    ".bam": FileType.BAM,
+    ".bam.bai": FileType.BAM_BAI,
+    ".vcf": FileType.VCF,
+    ".vcf.gz": FileType.VCF,
+    ".vcf.gz.tbi": FileType.VCF_TBI,
+    ".ped": FileType.PED,
+}
+
+
+def guess_by_path(path: typing.Union[str, pathlib.Path]) -> FileType:
+    """Guess the file type from the given path."""
+    path_ = pathlib.Path(path)
+    for suffix, file_type in SUFFIX_MAP.items():
+        if path_.name.endswith(suffix):
+            return file_type
+    return FileType.UNKNOWN
diff --git a/varfish_cli/parse_ped.py b/varfish_cli/parse_ped.py
index c14e09e..f1608ac 100644
--- a/varfish_cli/parse_ped.py
+++ b/varfish_cli/parse_ped.py
@@ -25,7 +25,7 @@ class Donor(pydantic.BaseModel):
     disease: str
 
 
-def parse_ped(ped_file) -> typing.List[Donor]:
+def parse_ped(ped_file: typing.TextIO) -> typing.List[Donor]:
     """Parse a given PED file and return ``Donor`` objects."""
     lines = []
     for line in ped_file.readlines():
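Since ``parse_ped`` now accepts any ``typing.TextIO``, it can be fed from an in-memory buffer. A rough usage sketch follows; it assumes the conventional whitespace-separated 6-column PLINK PED layout and only relies on the ``Donor.name`` attribute that the ``cases-import`` code above uses — the parser's exact column handling is not shown in this diff:

```python
import io

from varfish_cli.parse_ped import parse_ped

# A minimal trio in the usual 6-column layout:
# family, name, father, mother, sex, disease status.
ped_text = "\n".join(
    [
        "FAM index  father mother 1 2",
        "FAM father 0      0      1 1",
        "FAM mother 0      0      2 1",
    ]
)

donors = parse_ped(io.StringIO(ped_text))
print([donor.name for donor in donors])
```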