feat: support for new-style imports (#70) #72

Draft · wants to merge 7 commits into base: main
84 changes: 84 additions & 0 deletions docs/cases_import.rst
@@ -0,0 +1,84 @@
.. _main-cases_import:

===========
Case Import
===========

-------------------------
Phenopacket Bootstrapping
-------------------------

.. note::
   Currently, only PED and VCF files are supported for bootstrapping phenopackets.

You must have loaded the project configuration via ``projects project-retrieve`` so that the client knows the server location and credentials for the raw data.

The ``cases-import bootstrap-phenopackets`` command will then go over each file, incorporate it into the phenopackets file, and write out the phenopackets YAML.

The other files are handled as follows.
All absolute paths are assumed to be on the local file system whereas relative paths are assumed to be relative to the project import data store.
Note that absolute paths are also written verbatim to the phenopackets YAML file, so they will not resolve during the actual import.
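The path semantics above can be sketched as follows; ``PROJECT_DATA_STORE`` is a hypothetical prefix standing in for the project import data store, not an actual client variable.

```python
import os.path

# Hypothetical prefix standing in for the project import data store.
PROJECT_DATA_STORE = "s3://bucket-name/import-data"

def resolve_path(path: str) -> str:
    """Absolute paths refer to the local file system; relative paths
    are taken relative to the project import data store."""
    if os.path.isabs(path):
        # Local file system; note that such paths end up verbatim in
        # the phenopackets YAML and will not resolve during import.
        return path
    return f"{PROJECT_DATA_STORE}/{path}"

print(resolve_path("/data/sample.vcf.gz"))   # -> /data/sample.vcf.gz
print(resolve_path("case1/sample.vcf.gz"))   # -> s3://bucket-name/import-data/case1/sample.vcf.gz
```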

``*.ped``
   PED/pedigree file used to derive sample information.
   You can specify at most one PED file; it will overwrite any existing pedigree information.

``*.bam``, ``*.bam.bai``
   The header of each sequence alignment file is read and its sample names are matched against the pedigree.
   The samples in the BAM file must match those in the PED file.
   BAM files must be indexed.

``*.vcf.gz``, ``*.vcf.gz.tbi``
   The header of each variant call file is read, as well as its first ten records; this is used to distinguish sequence variant from structural variant files.
   You can currently give at most one sequence variant file but any number of structural variant files.
   VCF files must be indexed.

``$FILE.md5``
Assumed to be the MD5 checksum file of ``$FILE`` and stored as checksum attribute for it.

``*.csv``, ``*.txt``, ...
Information related to quality control from pipelines.
The command will try to detect the file types and register them into the phenopackets YAML file appropriately.
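The routing described above can be sketched as a suffix-based classifier. This is illustrative only: the real bootstrap command also inspects file contents (BAM/VCF headers), not just names, and the category labels here are invented for the example.

```python
def classify_payload(filename: str) -> str:
    """Illustrative sketch of the file routing described above.
    Category names are hypothetical, not the client's own."""
    lower = filename.lower()
    # Check checksum sidecars first: "sample.vcf.gz.md5" must not
    # be mistaken for a VCF file.
    if lower.endswith(".md5"):
        return "checksum"        # MD5 sidecar for the file it names
    if lower.endswith(".ped"):
        return "pedigree"        # at most one; overwrites pedigree info
    if lower.endswith((".bam", ".bam.bai")):
        return "alignment"       # sample names matched against the PED file
    if lower.endswith((".vcf.gz", ".vcf.gz.tbi")):
        return "variants"        # seq vs. SV decided from header + records
    return "quality-control"     # *.csv, *.txt, ... registered as QC data

print(classify_payload("case1.ped"))            # -> pedigree
print(classify_payload("sample.vcf.gz.md5"))    # -> checksum
```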

The ``--target-region`` argument can be given multiple times and specifies the target regions of the sequencing kit used.
Supported target regions are given as pseudo S3 URLs pointing into the server's internal storage and must be configured there by the server administrator.
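The default URLs follow a simple naming scheme that can be assembled like this (an illustrative sketch; the actual set of configured regions is specific to each server installation):

```python
def target_region_url(release: str, kit_slug: str) -> str:
    """Build the pseudo S3 URL for a default target-region BED file.

    ``release`` is one of GRCh37 or GRCh38; ``kit_slug`` is the
    kit's file-name stem, e.g. "twist-core-exome".
    """
    assert release in ("GRCh37", "GRCh38")
    return f"s3://varfish-server/seqmeta/target-regions/{release}/{kit_slug}.bed.gz"

print(target_region_url("GRCh38", "twist-core-exome"))
# -> s3://varfish-server/seqmeta/target-regions/GRCh38/twist-core-exome.bed.gz
```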

The following target regions are available by default (for ``$RELEASE`` being one of ``GRCh37`` or ``GRCh38``) on a VarFish server installation.

whole genome
``s3://varfish-server/seqmeta/target-regions/$RELEASE/whole-genome.bed.gz``

Agilent SureSelect Human All Exon V4
``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v4.bed.gz``

Agilent SureSelect Human All Exon V5
``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v5.bed.gz``

Agilent SureSelect Human All Exon V6
``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v6.bed.gz``

Agilent SureSelect Human All Exon V7
``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v7.bed.gz``

Agilent SureSelect Human All Exon V8
``s3://varfish-server/seqmeta/target-regions/$RELEASE/agilent-all-exon-v8.bed.gz``

IDT xGen Exome Research Panel v1
``s3://varfish-server/seqmeta/target-regions/$RELEASE/idt-xgen-exome-research-panel-v1.bed.gz``

IDT xGen Exome Research Panel v2
``s3://varfish-server/seqmeta/target-regions/$RELEASE/idt-xgen-exome-research-panel-v2.bed.gz``

Twist Comprehensive Exome
``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-comprehensive-exome.bed.gz``

Twist Core Exome
``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-core-exome.bed.gz``

Twist Exome V2.0
``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-exome-v2_0.bed.gz``

Twist RefSeq Exome
``s3://varfish-server/seqmeta/target-regions/$RELEASE/twist-refseq-exome.bed.gz``
1 change: 1 addition & 0 deletions docs/index.rst
@@ -20,6 +20,7 @@ This documentation will be expanded over time.
:maxdepth: 1

installation
cases_import

.. toctree::
:caption: Project Info
11 changes: 10 additions & 1 deletion requirements/base.txt
@@ -6,7 +6,7 @@ python-dateutil >=2.8.1,<3.0
# pydantic: typed models and validation
pydantic >=2,<3

# toml parsing if python <3.11
# toml parsing and writing
toml >=0.10.2,<0.11

# typer: typed command line interfaces.
@@ -38,3 +38,12 @@ jsonschema >=4.4.0,<4.20

# Type checking
typeguard >=2.13.3,<3.0

# Phenopackets parsing
phenopackets >=2.0,<3.0

# Parsing of YAML files
pyyaml >=6,<7

# Access to VCF files.
vcfpy >=0.13.6,<0.14
@@ -0,0 +1,11 @@
[[projects]]
uuid = "16251f30-1168-41c9-8af6-07c8f40f6860"
import_data_protocol = "http"
import_data_host = "http-host.example.com"
import_data_path = "http-prefix/"
import_data_user = "http-user"
import_data_password = "http-password"

[global]
varfish_server_url = "http://varfish.example.com:8080"
varfish_api_token = "faKeTOKeN"
@@ -1,4 +1,3 @@
{'sodar_uuid': 'e211747f-2a50-4a65-b192-c96bc2e111fa', 'date_created': '2023-10-31T08:15:15+01:00', 'date_modified': '2023-10-31T08:15:15+01:00', 'project': '062b8838-453f-4cf3-817d-a5ec76546462', 'title': 'my title', 'description': 'None', 'release': 'GRCh37', 'fields': ['pathogenicity', 'notes']}
{
"sodar_uuid": "e211747f-2a50-4a65-b192-c96bc2e111fa",
"date_created": "2023-10-31T08:15:15+01:00",
74 changes: 74 additions & 0 deletions tests/cli/test_projects.py
@@ -1,6 +1,7 @@
"""Test CLI for projects API."""

import json
import types
import typing
import uuid

@@ -12,6 +13,8 @@

from tests.conftest import FakeFs
from varfish_cli.cli import app
from varfish_cli.cli.projects import cli_project_load_config
from varfish_cli.config import CommonOptions


@pytest.fixture
@@ -110,3 +113,74 @@ def test_project_retrieve(

assert result.exit_code == 0, result.output
snapshot.assert_match(result.output, "result_output")


def test_project_load_config_raw_func_call(
fake_fs_configured: FakeFs,
requests_mock: RequestsMocker,
fake_conn: typing.Tuple[str, str],
snapshot: Snapshot,
mocker: MockerFixture,
):
mocker.patch("varfish_cli.config.open", fake_fs_configured.open_, create=True)
mocker.patch("varfish_cli.config.os", fake_fs_configured.os)
mocker.patch("varfish_cli.cli.projects.open", fake_fs_configured.open_, create=True)
mocker.patch("varfish_cli.cli.projects.os", fake_fs_configured.os, create=True)

responses = {
"import_data_host": ("STRING", "http-host.example.com"),
"import_data_password": ("STRING", "http-password"),
"import_data_path": ("STRING", "http-prefix/"),
"import_data_port": ("INTEGER", 80),
"import_data_protocol": ("STRING", "http"),
"import_data_user": ("STRING", "http-user"),
}

project_uuid = "16251f30-1168-41c9-8af6-07c8f40f6860"
host, token = fake_conn
req_mocks = []
for setting_name, (setting_type, setting_value) in responses.items():
req_mocks.append(
requests_mock.get(
(
f"{host}/project/api/settings/retrieve/{project_uuid}?app_name=cases_import"
f"&setting_name={setting_name}"
),
request_headers={"Authorization": f"Token {token}"},
json={
"project": project_uuid,
"user": None,
"name": setting_name,
"type": setting_type,
"value": setting_value,
"user_modifiable": True,
"app_name": "cases_import",
},
)
)

ctx = types.SimpleNamespace(
obj=CommonOptions(
verbose=True,
verify_ssl=False,
config=None,
varfish_server_url=host,
varfish_api_token=token,
)
)
cli_project_load_config(
ctx,
project_uuid=project_uuid,
config_path=fake_fs_configured.os.path.expanduser("~/.varfishrc.toml"),
)

rc_path = fake_fs_configured.os.path.expanduser("~/.varfishrc.toml")
with fake_fs_configured.open_(rc_path, "rt") as inputf:
fcontents = inputf.read()

mocker.stopall()

for req_mock in req_mocks:
assert req_mock.called_once, req_mock._netloc

snapshot.assert_match(fcontents, "result_output")
31 changes: 30 additions & 1 deletion tests/conftest.py
@@ -46,7 +46,7 @@ def fake_conn() -> typing.Tuple[str, str]:

@pytest.fixture
def fake_fs_configured(fake_fs: FakeFs, fake_conn: typing.Tuple[str, str]) -> FakeFs:
"""Fake file system with filled ``~/.varfishrc.toml``"""
"""Fake file system with minimal ``~/.varfishrc.toml``"""
host, token = fake_conn
conf_file_path = fake_fs.os.path.expanduser("~/.varfishrc.toml")
fake_fs.fs.create_file(
@@ -61,3 +61,32 @@ def fake_fs_configured(fake_fs: FakeFs, fake_conn: typing.Tuple[str, str]) -> Fa
+ "\n",
)
return fake_fs


@pytest.fixture
def fake_fs_configured_projects(fake_fs: FakeFs, fake_conn: typing.Tuple[str, str]) -> FakeFs:
"""Fake file system with ``~/.varfishrc.toml`` that also has project config"""
host, token = fake_conn
conf_file_path = fake_fs.os.path.expanduser("~/.varfishrc.toml")
fake_fs.fs.create_file(
conf_file_path,
contents="\n".join(
[
"[global]",
f'varfish_server_url = "{host}"',
f'varfish_api_token = "{token}"',
"",
"[[projects]]",
'title = "S3 Example"',
'uuid = "00000000-0000-0000-0000-000000000001"',
'import_data_protocol = "s3"',
'import_data_host = "s3-server.example.net"',
"import_data_port = 443",
'import_data_path = "bucket-name"',
'import_data_user = "s3-user"',
'import_data_password = "s3-key"',
]
)
+ "\n",
)
return fake_fs
37 changes: 37 additions & 0 deletions tests/data/config/varfishrc.projects.toml
@@ -0,0 +1,37 @@
[global]
varfish_server_url = "https://varfish.example.com/"
varfish_api_token = "39c01db5-a808-4262-8b4d-7fd712389b59"

[[projects]]
title = "S3 Example"
uuid = "00000000-0000-0000-0000-000000000001"
import_data_protocol = "s3"
import_data_host = "s3-server.example.net"
import_data_port = 443
import_data_path = "bucket-name"
import_data_user = "s3-user"
import_data_password = "s3-key"

[[projects]]
title = "HTTP Example"
uuid = "00000000-0000-0000-0000-000000000002"
import_data_protocol = "http"
import_data_host = "http-server.example.net"
import_data_path = "/http-prefix"
import_data_user = "http-user"
import_data_password = "http-password"

[[projects]]
title = "HTTPS Example"
uuid = "00000000-0000-0000-0000-000000000003"
import_data_protocol = "https"
import_data_host = "https-server.example.net"
import_data_path = "/https-prefix"
import_data_user = "https-user"
import_data_password = "https-password"

[[projects]]
title = "File Example"
uuid = "00000000-0000-0000-0000-000000000004"
import_data_protocol = "file"
import_data_path = "/path/prefix"
@@ -0,0 +1,9 @@
{
"title": "S3 Example",
"uuid": "00000000-0000-0000-0000-000000000001",
"import_data_protocol": "s3",
"import_data_host": "s3-server.example.net",
"import_data_path": "bucket-name",
"import_data_user": "s3-user",
"import_data_password": "s3-key"
}
38 changes: 38 additions & 0 deletions tests/snapshots/test_config/test_load_projects/result
@@ -0,0 +1,38 @@
[
{
"title": "S3 Example",
"uuid": "00000000-0000-0000-0000-000000000001",
"import_data_protocol": "s3",
"import_data_host": "s3-server.example.net",
"import_data_path": "bucket-name",
"import_data_user": "s3-user",
"import_data_password": "s3-key"
},
{
"title": "HTTP Example",
"uuid": "00000000-0000-0000-0000-000000000002",
"import_data_protocol": "http",
"import_data_host": "http-server.example.net",
"import_data_path": "/http-prefix",
"import_data_user": "http-user",
"import_data_password": "http-password"
},
{
"title": "HTTPS Example",
"uuid": "00000000-0000-0000-0000-000000000003",
"import_data_protocol": "https",
"import_data_host": "https-server.example.net",
"import_data_path": "/https-prefix",
"import_data_user": "https-user",
"import_data_password": "https-password"
},
{
"title": "File Example",
"uuid": "00000000-0000-0000-0000-000000000004",
"import_data_protocol": "file",
"import_data_host": null,
"import_data_path": "/path/prefix",
"import_data_user": null,
"import_data_password": null
}
]
31 changes: 31 additions & 0 deletions tests/test_common.py
@@ -0,0 +1,31 @@
import json
import typing
import uuid

import pytest
from pytest_mock import MockerFixture
from pytest_snapshot.plugin import Snapshot
from requests_mock.mocker import Mocker as RequestsMocker
from typer.testing import CliRunner

from tests.conftest import FakeFs
from varfish_cli import common


def test_load_project_config(
fake_fs_configured_projects: FakeFs,
mocker: MockerFixture,
snapshot: Snapshot,
):
mocker.patch("varfish_cli.common.open", fake_fs_configured_projects.open_, create=True)
mocker.patch("varfish_cli.common.os", fake_fs_configured_projects.os)

config_null = common.load_project_config(uuid.UUID("00000000-0000-0000-0000-000000000000"))
config_some = common.load_project_config(uuid.UUID("00000000-0000-0000-0000-000000000001"))

mocker.stopall()

assert config_null is None
snapshot.assert_match(
json.dumps(config_some.model_dump(mode="json"), indent=2), "configuration"
)