diff --git a/.coveragerc_omit b/.coveragerc_omit index 96a2463..b203626 100644 --- a/.coveragerc_omit +++ b/.coveragerc_omit @@ -2,6 +2,7 @@ omit = vitessce/config.py vitessce/export.py + vitessce/file_def_utils.py vitessce/routes.py vitessce/widget.py vitessce/wrappers.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d29bbec..2dddba7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - version: ['3.8', '3.12'] + version: ['3.9', '3.12'] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 diff --git a/docs/notebooks/spatial_data.ipynb b/docs/notebooks/spatial_data.ipynb new file mode 100644 index 0000000..6e14a24 --- /dev/null +++ b/docs/notebooks/spatial_data.ipynb @@ -0,0 +1,267 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "nbsphinx": "hidden" + }, + "source": [ + "# Vitessce Widget Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization of SpatialData Object" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import dependencies\n", + "\n", + "We need to import the classes and functions that we will be using from the corresponding packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "import dask\n", + "\n", + "dask.config.set({'dataframe.query-planning-warning': False})\n", + "\n", + "from spatialdata import read_zarr\n", + "import scanpy as sc\n", + "\n", + "from vitessce import (\n", + " VitessceConfig,\n", + " Component as cm,\n", + " CoordinationType as ct,\n", + " CoordinationLevel as CL,\n", + " AbstractWrapper,\n", + " SpatialDataWrapper,\n", + " get_initial_coordination_scope_prefix\n", + ")\n", + "from vitessce.data_utils import (\n", + " optimize_adata,\n", + " VAR_CHUNK_SIZE,\n", + ")\n", + "import zipfile\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zip_filepath = Path(\"data/visium.spatialdata.zarr.zip\")\n", + "spatialdata_filepath = zip_filepath.with_suffix('')\n", + "if not zip_filepath.exists():\n", + " spatialdata_filepath.parent.mkdir(exist_ok=True)\n", + " urlretrieve('https://s3.embl.de/spatialdata/spatialdata-sandbox/visium_associated_xenium_io.zip', zip_filepath)\n", + "if not spatialdata_filepath.exists():\n", + " with zipfile.ZipFile(zip_filepath,\"r\") as zip_ref:\n", + " zip_ref.extractall(spatialdata_filepath.parent)\n", + " (spatialdata_filepath.parent / \"data.zarr\").rename(spatialdata_filepath)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. 
Load the data\n", + "\n", + "Note: this function may print a `FutureWarning`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spatialdata = read_zarr(spatialdata_filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spatialdata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Create the Vitessce widget configuration\n", + "\n", + "Vitessce needs to know which pieces of data we are interested in visualizing, the visualization types we would like to use, and how we want to coordinate (or link) the views." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.1. Instantiate a `VitessceConfig` object\n", + "\n", + "Use the `VitessceConfig` constructor to create an instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc = VitessceConfig(schema_version=\"1.0.16\", name='Visium SpatialData Demo (visium_associated_xenium_io)', description='From https://spatialdata.scverse.org/en/latest/tutorials/notebooks/datasets/README.html')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.2. Add a dataset to the `VitessceConfig` instance\n", + "\n", + "In Vitessce, a dataset is a container for one file per data type. The `.add_dataset(name)` method on the `vc` instance sets up and returns a new dataset instance.\n", + "\n", + "Then, we can call the dataset's `.add_object(wrapper_object)` method to attach a \"data wrapper\" instance to our new dataset. For example, the `AnnDataWrapper` helps to configure AnnData Zarr stores for use in the Vitessce configuration.\n", + "\n", + "Dataset wrapper classes may require additional parameters to resolve ambiguities. For instance, `AnnData` objects may store multiple clusterings or cell type annotation columns in the `adata.obs` dataframe. 
We can use the parameter `obs_set_paths` to tell Vitessce that certain columns of the `obs` dataframe correspond to cell type annotations or cell clusterings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "[wrapper] = SpatialDataWrapper.from_object(\n", + " spatialdata,\n", + " table_keys_to_image_elems={\"table\": \"images/CytAssist_FFPE_Human_Breast_Cancer_full_image\"},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = vc.add_dataset(name='Breast Cancer Visium').add_object(wrapper)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spatial = vc.add_view(\"spatialBeta\", dataset=dataset)\n", + "feature_list = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", + "layer_controller = vc.add_view(\"layerControllerBeta\", dataset=dataset)\n", + "vc.link_views_by_dict([spatial, layer_controller], {\n", + " 'imageLayer': CL([{\n", + " 'photometricInterpretation': 'RGB',\n", + " }]),\n", + "}, scope_prefix=get_initial_coordination_scope_prefix(\"A\", \"image\"))\n", + "vc.link_views_by_dict([spatial, layer_controller], {\n", + " 'spotLayer': CL([{\n", + " 'obsType': 'spot',\n", + " }]),\n", + "}, scope_prefix=get_initial_coordination_scope_prefix(\"A\", \"obsSpots\"))\n", + "obs_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", + "vc.link_views([spatial, layer_controller, feature_list, obs_sets], ['obsType'], [wrapper.obs_type_label])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.4. Define the visualization layout\n", + "\n", + "The `vc.layout(view_concat)` method allows us to specify how our views will be arranged in the layout grid in the widget. The `|` and `/` characters are magic syntax for `hconcat(v1, v2)` and `vconcat(v1, v2)`, respectively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc.layout(spatial | (feature_list / layer_controller / obs_sets));" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "vw = vc.widget()\n", + "vw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vw.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vc.to_dict(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/pyproject.toml b/pyproject.toml index 3ed83ad..f5b5f17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta" [project] name = "vitessce" -version = "3.4.0" +version = "3.4.1" authors = [ { name="Mark Keller", email="mark_keller@hms.harvard.edu" }, ] description = "Jupyter widget facilitating interactive visualization of spatial single-cell data with Vitessce" readme = "README.md" license = {file = "LICENSE"} -requires-python = ">=3.7" +requires-python = ">=3.9" keywords = ["ipython", "jupyter", "widgets"] classifiers = [ 'Development Status :: 4 - Beta', @@ -19,7 +19,6 @@ classifiers = [ 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', 'Topic :: Multimedia :: Graphics', - 'Programming Language :: Python :: 3.8', 'Programming Language :: 
Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', @@ -34,8 +33,9 @@ dependencies = [ 'black>=21.11b1', 'numpy>=1.21.2,<2.0', 'anndata>=0.7.8,<0.11', + 'spatialdata>=0.2.2', 'scanpy>=1.9.3', - 'ome-zarr==0.8.3', + 'ome-zarr>=0.8.3', 'tifffile>=2020.10.1', 'jsonschema>=3.2', 'tqdm>=4.1.0' @@ -81,7 +81,7 @@ all = [ 'starlette==0.14.0', 'generate-tiff-offsets>=0.1.7', 'kerchunk>=0.2.6', - 'fsspec>=2023.12.2', + 'fsspec', # aiofiles is not explicitly referenced in our code, # but it is an implicit dependency of starlette==0.14.0. diff --git a/setup.cfg b/setup.cfg index e709e52..09602d8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,11 +8,16 @@ per-file-ignores = vitessce/data_utils/__init__.py: F401 vitessce/widget_plugins/__init__.py: F401 ignore = - E501, # Ignore line too long - W605, # Ignore invalid escape sequence '\*' - W503, # Ignore line break before binary operator: Skim down the left edge to understand intent. - E127 # Ignore continuation line over-indented for visual indent - E128 # Ignore continuation line under-indented for visual indent + # Ignore line too long + E501, + # Ignore invalid escape sequence '\*' + W605, + # Ignore line break before binary operator: Skim down the left edge to understand intent. 
+ W503, + # Ignore continuation line over-indented for visual indent + E127 + # Ignore continuation line under-indented for visual indent + E128 exclude = ./js/node_modules/, ./docs/notebooks/.ipynb_checkpoints/, diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py index 3c123fe..5ced8d6 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -22,7 +22,7 @@ ObsSegmentationsOmeZarrWrapper, ) -from vitessce.wrappers import file_path_to_url_path +from vitessce.wrappers import SpatialDataWrapper, file_path_to_url_path data_path = Path('tests/data') @@ -235,8 +235,23 @@ def test_anndata_with_base_dir(self): file_def = file_def_creator('http://localhost:8000') self.assertEqual(file_def, {'fileType': 'anndata.zarr', 'url': 'http://localhost:8000/test.h5ad.zarr', 'options': { - 'obsEmbedding': [{'path': 'obsm/X_umap', 'embeddingType': 'UMAP', 'dims': [0, 1]}], - 'obsSets': [{'path': 'obs/CellType', 'name': 'Cell Type'}] + 'obsEmbedding': [{'path': 'obsm/X_umap', 'dims': [0, 1], 'embeddingType': 'UMAP'}], + 'obsSets': [{'name': 'Cell Type', 'path': 'obs/CellType'}] + }}) + + def test_anndata_with_base_dir_no_names(self): + adata_path = 'test.h5ad.zarr' + w = AnnDataWrapper(adata_path, obs_set_paths=['obs/CellType'], obs_embedding_paths=[ + 'obsm/X_umap']) + w.base_dir = data_path + w.local_dir_uid = 'anndata.zarr' + + file_def_creator = w.make_file_def_creator('A', 0) + file_def = file_def_creator('http://localhost:8000') + self.assertEqual(file_def, {'fileType': 'anndata.zarr', 'url': 'http://localhost:8000/test.h5ad.zarr', + 'options': { + 'obsEmbedding': [{'path': 'obsm/X_umap', 'dims': [0, 1], 'embeddingType': 'X_umap'}], + 'obsSets': [{'name': 'CellType', 'path': 'obs/CellType'}] }}) def test_anndata_with_h5ad_and_ref_json(self): @@ -375,3 +390,73 @@ def test_multivec_zarr_with_base_dir(self): 'fileType': 'genomic-profiles.zarr', 'url': 'http://localhost:8000/test_out.snap.multivec.zarr', }) + + def test_spatial_data_with_base_dir(self): + + 
spatial_data_path = 'test.spatialdata.zarr' + w = SpatialDataWrapper( + sdata_path=spatial_data_path, + image_path="images/picture", + obs_set_paths=['obs/CellType'], + obs_set_names=['Cell Type'], + obs_embedding_paths=['obsm/X_umap'], + obs_embedding_names=['UMAP'] + ) + w.base_dir = data_path + w.local_dir_uid = 'spatialdata.zarr' + + file_def_creator = w.make_file_def_creator('A', 0) + file_def = file_def_creator('http://localhost:8000') + self.assertEqual(file_def, { + 'fileType': 'spatialdata.zarr', + 'url': 'http://localhost:8000/test.spatialdata.zarr', + 'options': { + 'obsSets': { + 'obsSets': [{'name': 'Cell Type', 'path': 'obs/CellType'}], + 'tablePath': 'tables/table' + }, + 'image': {'path': 'images/picture'} + }}) + + def test_spatial_data_with_base_dir_2(self): + spatial_data_path = 'test.spatialdata.zarr' + w = SpatialDataWrapper( + sdata_path=spatial_data_path, + image_path='images/CytAssist_FFPE_Human_Breast_Cancer_full_image', + coordinate_system='aligned', + region='CytAssist_FFPE_Human_Breast_Cancer', + obs_feature_matrix_path='tables/table/X', + obs_spots_path='shapes/CytAssist_FFPE_Human_Breast_Cancer', + table_path='tables/table', + coordination_values={ + "obsType": "spot" + } + ) + w.base_dir = data_path + w.local_dir_uid = 'spatialdata.zarr' + + file_def_creator = w.make_file_def_creator('A', 0) + file_def = file_def_creator('http://localhost:8000') + self.assertDictEqual(file_def, { + 'fileType': 'spatialdata.zarr', + 'url': 'http://localhost:8000/test.spatialdata.zarr', + 'options': { + 'image': { + 'path': 'images/CytAssist_FFPE_Human_Breast_Cancer_full_image', + 'coordinateSystem': 'aligned', + }, + 'obsFeatureMatrix': { + 'path': 'tables/table/X', + 'region': 'CytAssist_FFPE_Human_Breast_Cancer' + }, + 'obsSpots': { + 'path': 'shapes/CytAssist_FFPE_Human_Breast_Cancer', + 'tablePath': 'tables/table', + 'region': 'CytAssist_FFPE_Human_Breast_Cancer', + 'coordinateSystem': 'aligned', + } + }, + 'coordinationValues': { + "obsType": 
"spot" + } + }) diff --git a/vitessce/__init__.py b/vitessce/__init__.py index 2d3e3b4..00af54d 100644 --- a/vitessce/__init__.py +++ b/vitessce/__init__.py @@ -53,6 +53,7 @@ ObsSegmentationsOmeTiffWrapper, ImageOmeZarrWrapper, ObsSegmentationsOmeZarrWrapper, + SpatialDataWrapper, ) except ModuleNotFoundError as e: # pragma: no cover warn(f'Extra installs are necessary to use wrappers: {e}') diff --git a/vitessce/config.py b/vitessce/config.py index 2574d70..efb5317 100644 --- a/vitessce/config.py +++ b/vitessce/config.py @@ -1048,7 +1048,7 @@ def add_view(self, view_type, dataset=None, dataset_uid=None, x=0, y=0, w=1, h=1 dataset_uid, str) assert dataset is None or dataset_uid is None component = view_type - assert type(component) in [str, cm] + # assert type(component) in [str, cm] if dataset is None: dataset = self.get_dataset_by_uid(dataset_uid) diff --git a/vitessce/constants.py b/vitessce/constants.py index 4ba4860..2accca3 100644 --- a/vitessce/constants.py +++ b/vitessce/constants.py @@ -14,7 +14,7 @@ def __new__(cls, value, doc): def norm_enum(enum_val, expected_enum_class=None): - assert isinstance(enum_val, str) or isinstance(enum_val, expected_enum_class) + # assert isinstance(enum_val, str) or isinstance(enum_val, expected_enum_class), f"enum_val was {type(enum_val)} and not a string or expected value {type(expected_enum_class)}" # We don't actually use the expected_enum_class, # since it would not account for things like plugin coordination types, etc. # But we can pass it around anyway and in the future could use @@ -162,6 +162,7 @@ class FileType(DocEnum): An enum type representing the file format or schema to which a file conforms. 
""" ANNDATA_ZARR = "anndata.zarr", "Joint file type for AnnData objects" + SPATIALDATA_ZARR = "spatialdata.zarr", "Joint file type for SpatialData objects" ANNDATA_H5AD = "anndata.h5ad", "Joint file type for AnnData objects" OBS_EMBEDDING_CSV = 'obsEmbedding.csv', "File type for obsEmbedding values stored in a CSV file" OBS_LOCATIONS_CSV = 'obsLocations.csv', "File type for obsLocations values stored in a CSV file" diff --git a/vitessce/file_def_utils.py b/vitessce/file_def_utils.py new file mode 100644 index 0000000..4723bc6 --- /dev/null +++ b/vitessce/file_def_utils.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +from functools import partial +from typing import Optional + +import numpy as np + + +def gen_obs_embedding_schema(options: dict, paths: Optional[list[str]] = None, names: Optional[list[str]] = None, dims: Optional[list[list[int]]] = None): + if paths is not None: + if "obsEmbedding" not in options: + options["obsEmbedding"] = [] + if names is not None: + for key, mapping in zip(paths, names): + options["obsEmbedding"].append({ + "path": key, + "dims": [0, 1], + "embeddingType": mapping + }) + else: + for mapping in paths: + mapping_key = mapping.split('/')[-1] + options["obsEmbedding"].append({ + "path": mapping, + "dims": [0, 1], + "embeddingType": mapping_key + }) + if dims is not None: + if "obsEmbedding" not in options: + options["obsEmbedding"] = [] + for dim_i, dim in enumerate(dims): + options["obsEmbedding"][dim_i]['dims'] = dim + return options + + +def gen_obs_sets_schema(options: dict, paths: Optional[list[str]] = None, names: Optional[list[str]] = None): + if paths is not None: + options["obsSets"] = [] + if names is not None: + names = names + else: + names = [] + for obs in paths: + obs_end_path = obs.split('/')[-1] + names += [obs_end_path] + for obs, name in zip(paths, names): + options["obsSets"].append({ + "name": name, + "path": obs + }) + return options + + +def gen_sdata_obs_sets_schema(options: dict, paths: 
Optional[list[str]] = None, names: Optional[list[str]] = None, table_path: Optional[str] = None, region: Optional[str] = None): + if paths is not None: + options["obsSets"] = {"obsSets": []} + if names is not None: + names = names + else: + names = [] + for obs in paths: + obs_end_path = obs.split('/')[-1] + names += [obs_end_path] + for obs, name in zip(paths, names): + options["obsSets"]["obsSets"].append({ + "name": name, + "path": obs + }) + if table_path is not None: + options["obsSets"]["tablePath"] = table_path + if region is not None: + options["obsSets"]["region"] = region + return options + + +def gen_obs_feature_matrix_schema(options: dict, matrix_path: Optional[str] = None, var_filter_path: Optional[str] = None, init_var_filter_path: Optional[str] = None): + if matrix_path is not None: + options["obsFeatureMatrix"] = { + "path": matrix_path + } + if var_filter_path is not None: + options["obsFeatureMatrix"]["featureFilterPath"] = var_filter_path + if init_var_filter_path is not None: + options["obsFeatureMatrix"]["initialFeatureFilterPath"] = init_var_filter_path + return options + + +def gen_obs_labels_schema(options: dict, paths: Optional[list[str]] = None, names: Optional[list[str]] = None): + if paths is not None: + if names is not None and len(paths) == len(names): + # A name was provided for each path element, so use those values. + names = names + else: + # Names were not provided for each path element, + # so fall back to using the final part of each path for the names. 
+ names = [labels_path.split('/')[-1] for labels_path in paths] + obs_labels = [] + for path, name in zip(paths, names): + obs_labels.append({"path": path, "obsLabelsType": name}) + options["obsLabels"] = obs_labels + return options + + +def gen_path_schema(key: str, path: Optional[str], options: dict): + if path is not None: + options[key] = { + "path": path + } + return options + + +gen_obs_locations_schema = partial(gen_path_schema, "obsLocations") +gen_obs_segmentations_schema = partial(gen_path_schema, "obsSegmentations") +gen_obs_spots_schema = partial(gen_path_schema, "obsSpots") +gen_obs_points_schema = partial(gen_path_schema, "obsPoints") +gen_feature_labels_schema = partial(gen_path_schema, "featureLabels") + + +def gen_sdata_image_schema(options, path: str, coordinate_system: Optional[str] = None, affine_transformation: Optional[np.ndarray] = None) -> dict: + if path is not None: + options["image"] = { + "path": path + } + if affine_transformation is not None: + options["image"]['coordinateTransformations'] = affine_transformation + if coordinate_system is not None: + options["image"]['coordinateSystem'] = coordinate_system + return options + + +def gen_sdata_labels_schema(options, path: str, table_path: str = "tables/table", coordinate_system: Optional[str] = None, affine_transformation: Optional[np.ndarray] = None) -> dict: + if path is not None: + options["labels"] = { + "path": path + } + if table_path is not None: + options["labels"]['tablePath'] = table_path + if affine_transformation is not None: + options["labels"]['coordinateTransformations'] = affine_transformation + if coordinate_system is not None: + options["labels"]['coordinateSystem'] = coordinate_system + return options + + +def gen_sdata_obs_spots_schema(options: dict, shapes_path: str, table_path: str = "tables/table", region: Optional[str] = None, coordinate_system: Optional[str] = None) -> dict: + if shapes_path is not None: + options['obsSpots'] = { + "path": shapes_path, + 
"tablePath": table_path + } + if region is not None: + options['obsSpots']['region'] = region + if coordinate_system is not None: + options['obsSpots']['coordinateSystem'] = coordinate_system + return options + + +def gen_sdata_obs_feature_matrix_schema(options: dict, matrix_path: Optional[str] = None, var_filter_path: Optional[str] = None, init_var_filter_path: Optional[str] = None, region: Optional[str] = None): + if matrix_path is not None: + options["obsFeatureMatrix"] = { + "path": matrix_path + } + if region is not None: + options['obsFeatureMatrix']['region'] = region + if var_filter_path is not None: + options["obsFeatureMatrix"]["featureFilterPath"] = var_filter_path + if init_var_filter_path is not None: + options["obsFeatureMatrix"]["initialFeatureFilterPath"] = init_var_filter_path + return options diff --git a/vitessce/widget.py b/vitessce/widget.py index b38da7e..4b34473 100644 --- a/vitessce/widget.py +++ b/vitessce/widget.py @@ -454,7 +454,7 @@ class VitessceWidget(anywidget.AnyWidget): next_port = DEFAULT_PORT - js_package_version = Unicode('3.4.12').tag(sync=True) + js_package_version = Unicode('3.4.14').tag(sync=True) js_dev_mode = Bool(False).tag(sync=True) custom_js_url = Unicode('').tag(sync=True) plugin_esm = List(trait=Unicode(''), default_value=[]).tag(sync=True) @@ -463,7 +463,7 @@ class VitessceWidget(anywidget.AnyWidget): store_urls = List(trait=Unicode(''), default_value=[]).tag(sync=True) - def __init__(self, config, height=600, theme='auto', uid=None, port=None, proxy=False, js_package_version='3.4.12', js_dev_mode=False, custom_js_url='', plugins=None, remount_on_uid_change=True, invoke_timeout=30000): + def __init__(self, config, height=600, theme='auto', uid=None, port=None, proxy=False, js_package_version='3.4.14', js_dev_mode=False, custom_js_url='', plugins=None, remount_on_uid_change=True, invoke_timeout=30000): """ Construct a new Vitessce widget. 
def _count_provided(inputs):
    """Return how many items of *inputs* are not None."""
    return len([value for value in inputs if value is not None])


def raise_error_if_zero_or_more_than_one(inputs):
    """Require exactly one non-None item in *inputs*.

    :param inputs: Candidate data-input arguments (e.g. path, URL, store).
    :returns: True when exactly one item is not None.
    :raises ValueError: If no item, or more than one item, is not None.
    """
    provided = _count_provided(inputs)
    if provided == 0:
        raise ValueError(
            "Expected one type of data input parameter to be provided (_url, _path, _store, etc.), but received none."
        )
    if provided > 1:
        raise ValueError(
            "Expected only one type of data input parameter to be provided (_url, _path, _store, etc.), but received more than one."
        )
    return True


def raise_error_if_any(inputs):
    """Require every item of *inputs* to be None.

    :returns: True when no item is provided.
    :raises ValueError: If at least one item is not None.
    """
    if _count_provided(inputs) > 0:
        raise ValueError(
            "Did not expect any of these parameters to be provided, but received one or more: " + str(inputs)
        )
    return True


def raise_error_if_more_than_one(inputs):
    """Allow at most one non-None item in *inputs*.

    :returns: True when zero or one item is provided.
    :raises ValueError: If more than one item is not None.
    """
    if _count_provided(inputs) > 1:
        raise ValueError(
            "Expected only one of these parameters to be provided, but received more than one: " + str(inputs)
        )
    return True
adata_path is not None: self.is_remote = False @@ -1111,24 +1169,25 @@ def __init__(self, adata_path=None, adata_url=None, adata_store=None, adata_arti self.local_ref_uid = make_unique_filename(".ref.json") self._expression_matrix = obs_feature_matrix_path - self._cell_set_obs_names = obs_set_names + self._obs_set_names = obs_set_names self._mappings_obsm_names = obs_embedding_names self._gene_var_filter = feature_filter_path self._matrix_gene_var_filter = initial_feature_filter_path - self._cell_set_obs = obs_set_paths + self._obs_set_elems = obs_set_paths self._spatial_centroid_obsm = obs_locations_path self._spatial_polygon_obsm = obs_segmentations_path self._mappings_obsm = obs_embedding_paths self._mappings_obsm_dims = obs_embedding_dims self._spatial_spots_obsm = obs_spots_path self._spatial_points_obsm = obs_points_path - self._gene_alias = feature_labels_path + self._feature_labels = feature_labels_path # Support legacy provision of single obs labels path if (obs_labels_path is not None): - self._obs_labels_paths = [obs_labels_path] + warnings.warn("`obs_labels_path` will be deprecated in a future release.", DeprecationWarning) + self._obs_labels_elems = [obs_labels_path] self._obs_labels_names = [obs_labels_path.split('/')[-1]] else: - self._obs_labels_paths = obs_labels_paths + self._obs_labels_elems = obs_labels_paths self._obs_labels_names = obs_labels_names self._convert_to_dense = convert_to_dense self._coordination_values = coordination_values @@ -1140,12 +1199,12 @@ def convert_and_save(self, dataset_uid, obj_i, base_dir=None): file_def_creator = self.make_file_def_creator( dataset_uid, obj_i) - routes = self.make_anndata_routes(dataset_uid, obj_i) + routes = self.make_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) self.routes += routes - def make_anndata_routes(self, dataset_uid, obj_i): + def make_routes(self, dataset_uid, obj_i): if self.is_remote: return [] elif self.is_store: @@ -1181,78 +1240,15 @@ def 
get_ref_url(self, base_url="", dataset_uid="", obj_i=""): def make_file_def_creator(self, dataset_uid, obj_i): def get_anndata_zarr(base_url): options = {} - if self._spatial_centroid_obsm is not None: - options["obsLocations"] = { - "path": self._spatial_centroid_obsm - } - if self._spatial_polygon_obsm is not None: - options["obsSegmentations"] = { - "path": self._spatial_polygon_obsm - } - if self._spatial_spots_obsm is not None: - options["obsSpots"] = { - "path": self._spatial_spots_obsm - } - if self._spatial_points_obsm is not None: - options["obsPoints"] = { - "path": self._spatial_points_obsm - } - if self._mappings_obsm is not None: - options["obsEmbedding"] = [] - if self._mappings_obsm_names is not None: - for key, mapping in zip(self._mappings_obsm_names, self._mappings_obsm): - options["obsEmbedding"].append({ - "path": mapping, - "dims": [0, 1], - "embeddingType": key - }) - else: - for mapping in self._mappings_obsm: - mapping_key = mapping.split('/')[-1] - self._mappings_obsm_names = mapping_key - options["obsEmbedding"].append({ - "path": mapping, - "dims": [0, 1], - "embeddingType": mapping_key - }) - if self._mappings_obsm_dims is not None: - for dim_i, dim in enumerate(self._mappings_obsm_dims): - options["obsEmbedding"][dim_i]['dims'] = dim - if self._cell_set_obs is not None: - options["obsSets"] = [] - if self._cell_set_obs_names is not None: - names = self._cell_set_obs_names - else: - names = [obs.split('/')[-1] for obs in self._cell_set_obs] - for obs, name in zip(self._cell_set_obs, names): - options["obsSets"].append({ - "name": name, - "path": obs - }) - if self._expression_matrix is not None: - options["obsFeatureMatrix"] = { - "path": self._expression_matrix - } - if self._gene_var_filter is not None: - options["obsFeatureMatrix"]["featureFilterPath"] = self._gene_var_filter - if self._matrix_gene_var_filter is not None: - options["obsFeatureMatrix"]["initialFeatureFilterPath"] = self._matrix_gene_var_filter - if self._gene_alias is 
SpatialDataWrapperType = TypeVar('SpatialDataWrapperType', bound='SpatialDataWrapper')


class SpatialDataWrapper(AnnDataWrapper):
    """
    Wrap a SpatialData store so that it can be added to a Vitessce dataset.

    The wrapper reuses the AnnDataWrapper machinery (the ``sdata_*`` arguments are
    forwarded to the parent as the corresponding ``adata_*`` arguments) and emits a
    file definition whose file type is ``ft.SPATIALDATA_ZARR``.
    """

    def __init__(self, sdata_path: Optional[str] = None, sdata_url: Optional[str] = None, sdata_store: Optional[Union[str, zarr.storage.StoreLike]] = None, sdata_artifact: Optional[ln.Artifact] = None, image_path: Optional[str] = None, region: Optional[str] = None, coordinate_system: Optional[str] = None, affine_transformation: Optional[np.ndarray] = None, obs_spots_path: Optional[str] = None, labels_path: Optional[str] = None, table_path: str = "tables/table", **kwargs):
        """
        Wrap a SpatialData object.

        :param sdata_path: SpatialData path, exclusive with other `{sdata,adata}_xxxx` arguments, by default None
        :type sdata_path: Optional[str]
        :param sdata_url: SpatialData url, exclusive with other `{sdata,adata}_xxxx` arguments, by default None
        :type sdata_url: Optional[str]
        :param sdata_store: SpatialData store, exclusive with other `{sdata,adata}_xxxx` arguments, by default None
        :type sdata_store: Optional[Union[str, zarr.storage.StoreLike]]
        :param sdata_artifact: Artifact that corresponds to a SpatialData object.
        :type sdata_artifact: Optional[ln.Artifact]
        :param image_path: Path to the image element of interest. By default, None.
        :type image_path: Optional[str]
        :param region: Region name that is forwarded to the SpatialData schema helpers when the file definition is generated. By default, None.
        :type region: Optional[str]
        :param coordinate_system: Name of a target coordinate system.
        :type coordinate_system: Optional[str]
        :param affine_transformation: Transformation to be applied to the image. By default, None. Prefer coordinate_system.
        :type affine_transformation: Optional[np.ndarray]
        :param obs_spots_path: Location of shapes that should be interpreted as spot observations, by default None
        :type obs_spots_path: Optional[str]
        :param labels_path: Location of the labels (segmentation bitmask image), by default None
        :type labels_path: Optional[str]
        :param table_path: Location of the table within the store, by default "tables/table"
        :type table_path: str
        :param \\*\\*kwargs: Additional keyword arguments forwarded to AnnDataWrapper. The `adata_path`, `adata_url`, `adata_store` and `adata_artifact` keys are checked with `raise_error_if_any` before forwarding.
        """
        # Exactly one SpatialData source must be provided...
        raise_error_if_zero_or_more_than_one([
            sdata_path,
            sdata_url,
            sdata_store,
            sdata_artifact,
        ])
        # ...and the AnnData-flavoured equivalents are checked separately, because
        # the sdata_* values are passed to the parent under those names below.
        raise_error_if_any([
            kwargs.get('adata_path', None),
            kwargs.get('adata_url', None),
            kwargs.get('adata_store', None),
            kwargs.get('adata_artifact', None)
        ])
        super().__init__(adata_path=sdata_path, adata_url=sdata_url, adata_store=sdata_store, adata_artifact=sdata_artifact, **kwargs)
        self.local_dir_uid = make_unique_filename(".sdata.zarr")
        self._image_path = image_path
        self._region = region
        self._coordinate_system = coordinate_system
        self._affine_transformation = affine_transformation
        self._kwargs = kwargs
        self._obs_spots_path = obs_spots_path
        self._labels_path = labels_path
        if self._adata_path is not None:
            self.zarr_folder = 'spatialdata.zarr'
        self.obs_type_label = None
        if self._coordination_values is not None and "obsType" in self._coordination_values:
            self.obs_type_label = self._coordination_values["obsType"]
        self._table_path = table_path

    @staticmethod
    def _lookup_or_none(mapping, key):
        """Return ``mapping[key]``, or None when the mapping is None or lacks the key."""
        if mapping is None:
            return None
        try:
            # Use item access (not .get) so that a caller-supplied defaultdict
            # still applies its own default factory.
            return mapping[key]
        except KeyError:
            return None

    @classmethod
    def from_object(cls: Type[SpatialDataWrapperType], sdata: SpatialData, table_keys_to_image_elems: Optional[dict[str, Union[str, None]]] = None, table_keys_to_regions: Optional[dict[str, Union[str, None]]] = None, obs_type_label: str = "spot") -> list[SpatialDataWrapperType]:
        """Instantiate a wrapper for SpatialData stores, one per table, directly from the SpatialData object.
        By default, we "show everything" that can reasonably be inferred given the information. If you wish to have more control,
        consider instantiating the object directly. This function will error if something cannot be inferred, i.e., the user does not present
        regions explicitly but there is more than one for a given table.


        Parameters
        ----------
        cls : Type[SpatialDataWrapperType]
            The wrapper class to instantiate.
        sdata : SpatialData
            The SpatialData object to wrap. Its ``path`` is used as the ``sdata_path`` of each wrapper.
        table_keys_to_image_elems : dict[str, str], optional
            which image paths to use for a given table for the visualization, by default None for each table key.
        table_keys_to_regions : dict[str, str], optional
            which regions to use for a given table for the visualization, by default None for each table key.
        obs_type_label : str, optional
            value used for the "obsType" coordination value of each wrapper, by default "spot".

        Returns
        -------
        list[SpatialDataWrapperType]
            One wrapper per table in ``sdata.tables``.

        Raises
        ------
        ValueError
            If ``sdata.path`` is None, if an explicitly requested region is not among the
            regions recorded for the table, or if no region was requested and the table
            records more than one.
        """
        if sdata.path is None:
            raise ValueError("The SpatialData object must be backed by a store on disk (sdata.path is None).")
        wrappers = []
        parent_table_key = "table" if (sdata.path / "table").exists() else "tables"
        for table_key, table in sdata.tables.items():
            spot_shapes_elem = None
            labels_elem = None
            image_elem = cls._lookup_or_none(table_keys_to_image_elems, table_key)
            spatialdata_attr = table.uns['spatialdata_attrs']
            region = cls._lookup_or_none(table_keys_to_regions, table_key)
            if region is not None:
                annotated = spatialdata_attr['region']
                # A single region may be stored as a bare string; wrap it so that the
                # membership test below is exact rather than a substring match.
                if isinstance(annotated, str):
                    annotated = [annotated]
                if annotated is None or region not in annotated:
                    raise ValueError(f"Region {region!r} is not among the regions recorded for table {table_key!r}.")
            else:
                region = spatialdata_attr['region']
                if isinstance(region, list):
                    if len(region) > 1:
                        raise ValueError("Vitessce cannot subset AnnData objects on the fly. Please provide an explicit region")
                    region = region[0] if region else None
            if region in sdata.shapes:
                spot_shapes_elem = f"shapes/{region}"
                # Currently, only circle shapes are supported.
                # TODO: add if statement to check that this region contains spot shapes rather than other types of shapes
            if region in sdata.labels:
                labels_elem = f"labels/{region}"
            obs_feature_matrix_elem = f"{parent_table_key}/{table_key}/X"
            if 'highly_variable' in table.var:
                # TODO: fix first key needing to be "table" in vitessce-js
                initial_feature_filter_elem = 'highly_variable'
            else:
                initial_feature_filter_elem = None
            obs_set_elems = [f"{parent_table_key}/{table_key}/obs/{elem}" for elem in table.obs if table.obs[elem].dtype == 'category']
            # NOTE(review): neither `region` nor the table location is forwarded to the
            # constructor here, so the wrapper uses its defaults for both - confirm intended.
            wrappers.append(
                cls(
                    sdata_path=str(sdata.path),
                    image_path=str(image_elem) if image_elem is not None else None,
                    labels_path=str(labels_elem) if labels_elem is not None else None,
                    obs_feature_matrix_path=str(obs_feature_matrix_elem),
                    obs_spots_path=str(spot_shapes_elem) if spot_shapes_elem is not None else None,
                    initial_feature_filter_path=initial_feature_filter_elem,
                    obs_set_paths=obs_set_elems,
                    coordination_values={"obsType": obs_type_label}  # TODO: should we remove?
                )
            )
        return wrappers

    def make_file_def_creator(self, dataset_uid: str, obj_i: str) -> Optional[Callable]:
        """
        Build the function that creates this wrapper's file definition.

        :param dataset_uid: Dataset identifier, passed through to `get_zarr_url`.
        :type dataset_uid: str
        :param obj_i: Object index within the dataset, passed through to `get_zarr_url`.
        :type obj_i: str
        :returns: A function that takes `base_url` and returns the file definition dict, or None when no options were generated.
        :rtype: Optional[Callable]
        """
        def generator(base_url):
            options = {}
            options = gen_obs_labels_schema(options, self._obs_labels_elems, self._obs_labels_names)
            options = gen_sdata_obs_feature_matrix_schema(options, self._expression_matrix, self._gene_var_filter, self._matrix_gene_var_filter, self._region)
            options = gen_sdata_obs_sets_schema(options, self._obs_set_elems, self._obs_set_names, self._table_path, self._region)
            options = gen_sdata_obs_spots_schema(options, self._obs_spots_path, self._table_path, self._region, self._coordinate_system)
            options = gen_sdata_image_schema(options, self._image_path, self._coordinate_system, self._affine_transformation)
            options = gen_sdata_labels_schema(options, self._labels_path, self._table_path, self._coordinate_system, self._affine_transformation)
            options = gen_feature_labels_schema(self._feature_labels, options)
            if not options:
                # Nothing was configured, so there is no file definition to emit.
                return None
            obj_file_def = {
                "fileType": ft.SPATIALDATA_ZARR.value,
                "url": self.get_zarr_url(base_url, dataset_uid, obj_i),
                "options": options
            }
            if self._request_init is not None:
                obj_file_def['requestInit'] = self._request_init
            if self._coordination_values is not None:
                obj_file_def['coordinationValues'] = self._coordination_values
            return obj_file_def

        return generator