diff --git a/.coveragerc_omit b/.coveragerc_omit index 316cb477..2a1a0f58 100644 --- a/.coveragerc_omit +++ b/.coveragerc_omit @@ -6,7 +6,8 @@ omit = vitessce/widget.py vitessce/wrappers.py vitessce/repr.py - vitessce/data_utils/anndata.py - vitessce/data_utils/ome.py - vitessce/data_utils/entities.py - vitessce/data_utils/multivec.py \ No newline at end of file + vitessce/data_utils/anndata/anndata.py + vitessce/data_utils/multivec/multivec.py + vitessce/data_utils/ome_tiff/ome_tiff.py + vitessce/data_utils/ome_zarr/ome_zarr.py + vitessce/data_utils/ucsc_cellbrowser/ucsc_cellbrowser.py diff --git a/docs/api_data.rst b/docs/api_data.rst index 34fffd81..5f246ff1 100644 --- a/docs/api_data.rst +++ b/docs/api_data.rst @@ -27,7 +27,13 @@ vitessce.export vitessce.data_utils ***************** -.. automodule:: vitessce.data_utils.ome +.. automodule:: vitessce.data_utils.anndata.anndata :members: -.. automodule:: vitessce.data_utils.anndata +.. automodule:: vitessce.data_utils.multivec.multivec + :members: +.. automodule:: vitessce.data_utils.ome_tiff.ome_tiff + :members: +.. automodule:: vitessce.data_utils.ome_zarr.ome_zarr + :members: +.. automodule:: vitessce.data_utils.ucsc_cellbrowser.ucsc_cellbrowser :members: \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index cd264769..9b930107 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,8 +79,40 @@ testing = [] linting = [] notebook = [] +# data_utils extras_require +# These dependencies are required +# to use the corresponding data_utils sub-packages. +anndata = [ + 'zarr>=2.5.0', + 'numcodecs>=0.5.7', + 'anndata>=0.7.8,<0.9', + 'scanpy>=1.9.3' +] +ome_zarr = [ + 'zarr>=2.5.0', + 'numcodecs>=0.5.7', + 'ome-zarr==0.2.1' +] +ome_tiff = [ + 'generate-tiff-offsets>=0.1.7', + 'tifffile>=2020.10.1' +] +multivec = [ + 'zarr>=2.5.0', + 'numcodecs>=0.5.7', + 'negspy>=0.2.24' +] + [project.urls] repository = "https://github.com/vitessce/vitessce-python" [tool.setuptools] -packages = ["vitessce", "vitessce.data_utils"] +packages = [ + "vitessce", + "vitessce.data_utils", + "vitessce.data_utils.anndata", + "vitessce.data_utils.multivec", + "vitessce.data_utils.ome_tiff", + "vitessce.data_utils.ome_zarr", + "vitessce.data_utils.ucsc_cellbrowser" +] diff --git a/tests/test_anndata_utils.py b/tests/test_anndata_utils.py index b835a00d..c400dfc1 100644 --- a/tests/test_anndata_utils.py +++ b/tests/test_anndata_utils.py @@ -7,11 +7,13 @@ from scipy.io import mmread import zarr -from vitessce.data_utils import ( +from vitessce.data_utils.anndata import ( optimize_arr, optimize_adata, sort_var_axis, to_uint8, +) +from vitessce.data_utils.multivec import ( adata_to_multivec_zarr, ) diff --git a/tests/test_config_converter.py b/tests/test_config_converter.py index e4926239..fd339780 100644 --- a/tests/test_config_converter.py +++ b/tests/test_config_converter.py @@ -2,7 +2,7 @@ from unittest.mock import patch, Mock from copy import deepcopy -from vitessce import ( +from vitessce.data_utils.ucsc_cellbrowser import ( CellBrowserToAnndataZarrConverter, convert_cell_browser_project_to_anndata, ) diff --git a/tests/test_entities.py b/tests/test_entities.py deleted file mode 100644 index cdcda782..00000000 --- a/tests/test_entities.py +++ /dev/null @@ -1,111 +0,0 @@ -import pytest - -from vitessce.data_utils.entities import ( - CellSets, - Cells, -) - - -def test_cells(): - - cell_ids = ['cell_1', 'cell_2', 'cell_3'] - - cells = Cells(cell_ids=cell_ids) - assert list(cells.json.keys()) == cell_ids - - cells.add_mapping('umap', [[1, 1], [2, 2], [3, 3]]) - - cells.add_mapping('pca', [[1, 1], [2, 2], [3, 3]]) - - cells.add_centroids([[1, 1], [2, 2], [3, 3]]) - - cells.add_polygon_outline( - [[[1, 1], [1, 1], [1, 1]], [[2, 2], [2, 2], [2, 2]], [[3, 3], [3, 3], [3, 3]]]) - - assert cells.json == { - 'cell_1': { - 'mappings': {'umap': [1, 1], 'pca': [1, 1]}, - 'xy': [1, 1], - 'poly': [[1, 1], [1, 1], [1, 1]] - }, - 'cell_2': { - 'mappings': {'umap': [2, 2], 'pca': [2, 2]}, - 'xy': [2, 2], - 'poly': [[2, 2], [2, 2], [2, 2]] - }, - 'cell_3': { - 'mappings': {'umap': [3, 3], 'pca': [3, 3]}, - 'xy': [3, 3], - 'poly': [[3, 3], [3, 3], [3, 3]] - } - } - - -def test_cells_bad_polygon_outline_type(): - - cell_ids = ['cell_1', 'cell_2', 'cell_3'] - cells = Cells(cell_ids=cell_ids) - with pytest.raises(Exception) as context: - # The extra 3 should be problematic since polygons are two dimensional. - cells.add_polygon_outline([ - [[1, 1, 3], [1, 1], [1, 1]], - [[2, 2], [2, 2], [2, 2]], - [[3, 3], [3, 3], [3, 3]] - ]) - assert 'Polygon outline for cell_1 should be a list of two element lists i.e xy coordinates' in str(context) - - -def test_cells_bad_mappings_length(): - - cell_ids = ['cell_1', 'cell_2', 'cell_3'] - cells = Cells(cell_ids=cell_ids) - with pytest.raises(Exception) as context: - # There are 3 cells in this object so only passing in two scatterplot cooridnates is problematic. - cells.add_mapping('umap', [[1, 1], [2, 2]]) - assert 'Coordinates length does not match Cell IDs Length' in str(context) - - -def test_cell_sets(): - - cell_sets = CellSets() - cell_sets.add_level_zero_node('Clusters') - - cell_sets.add_node('Cluster 1', ['Clusters']) - cell_sets.add_node('Cluster 2', ['Clusters']) - cell_sets.add_node('Subcluster 1', ['Clusters', 'Cluster 1'], ['cell_1', 'cell_2']) - cell_sets.add_node('Subcluster 2', ['Clusters', 'Cluster 1'], ['cell_3', 'cell_4']) - - assert cell_sets.json == { - "datatype": "cell", - "version": "0.1.2", - "tree": [{ - "name": 'Clusters', - "children": [ - { - "name": 'Cluster 1', - "children": [ - { - "name": 'Subcluster 1', - "set": ['cell_1', 'cell_2'] - }, - { - "name": 'Subcluster 2', - "set": ['cell_3', 'cell_4'] - } - ] - }, - { - "name": 'Cluster 2', - } - ] - }] - } - - -def test_cell_sets_node_not_found(): - - cell_sets = CellSets() - with pytest.raises(Exception) as context: - cell_sets.add_node('Cluster 1', ['Clusters Not Found']) - - assert "No node with path ['Clusters Not Found'] found to add Cluster 1 to" in str(context) diff --git a/tests/test_ome_utils.py b/tests/test_ome_utils.py index a9d183ef..a77758ed 100644 --- a/tests/test_ome_utils.py +++ b/tests/test_ome_utils.py @@ -3,7 +3,7 @@ import zarr import numpy as np -from vitessce.data_utils import ( +from vitessce.data_utils.ome_zarr import ( rgb_img_to_ome_zarr, ) diff --git a/vitessce/__init__.py b/vitessce/__init__.py index 3e948518..ae8f91b3 100644 --- a/vitessce/__init__.py +++ b/vitessce/__init__.py @@ -21,11 +21,6 @@ BASE_URL_PLACEHOLDER, ) -from .config_converter import ( - CellBrowserToAnndataZarrConverter, # only exported for testing. - convert_cell_browser_project_to_anndata, -) - from .wrappers import AbstractWrapper # We allow installation without all of the dependencies that the widget requires. diff --git a/vitessce/data_utils/__init__.py b/vitessce/data_utils/__init__.py index 88077ca1..e69de29b 100644 --- a/vitessce/data_utils/__init__.py +++ b/vitessce/data_utils/__init__.py @@ -1,18 +0,0 @@ -from .anndata import ( - optimize_adata, - optimize_arr, - to_dense, - to_uint8, - sort_var_axis, - to_diamond, - VAR_CHUNK_SIZE, -) -from .ome import ( - rgb_img_to_ome_zarr, - multiplex_img_to_ome_zarr, - rgb_img_to_ome_tiff, - multiplex_img_to_ome_tiff, -) -from .multivec import ( - adata_to_multivec_zarr, -) diff --git a/vitessce/data_utils/anndata/__init__.py b/vitessce/data_utils/anndata/__init__.py new file mode 100644 index 00000000..2b596475 --- /dev/null +++ b/vitessce/data_utils/anndata/__init__.py @@ -0,0 +1,10 @@ +from .anndata import ( + optimize_adata, + optimize_arr, + to_dense, + to_uint8, + sort_var_axis, + to_diamond, + VAR_CHUNK_SIZE, + cast_arr, +) diff --git a/vitessce/data_utils/anndata.py b/vitessce/data_utils/anndata/anndata.py similarity index 100% rename from vitessce/data_utils/anndata.py rename to vitessce/data_utils/anndata/anndata.py diff --git a/vitessce/data_utils/entities.py b/vitessce/data_utils/entities.py deleted file mode 100644 index aa32ddfb..00000000 --- a/vitessce/data_utils/entities.py +++ /dev/null @@ -1,287 +0,0 @@ -import negspy.coordinates as nc -import numpy as np -import math - - -class ArgumentLengthDoesNotMatchCellIdsException(Exception): - pass - - -class NodeNotFoundException(Exception): - pass - - -class Cells: - - """ - Generic Cells class for constructing the json needed for client side rendering of cell segmentations/scatterplots (UMAP, PCA etc.). - - :param json The json resulting from various calls to add_mapping, add_polygon_outline etc. that can be served to the client. - """ - - def __init__(self, cell_ids=[]): - """ - Constructor method - - :param list cell_ids: A list of cell ids to be shown in Vitessce. The order of these will be used to determine the order of future additions to this class, like segmentations and scatterplot coordinates. - """ - self._cell_ids = cell_ids - self.json = dict(zip(cell_ids, [{} for _ in cell_ids])) - - def add_mapping(self, name, coords): - """ - Add a (dimensionality reduction) scatterplot mapping to each cell. - - :param str name: The unique identifier for the mapping, like UMAP, tSNE or PCA. - :param list coords: A list of lists like [[1, 2], [3, 4], ...] in the order of cell_ids for each cell to be mapped to a scatterplot coorindate. - """ - if len(coords) != len(self._cell_ids): - raise ArgumentLengthDoesNotMatchCellIdsException( - 'Coordinates length does not match Cell IDs Length') - if not isinstance(name, str): - raise TypeError( - 'name argument needs to be a string for adding a scatterplot mapping') - for idx, id in enumerate(self._cell_ids): - if 'mappings' not in self.json[id]: - self.json[id]['mappings'] = {name: coords[idx]} - else: - self.json[id]['mappings'][name] = coords[idx] - - def add_centroids(self, centroids): - """ - Add a centroid for a spatial segmentation outline to each cell. - - :param list centroids: A list of lists like [[1, 2], [3, 4], ...] in the order of cell_ids for each cell to be mapped to a centroid coorindate. - """ - if len(centroids) != len(self._cell_ids): - raise ArgumentLengthDoesNotMatchCellIdsException( - 'Centroid length does not match Cell IDs Length') - if not isinstance(centroids, list) or any([len(centroid) != 2 or not isinstance(centroid, list) for centroid in centroids]): - raise TypeError('Centroids should be a list of two element lists') - for idx, id in enumerate(self._cell_ids): - self.json[id]['xy'] = centroids[idx] - - def add_polygon_outline(self, polygon_outline): - """ - Add a polygon for a spatial segmentation outline to each cell. - - :param list polygon_outline: A list of lists of lists like [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]...] in the order of cell_ids for each cell to be mapped to its segmentation. - """ - if len(polygon_outline) != len(self._cell_ids): - raise ArgumentLengthDoesNotMatchCellIdsException( - 'Segmentations length does not match Cell IDs Length') - for idx, id in enumerate(self._cell_ids): - if not isinstance(polygon_outline[idx], list) or any([len(coord) != 2 or not isinstance(coord, list) for coord in polygon_outline[idx]]): - raise TypeError( - f'Polygon outline for {id} should be a list of two element lists i.e xy coordinates') - self.json[id]['poly'] = polygon_outline[idx] - - -class CellSets: - - """ - Generic CellSets class for constructing the json needed for client side rendering of the cell sets. - - :param json The json resulting from various calls to add_node that can be served to the client. - """ - - def __init__(self): - """ - Constructor method - """ - - self.json = { - "datatype": "cell", - "version": "0.1.2", - "tree": [] - } - - def add_level_zero_node(self, name): - """ - Add a new level zero node to the root of the tree. - - :param str name: Name for the new node - """ - self.json['tree'].append({ - "name": name, - "children": [] - }) - - def add_node(self, name, parent_path, cell_set=None): - """ - Add a node to a parent node. - - :param str name: Name for the new node - :param list parent_path: List of strings representing the internal nodes to traverse to reach the desired parent node to which we will add the new node, like ['epithelial', 'meso-epithelial'] - :param list cell_set: List of cell ids which will be added to the new node as part of the set. - """ - parent_node = self._tree_find_node_by_path(parent_path) - if parent_node is None: - raise NodeNotFoundException( - f'No node with path {parent_path} found to add {name} to') - new_node = {"name": name} - if cell_set: - new_node['set'] = cell_set - if 'children' not in parent_node: - parent_node['children'] = [new_node] - else: - parent_node['children'].append(new_node) - - def _find_node_by_path(self, node, path, curr_index): - curr_node_name = path[curr_index] - if node['name'] == curr_node_name: - if curr_index == len(path) - 1: - return node - if 'children' in node: - found_nodes = [ - self._find_node_by_path(child, path, curr_index + 1) for child in node['children'] - ] - found_nodes_not_none = [n for n in found_nodes if n] - if len(found_nodes_not_none) == 1: - return found_nodes[0] - return None - - def _tree_find_node_by_path(self, path): - found_nodes = [self._find_node_by_path( - node, path, 0) for node in self.json['tree']] - found_nodes_not_none = [n for n in found_nodes if n] - if len(found_nodes_not_none) == 1: - return found_nodes_not_none[0] - return None - - -class Molecules(): - - """ - Generic Molecules class for constructing the json needed for client side rendering of spot data. - - :param json The json resulting from various calls to add_molecule. - """ - - def __init__(self): - """ - Constructor method - """ - self.json = {} - - def add_molecule(self, name, coords): - """ - Add a molecules to a parent node. - - :param str name: Name for the new molecules - :param list coords: A list of lists like [[1, 2], [3, 4], ...] or [[1, 2, 3], [3, 4, 5], ...] which denote where in xy space the spot data should be placed for the desired name. - """ - self.json[name] = coords - - -class GenomicProfiles(): - - """ - Generic class for representing genomic profiles. - """ - - def __init__(self, f, profile_paths, assembly='hg38', starting_resolution=5000, name="Genomic Profiles"): - """ - Constructor method - - :param f: The opened Zarr store object. - :type f: zarr.Group - :param list[list[str]] profile_paths: A list of cell set paths, one path for each profile. - :param str assembly: The genome assembly to use for chromosome lengths, passed to negspy. By default, 'hg38'. - :param int starting_resolution: The starting resolution. By default, 5000. - :param str name: The name for this set of profiles. By default, 'Genomic Profiles'. - """ - - self.f = f - - num_profiles = len(profile_paths) - - compressor = 'default' - - chromosomes = [str(chr_name) for chr_name in nc.get_chromorder( - assembly)[:25]] # TODO: should more than chr1-chrM be used? - chroms_length_arr = np.array( - [nc.get_chrominfo(assembly).chrom_lengths[x] for x in chromosomes], dtype="i8") - chroms_cumsum_arr = np.concatenate( - (np.array([0]), np.cumsum(chroms_length_arr))) - - chrom_name_to_length = dict(zip(chromosomes, chroms_length_arr)) - chrom_name_to_cumsum = dict(zip(chromosomes, chroms_cumsum_arr)) - - # Prepare to fill in resolutions datasets. - resolutions = [starting_resolution * (2 ** x) for x in range(16)] - - chromosomes_group = f.create_group("chromosomes") - for chr_name, chr_len in chrom_name_to_length.items(): - chr_group = chromosomes_group.create_group(chr_name) - # Create each resolution group. - for resolution in resolutions: - chr_shape = (num_profiles, math.ceil(chr_len / resolution)) - chr_group.create_dataset(str( - resolution), shape=chr_shape, dtype="f4", fill_value=np.nan, compressor=compressor) - - # f.attrs should contain the properties required for HiGlass's "tileset_info" requests. - f.attrs['row_infos'] = [ - {"path": profile_path} - for profile_path in profile_paths - ] - f.attrs['resolutions'] = sorted(resolutions, reverse=True) - f.attrs['shape'] = [num_profiles, 256] - f.attrs['name'] = name - f.attrs['coordSystem'] = assembly - - self.resolutions = resolutions - self.chromosomes = chromosomes - self.chromosomes_group = chromosomes_group - self.chrom_name_to_length = chrom_name_to_length - self.num_profiles = num_profiles - - # https://github.com/zarr-developers/zarr-specs/issues/50 - f.attrs['multiscales'] = [ - { - "version": "0.1", - "name": chr_name, - "datasets": [ - {"path": f"chromosomes/{chr_name}/{resolution}"} - for resolution in sorted(resolutions, reverse=True) - ], - "type": "zarr-multivec", - "metadata": { - "chromoffset": int(chrom_name_to_cumsum[chr_name]), - "chromsize": int(chr_len), - } - } - for (chr_name, chr_len) in list(zip(chromosomes, chroms_length_arr)) - ] - - def add_profile(self, values, chr_name, profile_index): - """ - Add a single genomic profile to the output store. This function will aggregate for each resolution. - - :param values: A profile array for one chromosome. - :type values: np.array - :param str chr_name: The name the chromosome corresponding to this array. - :param int profile_index: The index of this profile among the list of profiles. - """ - chromosomes_group = self.chromosomes_group - resolutions = self.resolutions - resolution_exps = [(2**x) for x in range(len(resolutions))] - - chr_len = self.chrom_name_to_length[chr_name] - # Fill in the data for this cluster and chromosome at each resolution. - for resolution, resolution_exp in zip(resolutions, resolution_exps): - arr_len = math.ceil(chr_len / resolution) - - # Pad the array of values with zeros if necessary before reshaping. - padding_len = resolution_exp - (values.shape[0] % resolution_exp) - if values.shape[0] % resolution_exp > 0: - values = np.concatenate((values, np.zeros((padding_len,)))) - # Reshape to be able to sum every `resolution_exp` number of values. - arr = np.reshape(values, (-1, resolution_exp)).sum(axis=-1) - - padding_len = arr_len - arr.shape[0] - if padding_len > 0: - arr = np.concatenate((arr, np.zeros((padding_len,)))) - # Set the array in the Zarr store. - chromosomes_group[chr_name][str( - resolution)][profile_index, :] = arr diff --git a/vitessce/data_utils/multivec/__init__.py b/vitessce/data_utils/multivec/__init__.py new file mode 100644 index 00000000..3dad028e --- /dev/null +++ b/vitessce/data_utils/multivec/__init__.py @@ -0,0 +1,3 @@ +from .multivec import ( + adata_to_multivec_zarr, +) diff --git a/vitessce/data_utils/multivec/entities.py b/vitessce/data_utils/multivec/entities.py new file mode 100644 index 00000000..d8c6cbdd --- /dev/null +++ b/vitessce/data_utils/multivec/entities.py @@ -0,0 +1,124 @@ +import negspy.coordinates as nc +import numpy as np +import math + + +class ArgumentLengthDoesNotMatchCellIdsException(Exception): + pass + + +class NodeNotFoundException(Exception): + pass + + +class GenomicProfiles(): + + """ + Generic class for representing genomic profiles. + """ + + def __init__(self, f, profile_paths, assembly='hg38', starting_resolution=5000, name="Genomic Profiles"): + """ + Constructor method + + :param f: The opened Zarr store object. + :type f: zarr.Group + :param list[list[str]] profile_paths: A list of cell set paths, one path for each profile. + :param str assembly: The genome assembly to use for chromosome lengths, passed to negspy. By default, 'hg38'. + :param int starting_resolution: The starting resolution. By default, 5000. + :param str name: The name for this set of profiles. By default, 'Genomic Profiles'. + """ + + self.f = f + + num_profiles = len(profile_paths) + + compressor = 'default' + + chromosomes = [str(chr_name) for chr_name in nc.get_chromorder( + assembly)[:25]] # TODO: should more than chr1-chrM be used? + chroms_length_arr = np.array( + [nc.get_chrominfo(assembly).chrom_lengths[x] for x in chromosomes], dtype="i8") + chroms_cumsum_arr = np.concatenate( + (np.array([0]), np.cumsum(chroms_length_arr))) + + chrom_name_to_length = dict(zip(chromosomes, chroms_length_arr)) + chrom_name_to_cumsum = dict(zip(chromosomes, chroms_cumsum_arr)) + + # Prepare to fill in resolutions datasets. + resolutions = [starting_resolution * (2 ** x) for x in range(16)] + + chromosomes_group = f.create_group("chromosomes") + for chr_name, chr_len in chrom_name_to_length.items(): + chr_group = chromosomes_group.create_group(chr_name) + # Create each resolution group. + for resolution in resolutions: + chr_shape = (num_profiles, math.ceil(chr_len / resolution)) + chr_group.create_dataset(str( + resolution), shape=chr_shape, dtype="f4", fill_value=np.nan, compressor=compressor) + + # f.attrs should contain the properties required for HiGlass's "tileset_info" requests. + f.attrs['row_infos'] = [ + {"path": profile_path} + for profile_path in profile_paths + ] + f.attrs['resolutions'] = sorted(resolutions, reverse=True) + f.attrs['shape'] = [num_profiles, 256] + f.attrs['name'] = name + f.attrs['coordSystem'] = assembly + + self.resolutions = resolutions + self.chromosomes = chromosomes + self.chromosomes_group = chromosomes_group + self.chrom_name_to_length = chrom_name_to_length + self.num_profiles = num_profiles + + # https://github.com/zarr-developers/zarr-specs/issues/50 + f.attrs['multiscales'] = [ + { + "version": "0.1", + "name": chr_name, + "datasets": [ + {"path": f"chromosomes/{chr_name}/{resolution}"} + for resolution in sorted(resolutions, reverse=True) + ], + "type": "zarr-multivec", + "metadata": { + "chromoffset": int(chrom_name_to_cumsum[chr_name]), + "chromsize": int(chr_len), + } + } + for (chr_name, chr_len) in list(zip(chromosomes, chroms_length_arr)) + ] + + def add_profile(self, values, chr_name, profile_index): + """ + Add a single genomic profile to the output store. This function will aggregate for each resolution. + + :param values: A profile array for one chromosome. + :type values: np.array + :param str chr_name: The name the chromosome corresponding to this array. + :param int profile_index: The index of this profile among the list of profiles. + """ + chromosomes_group = self.chromosomes_group + resolutions = self.resolutions + resolution_exps = [(2**x) for x in range(len(resolutions))] + + chr_len = self.chrom_name_to_length[chr_name] + # Fill in the data for this cluster and chromosome at each resolution. + for resolution, resolution_exp in zip(resolutions, resolution_exps): + arr_len = math.ceil(chr_len / resolution) + + # Pad the array of values with zeros if necessary before reshaping. + padding_len = resolution_exp - (values.shape[0] % resolution_exp) + if values.shape[0] % resolution_exp > 0: + values = np.concatenate((values, np.zeros((padding_len,)))) + # Reshape to be able to sum every `resolution_exp` number of values. + arr = np.reshape(values, (-1, resolution_exp)).sum(axis=-1) + + padding_len = arr_len - arr.shape[0] + if padding_len > 0: + arr = np.concatenate((arr, np.zeros((padding_len,)))) + # Set the array in the Zarr store. + chromosomes_group[chr_name][str( + resolution)][profile_index, :] = arr diff --git a/vitessce/data_utils/multivec.py b/vitessce/data_utils/multivec/multivec.py similarity index 99% rename from vitessce/data_utils/multivec.py rename to vitessce/data_utils/multivec/multivec.py index c1460f6b..ad041900 100644 --- a/vitessce/data_utils/multivec.py +++ b/vitessce/data_utils/multivec/multivec.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -from .anndata import to_dense +from ..anndata import to_dense from .entities import GenomicProfiles diff --git a/vitessce/data_utils/ome_tiff/__init__.py b/vitessce/data_utils/ome_tiff/__init__.py new file mode 100644 index 00000000..cbe49d79 --- /dev/null +++ b/vitessce/data_utils/ome_tiff/__init__.py @@ -0,0 +1,4 @@ +from .ome_tiff import ( + rgb_img_to_ome_tiff, + multiplex_img_to_ome_tiff, +) diff --git a/vitessce/data_utils/ome_tiff/ome_tiff.py b/vitessce/data_utils/ome_tiff/ome_tiff.py new file mode 100644 index 00000000..bfd1ec28 --- /dev/null +++ b/vitessce/data_utils/ome_tiff/ome_tiff.py @@ -0,0 +1,46 @@ +import numpy as np +from tifffile import TiffWriter + + +def rgb_img_to_ome_tiff(img_arr, output_path, img_name="Image", axes="CYX"): + """ + Convert an RGB image to OME-TIFF. + + :param img_arr: The image as a 3D array. + :type img_arr: np.array + :param str output_path: The path to save the Zarr store. + :param str img_name: The name of the image to include in the omero.name NGFF metadata field. + :param str axes: The array axis ordering. By default, "CYX" + """ + img_arr = img_arr.astype(np.dtype('uint8')) + + tiff_writer = TiffWriter(output_path, ome=True) + tiff_writer.write( + img_arr, + metadata={ + 'axes': axes, + 'Channel': {'Name': ['R', 'G', 'B']}, + } + ) + tiff_writer.close() + + +def multiplex_img_to_ome_tiff(img_arr, channel_names, output_path, axes="CYX"): + """ + Convert a multiplexed image to OME-TIFF. + + :param img_arr: The image as a 3D, 4D, or 5D array. + :type img_arr: np.array + :param list[str] channel_names: A list of channel names to include in the omero.channels[].label NGFF metadata field. + :param str output_path: The path to save the Zarr store. + :param str axes: The array axis ordering. By default, "CYX" + """ + tiff_writer = TiffWriter(output_path, ome=True) + tiff_writer.write( + img_arr, + metadata={ + 'axes': axes, + 'Channel': {'Name': channel_names}, + } + ) + tiff_writer.close() diff --git a/vitessce/data_utils/ome_zarr/__init__.py b/vitessce/data_utils/ome_zarr/__init__.py new file mode 100644 index 00000000..92201844 --- /dev/null +++ b/vitessce/data_utils/ome_zarr/__init__.py @@ -0,0 +1,4 @@ +from .ome_zarr import ( + rgb_img_to_ome_zarr, + multiplex_img_to_ome_zarr, +) diff --git a/vitessce/data_utils/ome.py b/vitessce/data_utils/ome_zarr/ome_zarr.py similarity index 70% rename from vitessce/data_utils/ome.py rename to vitessce/data_utils/ome_zarr/ome_zarr.py index 8f87c33c..af747e33 100644 --- a/vitessce/data_utils/ome.py +++ b/vitessce/data_utils/ome_zarr/ome_zarr.py @@ -1,52 +1,10 @@ import numpy as np import zarr from ome_zarr.writer import write_image -from tifffile import TiffWriter -from .anndata import cast_arr - -def rgb_img_to_ome_tiff(img_arr, output_path, img_name="Image", axes="CYX"): - """ - Convert an RGB image to OME-TIFF. - - :param img_arr: The image as a 3D array. - :type img_arr: np.array - :param str output_path: The path to save the Zarr store. - :param str img_name: The name of the image to include in the omero.name NGFF metadata field. - :param str axes: The array axis ordering. By default, "CYX" - """ - img_arr = img_arr.astype(np.dtype('uint8')) - - tiff_writer = TiffWriter(output_path, ome=True) - tiff_writer.write( - img_arr, - metadata={ - 'axes': axes, - 'Channel': {'Name': ['R', 'G', 'B']}, - } - ) - tiff_writer.close() - - -def multiplex_img_to_ome_tiff(img_arr, channel_names, output_path, axes="CYX"): - """ - Convert a multiplexed image to OME-TIFF. - - :param img_arr: The image as a 3D, 4D, or 5D array. - :type img_arr: np.array - :param list[str] channel_names: A list of channel names to include in the omero.channels[].label NGFF metadata field. - :param str output_path: The path to save the Zarr store. - :param str axes: The array axis ordering. By default, "CYX" - """ - tiff_writer = TiffWriter(output_path, ome=True) - tiff_writer.write( - img_arr, - metadata={ - 'axes': axes, - 'Channel': {'Name': channel_names}, - } - ) - tiff_writer.close() +from ..anndata import ( + cast_arr, +) def rgb_img_to_ome_zarr(img_arr, output_path, img_name="Image", chunks=(1, 256, 256), axes="cyx"): @@ -115,6 +73,7 @@ def multiplex_img_to_ome_zarr(img_arr, channel_names, output_path, img_name="Ima :param channel_colors: Dict mapping channel names to color strings to use for the omero.channels[].color NGFF metadata field. If provided, keys should match channel_names. By default, None to use "FFFFFF" for all channels. :type channel_colors: dict or None """ + img_arr = cast_arr(img_arr) dtype_info = np.iinfo(img_arr.dtype) if img_arr.dtype.kind == 'u' or img_arr.dtype.kind == 'i' else np.finfo(img_arr.dtype) diff --git a/vitessce/data_utils/ucsc_cellbrowser/__init__.py b/vitessce/data_utils/ucsc_cellbrowser/__init__.py new file mode 100644 index 00000000..dfc0ebbf --- /dev/null +++ b/vitessce/data_utils/ucsc_cellbrowser/__init__.py @@ -0,0 +1,4 @@ +from .ucsc_cellbrowser import ( + CellBrowserToAnndataZarrConverter, # only exported for testing. + convert_cell_browser_project_to_anndata, +) diff --git a/vitessce/config_converter.py b/vitessce/data_utils/ucsc_cellbrowser/ucsc_cellbrowser.py similarity index 99% rename from vitessce/config_converter.py rename to vitessce/data_utils/ucsc_cellbrowser/ucsc_cellbrowser.py index b3b57bf5..133d04af 100644 --- a/vitessce/config_converter.py +++ b/vitessce/data_utils/ucsc_cellbrowser/ucsc_cellbrowser.py @@ -6,7 +6,7 @@ import gzip import io -from vitessce.data_utils import ( +from ..anndata import ( optimize_adata, )