Skip to content

Commit

Permalink
API docs (#360)
Browse files Browse the repository at this point in the history
* Fix some type errors

* Start updating dependencies pinning

* Update test artifact for apparent change in pandas serialization convention

* Update neighborhood enrichment and auto-correlation squidpy test data artifacts for update to library

* Limit expected precision of autocorrelation test artifacts

* Version bump

* Deprecate graph-plugin-dockerized test, too slow

* Update reanalysis scripts for library updates

* Add typing annotation to package

* Try to fix openapi.json url

* Add root_path!

* Configure servers in openapi spec

* Deprecate empty root

* Start openapi docs

* Update doc overview

* Update docs

* Update doc

* Update doc

* Add more docs

* More docs

* More docs

* Deprecate unused endpoint, update endpoint docstrings and pydantic types, with examples

* api version bump

* Update tests for new call signature

* Update test artifacts
  • Loading branch information
jimmymathews authored Sep 30, 2024
1 parent e6aef1a commit 5e13551
Show file tree
Hide file tree
Showing 11 changed files with 132 additions and 83 deletions.
81 changes: 37 additions & 44 deletions spatialprofilingtoolbox/apiserver/app/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""The API service's endpoint handlers."""

from typing import cast
from typing import Annotated
from typing import Literal
Expand Down Expand Up @@ -42,11 +41,12 @@
ValidChannelListPositives2,
ValidChannelListNegatives2,
ValidFeatureClass,
ValidFeatureClass2Phenotypes,
)
from spatialprofilingtoolbox.graphs.config_reader import read_plot_importance_fractions_config
from spatialprofilingtoolbox.graphs.importance_fractions import PlotGenerator

VERSION = '0.25.0'
VERSION = '0.26.0'

TITLE = 'Single cell studies data API'

Expand All @@ -56,12 +56,12 @@
This API provides useful access to the **single-cell datasets** residing in a database that is
curated and maintained by the [Nadeem Lab](https://nadeemlab.org).
The public portion of the database includes phenotype and slide position information for
The public portion of the database includes phenotype and slide position information for:
* ~9 million cells
* across about 1000 specimens
* typically with around 30 protein targets quantified per cell
* from cancers of the breast and lung, as well as urothelial cancer and melanoma
* from cancers from several sites: breast, lung, urothelial cancer and melanoma
* with a range of outcome assignments depending on the study design (often immunotherapy response)
This is the data source for the Spatial Profiling Toolbox (SPT) web application located at
Expand Down Expand Up @@ -93,9 +93,8 @@
The documentation you are reading in the browser is automatically generated and comes in two
flavors:
* [Redoc variant](https://oncopathtk.org/api/redoc)
* [Swagger UI variant](https://oncopathtk.org/api/docs) (includes a list of the JSON-formatted
return value types)
* the [Redoc variant](https://oncopathtk.org/api/redoc)
* the [Swagger UI variant](https://oncopathtk.org/api/docs)
The system of JSON-formatted return values is a simplified version of the complete
[schema](https://adiframework.com/docs_site/scstudies_quick_reference.html#) which was used to guide
Expand Down Expand Up @@ -214,16 +213,6 @@ async def get_study_summary(
return query().get_study_summary(study)


@app.get("/study-findings/")
async def get_study_findings(
study: ValidStudy,
) -> list[str]:
"""
Brief list of results of re-analysis of the given study.
"""
return query().get_study_findings(study)


@app.get("/channels/")
async def get_channels(
study: ValidStudy,
Expand All @@ -236,7 +225,8 @@ async def get_channels(
async def get_phenotype_symbols(
study: ValidStudy,
) -> list[PhenotypeSymbolAndCriteria]:
"""The display names and identifiers for the "composite" phenotypes in a given study."""
"""The display names and identifiers for the "composite" phenotypes in a given study, defined
by combination of positive and negative markers."""
symbols: tuple[PhenotypeSymbol, ...] = query().get_phenotype_symbols(study)
return list(
PhenotypeSymbolAndCriteria(
Expand All @@ -253,8 +243,8 @@ async def get_phenotype_criteria(
study: ValidStudy,
phenotype_symbol: ValidPhenotypeSymbol,
) -> PhenotypeCriteria:
"""Get lists of the positive markers and negative markers defining a given named phenotype, in
the context of the given study.
"""Get lists of the positive markers and negative markers defining a given named phenotype,
itself specified by identifier index, in the context of the given study.
"""
return query().get_phenotype_criteria(study, phenotype_symbol)

Expand All @@ -265,8 +255,7 @@ async def get_anonymous_phenotype_counts_fast(
negative_marker: ValidChannelListNegatives,
study: ValidStudy,
) -> PhenotypeCounts:
"""Computes the number of cells satisfying the given positive and negative criteria, in the
context of a given study.
"""Alternative syntax for `phenotype-counts`. To be deprecated.
"""
return _get_anonymous_phenotype_counts_fast(positive_marker, negative_marker, study)

Expand All @@ -276,32 +265,36 @@ def _get_anonymous_phenotype_counts_fast(
negative_marker: ValidChannelListNegatives,
study: ValidStudy,
) -> PhenotypeCounts:
number_cells = cast(int, query().get_number_cells(study))
counts = get_phenotype_counts(positive_marker, negative_marker, study, number_cells)
counts = _get_phenotype_counts(positive_marker, negative_marker, study)
return counts


@app.get("/phenotype-counts/")
async def get_phenotype_counts_nonblocking(
async def get_phenotype_counts(
positive_marker: ValidChannelListPositives,
negative_marker: ValidChannelListNegatives,
study: ValidStudy,
) -> PhenotypeCounts:
"""Computes the number of cells satisfying the given positive and negative criteria, in the
context of a given study. Non-blocking, has a "pending" flag in the response.
context of a given study, for each sample individually. This request should generally be
non-blocking, returning immediately with either a full or partial set of count values. A
"pending" flag in the response indicates which scenario is the case. If pending, poll this
endpoint until all values are available.
"""
counts = get_phenotype_counts(positive_marker, negative_marker, study, 0, blocking=False)
counts = _get_phenotype_counts(positive_marker, negative_marker, study, blocking=False)
return counts


@app.get("/request-spatial-metrics-computation/")
async def request_spatial_metrics_computation(
study: ValidStudy,
phenotype: ValidPhenotypeList,
feature_class: ValidFeatureClass,
feature_class: ValidFeatureClass2Phenotypes,
radius: float | None = None,
) -> UnivariateMetricsComputationResult:
"""Spatial proximity statistics between phenotype cell sets, as calculated by Squidpy."""
"""Spatial proximity statistics like the single-phenotype case, but between *two* phenotype cell
sets, where the phenotypes are specified by index among the pre-defined/combination phenotypes
for the given study."""
phenotypes = phenotype
criteria: list[PhenotypeCriteria] = [
query().retrieve_signature_of_phenotype(p, study) for p in phenotypes
Expand All @@ -321,8 +314,13 @@ async def request_spatial_metrics_computation_custom_phenotype(
feature_class: ValidFeatureClass,
radius: float | None = None,
) -> UnivariateMetricsComputationResult:
"""Spatial proximity statistics for a single custom-defined phenotype (cell set), as
calculated by Squidpy.
"""Spatial proximity statistics for a single custom-defined phenotype (cell set). Different
metrics are available, including several provided by the Squidpy package. If a feature class is
specified which requires two cell sets, the provided cell set will be duplicated. The radius
value provides a scale to the metric computation algorithm. Here "request" connotes that the
query will request computation and then return. Poll this endpoint until all values are
available. Note that `positive_marker` and `negative_marker` parameters can be supplied
multiple times, once for each item in the list of positive or negative markers respectively.
"""
markers = [positive_marker, negative_marker]
return get_squidpy_metrics(study, markers, feature_class, radius=radius)
Expand All @@ -338,8 +336,7 @@ async def request_spatial_metrics_computation_custom_phenotypes( # pylint: disa
feature_class: ValidFeatureClass,
radius: float | None = None,
) -> UnivariateMetricsComputationResult:
"""Spatial proximity statistics for a pair of custom-defined phenotypes (cell sets), most
calculated by Squidpy.
"""Spatial proximity statistics for a pair of custom-defined phenotypes (cell sets).
"""
markers = (positive_marker, negative_marker, positive_marker2, negative_marker2)
if feature_class == 'proximity':
Expand Down Expand Up @@ -398,50 +395,45 @@ def _get_importance_composition(
cohort_stratifier,
cell_limit,
)
return get_phenotype_counts(
return _get_phenotype_counts(
positive_marker,
negative_marker,
study,
len(cells_selected),
cells_selected,
)


def get_phenotype_counts_cached(
def _get_phenotype_counts_cached(
positives: tuple[str, ...],
negatives: tuple[str, ...],
study: str,
number_cells: int,
selected: tuple[int, ...],
blocking: bool = True,
) -> PhenotypeCounts:
counts = OnDemandRequester.get_counts_by_specimen(
positives,
negatives,
study,
number_cells,
set(selected) if selected is not None else None,
blocking = blocking,
)
return counts


def get_phenotype_counts(
def _get_phenotype_counts(
positive_marker: ValidChannelListPositives,
negative_marker: ValidChannelListNegatives,
study: ValidStudy,
number_cells: int,
cells_selected: set[int] | None = None,
blocking: bool = True,
) -> PhenotypeCounts:
"""For each specimen, return the fraction of selected/all cells expressing the phenotype."""
positive_markers = [m for m in positive_marker if m != '']
negative_markers = [m for m in negative_marker if m != '']
counts = get_phenotype_counts_cached(
counts = _get_phenotype_counts_cached(
tuple(positive_markers),
tuple(negative_markers),
study,
number_cells,
tuple(sorted(list(cells_selected))) if cells_selected is not None else (),
blocking = blocking,
)
Expand Down Expand Up @@ -482,7 +474,7 @@ async def get_cell_data_binary(
"""
Get streaming cell-level location and phenotype data in a custom binary format.
The format is documented [here](https://github.com/nadeemlab/SPT/blob/main/docs/cells.md).
The sample may be "UMAP virtual sample" if UMAP dimensional reduction is available.
"""
has_umap = query().has_umap(study)
Expand Down Expand Up @@ -582,7 +574,8 @@ async def importance_fraction_plot(
study: ValidStudy,
img_format: Literal['svg', 'png'] = 'svg',
) -> StreamingResponse:
"""Return a plot of the fraction of important cells expressing a given phenotype."""
"""Return a plot of the fraction of the top most important cells for GNN classification,
expressing various phenotypes."""
raw = get_importance_fraction_plot(str(study), str(img_format))
buffer = BytesIO()
buffer.write(raw)
Expand Down
21 changes: 19 additions & 2 deletions spatialprofilingtoolbox/apiserver/app/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def valid_channel_list(markers: list[str]) -> list[str]:
raise ValueError(f'Marker names invalid: {missing}')


ChannelList = Annotated[list[str], Query()]
ChannelList = Annotated[list[str], Query(examples=['B2M', 'SOX10'])]


async def valid_channel_list_positives(positive_marker: ChannelList) -> list[str]:
Expand All @@ -106,7 +106,23 @@ async def valid_channel_list_negatives2(negative_marker2: ChannelList) -> list[s


async def valid_spatial_feature_classname(
feature_class: str = Query(min_length=1, max_length=100),
feature_class: str = Query(
min_length=1,
max_length=100,
examples=['proximity', 'neighborhood enrichment', 'co-occurrence', 'ripley', 'spatial autocorrelation'],
),
) -> str:
if feature_class not in (list(squidpy_feature_classnames()) + ['proximity']):
raise ValueError(f'Feature class "{feature_class}" does not exist.')
return feature_class


async def valid_spatial_feature_classname2(
feature_class: str = Query(
min_length=1,
max_length=100,
examples=['proximity', 'neighborhood enrichment', 'co-occurrence'],
),
) -> str:
if feature_class not in (list(squidpy_feature_classnames()) + ['proximity']):
raise ValueError(f'Feature class "{feature_class}" does not exist.')
Expand All @@ -123,3 +139,4 @@ async def valid_spatial_feature_classname(
ValidChannelListPositives2 = Annotated[list[str], Depends(valid_channel_list_positives2)]
ValidChannelListNegatives2 = Annotated[list[str], Depends(valid_channel_list_negatives2)]
ValidFeatureClass = Annotated[str, Depends(valid_spatial_feature_classname)]
ValidFeatureClass2Phenotypes = Annotated[str, Depends(valid_spatial_feature_classname2)]
Loading

0 comments on commit 5e13551

Please sign in to comment.