From 5e1355189382faef8817a137664649b6846ac6b9 Mon Sep 17 00:00:00 2001 From: James Mathews Date: Sun, 29 Sep 2024 23:44:36 -0400 Subject: [PATCH] API docs (#360) * Fix some type errors * Start updating dependencies pinning * Update test artifact for apparent change in pandas serialization convention * Update neighborhood enrichment and auto-correlation squidpy test data artifacts for update to library * Limit expected precision of autocorrelation test artifacts * Version bump * Deprecate graph-plugin-dockerized test, too slow * Update reanalysis scripts for library updates * Add typing annotation to package * Try to fix openapi.json url * Add root_path! * Configure servers in openapi spec * Deprecate empty root * Start openapi docs * Update doc overview * Update docs * Update doc * Update doc * Add more docs * More docs * More docs * Deprecate unused endpoint, update endpoint docstrings and pydantic types, with examples * api version bump * Update tests for new call signature * Update test artifacts --- spatialprofilingtoolbox/apiserver/app/main.py | 81 ++++++++---------- .../apiserver/app/validation.py | 21 ++++- .../db/exchange_data_formats/metrics.py | 84 +++++++++++++++---- .../db/exchange_data_formats/study.py | 7 +- .../ondemand/request_scheduling.py | 8 +- ...test_counts_query_delegation_edge_cases.py | 2 - .../expected_counts_structured1.json | 3 - .../expected_counts_structured2.json | 3 - .../ondemand/module_tests/get_class_counts.py | 2 +- .../test_edge_cases_few_markers.py | 2 +- .../test_single_signature_count_query.py | 2 +- 11 files changed, 132 insertions(+), 83 deletions(-) diff --git a/spatialprofilingtoolbox/apiserver/app/main.py b/spatialprofilingtoolbox/apiserver/app/main.py index 22ad9cea..0d549553 100644 --- a/spatialprofilingtoolbox/apiserver/app/main.py +++ b/spatialprofilingtoolbox/apiserver/app/main.py @@ -1,5 +1,4 @@ """The API service's endpoint handlers.""" - from typing import cast from typing import Annotated from typing import 
Literal @@ -42,11 +41,12 @@ ValidChannelListPositives2, ValidChannelListNegatives2, ValidFeatureClass, + ValidFeatureClass2Phenotypes, ) from spatialprofilingtoolbox.graphs.config_reader import read_plot_importance_fractions_config from spatialprofilingtoolbox.graphs.importance_fractions import PlotGenerator -VERSION = '0.25.0' +VERSION = '0.26.0' TITLE = 'Single cell studies data API' @@ -56,12 +56,12 @@ This API provides useful access to the **single-cell datasets** residing in a database that is curated and maintained by the [Nadeem Lab](https://nadeemlab.org). -The public portion of the database includes phenotype and slide position information for +The public portion of the database includes phenotype and slide position information for: * ~9 million cells * across about 1000 specimens * typically with around 30 protein targets quantified per cell -* from cancers of the breast and lung, as well as urothelial cancer and melanoma +* from cancers from several sites: breast, lung, urothelial cancer and melanoma * with a range of outcome assignments depending on the study design (often immunotherapy response) This is the data source for the Spatial Profiling Toolbox (SPT) web application located at @@ -93,9 +93,8 @@ The documentation you are reading in the browser is automatically generated and comes in two flavors: -* [Redoc variant](https://oncopathtk.org/api/redoc) -* [Swagger UI variant](https://oncopathtk.org/api/docs) (includes a list of the JSON-formatted - return value types) +* the [Redoc variant](https://oncopathtk.org/api/redoc) +* the [Swagger UI variant](https://oncopathtk.org/api/docs) The system of JSON-formatted return values is a simplified version of the complete [schema](https://adiframework.com/docs_site/scstudies_quick_reference.html#) which was used to guide @@ -214,16 +213,6 @@ async def get_study_summary( return query().get_study_summary(study) -@app.get("/study-findings/") -async def get_study_findings( - study: ValidStudy, -) -> list[str]: 
- """ - Brief list of results of re-analysis of the given study. - """ - return query().get_study_findings(study) - - @app.get("/channels/") async def get_channels( study: ValidStudy, @@ -236,7 +225,8 @@ async def get_channels( async def get_phenotype_symbols( study: ValidStudy, ) -> list[PhenotypeSymbolAndCriteria]: - """The display names and identifiers for the "composite" phenotypes in a given study.""" + """The display names and identifiers for the "composite" phenotypes in a given study, defined + by combination of positive and negative markers.""" symbols: tuple[PhenotypeSymbol, ...] = query().get_phenotype_symbols(study) return list( PhenotypeSymbolAndCriteria( @@ -253,8 +243,8 @@ async def get_phenotype_criteria( study: ValidStudy, phenotype_symbol: ValidPhenotypeSymbol, ) -> PhenotypeCriteria: - """Get lists of the positive markers and negative markers defining a given named phenotype, in - the context of the given study. + """Get lists of the positive markers and negative markers defining a given named phenotype, + itself specified by identifier index, in the context of the given study. """ return query().get_phenotype_criteria(study, phenotype_symbol) @@ -265,8 +255,7 @@ async def get_anonymous_phenotype_counts_fast( negative_marker: ValidChannelListNegatives, study: ValidStudy, ) -> PhenotypeCounts: - """Computes the number of cells satisfying the given positive and negative criteria, in the - context of a given study. + """Alternative syntax for `phenotype-counts`. To be deprecated. 
""" return _get_anonymous_phenotype_counts_fast(positive_marker, negative_marker, study) @@ -276,21 +265,23 @@ def _get_anonymous_phenotype_counts_fast( negative_marker: ValidChannelListNegatives, study: ValidStudy, ) -> PhenotypeCounts: - number_cells = cast(int, query().get_number_cells(study)) - counts = get_phenotype_counts(positive_marker, negative_marker, study, number_cells) + counts = _get_phenotype_counts(positive_marker, negative_marker, study) return counts @app.get("/phenotype-counts/") -async def get_phenotype_counts_nonblocking( +async def get_phenotype_counts( positive_marker: ValidChannelListPositives, negative_marker: ValidChannelListNegatives, study: ValidStudy, ) -> PhenotypeCounts: """Computes the number of cells satisfying the given positive and negative criteria, in the - context of a given study. Non-blocking, has a "pending" flag in the response. + context of a given study, for each sample individually. This request should generally be + non-blocking, returning immediately with either a full or partial set of count values. A + "pending" flag in the response indicates which scenario is the case. If pending, poll this + endpoint until all values are available. 
""" - counts = get_phenotype_counts(positive_marker, negative_marker, study, 0, blocking=False) + counts = _get_phenotype_counts(positive_marker, negative_marker, study, blocking=False) return counts @@ -298,10 +289,12 @@ async def get_phenotype_counts_nonblocking( async def request_spatial_metrics_computation( study: ValidStudy, phenotype: ValidPhenotypeList, - feature_class: ValidFeatureClass, + feature_class: ValidFeatureClass2Phenotypes, radius: float | None = None, ) -> UnivariateMetricsComputationResult: - """Spatial proximity statistics between phenotype cell sets, as calculated by Squidpy.""" + """Spatial proximity statistics like the single-phenotype case, but between *two* phenotype cell + sets, where the phenotypes are specified by index among the pre-defined/combination phenotypes + for the given study.""" phenotypes = phenotype criteria: list[PhenotypeCriteria] = [ query().retrieve_signature_of_phenotype(p, study) for p in phenotypes @@ -321,8 +314,13 @@ async def request_spatial_metrics_computation_custom_phenotype( feature_class: ValidFeatureClass, radius: float | None = None, ) -> UnivariateMetricsComputationResult: - """Spatial proximity statistics for a single custom-defined phenotype (cell set), as - calculated by Squidpy. + """Spatial proximity statistics for a single custom-defined phenotype (cell set). Different + metrics are available, including several provided by the Squidpy package. If a feature class is + specified which requires two cell sets, the provided cell set will be duplicated. The radius + value provides a scale to the metric computation algorithm. Here "request" connotes that the + query will request computation and then return. Poll this endpoint until all values are + available. Note that `positive_marker` and `negative_marker` paramters can be supplied + multiple times, once for each item in the list of positive or negative markers respectively. 
""" markers = [positive_marker, negative_marker] return get_squidpy_metrics(study, markers, feature_class, radius=radius) @@ -338,8 +336,7 @@ async def request_spatial_metrics_computation_custom_phenotypes( # pylint: disa feature_class: ValidFeatureClass, radius: float | None = None, ) -> UnivariateMetricsComputationResult: - """Spatial proximity statistics for a pair of custom-defined phenotypes (cell sets), most - calculated by Squidpy. + """Spatial proximity statistics for a pair of custom-defined phenotypes (cell sets). """ markers = (positive_marker, negative_marker, positive_marker2, negative_marker2) if feature_class == 'proximity': @@ -398,20 +395,18 @@ def _get_importance_composition( cohort_stratifier, cell_limit, ) - return get_phenotype_counts( + return _get_phenotype_counts( positive_marker, negative_marker, study, - len(cells_selected), cells_selected, ) -def get_phenotype_counts_cached( +def _get_phenotype_counts_cached( positives: tuple[str, ...], negatives: tuple[str, ...], study: str, - number_cells: int, selected: tuple[int, ...], blocking: bool = True, ) -> PhenotypeCounts: @@ -419,29 +414,26 @@ def get_phenotype_counts_cached( positives, negatives, study, - number_cells, set(selected) if selected is not None else None, blocking = blocking, ) return counts -def get_phenotype_counts( +def _get_phenotype_counts( positive_marker: ValidChannelListPositives, negative_marker: ValidChannelListNegatives, study: ValidStudy, - number_cells: int, cells_selected: set[int] | None = None, blocking: bool = True, ) -> PhenotypeCounts: """For each specimen, return the fraction of selected/all cells expressing the phenotype.""" positive_markers = [m for m in positive_marker if m != ''] negative_markers = [m for m in negative_marker if m != ''] - counts = get_phenotype_counts_cached( + counts = _get_phenotype_counts_cached( tuple(positive_markers), tuple(negative_markers), study, - number_cells, tuple(sorted(list(cells_selected))) if cells_selected is not None 
else (), blocking = blocking, ) @@ -482,7 +474,7 @@ async def get_cell_data_binary( """ Get streaming cell-level location and phenotype data in a custom binary format. The format is documented [here](https://github.com/nadeemlab/SPT/blob/main/docs/cells.md). - + The sample may be "UMAP virtual sample" if UMAP dimensional reduction is available. """ has_umap = query().has_umap(study) @@ -582,7 +574,8 @@ async def importance_fraction_plot( study: ValidStudy, img_format: Literal['svg', 'png'] = 'svg', ) -> StreamingResponse: - """Return a plot of the fraction of important cells expressing a given phenotype.""" + """Return a plot of the fraction of the top most important cells for GNN classification, + expressing various phenotypes.""" raw = get_importance_fraction_plot(str(study), str(img_format)) buffer = BytesIO() buffer.write(raw) diff --git a/spatialprofilingtoolbox/apiserver/app/validation.py b/spatialprofilingtoolbox/apiserver/app/validation.py index de90363f..113a39ca 100644 --- a/spatialprofilingtoolbox/apiserver/app/validation.py +++ b/spatialprofilingtoolbox/apiserver/app/validation.py @@ -86,7 +86,7 @@ def valid_channel_list(markers: list[str]) -> list[str]: raise ValueError(f'Marker names invalid: {missing}') -ChannelList = Annotated[list[str], Query()] +ChannelList = Annotated[list[str], Query(examples=['B2M', 'SOX10'])] async def valid_channel_list_positives(positive_marker: ChannelList) -> list[str]: @@ -106,7 +106,23 @@ async def valid_channel_list_negatives2(negative_marker2: ChannelList) -> list[s async def valid_spatial_feature_classname( - feature_class: str = Query(min_length=1, max_length=100), + feature_class: str = Query( + min_length=1, + max_length=100, + examples=['proximity', 'neighborhood enrichment', 'co-occurrence', 'ripley', 'spatial autocorrelation'], + ), +) -> str: + if feature_class not in (list(squidpy_feature_classnames()) + ['proximity']): + raise ValueError(f'Feature class "{feature_class}" does not exist.') + return 
feature_class + + +async def valid_spatial_feature_classname2( + feature_class: str = Query( + min_length=1, + max_length=100, + examples=['proximity', 'neighborhood enrichment', 'co-occurrence'], + ), ) -> str: if feature_class not in (list(squidpy_feature_classnames()) + ['proximity']): raise ValueError(f'Feature class "{feature_class}" does not exist.') @@ -123,3 +139,4 @@ async def valid_spatial_feature_classname( ValidChannelListPositives2 = Annotated[list[str], Depends(valid_channel_list_positives2)] ValidChannelListNegatives2 = Annotated[list[str], Depends(valid_channel_list_negatives2)] ValidFeatureClass = Annotated[str, Depends(valid_spatial_feature_classname)] +ValidFeatureClass2Phenotypes = Annotated[str, Depends(valid_spatial_feature_classname2)] diff --git a/spatialprofilingtoolbox/db/exchange_data_formats/metrics.py b/spatialprofilingtoolbox/db/exchange_data_formats/metrics.py index 61f27901..bc171fa2 100644 --- a/spatialprofilingtoolbox/db/exchange_data_formats/metrics.py +++ b/spatialprofilingtoolbox/db/exchange_data_formats/metrics.py @@ -20,8 +20,17 @@ class PhenotypeSymbol(BaseModel): class Channel(BaseModel): - """The symbol for one of the imaged or measured channels..""" + """The symbol for one of the imaged or measured channels.""" symbol: str + model_config = { + "json_schema_extra": { + "examples": [ + {'symbol': 'CD3'}, + {'symbol': 'CD4'}, + {'symbol': 'FOXP3'}, + ] + } + } class PhenotypeCriteria(BaseModel): @@ -29,15 +38,15 @@ class PhenotypeCriteria(BaseModel): positive_markers: tuple[str, ...] negative_markers: tuple[str, ...] 
model_config = { - "json_schema_extra": { - "examples": [ + 'json_schema_extra': { + 'examples': [ { - "positive_markers": ['CD3', 'CD4'], - "negative_markers": ['FOXP3'], + 'positive_markers': ['CD3', 'CD4'], + 'negative_markers': ['FOXP3'], }, { - "positive_markers": ['SOX10'], - "negative_markers": [], + 'positive_markers': ['SOX10'], + 'negative_markers': [], }, ] } @@ -49,15 +58,38 @@ class PhenotypeSymbolAndCriteria(BaseModel): handle_string: str identifier: str criteria: PhenotypeCriteria + model_config = { + 'json_schema_extra': { + 'examples': [ + { + 'handle_string': 'T regulatory cells', + 'identifier': '8', + 'criteria': { + 'positive_markers': ['CD3', 'CD4', 'FOXP3'], + 'negative_markers': [], + } + }, + ] + } + } -class CompositePhenotype(BaseModel): - """For named phenotypes, the name and the internal identifier used for matching up related - records. +class WrapperPhenotype(BaseModel): + """The phenotype criteria used during the counting procedure (not applicable in GNN case). """ - name: str - identifier: str criteria: PhenotypeCriteria + model_config = { + 'json_schema_extra': { + 'examples': [ + { + 'criteria': { + 'positive_markers': ['CD3', 'CD4', 'FOXP3'], + 'negative_markers': [], + }, + }, + ] + } + } class PhenotypeCount(BaseModel): @@ -67,13 +99,23 @@ class PhenotypeCount(BaseModel): specimen: str count: int | None percentage: float | None + model_config = { + 'json_schema_extra': { + 'examples': [ + { + 'specimen': 'Sample001', + 'count': 3108, + 'percentage': 42.3, + }, + ] + } + } class PhenotypeCounts(BaseModel): """The number of cells of a given phenotype across all samples in a given study.""" counts: tuple[PhenotypeCount, ...] 
- phenotype: CompositePhenotype - number_cells_in_study: int + phenotype: WrapperPhenotype is_pending: bool @@ -83,6 +125,20 @@ class UnivariateMetricsComputationResult(BaseModel): """ values: dict[str, float | None] is_pending: bool + model_config = { + 'json_schema_extra': { + 'examples': [ + { + 'values': { + 'Sample001': 3.4, + 'Sample700': 0.01, + 'Sample715': 0.02, + }, + 'is_pending': False, + }, + ] + } + } class CellData(BaseModel): diff --git a/spatialprofilingtoolbox/db/exchange_data_formats/study.py b/spatialprofilingtoolbox/db/exchange_data_formats/study.py index 666e4987..8ecf988e 100644 --- a/spatialprofilingtoolbox/db/exchange_data_formats/study.py +++ b/spatialprofilingtoolbox/db/exchange_data_formats/study.py @@ -18,7 +18,7 @@ class StudyHandle(BaseModel): "json_schema_extra": { "examples": [ { - "name": "Dataset ABC", + "handle": "Dataset ABC", "display_name_detail": "Dataset ABC - Nature 2030" } ] @@ -237,8 +237,3 @@ class StudySummary(BaseModel): cohorts: SampleCohorts findings: list[str] has_umap: bool - - # "findings": [ - # "Proximity (occurrence within 50um) of KRT7+ CK+ cells in neighborhoods of KRT14+ CK+ cells is elevated 1.6 times in the sensitive cohort compared with refractory (p=0.03). 
/study/breast-cancer-imc/analysis/detail?phenotypes=KRT14%2B+CK%2B%2CKRT7%2B+CK%2B&selected_phenotypes=KRT14%2B+CK%2B%26KRT7%2B+CK%2B&enrichfields=KRT14%2B+CK%2B%26KRT7%2B+CK%2B-proximity&cohorts=1%2C2" - # ], - # "has_umap": True, diff --git a/spatialprofilingtoolbox/ondemand/request_scheduling.py b/spatialprofilingtoolbox/ondemand/request_scheduling.py index 8b09aa0b..3c339202 100644 --- a/spatialprofilingtoolbox/ondemand/request_scheduling.py +++ b/spatialprofilingtoolbox/ondemand/request_scheduling.py @@ -15,7 +15,7 @@ PhenotypeCriteria, PhenotypeCount, PhenotypeCounts, - CompositePhenotype, + WrapperPhenotype, UnivariateMetricsComputationResult, ) from spatialprofilingtoolbox.ondemand.timeout import create_timeout_handler @@ -94,7 +94,6 @@ def get_counts_by_specimen( positives: tuple[str, ...], negatives: tuple[str, ...], study_name: str, - number_cells: int, cells_selected: set[int], blocking: bool = True, ) -> PhenotypeCounts: @@ -122,12 +121,9 @@ def get_counts_by_specimen( ) for sample in combined_keys ] + [PhenotypeCount(specimen=sample, count=None, percentage=None) for sample in additional]), - phenotype=CompositePhenotype( - name='', - identifier='', + phenotype=WrapperPhenotype( criteria=phenotype, ), - number_cells_in_study=number_cells, is_pending=pending, ) diff --git a/test/apiserver/module_tests/test_counts_query_delegation_edge_cases.py b/test/apiserver/module_tests/test_counts_query_delegation_edge_cases.py index 8cc74402..80f9f2f1 100644 --- a/test/apiserver/module_tests/test_counts_query_delegation_edge_cases.py +++ b/test/apiserver/module_tests/test_counts_query_delegation_edge_cases.py @@ -46,8 +46,6 @@ def main(): phenotype_total = sum( phenotype_count['count'] for phenotype_count in response['counts'] ) - total = response['number_cells_in_study'] - print(total) if phenotype_total != expected: raise ValueError(f'Got wrong number: {phenotype_total}, expected {expected}.') diff --git 
a/test/ondemand/module_tests/expected_counts_structured1.json b/test/ondemand/module_tests/expected_counts_structured1.json index b8db5441..c31687ff 100644 --- a/test/ondemand/module_tests/expected_counts_structured1.json +++ b/test/ondemand/module_tests/expected_counts_structured1.json @@ -37,8 +37,6 @@ } ], "phenotype": { - "name": "", - "identifier": "", "criteria": { "positive_markers": [ "CD3" @@ -49,6 +47,5 @@ ] } }, - "number_cells_in_study": 0, "is_pending": false } \ No newline at end of file diff --git a/test/ondemand/module_tests/expected_counts_structured2.json b/test/ondemand/module_tests/expected_counts_structured2.json index 4f0cc592..e61fc021 100644 --- a/test/ondemand/module_tests/expected_counts_structured2.json +++ b/test/ondemand/module_tests/expected_counts_structured2.json @@ -22,8 +22,6 @@ } ], "phenotype": { - "name": "", - "identifier": "", "criteria": { "positive_markers": [ "CD3 epsilon" @@ -33,6 +31,5 @@ ] } }, - "number_cells_in_study": 0, "is_pending": false } \ No newline at end of file diff --git a/test/ondemand/module_tests/get_class_counts.py b/test/ondemand/module_tests/get_class_counts.py index b67cb5f9..26a1c43e 100644 --- a/test/ondemand/module_tests/get_class_counts.py +++ b/test/ondemand/module_tests/get_class_counts.py @@ -6,7 +6,7 @@ def get_counts(study_name, positives, negatives): - return OnDemandRequester.get_counts_by_specimen(positives, negatives, study_name, 0, ()) + return OnDemandRequester.get_counts_by_specimen(positives, negatives, study_name, ()) def main(): diff --git a/test/ondemand/module_tests/test_edge_cases_few_markers.py b/test/ondemand/module_tests/test_edge_cases_few_markers.py index 0686df92..4027db30 100644 --- a/test/ondemand/module_tests/test_edge_cases_few_markers.py +++ b/test/ondemand/module_tests/test_edge_cases_few_markers.py @@ -5,7 +5,7 @@ def retrieve_case(case): study_name = 'Melanoma intralesional IL2' - counts = OnDemandRequester.get_counts_by_specimen(case[0], case[1], study_name, 0, ()) 
+ counts = OnDemandRequester.get_counts_by_specimen(case[0], case[1], study_name, ()) total = sum(entry.count for entry in counts.counts) return total diff --git a/test/ondemand/module_tests/test_single_signature_count_query.py b/test/ondemand/module_tests/test_single_signature_count_query.py index eb268366..5f25d509 100644 --- a/test/ondemand/module_tests/test_single_signature_count_query.py +++ b/test/ondemand/module_tests/test_single_signature_count_query.py @@ -7,7 +7,7 @@ def main(): study_name = 'Melanoma intralesional IL2' - counts = OnDemandRequester.get_counts_by_specimen(['CD3'], ['CD8', 'CD20'], study_name, 0, ()) + counts = OnDemandRequester.get_counts_by_specimen(['CD3'], ['CD8', 'CD20'], study_name, ()) counts_json = json.dumps(counts.model_dump(), indent=4).rstrip() with open('module_tests/expected_counts_structured1.json', 'rt', encoding='utf-8') as file: