mostly finished squidpy on demand + cascading refactors

nadeemlab · Jul 29, 2023 · 483b784 · 483b784
1 parent 88dd3f4
commit 483b784
Show file tree

Hide file tree

Showing 21 changed files with 940 additions and 616 deletions.
diff --git a/spatialprofilingtoolbox/apiserver/app/main.py b/spatialprofilingtoolbox/apiserver/app/main.py
@@ -9,24 +9,19 @@
 from fastapi import Response
 from fastapi.responses import StreamingResponse
 
-from spatialprofilingtoolbox.ondemand.counts_service_client import CountRequester
+from spatialprofilingtoolbox.ondemand.service_client import OnDemandRequester
 from spatialprofilingtoolbox.db.exchange_data_formats.study import StudyHandle
 from spatialprofilingtoolbox.db.exchange_data_formats.study import StudySummary
-from spatialprofilingtoolbox.db.exchange_data_formats.metrics import CellFractionsSummary
-from spatialprofilingtoolbox.db.exchange_data_formats.metrics import PhenotypeSymbol
-from spatialprofilingtoolbox.db.exchange_data_formats.metrics import Channel
-from spatialprofilingtoolbox.db.exchange_data_formats.metrics import PhenotypeCriteria
-from spatialprofilingtoolbox.db.exchange_data_formats.metrics import PhenotypeCounts
-from spatialprofilingtoolbox.db.exchange_data_formats.metrics import \
-    ProximityMetricsComputationResult
+from spatialprofilingtoolbox.db.exchange_data_formats.metrics import CellFractionsSummary, \
+    PhenotypeSymbol, Channel, PhenotypeCriteria, PhenotypeCounts, \
+    ProximityMetricsComputationResult, SquidpyMetricsComputationResult
 from spatialprofilingtoolbox.db.exchange_data_formats.metrics import UMAPChannel
 from spatialprofilingtoolbox.db.querying import query
 from spatialprofilingtoolbox.apiserver.app.validation import (
     ValidChannel,
     ValidStudy,
     ValidPhenotypeSymbol,
-    ValidPhenotype1,
-    ValidPhenotype2,
+    ValidPhenotype,
     ValidChannelListPositives,
     ValidChannelListNegatives,
 )
@@ -54,20 +49,23 @@
     },
 )
 
+
 def custom_openapi():
     if app.openapi_schema:
         return app.openapi_schema
     openapi_schema = get_openapi(
         title=TITLE,
         version=VERSION,
-        openapi_version='3.0.0', # This is a manual replacement for 3.1.0 default, which seems not supported by Swagger UI yet.
+        # This is a manual replacement for 3.1.0 default, which isn't supported by Swagger UI yet.
+        openapi_version='3.0.0',
         summary=TITLE,
         description=DESCRIPTION,
         routes=app.routes,
     )
     app.openapi_schema = openapi_schema
     return app.openapi_schema
 
+
 setattr(app, 'openapi', custom_openapi)
 
 
@@ -151,7 +149,7 @@ async def get_anonymous_phenotype_counts_fast(
     negative_markers = [m for m in negative_marker if m != '']
     measurement_study = query().get_study_components(study).measurement
     number_cells = query().get_number_cells(study)
-    with CountRequester() as requester:
+    with OnDemandRequester() as requester:
         counts = requester.get_counts_by_specimen(
             positive_markers,
             negative_markers,
@@ -164,8 +162,8 @@ async def get_anonymous_phenotype_counts_fast(
 @app.get("/request-phenotype-proximity-computation/")
 async def request_phenotype_proximity_computation(
     study: ValidStudy,
-    phenotype1: ValidPhenotype1,
-    phenotype2: ValidPhenotype2,
+    phenotype1: ValidPhenotype,
+    phenotype2: ValidPhenotype,
     radius: int = Query(default=100),
 ) -> ProximityMetricsComputationResult:
     """
@@ -176,20 +174,38 @@ async def request_phenotype_proximity_computation(
     retrieve = query().retrieve_signature_of_phenotype
     criteria1 = retrieve(phenotype1, study)
     criteria2 = retrieve(phenotype2, study)
-    with CountRequester() as requester:
+    with OnDemandRequester() as requester:
         metrics = requester.get_proximity_metrics(
             query().get_study_components(study).measurement,
             radius,
-            [
+            (
                 criteria1.positive_markers,
                 criteria1.negative_markers,
                 criteria2.positive_markers,
                 criteria2.negative_markers,
-            ],
+            ),
         )
     return metrics
 
 
+@app.get("/request-squidpy-computation/")
+async def request_squidpy_computation(
+    study: ValidStudy,
+    phenotypes: list[str] = Query(),
+) -> SquidpyMetricsComputationResult:
+    """Spatial proximity statistics between phenotype clusters as calculated by Squidpy.
+
+    Each entry of the repeated `phenotypes` query parameter must be a channel name or a
+    phenotype identifier. The marker lists sent to the on demand service alternate positive
+    and negative markers, one pair per phenotype, in the order given.
+
+    Raises ValueError if any phenotype is not recognized.
+    """
+    # Imported locally so this endpoint does not rely on the module-level import list.
+    from spatialprofilingtoolbox.apiserver.app.validation import \
+        valid_single_or_composite_identifier
+
+    # FastAPI reads a bare `list[...]` parameter from the request body and never resolves a
+    # `Depends` nested inside the list, so the query parameter is declared explicitly above and
+    # every entry is validated here instead.
+    for phenotype in phenotypes:
+        valid_single_or_composite_identifier(phenotype)
+    criteria: list[PhenotypeCriteria] = [
+        query().retrieve_signature_of_phenotype(p, study) for p in phenotypes]
+    markers: list[list[str]] = []
+    for criterion in criteria:
+        markers.append(criterion.positive_markers)
+        markers.append(criterion.negative_markers)
+    with OnDemandRequester() as requester:
+        metrics = requester.get_squidpy_metrics(
+            query().get_study_components(study).measurement, markers)
+    return metrics
+
+
 @app.get("/visualization-plots/")
 async def get_plots(
     study: ValidStudy,
@@ -212,6 +228,7 @@ async def get_plot_high_resolution(
     umap = query().get_umap(study, channel)
     input_buffer = BytesIO(b64decode(umap.base64_png))
     input_buffer.seek(0)
+
     def streaming_iteration():
         yield from input_buffer
     return StreamingResponse(streaming_iteration(), media_type="image/png")
diff --git a/spatialprofilingtoolbox/apiserver/app/validation.py b/spatialprofilingtoolbox/apiserver/app/validation.py
@@ -13,24 +13,29 @@ def abbreviate_string(string: str) -> str:
         abbreviation = abbreviation + '...'
     return abbreviation
 
-async def valid_study_name(study: str=Query(min_length=3)) -> str:
+
+async def valid_study_name(study: str = Query(min_length=3)) -> str:
     if study in [item.handle for item in query().retrieve_study_handles()]:
         return study
     raise ValueError(f'Study name invalid: "{abbreviate_string(study)}"')
 
-async def valid_channel(channel: str=Query(min_length=1)) -> str:
+
+async def valid_channel(channel: str = Query(min_length=1)) -> str:
     if channel in query().get_channel_names_all_studies():
         return channel
     raise ValueError(f'Channel name invalid: {abbreviate_string(channel)}')
 
+
 def valid_composite_phenotype_name(identifier: str) -> str:
     if identifier in query().get_phenotype_symbols_all_studies():
         return identifier
     raise ValueError(f'Composite phenotype identifier invalid: {abbreviate_string(identifier)}')
 
-async def valid_phenotype_symbol(phenotype_symbol: str=Query(min_length=1)) -> str:
+
+async def valid_phenotype_symbol(phenotype_symbol: str = Query(min_length=1)) -> str:
     return valid_composite_phenotype_name(phenotype_symbol)
 
+
 def valid_single_or_composite_identifier(identifier) -> str:
     if identifier in query().get_composite_phenotype_identifiers():
         return identifier
@@ -39,11 +44,10 @@ def valid_single_or_composite_identifier(identifier) -> str:
     abbreviation = abbreviate_string(identifier)
     raise ValueError(f'Channel name or phenotype identifier invalid: {abbreviation}')
 
-async def valid_phenotype1(phenotype1: str=Query(min_length=1)) -> str:
-    return valid_single_or_composite_identifier(phenotype1)
 
-async def valid_phenotype2(phenotype2: str=Query(min_length=1)) -> str:
-    return valid_single_or_composite_identifier(phenotype2)
+async def valid_phenotype(phenotype: str = Query(min_length=1)) -> str:
+    """Validate the `phenotype` query parameter as a channel name or phenotype identifier.
+
+    Returns the identifier unchanged when it is recognized; otherwise the delegated check
+    raises ValueError.
+
+    NOTE(review): FastAPI takes the query parameter name from this signature (`phenotype`),
+    not from the endpoint argument annotated with this dependency. An endpoint declaring two
+    such arguments (e.g. `phenotype1` and `phenotype2`) would receive the same value for
+    both. Confirm this is intended, or restore one dependency per parameter name.
+    """
+    return valid_single_or_composite_identifier(phenotype)
+
 
 def valid_channel_list(markers: list[str]) -> list[str]:
     channels = query().get_channel_names_all_studies() + ['']
@@ -52,18 +56,20 @@ def valid_channel_list(markers: list[str]) -> list[str]:
     missing = [marker for marker in markers if not marker in channels]
     raise ValueError(f'Marker names invalid: f{missing}')
 
+
 ChannelList = Annotated[list[str], Query()]
 
+
 async def valid_channel_list_positives(positive_marker: ChannelList) -> list[str]:
     return valid_channel_list(positive_marker)
 
+
 async def valid_channel_list_negatives(negative_marker: ChannelList) -> list[str]:
     return valid_channel_list(negative_marker)
 
 ValidChannel = Annotated[str, Depends(valid_channel)]
 ValidStudy = Annotated[str, Depends(valid_study_name)]
 ValidPhenotypeSymbol = Annotated[str, Depends(valid_phenotype_symbol)]
-ValidPhenotype1 = Annotated[str, Depends(valid_phenotype1)]
-ValidPhenotype2 = Annotated[str, Depends(valid_phenotype2)]
+ValidPhenotype = Annotated[str, Depends(valid_phenotype)]
 ValidChannelListPositives = Annotated[list[str], Depends(valid_channel_list_positives)]
 ValidChannelListNegatives = Annotated[list[str], Depends(valid_channel_list_negatives)]
diff --git a/spatialprofilingtoolbox/db/exchange_data_formats/metrics.py b/spatialprofilingtoolbox/db/exchange_data_formats/metrics.py
@@ -1,6 +1,7 @@
 """Data structures for ready exchange, related to computations or derived metrics."""
 
-from pydantic import BaseModel #pylint: disable=no-name-in-module
+from pydantic import BaseModel  # pylint: disable=no-name-in-module
+
 
 class CellFractionsSummary(BaseModel):
     """
@@ -41,10 +42,8 @@ class Channel(BaseModel):
 
 
 class PhenotypeCriteria(BaseModel):
-    """
-    The criteria defining a given "comopsite" phenotype in terms of expression or non-expression of
-    given markers.
-    """
+    """Criteria defining a "composite" phenotype by expression and non-expression of markers."""
+
     positive_markers: list[str]
     negative_markers: list[str]
 
@@ -84,6 +83,18 @@ class ProximityMetricsComputationResult(BaseModel):
     is_pending: bool
 
 
+class SquidpyMetricsComputationResult(BaseModel):
+    """Response to an on demand request for computation of Squidpy metrics."""
+
+    # Two-level mapping: outer string key -> inner string key -> list of numbers (or, in the
+    # last alternative only, a list of lists of floats). The third union member already admits
+    # every shape the first two describe.
+    # NOTE(review): presumably the outer key names a Squidpy metric — confirm with the producer.
+    values: dict[
+        str,
+        dict[str, list[float] | list[int]] |
+        dict[str, list[float]] |
+        dict[str, list[list[float]] | list[float] | list[int]]
+    ]
+    # NOTE(review): presumably True while the computation has not finished — confirm.
+    is_pending: bool
+
+
 class UMAPChannel(BaseModel):
     """
     A UMAP dimensional reduction of a cell set, with one intensity channel's overlay.

diff --git a/spatialprofilingtoolbox/db/squidpy_metrics.py b/spatialprofilingtoolbox/db/squidpy_metrics.py
@@ -1,16 +1,14 @@
 """Make squidpy metrics that don't require specific phenotype selection available."""
 
-from typing import Any
-
-from numpy.typing import NDArray
 from pandas import DataFrame
 from anndata import AnnData
-from squidpy.gr import spatial_autocorr, spatial_neighbors
+from squidpy.gr import spatial_autocorr
 from psycopg2.extensions import cursor as Psycopg2Cursor
 
 from spatialprofilingtoolbox.db.database_connection import DatabaseConnectionMaker
 from spatialprofilingtoolbox.db.feature_matrix_extractor import FeatureMatrixExtractor
 from spatialprofilingtoolbox.db.create_data_analysis_study import DataAnalysisStudyFactory
+from spatialprofilingtoolbox.workflow.common.squidpy import convert_df_to_anndata
 from spatialprofilingtoolbox.workflow.common.export_features import ADIFeaturesUploader
 from spatialprofilingtoolbox.standalone_utilities.log_formats import colorized_logger
 
@@ -23,45 +21,6 @@ def _describe_spatial_autocorr_derivation_method() -> str:
         'squidpy.gr.spatial_autocorr for more information.'.lstrip().rstrip()
 
 
-def convert_df_to_anndata(
-    df: DataFrame,
-    phenotypes_to_cluster_on: list[str] | None = None,
-) -> AnnData:
-    """Convert SPT DataFrame to AnnData object for use with Squidpy metrics.
-
-    Parameters:
-        df: DataFrame
-            A dataframe with an arbitrary index, x and y locations of histological structures with
-            column names 'pixel x' and 'pixel y', and several columns with arbitrary names each
-            indicating the expression of a phenotype.
-        phenotypes_to_cluster_on: list[str] | None
-            Used to create a 'cluster' column in the AnnData object if provided.
-            * If only one phenotype is provided, two clusters will be created mirroring the
-                presence or absence of the phenotype in each histological structure.
-            * If more than one is provided, the first cluster will be selected based on the
-                presence of the first phenotype in each histological structure, while the second
-                cluster will be selected only among histological structures that did not have the
-                first phenotype, with the pattern continuing for each successive phenotype.
-                Histological structures that do not have any of the phenotypes will be assigned to
-                cluster 0. 
-    """
-    locations: NDArray[Any] = df[['pixel x', 'pixel y']].to_numpy()
-    phenotype_expression: DataFrame = df.drop(['pixel x', 'pixel y'], axis=1)
-    if (phenotypes_to_cluster_on is not None) and (len(phenotypes_to_cluster_on) > 0):
-        clustering = phenotype_expression[phenotypes_to_cluster_on[0]].astype(
-            int)
-        i_cluster = 2
-        for phenotype in phenotypes_to_cluster_on[1:]:
-            clustering[phenotype_expression[phenotype]
-                       & (clustering == 0)] = i_cluster
-            i_cluster += 1
-        phenotype_expression['cluster'] = clustering.astype('category')
-    # TODO: Consider allowing for multiple clustering arrangements?
-    data = AnnData(obs=phenotype_expression, obsm={'spatial': locations})
-    spatial_neighbors(data)
-    return data
-
-
 def _spatial_autocorr(data: AnnData) -> DataFrame:
     return spatial_autocorr(
         data,