Skip to content

Commit

Permalink
mostly finished squidpy on demand + cascading refactors
Browse files Browse the repository at this point in the history
  • Loading branch information
CarlinLiao committed Jul 29, 2023
1 parent 88dd3f4 commit 483b784
Show file tree
Hide file tree
Showing 21 changed files with 940 additions and 616 deletions.
51 changes: 34 additions & 17 deletions spatialprofilingtoolbox/apiserver/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,19 @@
from fastapi import Response
from fastapi.responses import StreamingResponse

from spatialprofilingtoolbox.ondemand.counts_service_client import CountRequester
from spatialprofilingtoolbox.ondemand.service_client import OnDemandRequester
from spatialprofilingtoolbox.db.exchange_data_formats.study import StudyHandle
from spatialprofilingtoolbox.db.exchange_data_formats.study import StudySummary
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import CellFractionsSummary
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import PhenotypeSymbol
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import Channel
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import PhenotypeCriteria
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import PhenotypeCounts
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import \
ProximityMetricsComputationResult
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import CellFractionsSummary, \
PhenotypeSymbol, Channel, PhenotypeCriteria, PhenotypeCounts, \
ProximityMetricsComputationResult, SquidpyMetricsComputationResult
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import UMAPChannel
from spatialprofilingtoolbox.db.querying import query
from spatialprofilingtoolbox.apiserver.app.validation import (
ValidChannel,
ValidStudy,
ValidPhenotypeSymbol,
ValidPhenotype1,
ValidPhenotype2,
ValidPhenotype,
ValidChannelListPositives,
ValidChannelListNegatives,
)
Expand Down Expand Up @@ -54,20 +49,23 @@
},
)


def custom_openapi():
    """Return the application's OpenAPI schema, generating it on first use.

    The schema is built with `get_openapi` from the module-level title, version and description
    and from the routes registered on `app`, then stored on `app.openapi_schema` so that later
    calls return the stored value.

    Returns:
        The value of `app.openapi_schema`.
    """
    if app.openapi_schema:
        return app.openapi_schema
    app.openapi_schema = get_openapi(
        title=TITLE,
        version=VERSION,
        # This is a manual replacement for 3.1.0 default, which isn't supported by Swagger UI yet.
        # It must be passed exactly once; repeating the keyword is a syntax error.
        openapi_version='3.0.0',
        summary=TITLE,
        description=DESCRIPTION,
        routes=app.routes,
    )
    return app.openapi_schema


setattr(app, 'openapi', custom_openapi)


Expand Down Expand Up @@ -151,7 +149,7 @@ async def get_anonymous_phenotype_counts_fast(
negative_markers = [m for m in negative_marker if m != '']
measurement_study = query().get_study_components(study).measurement
number_cells = query().get_number_cells(study)
with CountRequester() as requester:
with OnDemandRequester() as requester:
counts = requester.get_counts_by_specimen(
positive_markers,
negative_markers,
Expand All @@ -164,8 +162,8 @@ async def get_anonymous_phenotype_counts_fast(
@app.get("/request-phenotype-proximity-computation/")
async def request_phenotype_proximity_computation(
study: ValidStudy,
phenotype1: ValidPhenotype1,
phenotype2: ValidPhenotype2,
phenotype1: ValidPhenotype,
phenotype2: ValidPhenotype,
radius: int = Query(default=100),
) -> ProximityMetricsComputationResult:
"""
Expand All @@ -176,20 +174,38 @@ async def request_phenotype_proximity_computation(
retrieve = query().retrieve_signature_of_phenotype
criteria1 = retrieve(phenotype1, study)
criteria2 = retrieve(phenotype2, study)
with CountRequester() as requester:
with OnDemandRequester() as requester:
metrics = requester.get_proximity_metrics(
query().get_study_components(study).measurement,
radius,
[
(
criteria1.positive_markers,
criteria1.negative_markers,
criteria2.positive_markers,
criteria2.negative_markers,
],
),
)
return metrics


@app.get("/request-squidpy-computation/")
async def request_squidpy_computation(
    study: ValidStudy,
    phenotypes: list[ValidPhenotype],
) -> SquidpyMetricsComputationResult:
    """Spatial proximity statistics between phenotype clusters as calculated by Squidpy."""
    # NOTE(review): FastAPI documents that list-typed query parameters need an explicit `Query`
    # marker, and the `Depends` carried by `ValidPhenotype` sits inside the list's element type
    # here. Confirm that `phenotypes` is really read from the query string and validated per
    # item as intended.
    signatures: list[PhenotypeCriteria] = []
    for phenotype in phenotypes:
        signatures.append(query().retrieve_signature_of_phenotype(phenotype, study))
    # Flattened marker lists: for each phenotype in order, its positive markers followed by its
    # negative markers.
    markers: list[list[str]] = [
        marker_names
        for signature in signatures
        for marker_names in (signature.positive_markers, signature.negative_markers)
    ]
    with OnDemandRequester() as requester:
        measurement_study = query().get_study_components(study).measurement
        return requester.get_squidpy_metrics(measurement_study, markers)


@app.get("/visualization-plots/")
async def get_plots(
study: ValidStudy,
Expand All @@ -212,6 +228,7 @@ async def get_plot_high_resolution(
umap = query().get_umap(study, channel)
input_buffer = BytesIO(b64decode(umap.base64_png))
input_buffer.seek(0)

def streaming_iteration():
yield from input_buffer
return StreamingResponse(streaming_iteration(), media_type="image/png")
24 changes: 15 additions & 9 deletions spatialprofilingtoolbox/apiserver/app/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,29 @@ def abbreviate_string(string: str) -> str:
abbreviation = abbreviation + '...'
return abbreviation

async def valid_study_name(study: str=Query(min_length=3)) -> str:

async def valid_study_name(study: str = Query(min_length=3)) -> str:
    """Check the `study` query parameter against the handles of the known studies.

    Returns:
        The study name unchanged, when it equals the handle of one of the items returned by
        `query().retrieve_study_handles()`.

    Raises:
        ValueError: If no retrieved handle equals `study`. The message contains the name as
            shortened by `abbreviate_string`.
    """
    known_handles = [entry.handle for entry in query().retrieve_study_handles()]
    if study not in known_handles:
        raise ValueError(f'Study name invalid: "{abbreviate_string(study)}"')
    return study

async def valid_channel(channel: str=Query(min_length=1)) -> str:

async def valid_channel(channel: str = Query(min_length=1)) -> str:
    """Check the `channel` query parameter against the channel names of all studies.

    Returns:
        The channel name unchanged, when it is among the names returned by
        `query().get_channel_names_all_studies()`.

    Raises:
        ValueError: If the name is not among them. The message contains the name as shortened
            by `abbreviate_string`.
    """
    recognized_channels = query().get_channel_names_all_studies()
    if channel not in recognized_channels:
        raise ValueError(f'Channel name invalid: {abbreviate_string(channel)}')
    return channel


def valid_composite_phenotype_name(identifier: str) -> str:
    """Check an identifier against the phenotype symbols of all studies.

    Returns:
        The identifier unchanged, when it is among the values returned by
        `query().get_phenotype_symbols_all_studies()`.

    Raises:
        ValueError: If the identifier is not among them. The message contains the identifier
            as shortened by `abbreviate_string`.
    """
    recognized_symbols = query().get_phenotype_symbols_all_studies()
    if identifier not in recognized_symbols:
        raise ValueError(
            f'Composite phenotype identifier invalid: {abbreviate_string(identifier)}'
        )
    return identifier

async def valid_phenotype_symbol(phenotype_symbol: str=Query(min_length=1)) -> str:

async def valid_phenotype_symbol(phenotype_symbol: str = Query(min_length=1)) -> str:
    """Validate the `phenotype_symbol` query parameter as a composite phenotype name.

    Delegates to `valid_composite_phenotype_name`; its return value is passed through and any
    exception it raises propagates to the caller.
    """
    validated = valid_composite_phenotype_name(phenotype_symbol)
    return validated


def valid_single_or_composite_identifier(identifier) -> str:
if identifier in query().get_composite_phenotype_identifiers():
return identifier
Expand All @@ -39,11 +44,10 @@ def valid_single_or_composite_identifier(identifier) -> str:
abbreviation = abbreviate_string(identifier)
raise ValueError(f'Channel name or phenotype identifier invalid: {abbreviation}')

async def valid_phenotype1(phenotype1: str=Query(min_length=1)) -> str:
return valid_single_or_composite_identifier(phenotype1)

async def valid_phenotype2(phenotype2: str=Query(min_length=1)) -> str:
return valid_single_or_composite_identifier(phenotype2)
async def valid_phenotype(phenotype: str = Query(min_length=1)) -> str:
    """Validate the `phenotype` query parameter as a channel name or phenotype identifier.

    Delegates to `valid_single_or_composite_identifier`; its return value is passed through and
    any exception it raises propagates to the caller.
    """
    validated = valid_single_or_composite_identifier(phenotype)
    return validated


def valid_channel_list(markers: list[str]) -> list[str]:
channels = query().get_channel_names_all_studies() + ['']
Expand All @@ -52,18 +56,20 @@ def valid_channel_list(markers: list[str]) -> list[str]:
missing = [marker for marker in markers if not marker in channels]
raise ValueError(f'Marker names invalid: f{missing}')


ChannelList = Annotated[list[str], Query()]


async def valid_channel_list_positives(positive_marker: ChannelList) -> list[str]:
    """Validate the `positive_marker` query parameter list.

    Delegates to `valid_channel_list`; its return value is passed through and any exception it
    raises propagates to the caller.
    """
    checked_markers = valid_channel_list(positive_marker)
    return checked_markers


async def valid_channel_list_negatives(negative_marker: ChannelList) -> list[str]:
    """Validate the `negative_marker` query parameter list.

    Delegates to `valid_channel_list`; its return value is passed through and any exception it
    raises propagates to the caller.
    """
    checked_markers = valid_channel_list(negative_marker)
    return checked_markers

ValidChannel = Annotated[str, Depends(valid_channel)]
ValidStudy = Annotated[str, Depends(valid_study_name)]
ValidPhenotypeSymbol = Annotated[str, Depends(valid_phenotype_symbol)]
ValidPhenotype1 = Annotated[str, Depends(valid_phenotype1)]
ValidPhenotype2 = Annotated[str, Depends(valid_phenotype2)]
ValidPhenotype = Annotated[str, Depends(valid_phenotype)]
ValidChannelListPositives = Annotated[list[str], Depends(valid_channel_list_positives)]
ValidChannelListNegatives = Annotated[list[str], Depends(valid_channel_list_negatives)]
21 changes: 16 additions & 5 deletions spatialprofilingtoolbox/db/exchange_data_formats/metrics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Data structures for ready exchange, related to computations or derived metrics."""

from pydantic import BaseModel #pylint: disable=no-name-in-module
from pydantic import BaseModel # pylint: disable=no-name-in-module


class CellFractionsSummary(BaseModel):
"""
Expand Down Expand Up @@ -41,10 +42,8 @@ class Channel(BaseModel):


class PhenotypeCriteria(BaseModel):
    """Criteria defining a "composite" phenotype by expression and non-expression of markers."""

    # Marker names on the "expression" side of the criteria (presumably markers that must be
    # expressed — confirm against the code that builds these objects).
    positive_markers: list[str]
    # Marker names on the "non-expression" side of the criteria (presumably markers that must
    # not be expressed — confirm against the code that builds these objects).
    negative_markers: list[str]

Expand Down Expand Up @@ -84,6 +83,18 @@ class ProximityMetricsComputationResult(BaseModel):
is_pending: bool


class SquidpyMetricsComputationResult(BaseModel):
    """Response to an on demand request for computation of Squidpy metrics."""

    # Mapping from a string key to one of three accepted dictionary shapes. The order of the
    # union members is left exactly as written, since it may affect how values are matched
    # during validation — confirm before reordering or simplifying.
    values: dict[
        str,
        dict[str, list[float] | list[int]]
        | dict[str, list[float]]
        | dict[str, list[list[float]] | list[float] | list[int]]
    ]
    # Presumably True while the requested computation has not finished yet — confirm against
    # the on demand service that fills this in.
    is_pending: bool


class UMAPChannel(BaseModel):
"""
A UMAP dimensional reduction of a cell set, with one intensity channel's overlay.
Expand Down
45 changes: 2 additions & 43 deletions spatialprofilingtoolbox/db/squidpy_metrics.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
"""Make squidpy metrics that don't require specific phenotype selection available."""

from typing import Any

from numpy.typing import NDArray
from pandas import DataFrame
from anndata import AnnData
from squidpy.gr import spatial_autocorr, spatial_neighbors
from squidpy.gr import spatial_autocorr
from psycopg2.extensions import cursor as Psycopg2Cursor

from spatialprofilingtoolbox.db.database_connection import DatabaseConnectionMaker
from spatialprofilingtoolbox.db.feature_matrix_extractor import FeatureMatrixExtractor
from spatialprofilingtoolbox.db.create_data_analysis_study import DataAnalysisStudyFactory
from spatialprofilingtoolbox.workflow.common.squidpy import convert_df_to_anndata
from spatialprofilingtoolbox.workflow.common.export_features import ADIFeaturesUploader
from spatialprofilingtoolbox.standalone_utilities.log_formats import colorized_logger

Expand All @@ -23,45 +21,6 @@ def _describe_spatial_autocorr_derivation_method() -> str:
'squidpy.gr.spatial_autocorr for more information.'.lstrip().rstrip()


def convert_df_to_anndata(
    df: DataFrame,
    phenotypes_to_cluster_on: list[str] | None = None,
) -> AnnData:
    """Convert SPT DataFrame to AnnData object for use with Squidpy metrics.

    Parameters:
        df: DataFrame
            A dataframe with an arbitrary index, x and y locations of histological structures
            with column names 'pixel x' and 'pixel y', and several columns with arbitrary names
            each indicating the expression of a phenotype.
        phenotypes_to_cluster_on: list[str] | None
            Used to create a 'cluster' column in the AnnData object if provided.
            * If only one phenotype is provided, two clusters will be created mirroring the
              presence or absence of the phenotype in each histological structure.
            * If more than one is provided, the first cluster will be selected based on the
              presence of the first phenotype in each histological structure, while the second
              cluster will be selected only among histological structures that did not have the
              first phenotype, with the pattern continuing for each successive phenotype.
              Histological structures that do not have any of the phenotypes will be assigned
              to cluster 0.

    Returns:
        An AnnData object whose `obs` holds the phenotype columns (plus 'cluster' when
        requested), whose `obsm['spatial']` holds the pixel locations, and on which
        `spatial_neighbors` has been run.
    """
    locations: NDArray[Any] = df[['pixel x', 'pixel y']].to_numpy()
    # `drop` returns a new frame, so adding the 'cluster' column below does not modify `df`.
    phenotype_expression: DataFrame = df.drop(['pixel x', 'pixel y'], axis=1)
    if phenotypes_to_cluster_on:
        first_phenotype, *later_phenotypes = phenotypes_to_cluster_on
        # Cluster 1 marks structures expressing the first phenotype; everything else starts at 0.
        clustering = phenotype_expression[first_phenotype].astype(int)
        for i_cluster, phenotype in enumerate(later_phenotypes, start=2):
            # Cast to bool so that 0/1 integer columns act as a boolean mask when combined with
            # the "still unassigned" condition, instead of being treated as index labels.
            is_expressed = phenotype_expression[phenotype].astype(bool)
            clustering[is_expressed & (clustering == 0)] = i_cluster
        phenotype_expression['cluster'] = clustering.astype('category')
    # TODO: Consider allowing for multiple clustering arrangements?
    data = AnnData(obs=phenotype_expression, obsm={'spatial': locations})
    spatial_neighbors(data)
    return data


def _spatial_autocorr(data: AnnData) -> DataFrame:
return spatial_autocorr(
data,
Expand Down
Loading

0 comments on commit 483b784

Please sign in to comment.