Skip to content

Commit

Permalink
Upload importance scores via cggnn command (#204)
Browse files Browse the repository at this point in the history
* created cg-gnn extraction script

* added a quick test for cggnn extract

* hotfix importance saving

* Use updated pytorch image for Python 3.11 support, cggnn extract test passes.

* Use python3.11 directly in test case usage of pip, now that python3.11 is specially installed.

* Same fix applied to second test.

* undo cggnn docker change

* added pheno, removed multistudy to FME

* logic hotfix

* fix study-substudy references

* fix test to account for phenotype columns

* make phenotype dict key consistent

* Update usage of extractor to omit study reference.

* remove phenotypes from continuous dataframes

* fix pheno neg expression match

* split extract stratification, symbols in col names

* a little formatting on FME

* explore classes for cggnn extraction

* fix FME test

* Make test more diagnosable.

* cggnn extract clarity refactors

* update providers for new feature column names

* add cg-gnn to toml checking

* adjust squidpy clustering

* actually handling this without a try except is better

* any typo

* handle malformed squidpy returns more gracefully

* cggnn extract docstring

* add direct importance upload script

* Change dataframe to bool values to permit "all" & "all" syntax.

* Fix accidental booleanization of pixel position columns.

* Make operator order precedence explicit, booleanization and negation.

* Resolve race condition on import ("circular" import) of StudyAccess.

---------

Co-authored-by: James Mathews <[email protected]>
  • Loading branch information
CarlinLiao and jimmymathews authored Sep 14, 2023
1 parent 970d06d commit dd93f8d
Show file tree
Hide file tree
Showing 16 changed files with 128 additions and 160 deletions.
1 change: 1 addition & 0 deletions pyproject.toml.unversioned
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ packages = [
"spatialprofilingtoolbox.ondemand.providers",
"spatialprofilingtoolbox.ondemand.scripts",
"spatialprofilingtoolbox.db",
"spatialprofilingtoolbox.db.accessors",
"spatialprofilingtoolbox.db.exchange_data_formats",
"spatialprofilingtoolbox.db.scripts",
"spatialprofilingtoolbox.db.data_model",
Expand Down
9 changes: 9 additions & 0 deletions spatialprofilingtoolbox/apiserver/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
PhenotypeCriteria,
PhenotypeCounts,
UnivariateMetricsComputationResult,
CGGNNImportanceRank,
)
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import UMAPChannel
from spatialprofilingtoolbox.db.querying import query
Expand Down Expand Up @@ -222,6 +223,14 @@ async def request_spatial_metrics_computation_custom_phenotypes( # pylint: disa
return get_squidpy_metrics(study, markers, feature_class, radius=radius)


@app.get("/request-cggnn-metrics/")
async def request_cggnn_metrics(
    study: ValidStudy,
) -> list[CGGNNImportanceRank]:
    """Importance scores as calculated by cggnn."""
    # Delegate to the query layer's cg-gnn lookup for this study and hand
    # its result back unchanged.
    importance_ranks = query().get_cggnn_metrics(study)
    return importance_ranks


def get_proximity_metrics(
study: str,
markers: list[list[str]],
Expand Down
147 changes: 0 additions & 147 deletions spatialprofilingtoolbox/cggnn/scripts/run_sql.py

This file was deleted.

44 changes: 44 additions & 0 deletions spatialprofilingtoolbox/cggnn/scripts/upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Upload importance score output from a cg-gnn instance to the local db."""

from argparse import ArgumentParser

from pandas import read_csv

from spatialprofilingtoolbox.db.database_connection import DatabaseConnectionMaker
from spatialprofilingtoolbox.db.importance_score_transcriber import transcribe_importance


def parse_arguments():
    """Parse the command line options of the importance upload script.

    Returns the `argparse` namespace with the attributes
    `spt_db_config_location` and `importances_csv_path` (both required) and
    `cohort_stratifier` (optional, defaults to the empty string).
    """
    cli = ArgumentParser(
        prog='spt cggnn upload-importances',
        description='Save cell importance scores as defined by cggnn to the database.',
    )

    # The two mandatory file-location options differ only in flag and help text.
    required_locations = (
        ('--spt_db_config_location', 'File location for SPT DB config file.'),
        ('--importances_csv_path', 'File location for the importances CSV.'),
    )
    for flag, help_text in required_locations:
        cli.add_argument(flag, type=str, help=help_text, required=True)

    cli.add_argument(
        '--cohort_stratifier',
        type=str,
        help='Name of the classification cohort variable the GNN was trained on.',
        default='',
        required=False,
    )
    return cli.parse_args()


if __name__ == "__main__":
    # Script entry point: read the importances CSV (first column is the
    # index) and transcribe it into the database named by the config file.
    args = parse_arguments()
    df = read_csv(args.importances_csv_path, index_col=0)
    connection = DatabaseConnectionMaker(args.spt_db_config_location).get_connection()
    # Release the database connection even when the upload raises, so a
    # failed run does not leave the connection open.
    try:
        transcribe_importance(df, connection, cohort_stratifier=args.cohort_stratifier)
    finally:
        connection.close()
7 changes: 7 additions & 0 deletions spatialprofilingtoolbox/db/accessors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Convenience classes for accessing various data in the database."""

from spatialprofilingtoolbox.db.accessors.cggnn import CGGNNAccess
from spatialprofilingtoolbox.db.accessors.fractions_and_associations import FractionsAccess
from spatialprofilingtoolbox.db.accessors.phenotypes import PhenotypesAccess
from spatialprofilingtoolbox.db.accessors.study import StudyAccess
from spatialprofilingtoolbox.db.accessors.umap import UMAPAccess
36 changes: 36 additions & 0 deletions spatialprofilingtoolbox/db/accessors/cggnn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Convenience access of cg-gnn metrics."""

from spatialprofilingtoolbox import get_feature_description
from spatialprofilingtoolbox.db.accessors.study import StudyAccess
from spatialprofilingtoolbox.db.database_connection import SimpleReadOnlyProvider
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import CGGNNImportanceRank


class CGGNNAccess(SimpleReadOnlyProvider):
    """Access to cg-gnn features from database."""

    def get_metrics(self, study: str) -> list[CGGNNImportanceRank]:
        """Get cg-gnn metrics for this study.

        Parameters
        ----------
        study: str
            Study whose components are looked up via `StudyAccess`; the
            `analysis` component is used to select the feature specification.

        Returns
        -------
        list[CGGNNImportanceRank]
            One entry per matching `quantitative_feature_value` row, pairing the
            row's subject (as the histological structure ID) with its value (as
            the importance rank), both converted to `int`.
        """
        components = StudyAccess(self.cursor).get_study_components(study)
        # Pass the values as query parameters rather than interpolating them
        # into the SQL text, so quotes in a study name or description cannot
        # break or alter the statement.
        # NOTE(review): assumes the cursor uses the `%s` (format) paramstyle,
        # as psycopg2 does -- confirm against SimpleReadOnlyProvider.
        self.cursor.execute(
            '''
            SELECT
                qfv.subject,
                qfv.value
            FROM quantitative_feature_value qfv
                JOIN feature_specification fs
                    ON fs.identifier=qfv.feature
            WHERE fs.derivation_method=%s
                AND fs.study=%s
            ;
            ''',
            (get_feature_description("gnn importance score"), components.analysis),
        )
        rows = self.cursor.fetchall()
        return [
            CGGNNImportanceRank(
                histological_structure_id=int(row[0]),
                rank=int(row[1]),
            )
            for row in rows
        ]
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import CellFractionsSummary
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import CellFractionsAverage
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import FeatureAssociationTest
from spatialprofilingtoolbox.db.study_access import StudyAccess
from spatialprofilingtoolbox.db.accessors.study import StudyAccess
from spatialprofilingtoolbox.db.cohorts import _replace_stratum_identifiers
from spatialprofilingtoolbox import get_feature_description
from spatialprofilingtoolbox.db.database_connection import SimpleReadOnlyProvider
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Convenience accessors/manipulators for phenotype data."""
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import PhenotypeSymbol
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import PhenotypeCriteria
from spatialprofilingtoolbox.db.study_access import StudyAccess
from spatialprofilingtoolbox.db.accessors.study import StudyAccess
from spatialprofilingtoolbox.db.database_connection import SimpleReadOnlyProvider


Expand Down
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions spatialprofilingtoolbox/db/database_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ class (QueryCursor) newly provides on each invocation.
retrieve_signature_of_phenotype: Callable
get_umaps_low_resolution: Callable
get_umap: Callable
get_cggnn_metrics: Callable

def __init__(self, query_handler: Type):
self.query_handler = query_handler
Expand Down
6 changes: 6 additions & 0 deletions spatialprofilingtoolbox/db/exchange_data_formats/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,9 @@ class UMAPChannel(BaseModel):
"""
channel: str
base64_png: str


class CGGNNImportanceRank(BaseModel):
    """The cg-gnn importance rank of one histological structure in a study."""
    # Identifier of the histological structure the rank belongs to.
    histological_structure_id: int
    # Importance rank value recorded for that structure.
    rank: int
6 changes: 4 additions & 2 deletions spatialprofilingtoolbox/db/feature_matrix_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@

from spatialprofilingtoolbox import DatabaseConnectionMaker
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import PhenotypeCriteria
from spatialprofilingtoolbox.db.phenotypes import PhenotypesAccess
from spatialprofilingtoolbox.db.accessors import (
StudyAccess,
PhenotypesAccess,
)
from spatialprofilingtoolbox.db.stratification_puller import (
StratificationPuller,
Stratification,
)
from spatialprofilingtoolbox.db.study_access import StudyAccess
from spatialprofilingtoolbox.workflow.common.structure_centroids_puller import \
StructureCentroidsPuller
from spatialprofilingtoolbox.workflow.common.sparse_matrix_puller import SparseMatrixPuller
Expand Down
8 changes: 4 additions & 4 deletions spatialprofilingtoolbox/db/importance_score_transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,20 @@ def transcribe_importance(
df: DataFrame,
connection: Connection,
per_specimen_selection_number: int = 1000,
cohort_stratifier: str='default sample stratification',
cohort_stratifier: str = '',
) -> None:
r"""Upload importance score output from a cg-gnn instance to the local db.
Parameters:
df: DataFrame
One column, `importance_score`, indexed by `histological_structure`.
cohort_stratifier: str
Name of the classification cohort variable the GNN was trained on to produce
the importance score.
connection: psycopg2.extensions.connection
per_specimen_selection_number: int
Grab this many of the most important cells from each specimen (or fewer if there
aren't enough cells in the specimen).
cohort_stratifier: str = ''
Name of the classification cohort variable the GNN was trained on to produce
the importance score.
"""
study = _get_referenced_study(connection, df)
indicator: str = 'cell importance'
Expand Down
17 changes: 13 additions & 4 deletions spatialprofilingtoolbox/db/querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,17 @@
Channel,
PhenotypeCriteria,
UMAPChannel,
CGGNNImportanceRank,
)
from spatialprofilingtoolbox.db.cohorts import get_cohort_identifiers
from spatialprofilingtoolbox.db.study_access import StudyAccess
from spatialprofilingtoolbox.db.fractions_and_associations import FractionsAccess
from spatialprofilingtoolbox.db.phenotypes import PhenotypesAccess
from spatialprofilingtoolbox.db.umap import UMAPAccess
from spatialprofilingtoolbox.db.accessors import (
CGGNNAccess,
StudyAccess,
FractionsAccess,
PhenotypesAccess,
UMAPAccess,
)


class QueryHandler:
"""Handle simple queries to the database."""
Expand Down Expand Up @@ -108,6 +113,10 @@ def get_umaps_low_resolution(cls, cursor, study: str) -> list[UMAPChannel]:
def get_umap(cls, cursor, study: str, channel: str) -> UMAPChannel:
return UMAPAccess(cursor).get_umap_row_for_channel(study, channel)

@classmethod
def get_cggnn_metrics(cls, cursor, study: str) -> list[CGGNNImportanceRank]:
    """Return the cg-gnn metrics for `study` using a `CGGNNAccess` on `cursor`."""
    accessor = CGGNNAccess(cursor)
    return accessor.get_metrics(study)


def query() -> QueryCursor:
    """Build a `QueryCursor` that dispatches to `QueryHandler`."""
    handler_class = QueryHandler
    return QueryCursor(handler_class)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os

from spatialprofilingtoolbox.db.database_connection import DBCursor
from spatialprofilingtoolbox.db.study_access import StudyAccess
from spatialprofilingtoolbox.db.accessors import StudyAccess

def test_lookup():
environment = {
Expand Down

0 comments on commit dd93f8d

Please sign in to comment.