Skip to content

Commit

Permalink
Merge branch 'main' into mlin/similarity-search-api-optimize-predict
Browse files Browse the repository at this point in the history
  • Loading branch information
ivirshup committed Sep 5, 2024
2 parents cd16bd1 + fc7aefe commit 011242d
Show file tree
Hide file tree
Showing 28 changed files with 228 additions and 230 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/py-dependency-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ jobs:
fail-fast: false # don't fail-fast, as errors are often specific to a single cell in the matrix
matrix:
os: [sc-dev-64g-runner, macos-latest]
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12"]
exclude:
- os: macos-latest
python-version: "3.8"
python-version: "3.12"

runs-on: ${{matrix.os}}

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/py-unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ jobs:
fail-fast: false # Don't stop the workflow if one of the jobs fails
matrix:
os: [sc-dev-64g-runner, macos-latest]
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12"]
exclude:
- os: macos-latest
python-version: "3.8"
python-version: "3.12"

runs-on: ${{matrix.os}}

Expand Down
16 changes: 8 additions & 8 deletions api/python/cellxgene_census/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ authors = [
]
license = { text = "MIT" }
readme = "README.md"
requires-python = ">= 3.8, < 3.12"
requires-python = ">= 3.10, < 3.13"
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
Expand All @@ -22,18 +22,17 @@ classifiers = [
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Operating System :: POSIX :: Linux",
"Operating System :: MacOS :: MacOS X",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
dependencies= [
# NOTE: the tiledbsoma version must be >= the version used in the Census builder, to
# ensure that the assets are readable (tiledbsoma supports backward-compatible reading).
# Make sure this version does not fall behind the builder's tiledbsoma version.
"tiledbsoma~=1.12.3",
"tiledbsoma>=1.12.3",
"anndata",
"numpy>=1.21,<2.0",
"numpy>=1.23,<2.0",
"requests",
"typing_extensions",
"s3fs>=2021.06.1",
Expand All @@ -43,9 +42,8 @@ dependencies= [
experimental = [
"torch",
"torchdata~=0.7",
"scikit-learn~=1.0",
"scikit-learn>=1.2",
"scikit-misc>=0.2,<0.4", # scikit-misc 0.3 dropped Python 3.8 support, and 0.4 doesn't have MacOS/ARM wheels
"psutil~=5.0",
"datasets~=2.0",
"tdigest~=0.5",
# choose newest version of tiledb-vector-search that doesn't need a newer version of tiledb
Expand Down Expand Up @@ -81,7 +79,7 @@ root = "../../.."
[tool.ruff]
line-length = 120
src = ["api/python/cellxgene_census/src"]
target-version = "py38"
target-version = "py310"

[tool.ruff.lint]
select = [
Expand Down Expand Up @@ -129,6 +127,8 @@ ignore = [
"D205",
# Prefer absolute imports over relative imports from parent modules TODO: enable
"TID252",
# It's okay to use zip without the strict kwarg. In fact, numba doesn't like it when you pass `strict`
"B905",
]

[tool.ruff.lint.pydocstyle]
Expand Down
49 changes: 25 additions & 24 deletions api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
Methods to retrieve slices of the census as AnnData objects.
"""

from typing import Literal, Optional, Sequence
from collections.abc import Sequence
from typing import Literal
from warnings import warn

import anndata
Expand All @@ -27,20 +28,20 @@ def get_anndata(
organism: str,
measurement_name: str = "RNA",
X_name: str = "raw",
X_layers: Optional[Sequence[str]] = (),
obsm_layers: Optional[Sequence[str]] = (),
obsp_layers: Optional[Sequence[str]] = (),
varm_layers: Optional[Sequence[str]] = (),
varp_layers: Optional[Sequence[str]] = (),
obs_value_filter: Optional[str] = None,
obs_coords: Optional[SparseDFCoord] = None,
var_value_filter: Optional[str] = None,
var_coords: Optional[SparseDFCoord] = None,
column_names: Optional[soma.AxisColumnNames] = None,
obs_embeddings: Optional[Sequence[str]] = (),
var_embeddings: Optional[Sequence[str]] = (),
obs_column_names: Optional[Sequence[str]] = None,
var_column_names: Optional[Sequence[str]] = None,
X_layers: Sequence[str] | None = (),
obsm_layers: Sequence[str] | None = (),
obsp_layers: Sequence[str] | None = (),
varm_layers: Sequence[str] | None = (),
varp_layers: Sequence[str] | None = (),
obs_value_filter: str | None = None,
obs_coords: SparseDFCoord | None = None,
var_value_filter: str | None = None,
var_coords: SparseDFCoord | None = None,
column_names: soma.AxisColumnNames | None = None,
obs_embeddings: Sequence[str] | None = (),
var_embeddings: Sequence[str] | None = (),
obs_column_names: Sequence[str] | None = None,
var_column_names: Sequence[str] | None = None,
) -> anndata.AnnData:
"""Convenience wrapper around :class:`tiledbsoma.Experiment` query, to build and execute a query,
and return it as an :class:`anndata.AnnData` object.
Expand Down Expand Up @@ -176,9 +177,9 @@ def _get_axis_metadata(
axis: Literal["obs", "var"],
organism: str,
*,
value_filter: Optional[str] = None,
coords: Optional[SparseDFCoord] = slice(None),
column_names: Optional[Sequence[str]] = None,
value_filter: str | None = None,
coords: SparseDFCoord | None = slice(None),
column_names: Sequence[str] | None = None,
) -> pd.DataFrame:
exp = _get_experiment(census, organism)
coords = (slice(None),) if coords is None else (coords,)
Expand All @@ -198,9 +199,9 @@ def get_obs(
census: soma.Collection,
organism: str,
*,
value_filter: Optional[str] = None,
coords: Optional[SparseDFCoord] = slice(None),
column_names: Optional[Sequence[str]] = None,
value_filter: str | None = None,
coords: SparseDFCoord | None = slice(None),
column_names: Sequence[str] | None = None,
) -> pd.DataFrame:
"""Get the observation metadata for a query on the census.
Expand Down Expand Up @@ -230,9 +231,9 @@ def get_var(
census: soma.Collection,
organism: str,
*,
value_filter: Optional[str] = None,
coords: Optional[SparseDFCoord] = slice(None),
column_names: Optional[Sequence[str]] = None,
value_filter: str | None = None,
coords: SparseDFCoord | None = slice(None),
column_names: Sequence[str] | None = None,
) -> pd.DataFrame:
"""Get the variable metadata for a query on the census.
Expand Down
18 changes: 9 additions & 9 deletions api/python/cellxgene_census/src/cellxgene_census/_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import logging
import os.path
import urllib.parse
from typing import Any, Dict, Optional, get_args
from typing import Any, get_args

import s3fs
import tiledbsoma as soma
Expand All @@ -32,7 +32,7 @@
"anon": True,
"cache_regions": True,
}
DEFAULT_TILEDB_CONFIGURATION: Dict[str, Any] = {
DEFAULT_TILEDB_CONFIGURATION: dict[str, Any] = {
# https://docs.tiledb.com/main/how-to/configuration#configuration-parameters
"py.init_buffer_bytes": 1 * 1024**3,
"soma.init_buffer_bytes": 1 * 1024**3,
Expand Down Expand Up @@ -71,7 +71,7 @@ def _resolve_census_locator(locator: CensusLocator, mirror: CensusMirror) -> Res

def _open_soma(
locator: ResolvedCensusLocator,
context: Optional[soma.options.SOMATileDBContext] = None,
context: soma.options.SOMATileDBContext | None = None,
) -> soma.Collection:
"""Private. Merge config defaults and return open census as a soma Collection/context."""
# if no user-defined context, cellxgene_census defaults take precedence over SOMA defaults
Expand All @@ -85,7 +85,7 @@ def _open_soma(
return soma.open(locator["uri"], mode="r", soma_type=soma.Collection, context=context)


def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) -> soma.options.SOMATileDBContext:
def get_default_soma_context(tiledb_config: dict[str, Any] | None = None) -> soma.options.SOMATileDBContext:
"""Return a :class:`tiledbsoma.SOMATileDBContext` with sensible defaults that can be further customized by the
user. The customized context can then be passed to :func:`cellxgene_census.open_soma` with the ``context``
argument or to :meth:`somacore.SOMAObject.open` with the ``context`` argument, such as
Expand Down Expand Up @@ -132,11 +132,11 @@ def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) ->

def open_soma(
*,
census_version: Optional[str] = DEFAULT_CENSUS_VERSION,
mirror: Optional[str] = None,
uri: Optional[str] = None,
tiledb_config: Optional[Dict[str, Any]] = None,
context: Optional[soma.options.SOMATileDBContext] = None,
census_version: str | None = DEFAULT_CENSUS_VERSION,
mirror: str | None = None,
uri: str | None = None,
tiledb_config: dict[str, Any] | None = None,
context: soma.options.SOMATileDBContext | None = None,
) -> soma.Collection:
"""Open the Census by version or URI.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
Methods to retrieve information about versions of the publicly hosted Census object.
"""

import typing
from collections import OrderedDict
from typing import Any, Dict, Literal, Optional, Union, cast
from typing import Any, Literal, cast

import requests
from typing_extensions import NotRequired, TypedDict
Expand Down Expand Up @@ -37,7 +36,7 @@ class CensusLocator(TypedDict):

uri: str
relative_uri: str
s3_region: Optional[str]
s3_region: str | None


class CensusVersionRetraction(TypedDict):
Expand All @@ -55,13 +54,13 @@ class CensusVersionRetraction(TypedDict):
"""

date: str
reason: Optional[str]
info_url: Optional[str]
replaced_by: Optional[str]
reason: str | None
info_url: str | None
replaced_by: str | None


ReleaseFlag = Literal["lts", "retracted"]
ReleaseFlags = Dict[ReleaseFlag, bool]
ReleaseFlags = dict[ReleaseFlag, bool]


class CensusVersionDescription(TypedDict):
Expand All @@ -82,15 +81,15 @@ class CensusVersionDescription(TypedDict):
If retracted, details of the retraction.
"""

release_date: Optional[str]
release_date: str | None
release_build: str
soma: CensusLocator
h5ads: CensusLocator
flags: NotRequired[ReleaseFlags]
retraction: NotRequired[CensusVersionRetraction]


CensusDirectory = Dict[CensusVersionName, Union[CensusVersionName, CensusVersionDescription]]
CensusDirectory = dict[CensusVersionName, CensusVersionName | CensusVersionDescription]

"""
A provider identifies a storage medium for the Census, which can either be a cloud provider or a local file.
Expand Down Expand Up @@ -132,11 +131,11 @@ class CensusMirror(TypedDict):

provider: Provider
base_uri: str
region: Optional[str]
region: str | None
embeddings_base_uri: str


CensusMirrors = Dict[CensusMirrorName, Union[CensusMirrorName, CensusMirror]]
CensusMirrors = dict[CensusMirrorName, CensusMirrorName | CensusMirror]


class ResolvedCensusLocator(TypedDict):
Expand All @@ -155,7 +154,7 @@ class ResolvedCensusLocator(TypedDict):
"""

uri: str
region: Optional[str]
region: str | None
provider: str


Expand Down Expand Up @@ -200,8 +199,8 @@ def get_census_version_description(census_version: str) -> CensusVersionDescript


def get_census_version_directory(
*, lts: Optional[bool] = None, retracted: Optional[bool] = False
) -> Dict[CensusVersionName, CensusVersionDescription]:
*, lts: bool | None = None, retracted: bool | None = False
) -> dict[CensusVersionName, CensusVersionDescription]:
"""Get the directory of Census versions currently available, optionally filtering by specified
flags. If a filtering flag is not specified, Census versions will not be filtered by that flag.
Defaults to including both "long-term stable" (LTS) and weekly Census versions, and excluding
Expand Down Expand Up @@ -358,7 +357,7 @@ def get_census_version_directory(

directory: dict[str, str | dict[str, Any]] = response.json()
directory_out: CensusDirectory = {}
aliases: typing.Set[CensusVersionName] = set()
aliases: set[CensusVersionName] = set()

# Resolve all aliases for easier use
for census_version_name in list(directory.keys()):
Expand Down Expand Up @@ -401,7 +400,7 @@ def get_census_version_directory(
directory_out[census_version_name] = census_version_description.copy()

# Cast is safe, as we have removed all aliases
unordered_directory = cast(Dict[CensusVersionName, CensusVersionDescription], directory_out)
unordered_directory = cast(dict[CensusVersionName, CensusVersionDescription], directory_out)

# Sort by aliases and release date, descending
aliased_releases = [(k, v) for k, v in unordered_directory.items() if k in aliases]
Expand All @@ -417,7 +416,7 @@ def get_census_version_directory(
return ordered_directory


def get_census_mirror_directory() -> Dict[CensusMirrorName, CensusMirror]:
def get_census_mirror_directory() -> dict[CensusMirrorName, CensusMirror]:
"""Get the directory of Census mirrors currently available.
Returns:
Expand All @@ -429,7 +428,7 @@ def get_census_mirror_directory() -> Dict[CensusMirrorName, CensusMirror]:
"""
mirrors = _get_census_mirrors()
del mirrors["default"]
return cast(Dict[CensusMirrorName, CensusMirror], mirrors)
return cast(dict[CensusMirrorName, CensusMirror], mirrors)


def _get_census_mirrors() -> CensusMirrors:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import json
import warnings
from typing import Any, Dict, cast
from typing import Any, cast

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -55,7 +55,7 @@ def get_embedding_metadata(embedding_uri: str, context: soma.options.SOMATileDBC
embedding_metadata = json.loads(E.metadata["CxG_embedding_info"])
assert isinstance(embedding_metadata, dict)

return cast(Dict[str, Any], embedding_metadata)
return cast(dict[str, Any], embedding_metadata)


def _get_embedding(
Expand All @@ -67,7 +67,7 @@ def _get_embedding(
context: soma.options.SOMATileDBContext | None = None,
) -> npt.NDArray[np.float32]:
"""Private. Like get_embedding, but accepts a Census object and a Census directory."""
if isinstance(obs_soma_joinids, (pa.Array, pa.ChunkedArray, pd.Series)):
if isinstance(obs_soma_joinids, pa.Array | pa.ChunkedArray | pd.Series):
obs_soma_joinids = obs_soma_joinids.to_numpy()
assert isinstance(obs_soma_joinids, np.ndarray)
if obs_soma_joinids.dtype != np.int64:
Expand Down Expand Up @@ -194,7 +194,7 @@ def get_embedding_metadata_by_name(
response = requests.get(CELL_CENSUS_EMBEDDINGS_MANIFEST_URL, headers={"User-Agent": _user_agent()})
response.raise_for_status()

manifest = cast(Dict[str, Dict[str, Any]], response.json())
manifest = cast(dict[str, dict[str, Any]], response.json())
embeddings = []
for _, obj in manifest.items():
if (
Expand Down
Loading

0 comments on commit 011242d

Please sign in to comment.