diff --git a/_autosummary/cellxgene_census.download_source_h5ad.html b/_autosummary/cellxgene_census.download_source_h5ad.html index 3d5a67147..906b06252 100644 --- a/_autosummary/cellxgene_census.download_source_h5ad.html +++ b/_autosummary/cellxgene_census.download_source_h5ad.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
+
+ + + +
+
@@ -207,10 +216,10 @@
-

cellxgene_census.download_source_h5ad

+

cellxgene_census.download_source_h5ad

-cellxgene_census.download_source_h5ad(dataset_id: str, to_path: str, *, census_version: str = 'stable') None
+cellxgene_census.download_source_h5ad(dataset_id: str, to_path: str, *, census_version: str = 'stable', progress_bar: bool = True) None

Download the source H5AD dataset, for the given dataset_id, to the user-specified file name.

@@ -219,6 +228,7 @@

cellxgene_census.download_source_h5addataset_id.

  • to_path – The file name where the downloaded H5AD will be written. Must not already exist.

  • census_version – The census version name. Defaults to "stable".

  • +
  • progress_bar – Whether to display a progress bar. Defaults to True.

  • Raises:
    diff --git a/_autosummary/cellxgene_census.experimental.get_all_available_embeddings.html b/_autosummary/cellxgene_census.experimental.get_all_available_embeddings.html index b82a466d5..dd14327a9 100644 --- a/_autosummary/cellxgene_census.experimental.get_all_available_embeddings.html +++ b/_autosummary/cellxgene_census.experimental.get_all_available_embeddings.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -208,10 +217,10 @@
    -

    cellxgene_census.experimental.get_all_available_embeddings

    +

    cellxgene_census.experimental.get_all_available_embeddings

    -cellxgene_census.experimental.get_all_available_embeddings(census_version: str) list[dict[str, Any]]
    +cellxgene_census.experimental.get_all_available_embeddings(census_version: str) list[dict[str, Any]]

    Return a list of dictionaries describing all available embeddings for a given Census version.

    Parameters:
    diff --git a/_autosummary/cellxgene_census.experimental.get_all_census_versions_with_embedding.html b/_autosummary/cellxgene_census.experimental.get_all_census_versions_with_embedding.html index 86216f22e..ab4a7200b 100644 --- a/_autosummary/cellxgene_census.experimental.get_all_census_versions_with_embedding.html +++ b/_autosummary/cellxgene_census.experimental.get_all_census_versions_with_embedding.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -208,10 +217,10 @@
    -

    cellxgene_census.experimental.get_all_census_versions_with_embedding

    +

    cellxgene_census.experimental.get_all_census_versions_with_embedding

    -cellxgene_census.experimental.get_all_census_versions_with_embedding(embedding_name: str, organism: str, embedding_type: str | None = 'obs_embedding') list[str]
    +cellxgene_census.experimental.get_all_census_versions_with_embedding(embedding_name: str, organism: str, embedding_type: str | None = 'obs_embedding') list[str]

    Get a list of all census versions that contain a specific embedding.

    Parameters:
    diff --git a/_autosummary/cellxgene_census.experimental.get_embedding.html b/_autosummary/cellxgene_census.experimental.get_embedding.html index b81b2d721..a279b781e 100644 --- a/_autosummary/cellxgene_census.experimental.get_embedding.html +++ b/_autosummary/cellxgene_census.experimental.get_embedding.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -208,10 +217,10 @@
    -

    cellxgene_census.experimental.get_embedding

    +

    cellxgene_census.experimental.get_embedding

    -cellxgene_census.experimental.get_embedding(census_version: str, embedding_uri: str, obs_soma_joinids: ndarray[Any, dtype[int64]] | Array, context: SOMATileDBContext | None = None) ndarray[Any, dtype[float32]]
    +cellxgene_census.experimental.get_embedding(census_version: str, embedding_uri: str, obs_soma_joinids: ndarray[Any, dtype[int64]] | Array, context: SOMATileDBContext | None = None) ndarray[Any, dtype[float32]]

    Read cell (obs) embeddings and return as a dense numpy.ndarray. Any cells without an embedding will return NaN values.

    diff --git a/_autosummary/cellxgene_census.experimental.get_embedding_metadata.html b/_autosummary/cellxgene_census.experimental.get_embedding_metadata.html index f046e34a2..b0bff5e96 100644 --- a/_autosummary/cellxgene_census.experimental.get_embedding_metadata.html +++ b/_autosummary/cellxgene_census.experimental.get_embedding_metadata.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -208,10 +217,10 @@
    -

    cellxgene_census.experimental.get_embedding_metadata

    +

    cellxgene_census.experimental.get_embedding_metadata

    -cellxgene_census.experimental.get_embedding_metadata(embedding_uri: str, context: SOMATileDBContext | None = None) dict[str, Any]
    +cellxgene_census.experimental.get_embedding_metadata(embedding_uri: str, context: SOMATileDBContext | None = None) dict[str, Any]

    Read embedding metadata and return as a Python dict.

    Parameters:
    diff --git a/_autosummary/cellxgene_census.experimental.get_embedding_metadata_by_name.html b/_autosummary/cellxgene_census.experimental.get_embedding_metadata_by_name.html index 88962e8f9..5a35bbd63 100644 --- a/_autosummary/cellxgene_census.experimental.get_embedding_metadata_by_name.html +++ b/_autosummary/cellxgene_census.experimental.get_embedding_metadata_by_name.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -208,10 +217,10 @@
    -

    cellxgene_census.experimental.get_embedding_metadata_by_name

    +

    cellxgene_census.experimental.get_embedding_metadata_by_name

    -cellxgene_census.experimental.get_embedding_metadata_by_name(embedding_name: str, organism: str, census_version: str, embedding_type: str | None = 'obs_embedding') dict[str, Any]
    +cellxgene_census.experimental.get_embedding_metadata_by_name(embedding_name: str, organism: str, census_version: str, embedding_type: str | None = 'obs_embedding') dict[str, Any]

    Return metadata for a specific embedding. If more embeddings match the query parameters, the most recent one will be returned.

    diff --git a/_autosummary/cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder.html b/_autosummary/cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder.html index 80795282b..414d7f988 100644 --- a/_autosummary/cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder.html +++ b/_autosummary/cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -208,10 +217,10 @@
    -

    cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder

    +

    cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder

    -class cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder(experiment: Experiment, measurement_name: str = 'RNA', layer_name: str = 'raw', *, block_size: int | None = None, **kwargs: Any)
    +class cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder(experiment: Experiment, measurement_name: str = 'RNA', layer_name: str = 'raw', *, block_size: int | None = None, **kwargs: Any)

    Abstract base class for methods to process CELLxGENE Census ExperimentAxisQuery results into a Hugging Face Dataset in which each item represents one cell. Subclasses implement the cell_item() method to process each row of an X layer @@ -236,7 +245,7 @@

    cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder```

    -__init__(experiment: Experiment, measurement_name: str = 'RNA', layer_name: str = 'raw', *, block_size: int | None = None, **kwargs: Any)
    +__init__(experiment: Experiment, measurement_name: str = 'RNA', layer_name: str = 'raw', *, block_size: int | None = None, **kwargs: Any)

    Initialize the CellDatasetBuilder to process the results of a Census ExperimentAxisQuery.

      diff --git a/_autosummary/cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer.html b/_autosummary/cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer.html index 9a5991f21..d734f2613 100644 --- a/_autosummary/cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer.html +++ b/_autosummary/cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
      +
      + + + +
      +

    @@ -208,10 +217,10 @@
    -

    cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer

    +

    cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer

    -class cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer(experiment: Experiment, *, obs_column_names: Sequence[str] | None = None, obs_attributes: Sequence[str] | None = None, max_input_tokens: int = 2048, token_dictionary_file: str = '', gene_median_file: str = '', **kwargs: Any)
    +class cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer(experiment: Experiment, *, obs_column_names: Sequence[str] | None = None, obs_attributes: Sequence[str] | None = None, max_input_tokens: int = 2048, token_dictionary_file: str = '', gene_median_file: str = '', **kwargs: Any)

    Generate a Hugging Face Dataset containing Geneformer token sequences for each cell in CELLxGENE Census ExperimentAxisQuery results (human).

    This class requires the Geneformer package to be installed separately with: @@ -245,7 +254,7 @@

    cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer
    Experiment, *, obs_column_names: Sequence[str] | None = None, obs_attributes: Sequence[str] | None = None, max_input_tokens: int = 2048, token_dictionary_file: str = '', gene_median_file: str = '', **kwargs: Any) None
    +__init__(experiment: Experiment, *, obs_column_names: Sequence[str] | None = None, obs_attributes: Sequence[str] | None = None, max_input_tokens: int = 2048, token_dictionary_file: str = '', gene_median_file: str = '', **kwargs: Any) None
    • experiment: Census Experiment to query

    • obs_query: obs AxisQuery defining the set of Census cells to process (default all)

    • diff --git a/_autosummary/cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe.html b/_autosummary/cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe.html index 5678f87eb..ff15f79cd 100644 --- a/_autosummary/cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe.html +++ b/_autosummary/cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
      +
      + + + +
      +

    @@ -208,10 +217,10 @@
    -

    cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe

    +

    cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe

    -class cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe(experiment: Experiment, measurement_name: str = 'raw', X_name: str = 'X', obs_query: AxisQuery | None = None, var_query: AxisQuery | None = None, obs_column_names: Sequence[str] = (), batch_size: int = 1, shuffle: bool = False, seed: int | None = None, return_sparse_X: bool = False, soma_chunk_size: int | None = None, use_eager_fetch: bool = True)
    +class cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe(experiment: Experiment, measurement_name: str = 'raw', X_name: str = 'X', obs_query: AxisQuery | None = None, var_query: AxisQuery | None = None, obs_column_names: Sequence[str] = (), batch_size: int = 1, shuffle: bool = False, seed: int | None = None, return_sparse_X: bool = False, soma_chunk_size: int | None = None, use_eager_fetch: bool = True)

    A torchdata.datapipes.iter.IterDataPipe that reads obs and X data from a tiledbsoma.Experiment, based upon the specified queries along the obs and var axes. Provides an iterator over these data when the object is passed to Python’s built-in iter function.

    @@ -249,7 +258,7 @@

    cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe
    -__init__(experiment: Experiment, measurement_name: str = 'raw', X_name: str = 'X', obs_query: AxisQuery | None = None, var_query: AxisQuery | None = None, obs_column_names: Sequence[str] = (), batch_size: int = 1, shuffle: bool = False, seed: int | None = None, return_sparse_X: bool = False, soma_chunk_size: int | None = None, use_eager_fetch: bool = True) None
    +__init__(experiment: Experiment, measurement_name: str = 'raw', X_name: str = 'X', obs_query: AxisQuery | None = None, var_query: AxisQuery | None = None, obs_column_names: Sequence[str] = (), batch_size: int = 1, shuffle: bool = False, seed: int | None = None, return_sparse_X: bool = False, soma_chunk_size: int | None = None, use_eager_fetch: bool = True) None

    Construct a new ExperimentDataPipe.

    Parameters:
    diff --git a/_autosummary/cellxgene_census.experimental.ml.pytorch.Stats.html b/_autosummary/cellxgene_census.experimental.ml.pytorch.Stats.html index 8b65462ef..52864d9b5 100644 --- a/_autosummary/cellxgene_census.experimental.ml.pytorch.Stats.html +++ b/_autosummary/cellxgene_census.experimental.ml.pytorch.Stats.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +

    @@ -208,17 +217,17 @@
    -

    cellxgene_census.experimental.ml.pytorch.Stats

    +

    cellxgene_census.experimental.ml.pytorch.Stats

    -class cellxgene_census.experimental.ml.pytorch.Stats(n_obs: int = 0, nnz: int = 0, elapsed: int = 0, n_soma_chunks: int = 0)
    +class cellxgene_census.experimental.ml.pytorch.Stats(n_obs: int = 0, nnz: int = 0, elapsed: int = 0, n_soma_chunks: int = 0)

    Statistics about the data retrieved by ExperimentDataPipe via SOMA API. This is useful for assessing the read throughput of SOMA data.

    Lifecycle

    experimental

    -__init__(n_obs: int = 0, nnz: int = 0, elapsed: int = 0, n_soma_chunks: int = 0) None
    +__init__(n_obs: int = 0, nnz: int = 0, elapsed: int = 0, n_soma_chunks: int = 0) None

    Method generated by attrs for class Stats.

    diff --git a/_autosummary/cellxgene_census.experimental.ml.pytorch.experiment_dataloader.html b/_autosummary/cellxgene_census.experimental.ml.pytorch.experiment_dataloader.html index 65d8737c2..fa04994ac 100644 --- a/_autosummary/cellxgene_census.experimental.ml.pytorch.experiment_dataloader.html +++ b/_autosummary/cellxgene_census.experimental.ml.pytorch.experiment_dataloader.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -208,10 +217,10 @@
    -

    cellxgene_census.experimental.ml.pytorch.experiment_dataloader

    +

    cellxgene_census.experimental.ml.pytorch.experiment_dataloader

    -cellxgene_census.experimental.ml.pytorch.experiment_dataloader(datapipe: IterDataPipe, num_workers: int = 0, **dataloader_kwargs: Any) DataLoader
    +cellxgene_census.experimental.ml.pytorch.experiment_dataloader(datapipe: IterDataPipe, num_workers: int = 0, **dataloader_kwargs: Any) DataLoader

    Factory method for torch.utils.data.DataLoader. This method can be used to safely instantiate a torch.utils.data.DataLoader that works with cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe, since some of the torch.utils.data.DataLoader constructor parameters are not applicable when using a diff --git a/_autosummary/cellxgene_census.experimental.pp.get_highly_variable_genes.html b/_autosummary/cellxgene_census.experimental.pp.get_highly_variable_genes.html index 0b65f3b08..a2910de90 100644 --- a/_autosummary/cellxgene_census.experimental.pp.get_highly_variable_genes.html +++ b/_autosummary/cellxgene_census.experimental.pp.get_highly_variable_genes.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +

    +
    + + + +
    +
    @@ -206,10 +215,10 @@
    -

    cellxgene_census.experimental.pp.get_highly_variable_genes

    +

    cellxgene_census.experimental.pp.get_highly_variable_genes

    -cellxgene_census.experimental.pp.get_highly_variable_genes(census: Collection, organism: str, measurement_name: str = 'RNA', X_name: str = 'raw', obs_value_filter: str | None = None, obs_coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = None, var_value_filter: str | None = None, var_coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = None, n_top_genes: int = 1000, flavor: Literal['seurat_v3'] = 'seurat_v3', span: float = 0.3, batch_key: str | Sequence[str] | None = None, max_loess_jitter: float = 1e-06, batch_key_func: Callable[[...], Any] | None = None) DataFrame
    +cellxgene_census.experimental.pp.get_highly_variable_genes(census: Collection, organism: str, measurement_name: str = 'RNA', X_name: str = 'raw', obs_value_filter: str | None = None, obs_coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = None, var_value_filter: str | None = None, var_coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = None, n_top_genes: int = 1000, flavor: Literal['seurat_v3'] = 'seurat_v3', span: float = 0.3, batch_key: str | Sequence[str] | None = None, max_loess_jitter: float = 1e-06, batch_key_func: Callable[[...], Any] | None = None) DataFrame

    Convenience wrapper around tiledbsoma.Experiment query and cellxgene_census.experimental.pp.highly_variable_genes() function, to build and execute a query, and annotate the query result genes (var dataframe) based upon variability.

    @@ -263,7 +272,7 @@

    cellxgene_census.experimental.pp.get_highly_variable_genes )

    -

    Fetch an anndata.AnnData with top 500 genes:

    +

    Fetch an anndata.AnnData with top 500 genes:

    >>> with cellxgene_census.open_soma(census_version="stable") as census:
             organism = "mus_musculus"
             obs_value_filter = "is_primary_data == True and tissue_general == 'lung'"
    diff --git a/_autosummary/cellxgene_census.experimental.pp.highly_variable_genes.html b/_autosummary/cellxgene_census.experimental.pp.highly_variable_genes.html
    index a63aabc8f..863cc0bd5 100644
    --- a/_autosummary/cellxgene_census.experimental.pp.highly_variable_genes.html
    +++ b/_autosummary/cellxgene_census.experimental.pp.highly_variable_genes.html
    @@ -35,6 +35,8 @@
       
         
           
    +        
    +        
             
             
             
    @@ -127,6 +129,13 @@
                 
     
                 
    +
    +
    + + + +
    +
    @@ -206,10 +215,10 @@
    -

    cellxgene_census.experimental.pp.highly_variable_genes

    +

    cellxgene_census.experimental.pp.highly_variable_genes

    -cellxgene_census.experimental.pp.highly_variable_genes(query: ExperimentAxisQuery, n_top_genes: int = 1000, layer: str = 'raw', flavor: Literal['seurat_v3'] = 'seurat_v3', span: float = 0.3, batch_key: str | Sequence[str] | None = None, max_loess_jitter: float = 1e-06, batch_key_func: Callable[[...], Any] | None = None) DataFrame
    +cellxgene_census.experimental.pp.highly_variable_genes(query: ExperimentAxisQuery, n_top_genes: int = 1000, layer: str = 'raw', flavor: Literal['seurat_v3'] = 'seurat_v3', span: float = 0.3, batch_key: str | Sequence[str] | None = None, max_loess_jitter: float = 1e-06, batch_key_func: Callable[[...], Any] | None = None) DataFrame

    Identify and annotate highly variable genes contained in the query results. The API is modelled on ScanPy scanpy.pp.highly_variable_genes API. Results returned will mimic ScanPy results. The only flavor available diff --git a/_autosummary/cellxgene_census.experimental.pp.mean_variance.html b/_autosummary/cellxgene_census.experimental.pp.mean_variance.html index ae88f4a37..32cbed9b9 100644 --- a/_autosummary/cellxgene_census.experimental.pp.mean_variance.html +++ b/_autosummary/cellxgene_census.experimental.pp.mean_variance.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +

    +
    + + + +
    +
    @@ -206,10 +215,10 @@
    -

    cellxgene_census.experimental.pp.mean_variance

    +

    cellxgene_census.experimental.pp.mean_variance

    -cellxgene_census.experimental.pp.mean_variance(query: ExperimentAxisQuery, layer: str = 'raw', axis: int = 0, calculate_mean: bool = False, calculate_variance: bool = False, ddof: int = 1, nnz_only: bool = False) DataFrame
    +cellxgene_census.experimental.pp.mean_variance(query: ExperimentAxisQuery, layer: str = 'raw', axis: int = 0, calculate_mean: bool = False, calculate_variance: bool = False, ddof: int = 1, nnz_only: bool = False) DataFrame

    Calculate mean and/or variance along the obs axis from query results. Calculations are done in an accumulative chunked fashion. For the mean and variance calculations, the total number of elements (N) is, by default, the corresponding dimension size: for column-wise calculations (axis = 0) N is number of rows, for row-wise diff --git a/_autosummary/cellxgene_census.get_anndata.html b/_autosummary/cellxgene_census.get_anndata.html index 84d18215c..8b4acd684 100644 --- a/_autosummary/cellxgene_census.get_anndata.html +++ b/_autosummary/cellxgene_census.get_anndata.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +

    +
    + + + +
    +
    @@ -206,12 +215,12 @@
    -

    cellxgene_census.get_anndata

    +

    cellxgene_census.get_anndata

    -cellxgene_census.get_anndata(census: Collection, organism: str, measurement_name: str = 'RNA', X_name: str = 'raw', X_layers: Sequence[str] | None = (), obsm_layers: Sequence[str] | None = (), obsp_layers: Sequence[str] | None = (), varm_layers: Sequence[str] | None = (), varp_layers: Sequence[str] | None = (), obs_value_filter: str | None = None, obs_coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = None, var_value_filter: str | None = None, var_coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = None, column_names: AxisColumnNames | None = None, obs_embeddings: Sequence[str] | None = (), var_embeddings: Sequence[str] | None = ()) AnnData
    +cellxgene_census.get_anndata(census: Collection, organism: str, measurement_name: str = 'RNA', X_name: str = 'raw', X_layers: Sequence[str] | None = (), obsm_layers: Sequence[str] | None = (), obsp_layers: Sequence[str] | None = (), varm_layers: Sequence[str] | None = (), varp_layers: Sequence[str] | None = (), obs_value_filter: str | None = None, obs_coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = None, var_value_filter: str | None = None, var_coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = None, column_names: AxisColumnNames | None = None, obs_embeddings: Sequence[str] | None = (), var_embeddings: Sequence[str] | None = ()) AnnData

    Convenience wrapper around tiledbsoma.Experiment query, to build and execute a query, -and return it as an anndata.AnnData object.

    +and return it as an anndata.AnnData object.

    Parameters:

    Lifecycle

    diff --git a/_autosummary/cellxgene_census.get_census_version_description.html b/_autosummary/cellxgene_census.get_census_version_description.html index 4f847e530..8d25925bb 100644 --- a/_autosummary/cellxgene_census.get_census_version_description.html +++ b/_autosummary/cellxgene_census.get_census_version_description.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -205,10 +214,10 @@
    -

    cellxgene_census.get_census_version_description

    +

    cellxgene_census.get_census_version_description

    -cellxgene_census.get_census_version_description(census_version: str) CensusVersionDescription
    +cellxgene_census.get_census_version_description(census_version: str) CensusVersionDescription

    Get release description for given Census version, from the Census release directory.

    Parameters:
    diff --git a/_autosummary/cellxgene_census.get_census_version_directory.html b/_autosummary/cellxgene_census.get_census_version_directory.html index 7047ab65a..e9c48ca7d 100644 --- a/_autosummary/cellxgene_census.get_census_version_directory.html +++ b/_autosummary/cellxgene_census.get_census_version_directory.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -205,10 +214,10 @@
    -

    cellxgene_census.get_census_version_directory

    +

    cellxgene_census.get_census_version_directory

    -cellxgene_census.get_census_version_directory(*, lts: bool | None = None, retracted: bool | None = False) Dict[str, CensusVersionDescription]
    +cellxgene_census.get_census_version_directory(*, lts: bool | None = None, retracted: bool | None = False) Dict[str, CensusVersionDescription]

    Get the directory of Census versions currently available, optionally filtering by specified flags. If a filtering flag is not specified, Census versions will not be filtered by that flag. Defaults to including both “long-term stable” (LTS) and weekly Census versions, and excluding diff --git a/_autosummary/cellxgene_census.get_default_soma_context.html b/_autosummary/cellxgene_census.get_default_soma_context.html index c181f6522..c071940ec 100644 --- a/_autosummary/cellxgene_census.get_default_soma_context.html +++ b/_autosummary/cellxgene_census.get_default_soma_context.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +

    +
    + + + +
    +
    @@ -207,10 +216,10 @@
    -

    cellxgene_census.get_default_soma_context

    +

    cellxgene_census.get_default_soma_context

    -cellxgene_census.get_default_soma_context(tiledb_config: Dict[str, Any] | None = None) SOMATileDBContext
    +cellxgene_census.get_default_soma_context(tiledb_config: Dict[str, Any] | None = None) SOMATileDBContext

    Return a tiledbsoma.SOMATileDBContext with sensible defaults that can be further customized by the user. The customized context can then be passed to cellxgene_census.open_soma() with the context argument or to somacore.SOMAObject.open() with the context argument, such as diff --git a/_autosummary/cellxgene_census.get_obs.html b/_autosummary/cellxgene_census.get_obs.html index e64dd2f7d..1895586c8 100644 --- a/_autosummary/cellxgene_census.get_obs.html +++ b/_autosummary/cellxgene_census.get_obs.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +

    +
    + + + +
    +
    @@ -206,10 +215,10 @@
    -

    cellxgene_census.get_obs

    +

    cellxgene_census.get_obs

    -cellxgene_census.get_obs(census: Collection, organism: str, *, value_filter: str | None = None, coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = slice(None, None, None), column_names: Sequence[str] | None = None) DataFrame
    +cellxgene_census.get_obs(census: Collection, organism: str, *, value_filter: str | None = None, coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = slice(None, None, None), column_names: Sequence[str] | None = None) DataFrame

    Get the observation metadata for a query on the census.

    Parameters:
    diff --git a/_autosummary/cellxgene_census.get_presence_matrix.html b/_autosummary/cellxgene_census.get_presence_matrix.html index df647e2c0..a36fc9010 100644 --- a/_autosummary/cellxgene_census.get_presence_matrix.html +++ b/_autosummary/cellxgene_census.get_presence_matrix.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -204,10 +213,10 @@
    -

    cellxgene_census.get_presence_matrix

    +

    cellxgene_census.get_presence_matrix

    -cellxgene_census.get_presence_matrix(census: Collection, organism: str, measurement_name: str = 'RNA') csr_matrix
    +cellxgene_census.get_presence_matrix(census: Collection, organism: str, measurement_name: str = 'RNA') csr_matrix

    Read the feature dataset presence matrix and return as a scipy.sparse.csr_matrix. The returned sparse matrix is indexed on the first dimension by the dataset soma_joinid values, and on the second dimension by the var pandas.DataFrame soma_joinid values.

    diff --git a/_autosummary/cellxgene_census.get_source_h5ad_uri.html b/_autosummary/cellxgene_census.get_source_h5ad_uri.html index 4b1184868..27d75296d 100644 --- a/_autosummary/cellxgene_census.get_source_h5ad_uri.html +++ b/_autosummary/cellxgene_census.get_source_h5ad_uri.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -207,10 +216,10 @@
    -

    cellxgene_census.get_source_h5ad_uri

    +

    cellxgene_census.get_source_h5ad_uri

    -cellxgene_census.get_source_h5ad_uri(dataset_id: str, *, census_version: str = 'stable') CensusLocator
    +cellxgene_census.get_source_h5ad_uri(dataset_id: str, *, census_version: str = 'stable') CensusLocator

    Open the named version of the census, and return the URI for the dataset_id. This does not guarantee that the H5AD exists or is accessible to the user.

    diff --git a/_autosummary/cellxgene_census.get_var.html b/_autosummary/cellxgene_census.get_var.html index ab988c9ce..f34056fe5 100644 --- a/_autosummary/cellxgene_census.get_var.html +++ b/_autosummary/cellxgene_census.get_var.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -206,10 +215,10 @@
    -

    cellxgene_census.get_var

    +

    cellxgene_census.get_var

    -cellxgene_census.get_var(census: Collection, organism: str, *, value_filter: str | None = None, coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = slice(None, None, None), column_names: Sequence[str] | None = None) DataFrame
    +cellxgene_census.get_var(census: Collection, organism: str, *, value_filter: str | None = None, coords: None | bytes | Slice[bytes] | Sequence[bytes] | float | Slice[float] | Sequence[float] | int | Slice[int] | Sequence[int] | slice | Slice[slice] | Sequence[slice] | str | Slice[str] | Sequence[str] | datetime64 | Slice[datetime64] | Sequence[datetime64] | TimestampType | Slice[TimestampType] | Sequence[TimestampType] | Array | ChunkedArray | ndarray[Any, dtype[integer]] | ndarray[Any, dtype[datetime64]] = slice(None, None, None), column_names: Sequence[str] | None = None) DataFrame

    Get the variable metadata for a query on the census.

    Parameters:
    diff --git a/_autosummary/cellxgene_census.open_soma.html b/_autosummary/cellxgene_census.open_soma.html index 277ac8474..41fc835eb 100644 --- a/_autosummary/cellxgene_census.open_soma.html +++ b/_autosummary/cellxgene_census.open_soma.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -207,10 +216,10 @@
    -

    cellxgene_census.open_soma

    +

    cellxgene_census.open_soma

    -cellxgene_census.open_soma(*, census_version: str | None = 'stable', mirror: str | None = None, uri: str | None = None, tiledb_config: Dict[str, Any] | None = None, context: SOMATileDBContext | None = None) Collection
    +cellxgene_census.open_soma(*, census_version: str | None = 'stable', mirror: str | None = None, uri: str | None = None, tiledb_config: Dict[str, Any] | None = None, context: SOMATileDBContext | None = None) Collection

    Open the Census by version or URI.

    Parameters:
    diff --git a/_static/_sphinx_javascript_frameworks_compat.js b/_static/_sphinx_javascript_frameworks_compat.js new file mode 100644 index 000000000..81415803e --- /dev/null +++ b/_static/_sphinx_javascript_frameworks_compat.js @@ -0,0 +1,123 @@ +/* Compatability shim for jQuery and underscores.js. + * + * Copyright Sphinx contributors + * Released under the two clause BSD licence + */ + +/** + * small helper function to urldecode strings + * + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL + */ +jQuery.urldecode = function(x) { + if (!x) { + return x + } + return decodeURIComponent(x.replace(/\+/g, ' ')); +}; + +/** + * small helper function to urlencode strings + */ +jQuery.urlencode = encodeURIComponent; + +/** + * This function returns the parsed url parameters of the + * current request. Multiple values per key are supported, + * it will always return arrays of strings for the value parts. + */ +jQuery.getQueryParameters = function(s) { + if (typeof s === 'undefined') + s = document.location.search; + var parts = s.substr(s.indexOf('?') + 1).split('&'); + var result = {}; + for (var i = 0; i < parts.length; i++) { + var tmp = parts[i].split('=', 2); + var key = jQuery.urldecode(tmp[0]); + var value = jQuery.urldecode(tmp[1]); + if (key in result) + result[key].push(value); + else + result[key] = [value]; + } + return result; +}; + +/** + * highlight a given string on a jquery object by wrapping it in + * span elements with the given class name. 
+ */ +jQuery.fn.highlightText = function(text, className) { + function highlight(node, addItems) { + if (node.nodeType === 3) { + var val = node.nodeValue; + var pos = val.toLowerCase().indexOf(text); + if (pos >= 0 && + !jQuery(node.parentNode).hasClass(className) && + !jQuery(node.parentNode).hasClass("nohighlight")) { + var span; + var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.className = className; + } + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + node.parentNode.insertBefore(span, node.parentNode.insertBefore( + document.createTextNode(val.substr(pos + text.length)), + node.nextSibling)); + node.nodeValue = val.substr(0, pos); + if (isInSVG) { + var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect"); + var bbox = node.parentElement.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute('class', className); + addItems.push({ + "parent": node.parentNode, + "target": rect}); + } + } + } + else if (!jQuery(node).is("button, select, textarea")) { + jQuery.each(node.childNodes, function() { + highlight(this, addItems); + }); + } + } + var addItems = []; + var result = this.each(function() { + highlight(this, addItems); + }); + for (var i = 0; i < addItems.length; ++i) { + jQuery(addItems[i].parent).before(addItems[i].target); + } + return result; +}; + +/* + * backward compatibility for jQuery.browser + * This will be supported until firefox bug is fixed. 
+ */ +if (!jQuery.browser) { + jQuery.uaMatch = function(ua) { + ua = ua.toLowerCase(); + + var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || + /(webkit)[ \/]([\w.]+)/.exec(ua) || + /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || + /(msie) ([\w.]+)/.exec(ua) || + ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) || + []; + + return { + browser: match[ 1 ] || "", + version: match[ 2 ] || "0" + }; + }; + jQuery.browser = {}; + jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; +} diff --git a/_static/jquery.js b/_static/jquery.js new file mode 100644 index 000000000..c4c6022f2 --- /dev/null +++ b/_static/jquery.js @@ -0,0 +1,2 @@ +/*! jQuery v3.6.0 | (c) OpenJS Foundation and other contributors | jquery.org/license */ +!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],r=Object.getPrototypeOf,s=t.slice,g=t.flat?function(e){return t.flat.call(e)}:function(e){return t.concat.apply([],e)},u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType&&"function"!=typeof e.item},x=function(e){return null!=e&&e===e.window},E=C.document,c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.6.0",S=function(e,t){return new S.fn.init(e,t)};function p(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new 
RegExp(F),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+F),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\[\\da-fA-F]{1,6}"+M+"?|\\\\([^\\r\\n\\f])","g"),ne=function(e,t){var n="0x"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+" 
"]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?"#"+s:":scope")+" "+xe(l[o]);c=l.join(",")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute("id")}}}return g(t.replace($,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e&&e.namespaceURI,n=e&&(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return 
r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement("div")),"undefined"!=typeof e.querySelectorAll&&!e.querySelectorAll(":scope fieldset div").length}),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var 
t;a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+S+"-]").length||v.push("~="),(t=C.createElement("input")).setAttribute("name",""),e.appendChild(t),e.querySelectorAll("[name='']").length||v.push("\\["+M+"*name"+M+"*="+M+"*(?:''|\"\")"),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+S+"+*").length||v.push(".#.+[+~]"),e.querySelectorAll("\\\f"),v.push("[\\r\\n\\f]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",F)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},j=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return 
l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&m(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return 
e===n!==r}):"string"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||D,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,D=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement("div")),(fe=E.createElement("input")).setAttribute("type","radio"),fe.setAttribute("checked","checked"),fe.setAttribute("name","t"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML="",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML="",y.option=!!ce.lastChild;var ge={thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n",""]);var me=/<|&#?\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\s*$/g;function je(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&S(e).children("tbody")[0]||e}function De(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function qe(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Le(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,"handle events"),s)for(n=0,r=s[i].length;n").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var _t,zt=[],Ut=/(=)\?(?=&|$)|\?\?/;S.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=zt.pop()||S.expando+"_"+wt.guid++;return this[e]=!0,e}}),S.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Ut.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Ut.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Ut,"$1"+r):!1!==e.jsonp&&(e.url+=(Tt.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||S.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,zt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((_t=E.implementation.createHTMLDocument("").body).innerHTML="
    ",2===_t.childNodes.length),S.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,"position"),c=S(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=S.css(e,"top"),u=S.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var 
e,t,n,r=this[0],i={top:0,left:0};if("fixed"===S.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===S.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,"borderTopWidth",!0),i.left+=S.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-S.css(r,"marginTop",!0),left:t.left-i.left-S.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===S.css(e,"position"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each(["top","left"],function(e,n){S.cssHooks[n]=Fe(y.pixelPosition,function(e,t){if(t)return t=We(e,n),Pe.test(t)?S(e).position()[n]+"px":t})}),S.each({Height:"height",Width:"width"},function(a,s){S.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 
1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){S.fn[n]=function(e,t){return 0 + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +
    @@ -200,9 +209,9 @@
    -

    What’s new?

    +

    What’s new?

    -

    2023

    +

    2023

    -

    2024

    +

    2024

    • Census supports categoricals for cell metadata
    • diff --git a/articles/2023/20230808-r_api_release.html b/articles/2023/20230808-r_api_release.html index 5a67b2891..0251780f3 100644 --- a/articles/2023/20230808-r_api_release.html +++ b/articles/2023/20230808-r_api_release.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
      +
      + + + +
      +
    @@ -203,7 +212,7 @@
    -

    R package cellxgene.census V1 is out!

    +

    R package cellxgene.census V1 is out!

    Published: August 7th, 2023

    By: Pablo Garcia-Nieto

    The Census team is pleased to announce the release of the R package cellxgene.census. 🎉 🎉

    @@ -211,7 +220,7 @@

    R package cellx

    With cellxgene.census in a few seconds users can access any slice of Census data using cell or gene filters across hundreds of datasets. The data can be fetched in an iterative fashion for bigger-than-memory slices of data, or quickly exported to basic R structures, and Seurat or SingleCellExperiment for downstream analysis.

    image

    -

    Installation and usage

    +

    Installation and usage

    Users can install cellxgene.census and its dependencies following the installation instructions.

    To learn more about the package please make sure to check out the following resources:

      @@ -221,17 +230,17 @@

      Installation and usage

    -

    Census R package is made possible by tiledbsoma

    +

    Census R package is made possible by tiledbsoma

    The cellxgene.census package relies on TileDB-SOMA R’s package tiledbsoma for all of its data access capabilities as shown in the next section.

    CZI and TileDB have worked closely on the development of tiledbsoma and recently upgraded it from beta to its first stable version. Release notes can be found here.

    -

    Efficient access to single-cell data for >33M cells from R

    +

    Efficient access to single-cell data for >33M cells from R

    Census hosts ever-growing data releases from CZ CELLxGENE Discover, representing the largest aggregation of standardized single-cell data.

    Census data are accompanied by cell and gene metadata that have been standardized on ontologies across all datasets hosted in CZ CELLxGENE Discover. For example all cell types and tissues have been mapped to a value of the CL and UBERON ontologies, respectively. You can find more about the data in the Census data and schema page.

    With the cellxgene.census R package, researchers can have access to all of these data and metadata directly from an R session with the following capabilities:

    -

    Easy-to-use handles to the cloud-hosted Census data

    +

    Easy-to-use handles to the cloud-hosted Census data

    From R users can get a handle to the data by opening the Census.

    library("cellxgene.census")
     
    @@ -244,7 +253,7 @@ 

    Easy-to-use handles to the cloud-hosted Census data

    -

    Querying and reading single-cell metadata from Census

    +

    Querying and reading single-cell metadata from Census

    Following our Census data and schema, users can navigate and query Census data and metadata by using any combination of gene and cell filters.

    For example, reading a slice of the human cell metadata for more than 300K cells with Microglial cells or Neurons from female donors:

    library("cellxgene.census")
    @@ -271,7 +280,7 @@ 

    Querying and reading single-cell metadata from Census

    -

    Exporting Census slices to Seurat and SingleCellExperiment

    +

    Exporting Census slices to Seurat and SingleCellExperiment

    Similarly, users can query both the single-cell data along with its metadata and export them to Seurat or SingleCellExperiment objects for downstream analysis:

    library("cellxgene.census")
     
    @@ -305,7 +314,7 @@ 

    Exporting Census slices to

    -

    Streaming data incrementally in chunks

    +

    Streaming data incrementally in chunks

    Sometimes Census queries can be too large to be loaded in memory. TileDB-SOMA allows users to query Census data in an incremental fashion using iterators.

    To find out more about iterable-based queries you can check out the following resources:

      diff --git a/articles/2023/20230919-out_of_core_methods.html b/articles/2023/20230919-out_of_core_methods.html index b64ed905f..16e658edf 100644 --- a/articles/2023/20230919-out_of_core_methods.html +++ b/articles/2023/20230919-out_of_core_methods.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
      +
      + + + +
      +

    @@ -202,7 +211,7 @@
    -

    Memory-efficient implementations of commonly used single-cell methods

    +

    Memory-efficient implementations of commonly used single-cell methods

    Published: September 18, 2023

    By: Pablo Garcia-Nieto

    The Census team is thrilled to officially announce memory-efficient implementations of some of the most widely used single-cell algorithms.

    @@ -214,16 +223,16 @@

    Memory-efficient implementations of commonly used single-cell methodsThese implementations are interwoven with the way users query slices of Census data, which means that these tasks can be seamlessly applied to any slice of the 33M+ cells available in Census.

    Continue reading for more implementation details and usage examples.

    -

    Efficient calculation of average and variance gene expression across millions of cells

    +

    Efficient calculation of average and variance gene expression across millions of cells

    With cellxgene_census.experimental.pp.mean_variance users can now get gene expression average and variance for all genes or cells in a given Census query.

    -

    How it works

    +

    How it works

    Calculations are done in an accumulative incremental fashion, meaning that only a small fraction of the total data is processed at any given time.

    The Census data is downloaded in increments and average and variance accumulators are updated at each incremental step. The implementation also takes advantage of CPU-based multiprocessing to speed up the process.

    Currently, the mean and variance are calculated using the full population of cells/genes, including those with a zero valued measurement. In the future, we will enable calculation of mean including only the population of non-zero cells/genes.

    -

    Example: KRAS and AQP4 average and variance expression in lung epithelial cells

    +

    Example: KRAS and AQP4 average and variance expression in lung epithelial cells

    The following calculates the average and variance values for the genes KRAS and AQP4 in all epithelial cells of the human lung.

    Users can easily switch the calculation, and obtain average and variance for each cell across the genes in the query. This is controlled by the axis argument of mean_variance.

    import cellxgene_census
    @@ -274,16 +283,16 @@ 

    Example: KRAS and AQP4 average and variance expression in

    -

    Efficient calculation of highly variable genes across millions of cells

    +

    Efficient calculation of highly variable genes across millions of cells

    With cellxgene_census.experimental.pp.get_highly_variable_genes users can get the most highly variable genes of a Census query while accounting for batch effects.

    This is usually the first pre-processing step necessary for other downstream tasks, for example data integration.

    -

    How it works

    +

    How it works

    The Census algorithm is based on the scanpy method scanpy.pp.highly_variable_genes, and in particular the Seurat V3 method, which is designed for raw counts and can account for batch effects.

    The Census implementation utilizes the same incremental paradigm used in cellxgene_census.experimental.pp.mean_variance (see above), calculating incremental-based mean and variance accumulators with some tweaks to comply with the Seurat V3 method.

    -

    Example: Finding highly variable genes for all cells of the human esophagus

    +

    Example: Finding highly variable genes for all cells of the human esophagus

    The following example identifies the top 1000 highly variable genes for all human esophagus cells. As a general rule of thumb it is good to use dataset_id as the batch variable.

    import cellxgene_census
     from cellxgene_census.experimental.pp import get_highly_variable_genes
    diff --git a/articles/2023/20231012-normalized_layer_precalc_stats.html b/articles/2023/20231012-normalized_layer_precalc_stats.html
    index 4ee280a38..b8de3de23 100644
    --- a/articles/2023/20231012-normalized_layer_precalc_stats.html
    +++ b/articles/2023/20231012-normalized_layer_precalc_stats.html
    @@ -35,6 +35,8 @@
       
         
           
    +        
    +        
             
             
             
    @@ -127,6 +129,13 @@
                 
     
                 
    +
    +
    + + + +
    +
    @@ -203,7 +212,7 @@
    -

    Introducing a normalized layer and pre-calculated cell and gene statistics in Census

    +

    Introducing a normalized layer and pre-calculated cell and gene statistics in Census

    Published: October 12, 2023

    By: Maximilian Lombardo and Pablo Garcia-Nieto

    @@ -218,15 +227,15 @@

    Introducing a normalized layer and pre-calculated cell and gene statistics i

    These features are currently exclusive to the “latest” versions of the Census data release and they will be available in the next LTS data release. We invite your feedback as you explore these novel functionalities.

    Keep on reading to find out more about these features!

    -

    Description of new data added to Census

    +

    Description of new data added to Census

    All of the following changes were introduced in the Census schema V1.1.0.

    -

    Added a new library-size normalized layer

    +

    Added a new library-size normalized layer

    We have introduced a library-size normalized X layer for the RNA measurements of both the human and mouse experiments available as X["normalized"]. The normalized layer is built by dividing each value in the raw count matrix by its corresponding row sum (i.e. size normalization).

    To reduce data size and improve performance, normalized values are stored with a reduced floating point precision. In addition, to ensure that small count values do not round to zero, a small sigma has been added. You will see the effect of these artifacts in row (per-cell) values not summing to precisely 1.0.

    -

    Enhanced gene metadata

    +

    Enhanced gene metadata

    The ms["RNA"].var DataFrame for both the human and mouse experiments has been enriched with two new metadata fields:

    • nnz — the number of explicitly stored values, effectively the number of cells expressing this gene.

    • @@ -234,7 +243,7 @@

      Enhanced gene metadata

    -

    Enhanced cell metadata

    +

    Enhanced cell metadata

    The obs DataFrame for both the human and mouse experiments is now augmented with the following new metadata, allowing users to forego common calculations used in early data pre-processing. For each cell:

    • raw_sum — the sum of the raw counts, derived from X["raw"].

    • @@ -246,9 +255,9 @@

      Enhanced cell metadata

    -

    How to use the new features

    +

    How to use the new features

    -

    Exporting the normalized data to existing single-cell toolkits

    +

    Exporting the normalized data to existing single-cell toolkits

    In Python, the normalized data can be exported into AnnData specifying the X_name = "normalized" argument of the cellxgene.get_anndata() method.

    import cellxgene_census
     
    @@ -300,7 +309,7 @@ 

    Exporting the normalized data to existing single-cell toolkits -

    Accessing library-size normalized data layer via TileDB-SOMA

    +

    Accessing library-size normalized data layer via TileDB-SOMA

    For memory-efficient data retrieval, you can use TileDB-SOMA as outlined below. In Python this looks like the following.

    
     import cellxgene_census
    @@ -356,7 +365,7 @@ 

    Accessing library-size normalized data layer via TileDB-SOMA -

    Utilizing pre-calculated stats for querying obs and var

    +

    Utilizing pre-calculated stats for querying obs and var

    To filter cells or genes based on pre-calculated statistics and export to AnnData, you can use the new metadata variables as value filters.

    For example, you can add a filter to query cells with more than 500 genes expressed, along with other filters. In Python this looks like the following.

    import cellxgene_census
    @@ -409,7 +418,7 @@ 

    Utilizing pre-calculated stats for querying -

    Help us improve these data additions

    +

    Help us improve these data additions

    We encourage you to engage with these new features in the Census API and share your feedback. This input is invaluable for the ongoing enhancement of the Census project.

    For further information on any new feature, please reach out to us at soma@chanzuckerberg.com. To report issues or for additional feedback, refer to our Census GitHub repository.

    diff --git a/articles/2024/20240404-categoricals.html b/articles/2024/20240404-categoricals.html index fd15266a1..b5bee29f6 100644 --- a/articles/2024/20240404-categoricals.html +++ b/articles/2024/20240404-categoricals.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + +
    +

    @@ -200,14 +209,14 @@
    -

    Census supports categoricals for cell metadata

    +

    Census supports categoricals for cell metadata

    Published: April 4th, 2024

    By: Emanuele Bezzi & Pablo Garcia-Nieto

    Starting with the 2024-04-01 Census build, a subset of the columns in the obs dataframe are now categorical instead of strings.

    Overall users will observe a smaller memory footprint when loading Census data into memory. 🚀

    However, this may break some existing pipelines as explained below.

    -

    Potential breaking changes

    +

    Potential breaking changes

    For Python users, note that Pandas will encode these columns as pandas.Categorical for which some downstream operations may need to be adapted. See this link for more details. In particular:

    Series methods like Series.value_counts() will use all categories, even if some categories are not present in the data

    @@ -220,7 +229,7 @@

    Potential breaking changesarrow, these columns will be encoded as dictionary, see more details for R in this link and Python in this link.

    -

    Identifying the obs columns encoded as categorical

    +

    Identifying the obs columns encoded as categorical

    Users can always check the type of each cell metadata variable by inspecting the schema of obs. Categoricals will be shown as dictionary.

    In Python:

    import cellxgene_census
    diff --git a/cellxgene_census_aws_open_data.html b/cellxgene_census_aws_open_data.html
    index 28ef7294d..7efca160f 100644
    --- a/cellxgene_census_aws_open_data.html
    +++ b/cellxgene_census_aws_open_data.html
    @@ -35,6 +35,8 @@
       
         
           
    +        
    +        
             
             
             
    @@ -127,6 +129,13 @@
                 
     
                 
    +
    +
    + + + +
    +
    @@ -201,7 +210,7 @@
    -

    CZ CELLxGENE Discover Census in AWS

    +

    CZ CELLxGENE Discover Census in AWS

    The single-cell data from CZ CELLxGENE Discover Census are available for public access via Amazon Web Services (AWS).

    This page describes what Census data are available in AWS and how to access them.

    Contents

    @@ -210,10 +219,10 @@

    CZ CELLxGENE Discover Census in AWSHow to access AWS Census data

    -

    Census data available in AWS

    +

    Census data available in AWS

    The single-cell data from CZ CELLxGENE Discover included in Census (see inclusion criteria) are available either as Census-wide TileDB files or individual H5AD files of the source datasets.

    -

    Data specifications

    +

    Data specifications

    @@ -248,7 +257,7 @@

    Data specifications[tag].

    -

    Data release versioning

    +

    Data release versioning

    A data release is a Census build that is publicly hosted in AWS. A Census build is a TileDB-SOMA collection and its corresponding source H5AD files with the Census data from CZ CELLxGENE Discover.

    Any given Census build is named with a unique [tag], normally the date of build, e.g. “2023-05-15”.

    There are two types of data releases:

    @@ -260,9 +269,9 @@

    Data release versioning

    -

    How to access AWS Census data

    +

    How to access AWS Census data

    -

    AWS CLI for programatic downloads

    +

    AWS CLI for programmatic downloads

    Users can bulk-download Census data via the AWS CLI.

    For example, to download the H5AD files of the Census LTS release 2023-07-25, users can execute the following from a shell session:

    aws s3 sync --no-sign-request s3://cellxgene-census-public-us-west-2/cell-census/2023-07-25/h5ads/ ./h5ads/
    @@ -274,7 +283,7 @@ 

    AWS CLI for programatic downloads -

    CELLxGENE Census API (Python and R)

    +

    CELLxGENE Census API (Python and R)

    This is the recommended method for accessing Census data. Please follow the Census API quick start guide for a full guide.

    For example, in Python users can create an iterator for the cell metadata Data Frame as follows:

    import cellxgene_census
    @@ -309,7 +318,7 @@ 

    CELLxGENE Census API (Python and R) -

    TileDB-SOMA API (Python and R)

    +

    TileDB-SOMA API (Python and R)

    The Census API provides convenience wrappers for TileDB-SOMA to access the Census Data hosted at AWS. Users can interact with the Census TileDB data directly via the TileDB-SOMA APIs. Please refer to the TileDB-SOMA documentation for full details on usage.

    For example, in Python users can create an iterator for the cell metadata Data Frame as follows:

    import cellxgene_census
    diff --git a/cellxgene_census_docsite_FAQ.html b/cellxgene_census_docsite_FAQ.html
    index 5f70b8049..eb7867d59 100644
    --- a/cellxgene_census_docsite_FAQ.html
    +++ b/cellxgene_census_docsite_FAQ.html
    @@ -35,6 +35,8 @@
       
         
           
    +        
    +        
             
             
             
    @@ -126,6 +128,13 @@
                 
     
                 
    +
    +
    + + + + +
    @@ -204,7 +213,7 @@
    -

    FAQ

    +

    FAQ

    Last updated: Jan, 2024.

    -

    Why should I use the Census?

    +

    Why should I use the Census?

    The Census provides efficient low-latency access via Python and R APIs to most single-cell RNA data from CZ CELLxGENE Discover. To accelerate computational research, the Census enables researchers to:

    • Access slices of data from more than 500 single-cell datasets spanning about 33M unique cells (50M total) from >60K genes from human or mice.

    • @@ -243,15 +252,15 @@

      Why should I use the Census?CZ CELLxGENE Discover Datasets feature. Click here for more information about downloading published data on CELLxGENE Discover.

    -

    What data is contained in the Census?

    +

    What data is contained in the Census?

    Most RNA non-spatial data from CZ CELLxGENE Discover is included. You can see a general description of these data and their organization in the schema description or you can use the APIs to explore the data as indicated in this tutorial.

    -

    How do I cite the use of the Census for a publication?

    +

    How do I cite the use of the Census for a publication?

    Please follow the citation guidelines offered by CZ CELLxGENE Discover.

    -

    Why does the Census not have a normalized layer or embeddings?

    +

    Why does the Census not have a normalized layer or embeddings?

    The Census does not have normalized counts or embeddings because:

    • The original normalized values and embeddings are not harmonized or integrated across datasets and are therefore numerically incompatible.

    • @@ -260,7 +269,7 @@

      Why does the Census not have a normalized layer or embeddings?.

    -

    How does the Census differentiate from other tools?

    +

    How does the Census differentiate from other tools?

    The Census differentiates from existing single-cell tools by providing fast, efficient access to the largest corpus of standardized single-cell data from CZ CELLxGENE Discover via TileDB-SOMA. Thus, single-cell data from about 33M unique cells (50M total) across >60 K genes, with 11 standardized cell metadata variables and harmonized GENCODE annotations are ready for:

    • Opening and reading data at low latency from the cloud.

    • @@ -272,15 +281,15 @@

      How does the Census differentiate from other tools?

    -

    Can I query human and mouse data in a single query?

    +

    Can I query human and mouse data in a single query?

    It is not possible to query both mouse and human data in a single query. This is due to the data from these organisms using different organism-specific gene annotations.

    -

    Where are the Census data hosted?

    +

    Where are the Census data hosted?

    The Census data is publicly hosted free-of-cost in an Amazon Web Services (AWS) S3 bucket in the us-west-2 region.

    -

    Can I retrieve the original H5AD datasets from which the Census was built?

    +

    Can I retrieve the original H5AD datasets from which the Census was built?

    Yes, you can use the API function download_source_h5ad to do so. For usage, please see the reference documentation at the doc-site or directly from Python or R:

    Python

    import cellxgene_census
    @@ -294,7 +303,7 @@ 

    Can I retrieve the original H5AD datasets from which the Census was built?

    -

    How can I increase the performance of my queries?

    +

    How can I increase the performance of my queries?

    Since the access patterns are via the internet, usually the main limiting step for data queries is bandwidth and client location. We recommend the following tactics to increase query efficiency:

    • Utilize a computer connected to high-speed internet.

    • @@ -304,7 +313,7 @@

      How can I increase the performance of my queries?

    -

    Can I use conda to install the Census Python API?

    +

    Can I use conda to install the Census Python API?

    There is not a conda package available for cellxgene-census. However you can use conda in combination with pip to install the package in a conda environment:

    conda create -n census_env python=3.10
     conda activate census_env
    @@ -313,24 +322,24 @@ 

    Can I use conda to install the Census Python API?

    -

    How can I ask for support?

    +

    How can I ask for support?

    You can either submit a github issue, or for quick support, you can join the CZI Science Community on Slack (czi.co/science-slack) and ask questions in the #cellxgene-census-users channel.

    -

    How can I ask for new features?

    +

    How can I ask for new features?

    You can submit a feature request in the github repository.

    -

    How can I contribute my data to the Census?

    +

    How can I contribute my data to the Census?

    To inquire about submitting your data to CZ CELLxGENE Discover, click here. If your data request is accepted, the data will automatically be included in the Census if it meets the biological criteria defined in the Census schema.

    -

    Why do I get an ArraySchema error when opening the Census?

    +

    Why do I get an ArraySchema error when opening the Census?

    You may get this error if you are trying to open a Census data build with an old version of the Census API. Please update your Python or R Census package.

    If the error persists please file a github issue.

    -

    Why do I get an error when running import cellxgene_census on Databricks?

    +

    Why do I get an error when running import cellxgene_census on Databricks?

    This can occur if the cellxgene_census Python package is installed in a Databricks notebook using %sh pip install cellxgene_census. This command does not restart the Python process after installing cellxgene_census and any pip package dependencies that were pre-installed by the Databricks Runtime environment but upgraded for cellxgene_census will not be reloaded with their new version. You may see numba or pyarrow related errors, for example.

    To fix, simply install using one of the following Databricks notebook “magic” commands:

    pip install -U cellxgene-census
    diff --git a/cellxgene_census_docsite_data_release_info.html b/cellxgene_census_docsite_data_release_info.html
    index 917032412..c36908759 100644
    --- a/cellxgene_census_docsite_data_release_info.html
    +++ b/cellxgene_census_docsite_data_release_info.html
    @@ -35,6 +35,8 @@
       
         
           
    +        
    +        
             
             
             
    @@ -127,6 +129,13 @@
                 
     
                 
    +
    +
    + + + + +
    @@ -219,7 +228,7 @@
    -

    Census data releases

    +

    Census data releases

    Last edited: December 15th, 2023.

    Contents:

      @@ -228,12 +237,12 @@

      Census data releases

      Compatibility with package versions

    -

    What is a Census data release?

    +

    What is a Census data release?

    It is a Census build that is publicly hosted online. A Census build is a TileDB-SOMA collection with the Census data from CZ CELLxGENE Discover as specified in the Census schema.

    Any given Census build is named with a unique tag, normally the date of build, e.g., "2023-05-15".

    -

    Long-term supported (LTS) Census releases

    +

    Long-term supported (LTS) Census releases

    To enable data stability and scientific reproducibility, CZ CELLxGENE Discover plans to perform regular LTS Census data releases:

    • Published online every six months for public access, starting on May 15, 2023.

    • @@ -252,7 +261,7 @@

      Long-term supported (LTS) Census releases -

      Weekly Census releases (latest)

      +

      Weekly Census releases (latest)

      CZ CELLxGENE Discover ingests a handful of new datasets every week. To quickly enable access to these new data via the Census, CZ CELLxGENE Discover plans to perform weekly Census data releases:

      • Available for public access for 1 month.

      • @@ -271,12 +280,12 @@

        Weekly Census releases (latest) -

        List of LTS Census data releases

        +

        List of LTS Census data releases

        -

        LTS 2023-12-15

        +

        LTS 2023-12-15

        Open this data release by specifying census_version = "2023-12-15" in future calls to open_soma().

        -

        Version information

        +

        Version information

    Data
    @@ -300,7 +309,7 @@

    Version information -

    Cell and donor counts

    +

    Cell and donor counts

    Information

    @@ -325,7 +334,7 @@

    Cell and donor counts
    -

    Cell metadata

    +

    Cell metadata

    Type

    @@ -374,7 +383,7 @@

    Cell metadata -

    Cell embbedings

    +

    Cell embeddings

    Find out more in the Census model page.

    Available obsm slots:

    Category

    @@ -398,10 +407,10 @@

    Cell embbedings -

    LTS 2023-07-25

    +

    LTS 2023-07-25

    Open this data release by specifying census_version = "2023-07-25" in future calls to open_soma().

    -

    Version information

    +

    Version information

    @@ -425,7 +434,7 @@

    Version information -

    Cell and donor counts

    +

    Cell and donor counts

    Information

    @@ -450,7 +459,7 @@

    Cell and donor counts -

    Cell metadata

    +

    Cell metadata

    Type

    @@ -500,19 +509,19 @@

    Cell metadata -

    LTS 2023-05-15

    +

    LTS 2023-05-15

    Open this data release by specifying census_version = "2023-05-15" in future calls to open_soma().

    -

    🔴 Errata 🔴

    +

    🔴 Errata 🔴

    -
    Duplicate observations with is_primary_data = True
    +
    Duplicate observations with is_primary_data = True

    In order to prevent duplicate data in analyses, each observation (cell) should be marked is_primary_data = True exactly once in the Census. Since this LTS release, 243,569 observations have been identified that are represented at least twice with is_primary_data = True.

    This issue will be corrected in the following LTS data release, by identifying and marking only one cell out of the duplicates as is_primary_data = True.

    If you wish to use this data release, you can consider filtering out all of these 243,569 cells by using the soma_joinids provided in this file duplicate_cells_census_LTS_2023-05-15.csv.zip. You can filter specific cells by using the value_filter or obs_value_filter of the querying API functions, for more information follow this tutorial.

    -

    Version information

    +

    Version information

    Category

    @@ -536,7 +545,7 @@

    Version information -

    Cell and donor counts

    +

    Cell and donor counts

    Information

    @@ -561,7 +570,7 @@

    Cell and donor counts -

    Cell metadata

    +

    Cell metadata

    Type

    @@ -612,7 +621,7 @@

    Cell metadata -

    Compatibility with package versions

    +

    Compatibility with package versions

    Due to the nature of the Census storage backend, the format version will change from time to time. Format upgrades are always backwards compatible, but they’re not always forwards compatible, which means that reading a recent Census data version using an older version of the package might result in an error. We aim to guarantee the following policy:

      diff --git a/cellxgene_census_docsite_installation.html b/cellxgene_census_docsite_installation.html index 20dc4a128..ab86b6254 100644 --- a/cellxgene_census_docsite_installation.html +++ b/cellxgene_census_docsite_installation.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
      +
      + + + + +
      @@ -193,9 +202,9 @@
      -

      Installation

      +

      Installation

      -

      Requirements

      +

      Requirements

      The Census API requires a Linux or MacOS system with:

      -

      R

      +

      R

      If installing from Ubuntu, you may need to install the following libraries via apt install, libxml2-dev libssl-dev libcurl4-openssl-dev. In addition you must have cmake v3.21 or greater.

      If installing from MacOS, you will need to install the developer tools Xcode.

      Windows is not supported.

      diff --git a/cellxgene_census_docsite_landing.html b/cellxgene_census_docsite_landing.html index fdbee4e99..19ea15bfa 100644 --- a/cellxgene_census_docsite_landing.html +++ b/cellxgene_census_docsite_landing.html @@ -35,6 +35,8 @@ + + @@ -125,6 +127,13 @@ +
      +
      + + + + +
      @@ -189,7 +198,7 @@

      🚀 New to the Census: we’ve created a centralized hub of models and embeddings using Census data. Check it out!

      -

      CZ CELLxGENE Discover Census

      +

      CZ CELLxGENE Discover Census

      The Census provides efficient computational tooling to access, query, and analyze all single-cell RNA data from CZ CELLxGENE Discover. Using a new access paradigm of cell-based slicing and querying, you can interact with the data through TileDB-SOMA, or get slices in AnnData, Seurat, or SingleCellExperiment objects, thus accelerating your research by significantly minimizing data harmonization.

      Get started:

        @@ -201,12 +210,12 @@

        CZ CELLxGENE Discover Census

        -

        Citing Census

        +

        Citing Census

        To cite the project please follow the citation guidelines offered by CZ CELLxGENE Discover.

        To cite individual studies please refer to the tutorial Generating citations for Census slices.

        -

        Census Capabilities

        +

        Census Capabilities

        The Census is a data object publicly hosted online and an API to open it. The object is built using the SOMA API specification and data model, and it is implemented via TileDB-SOMA. As such, the Census has all the data capabilities offered by TileDB-SOMA including:

        Data access at scale:

          @@ -227,7 +236,7 @@

          Census Capabilities -

          Census Data and Schema

          +

          Census Data and Schema

          A description of the Census data and its schema is detailed here.

          ⚠️ Note that the data includes:

            @@ -236,12 +245,12 @@

            Census Data and Schema

        -

        Census Data Releases

        +

        Census Data Releases

        The Census data release plans are detailed here.

        Starting May 15th, 2023, Census data releases with long-term support will be published every six months. These releases will be publicly accessible for at least five years. In addition, weekly releases may be published without any guarantee of permanence.

        -

        Questions, Feedback and Issues

        +

        Questions, Feedback and Issues

        • Users are encouraged to submit questions and feature requests about the Census via github issues.

        • For quick support, you can join the CZI Science Community on Slack (czi.co/science-slack) and ask questions in the #cellxgene-census-users channel.

        • @@ -252,7 +261,7 @@

          Questions, Feedback and Issues -

          Coming Soon!

          +

          Coming Soon!

          • We are currently working on creating the tooling necessary to perform data modeling at scale with seamless integration of the Census and PyTorch.

          • To increase the usability of the Census for research, in 2023 and 2024 we are planning to explore the following areas:

            @@ -265,7 +274,7 @@

            Coming Soon! -

            Projects and Tools Using Census

            +

            Projects and Tools Using Census

            If you are interested in listing a project here, please reach out to us at soma@chanzuckerberg.com

      diff --git a/cellxgene_census_docsite_quick_start.html b/cellxgene_census_docsite_quick_start.html index bc3e1d671..7126bb525 100644 --- a/cellxgene_census_docsite_quick_start.html +++ b/cellxgene_census_docsite_quick_start.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
      +
      + + + + +
      @@ -203,7 +212,7 @@
      -

      Quick start

      +

      Quick start

      This page provides details to start using the Census. Click here for more detailed Python tutorials (R vignettes coming soon).

      Contents:

        @@ -212,11 +221,11 @@

        Quick startR quick start.

      -

      Installation

      +

      Installation

      Install the Census API by following these instructions.

      -

      Python quick start

      +

      Python quick start

      Below are 3 examples of common operations you can do with the Census. As a reminder, the reference documentation for the API can be accessed via help():

      import cellxgene_census
       
      @@ -226,7 +235,7 @@ 

      Python quick start -

      Querying a slice of cell metadata

      +

      Querying a slice of cell metadata

      The following reads the cell metadata and filters female cells of cell type microglial cell or neuron, and selects the columns assay, cell_type, tissue, tissue_general, suspension_type, and disease.

      import cellxgene_census
       
      @@ -267,7 +276,7 @@ 

      Querying a slice of cell metadata -

      Obtaining a slice as AnnData

      +

      Obtaining a slice as AnnData

      The following creates an anndata.AnnData object on-demand with the same cell filtering criteria as above and filtering only the genes ENSG00000161798, ENSG00000188229.

      import cellxgene_census
       
      @@ -291,7 +300,7 @@ 

      Obtaining a slice as AnnData -

      Memory-efficient queries

      +

      Memory-efficient queries

      This example provides a demonstration to access the data for larger-than-memory operations using TileDB-SOMA operations.

      First we initiate a lazy-evaluation query to access all brain and male cells from human. This query needs to be closed — query.close() — or called in a context manager — with ....

      import cellxgene_census
      @@ -330,7 +339,7 @@ 

      Memory-efficient queries -

      R quick start

      +

      R quick start

      Below are 3 examples of common operations you can do with the Census. As a reminder, the reference documentation for the API can be accessed via ?:

      library("cellxgene.census")
       
      @@ -338,7 +347,7 @@ 

      R quick start -

      Querying a slice of cell metadata

      +

      Querying a slice of cell metadata

      The following reads the cell metadata and filters female cells of cell type microglial cell or neuron, and selects the columns assay, cell_type, tissue, tissue_general, suspension_type, and disease.

      The cellxgene.census package uses R6 classes and we recommend you to get familiar with their usage.

      library("cellxgene.census")
      @@ -385,7 +394,7 @@ 

      Querying a slice of cell metadata -

      Obtaining a slice as a Seurat or SingleCellExperiment object

      +

      Obtaining a slice as a Seurat or SingleCellExperiment object

      The following creates a Seurat object on-demand with a smaller set of cells and filtering only the genes ENSG00000161798, ENSG00000188229.

      library("cellxgene.census")
       library("Seurat")
      @@ -444,7 +453,7 @@ 

      Obtaining a slice as a

      -

      Memory-efficient queries

      +

      Memory-efficient queries

      This example provides a demonstration to access the data for larger-than-memory operations using TileDB-SOMA operations.

      First we initiate a lazy-evaluation query to access all brain and male cells from human. This query needs to be closed — query$close().

      library("cellxgene.census")
      diff --git a/cellxgene_census_docsite_schema.html b/cellxgene_census_docsite_schema.html
      index 5f53d227c..4bd0f4abb 100644
      --- a/cellxgene_census_docsite_schema.html
      +++ b/cellxgene_census_docsite_schema.html
      @@ -35,6 +35,8 @@
         
           
             
      +        
      +        
               
               
               
      @@ -127,6 +129,13 @@
                   
       
                   
      +
      +
      + + + + +
      @@ -197,7 +206,7 @@
      -

      Census data and schema

      +

      Census data and schema

      This page provides a user-friendly overview of the Census contents and its schema. If you are interested, you can find the full schema specification here.

      Contents:

        @@ -206,7 +215,7 @@

        Census data and schema

        SOMA objects

      -

      Schema

      +

      Schema

      The Census is a collection of a variety of SOMA objects organized with the following hierarchy.

      image

      As you can see the Census data is a SOMACollection with two high-level items:

      @@ -215,7 +224,7 @@

      Schema

      "census_data" for the single-cell data and metadata.

      -

      Census summary info "census_info"

      +

      Census summary info "census_info"

      A SOMACollection with tables providing information about the Census as a whole. It has the following items:

      • "summary": high-level information of this Census, e.g. build date, total cell count, etc.

      • @@ -224,7 +233,7 @@

        Census summary info -

        Census single-cell data "census_data"

        +

        Census single-cell data "census_data"

        Data for each organism is stored in independent SOMAExperiment objects which are a specialized form of a SOMACollection. Each of these stores a data matrix (cell by genes), cell metadata, gene metadata, and a feature presence matrix:

        This is how the data is organized for one organism – Homo sapiens:

          @@ -236,7 +245,7 @@

          Census single-cell data

      -

      Data included in the Census

      +

      Data included in the Census

      All data from CZ CELLxGENE Discover that adheres to the following criteria is included in the Census:

      • Cells from human or mouse.

      • @@ -251,7 +260,7 @@

        Data included in the Census -

        SOMA objects

        +

        SOMA objects

        You can find the full SOMA specification here.

        The following is a short description of the main SOMA objects used by the Census:

          diff --git a/cellxgene_census_schema.html b/cellxgene_census_schema.html index d3c87632f..a71b64479 100644 --- a/cellxgene_census_schema.html +++ b/cellxgene_census_schema.html @@ -35,6 +35,8 @@ + + @@ -125,6 +127,13 @@ +
          +
          + + + + +

      @@ -186,17 +195,17 @@
      -

      CZ CELLxGENE Discover Census Schema

      +

      CZ CELLxGENE Discover Census Schema

      Version: 2.0.1

      Last edited: March, 2024.

      The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, “SHOULD NOT”, “RECOMMENDED”, “NOT RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be interpreted as described in BCP 14, RFC2119, and RFC8174 when, and only when, they appear in all capitals, as shown here.

      -

      Census overview

      +

      Census overview

      The CZ CELLxGENE Discover Census, hereafter referred to as Census, is a versioned data object and API for most of the single-cell data hosted at CZ CELLxGENE Discover. To learn more about the Census, visit the chanzuckerberg/cellxgene-census GitHub repository.

      To better understand this document the reader should be familiar with the CELLxGENE dataset schema and SOMA.

      -

      Definitions

      +

      Definitions

      The following terms are used throughout this document:

      -
      Full-gene sequencing assays
      +
      Full-gene sequencing assays

      From the list of accepted assays, this list of full-gene sequencing assays are those that when used at the single-cell level will always perform full-gene sequencing.

      These data need to be normalized by gene length for downstream analysis.

      -

      Data matrix types

      +

      Data matrix types

      Per the CELLxGENE dataset schema, all RNA assays MUST include UMI or read counts. Author-normalized data layers as defined in the CELLxGENE dataset schema MUST NOT be included in the Census.

      -

      Sample types

      +

      Sample types

      Only observations (cells) from primary tissue MUST be included in the Census. Thus, ONLY those observations with a tissue_type value equal to “tissue” MUST be included; other values of tissue_type MUST NOT be included.

      -

      Repeated data

      +

      Repeated data

      When a cell is represented multiple times in CELLxGENE Discover, only one is marked as the primary cell. This is defined in the CELLxGENE dataset schema under is_primary_data. This information MUST be included in the Census cell metadata to enable queries that retrieve datasets (see cell metadata below), and all cells MUST be included in the Census.

      -

      Data encoding and organization

      +

      Data encoding and organization

      The Census MUST be encoded as a SOMACollection which will be referenced as census_obj in the following sections. census_obj MUST have two keys "census_info" and "census_data" whose contents are defined in the sections below.

      -

      Census information census_obj["census_info"] - SOMACollection

      +

      Census information census_obj["census_info"] - SOMACollection

      A series of summary and metadata tables MUST be included in this SOMACollection:

      -
      Census metadata – census_obj​​["census_info"]["summary"]SOMADataFrame
      +
      Census metadata – census_obj​​["census_info"]["summary"]SOMADataFrame

      Census metadata MUST be stored as a SOMADataFrame with two columns:

    Category

    @@ -458,7 +467,7 @@
    Census metadata – -

    Census table of CELLxGENE Discover datasets – census_obj["census_info"]["datasets"]SOMADataFrame

    +

    Census table of CELLxGENE Discover datasets – census_obj["census_info"]["datasets"]SOMADataFrame

    All datasets used to build the Census MUST be included in a table modeled as a SOMADataFrame. Each row MUST correspond to an individual dataset with the following columns:

    @@ -514,7 +523,7 @@

    Census table of CELLxGENE Discover datasets – -

    Census summary cell counts – census_obj["census_info"]["summary_cell_counts"]SOMADataframe

    +

    Census summary cell counts – census_obj["census_info"]["summary_cell_counts"]SOMADataframe

    Summary cell counts grouped by organism and relevant cell metadata MUST be modeled as a SOMADataFrame in census_obj["census_info"]["summary_cell_counts"]. Each row MUST correspond to a combination of organism and metadata variables with the following columns:

    @@ -785,7 +794,7 @@

    Census summary cell counts – <

    -

    Census table of organisms – census_obj["census_info"]["organisms"]SOMADataframe

    +

    Census table of organisms – census_obj["census_info"]["organisms"]SOMADataframe

    Information about organisms whose cells are included in the Census MUST be included in a table modeled as a SOMADataFrame. Each row MUST correspond to an individual organism with the following columns:

    @@ -838,7 +847,7 @@

    Census table of organisms –
    -

    Census Data – census_obj["census_data"][organism]SOMAExperiment

    +

    Census Data – census_obj["census_data"][organism]SOMAExperiment

    Data for Homo sapiens MUST be stored as a SOMAExperiment in census_obj["homo_sapiens"].

    Data for Mus musculus MUST be stored as a SOMAExperiment in census_obj["mus_musculus"].

    For each organism the SOMAExperiment MUST contain the following:

    @@ -858,11 +867,11 @@

    Census Data –
    -

    Matrix Data, count (raw) matrix – census_obj["census_data"][organism].ms["RNA"].X["raw"]SOMASparseNDArray

    +

    Matrix Data, count (raw) matrix – census_obj["census_data"][organism].ms["RNA"].X["raw"]SOMASparseNDArray

    Per the CELLxGENE dataset schema, all RNA assays MUST include UMI or read counts. These counts MUST be encoded as float32 in this SOMASparseNDArray with a fill value of zero (0), and no explicitly stored zero values.

    -

    Matrix Data, normalized count matrix – census_obj["census_data"][organism].ms["RNA"].X["normalized"]SOMASparseNDArray

    +

    Matrix Data, normalized count matrix – census_obj["census_data"][organism].ms["RNA"].X["normalized"]SOMASparseNDArray

    This is an experimental data artifact - it may be removed at any time.

    A library-sized normalized layer, containing a normalized variant of the count (raw) matrix. For full-gene sequencing assays, given a value X[i,j] in the counts (raw) matrix, library-size normalized values are defined @@ -871,7 +880,7 @@

    Matrix Data, normalized count matrix – normalized[i,j] = X[i,j] / sum(X[i, ]).

    -

    Feature metadata – census_obj["census_data"][organism].ms["RNA"].varSOMADataFrame

    +

    Feature metadata – census_obj["census_data"][organism].ms["RNA"].varSOMADataFrame

    The Census MUST only contain features with a feature_biotype value of “gene”.

    The gene references are pinned as defined in the CELLxGENE dataset schema.

    The following columns MUST be included:

    @@ -913,7 +922,7 @@

    Feature metadata –

    -

    Feature dataset presence matrix – census_obj["census_data"][organism].ms["RNA"]["feature_dataset_presence_matrix"]SOMASparseNDArray

    +

    Feature dataset presence matrix – census_obj["census_data"][organism].ms["RNA"]["feature_dataset_presence_matrix"]SOMASparseNDArray

    In some datasets, there are features not included in the source data. To clarify the difference between features that were not included and features that were not measured, for each SOMAExperiment the Census MUST include a presence matrix encoded as a SOMASparseNDArray.

    For all features included in the Census, the dataset presence matrix MUST indicate what features are included in each dataset of the Census. This information MUST be encoded as a boolean matrix, True indicates the feature was included in the dataset, False otherwise. This is a two-dimensional matrix and it MUST be N x M where N is the number of datasets in the SOMAExperiment and M is the number of features. The matrix is indexed by the soma_joinid value of census_obj["census_info"]["datasets"] and census_obj["census_data"][organism].ms["RNA"].var.

    If the feature has at least one cell with a value greater than zero in the count data matrix X in the dataset of origin, the value MUST be True; otherwise, it MUST be False.

    @@ -950,7 +959,7 @@

    Feature dataset presence matrix – -

    Cell metadata – census_obj["census_data"][organism].obsSOMADataFrame

    +

    Cell metadata – census_obj["census_data"][organism].obsSOMADataFrame

    Cell metadata MUST be encoded as a SOMADataFrame with the following columns:

    @@ -1065,9 +1074,9 @@

    Cell metadata – -

    Changelog

    +

    Changelog

    -

    Version 2.0.0

    +

    Version 2.0.0

    -

    Version 1.1.0

    +

    Version 1.1.0

    • Adds dataset_version_id to “Census table of CELLxGENE Discover datasets – census_obj["census_info"]["datasets"]

    • Add X["normalized"] layer

    • @@ -1099,7 +1108,7 @@

      Version 1.1.0 -

      Version 1.0.0

      +

      Version 1.0.0

      • Updates text to reflect official name: CZ CELLxGENE Discover Census.

      • Updates census["census_info"]["summary"] to reflect official name in the column label:

        @@ -1116,20 +1125,20 @@

        Version 1.0.0 -

        Version 0.1.1

        +

        Version 0.1.1

        • Adds clarifying text for “Feature Dataset Presence Matrix”

    -

    Version 0.1.0

    +

    Version 0.1.0

    • The “Dataset Presence Matrix” was renamed to “Feature Dataset Presence Matrix” and moved from census_obj["census_data"][organism].ms["RNA"].varp["dataset_presence_matrix"] to census_obj["census_data"][organism].ms["RNA"]["feature_dataset_presence_matrix"].

    • Editorial: changes all double quotes in the schema to ASCII quotes 0x22.

    -

    Version 0.0.1

    +

    Version 0.0.1

    • Initial Census schema is published.

    diff --git a/examples.html b/examples.html index b84456fea..7b08960fd 100644 --- a/examples.html +++ b/examples.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
    +
    + + + + +
    @@ -230,9 +239,9 @@
    -

    Python tutorials

    +

    Python tutorials

    -

    Exporting data

    +

    Exporting data

    Learn how to stream the single-cell data and metadata from Census into your machine.

      @@ -244,7 +253,7 @@

      Exporting data -

      [NEW! 🚀] Using integrated embeddings and models

      +

      [NEW! 🚀] Using integrated embeddings and models

      Tutorials that show you how to retrieve pre-calculated Census embeddings and use their associated models for your workflows.

      Access Census embeddings.

      @@ -268,7 +277,7 @@

      [NEW! 🚀] Using integrated embeddings and models

    -

    Understanding Census data

    +

    Understanding Census data

    Gain a better understanding of the nature of the Census data and how it’s organized.

      @@ -282,7 +291,7 @@

      Understanding Census data -

      Analyzing Census data

      +

      Analyzing Census data

      A few examples of relevant analysis pipelines with Census data.

        @@ -293,7 +302,7 @@

        Analyzing Census data

    -

    Scalable computing

    +

    Scalable computing

    Demonstrations of memory-efficient compute workflows that leverage the streaming capabilities of Census.

      @@ -304,7 +313,7 @@

      Scalable computing -

      Scalable machine learning

      +

      Scalable machine learning

      Learn about features to do data modeling directly from Census into machine learning toolkits.

        diff --git a/genindex.html b/genindex.html index 84ba9c403..b29bbfc9c 100644 --- a/genindex.html +++ b/genindex.html @@ -34,6 +34,8 @@ + + @@ -124,6 +126,13 @@ +
        +
        + + + + +
      diff --git a/index.html b/index.html index bdbb0a4de..af40834dd 100644 --- a/index.html +++ b/index.html @@ -35,6 +35,8 @@ + + @@ -126,6 +128,13 @@ +
      +
      + + + + +
    @@ -190,7 +199,7 @@

    🚀 New to the Census: we’ve created a centralized hub of models and embeddings using Census data. Check it out!

    -

    CZ CELLxGENE Discover Census

    +

    CZ CELLxGENE Discover Census

    The Census provides efficient computational tooling to access, query, and analyze all single-cell RNA data from CZ CELLxGENE Discover. Using a new access paradigm of cell-based slicing and querying, you can interact with the data through TileDB-SOMA, or get slices in AnnData, Seurat, or SingleCellExperiment objects, thus accelerating your research by significantly minimizing data harmonization.

    Get started:

      @@ -202,12 +211,12 @@

      CZ CELLxGENE Discover Census

      -

      Citing Census

      +

      Citing Census

      To cite the project please follow the citation guidelines offered by CZ CELLxGENE Discover.

      To cite individual studies please refer to the tutorial Generating citations for Census slices.

      -

      Census Capabilities

      +

      Census Capabilities

      The Census is a data object publicly hosted online and an API to open it. The object is built using the SOMA API specification and data model, and it is implemented via TileDB-SOMA. As such, the Census has all the data capabilities offered by TileDB-SOMA including:

      Data access at scale:

        @@ -228,7 +237,7 @@

        Census Capabilities -

        Census Data and Schema

        +

        Census Data and Schema

        A description of the Census data and its schema is detailed here.

        ⚠️ Note that the data includes:

          @@ -237,12 +246,12 @@

          Census Data and Schema

      -

      Census Data Releases

      +

      Census Data Releases

      The Census data release plans are detailed here.

      Starting May 15th, 2023, Census data releases with long-term support will be published every six months. These releases will be publicly accessible for at least five years. In addition, weekly releases may be published without any guarantee of permanence.

      -

      Questions, Feedback and Issues

      +

      Questions, Feedback and Issues

      • Users are encouraged to submit questions and feature requests about the Census via github issues.

      • For quick support, you can join the CZI Science Community on Slack (czi.co/science-slack) and ask questions in the #cellxgene-census-users channel.

      • @@ -253,7 +262,7 @@

        Questions, Feedback and Issues -

        Coming Soon!

        +

        Coming Soon!

        • We are currently working on creating the tooling necessary to perform data modeling at scale with seamless integration of the Census and PyTorch.

        • To increase the usability of the Census for research, in 2023 and 2024 we are planning to explore the following areas:

          @@ -266,7 +275,7 @@

          Coming Soon! -

          Projects and Tools Using Census

          +

          Projects and Tools Using Census

          If you are interested in listing a project here, please reach out to us at soma@chanzuckerberg.com

    diff --git a/notebooks/analysis_demo/comp_bio_census_info.html b/notebooks/analysis_demo/comp_bio_census_info.html index 84e5bd6e4..631239058 100644 --- a/notebooks/analysis_demo/comp_bio_census_info.html +++ b/notebooks/analysis_demo/comp_bio_census_info.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
    +
    + + + + +
    @@ -216,7 +225,7 @@
    -

    Learning about the CZ CELLxGENE Census

    +

    Learning about the CZ CELLxGENE Census

    This notebook showcases the Census contents and how to obtain high-level information about it. It covers the organization of data within the Census, what cell and gene metadata are available, and it provides simple demonstrations to summarize cell counts across cell metadata.

    Contents

      @@ -229,7 +238,7 @@

      Learning about the CZ CELLxGENE Censusis_primary_data which is described in the Census schema.

      -

      Opening the Census

      +

      Opening the Census

      The cellxgene_census Python package contains a convenient API to open the latest version of the Census. If you open the census, you should close it. open_soma() returns a context, so you can open/close it in several ways, like a Python file handle. The context manager is preferred, as it will automatically close the Census if an error is raised.

      [1]:
      @@ -276,15 +285,15 @@ 

      Opening the Census -

      Census organization

      +

      Census organization

      The Census schema defines the structure of the Census. In short, you can think of the Census as a structured collection of items that stores different pieces of information. All of these items and the parent collection are SOMA objects of various types and can all be accessed with the TileDB-SOMA API (documentation).

      The cellxgene_census package contains some convenient wrappers of the TileDB-SOMA API. An example of this is the function we used to open the Census: cellxgene_census.open_soma()

      -

      Main Census components

      +

      Main Census components

      With the command above you created census, which is a SOMACollection. It is analogous to a Python dictionary, and it has two items: census_info and census_data.

      -

      Census summary info

      +

      Census summary info

      • census["census_info"] A collection of tables providing information of the census as a whole.

          @@ -296,7 +305,7 @@

          Census summary info -

          Census data

          +

          Census data

          Data for each organism is stored in independent SOMAExperiment objects which are a specialized form of a SOMACollection. Each of these stores a data matrix (cell by genes), cell metadata, gene metadata, and some other useful components not covered in this notebook.

          This is how the data is organized for one organism – Homo sapiens:

            @@ -308,7 +317,7 @@

            Census data -

            Cell metadata

            +

            Cell metadata

            You can obtain all cell metadata variables by directly querying the columns of the corresponding SOMADataFrame.

            All of these variables can be used for querying the Census in case you want to work with specific cells.

            @@ -358,7 +367,7 @@

            Cell metadata -

            Gene metadata

            +

            Gene metadata

            Similarly, we can obtain all gene metadata variables by directly querying the columns of the corresponding SOMADataFrame.

            These are the variables you can use for querying the Census in case there are specific genes you are interested in.

            @@ -472,7 +481,7 @@

            Gene metadata -

            Census summary content tables

            +

            Census summary content tables

            You can take a quick look at the high-level Census information by looking at census["census_info"]["summary"]

            Of special interest are the label-value combinations for:

            -

            Cell counts by cell metadata

            +

            Cell counts by cell metadata

            By looking at census["summary_cell_counts"] you can get a general idea of cell counts stratified by some relevant cell metadata. Not all cell metadata is included in this table; you can take a look at all cell and gene metadata available in the sections below “Cell metadata” and “Gene metadata”.

            The line below retrieves this table and casts it into a pandas.DataFrame.

            @@ -645,7 +654,7 @@

            Cell counts by cell metadataorganism and values for each category of cell metadata you can take a look at total_cell_count and unique_cell_count for the cell counts of that combination.

            The values for each category are specified in ontology_term_id and label, which are the value’s IDs and labels, respectively.

            -

            Example: cell metadata included in the summary counts table

            +

            Example: cell metadata included in the summary counts table

            To get all the available cell metadata in the summary counts table you can do the following. Remember this is not all the cell metadata available, as some variables were omitted in the creation of this table.

            [7]:
            @@ -685,7 +694,7 @@ 

            Example: cell metadata included in the summary counts table -

            Example: cell counts for each sequencing assay in human data

            +

            Example: cell counts for each sequencing assay in human data

            To get the cell counts for each sequencing assay type in human data, you can perform the following pandas.DataFrame operations:

            [8]:
            @@ -925,7 +934,7 @@ 

            Example: cell counts for each sequencing assay in human data -

            Example: number of microglial cells in the Census

            +

            Example: number of microglial cells in the Census

            If you have a specific term from any of the categories shown above you can directly find out the number of cells for that term.

            [9]:
            @@ -996,7 +1005,7 @@ 

            Example: number of microglial cells in the Census

            -

            Understanding Census contents beyond the summary tables

            +

            Understanding Census contents beyond the summary tables

            While using the pre-computed tables in census["census_info"] is an easy and quick way to understand the contents of the Census, it falls short if you want to learn more about certain slices of the Census.

            For example, you may want to learn more about:

              @@ -1007,7 +1016,7 @@

              Understanding Census contents beyond the summary tables

              All of these questions can be answered by directly querying the cell metadata as shown in the examples below.

              -

              Example: all cell types available in human

              +

              Example: all cell types available in human

              To exemplify the process of accessing and slicing cell metadata for summary stats, let’s start with a trivial example and take a look at all human cell types available in the Census:

              [10]:
              @@ -1190,7 +1199,7 @@ 

              Example: all cell types available in human -

              Example: cell types available in human liver

              +

              Example: cell types available in human liver

              Similar to the example above, we can learn what cell types are available for a specific tissue, e.g. liver.

              To achieve this goal we just need to limit our cell metadata to that tissue. We will use the information in the cell metadata variable tissue_general. This variable contains the high-level tissue label for all cells in the Census:

              @@ -1232,7 +1241,7 @@

              Example: cell types available in human liver -

              Example: diseased T cells in human tissues

              +

              Example: diseased T cells in human tissues

              In this example we are going to get the counts for all diseased cells annotated as T cells. For the sake of the example we will focus on “CD8-positive, alpha-beta T cell” and “CD4-positive, alpha-beta T cell”:

              [15]:
              diff --git a/notebooks/analysis_demo/comp_bio_data_integration_scvi.html b/notebooks/analysis_demo/comp_bio_data_integration_scvi.html
              index b769c6d2a..d79fdaace 100644
              --- a/notebooks/analysis_demo/comp_bio_data_integration_scvi.html
              +++ b/notebooks/analysis_demo/comp_bio_data_integration_scvi.html
              @@ -36,6 +36,8 @@
                 
                   
                     
              +        
              +        
                       
                       
                       
              @@ -130,6 +132,13 @@
                           
               
                           
              +
              +
              + + + + +
              @@ -210,7 +219,7 @@
              -

              Integrating multi-dataset slices of data

              +

              Integrating multi-dataset slices of data

              The Census contains data from multiple studies providing an opportunity to perform inter-dataset analysis. To this end integration of data has to be performed first to account for batch effects.

              This notebook provides a demonstration for integrating two Census datasets using scvi-tools. The goal is not to provide an exhaustive guide on proper integration, but to showcase what information in the Census can inform data integration.

              Contents

              @@ -228,7 +237,7 @@

              Integrating multi-dataset slices of datais_primary_data which is described in the Census schema. For this notebook we will focus on individual datasets, therefore we can ignore this variable.

              -

              Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)

              +

              Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)

              Let’s load all modules needed for this notebook.

              [1]:
              @@ -397,7 +406,7 @@ 

              Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)

              -

              Gene-length normalization of Smart-Seq2 data.

              +

              Gene-length normalization of Smart-Seq2 data.

              Smart-seq2 read counts have to be normalized by gene length. For full details on gene-length normalization take a look at the notebook Normalizing full-length gene sequencing data from the Census.

              Let’s first get the gene lengths from var.feature_length.

              @@ -440,12 +449,12 @@

              Gene-length normalization of Smart-Seq2 data. -

              Integration with scvi-tools

              +

              Integration with scvi-tools

              From its documentation scvi-tools is described as a package for end-to-end analysis of single-cell omics data primarily developed and maintained by the Yosef Lab at UC Berkeley.

              Here we will use the “single-cell Variational Inference” model or scVI which uses a deep generative model for the integration of spatial transcriptomic data and scRNA-seq data.

              For comprehensive usage and best practices of scVI please refer to the doc site of scvi-tools.

              -

              Inspecting data prior to integration

              +

              Inspecting data prior to integration

              Let’s take a look at the strength of batch effects in our data. For that we will perform bread-and-butter normalization, neighbor graph calculation, and embedding visualization via UMAP.

              But first let’s save the read counts in a different layer as we will need them for integration

              -

              Data integration with scVI

              +

              Data integration with scVI

              Whenever you query and fetch Census data from multiple datasets then integration needs to be performed as evidenced by the batch effects we observed.

              The parameters for scVI used in this notebook were selected to make the model run quickly. For best practices on integration of single-cell data using scvi-tools please refer to their documentation page.

              Additionally we recommend reading the article An integrated cell atlas of the human lung in health and disease by Sikkema et al., who performed integration of 43 datasets from Lung.

              Here we focus on the metadata from the Census that can be used as batch information for integration.

              -

              Integration with batch defined as dataset_id

              +

              Integration with batch defined as dataset_id

              All cells in the Census are annotated with the dataset they come from in obs["dataset_id"]. This is a great place to start for integration.

              So let’s run an scVI model and obtain the latent embeddings. First we define our model with batch set as dataset_id.

              @@ -677,7 +686,7 @@

              Integration with batch defined as Great! You can see that the clustering is no longer mainly driven by assay, albeit still contributing to it.

              -

              Integration with batch defined as dataset_id + donor_id

              +

              Integration with batch defined as dataset_id + donor_id

              Similar to dataset_id, all cells in Census are annotated with donor_id. The definition of donor_id depends on the dataset and it is left to the discretion of data curators. However it is still rich in information and can be used as a batch variable during integration.

              Because donor_id is not guaranteed to be unique across all cells of the Census, we strongly recommend concatenating dataset_id and donor_id and using that as the batch key for scVI.

              @@ -803,7 +812,7 @@

              Integration with batch defined as As you can see using dataset_id and donor_id as batch the cells now mostly cluster by cell type.

              -

              Integration with batch defined as dataset_id + donor_id + assay_ontology_term_id + suspension_type

              +

              Integration with batch defined as dataset_id + donor_id + assay_ontology_term_id + suspension_type

              In some cases one dataset may contain multiple assay types and/or multiple suspension types (cell vs nucleus), and for those it is important to consider these metadata as batches.

              Therefore, the most comprehensive definition of batch in the Census can be accomplished by combining the cell metadata of dataset_id, donor_id, assay_ontology_term_id and suspension_type; assay_ontology_term_id encodes the EFO ids for assay types.

              In our example, the two datasets that we used only contain cells from one assay each, and one suspension type for all of them. Thus it would not make a difference to include these metadata as part of batch.

              diff --git a/notebooks/analysis_demo/comp_bio_embedding_exploration.html b/notebooks/analysis_demo/comp_bio_embedding_exploration.html index 3f92373b5..f5871b87d 100644 --- a/notebooks/analysis_demo/comp_bio_embedding_exploration.html +++ b/notebooks/analysis_demo/comp_bio_embedding_exploration.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
              +
              + + + + +
              @@ -216,7 +225,7 @@
              -

              Exploring biologically relevant clusters in Census embeddings

              +

              Exploring biologically relevant clusters in Census embeddings

              In this notebook, we explore biologically relevant clusters in Census embeddings using UMAP as a visualization tool. This demonstration assumes knowledge of how to access both collaboration and hosted (community) Census embeddings. To learn the basics of accessing these data, please visit the Census model page.

              IMPORTANT: This tutorial requires cellxgene-census package version 1.9.1 or later.

              Contents

              @@ -233,7 +242,7 @@

              Exploring biologically relevant clusters in Census embeddings.

              -

              Background

              +

              Background

              The journey from a gene expression matrix to a 2D scatterplot involves numerous highly nonlinear transformations. Such transformations can introduce artifacts that affect both the global and local structures in the visualized manifold.

              Common issues like overclustering and clustering by batch are typical artifacts resulting from these dimensionality reduction methods. With that in mind, these embeddings and their UMAP visualizations are best used as tools for generating hypotheses. They should not be the final word in analysis. Instead, we recommend focusing on the full representation of the embedding matrices and ultimately returning to the underlying gene expressions to investigate the reasons behind the observed clustering patterns.

              @@ -247,7 +256,7 @@

              Background -

              Requirements

              +

              Requirements

              • cellxgene-census

              • scanpy

              • @@ -260,7 +269,7 @@

                Requirements -

                Imports and function definitions

                +

                Imports and function definitions

                [1]:
                 
                @@ -337,9 +346,9 @@

                Imports and function definitions -

                Melanocytes in eye

                +

                Melanocytes in eye

                -

                Sample and fetch 150k cells from eye tissue

                +

                Sample and fetch 150k cells from eye tissue

                [3]:
                 
                @@ -402,7 +411,7 @@

                Sample and fetch 150k cells from eye tissue -

                Observations

                +

                Observations

                In the study of melanocytes within the eye, the following observations are made across various embeddings:

                • Melanocytes are distinctly clustered in all embeddings, with OCA2 as a noted marker.

                • @@ -511,7 +520,7 @@

                  Observations -

                  Retinal bipolar neurons in eye

                  +

                  Retinal bipolar neurons in eye

                  In a more detailed analysis of retinal bipolar neurons in the eye, we focus on subclustering within this cell type across various embeddings. This involves rerunning UMAP specifically for retinal bipolar neurons and applying Leiden clustering to each embedding. Additionally, we employ HDBSCAN, a density-based clustering algorithm, on a full pairwise Euclidean distance matrix calculated from each embedding to compare the clustering results.

                  Key findings from this analysis include:

                  @@ -693,9 +702,9 @@

                  Retinal bipolar neurons in eye -

                  Dopaminergic neurons in brain

                  +

                  Dopaminergic neurons in brain

                  -

                  Sample and fetch 150k cells from brain tissue

                  +

                  Sample and fetch 150k cells from brain tissue

                  [8]:
                   
                  @@ -755,7 +764,7 @@

                  Sample and fetch 150k cells from brain tissue -

                  Observations

                  +

                  Observations

                  Here, we visualize a randomly selected subset of cells in the brain from CELLxGENE Census. We can observe that dopaminergic neurons, marked by TH expression, separate into distinct clusters in Geneformer and SCVI latent spaces, whereas in UCE and scGPT embeddings, they are grouped at one end of a larger neuron cluster.

                  All embeddings show a tendency to cluster by assay, indicating a consistent pattern across different models. Conditions like glioblastoma are clearly separated in all embeddings, while pilocytic astrocytoma is distinctly clustered in Geneformer and UCE and more mixed in others.

                  In the UCE embedding, we observe many small satellite glioblastoma clusters outside the main cluster that do not have distinct gene expression signatures. This is similar to what we observed previously in the eye (e.g. for the retinal bipolar neurons).

                  @@ -807,9 +816,9 @@

                  Observations -

                  Pulmonary ionocytes in lung (Tabula Sapiens)

                  +

                  Pulmonary ionocytes in lung (Tabula Sapiens)

                  -

                  Fetch lung cells from Tabula Sapiens

                  +

                  Fetch lung cells from Tabula Sapiens

                  [11]:
                   
                  @@ -841,7 +850,7 @@

                  Fetch lung cells from Tabula Sapiens -

                  Observations

                  +

                  Observations

                  For the case study focusing on pulmonary ionocytes in lung tissue, as part of the Tabula Sapiens project, the following observations are noted:

                  • In all embeddings, except for SCVI, a clear separation is seen between SmartSeq data and 10x data. The distinction is most pronounced in the scGPT embedding.

                  • diff --git a/notebooks/analysis_demo/comp_bio_explore_and_load_lung_data.html b/notebooks/analysis_demo/comp_bio_explore_and_load_lung_data.html index ed013fa0c..b849fe6b0 100644 --- a/notebooks/analysis_demo/comp_bio_explore_and_load_lung_data.html +++ b/notebooks/analysis_demo/comp_bio_explore_and_load_lung_data.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                    +
                    + + + + +
                  @@ -214,7 +223,7 @@
                  -

                  Exploring all data from a tissue

                  +

                  Exploring all data from a tissue

                  This tutorial provides a series of examples for how to explore and query the Census in the context of a single tissue, lung. We will summarize cell and gene metadata, then fetch the single-cell expression counts and perform some basic data explorations via Scanpy

                  Contents

                    @@ -230,7 +239,7 @@

                    Exploring all data from a tissueis_primary_data which is described in the Census schema.

                    -

                    Learning about the lung data in the Census

                    +

                    Learning about the lung data in the Census

                    First we will open the Census. If you are not familiar with the basics of the Census API you should take a look at notebook Learning about the CZ CELLxGENE Census

                    [1]:
                    @@ -314,7 +323,7 @@ 

                    Learning about the lung data in the Censustotal_cell_count and the unique number of cells under unique_cell_count (i.e. after removing cells that were included in multiple datasets).

                    Let’s now take a look at the cell and gene information of this slice of the Census.

                    -

                    Learning about cells of lung data

                    +

                    Learning about cells of lung data

                    Let’s load the cell metadata for all lung cells and select only the unique cells using is_primary_data.

                    [3]:
                    @@ -648,7 +657,7 @@ 

                    Learning about cells of lung data -

                    Datasets

                    +

                    Datasets

                    First let’s start by looking at what are the datasets and collections from CELLxGENE Discover contributing to lung. For this we will use the dataset table at census["census-info"]["datasets"] that contains metadata of all datasets used to build this Census.

                    [4]:
                    @@ -1011,7 +1020,7 @@ 

                    Datasets -

                    Assays

                    +

                    Assays

                    Let’s use similar logic to take a look at all the assays available for human lung data. This tells us that most assays are from 10x technologies and sci-RNA-seq.

                    [6]:
                    @@ -1047,7 +1056,7 @@ 

                    Assays

                    -

                    Disease

                    +

                    Disease

                    And now let’s take a look at diseased cell counts, with normal indicating non-diseased cells.

                    [7]:
                    @@ -1084,7 +1093,7 @@ 

                    Disease -

                    Sex

                    +

                    Sex

                    There don’t seem to be strong biases for sex.

                    [8]:
                    @@ -1109,7 +1118,7 @@ 

                    Sex

                    -

                    Cell vs nucleus

                    +

                    Cell vs nucleus

                    The majority of data are from cells and not nuclei.

                    [9]:
                    @@ -1133,7 +1142,7 @@ 

                    Cell vs nucleus -

                    Cell types

                    +

                    Cell types

                    Let’s take a look at the counts of the top 20 cell types.

                    [10]:
                    @@ -1175,7 +1184,7 @@ 

                    Cell types -

                    Sub-tissues

                    +

                    Sub-tissues

                    We can look at the original tissue annotations that were mapped to “lung”.

                    [11]:
                    @@ -1209,7 +1218,7 @@ 

                    Sub-tissues -

                    Learning about genes of lung data

                    +

                    Learning about genes of lung data

                    Let’s load the gene metadata of the Census.

                    [12]:
                    @@ -1750,7 +1759,7 @@ 

                    Learning about genes of lung data -

                    Summary of lung metadata

                    +

                    Summary of lung metadata

                    In the previous sections, using the Census we learned the following information:

                    • The total number of unique lung cells and their composition for:

                      @@ -1767,7 +1776,7 @@

                      Summary of lung metadata -

                      Fetching all single-cell human lung data from the Census

                      +

                      Fetching all single-cell human lung data from the Census

                      Since loading the entire lung data is resource-intensive, for the sake of this exercise let’s load a subset of the lung data into an anndata.AnnData object and perform some exploratory analysis.

                      We will subset to 100,000 random unique cells using the lung_obs pandas.DataFrame we previously created.

                      @@ -1827,7 +1836,7 @@

                      Fetching all single-cell human lung data from the Census -

                      Calculating QC metrics of the lung data

                      +

                      Calculating QC metrics of the lung data

                      Now let’s take a look at some QC metrics

                      Top genes per cell

                      @@ -1902,7 +1911,7 @@

                      Calculating QC metrics of the lung data -

                      Creating a normalized expression layer and embeddings

                      +

                      Creating a normalized expression layer and embeddings

                      Let’s perform a bread and butter normalization and take a look at UMAP embeddings, but for all the data below we’ll exclude Smart-seq2 as this requires an extra step to normalize based on gene lengths

                      [26]:
                      diff --git a/notebooks/analysis_demo/comp_bio_geneformer_prediction.html b/notebooks/analysis_demo/comp_bio_geneformer_prediction.html
                      index 4293db142..725f49253 100644
                      --- a/notebooks/analysis_demo/comp_bio_geneformer_prediction.html
                      +++ b/notebooks/analysis_demo/comp_bio_geneformer_prediction.html
                      @@ -36,6 +36,8 @@
                         
                           
                             
                      +        
                      +        
                               
                               
                               
                      @@ -130,6 +132,13 @@
                                   
                       
                                   
                      +
                      +
                      + + + + +
                      @@ -213,7 +222,7 @@
                      -

                      Geneformer for cell class prediction and data projection

                      +

                      Geneformer for cell class prediction and data projection

                      This notebook provides examples to utilize the CELLxGENE collaboration fine-tuned Geneformer model with user data. For more information on the model please refer to the Census model page.

                      IMPORTANT: This tutorial requires cellxgene-census package version 1.9.1 or later.

                      Contents

                      @@ -228,9 +237,9 @@

                      Geneformer for cell class prediction and data projectionCensus schema.

                      -

                      Requirements

                      +

                      Requirements

                      -

                      System requirements

                      +

                      System requirements

                      To run this notebook the following are required:

                      • Unix system.

                      • @@ -241,7 +250,7 @@

                        System requirements -

                        Downloading example data

                        +

                        Downloading example data

                        Throughout the notebook the 10X PBMC 3K dataset will be used, you can download it via the following shell commands.

                        [1]:
                        @@ -263,7 +272,7 @@ 

                        Downloading example data -

                        Downloading the fine-tuned Geneformer model

                        +

                        Downloading the fine-tuned Geneformer model

                        The model is currently hosted in S3, you can find out more details in the Census model page.

                        Additional information, including its S3 URI, is also included in the metadata of the corresponding embeddings inside Census. These metadata can be obtained as follows.

                        @@ -314,7 +323,7 @@

                        Downloading the fine-tuned Geneformer model -

                        Importing required packages

                        +

                        Importing required packages

                        Finally all the required packages are loaded.

                        [5]:
                        @@ -343,9 +352,9 @@ 

                        Importing required packages -

                        Preparing data and model

                        +

                        Preparing data and model

                        -

                        Preparing single-cell data

                        +

                        Preparing single-cell data

                        Let’s load the test data. In preparation to use with Geneformer we do the following:

                        • Set the index as the ENSEMBL gene ID and store it in the obs column "ensembl_id"

                          @@ -419,7 +428,7 @@

                          Preparing single-cell data -

                          Preparing data from model

                          +

                          Preparing data from model

                          Then let’s fetch the mapping dictionary between Geneformer IDs and the associated cell subclass labels. This information is stored along the fine-tuned model.

                          [8]:
                          @@ -541,9 +550,9 @@ 

                          Preparing data from model -

                          Using the Geneformer fine-tuned model for cell subclass inference

                          +

                          Using the Geneformer fine-tuned model for cell subclass inference

                          -

                          Loading tokenized data

                          +

                          Loading tokenized data

                          Let’s load the tokenized test data.

                          [10]:
                          @@ -578,7 +587,7 @@ 

                          Loading tokenized data

                          -

                          Performing inference of cell subclass

                          +

                          Performing inference of cell subclass

                          Now we can load the model and run the inference workflow.

                          ⚠️ Note, this step will be slow with CPUs, a machine with one GPU is recommended

                          @@ -615,7 +624,7 @@

                          Performing inference of cell subclass -

                          Inspecting inference results

                          +

                          Inspecting inference results

                          Then we add the prediction back to our loaded AnnData test dataset.

                          [14]:
                          @@ -711,9 +720,9 @@ 

                          Inspecting inference results -

                          Using the Geneformer fine-tuned model for data projection

                          +

                          Using the Geneformer fine-tuned model for data projection

                          -

                          Generating Geneformer embeddings for 10X PBMC 3K data

                          +

                          Generating Geneformer embeddings for 10X PBMC 3K data

                          To project new data, for example the 10X PBMC 3K data, into the Census embedding space from Geneformer’s fine-tune model, we can use EmbExtractor from the Geneformer package as follows.

                          We first need to get the number of categories (cell subclasses) present in the model.

                          @@ -798,7 +807,7 @@

                          Generating Geneformer embeddings for 10X PBMC 3K data

                          -

                          Joining Geneformer embeddings from 10X PBMC 3K data with other Census datasets

                          +

                          Joining Geneformer embeddings from 10X PBMC 3K data with other Census datasets

                          There are multiple datasets in Census from PBMCs, and all human Census data has pre-calculated Geneformer embeddings, so now we can join the embeddings we generated above from the 10X PBMC 3K dataset with Census data.

                          Let’s grab a few PBMC datasets from Census and request the Geneformer embeddings.

                          diff --git a/notebooks/analysis_demo/comp_bio_normalizing_full_gene_sequencing.html b/notebooks/analysis_demo/comp_bio_normalizing_full_gene_sequencing.html index 4fc511907..54a2aaacc 100644 --- a/notebooks/analysis_demo/comp_bio_normalizing_full_gene_sequencing.html +++ b/notebooks/analysis_demo/comp_bio_normalizing_full_gene_sequencing.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                          +
                          + + + + +
                          @@ -211,7 +220,7 @@
                          -

                          Normalizing full-length gene sequencing data

                          +

                          Normalizing full-length gene sequencing data

                          This tutorial shows you how to fetch full-length gene sequencing data from the Census and normalize it to account for gene length.

                          Contents

                            @@ -222,7 +231,7 @@

                            Normalizing full-length gene sequencing datais_primary_data which is described in the Census schema. For this notebook we will focus on individual datasets, therefore we can ignore this variable.

                            -

                            Opening the census

                            +

                            Opening the census

                            First we open the Census, if you are not familiar with the basics of the Census API you should take a look at notebook Learning about the CZ CELLxGENE Census

                            -

                            Fetching full-length example sequencing data (Smart-Seq)

                            +

                            Fetching full-length example sequencing data (Smart-Seq)

                            Let’s get some example data, in this case we’ll fetch all cells from a relatively small dataset derived from the Smart-Seq2 technology which performs full-length gene sequencing:

                            • Collection: Tabula Muris Senis

                            • @@ -366,7 +375,7 @@

                              Fetching full-length example sequencing data (Smart-Seq) -

                              Normalizing expression to account for gene length

                              +

                              Normalizing expression to account for gene length

                              By default cellxgene_census.get_anndata() fetches all genes in the Census. So let’s first identify the genes that were measured in this dataset and subset the AnnData to only include those.

                              To this goal we can use the “Dataset Presence Matrix” in census["census_data"]["mus_musculus"].ms["RNA"]["feature_dataset_presence_matrix"]. This is a boolean matrix N x M where N is the number of datasets and M is the number of genes in the Census, True indicates that a gene was measured in a dataset.

                              @@ -480,7 +489,7 @@

                              Normalizing expression to account for gene lengthAll done! You can see that we now have real numbers instead of integers.

                            -

                            Validation through clustering exploration

                            +

                            Validation through clustering exploration

                            Let’s perform some basic clustering analysis to see if cell types cluster as expected using the normalized counts.

                            First we do some basic filtering of cells and genes.

                            diff --git a/notebooks/analysis_demo/comp_bio_scvi_model_use.html b/notebooks/analysis_demo/comp_bio_scvi_model_use.html index 09d5bde30..0caa56b5a 100644 --- a/notebooks/analysis_demo/comp_bio_scvi_model_use.html +++ b/notebooks/analysis_demo/comp_bio_scvi_model_use.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                            +
                            + + + + +
                            @@ -212,7 +221,7 @@
                            -

                            scVI for cell type prediction and data projection

                            +

                            scVI for cell type prediction and data projection

                            This notebook provides examples to utilize the pretrained scVI model with user data. For more information on the model please refer to the Census model page.

                            IMPORTANT: This tutorial requires cellxgene-census package version 1.9.1 or later.

                            Contents

                            @@ -224,9 +233,9 @@

                            scVI for cell type prediction and data projection

                            ⚠️ Note that the Census RNA data includes duplicate cells present across multiple datasets. Duplicate cells can be filtered in or out using the cell metadata variable is_primary_data which is described in the Census schema.

                            -

                            Requirements

                            +

                            Requirements

                            -

                            System requirements

                            +

                            System requirements

                            To run this notebook the following are required:

                            • Unix system.

                            • @@ -237,7 +246,7 @@

                              System requirements -

                              Downloading example data

                              +

                              Downloading example data

                              Throughout the notebook the 10X PBMC 3K dataset will be used, you can download it via the following shell commands.

                              [1]:
                              @@ -259,7 +268,7 @@ 

                              Downloading example data -

                              Downloading the trained scVI model

                              +

                              Downloading the trained scVI model

                              The model is currently hosted in S3, you can find out more details in the Census model page.

                              Additional information, including its S3 URI, is also included in the metadata of the corresponding embeddings inside Census. These metadata can be obtained as follows.

                              @@ -310,7 +319,7 @@

                              Downloading the trained scVI model -

                              Using the scVI pretrained model for data projection

                              +

                              Using the scVI pretrained model for data projection

                              Import all the required packages for this demonstration

                              [5]:
                              @@ -505,7 +514,7 @@ 

                              Using the scVI pretrained model for data projectionDisplay the scatter plot

                            -

                            Using the scVI pretrained model for cell type inference.

                            +

                            Using the scVI pretrained model for cell type inference.

                            Fetch the reference scVI embeddings corresponding to some example PBMC data from Census

                            [15]:
                            diff --git a/notebooks/analysis_demo/comp_bio_summarize_axis_query.html b/notebooks/analysis_demo/comp_bio_summarize_axis_query.html
                            index ca9cd11fd..88ca26271 100644
                            --- a/notebooks/analysis_demo/comp_bio_summarize_axis_query.html
                            +++ b/notebooks/analysis_demo/comp_bio_summarize_axis_query.html
                            @@ -36,6 +36,8 @@
                               
                                 
                                   
                            +        
                            +        
                                     
                                     
                                     
                            @@ -130,6 +132,13 @@
                                         
                             
                                         
                            +
                            +
                            + + + + +
                            @@ -210,7 +219,7 @@
                            -

                            Summarizing cell and gene metadata

                            +

                            Summarizing cell and gene metadata

                            This notebook provides examples for basic axis metadata handling using Pandas. The Census stores obs (cell) and var (gene) metadata in SOMADataFrame objects via the TileDB-SOMA API (documentation), which can be queried and read as a Pandas DataFrame using TileDB-SOMA.

                            Note that Pandas DataFrame is an in-memory object, therefore queries should be small enough for results to fit in memory.

                            Contents

                            @@ -226,7 +235,7 @@

                            Summarizing cell and gene metadatais_primary_data which is described in the Census schema.

                            -

                            Opening the Census

                            +

                            Opening the Census

                            The cellxgene_census python package contains a convenient API to open the latest version of the Census. If you open the Census, you should close it. open_soma() returns a context, so you can open/close it in several ways, like a Python file handle. The context manager is preferred, as it will automatically close upon an error raise.

                            You can learn more about the cellxgene_census methods by accessing their corresponding documentation via help(). For example help(cellxgene_census.open_soma).

                            @@ -255,7 +264,7 @@

                            Opening the Census -

                            Summarizing cell metadata

                            +

                            Summarizing cell metadata

                            Once the Census is open you can use its TileDB-SOMA methods as it is itself a SOMACollection. You can thus access the metadata SOMADataFrame objects encoding cell and gene metadata.

                            Tips:

                            -

                            Example: Summarize all cell types

                            +

                            Example: Summarize all cell types

                            This example reads the cell metadata (obs) into a Pandas DataFrame, and summarizes in a variety of ways using Pandas API.

                            [2]:
                            @@ -315,7 +324,7 @@ 

                            Example: Summarize all cell types -

                            Example: Summarize a subset of cell types, selected with a value_filter

                            +

                            Example: Summarize a subset of cell types, selected with a value_filter

                            This example utilizes a SOMA “value filter” to read the subset of cells with tissue_ontology_term_id equal to UBERON:0002048 (lung tissue), and summarizes the query result using Pandas.

                            [3]:
                            @@ -424,7 +433,7 @@ 

                            Example: Summarize a subset of cell types, selected with a -

                            Full Census metadata stats

                            +

                            Full Census metadata stats

                            This example queries all organisms in the Census, and summarizes the diversity of various metadata labels.

                            [5]:
                            diff --git a/notebooks/api_demo/census_access_maintained_embeddings.html b/notebooks/api_demo/census_access_maintained_embeddings.html
                            index 19214b461..91248f9ed 100644
                            --- a/notebooks/api_demo/census_access_maintained_embeddings.html
                            +++ b/notebooks/api_demo/census_access_maintained_embeddings.html
                            @@ -36,6 +36,8 @@
                               
                                 
                                   
                            +        
                            +        
                                     
                                     
                                     
                            @@ -130,6 +132,13 @@
                                         
                             
                                         
                            +
                            +
                            + + + + +
                            @@ -212,7 +221,7 @@
                            -

                            Access CELLxGENE collaboration embeddings (scVI, Geneformer)

                            +

                            Access CELLxGENE collaboration embeddings (scVI, Geneformer)

                            This notebook demonstrates basic access to CELLxGENE collaboration embeddings of CELLxGENE Discover Census. Currently, embeddings from scVI and a fine-tuned Geneformer model are maintained by CELLxGENE Discover. There are other CELLxGENE-hosted embeddings contributed by the community to CELLxGENE Discover, find out more about these in the Census model page.

                            IMPORTANT: This tutorial requires cellxgene-census package version 1.9.1 or later.

                            Contents

                            @@ -223,7 +232,7 @@

                            Access CELLxGENE collaboration embeddings (scVI, Geneformer)Census schema.

                            -

                            Quick start

                            +

                            Quick start

                            CELLxGENE collaboration embeddings can easily be exported into an AnnData as shown below for any slice of Census. This example queries all cells from tongue tissue.

                            ⚠️ Note that Geneformer embeddings are only available for human data

                            @@ -285,7 +294,7 @@

                            Quick start -

                            Storage format

                            +

                            Storage format

                            Each embedding is encoded as a SOMA SparseNDArray, where:

                            • dimension 0 (soma_dim_0) encodes the cell (obs) soma_joinid value

                            • @@ -296,17 +305,17 @@

                              Storage format -

                              Query cells and load associated embeddings

                              +

                              Query cells and load associated embeddings

                              This section demonstrates several methods to query cells from the Census by obs metadata, and then fetch embeddings associated with each cell.

                              -

                              Loading embeddings into an AnnData obsm slot

                              +

                              Loading embeddings into an AnnData obsm slot

                              There are two main ways to load CELLxGENE collaboration embeddings into an AnnData.

                              1. Via cellxgene_census.get_anndata().

                              2. With a lazy query via ExperimentAxisQuery.

                              -

                              AnnData embeddings via cellxgene_census.get_anndata()

                              +

                              AnnData embeddings via cellxgene_census.get_anndata()

                              This is the simplest way of getting the embeddings. In this example we create an AnnData for all “central nervous system” cells.

                              [4]:
                              @@ -374,7 +383,7 @@ 

                              AnnData embeddings via

                              -

                              AnnData embeddings via ExperimentAxisQuery

                              +

                              AnnData embeddings via ExperimentAxisQuery

                              Using an ExperimentAxisQuery to get embeddings into an AnnData has the main advantage of inspecting the query in a lazy manner before loading all data into AnnData.

                              As a reminder this class offers a lazy interface to query Census based on cell and gene metadata, and provides access to the corresponding expression data, cell/gene metadata, and the embeddings.

                              Let’s initiate a lazy query with the same filters as the previous example.

                              @@ -484,7 +493,7 @@

                              AnnData embeddings via

                              -

                              Load an embedding into a dense NumPy array

                              +

                              Load an embedding into a dense NumPy array

                              To load an embedding into a stand-alone numpy array you can select cells from the Census based on obs metadata, then given the resulting cells, use the soma_joinid values to download an embedding, and finally save as a dense NDArray.

                              Let’s first select cells based on cell metadata.

                              diff --git a/notebooks/api_demo/census_citation_generation.html b/notebooks/api_demo/census_citation_generation.html index 69a1ff7f3..84c32a603 100644 --- a/notebooks/api_demo/census_citation_generation.html +++ b/notebooks/api_demo/census_citation_generation.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                              +
                              + + + + +
                              @@ -210,7 +219,7 @@
                              -

                              Generating citations for Census slices

                              +

                              Generating citations for Census slices

                              This notebook demonstrates how to generate a citation string for all datasets contained in a Census slice.

                              Contents

                                @@ -224,7 +233,7 @@

                                Generating citations for Census slicesis_primary_data which is described in the Census schema.

                                -

                                Requirements

                                +

                                Requirements

                                This notebook requires:

                                • cellxgene_census Python package.

                                • @@ -232,7 +241,7 @@

                                  Requirements -

                                  Generating citation strings

                                  +

                                  Generating citation strings

                                  First we open a handle to the Census data. To ensure we open a data release with schema version 1.3.0 or greater, we use census_version="latest"

                                  [1]:
                                  @@ -352,7 +361,7 @@ 

                                  Generating citation strings"dataset_id" present in both the dataset table and the Census cell metadata to create citation strings for any Census slice.

                                  -

                                  Via cell metadata query

                                  +

                                  Via cell metadata query

                                  [3]:
                                   
                                  @@ -389,7 +398,7 @@

                                  Via cell metadata query

                                  -

                                  Via AnnData query

                                  +

                                  Via AnnData query

                                  [4]:
                                   
                                  diff --git a/notebooks/api_demo/census_compute_over_X.html b/notebooks/api_demo/census_compute_over_X.html index c47d50e67..b973921ff 100644 --- a/notebooks/api_demo/census_compute_over_X.html +++ b/notebooks/api_demo/census_compute_over_X.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                                  +
                                  + + + + +
                                  @@ -210,7 +219,7 @@
                                  -

                                  Computing on X using online (incremental) algorithms

                                  +

                                  Computing on X using online (incremental) algorithms

                                  This tutorial showcases computing a variety of per-gene and per-cell statistics for a user-defined query using out-of-core operations.

                                  NOTE: when query results are small enough to fit in memory, it may be easier to use the SOMAExperiment Query class to extract an AnnData, and then just compute over that. This tutorial shows means of incrementally processing larger-than-core (RAM) data, where incremental (online) algorithms are used.

                                  Contents

                                  @@ -233,7 +242,7 @@

                                  Computing on X using online (incremental) algorithms

                                  -

                                  Incremental count and mean calculation.

                                  +

                                  Incremental count and mean calculation.

                                  Many statistics, such as mean, are easy to calculate incrementally. This cell demonstrates a query on the X['raw'] sparse nD array, which will return results in batches. Accumulate the sum and count incrementally, into raw_sum and raw_n, and then compute mean.

                                  First define a query - in this case a slice over the obs axis for cells with a specific tissue & sex value, and all genes on the var axis. The query.X() method returns an iterator of results, each as a PyArrow Table. Each table will contain the sparse X data and obs/var coordinates, using standard SOMA names:

                                    @@ -425,7 +434,7 @@

                                    Incremental count and mean calculation. -

                                    Incremental variance calculation

                                    +

                                    Incremental variance calculation

                                    Other statistics are not as simple when implemented as an online algorithm. This cell demonstrates an implementation of an online computation of variance, using Welford’s online calculation of mean and variance.

                                    [3]:
                                    @@ -669,7 +678,7 @@ 

                                    Incremental variance calculation -

                                    Counting cells per gene, grouped by dataset_id

                                    +

                                    Counting cells per gene, grouped by dataset_id

                                    This example demonstrates a more complex example where the goal is to count the number of cells per gene, grouped by cell dataset_id. The result is a Pandas DataFrame indexed by obs.dataset_id and var.feature_id, containing the number of cells per pair.

                                    This example does not use positional indexing, but rather demonstrates the use of Pandas DataFrame join to join on the soma_joinid. For the sake of this example we will query only 4 genes, but this can be expanded to all genes.

                                    diff --git a/notebooks/api_demo/census_dataset_presence.html b/notebooks/api_demo/census_dataset_presence.html index 904eccdc2..ffefa6932 100644 --- a/notebooks/api_demo/census_dataset_presence.html +++ b/notebooks/api_demo/census_dataset_presence.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                                    +
                                    + + + + +
                                    @@ -216,7 +225,7 @@
                                    -

                                    Genes measured in each cell (dataset presence matrix)

                                    +

                                    Genes measured in each cell (dataset presence matrix)

                                    The Census is a compilation of cells from multiple datasets that may differ by the sets of genes they measure. This notebook describes the way to identify the genes measured per dataset.

                                    The presence matrix is a sparse boolean array, indicating which features (var) were present in each dataset. The array has dimensions [n_datasets, n_var], and is stored in the SOMA Measurement varp collection. The first dimension is indexed by the soma_joinid in the census_datasets dataframe. The second is indexed by the soma_joinid in the var dataframe of the measurement.

                                    As a reminder the obs data frame has a column dataset_id that can be used to link any cell in the Census to the presence matrix.

                                    @@ -231,7 +240,7 @@

                                    Genes measured in each cell (dataset presence matrix)

                                    ⚠️ Note that the Census RNA data includes duplicate cells present across multiple datasets. Duplicate cells can be filtered in or out using the cell metadata variable is_primary_data which is described in the Census schema.

                                    -

                                    Opening the Census

                                    +

                                    Opening the Census

                                    The cellxgene_census python package contains a convenient API to open the latest version of the Census.

                                    [1]:
                                    @@ -253,7 +262,7 @@ 

                                    Opening the Census -

                                    Fetching the IDs of the Census datasets

                                    +

                                    Fetching the IDs of the Census datasets

                                    Let’s grab a table of all the datasets included in the Census and use this table in combination with the presence matrix below.

                                    [2]:
                                    @@ -432,7 +441,7 @@ 

                                    Fetching the IDs of the Census datasets -

                                    Fetching the dataset presence matrix

                                    +

                                    Fetching the dataset presence matrix

                                    Now let’s fetch the dataset presence matrix.

                                    For convenience, read the entire presence matrix (for Homo sapiens) into a SciPy array. There is a convenience API providing this capability, returning the matrix in a scipy.sparse.array.

                                    @@ -580,7 +589,7 @@

                                    Fetching the dataset presence matrix -

                                    Identifying genes measured in a specific dataset.

                                    +

                                    Identifying genes measured in a specific dataset.

                                    Now that we have the dataset table, the genes metadata table, and the dataset presence matrix, we can check if a gene or set of genes were measured in a specific dataset.

                                    Important: the presence matrix is indexed by soma_joinid, and is NOT positionally indexed. In other words:

                                      @@ -609,7 +618,7 @@

                                      Identifying genes measured in a specific dataset.

                                    -

                                    Identifying datasets that measured specific genes

                                    +

                                    Identifying datasets that measured specific genes

                                    Similarly, we can determine the datasets that measured a specific gene or set of genes.

                                    [6]:
                                    @@ -1129,7 +1138,7 @@ 

                                    Identifying datasets that measured specific genes

                                    -

                                    Identifying all genes measured in a dataset

                                    +

                                    Identifying all genes measured in a dataset

                                    Finally, we can find the set of genes that were measured in the cells of a given dataset.

                                    [7]:
                                    diff --git a/notebooks/api_demo/census_datasets.html b/notebooks/api_demo/census_datasets.html
                                    index cba122671..6b5cecede 100644
                                    --- a/notebooks/api_demo/census_datasets.html
                                    +++ b/notebooks/api_demo/census_datasets.html
                                    @@ -36,6 +36,8 @@
                                       
                                         
                                           
                                    +        
                                    +        
                                             
                                             
                                             
                                    @@ -130,6 +132,13 @@
                                                 
                                     
                                                 
                                    +
                                    +
                                    + + + + +
                                    @@ -213,7 +222,7 @@
                                    -

                                    Exploring the Census Datasets table

                                    +

                                    Exploring the Census Datasets table

                                    This tutorial demonstrates basic use of the census_datasets dataframe that contains metadata of the Census source datasets. This metadata can be joined to the cell metadata dataframe (obs) via the column dataset_id.

                                    Contents

                                      @@ -223,7 +232,7 @@

                                      Exploring the Census Datasets tableis_primary_data which is described in the Census schema.

                                      -

                                      Fetching the datasets table

                                      +

                                      Fetching the datasets table

                                      Each Census contains a top-level dataframe itemizing the datasets contained therein. You can read this into a pandas.DataFrame.

                                      [1]:
                                      @@ -445,7 +454,7 @@ 

                                      Fetching the datasets table -

                                      Fetching the expression data from a single dataset

                                      +

                                      Fetching the expression data from a single dataset

                                      Let’s pick one dataset to slice out of the census, and turn into an AnnData in-memory object. This can be used with the ScanPy toolchain. You can also save this AnnData locally using the AnnData write API.

                                      [3]:
                                      @@ -538,7 +547,7 @@ 

                                      Fetching the expression data from a single dataset

                                      -

                                      Downloading the original source H5AD file of a dataset.

                                      +

                                      Downloading the original source H5AD file of a dataset.

                                      You can download the original H5AD file for any given dataset. This is the same H5AD you can download from the CZ CELLxGENE Discover, and may contain additional data-submitter provided information which was not included in the Census.

                                      To do this you can fetch the location in the cloud or directly download to your system using the cellxgene-census

                                      diff --git a/notebooks/api_demo/census_duplicated_cells.html b/notebooks/api_demo/census_duplicated_cells.html index 84c123a94..bcb6bd720 100644 --- a/notebooks/api_demo/census_duplicated_cells.html +++ b/notebooks/api_demo/census_duplicated_cells.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                                      +
                                      + + + + +
                                      @@ -213,7 +222,7 @@
                                      -

                                      Understanding and filtering out duplicate cells

                                      +

                                      Understanding and filtering out duplicate cells

                                      This tutorial provides an explanation for the existence of duplicate cells in the Census, and it showcases different ways to handle these cells when performing queries on the Census using the is_primary_data cell metadata variable.

                                      Contents

                                        @@ -228,7 +237,7 @@

                                        Understanding and filtering out duplicate cells -

                                        Why are there duplicate cells in the Census?

                                        +

                                        Why are there duplicate cells in the Census?

                                        Duplicate cells are labeled on the is_primary_data cell metadata variable as False. To learn more about this please take a look at the corresponding section of the dataset schema.

                                        The Census data is a concatenation of most RNA data from CZ CELLxGENE Discover and these data are ingested one dataset at a time. You can take a look at what data is included in the Census here.

                                        In some cases data from the same cell exists in different datasets, therefore cells can be duplicated throughout CELLxGENE Discover and by extension the Census.

                                        @@ -248,7 +257,7 @@

                                        Why are there duplicate cells in the Census? -

                                        An example: duplicate cells in the Tabula Muris Senis data

                                        +

                                        An example: duplicate cells in the Tabula Muris Senis data

                                        Let’s take a look at an example from the Census using the Tabula Muris Senis data. Some of its datasets contain duplicated cells.

                                        We can obtain cell metadata for the main Tabula Muris Senis dataset: “All - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - 10x”, which contains the original (non-duplicated) cells.

                                        And remember we must include the is_primary_data column.

                                        @@ -364,12 +373,12 @@

                                        An example: duplicate cells in the Tabula Muris Senis data -

                                        Filtering out duplicate cells

                                        +

                                        Filtering out duplicate cells

                                        In some cases you may be interested in getting all cells for a specific biological context, for example “all natural killer cells from blood of female cells with COVID-19” but you need to be aware that there is a chance you end up with some duplicate cells.

                                        We therefore recommend that you always look at is_primary_data and use that information based on your needs.

                                        If you know a priori that you don’t want duplicated cells this section shows you how to efficiently exclude them from your queries.

                                        -

                                        Filtering out duplicate cells when reading the obs data frame.

                                        +

                                        Filtering out duplicate cells when reading the obs data frame.

                                        Let’s say you are interested in looking at the cell metadata of “all natural killer cells from blood of female cells with COVID-19” but you want to exclude duplicate cells, then you can use value_filter when reading the data frame to only include cells with is_primary_data as True.

                                        Let’s first read the cell metadata including all cells:

                                        @@ -459,7 +468,7 @@

                                        Filtering out duplicate cells when reading the -

                                        Filtering out duplicate cells when creating an AnnData

                                        +

                                        Filtering out duplicate cells when creating an AnnData

                                        You can also utilize is_primary_data on the obs_value_filter of get_anndata.

                                        Let’s repeat the process above. First querying by including all cells. To reduce the bandwidth and memory usage, let’s just fetch data for one gene.

                                        @@ -550,7 +559,7 @@

                                        Filtering out duplicate cells when creating an AnnData

                                        In this case you can also observe a clear reduction in the number of cells.

                                        -

                                        Filtering out duplicate cells for out-of-core operations.

                                        +

                                        Filtering out duplicate cells for out-of-core operations.

                                        Finally we can utilize is_primary_data on the value_filter of obs of an “Axis Query” to perform out-of-core operations.

                                        In this example we only include the version with duplicated cells removed.

                                        diff --git a/notebooks/api_demo/census_embedding.html b/notebooks/api_demo/census_embedding.html index 9b3d728ad..35dd58221 100644 --- a/notebooks/api_demo/census_embedding.html +++ b/notebooks/api_demo/census_embedding.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                                        +
                                        + + + + +
                                        @@ -216,11 +225,11 @@
                                        -

                                        Access CELLxGENE-hosted embeddings

                                        +

                                        Access CELLxGENE-hosted embeddings

                                        This notebook demonstrates basic access to CELLxGENE-hosted embeddings of the Census. CELLxGENE-hosted embeddings have been contributed by the community, CELLxGENE Discover does not actively maintain or update them. Find out more about these in the Census model page.

                                        IMPORTANT: This tutorial requires cellxgene-census package version 1.9.1 or later.

                                        -

                                        Contents

                                        +

                                        Contents

                                        1. Background

                                        2. Quick start

                                        3. @@ -233,13 +242,13 @@

                                          Contents -

                                          Background

                                          +

                                          Background

                                          This notebook demonstrates access to CELLxGENE-hosted embeddings of the Census. The Census has multiple releases, named by a census_version, which normally looks like an ISO date, e.g., 2023-02-01. A CELLxGENE-hosted embedding is a 2D sparse matrix of cell embeddings for a given census version, encoded as a SOMA SparseNDArray.

                                          ⚠️ Note that embeddings may be available for one or both organisms, see the Census model page for the latest availability.

                                          ⚠️ IMPORTANT: embeddings are only meaningful in the context of the Census from which they were created. Each embedding contains a metadata field indicating the source Census, suitable for confirming embedding lineage.

                                        -

                                        Quick start

                                        +

                                        Quick start

                                        The easiest way to access Census CELLxGENE-hosted embeddings is by calling the get_anndata function with an obs_embeddings or var_embeddings parameter.

                                        Let’s start by exploring what embeddings are available:

                                        -

                                        Storage format

                                        +

                                        Storage format

                                        Each embedding is encoded as a SOMA SparseNDArray, where:

                                        • dimension 0 (soma_dim_0) encodes the cell (obs) soma_joinid value

                                        • @@ -345,7 +354,7 @@

                                          Storage format -

                                          Query cells and load associated embeddings

                                          +

                                          Query cells and load associated embeddings

                                          This section demonstrates two methods to query cells from the Census by obs metadata, and then fetch CELLxGENE-hosted embeddings associated with each cell.

                                          1. Load an embedding into an AnnData obsm slot

                                          2. @@ -381,14 +390,14 @@

                                            Query cells and load associated embeddings -

                                            Load an embedding into an AnnData obsm slot

                                            +

                                            Load an embedding into an AnnData obsm slot

                                            There are two main ways to load hosted embeddings into an AnnData.

                                            1. Via cellxgene_census.get_anndata(), followed by merging embeddings.

                                            2. With a lazy query via ExperimentAxisQuery, followed by merging embeddings.

                                            -

                                            AnnData embeddings via cellxgene_census.get_anndata()

                                            +

                                            AnnData embeddings via cellxgene_census.get_anndata()

                                            This is the simplest way of getting the embeddings. In this example we create an AnnData for all “central nervous system” cells, and use the obs_embeddings parameter to add scGPT embeddings to the obsm slot.

                                            [6]:
                                            @@ -427,7 +436,7 @@ 

                                            AnnData embeddings via cellxgene_census.get_anndata()

                                            -

                                            AnnData embeddings via ExperimentAxisQuery

                                            +

                                            AnnData embeddings via ExperimentAxisQuery

                                            Using an ExperimentAxisQuery to get embeddings into an AnnData has the main advantage of inspecting the query in a lazy manner before loading all data into AnnData.

                                            As a reminder, this class offers a lazy interface to query Census based on cell and gene metadata, and provides access to the corresponding expression data and cell/gene metadata.

                                            Let’s initiate a lazy query with the same filters as the previous example.

                                            @@ -527,7 +536,7 @@

                                            AnnData embeddings via

                                            -

                                            Load an embedding into a dense NumPy array

                                            +

                                            Load an embedding into a dense NumPy array

                                            To load an embedding into a stand-alone NumPy array you can select cells from the Census based on obs metadata, then, given the resulting cells, use the soma_joinid values to download an embedding, and finally save it as a dense NDArray.

                                            Let’s first select cells based on cell metadata

                                            @@ -588,7 +597,7 @@

                                            Load an embedding into a dense NumPy array -

                                            Load embeddings and fetch associated Census data

                                            +

                                            Load embeddings and fetch associated Census data

                                            This section describes a more advanced use case. Here we showcase how to load large slices of an embedding matrix, and then append cell metadata to them.

                                            The method starts with the loaded embedding, and for each embedded cell loads metadata or X data.

                                            @@ -694,7 +703,7 @@

                                            Load embeddings and fetch associated Census data -

                                            Embedding Metadata

                                            +

                                            Embedding Metadata

                                            Each embedding contains descriptive information stored in the SOMA metadata slot, encoded as a JSON string. This metadata includes:

                                            • census_version - the Census which is embedded. It is critical to confirm this matches the Census in use, or the embeddings will be meaningless.

                                            • diff --git a/notebooks/api_demo/census_gget_demo.html b/notebooks/api_demo/census_gget_demo.html index 56b400aac..ea68a6fd1 100644 --- a/notebooks/api_demo/census_gget_demo.html +++ b/notebooks/api_demo/census_gget_demo.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                                              +
                                              + + + + +
                                            @@ -213,7 +222,7 @@
                                            -

                                            Querying data using the gget cellxgene module

                                            +

                                            Querying data using the gget cellxgene module

                                            By Laura Luebbert, lauraluebbert@caltech.edu.

                                            gget is a free, open-source command-line tool and Python package that enables efficient querying of genomic databases. gget consists of a collection of separate but interoperable modules, each designed to facilitate one type of database querying in a single line of code.

                                            The gget cellxgene module builds on the CZ CELLxGENE Discover Census to query data from CZ CELLxGENE Discover. This notebook briefly introduces the gget cellxgene module by providing one simple example for each supported query type.

                                            @@ -228,7 +237,7 @@

                                            Querying data using the gget cellxgene modulegget cellxgene from the command line.

                                          -

                                          Install gget and set up cellxgene module

                                          +

                                          Install gget and set up cellxgene module

                                          [1]:
                                           
                                          @@ -332,7 +341,7 @@

                                          Install gget and set up cellxgene module -

                                          Fetch an AnnData object by selecting gene(s), tissue(s) and cell type(s)

                                          +

                                          Fetch an AnnData object by selecting gene(s), tissue(s) and cell type(s)

                                          You can use all of the options listed above to filter for data of interest. Here, we will demonstrate the module by fetching a small dataset containing only three genes and two lung cell types:

                                          [3]:
                                          @@ -609,7 +618,7 @@ 

                                          Fetch an -

                                          Plot a dot plot similar to those shown on the CZ CELLxGENE Discover Gene Expression

                                          +

                                          Plot a dot plot similar to those shown on the CZ CELLxGENE Discover Gene Expression

                                          Using the data we just fetched, we can plot a dot plot using scanpy:

                                          [7]:
                                          @@ -653,7 +662,7 @@ 

                                          Plot a dot plot similar to those shown on the CZ CELLxGENE Discover

                                          -

                                          Fetch only cell metadata (corresponds to AnnData.obs)

                                          +

                                          Fetch only cell metadata (corresponds to AnnData.obs)

                                          By setting meta_only=True and again filtering by the cell metadata attributes listed above, you can also fetch only the cell metadata:

                                          [9]:
                                          @@ -842,7 +851,7 @@ 

                                          Fetch only cell metadata (corresponds to AnnData.obs)

                                          -

                                          Use gget cellxgene from the command line

                                          +

                                          Use gget cellxgene from the command line

                                          All gget modules support use from the command line. Note that the command line interface requires the -o/--out argument to specify a path to save the fetched data. Here are the command line versions of the queries demonstrated above:

                                          [10]:
                                          diff --git a/notebooks/api_demo/census_query_extract.html b/notebooks/api_demo/census_query_extract.html
                                          index db5cf7bd3..f282634de 100644
                                          --- a/notebooks/api_demo/census_query_extract.html
                                          +++ b/notebooks/api_demo/census_query_extract.html
                                          @@ -36,6 +36,8 @@
                                             
                                               
                                                 
                                          +        
                                          +        
                                                   
                                                   
                                                   
                                          @@ -130,6 +132,13 @@
                                                       
                                           
                                                       
                                          +
                                          +
                                          + + + + +
                                          @@ -212,7 +221,7 @@
                                          -

                                          Querying and fetching the single-cell data and cell/gene metadata.

                                          +

                                          Querying and fetching the single-cell data and cell/gene metadata.

                                          This tutorial showcases the easiest ways to query the expression data and cell/gene metadata from the Census, and load them into common in-memory Python objects, including pandas.DataFrame and anndata.AnnData.

                                          Contents

                                            @@ -223,7 +232,7 @@

                                            Querying and fetching the single-cell data and cell/gene metadata.Census schema.

                                            -

                                            Opening the census

                                            +

                                            Opening the census

                                            The cellxgene_census python package contains a convenient API to open the latest version of the Census.

                                            -

                                            Querying expression data

                                            +

                                            Querying expression data

                                            A convenient way to query and fetch expression data is to use the get_anndata method of the cellxgene_census API. This is a method that combines the column selection and value filtering we described above to obtain slices of the expression data based on metadata queries.

                                            The method returns an anndata.AnnData object. It takes as input a census object, the string for an organism, and, for both cell and gene metadata, filters and column selections as described above, specified with the following arguments:

                                              @@ -486,7 +495,7 @@

                                              Querying expression data -

                                              Querying cell metadata (obs)

                                              +

                                              Querying cell metadata (obs)

                                              The human cell metadata of the Census, for RNA assays, is located at census["census_data"]["homo_sapiens"].obs. This is a SOMADataFrame and as such it can be materialized as a pandas.DataFrame via the methods read().concat().to_pandas().

                                              The mouse cell metadata is at census["census_data"]["mus_musculus"].obs.

                                              For slicing the cell metadata there are two relevant arguments that can be passed through read():

                                              @@ -972,7 +981,7 @@

                                              Querying cell metadata (obs) -

                                              Querying gene metadata (var)

                                              +

                                              Querying gene metadata (var)

                                              The human gene metadata of the Census is located at census["census_data"]["homo_sapiens"].ms["RNA"].var. Similarly to the cell metadata, it is a SOMADataFrame and thus we can also use its method read().

                                              The mouse gene metadata is at census["census_data"]["mus_musculus"].ms["RNA"].var.

                                              Let’s take a look at the metadata available for column selection and row filtering.

                                              diff --git a/notebooks/api_demo/census_summary_cell_counts.html b/notebooks/api_demo/census_summary_cell_counts.html index b8846d5f2..4cc0e7fff 100644 --- a/notebooks/api_demo/census_summary_cell_counts.html +++ b/notebooks/api_demo/census_summary_cell_counts.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                                              +
                                              + + + + +

                                          @@ -212,7 +221,7 @@
                                          -

                                          Exploring pre-calculated summary cell counts

                                          +

                                          Exploring pre-calculated summary cell counts

                                          This tutorial describes how to access pre-calculated summary cell counts. Each Census contains a top-level dataframe summarizing counts of various cell labels: the census_summary_cell_counts dataframe. You can read this into a Pandas DataFrame.

                                          Contents

                                            @@ -221,7 +230,7 @@

                                            Exploring pre-calculated summary cell countsis_primary_data which is described in the Census schema.

                                            -

                                            Fetching the census_summary_cell_counts dataframe

                                            +

                                            Fetching the census_summary_cell_counts dataframe

                                            [1]:
                                             
                                            @@ -384,7 +393,7 @@

                                            Fetching the ce

                                            -

                                            Creating summary counts beyond pre-calculated values.

                                            +

                                            Creating summary counts beyond pre-calculated values.

                                            The dataframe above is precomputed from the experiments in the Census, providing a quick overview of the Census contents.

                                            You can do similar group statistics using Pandas groupby functions.

                                            The code below reproduces the above counts using full obs dataframe in the Homo_sapiens experiment.

                                            diff --git a/notebooks/experimental/highly_variable_genes.html b/notebooks/experimental/highly_variable_genes.html index ace610441..70b9900bf 100644 --- a/notebooks/experimental/highly_variable_genes.html +++ b/notebooks/experimental/highly_variable_genes.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                                            +
                                            + + + + +

                                          @@ -209,7 +218,7 @@
                                          -

                                          Experimental Highly Variable Genes API

                                          +

                                          Experimental Highly Variable Genes API

                                          This tutorial describes use of the cellxgene_census.experimental.pp API for finding highly variable genes (HVGs) in the Census. The HVG algorithm implements the ranked normalized variance method seurat_v3 described in scanpy.pp.highly_variable_genes.

                                          There are two APIs available:

                                            @@ -243,7 +252,7 @@

                                            Experimental Highly Variable Genes API -

                                            get_highly_variable_genes

                                            +

                                            get_highly_variable_genes

                                            This convenience function will meet most use cases, and is a wrapper around highly_variable_genes. This demonstration requests the top 500 genes from the Mouse census where tissue_general is heart, and joins with the var dataframe.

                                            The HVGs returned by get_highly_variable_genes are indexed by their soma_joinid. Join with the var dataframe to have a merged view of var metadata.

                                            @@ -768,7 +777,7 @@

                                            get_highly_variable_genes -

                                            highly_variable_genes

                                            +

                                            highly_variable_genes

                                            This API provides the same function as get_highly_variable_genes, but accepts any tiledbsoma.ExperimentAxisQuery. It is intended for more advanced users who wish to create and manage their own queries.

                                            [5]:
                                            diff --git a/notebooks/experimental/mean_variance.html b/notebooks/experimental/mean_variance.html
                                            index 99f79fd3e..483023c69 100644
                                            --- a/notebooks/experimental/mean_variance.html
                                            +++ b/notebooks/experimental/mean_variance.html
                                            @@ -36,6 +36,8 @@
                                               
                                                 
                                                   
                                            +        
                                            +        
                                                     
                                                     
                                                     
                                            @@ -130,6 +132,13 @@
                                                         
                                             
                                                         
                                            +
                                            +
                                            + + + + +
                                            @@ -209,7 +218,7 @@
                                            -

                                            Out-of-core (incremental) mean and variance calculation

                                            +

                                            Out-of-core (incremental) mean and variance calculation

                                            This tutorial describes use of the cellxgene_census.experimental.pp API for calculating out-of-core mean and variance in the Census. The variance calculation is performed using Welford’s online algorithm.

                                            Contents

                                              @@ -217,7 +226,7 @@

                                              Out-of-core (incremental) mean and variance calculation

                                              Example: calculate mean and variance for a slice of the Census.

                                            -

                                            The mean and variance API

                                            +

                                            The mean and variance API

                                            mean_variance() calculates the mean and the variance for an ExperimentAxisQuery. The following additional arguments are supported:

                                            • layer: the X layer used for the calculation, defaults to raw

                                            • @@ -240,7 +249,7 @@

                                              The mean and variance API -

                                              Example: calculate mean and variance for a slice of the Census

                                              +

                                              Example: calculate mean and variance for a slice of the Census

                                              As an example, we’ll calculate the mean and variance along the obs axis for a subset of cells from the Mouse census.

                                              The return value will be a Pandas dataframe indexed by soma_joinid (in this case, it will be relative to obs) and will contain the mean and variance columns.

                                              diff --git a/notebooks/experimental/pytorch.html b/notebooks/experimental/pytorch.html index c985bcd85..06035731b 100644 --- a/notebooks/experimental/pytorch.html +++ b/notebooks/experimental/pytorch.html @@ -36,6 +36,8 @@ + + @@ -130,6 +132,13 @@ +
                                              +
                                              + + + + +
                                              @@ -212,7 +221,7 @@
                                              -

                                              Training a PyTorch Model

                                              +

                                              Training a PyTorch Model

                                              This tutorial shows how to train a Logistic Regression model in PyTorch using the Census API’s experimental.ml.ExperimentDataPipe class. This is intended only to demonstrate the use of the ExperimentDataPipe, and not as an example of how to train a biologically useful model.

                                              This tutorial assumes a basic familiarity with PyTorch and the Census API. See the Querying and fetching the single-cell data and cell/gene metadata notebook tutorial for a quick primer on Census API usage.

                                              Contents

                                              @@ -224,7 +233,7 @@

                                              Training a PyTorch ModelMake predictions with the model

                                            -

                                            Open the Census

                                            +

                                            Open the Census

                                            First, obtain a handle to the Census data, in the usual manner:

                                            [25]:
                                            @@ -246,7 +255,7 @@ 

                                            Open the Census -

                                            Create an ExperimentDataPipe

                                            +

                                            Create an ExperimentDataPipe

                                            To train a model in PyTorch using this census data object, first instantiate an ExperimentDataPipe as follows:

                                            [26]:
                                            @@ -271,12 +280,12 @@ 

                                            Create an ExperimentDataPipe -

                                            ExperimentDataPipe class explained

                                            +

                                            ExperimentDataPipe class explained

                                            This class provides an implementation of PyTorch’s DataPipe interface, which defines a common mechanism for wrapping and accessing training data from any underlying source. The ExperimentDataPipe class encapsulates the details of querying and retrieving Census data from a single SOMA Experiment and returning it to the caller as PyTorch Tensors. Most importantly, it retrieves the data lazily from the Census in batches, avoiding having to load the entire training dataset into memory at once. (Note: PyTorch also provides DataSet as a legacy interface for wrapping and accessing training data sources, but a DataPipe can be used interchangeably.)

                                            -

                                            ExperimentDataPipe parameters explained

                                            +

                                            ExperimentDataPipe parameters explained

                                            The constructor only requires a single parameter, experiment, which is a soma.Experiment containing the data of the organism to be used for training.

                                            To retrieve a subset of the Experiment’s data, along either the obs or var axes, you may specify query filters via the obs_query and var_query parameters, which are both soma.AxisQuery objects.

                                            The values for the prediction label(s) that you intend to use for training are specified via the obs_column_names array.

                                            @@ -306,7 +315,7 @@

                                            ExperimentDataP

                                            -

                                            Split the dataset

                                            +

                                            Split the dataset

                                            You may split the overall dataset into the typical training, validation, and test sets by using the PyTorch RandomSplitter DataPipe. Using PyTorch’s functional form for chaining DataPipes, this is done as follows:

                                            -

                                            Define the model

                                            +

                                            Define the model

                                            With the training data retrieval code now in place, we can move on to defining a simple logistic regression model, using PyTorch’s torch.nn.Linear class:

                                            [30]:
                                            @@ -426,7 +435,7 @@ 

                                            Define the modelexperiment_datapipe.obs_encoders (more on this below).

                                            -

                                            Train the model

                                            +

                                            Train the model

                                            Finally, we are ready to train the model. Here we instantiate the model, a loss function, and an optimization method and then iterate through the desired number of training epochs. Note how the train_dataloader is passed into train_epoch, where for each epoch it will provide a new iterator through the training dataset.

                                            [32]:
                                            @@ -470,7 +479,7 @@ 

                                            Train the model -

                                            Make predictions with the model

                                            +

                                            Make predictions with the model

                                            To make predictions with the model, we first create a new DataLoader using the test_datapipe, which provides the “test” split of the original dataset. For this example, we will only make predictions on a single batch of data from the test split.

                                            [33]:
                                            diff --git a/notebooks/experimental/pytorch.ipynb b/notebooks/experimental/pytorch.ipynb
                                            index bacecdee6..b5617bdd0 100644
                                            --- a/notebooks/experimental/pytorch.ipynb
                                            +++ b/notebooks/experimental/pytorch.ipynb
                                            @@ -118,7 +118,7 @@
                                               },
                                               {
                                                "cell_type": "markdown",
                                            -   "id": "ccb38226",
                                            +   "id": "46470edb",
                                                "metadata": {
                                                 "collapsed": false
                                                },
                                            @@ -130,7 +130,7 @@
                                               },
                                               {
                                                "cell_type": "markdown",
                                            -   "id": "b3563ee7",
                                            +   "id": "f31b7460",
                                                "metadata": {
                                                 "collapsed": false
                                                },
                                            diff --git a/py-modindex.html b/py-modindex.html
                                            index c5cb6d7fb..5d491db09 100644
                                            --- a/py-modindex.html
                                            +++ b/py-modindex.html
                                            @@ -34,6 +34,8 @@
                                               
                                                 
                                                   
                                            +        
                                            +        
                                                     
                                                     
                                                     
                                            @@ -131,6 +133,13 @@
                                                         
                                             
                                                         
                                            +
                                            +
                                            + + + + +
                                            diff --git a/python-api.html b/python-api.html index c85910c6e..8ff1228ff 100644 --- a/python-api.html +++ b/python-api.html @@ -35,6 +35,8 @@ + + @@ -127,6 +129,13 @@ +
                                            +
                                            + + + + +
                                            @@ -234,7 +243,7 @@
                                            -

                                            Python API

                                            +

                                            Python API

                                            An API to facilitate use of the CZI Science CELLxGENE Census. The Census is a versioned container of single-cell data hosted at CELLxGENE Discover.

                                            The API is built on the tiledbsoma SOMA API, and provides a number of helper functions including:

                                            @@ -247,7 +256,7 @@

                          For more information on the API, visit the cellxgene_census repo. For more information on SOMA, see the tiledbsoma repo.

                          -

                          Open/retrieve Cell Census data

                          +

                          Open/retrieve Cell Census data

    @@ -266,11 +275,11 @@

    Open/retrieve Cell Census data -

    Get slice as AnnData

    +

    Get slice as AnnData

    cellxgene_census.open_soma

    - + @@ -282,7 +291,7 @@

    Get slice as AnnData
    -

    Feature presence matrix

    +

    Feature presence matrix

    cellxgene_census.get_anndata

    Convenience wrapper around tiledbsoma.Experiment query, to build and execute a query, and return it as an anndata.AnnData object.

    Convenience wrapper around tiledbsoma.Experiment query, to build and execute a query, and return it as an anndata.AnnData object.

    cellxgene_census.get_obs

    Get the observation metadata for a query on the census.

    @@ -292,7 +301,7 @@

    Feature presence matrix
    -

    Versioning of Cell Census builds

    +

    Versioning of Cell Census builds

    cellxgene_census.get_presence_matrix

    @@ -305,7 +314,7 @@

    Versioning of Cell Census builds -

    Experimental: Machine Learning

    +

    Experimental: Machine Learning

    cellxgene_census.get_census_version_description

    @@ -327,7 +336,7 @@

    Experimental: Machine Learning -

    Experimental: Processing

    +

    Experimental: Processing

    cellxgene_census.experimental.ml.pytorch.experiment_dataloader

    @@ -343,7 +352,7 @@

    Experimental: Processing -

    Experimental: Embeddings

    +

    Experimental: Embeddings

    cellxgene_census.experimental.pp.get_highly_variable_genes

    diff --git a/r/articles/comp_bio_data_integration.html b/r/articles/comp_bio_data_integration.html index 0bbb4b636..dc485eef7 100644 --- a/r/articles/comp_bio_data_integration.html +++ b/r/articles/comp_bio_data_integration.html @@ -316,20 +316,20 @@

    # Run the standard workflow for visualization and clustering seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = "pca", dims = 1:30) -#> 23:56:21 UMAP embedding parameters a = 0.9922 b = 1.112 -#> 23:56:21 Read 10153 rows and found 30 numeric columns -#> 23:56:21 Using Annoy for neighbor search, n_neighbors = 30 -#> 23:56:21 Building Annoy index with metric = cosine, n_trees = 50 +#> 20:54:47 UMAP embedding parameters a = 0.9922 b = 1.112 +#> 20:54:47 Read 10153 rows and found 30 numeric columns +#> 20:54:47 Using Annoy for neighbor search, n_neighbors = 30 +#> 20:54:47 Building Annoy index with metric = cosine, n_trees = 50 #> 0% 10 20 30 40 50 60 70 80 90 100% #> [----|----|----|----|----|----|----|----|----|----| #> **************************************************| -#> 23:56:23 Writing NN index file to temp file /tmp/RtmpsKixI5/file1cd2c4aac6d19 -#> 23:56:23 Searching Annoy index using 1 thread, search_k = 3000 -#> 23:56:26 Annoy recall = 100% -#> 23:56:26 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 -#> 23:56:27 Initializing from normalized Laplacian + noise (using RSpectra) -#> 23:56:28 Commencing optimization for 200 epochs, with 410528 positive edges -#> 23:56:31 Optimization finished +#> 20:54:49 Writing NN index file to temp file /tmp/RtmpNZhqvF/file114141d66b377 +#> 20:54:49 Searching Annoy index using 1 thread, search_k = 3000 +#> 20:54:53 Annoy recall = 100% +#> 20:54:53 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 +#> 20:54:54 Initializing from normalized Laplacian + noise (using RSpectra) +#> 20:54:54 Commencing optimization for 200 epochs, with 410528 positive edges +#> 20:54:59 Optimization finished

    Plot the UMAP.

     # By assay
    diff --git a/r/pkgdown.yml b/r/pkgdown.yml
    index e843b9514..06f84ff2e 100644
    --- a/r/pkgdown.yml
    +++ b/r/pkgdown.yml
    @@ -12,5 +12,5 @@ articles:
       comp_bio_data_integration: comp_bio_data_integration.html
       comp_bio_normalizing_full_gene_sequencing: comp_bio_normalizing_full_gene_sequencing.html
       comp_bio_summarize_axis_query: comp_bio_summarize_axis_query.html
    -last_built: 2024-05-28T23:09Z
    +last_built: 2024-05-31T20:00Z
     
    diff --git a/r/search.json b/r/search.json
    index 0a62638d3..319c835f6 100644
    --- a/r/search.json
    +++ b/r/search.json
    @@ -1 +1 @@
    -[{"path":"/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2022, Chan Zuckerberg Initiative Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"/articles/census_access_maintained_embeddings.html","id":"open-census","dir":"Articles","previous_headings":"","what":"Open Census","title":"Access CELLxGENE collaboration embeddings (scVI, Geneformer)","text":"","code":"library(\"cellxgene.census\") census <- open_soma(census_version = \"2023-12-15\")"},{"path":"/articles/census_access_maintained_embeddings.html","id":"load-embeddings-as-seurat-reductions","dir":"Articles","previous_headings":"","what":"Load embeddings as Seurat reductions","title":"Access CELLxGENE collaboration embeddings (scVI, Geneformer)","text":"high-level cellxgene.census::get_seurat() function can query Census load embeddings dimensional reductions Seurat object. ask Seurat object expression data human cells tissue_general equal 'central nervous system', along scVI geneformer embeddings (obsm_layers). 
embeddings stored dimensional reductions seurat_obj, can take quick look scVI embeddings 2D scatter plot via UMAP, colored Census cell_type annotations.","code":"library(\"Seurat\")  seurat_obj <- get_seurat(   census,   organism = \"homo_sapiens\",   obs_value_filter = \"tissue_general == 'central nervous system'\",   obs_column_names = c(\"cell_type\"),   obsm_layers = c(\"scvi\", \"geneformer\") ) seurat_obj <- RunUMAP(   seurat_obj,   reduction = \"scvi\",   dims = 1:ncol(Embeddings(seurat_obj, \"scvi\")) )  DimPlot(seurat_obj, reduction = \"umap\", group.by = \"cell_type\") +   theme(legend.text = element_text(size = 8))"},{"path":"/articles/census_access_maintained_embeddings.html","id":"load-embeddings-as-singlecellexperiment-reductions","dir":"Articles","previous_headings":"","what":"Load embeddings as SingleCellExperiment reductions","title":"Access CELLxGENE collaboration embeddings (scVI, Geneformer)","text":"Similarly, cellxgene.census::get_single_cell_experiment() can query Census store embeddings dimensionality reduction results Bioconductor SingleCellExperiment object. , can view UMAP Geneformer embeddings colored cell_type.","code":"library(\"SingleCellExperiment\") sce_obj <- get_single_cell_experiment(   census,   organism = \"homo_sapiens\",   obs_value_filter = \"tissue_general == 'central nervous system'\",   obs_column_names = c(\"cell_type\"),   obsm_layers = c(\"scvi\", \"geneformer\") ) sce_obj <- scater::runUMAP(sce_obj, dimred = \"geneformer\") scater::plotReducedDim(sce_obj, dimred = \"UMAP\", colour_by = \"cell_type\")"},{"path":"/articles/census_access_maintained_embeddings.html","id":"load-embeddings-as-sparsematrix","dir":"Articles","previous_headings":"","what":"Load embeddings as sparseMatrix","title":"Access CELLxGENE collaboration embeddings (scVI, Geneformer)","text":"Lastly, can use SOMAExperimentAxisQuery lower-level access embeddings’ numerical data. can performant use cases don’t need features Seurat SingleCellExperiment. 
row embeddings sparseMatrix provides fine-tuned Geneformer model’s 512-dimensional embedding vector cell, cell soma_joinids row names. different arguments, SOMAExperimentAxisQuery$to_sparse_matrix() can also read scVI embeddings expression data. Still lower-level access available SOMAExperimentAxisQuery$read(), streams Arrow tables. methods SOMAExperimentAxisQuery can fetch metadata like cell_type: SOMAExperimentAxisQuery loads ask Census, unlike high-level get_seurat() get_single_cell_experiment() functions, eagerly populate objects based query.","code":"query <- census$get(\"census_data\")$get(\"homo_sapiens\")$axis_query(   \"RNA\",   obs_query = tiledbsoma::SOMAAxisQuery$new(value_filter = \"tissue == 'tongue'\") ) embeddings <- query$to_sparse_matrix(\"obsm\", \"geneformer\") str(embeddings) #> Formal class 'dgTMatrix' [package \"Matrix\"] with 6 slots #>   ..@ i       : int [1:190464] 0 0 0 0 0 0 0 0 0 0 ... #>   ..@ j       : int [1:190464] 0 1 2 3 4 5 6 7 8 9 ... #>   ..@ Dim     : int [1:2] 372 512 #>   ..@ Dimnames:List of 2 #>   .. ..$ : chr [1:372] \"51784858\" \"51784859\" \"51784860\" \"51784861\" ... #>   .. ..$ : chr [1:512] \"0\" \"1\" \"2\" \"3\" ... #>   ..@ x       : num [1:190464] 0.1104 -1.2031 1.0078 0.0131 1.2422 ... #>   ..@ factors : list() head(as.data.frame(query$obs(column_names = c(\"soma_joinid\", \"cell_type\"))$concat())) #>   soma_joinid  cell_type #> 1    51784858 basal cell #> 2    51784859 basal cell #> 3    51784860 fibroblast #> 4    51784861 fibroblast #> 5    51784862 basal cell #> 6    51784863 basal cell census$close()"},{"path":"/articles/census_axis_query.html","id":"axis-query-example","dir":"Articles","previous_headings":"","what":"Axis Query Example","title":"Axis Query Example","text":"Goal: demonstrate basic axis metadata handling. CZ CELLxGENE Census stores obs (cell) metadata SOMA DataFrame, can queried read R data frame. Census also convenience package simplifies opening census. R data frames -memory objects. 
Take care queries small enough results fit memory.","code":""},{"path":"/articles/census_axis_query.html","id":"opening-the-census","dir":"Articles","previous_headings":"Axis Query Example","what":"Opening the census","title":"Axis Query Example","text":"cellxgene.census R package contains convenient API open latest version Census. can learn cellxgene.census methods accessing corresponding documentation. example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma()"},{"path":"/articles/census_axis_query.html","id":"summarize-census-cell-metadata","dir":"Articles","previous_headings":"Axis Query Example","what":"Summarize Census cell metadata","title":"Axis Query Example","text":"Tips: can read entire SOMA dataframe R using .data.frame(soma_df$read()). Queries much faster request DataFrame columns required analysis (e.g. column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\")). can also refine query results using value_filter, filter census matching records.","code":""},{"path":"/articles/census_axis_query.html","id":"summarize-all-cell-types","dir":"Articles","previous_headings":"Axis Query Example > Summarize Census cell metadata","what":"Summarize all cell types","title":"Axis Query Example","text":"example reads cell metadata (obs) R data frame summarize variety ways.","code":"human <- census$get(\"census_data\")$get(\"homo_sapiens\")  # Read obs into an R data frame (tibble). obs_df <- as.data.frame(human$obs$read(   column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\") ))  # Find all unique values in the cell_type_ontology_term_id column. unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id)  cat(paste(   \"There are\",   length(unique_cell_type_ontology_term_id),   \"cell types in the Census! The first few are:\" )) #> There are 604 cell types in the Census! 
The first few are: head(unique_cell_type_ontology_term_id) #> [1] \"CL:0000540\" \"CL:0000738\" \"CL:0000763\" \"CL:0000136\" \"CL:0000235\" #> [6] \"CL:0000115\""},{"path":"/articles/census_axis_query.html","id":"summarize-a-subset-of-cell-types-selected-with-a-value_filter","dir":"Articles","previous_headings":"Axis Query Example > Summarize Census cell metadata","what":"Summarize a subset of cell types, selected with a value_filter","title":"Axis Query Example","text":"example utilizes SOMA “value filter” read subset cells tissue_ontology_term_id equal UBERON:0002048 (lung tissue), summarizes query result. can also define much complex value filters. example: combine terms use %% operator query multiple values","code":"# Read cell_type terms for cells which have a specific tissue term LUNG_TISSUE <- \"UBERON:0002048\"  obs_df <- as.data.frame(human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = paste(\"tissue_ontology_term_id == '\", LUNG_TISSUE, \"'\", sep = \"\") ))  # Find all unique values in the cell_type_ontology_term_id column as an R data frame. unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id) cat(paste(   \"There are \",   length(unique_cell_type_ontology_term_id),   \" cell types in the Census where tissue_ontology_term_id == \",   LUNG_TISSUE,   \"!\\nThe first few are:\",   sep = \"\" )) #> There are 185 cell types in the Census where tissue_ontology_term_id == UBERON:0002048! 
#> The first few are: head(unique_cell_type_ontology_term_id) #> [1] \"CL:0000003\" \"CL:4028004\" \"CL:0002145\" \"CL:0000625\" \"CL:0000624\" #> [6] \"CL:4028006\"  # Report the 10 most common top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] cat(paste(\"The top 10 cell types where tissue_ontology_term_id ==\", LUNG_TISSUE)) #> The top 10 cell types where tissue_ontology_term_id == UBERON:0002048 print(top_10) #>  #> CL:0000003 CL:0000583 CL:0000625 CL:0000624 CL:0000235 CL:0002063 CL:0000860  #>     562038     526859     323433     323067     254173     246279     203526  #> CL:0000623 CL:0001064 CL:0002632  #>     164944     149067     132243 # You can also do more complex queries, such as testing for inclusion in a list of values obs_df <- as.data.frame(human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = \"tissue_ontology_term_id %in% c('UBERON:0002082', 'UBERON:OOO2084', 'UBERON:0002080')\" ))  # Summarize top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] print(top_10) #>  #> CL:0000746 CL:0008034 CL:0002548 CL:0000115 CL:0002131 CL:0000763 CL:0000669  #>     159096      84750      79618      64190      61830      32088      27515  #> CL:0000003 CL:0000057 CL:0002144  #>      22707      20117      18593"},{"path":"/articles/census_axis_query.html","id":"full-census-stats","dir":"Articles","previous_headings":"Axis Query Example > Summarize Census cell metadata","what":"Full census stats","title":"Axis Query Example","text":"example queries organisms Census, summarizes diversity various metadata labels.","code":"cols_to_query <- c(   \"cell_type_ontology_term_id\",   \"assay_ontology_term_id\",   \"tissue_ontology_term_id\" )  total_cells <- 0 for (organism in census$get(\"census_data\")$names()) {   print(organism)   obs_df <- as.data.frame(     census$get(\"census_data\")$get(organism)$obs$read(column_names = cols_to_query)   )   total_cells <- total_cells + 
nrow(obs_df)   for (col in cols_to_query) {     cat(paste(\"  Unique \", col, \" values: \", length(unique(obs_df[[col]])), \"\\n\", sep = \"\"))   } } #> [1] \"homo_sapiens\" #>   Unique cell_type_ontology_term_id values: 604 #>   Unique assay_ontology_term_id values: 20 #>   Unique tissue_ontology_term_id values: 227 #> [1] \"mus_musculus\" #>   Unique cell_type_ontology_term_id values: 226 #>   Unique assay_ontology_term_id values: 9 #>   Unique tissue_ontology_term_id values: 51 cat(paste(\"Complete Census contains\", total_cells, \"cells.\")) #> Complete Census contains 60361716 cells."},{"path":"/articles/census_citation_generation.html","id":"requirements","dir":"Articles","previous_headings":"","what":"Requirements","title":"Generating citations for Census slices","text":"notebook requires: cellxgene_census Python package. Census data release schema version 1.3.0 greater.","code":""},{"path":"/articles/census_citation_generation.html","id":"generating-citation-strings","dir":"Articles","previous_headings":"","what":"Generating citation strings","title":"Generating citations for Census slices","text":"First open handle Census data. ensure open data release schema version 1.3.0 greater, use census_version=\"latest\" load dataset table contains column \"citation\" dataset included Census. 
now can use column \"dataset_id\" present dataset table Census cell metadata create citation strings Census slice.","code":"library(\"tiledb\") library(\"cellxgene.census\")  census <- open_soma(census_version = \"latest\") census_release_info <- census$get(\"census_info\")$get(\"summary\")$read()$concat() as.data.frame(census_release_info) #>   soma_joinid                      label      value #> 1           0      census_schema_version      2.0.1 #> 2           1          census_build_date 2024-05-27 #> 3           2     dataset_schema_version      5.0.0 #> 4           3           total_cell_count  115661967 #> 5           4          unique_cell_count   60703793 #> 6           5 number_donors_homo_sapiens      17671 #> 7           6 number_donors_mus_musculus       4216 datasets <- census$get(\"census_info\")$get(\"datasets\")$read()$concat() datasets <- as.data.frame(datasets) head(datasets[\"citation\"]) #>                                                                                                                                                                                                                                                                                                           citation #> 1            Publication: https://doi.org/10.1002/hep4.1854 Dataset Version: https://datasets.cellxgene.cziscience.com/fb76c95f-0391-4fac-9fb9-082ce2430b59.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/44531dd9-1388-4416-a117-af0a99de2294 #> 2   Publication: https://doi.org/10.1126/sciimmunol.abe6291 Dataset Version: https://datasets.cellxgene.cziscience.com/b6737a5e-9069-4dd6-9a57-92e17a746df9.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/3a2af25b-2338-4266-aad3-aa8d07473f50 #> 3   Publication: https://doi.org/10.1038/s41593-020-00764-7 Dataset Version: 
https://datasets.cellxgene.cziscience.com/0e02290f-b992-450b-8a19-554f73cd7f09.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/180bff9c-c8a5-4539-b13b-ddbc00d643e6 #> 4   Publication: https://doi.org/10.1038/s41467-022-29450-x Dataset Version: https://datasets.cellxgene.cziscience.com/40832710-d7b1-43fb-b2c2-1cd2255bc3ac.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/bf325905-5e8e-42e3-933d-9a9053e9af80 #> 5   Publication: https://doi.org/10.1038/s41590-021-01059-0 Dataset Version: https://datasets.cellxgene.cziscience.com/eb6c070c-ff67-4c1f-8d4d-65f9fe2119ee.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/93eebe82-d8c3-41bc-a906-63b5b5f24a9d #> 6 Publication: https://doi.org/10.1016/j.celrep.2019.12.082 Dataset Version: https://datasets.cellxgene.cziscience.com/650a47be-6666-4f70-ac47-8414c50bbd8e.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/939769a8-d8d2-4d01-abfc-55699893fd49"},{"path":"/articles/census_citation_generation.html","id":"via-cell-metadata-query","dir":"Articles","previous_headings":"Generating citation strings","what":"Via cell metadata query","title":"Generating citations for Census slices","text":"","code":"# Query cell metadata cell_metadata <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   value_filter = \"tissue == 'cardiac atrium'\",   column_names = c(\"dataset_id\", \"cell_type\") )  cell_metadata <- as.data.frame(cell_metadata$concat())  # Get a citation string for the slice slice_datasets <- datasets[datasets$dataset_id %in% cell_metadata$dataset_id, ] print(slice_datasets$citation) #> [1] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/9227d155-6f2d-4534-be73-b86c5c34d8e6.h5ad curated and 
distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [2] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/017c9ef2-a5e5-429e-a9a1-919e330c4087.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [3] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/8c189c08-4eba-45d4-925f-a5fe1a13d2ae.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [4] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/b76d37f6-0654-447f-bd1b-477be2c747f9.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [5] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/860c49d4-8ab1-4576-b67e-02d66e4a6ddd.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [6] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/b84def55-a776-4aa4-a9a6-7aab8b973086.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\""},{"path":"/articles/census_citation_generation.html","id":"via-seurat-query","dir":"Articles","previous_headings":"Generating citation strings","what":"Via Seurat query","title":"Generating citations for Census slices","text":"","code":"# Fetch a Seurat object seurat_obj <- get_seurat(   census = census,   
organism = \"homo_sapiens\",   measurement_name = \"RNA\",   obs_value_filter = \"tissue == 'cardiac atrium'\",   var_value_filter = \"feature_name == 'MYBPC3'\",   obs_column_names = c(\"dataset_id\", \"cell_type\") )  # Get a citation string for the slice slice_datasets <- datasets[datasets$dataset_id %in% seurat_obj[[]]$dataset_id, ] print(slice_datasets$citation) #> [1] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/9227d155-6f2d-4534-be73-b86c5c34d8e6.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [2] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/017c9ef2-a5e5-429e-a9a1-919e330c4087.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [3] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/8c189c08-4eba-45d4-925f-a5fe1a13d2ae.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [4] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/b76d37f6-0654-447f-bd1b-477be2c747f9.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [5] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/860c49d4-8ab1-4576-b67e-02d66e4a6ddd.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [6] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset 
Version: https://datasets.cellxgene.cziscience.com/b84def55-a776-4aa4-a9a6-7aab8b973086.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\""},{"path":"/articles/census_citation_generation.html","id":"via-singlecellexperiment-query","dir":"Articles","previous_headings":"Generating citation strings","what":"Via SingleCellExperiment query","title":"Generating citations for Census slices","text":"","code":"# Fetch a Seurat object sce_obj <- get_single_cell_experiment(   census = census,   organism = \"homo_sapiens\",   measurement_name = \"RNA\",   obs_value_filter = \"tissue == 'cardiac atrium'\",   var_value_filter = \"feature_name == 'MYBPC3'\",   obs_column_names = c(\"dataset_id\", \"cell_type\") )  # Get a citation string for the slice slice_datasets <- datasets[datasets$dataset_id %in% sce_obj$dataset_id, ] print(slice_datasets$citation) #> [1] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/9227d155-6f2d-4534-be73-b86c5c34d8e6.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [2] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/017c9ef2-a5e5-429e-a9a1-919e330c4087.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [3] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/8c189c08-4eba-45d4-925f-a5fe1a13d2ae.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [4] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: 
https://datasets.cellxgene.cziscience.com/b76d37f6-0654-447f-bd1b-477be2c747f9.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [5] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/860c49d4-8ab1-4576-b67e-02d66e4a6ddd.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [6] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/b84def55-a776-4aa4-a9a6-7aab8b973086.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\""},{"path":"/articles/census_compute_over_X.html","id":"incremental-mean-calculation","dir":"Articles","previous_headings":"","what":"Incremental mean calculation","title":"Computing on X using online (incremental) algorithms","text":"Many statistics, marginal means, easy calculate incrementally. Let’s begin query X$raw sparse matrix unnormalized read counts, return results shards incrementally accumulate read count gene, divide cell count get mean reads per cell gene. First define query - case slice obs axis cells specific tissue & sex value, genes var axis. query$X() method returns iterator results, Arrow Table. table contain sparse X data obs/var coordinates, using standard SOMA names: soma_data - X values (float32) soma_dim_0 - obs coordinate (int64) soma_dim_1 - var coordinate (int64) Important: X matrices joined var/obs axis DataFrames integer join “id” (aka soma_joinid). positionally indexed, given cell gene may soma_joinid value (e.g., large integer). words, given X value, soma_dim_0 corresponds soma_joinid obs dataframe, soma_dim_1 coordinate corresponds soma_joinid var dataframe. 
convenience, query class includes utility simplify operations query slices. query$indexer indexer used wrap output query$X(), converting soma_joinids positional indexing query results. Positions [0, N), N number results query given axis. Key points: expensive query read results - rather make multiple passes data, read perform multiple computations. default, data census indexed soma_joinid positionally.","code":"library(\"tiledbsoma\") library(\"cellxgene.census\") census <- open_soma()  query <- census$get(\"census_data\")$get(\"mus_musculus\")$axis_query(   measurement_name = \"RNA\",   obs_query = SOMAAxisQuery$new(value_filter = \"tissue=='brain' && sex=='male'\") )  genes_df <- query$var(column_names = c(\"feature_id\", \"feature_name\"))$concat() genes_df <- as.data.frame(genes_df) n_genes <- nrow(genes_df)  # accumulator vector (for each gene) for the total count over all cells in X(\"raw\") raw_sum_by_gene <- numeric(n_genes) names(raw_sum_by_gene) <- genes_df$feature_id  # iterate through in-memory shards of query results tables <- query$X(\"raw\")$tables() while (!tables$read_complete()) {   table_part <- tables$read_next()   # table_part is an Arrow table with the columns mentioned above. The result   # order is not guaranteed!    # table_part$soma_dim_1 is the var/gene soma_joinid. But note that these are   # arbitrary int64 id's, and moreover each table_part may exhibit only a subset   # of the values we'll see over all query results. query$indexer helps us map   # any given soma_dim_1 values onto positions in query$var() (genes_df), that is   # the union of all values we'll see.   
gene_indexes <- query$indexer$by_var(table_part$soma_dim_1)$as_vector()   stopifnot(sum(gene_indexes >= n_genes) == 0)   # sum(table_part) group by gene, yielding a numeric vector with the gene_index   # in its names   sum_part <- tapply(as.vector(table_part$soma_data), gene_indexes, sum)   # update the accumulator vector   which_genes <- as.integer(names(sum_part)) + 1 # nb: gene_indexes is zero-based   stopifnot(sum(which_genes > n_genes) == 0)   raw_sum_by_gene[which_genes] <- raw_sum_by_gene[which_genes] + sum_part }  # Divide each sum by cell count to get mean reads per cell (for each gene), # implicitly averaging in all zero entries in X even though they weren't included # in the sparse query results. genes_df$raw_mean <- raw_sum_by_gene / query$n_obs genes_df #>            feature_id  feature_name     raw_mean #> 1  ENSMUSG00000051951          Xkr4 1.397121e+00 #> 2  ENSMUSG00000025900           Rp1 3.162902e-01 #> 3  ENSMUSG00000025902         Sox17 6.604085e+01 #> 4  ENSMUSG00000033845        Mrpl15 3.939172e+01 #> 5  ENSMUSG00000025903        Lypla1 1.986548e+01 #> 6  ENSMUSG00000033813         Tcea1 4.305924e+01 #> 7  ENSMUSG00000002459         Rgs20 3.496194e+00 #> 8  ENSMUSG00000033793       Atp6v1h 7.470932e+01 #> 9  ENSMUSG00000025905         Oprk1 4.568752e-01 #> 10 ENSMUSG00000033774        Npbwr1 1.241003e-04 #> 11 ENSMUSG00000025907        Rb1cc1 3.631679e+01 #> 12 ENSMUSG00000033740          St18 1.660110e+01 #> 13 ENSMUSG00000051285        Pcmtd1 5.410501e+01 #> 14 ENSMUSG00000025909         Sntg1 1.178725e+00 #> 15 ENSMUSG00000061024          Rrs1 2.098927e+01 #> 16 ENSMUSG00000025911        Adhfe1 1.266112e+01 #> 17 ENSMUSG00000079671 2610203C22Rik 9.474621e+00 #> 18 ENSMUSG00000025912         Mybl1 2.643129e-01 #> 19 ENSMUSG00000045210        Vcpip1 3.456668e+01 #> 20 ENSMUSG00000097893 1700034P13Rik 5.721023e-01 #> 21 ENSMUSG00000025915          Sgk3 2.012592e+01 #> 22 ENSMUSG00000098234         Snhg6 6.784314e+00 #> 23 ENSMUSG00000025916   
    Ppp1r42 2.585422e-01 #> 24 ENSMUSG00000025917         Cops5 7.909310e+01 #> 25 ENSMUSG00000056763         Cspp1 1.635604e+01 #> 26 ENSMUSG00000067851       Arfgef1 1.582897e+01 #> 27 ENSMUSG00000042501          Cpa6 1.880119e-02 #> 28 ENSMUSG00000048960         Prex2 2.283623e+01 #> 29 ENSMUSG00000057715 A830018L16Rik 9.992140e-01 #> 30 ENSMUSG00000016918         Sulf1 5.567469e+00 #> 31 ENSMUSG00000025938       Slco5a1 2.452015e-01 #> 32 ENSMUSG00000042414        Prdm14 6.142964e-03 #> 33 ENSMUSG00000005886         Ncoa2 1.707928e+01 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 52384 rows ]"},{"path":"/articles/census_compute_over_X.html","id":"counting-cells-grouped-by-dataset-and-gene","dir":"Articles","previous_headings":"","what":"Counting cells grouped by dataset and gene","title":"Computing on X using online (incremental) algorithms","text":"goal example count number cells nonzero reads, grouped gene Census dataset_id. result data frame dataset, gene, number cells nonzero reads dataset gene. multi-factor aggregation, ’ll take advantage dplyr routines instead lower-level vector indexer shown . presentation purposes, ’ll limit query four genes, can expanded genes easily. 
Don’t forget close census.","code":"library(\"dplyr\")  query <- census$get(\"census_data\")$get(\"mus_musculus\")$axis_query(   measurement_name = \"RNA\",   obs_query = SOMAAxisQuery$new(value_filter = \"tissue=='brain'\"),   var_query = SOMAAxisQuery$new(value_filter = \"feature_name %in% c('Malat1', 'Ptprd', 'Dlg2', 'Pcdh9')\") )  obs_tbl <- query$obs(column_names = c(\"soma_joinid\", \"dataset_id\"))$concat() obs_df <- data.frame(   # materialize soma_joinid as character to avoid overflowing R 32-bit integer   cell_id = as.character(obs_tbl$soma_joinid),   dataset_id = obs_tbl$dataset_id$as_vector() ) var_tbl <- query$var(column_names = c(\"soma_joinid\", \"feature_name\"))$concat() var_df <- data.frame(   gene_id = as.character(var_tbl$soma_joinid),   feature_name = var_tbl$feature_name$as_vector() )  # accumulator for # cells by dataset & gene n_cells_grouped <- data.frame(   \"dataset_id\" = character(0),   \"gene_id\" = character(0),   \"n_cells\" = numeric(0) )  # iterate through in-memory shards of query results tables <- query$X(\"raw\")$tables() while (!tables$read_complete()) {   table_part <- tables$read_next()    # prepare a (dataset,gene,1) tuple for each entry in table_part   n_cells_part <- data.frame(     \"cell_id\" = as.character(table_part$soma_dim_0),     \"gene_id\" = as.character(table_part$soma_dim_1),     \"n_cells\" = 1   )   n_cells_part <- left_join(n_cells_part, obs_df, by = \"cell_id\")   stopifnot(sum(is.null(n_cells_part$dataset_id)) == 0)    # fold those into n_cells_grouped   n_cells_grouped <- n_cells_part %>%     select(-cell_id) %>%     bind_rows(n_cells_grouped) %>%     group_by(dataset_id, gene_id) %>%     summarise(n_cells = sum(n_cells)) %>%     ungroup() }  # add gene names for display n_cells_grouped <- left_join(n_cells_grouped, var_df, by = \"gene_id\") stopifnot(sum(is.null(n_cells_grouped$feature_name)) == 0) n_cells_grouped[c(\"dataset_id\", \"feature_name\", \"n_cells\")] #> # A tibble: 21 x 3 #>    dataset_id     
                      feature_name n_cells #>                                               #>  1 3bbb6cf9-72b9-41be-b568-656de6eb18b5 Ptprd          79578 #>  2 3bbb6cf9-72b9-41be-b568-656de6eb18b5 Dlg2           79513 #>  3 3bbb6cf9-72b9-41be-b568-656de6eb18b5 Pcdh9          79476 #>  4 3bbb6cf9-72b9-41be-b568-656de6eb18b5 Malat1         79667 #>  5 58b01044-c5e5-4b0f-8a2d-6ebf951e01ff Ptprd            474 #>  6 58b01044-c5e5-4b0f-8a2d-6ebf951e01ff Dlg2              81 #>  7 58b01044-c5e5-4b0f-8a2d-6ebf951e01ff Pcdh9            125 #>  8 58b01044-c5e5-4b0f-8a2d-6ebf951e01ff Malat1         12622 #>  9 66ff82b4-9380-469c-bc4b-cfa08eacd325 Dlg2             856 #> 10 66ff82b4-9380-469c-bc4b-cfa08eacd325 Pcdh9           2910 #> # i 11 more rows census$close()"},{"path":"/articles/census_dataset_presence.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Genes measured in each cell (dataset presence matrix)","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version).","code":"library(\"cellxgene.census\") census <- open_soma()"},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-ids-of-the-census-datasets","dir":"Articles","previous_headings":"","what":"Fetching the IDs of the Census datasets","title":"Genes measured in each cell (dataset presence matrix)","text":"Let’s grab table datasets included Census use table combination presence matrix .","code":"# Grab the experiment containing human data, and the measurement therein with RNA human <- census$get(\"census_data\")$get(\"homo_sapiens\") human_rna <- human$ms$get(\"RNA\")  # The census-wide datasets datasets_df <- as.data.frame(census$get(\"census_info\")$get(\"datasets\")$read()$concat()) print(datasets_df) #>    soma_joinid                        collection_id #> 1            0 4dca242c-d302-4dba-a68f-4c61e7bad553 #> 2            1 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 3            2 
d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 4            3 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 5            4 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 6            5 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 7            6 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 8            7 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 9            8 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 10           9 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 11          10 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #>                                                                       collection_name #> 1                Comparative transcriptomics reveals human-specific cortical features #> 2  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 3  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 4  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 5  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 6  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 7  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 8  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 9  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 10 Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 11 Transcriptomic cytoarchitecture reveals principles of human neocortex organization #>             collection_doi                           dataset_id #> 1  10.1126/science.ade9516 2bdd3a2c-2ff4-4314-adf3-8a06b797a33a #> 2  10.1126/science.adf6812 f5b0810c-1664-4a62-ad06-be1d9964aa8b #> 3  10.1126/science.adf6812 e4ddac12-f48f-4455-8e8d-c2a48a683437 #> 4  10.1126/science.adf6812 e2808a6e-e2ea-41b9-b38c-4a08f1677f02 #> 5  10.1126/science.adf6812 d01c9dff-abd1-4825-bf30-2eb2ba74597e #> 6  10.1126/science.adf6812 
c3aa4f95-7a18-4a7d-8dd8-ca324d714363 #> 7  10.1126/science.adf6812 be401db3-d732-408a-b0c4-71af0458b8ab #> 8  10.1126/science.adf6812 a5d5c529-8a1f-40b5-bda3-35208970070d #> 9  10.1126/science.adf6812 9c63201d-bfd9-41a8-bbbc-18d947556f3d #> 10 10.1126/science.adf6812 93cb76aa-a84b-4a92-8e6c-66a914e26d4c #> 11 10.1126/science.adf6812 8d1dd010-5cbc-43fb-83f8-e0de8e8517da #>                      dataset_version_id #> 1  7eb7f2fd-fd74-4c99-863c-97836415652e #> 2  d4427196-7876-4bdd-a929-ae4d177ec776 #> 3  3280113b-7148-4a3e-98d4-015f443aab8a #> 4  dc092185-3b8e-4fcb-ae21-1dc106d683ac #> 5  c4959ded-83dc-4442-aac7-9a59bdb47801 #> 6  0476ef54-aefe-4754-b0e9-d9fcd75adff4 #> 7  ee027704-72aa-4195-a467-0754db1ed65d #> 8  d47c0742-cea2-46c1-9e72-4d479214041c #> 9  8b09695a-1426-4867-961e-c40a1fbcc2da #> 10 98ad7381-f464-4f49-b850-5321b4f98be6 #> 11 c56683d2-452a-45dc-b402-35397e27e325 #>                                           dataset_title #> 1                               Human: Great apes study #> 2                       Dissection: Angular gyrus (AnG) #> 3                Supercluster: CGE-derived interneurons #> 4               Dissection: Primary auditory cortex(A1) #> 5  Supercluster: Deep layer (non-IT) excitatory neurons #> 6        Supercluster: IT-projecting excitatory neurons #> 7           Dissection: Anterior cingulate cortex (ACC) #> 8               Human Multiple Cortical Areas SMART-seq #> 9                Supercluster: MGE-derived interneurons #> 10        Dissection: Primary somatosensory cortex (S1) #> 11                Dissection: Primary visual cortex(V1) #>                            dataset_h5ad_path dataset_total_cell_count #> 1  2bdd3a2c-2ff4-4314-adf3-8a06b797a33a.h5ad                   156285 #> 2  f5b0810c-1664-4a62-ad06-be1d9964aa8b.h5ad                   110752 #> 3  e4ddac12-f48f-4455-8e8d-c2a48a683437.h5ad                   129495 #> 4  e2808a6e-e2ea-41b9-b38c-4a08f1677f02.h5ad                   139054 #> 5  
d01c9dff-abd1-4825-bf30-2eb2ba74597e.h5ad                    92969 #> 6  c3aa4f95-7a18-4a7d-8dd8-ca324d714363.h5ad                   638941 #> 7  be401db3-d732-408a-b0c4-71af0458b8ab.h5ad                   135462 #> 8  a5d5c529-8a1f-40b5-bda3-35208970070d.h5ad                    49417 #> 9  9c63201d-bfd9-41a8-bbbc-18d947556f3d.h5ad                   185477 #> 10 93cb76aa-a84b-4a92-8e6c-66a914e26d4c.h5ad                   153159 #> 11 8d1dd010-5cbc-43fb-83f8-e0de8e8517da.h5ad                   241077 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 640 rows ]"},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-dataset-presence-matrix","dir":"Articles","previous_headings":"","what":"Fetching the dataset presence matrix","title":"Genes measured in each cell (dataset presence matrix)","text":"Now let’s fetch dataset presence matrix. convenience, read entire presence matrix (Homo sapiens) sparse matrix. convenience function providing capability: also need var dataframe, read R data frame convenient manipulation:","code":"presence_matrix <- get_presence_matrix(census, \"Homo sapiens\", \"RNA\") print(dim(presence_matrix)) #> NULL var_df <- as.data.frame(human_rna$var$read()$concat()) print(var_df) #>    soma_joinid      feature_id feature_name feature_length      nnz n_measured_obs #> 1            0 ENSG00000233576      HTR3C2P           1057    69370       19581263 #> 2            1 ENSG00000121410         A1BG           3999  5640476       62641311 #> 3            2 ENSG00000268895     A1BG-AS1           3374  3071864       61946057 #> 4            3 ENSG00000148584         A1CF           9603   734347       58195911 #> 5            4 ENSG00000175899          A2M           6318  7894261       62704378 #> 6            5 ENSG00000245105      A2M-AS1           2948  1637794       62086816 #> 7            6 ENSG00000166535        A2ML1           7156  2156616       60911688 #> 8            7 ENSG00000256069        A2MP1           4657   835384    
   23554778 #> 9            8 ENSG00000184389      A3GALT2           1023   439067       53780311 #> 10           9 ENSG00000128274       A4GALT           3358  2432348       62706770 #> 11          10 ENSG00000118017        A4GNT           1779    52430       56117399 #> 12          11 ENSG00000265544         AA06            632   220755       22545140 #> 13          12 ENSG00000081760         AACS          16039 11280800       62842909 #> 14          13 ENSG00000250420       AACSP1           3380   211588       22831831 #> 15          14 ENSG00000114771        AADAC           1632   552258       54941618 #> 16          15 ENSG00000188984      AADACL3           4055    24626       43074608 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 60648 rows ]"},{"path":"/articles/census_dataset_presence.html","id":"identifying-genes-measured-in-a-specific-dataset","dir":"Articles","previous_headings":"","what":"Identifying genes measured in a specific dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Now dataset table, genes metadata table, dataset presence matrix, can check gene set genes measured specific dataset. Important: presence matrix indexed soma_joinid, positionally indexed. words: first dimension presence matrix dataset’s soma_joinid, stored census_datasets dataframe. second dimension presence matrix feature’s soma_joinid, stored var dataframe. presence matrix method $take() lets slice soma_joinids census_datasets var. 
full presence matrix, slices , can exported regular matrix method $get_one_based_matrix() Let’s find gene \"ENSG00000286096\" measured dataset id \"97a17473-e2b1-4f31-a544-44a60773e2dd\".","code":"# Get soma_joinid for datasets and genes of interest var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"] dataset_joinid <- datasets_df$soma_joinid[datasets_df$dataset_id == \"97a17473-e2b1-4f31-a544-44a60773e2dd\"]  # Slice presence matrix with datasets and genes of interest presence_matrix_slice <- presence_matrix$take(i = dataset_joinid, j = var_joinid)  # Convert presence matrix to regular matrix presence_matrix_slice <- presence_matrix_slice$get_one_based_matrix()  # Find how if the gene is present in this dataset is_present <- presence_matrix_slice[, , drop = TRUE] cat(paste(\"Feature is\", if (is_present) \"present.\" else \"not present.\")) #> Feature is present."},{"path":"/articles/census_dataset_presence.html","id":"identifying-datasets-that-measured-specific-genes","dir":"Articles","previous_headings":"","what":"Identifying datasets that measured specific genes","title":"Genes measured in each cell (dataset presence matrix)","text":"Similarly, can determine datasets measured specific gene set genes.","code":"# Grab the feature's soma_joinid from the var dataframe var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"]  # The presence matrix is indexed by the joinids of the dataset and var dataframes, # so slice out the feature of interest by its joinid. 
presence_matrix_slice <- presence_matrix$take(j = var_joinid)$get_one_based_matrix() measured_datasets <- presence_matrix_slice[, , drop = TRUE] != 0 dataset_joinids <- datasets_df$soma_joinid[measured_datasets]  # From the datasets dataframe, slice out the datasets which have a joinid in the list print(datasets_df[dataset_joinids, ]) #>    soma_joinid                        collection_id #> 63          62 3f50314f-bdc9-40c6-8e4a-b0901ebfbe4c #> 64          63 e5f58829-1a66-40b5-a624-9046778e74f5 #> 65          64 e5f58829-1a66-40b5-a624-9046778e74f5 #> 66          65 e5f58829-1a66-40b5-a624-9046778e74f5 #> 67          66 e5f58829-1a66-40b5-a624-9046778e74f5 #> 69          68 e5f58829-1a66-40b5-a624-9046778e74f5 #> 70          69 e5f58829-1a66-40b5-a624-9046778e74f5 #> 72          71 e5f58829-1a66-40b5-a624-9046778e74f5 #> 73          72 e5f58829-1a66-40b5-a624-9046778e74f5 #> 77          76 e5f58829-1a66-40b5-a624-9046778e74f5 #> 78          77 e5f58829-1a66-40b5-a624-9046778e74f5 #>                                                                                                                             collection_name #> 63 Single-cell sequencing links multiregional immune landscapes and tissue-resident T cells in ccRCC to tumor topology and therapy efficacy #> 64                                                                                                                           Tabula Sapiens #> 65                                                                                                                           Tabula Sapiens #> 66                                                                                                                           Tabula Sapiens #> 67                                                                                                                           Tabula Sapiens #> 69                                                                                                                           Tabula Sapiens 
#> 70                                                                                                                           Tabula Sapiens #> 72                                                                                                                           Tabula Sapiens #> 73                                                                                                                           Tabula Sapiens #> 77                                                                                                                           Tabula Sapiens #> 78                                                                                                                           Tabula Sapiens #>                 collection_doi                           dataset_id #> 63 10.1016/j.ccell.2021.03.007 bd65a70f-b274-4133-b9dd-0d1431b6af34 #> 64     10.1126/science.abl4896 ff45e623-7f5f-46e3-b47d-56be0341f66b #> 65     10.1126/science.abl4896 f01bdd17-4902-40f5-86e3-240d66dd2587 #> 66     10.1126/science.abl4896 e6a11140-2545-46bc-929e-da243eed2cae #> 67     10.1126/science.abl4896 e5c63d94-593c-4338-a489-e1048599e751 #> 69     10.1126/science.abl4896 d77ec7d6-ef2e-49d6-9e79-05b7f8881484 #> 70     10.1126/science.abl4896 cee11228-9f0b-4e57-afe2-cfe15ee56312 #> 72     10.1126/science.abl4896 a2d4d33e-4c62-4361-b80a-9be53d2e50e8 #> 73     10.1126/science.abl4896 a0754256-f44b-4c4a-962c-a552e47d3fdc #> 77     10.1126/science.abl4896 6d41668c-168c-4500-b06a-4674ccf3e19d #> 78     10.1126/science.abl4896 5e5e7a2f-8f1c-42ac-90dc-b4f80f38e84c #>                      dataset_version_id #> 63 71815674-a8cf-4add-95dd-c5d5d1631597 #> 64 0b29f4ce-5e72-4356-b74b-b54714979234 #> 65 bd13c169-af97-4d8f-ba45-7588808c2e48 #> 66 47615a3d-0a9f-4a78-88ef-5cce2a84637d #> 67 ac7714f0-dce2-40ba-9912-324de6c9a77f #> 69 c7679ec2-652d-437a-bded-3ec2344829e4 #> 70 f89fa18f-c32b-4bae-9511-1a4d18f200e1 #> 72 37ada0d2-9970-4ff2-8bcd-41e80ab6e081 #> 73 1cda78aa-f0d9-4d50-96bf-8bc309318802 #> 
77 5297a910-453f-4e3f-af16-e18fd5a79090 #> 78 b783b036-c837-4290-a07d-f6b79a301f59 #>                                                                                                                               dataset_title #> 63 Single-cell sequencing links multiregional immune landscapes and tissue-resident T cells in ccRCC to tumor topology and therapy efficacy #> 64                                                                                                                Tabula Sapiens - Pancreas #> 65                                                                                                          Tabula Sapiens - Salivary_Gland #> 66                                                                                                                   Tabula Sapiens - Heart #> 67                                                                                                                 Tabula Sapiens - Bladder #> 69                                                                                                                Tabula Sapiens - Prostate #> 70                                                                                                                  Tabula Sapiens - Spleen #> 72                                                                                                             Tabula Sapiens - Vasculature #> 73                                                                                                                     Tabula Sapiens - Eye #> 77                                                                                                                   Tabula Sapiens - Liver #> 78                                                                                                                     Tabula Sapiens - Fat #>                            dataset_h5ad_path dataset_total_cell_count #> 63 bd65a70f-b274-4133-b9dd-0d1431b6af34.h5ad                   167283 #> 64 ff45e623-7f5f-46e3-b47d-56be0341f66b.h5ad        
            13497 #> 65 f01bdd17-4902-40f5-86e3-240d66dd2587.h5ad                    27199 #> 66 e6a11140-2545-46bc-929e-da243eed2cae.h5ad                    11505 #> 67 e5c63d94-593c-4338-a489-e1048599e751.h5ad                    24583 #> 69 d77ec7d6-ef2e-49d6-9e79-05b7f8881484.h5ad                    16375 #> 70 cee11228-9f0b-4e57-afe2-cfe15ee56312.h5ad                    34004 #> 72 a2d4d33e-4c62-4361-b80a-9be53d2e50e8.h5ad                    16037 #> 73 a0754256-f44b-4c4a-962c-a552e47d3fdc.h5ad                    10650 #> 77 6d41668c-168c-4500-b06a-4674ccf3e19d.h5ad                     5007 #> 78 5e5e7a2f-8f1c-42ac-90dc-b4f80f38e84c.h5ad                    20263 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 31 rows ]"},{"path":"/articles/census_dataset_presence.html","id":"identifying-all-genes-measured-in-a-dataset","dir":"Articles","previous_headings":"","what":"Identifying all genes measured in a dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Finally, can find set genes measured cells given dataset.","code":"# Slice the dataset(s) of interest, and get the joinid(s) dataset_joinids <- datasets_df$soma_joinid[datasets_df$collection_id == \"17481d16-ee44-49e5-bcf0-28c0780d8c4a\"]  # Slice the presence matrix by the first dimension, i.e., by dataset presence_matrix_slice <- presence_matrix$take(i = dataset_joinids)$get_one_based_matrix() genes_measured <- Matrix::colSums(presence_matrix_slice) > 0 var_joinids <- var_df$soma_joinid[genes_measured]  print(var_df[var_joinids, ]) #>    soma_joinid      feature_id feature_name feature_length      nnz n_measured_obs #> 1            0 ENSG00000233576      HTR3C2P           1057    69370       19581263 #> 2            1 ENSG00000121410         A1BG           3999  5640476       62641311 #> 3            2 ENSG00000268895     A1BG-AS1           3374  3071864       61946057 #> 4            3 ENSG00000148584         A1CF           9603   734347       58195911 #> 5            4 
ENSG00000175899          A2M           6318  7894261       62704378 #> 6            5 ENSG00000245105      A2M-AS1           2948  1637794       62086816 #> 9            8 ENSG00000184389      A3GALT2           1023   439067       53780311 #> 10           9 ENSG00000128274       A4GALT           3358  2432348       62706770 #> 12          11 ENSG00000265544         AA06            632   220755       22545140 #> 14          13 ENSG00000250420       AACSP1           3380   211588       22831831 #> 16          15 ENSG00000188984      AADACL3           4055    24626       43074608 #> 18          17 ENSG00000240602      AADACP1           2012    29491       23133490 #> 19          18 ENSG00000109576        AADAT           2970  4524608       61559099 #> 20          19 ENSG00000158122       PRXL2C           3098  5424472       55618144 #> 21          20 ENSG00000103591        AAGAB           4138 12427442       62843055 #> 22          21 ENSG00000115977         AAK1          24843 29280566       62664775 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 27195 rows ]"},{"path":"/articles/census_dataset_presence.html","id":"close-the-census","dir":"Articles","previous_headings":"Identifying all genes measured in a dataset","what":"Close the census","title":"Genes measured in each cell (dataset presence matrix)","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_datasets.html","id":"fetching-the-datasets-table","dir":"Articles","previous_headings":"","what":"Fetching the datasets table","title":"Census Datasets example","text":"Census contains top-level data frame itemizing datasets contained therein. 
can read SOMADataFrame Arrow Table: R data frame: sum cell counts across datasets match number cells across SOMA experiments (human, mouse).","code":"library(\"cellxgene.census\") census <- open_soma() census_datasets <- census$get(\"census_info\")$get(\"datasets\")$read()$concat() print(census_datasets) #> Table #> 651 rows x 9 columns #> $soma_joinid  #> $collection_id  #> $collection_name  #> $collection_doi  #> $dataset_id  #> $dataset_version_id  #> $dataset_title  #> $dataset_h5ad_path  #> $dataset_total_cell_count  census_datasets <- as.data.frame(census_datasets) print(census_datasets[, c(   \"dataset_id\",   \"dataset_title\",   \"dataset_total_cell_count\" )]) #>                              dataset_id #> 1  2bdd3a2c-2ff4-4314-adf3-8a06b797a33a #> 2  f5b0810c-1664-4a62-ad06-be1d9964aa8b #> 3  e4ddac12-f48f-4455-8e8d-c2a48a683437 #> 4  e2808a6e-e2ea-41b9-b38c-4a08f1677f02 #> 5  d01c9dff-abd1-4825-bf30-2eb2ba74597e #> 6  c3aa4f95-7a18-4a7d-8dd8-ca324d714363 #> 7  be401db3-d732-408a-b0c4-71af0458b8ab #> 8  a5d5c529-8a1f-40b5-bda3-35208970070d #> 9  9c63201d-bfd9-41a8-bbbc-18d947556f3d #> 10 93cb76aa-a84b-4a92-8e6c-66a914e26d4c #> 11 8d1dd010-5cbc-43fb-83f8-e0de8e8517da #> 12 716a4acc-919e-4326-9672-ebe06ede84e6 #> 13 5bdc423a-59e6-457d-aa01-debd2c9c564f #> 14 5346f9c6-755e-4336-94cc-38706ec00c2f #> 15 015c230d-650c-4527-870d-8a805849a382 #> 16 d567b692-c374-4628-a508-8008f6778f22 #> 17 cf83c98a-3791-4537-bbde-a719f6d73c13 #> 18 738942eb-ac72-44ff-a64b-8943b5ecd8d9 #> 19 f8d8b443-bca6-4c3c-9042-669dfb7f8030 #> 20 f5be4b96-f5a3-4c3d-84ac-6f69daf744d5 #> 21 dea1aa78-c0a2-413f-b375-f91cce49e4d0 #> 22 92161459-9103-4379-ae34-73a38eee1d1d #> 23 5829c7ba-697f-418e-8b98-d605b192dc48 #> 24 4dd1cd23-fc4d-4fd1-9709-602540f3ca6f #> 25 2856d06c-0ff9-4e01-bfc9-202b74d0b60f #> 26 251b1a7e-d050-4486-8d50-4c2619eb0f46 #> 27 07760522-707a-4a1c-8891-dbd1226d6b27 #> 28 9fcb0b73-c734-40a5-be9c-ace7eea401c9 #> 29 1a38e762-2465-418f-b81c-6a4bce261c34 #> 30 
f16a8f4d-bc97-43c5-a2f6-bbda952e4c5c #> 31 94c41723-b2c4-4b59-a49a-64c9b851903e #> 32 6ceeaa86-9ceb-4582-b390-6d4dd6ff0572 #> 33 9a64bf99-ebe5-4276-93a8-bee9dff1cd47 #> 34 fc0ceb80-d2d9-47c1-9d78-b0e45c64c500 #> 35 d0ea3ec4-0f3b-4649-9146-1c0b5f303a55 #> 36 b8920ef5-7d22-497b-abca-a7a9eb76d79a #> 37 b1d37bbd-9ae4-4404-b2f9-f2fe66750e4e #> 38 a4e89c26-e8d4-4471-9b06-16a1405880f0 #> 39 a190b2e9-3796-4785-9a2f-013e2a9a43e6 #> 40 9ff9f9ba-016b-4cbb-8899-45dc20860b8b #> 41 9940f951-3dc0-4579-bbb2-2392786e59a3 #> 42 74d584f0-74fc-482e-b944-e76f29c1ab85 #> 43 6f7fd0f1-a2ed-4ff1-80d3-33dde731cbc3 #> 44 6cda07c7-5d7a-41ba-9799-5bb73da25a60 #> 45 646e3e87-e46b-4b12-85b5-8d8589e26088 #> 46 6437bc9c-16cb-46c8-8f79-9a7384a0212a #> 47 58c43cc2-e00e-43c4-94eb-8501369264e1 #> 48 53bc5729-6202-4351-bc99-1f36139e9dc4 #> 49 44c83972-e5d2-4858-ac58-2df9f4bf564b #> 50 2ecc72f8-085f-4e86-8692-771f316c54f6 #> 51 2e5a9b5d-d31b-4e9f-a179-d5d70ba459fb #> 52 1c9f5c6b-73da-4d17-95de-df080ffe0df1 #> 53 100c6145-7b0e-4ba6-81c1-ffebed0d1ac4 #> 54 0ed60482-a34f-4268-b576-d69cc30210f6 #> 55 0eccaf0c-19d2-4900-9962-899378adf8be #> 56 04c94a7d-1133-42c9-bb48-c697bd302a8d #> 57 0374f03c-62e2-4859-8a14-acb00b0627d5 #> 58 03181d87-4769-41e7-8c39-d9a81835f0d2 #> 59 f171db61-e57e-4535-a06a-35d8b6ef8f2b #> 60 ecf2e08e-2032-4a9e-b466-b65b395f4a02 #> 61 74cff64f-9da9-4b2a-9b3b-8a04a1598040 #> 62 5af90777-6760-4003-9dba-8f945fec6fdf #> 63 bd65a70f-b274-4133-b9dd-0d1431b6af34 #> 64 ff45e623-7f5f-46e3-b47d-56be0341f66b #> 65 f01bdd17-4902-40f5-86e3-240d66dd2587 #> 66 e6a11140-2545-46bc-929e-da243eed2cae #> 67 e5c63d94-593c-4338-a489-e1048599e751 #> 68 d8732da6-8d1d-42d9-b625-f2416c30054b #> 69 d77ec7d6-ef2e-49d6-9e79-05b7f8881484 #> 70 cee11228-9f0b-4e57-afe2-cfe15ee56312 #> 71 a357414d-2042-4eb5-95f0-c58604a18bdd #> 72 a2d4d33e-4c62-4361-b80a-9be53d2e50e8 #> 73 a0754256-f44b-4c4a-962c-a552e47d3fdc #> 74 983d5ec9-40e8-4512-9e65-a572a9c486cb #> 75 7357cee7-9f7f-4ab0-8cec-90de8f047e38 #> 76 
6ec405bb-4727-4c6d-ab4e-01fe489af7ea #> 77 6d41668c-168c-4500-b06a-4674ccf3e19d #> 78 5e5e7a2f-8f1c-42ac-90dc-b4f80f38e84c #> 79 55cf0ea3-9d2b-4294-871e-bb4b49a79fc7 #> 80 4f1555bc-4664-46c3-a606-78d34dd10d92 #> 81 2ba40233-8576-4dec-a5f1-2adfa115e2dc #> 82 2423ce2c-3149-4cca-a2ff-cf682ea29b5f #> 83 1c9eb291-6d31-47e1-96b2-129b5e1ae64f #> 84 18eb630b-a754-4111-8cd4-c24ec80aa5ec #> 85 0d2ee4ac-05ee-40b2-afb6-ebb584caa867 #>                                                                                                                               dataset_title #> 1                                                                                                                   Human: Great apes study #> 2                                                                                                           Dissection: Angular gyrus (AnG) #> 3                                                                                                    Supercluster: CGE-derived interneurons #> 4                                                                                                   Dissection: Primary auditory cortex(A1) #> 5                                                                                      Supercluster: Deep layer (non-IT) excitatory neurons #> 6                                                                                            Supercluster: IT-projecting excitatory neurons #> 7                                                                                               Dissection: Anterior cingulate cortex (ACC) #> 8                                                                                                   Human Multiple Cortical Areas SMART-seq #> 9                                                                                                    Supercluster: MGE-derived interneurons #> 10                                                                                            Dissection: Primary somatosensory cortex (S1) #> 
11                                                                                                    Dissection: Primary visual cortex(V1) #> 12                                                                                         Dissection: Dorsolateral prefrontal cortex (DFC) #> 13                                                                                                    Dissection: Primary motor cortex (M1) #> 14                                                                                                         Supercluster: Non-neuronal cells #> 15                                                                                                  Dissection: Middle temporal gyrus (MTG) #> 16                                                                       Combined single cell and single nuclei RNA-Seq data - Heart Global #> 17                                                                                                    Global dataset of infant KMT2Ar B-ALL #> 18                                                                                     Normal immune cells landscape of infant KMT2Ar B-ALL #> 19                                                                                                      Human Human Microglia 10x scRNA-seq #> 20                                                                                                    Human Endothelial cells 10x scRNA-seq #> 21                                                                                                 Human Nurr-Negative Nuclei 10x scRNA-seq #> 22                                                                                                 Human Nurr-Positive Nuclei 10x scRNA-seq #> 23                                                                                                     Human Oligodendrocytes 10x scRNA-seq #> 24                                                                                                            Human OPC Cells 10x scRNA-seq 
#> 25                                                                                                           Human DA Neurons 10x scRNA-seq #> 26                                                                                                       Human Non-DA Neurons 10x scRNA-seq #> 27                                                                                                           Human Astrocytes 10x scRNA-seq #> 28                                                                              An Integrated Single Cell Meta-atlas of Human Periodontitis #> 29                                                                Single-cell analysis of prenatal and postnatal human cortical development #> 30                                                       All - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse #> 31                                                                                    snRNA-seq of human anterior and posterior hippocampus #> 32                                                                                                                        3-prime FGID data #> 33                                                      Single-Cell RNA Sequencing of Breast Tissues: Cell Subtypes and Cancer Risk Factors #> 34                                                                            Sst Chodl - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 35                                                                                  L6b - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 36                                                                              L5/6 NP - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 37                                                                                 Sncg - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 38                                                                                L6 CT - DLPFC: Seattle Alzheimer's Disease Atlas 
(SEA-AD) #> 39                                                                           Lamp5 Lhx6 - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 40                                                                                L4 IT - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 41                                                                      Oligodendrocyte - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 42                                                                            Astrocyte - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 43                                                                       Whole Taxonomy - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 44                                                                                L5 ET - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 45                                                                              L2/3 IT - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 46                                                                                L6 IT - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 47                                                                                  OPC - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 48                                                                                  Vip - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 49                                                                                L5 IT - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 50                                                                          Endothelial - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 51                                                                                 VLMC - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 52                                                                           L6 IT Car3 - DLPFC: Seattle Alzheimer's Disease 
Atlas (SEA-AD) #> 53                                                                        Microglia-PVM - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 54                                                                                Lamp5 - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 55                                                                                 Pax6 - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 56                                                                                Pvalb - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 57                                                                           Chandelier - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 58                                                                                  Sst - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 59                                                                                                                   donor_p13_trophoblasts #> 60                                                                                                                  All donors trophoblasts #> 61                                                                                                     All donors all cell states (in vivo) #> 62                                                                     Single-cell transcriptomic datasets of Renal cell carcinoma patients #> 63 Single-cell sequencing links multiregional immune landscapes and tissue-resident T cells in ccRCC to tumor topology and therapy efficacy #> 64                                                                                                                Tabula Sapiens - Pancreas #> 65                                                                                                          Tabula Sapiens - Salivary_Gland #> 66                                                                                                                   
Tabula Sapiens - Heart #> 67                                                                                                                 Tabula Sapiens - Bladder #> 68                                                                                                                 Tabula Sapiens - Trachea #> 69                                                                                                                Tabula Sapiens - Prostate #> 70                                                                                                                  Tabula Sapiens - Spleen #> 71                                                                                                         Tabula Sapiens - Small_Intestine #> 72                                                                                                             Tabula Sapiens - Vasculature #> 73                                                                                                                     Tabula Sapiens - Eye #> 74                                                                                                                   Tabula Sapiens - Blood #> 75                                                                                                         Tabula Sapiens - Large_Intestine #> 76                                                                                                                  Tabula Sapiens - Uterus #> 77                                                                                                                   Tabula Sapiens - Liver #> 78                                                                                                                     Tabula Sapiens - Fat #> 79                                                                                                                  Tabula Sapiens - Tongue #> 80                                                                                                             
Tabula Sapiens - Bone_Marrow #> 81                                                                                                                 Tabula Sapiens - Mammary #> 82                                                                                                                  Tabula Sapiens - Kidney #> 83                                                                                                                  Tabula Sapiens - Muscle #> 84                                                                                                              Tabula Sapiens - Lymph_Node #> 85                                                                                                                    Tabula Sapiens - Lung #>    dataset_total_cell_count #> 1                    156285 #> 2                    110752 #> 3                    129495 #> 4                    139054 #> 5                     92969 #> 6                    638941 #> 7                    135462 #> 8                     49417 #> 9                    185477 #> 10                   153159 #> 11                   241077 #> 12                   113339 #> 13                   114605 #> 14                   108940 #> 15                   148374 #> 16                   493236 #> 17                   128588 #> 18                    36313 #> 19                    33041 #> 20                    14903 #> 21                   104097 #> 22                    80576 #> 23                   178815 #> 24                    13691 #> 25                    22048 #> 26                    91479 #> 27                    33506 #> 28                   105918 #> 29                   700391 #> 30                   356213 #> 31                   129905 #> 32                    89849 #> 33                    52681 #> 34                     1772 #> 35                    17996 #> 36                    18154 #> 37                    23640 #> 38                    27454 #> 39                    21603 #> 40           
         76195 #> 41                   136076 #> 42                    82936 #> 43                  1309414 #> 44                     3848 #> 45                   317116 #> 46                    44174 #> 47                    27670 #> 48                    95014 #> 49                    97173 #> 50                     2496 #> 51                     4619 #> 52                    13007 #> 53                    40625 #> 54                    52828 #> 55                     8984 #> 56                   109618 #> 57                    14871 #> 58                    71545 #> 59                    31497 #> 60                    67070 #> 61                   286326 #> 62                   270855 #> 63                   167283 #> 64                    13497 #> 65                    27199 #> 66                    11505 #> 67                    24583 #> 68                     9522 #> 69                    16375 #> 70                    34004 #> 71                    12467 #> 72                    16037 #> 73                    10650 #> 74                    50115 #> 75                    13680 #> 76                     7124 #> 77                     5007 #> 78                    20263 #> 79                    15020 #> 80                    12297 #> 81                    11375 #> 82                     9641 #> 83                    30746 #> 84                    53275 #> 85                    35682 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 566 rows ] census_data <- census$get(\"census_data\") all_experiments <- lapply(census_data$to_list(), function(x) census_data$get(x$name)) print(all_experiments) #> $homo_sapiens #>  #>   uri: s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/census_data/homo_sapiens  #>   arrays: obs*  #>   groups: ms*  #>  #> $mus_musculus #>  #>   uri: s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/census_data/mus_musculus  #>   arrays: obs*  #>   groups: ms* experiments_total_cells <- 
sum(sapply(all_experiments, function(x) {   nrow(x$obs$read(column_names = c(\"soma_joinid\"))$concat()) }))  print(paste(\"Found\", experiments_total_cells, \"cells in all experiments.\")) #> [1] \"Found 68683222 cells in all experiments.\" print(paste(   \"Found\", sum(as.vector(census_datasets$dataset_total_cell_count)),   \"cells in all datasets.\" )) #> [1] \"Found 68683222 cells in all datasets.\""},{"path":"/articles/census_datasets.html","id":"fetching-the-expression-data-from-a-single-dataset","dir":"Articles","previous_headings":"","what":"Fetching the expression data from a single dataset","title":"Census Datasets example","text":"Let’s pick one dataset slice census, turn Seurat -memory object. (requires Seurat package installed beforehand.) Create query mouse experiment, “RNA” measurement, dataset_id.","code":"census_datasets[census_datasets$dataset_id == \"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\", ] #>     soma_joinid                        collection_id    collection_name #> 581         580 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis #>                collection_doi                           dataset_id #> 581 10.1038/s41586-020-2496-1 0bd1a1de-3aee-40e0-b2ec-86c7a30c7149 #>                       dataset_version_id #> 581 ff352f35-58a2-4962-b716-649d1f9e9f44 #>                                                                                        dataset_title #> 581 Bone marrow - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - 10x #>                             dataset_h5ad_path dataset_total_cell_count #> 581 0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad                    40220 library(\"tiledbsoma\") obs_query <- SOMAAxisQuery$new(   value_filter = \"dataset_id == '0bd1a1de-3aee-40e0-b2ec-86c7a30c7149'\" ) expt_query <- census_data$get(\"mus_musculus\")$axis_query(   measurement_name = \"RNA\",   obs_query = obs_query ) dataset_seurat <- expt_query$to_seurat(c(counts = \"raw\")) print(dataset_seurat) #> An 
object of class Seurat  #> 52417 features across 40220 samples within 1 assay  #> Active assay: RNA (52417 features, 0 variable features) #>  2 layers present: counts, data #>  1 dimensional reduction calculated: scvi"},{"path":"/articles/census_datasets.html","id":"downloading-the-original-source-h5ad-file-of-a-dataset","dir":"Articles","previous_headings":"","what":"Downloading the original source H5AD file of a dataset","title":"Census Datasets example","text":"can use cellxgene.census::get_source_h5ad_uri() API fetch URI pointing H5AD associated dataset_id. H5AD can download CZ CELLxGENE Discover, may contain additional data-submitter provided information included Census. can fetch location cloud directly download system. local H5AD file can used R using SeuratDisk’s anndata converter.","code":"# Option 1: Direct download download_source_h5ad(   dataset_id = \"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\",   file = \"/tmp/Tabula_Muris_Senis-bone_marrow.h5ad\",   overwrite = TRUE ) # Option 2: Get location and download via preferred method get_source_h5ad_uri(\"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\") #> $uri #> [1] \"s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/h5ads/0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\""},{"path":"/articles/census_datasets.html","id":"close-the-census","dir":"Articles","previous_headings":"Downloading the original source H5AD file of a dataset","what":"Close the census","title":"Census Datasets example","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. 
Closing can automated using on.exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_query_extract.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"library(\"cellxgene.census\") census <- open_soma()"},{"path":"/articles/census_query_extract.html","id":"querying-cell-metadata-obs","dir":"Articles","previous_headings":"","what":"Querying cell metadata (obs)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human cell metadata Census, RNA assays, located census$get(\"census_data\")$get(\"homo_sapiens\")$obs. SOMADataFrame can materialized R data frame (tibble) using as.data.frame(obs$read()$concat()). mouse cell metadata census$get(\"census_data\")$get(\"mus_musculus\")$obs. slicing cell metadata two relevant arguments can passed read(): column_names — character vector indicating metadata columns fetch. value_filter — expression selection conditions fetch rows. Expressions one comparisons. Comparisons one <column> <op> <value> <column> <op> <column>. Expressions can combine comparisons using && ||. op one < | > | <= | >= | == | != %in%. learn metadata columns available fetching filtering can directly look keys cell metadata. soma_joinid special SOMADataFrame column used join operations. definition columns can found Census schema. can used fetch specific columns specific rows matching condition. latter need know values looking priori. example let’s see possible values available sex. can load cell metadata fetching column sex. can see three different values sex, \"male\", \"female\" \"unknown\". information can fetch cell metadata specific sex value, example \"unknown\". 
can use column_names value_filter perform specific queries. example let’s fetch disease column cell_type \"B cell\" tissue_general \"lung\".","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\"          #> [22] \"raw_sum\"                                  #> [23] \"nnz\"                                      #> [24] \"raw_mean_nnz\"                             #> [25] \"raw_variance_nnz\"                         #> [26] \"n_measured_vars\" unique(as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(column_names = \"sex\")$concat())) #>             sex #> 1          male #> 224      female #> 3747640 unknown as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(value_filter = \"sex == 'unknown'\")$concat()) #>   soma_joinid                           dataset_id     assay assay_ontology_term_id #> 1     3747639 
9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 2     3747640 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 3     3747641 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 4     3747642 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 5     3747643 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 6     3747644 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 7     3747645 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 8     3747646 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 9     3747647 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #>    cell_type cell_type_ontology_term_id development_stage #> 1 fibroblast                 CL:0000057 human adult stage #> 2 fibroblast                 CL:0000057 human adult stage #> 3 fibroblast                 CL:0000057 human adult stage #> 4 fibroblast                 CL:0000057 human adult stage #> 5 fibroblast                 CL:0000057 human adult stage #> 6 fibroblast                 CL:0000057 human adult stage #> 7 fibroblast                 CL:0000057 human adult stage #> 8 fibroblast                 CL:0000057 human adult stage #> 9 fibroblast                 CL:0000057 human adult stage #>   development_stage_ontology_term_id disease disease_ontology_term_id #> 1                     HsapDv:0000087  normal             PATO:0000461 #> 2                     HsapDv:0000087  normal             PATO:0000461 #> 3                     HsapDv:0000087  normal             PATO:0000461 #> 4                     HsapDv:0000087  normal             PATO:0000461 #> 5                     HsapDv:0000087  normal             PATO:0000461 #> 6                     HsapDv:0000087  normal             PATO:0000461 #> 7                     HsapDv:0000087  normal             PATO:0000461 #> 8                     HsapDv:0000087 
 normal             PATO:0000461 #> 9                     HsapDv:0000087  normal             PATO:0000461 #>                       donor_id is_primary_data self_reported_ethnicity #> 1 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 2 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 3 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 4 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 5 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 6 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 7 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 8 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 9 Pagella_GSE161267_GSM4904134            TRUE                 unknown #>   self_reported_ethnicity_ontology_term_id     sex sex_ontology_term_id suspension_type #> 1                                  unknown unknown              unknown            cell #> 2                                  unknown unknown              unknown            cell #> 3                                  unknown unknown              unknown            cell #> 4                                  unknown unknown              unknown            cell #> 5                                  unknown unknown              unknown            cell #> 6                                  unknown unknown              unknown            cell #> 7                                  unknown unknown              unknown            cell #> 8                                  unknown unknown              unknown            cell #> 9                                  unknown unknown              unknown            cell #>    tissue tissue_ontology_term_id tissue_general tissue_general_ontology_term_id #> 1 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 2 gingiva          UBERON:0001828         mucosa                  
UBERON:0000344 #> 3 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 4 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 5 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 6 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 7 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 8 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 9 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #>   raw_sum  nnz raw_mean_nnz raw_variance_nnz n_measured_vars #> 1     547  329     1.662614        14.559604           31602 #> 2     982  563     1.744227         5.315247           31602 #> 3   12467 3809     3.273038       109.305683           31602 #> 4    1053  566     1.860424         7.430042           31602 #> 5     548  363     1.509642         2.410818           31602 #> 6     678  429     1.580420        11.379616           31602 #> 7     848  524     1.618321         9.437216           31602 #> 8     935  608     1.537829         4.868418           31602 #> 9     735  485     1.515464         6.213087           31602 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 3301779 rows ] cell_metadata_b_cell <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   value_filter = \"cell_type == 'B cell' & tissue_general == 'lung'\",   column_names = \"disease\" )  cell_metadata_b_cell <- as.data.frame(cell_metadata_b_cell$concat())  table(cell_metadata_b_cell) #> disease #>                              COVID-19 chronic obstructive pulmonary disease  #>                                  2729                                  6369  #>          hypersensitivity pneumonitis             interstitial lung disease  #>                                    52                                   376  #>                   lung adenocarcinoma             lung large 
cell carcinoma  #>                                 62351                                  1534  #>              lymphangioleiomyomatosis         non-small cell lung carcinoma  #>                                   133                                 17484  #>   non-specific interstitial pneumonia                                normal  #>                                   231                                 25461  #>                 pleomorphic carcinoma                             pneumonia  #>                                  1210                                    50  #>                   pulmonary emphysema                    pulmonary fibrosis  #>                                  1512                                  6798  #>                 pulmonary sarcoidosis             small cell lung carcinoma  #>                                     6                                   583  #>          squamous cell lung carcinoma  #>                                 11920"},{"path":"/articles/census_query_extract.html","id":"querying-gene-metadata-var","dir":"Articles","previous_headings":"","what":"Querying gene metadata (var)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census located census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var. Similarly cell metadata, SOMADataFrame thus can also use method read(). mouse gene metadata census$get(\"census_data\")$get(\"mus_musculus\")$ms$get(\"RNA\")$var. Let’s take look metadata available column selection row filtering. exception soma_joinid columns defined Census schema. Similarly cell metadata, can use operations learn fetch gene metadata. 
example, get feature_name feature_length genes \"ENSG00000161798\" \"ENSG00000188229\" can following.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\" \"nnz\"            #> [6] \"n_measured_obs\" var_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$read(   value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   column_names = c(\"feature_name\", \"feature_length\") )  as.data.frame(var_df$concat()) #>   feature_name feature_length #> 1         AQP5           1884 #> 2       TUBB4B           2037"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-seurat","dir":"Articles","previous_headings":"","what":"Querying expression data as Seurat","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"convenient way query fetch expression data use get_seurat method cellxgene.census API. method combines column selection value filtering described obtain slices expression data based metadata queries. method return Seurat object, takes input census object, string organism, cell gene metadata can specify filters column selection described following arguments: obs_column_names — character vector indicating columns select cell metadata. obs_value_filter — expression selection conditions fetch cells meeting criteria. var_column_names — character vector indicating columns select gene metadata. var_value_filter — expression selection conditions fetch genes meeting criteria. example want fetch expression data : Genes \"ENSG00000161798\" \"ENSG00000188229\". \"B cells\" \"lung\" \"COVID-19\". gene metadata adding sex cell metadata. 
full description refer ?cellxgene.census::get_seurat.","code":"library(\"Seurat\")  seurat_obj <- get_seurat(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) seurat_obj #> An object of class Seurat  #> 2 features across 2729 samples within 1 assay  #> Active assay: RNA (2 features, 0 variable features) #>  2 layers present: counts, data head(seurat_obj[[]]) #>                 orig.ident cell_type tissue_general  disease     sex #> cell13391229 SeuratProject    B cell           lung COVID-19    male #> cell13393737 SeuratProject    B cell           lung COVID-19 unknown #> cell13394391 SeuratProject    B cell           lung COVID-19    male #> cell13394897 SeuratProject    B cell           lung COVID-19 unknown #> cell13395941 SeuratProject    B cell           lung COVID-19    male #> cell13397408 SeuratProject    B cell           lung COVID-19 unknown head(seurat_obj$RNA[[]]) #>                 feature_name feature_length      nnz n_measured_obs #> ENSG00000161798         AQP5           1884  1029069       58250439 #> ENSG00000188229       TUBB4B           2037 21416107       62655002"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-singlecellexperiment","dir":"Articles","previous_headings":"","what":"Querying expression data as SingleCellExperiment","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"Similarly previous section, get_single_cell_experiment method cellxgene.census API. behaves exactly get_seurat returns SingleCellExperiment object. example, repeat query can simply following. 
full description refer ?cellxgene.census::get_single_cell_experiment.","code":"library(\"SingleCellExperiment\")  sce_obj <- get_single_cell_experiment(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) sce_obj #> class: SingleCellExperiment  #> dim: 2 2729  #> metadata(0): #> assays(1): counts #> rownames(2): ENSG00000161798 ENSG00000188229 #> rowData names(4): feature_name feature_length nnz n_measured_obs #> colnames(2729): obs13391229 obs13393737 ... obs54635684 obs54635708 #> colData names(4): cell_type tissue_general disease sex #> reducedDimNames(0): #> mainExpName: RNA #> altExpNames(0): head(colData(sce_obj)) #> DataFrame with 6 rows and 4 columns #>               cell_type tissue_general     disease         sex #>                    #> obs13391229      B cell           lung    COVID-19        male #> obs13393737      B cell           lung    COVID-19     unknown #> obs13394391      B cell           lung    COVID-19        male #> obs13394897      B cell           lung    COVID-19     unknown #> obs13395941      B cell           lung    COVID-19        male #> obs13397408      B cell           lung    COVID-19     unknown head(rowData(sce_obj)) #> DataFrame with 2 rows and 4 columns #>                 feature_name feature_length       nnz n_measured_obs #>                                #> ENSG00000161798         AQP5           1884   1029069       58250439 #> ENSG00000188229       TUBB4B           2037  21416107       62655002"},{"path":"/articles/census_query_extract.html","id":"close-the-census","dir":"Articles","previous_headings":"Querying expression data as SingleCellExperiment","what":"Close the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"use, census object closed 
release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using on.exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/comp_bio_census_info.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Learning about the CZ CELLxGENE Census","text":"cellxgene.census R package contains convenient open_soma() API open version Census (stable default). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"library(\"cellxgene.census\") census <- open_soma()"},{"path":"/articles/comp_bio_census_info.html","id":"census-organization","dir":"Articles","previous_headings":"","what":"Census organization","title":"Learning about the CZ CELLxGENE Census","text":"Census schema defines structure Census. short, can think Census structured collection items stores different pieces information. items parent collection SOMA objects various types can accessed TileDB-SOMA API (documentation). cellxgene.census package contains convenient wrappers TileDB-SOMA API. example function used open Census: cellxgene.census::open_soma().","code":""},{"path":"/articles/comp_bio_census_info.html","id":"main-census-components","dir":"Articles","previous_headings":"Census organization","what":"Main Census components","title":"Learning about the CZ CELLxGENE Census","text":"command created census, SOMACollection, R6 class providing key-value associative map. get() method can access two top-level collection members, census_info census_data, instances SOMACollection.","code":""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-info","dir":"Articles","previous_headings":"Census organization","what":"Census summary info","title":"Learning about the CZ CELLxGENE Census","text":"census$get(\"census_info\")$get(\"summary\"): data frame high-level information Census, e.g. 
build date, total cell count, etc. census$get(\"census_info\")$get(\"datasets\"): data frame datasets CELLxGENE Discover used create Census. census$get(\"census_info\")$get(\"summary_cell_counts\"): data frame cell counts stratified relevant cell metadata Census data Data organism stored independent SOMAExperiment objects specialized form SOMACollection. store data matrix (cell genes), cell metadata, gene metadata, useful components covered notebook. data organized one organism – Homo sapiens: census$get(\"census_data\")$get(\"homo_sapiens\")$obs: Cell metadata census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\"): Data matrices, currently … census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$X$get(\"raw\"): matrix raw counts SOMASparseNDArray census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var: Gene Metadata","code":""},{"path":"/articles/comp_bio_census_info.html","id":"cell-metadata","dir":"Articles","previous_headings":"","what":"Cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"can obtain cell metadata variables directly querying columns corresponding SOMADataFrame. variables can used querying Census case want work specific cells. variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. dataset_id: dataset id encoded census$get(\"census_info\")$get(\"datasets\"). 
tissue_general tissue_general_ontology_term_id: high-level tissue mapping.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\"          #> [22] \"raw_sum\"                                  #> [23] \"nnz\"                                      #> [24] \"raw_mean_nnz\"                             #> [25] \"raw_variance_nnz\"                         #> [26] \"n_measured_vars\""},{"path":"/articles/comp_bio_census_info.html","id":"gene-metadata","dir":"Articles","previous_headings":"","what":"Gene metadata","title":"Learning about the CZ CELLxGENE Census","text":"Similarly, can obtain gene metadata variables directly querying columns corresponding SOMADataFrame. variables can use querying Census case specific genes interested . variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. 
feature_length: length base pairs gene.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\" \"nnz\"            #> [6] \"n_measured_obs\""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-content-tables","dir":"Articles","previous_headings":"","what":"Census summary content tables","title":"Learning about the CZ CELLxGENE Census","text":"can take quick look high-level Census information looking census$get(\"census_info\")$get(\"summary\"): special interest label-value combinations : total_cell_count total number cells Census. unique_cell_count number unique cells, cells may present twice due meta-analysis consortia-like data. number_donors_homo_sapiens number_donors_mus_musculus number individuals human mouse. guaranteed unique one individual ID may present identical different datasets.","code":"as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #>   soma_joinid                      label      value #> 1           0      census_schema_version      1.2.0 #> 2           1          census_build_date 2023-10-23 #> 3           2     dataset_schema_version      3.1.0 #> 4           3           total_cell_count   68683222 #> 5           4          unique_cell_count   40356133 #> 6           5 number_donors_homo_sapiens      15588 #> 7           6 number_donors_mus_musculus       1990"},{"path":"/articles/comp_bio_census_info.html","id":"cell-counts-by-cell-metadata","dir":"Articles","previous_headings":"Census summary content tables","what":"Cell counts by cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"looking census$get(\"census_info)$get(\"summary_cell_counts\") can get general idea cell counts stratified relevant cell metadata. cell metadata included table, can take look cell gene metadata available sections “Cell metadata” “Gene metadata”. 
line retrieves table casts R data frame: combination organism values category cell metadata can take look total_cell_count unique_cell_count cell counts combination. values category specified ontology_term_id label, value’s IDs labels, respectively.","code":"census_counts <- as.data.frame(census$get(\"census_info\")$get(\"summary_cell_counts\")$read()$concat()) head(census_counts) #>   soma_joinid     organism category ontology_term_id unique_cell_count total_cell_count #> 1           0 Homo sapiens      all               na          36227903         62998417 #> 2           1 Homo sapiens    assay      EFO:0008722            264166           279635 #> 3           2 Homo sapiens    assay      EFO:0008780             25652            51304 #> 4           3 Homo sapiens    assay      EFO:0008796             54753            54753 #> 5           4 Homo sapiens    assay      EFO:0008919             89477           206754 #> 6           5 Homo sapiens    assay      EFO:0008931             78750           188248 #>        label #> 1         na #> 2   Drop-seq #> 3     inDrop #> 4   MARS-seq #> 5   Seq-Well #> 6 Smart-seq2"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-metadata-included-in-the-summary-counts-table","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell metadata included in the summary counts table","title":"Learning about the CZ CELLxGENE Census","text":"get available cell metadata summary counts table can following. 
Remember cell metadata available, variables omitted creation table.","code":"t(table(census_counts$organism, census_counts$category)) #>                           #>                           Homo sapiens Mus musculus #>   all                                1            1 #>   assay                             20           10 #>   cell_type                        631          248 #>   disease                           72            5 #>   self_reported_ethnicity           30            1 #>   sex                                3            3 #>   suspension_type                    1            1 #>   tissue                           230           74 #>   tissue_general                    53           27"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-counts-for-each-sequencing-assay-in-human-data","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell counts for each sequencing assay in human data","title":"Learning about the CZ CELLxGENE Census","text":"get cell counts sequencing assay type human data, can perform following operations:","code":"human_assay_counts <- census_counts[census_counts$organism == \"Homo sapiens\" & census_counts$category == \"assay\", ] human_assay_counts <- human_assay_counts[order(human_assay_counts$total_cell_count, decreasing = TRUE), ]"},{"path":"/articles/comp_bio_census_info.html","id":"example-number-of-microglial-cells-in-the-census","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: number of microglial cells in the Census","title":"Learning about the CZ CELLxGENE Census","text":"specific term categories shown can directly find number cells term.","code":"census_counts[census_counts$label == \"microglial cell\", ] #>      soma_joinid     organism  category ontology_term_id unique_cell_count #> 72            71 Homo sapiens cell_type       CL:0000129            359243 #> 1080      
  1079 Mus musculus cell_type       CL:0000129             48998 #>      total_cell_count           label #> 72             544977 microglial cell #> 1080            75885 microglial cell"},{"path":"/articles/comp_bio_census_info.html","id":"understanding-census-contents-beyond-the-summary-tables","dir":"Articles","previous_headings":"","what":"Understanding Census contents beyond the summary tables","title":"Learning about the CZ CELLxGENE Census","text":"using pre-computed tables census$get(\"census_info\") easy quick way understand contents Census, falls short want learn certain slices Census. example, may want learn : cell types available human liver? total number cells lung datasets stratified sequencing technology? sex distribution cells brain mouse? diseases available T cells? questions can answered directly querying cell metadata shown examples .","code":""},{"path":"/articles/comp_bio_census_info.html","id":"example-all-cell-types-available-in-human","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: all cell types available in human","title":"Learning about the CZ CELLxGENE Census","text":"exemplify process accessing slicing cell metadata summary stats, let’s start trivial example take look human cell types available Census: number rows total number cells humans. Now, wish get cell counts per cell type can work data frame. addition, focus cells marked is_primary_data=TRUE ensures de-duplicate cells appear CELLxGENE Discover. number unique cells. Now let’s look counts per cell type: shows abundant cell types “glutamatergic neuron”, “CD8-positive, alpha-beta T cell”, “CD4-positive, alpha-beta T cell”. Now let’s take look number unique cell types: total number different cell types human. information example can quickly obtained summary table census$get(\"census-info\")$get(\"summary_cell_counts\"). 
examples complex can achieved accessing cell metadata.","code":"obs_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(column_names = c(\"cell_type\", \"is_primary_data\")) as.data.frame(obs_df$concat()) #>                            cell_type is_primary_data #> 1                    oligodendrocyte           FALSE #> 2     oligodendrocyte precursor cell           FALSE #> 3   astrocyte of the cerebral cortex           FALSE #> 4   astrocyte of the cerebral cortex           FALSE #> 5   astrocyte of the cerebral cortex           FALSE #> 6     oligodendrocyte precursor cell           FALSE #> 7   astrocyte of the cerebral cortex           FALSE #> 8                    microglial cell           FALSE #> 9   astrocyte of the cerebral cortex           FALSE #> 10  astrocyte of the cerebral cortex           FALSE #> 11  astrocyte of the cerebral cortex           FALSE #> 12  astrocyte of the cerebral cortex           FALSE #> 13  astrocyte of the cerebral cortex           FALSE #> 14  astrocyte of the cerebral cortex           FALSE #> 15  astrocyte of the cerebral cortex           FALSE #> 16    oligodendrocyte precursor cell           FALSE #> 17                   oligodendrocyte           FALSE #> 18  astrocyte of the cerebral cortex           FALSE #> 19  astrocyte of the cerebral cortex           FALSE #> 20  astrocyte of the cerebral cortex           FALSE #> 21  astrocyte of the cerebral cortex           FALSE #> 22  astrocyte of the cerebral cortex           FALSE #> 23    oligodendrocyte precursor cell           FALSE #> 24  astrocyte of the cerebral cortex           FALSE #> 25  astrocyte of the cerebral cortex           FALSE #> 26    oligodendrocyte precursor cell           FALSE #> 27                   microglial cell           FALSE #> 28                   oligodendrocyte           FALSE #> 29  astrocyte of the cerebral cortex           FALSE #> 30  cerebral cortex endothelial cell           FALSE #> 31                   microglial cell       
    FALSE #> 32                   microglial cell           FALSE #> 33                   microglial cell           FALSE #> 34                   oligodendrocyte           FALSE #> 35                   oligodendrocyte           FALSE #> 36                   microglial cell           FALSE #> 37                   oligodendrocyte           FALSE #> 38                   oligodendrocyte           FALSE #> 39  astrocyte of the cerebral cortex           FALSE #> 40                   oligodendrocyte           FALSE #> 41  astrocyte of the cerebral cortex           FALSE #> 42                   oligodendrocyte           FALSE #> 43    oligodendrocyte precursor cell           FALSE #> 44                   oligodendrocyte           FALSE #> 45  astrocyte of the cerebral cortex           FALSE #> 46    oligodendrocyte precursor cell           FALSE #> 47                   oligodendrocyte           FALSE #> 48    oligodendrocyte precursor cell           FALSE #> 49  astrocyte of the cerebral cortex           FALSE #> 50  astrocyte of the cerebral cortex           FALSE #> 51  astrocyte of the cerebral cortex           FALSE #> 52                   oligodendrocyte           FALSE #> 53                   oligodendrocyte           FALSE #> 54                   oligodendrocyte           FALSE #> 55  astrocyte of the cerebral cortex           FALSE #> 56  cerebral cortex endothelial cell           FALSE #> 57                   oligodendrocyte           FALSE #> 58                   oligodendrocyte           FALSE #> 59                   oligodendrocyte           FALSE #> 60                   microglial cell           FALSE #> 61                   microglial cell           FALSE #> 62    oligodendrocyte precursor cell           FALSE #> 63    oligodendrocyte precursor cell           FALSE #> 64                   oligodendrocyte           FALSE #> 65    oligodendrocyte precursor cell           FALSE #> 66                   oligodendrocyte           FALSE #> 67  astrocyte of the 
cerebral cortex           FALSE #> 68                   oligodendrocyte           FALSE #> 69    oligodendrocyte precursor cell           FALSE #> 70                   oligodendrocyte           FALSE #> 71  astrocyte of the cerebral cortex           FALSE #> 72  astrocyte of the cerebral cortex           FALSE #> 73  astrocyte of the cerebral cortex           FALSE #> 74    oligodendrocyte precursor cell           FALSE #> 75  astrocyte of the cerebral cortex           FALSE #> 76    oligodendrocyte precursor cell           FALSE #> 77                   microglial cell           FALSE #> 78                   microglial cell           FALSE #> 79    oligodendrocyte precursor cell           FALSE #> 80                   oligodendrocyte           FALSE #> 81                   oligodendrocyte           FALSE #> 82  astrocyte of the cerebral cortex           FALSE #> 83                   oligodendrocyte           FALSE #> 84  astrocyte of the cerebral cortex           FALSE #> 85  astrocyte of the cerebral cortex           FALSE #> 86                   oligodendrocyte           FALSE #> 87  astrocyte of the cerebral cortex           FALSE #> 88                   oligodendrocyte           FALSE #> 89    oligodendrocyte precursor cell           FALSE #> 90    oligodendrocyte precursor cell           FALSE #> 91  astrocyte of the cerebral cortex           FALSE #> 92  astrocyte of the cerebral cortex           FALSE #> 93  astrocyte of the cerebral cortex           FALSE #> 94                   oligodendrocyte           FALSE #> 95  astrocyte of the cerebral cortex           FALSE #> 96  astrocyte of the cerebral cortex           FALSE #> 97                   oligodendrocyte           FALSE #> 98                   oligodendrocyte           FALSE #> 99    oligodendrocyte precursor cell           FALSE #> 100                  oligodendrocyte           FALSE #> 101                  oligodendrocyte           FALSE #> 102                  oligodendrocyte           FALSE #> 103 
astrocyte of the cerebral cortex           FALSE #> 104   oligodendrocyte precursor cell           FALSE #> 105                  oligodendrocyte           FALSE #> 106   oligodendrocyte precursor cell           FALSE #> 107                  oligodendrocyte           FALSE #> 108                  oligodendrocyte           FALSE #> 109                  oligodendrocyte           FALSE #> 110                  oligodendrocyte           FALSE #> 111   oligodendrocyte precursor cell           FALSE #> 112                  oligodendrocyte           FALSE #> 113                  oligodendrocyte           FALSE #> 114 astrocyte of the cerebral cortex           FALSE #> 115                  oligodendrocyte           FALSE #> 116 astrocyte of the cerebral cortex           FALSE #> 117                  oligodendrocyte           FALSE #> 118                  oligodendrocyte           FALSE #> 119                  oligodendrocyte           FALSE #> 120 astrocyte of the cerebral cortex           FALSE #> 121 astrocyte of the cerebral cortex           FALSE #> 122   oligodendrocyte precursor cell           FALSE #> 123                  microglial cell           FALSE #> 124 astrocyte of the cerebral cortex           FALSE #> 125 astrocyte of the cerebral cortex           FALSE #> 126                  microglial cell           FALSE #> 127 cerebral cortex endothelial cell           FALSE #> 128   oligodendrocyte precursor cell           FALSE #>  [ reached 'max' / getOption(\"max.print\") -- omitted 62998289 rows ] obs_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   column_names = \"cell_type\",   value_filter = \"is_primary_data == TRUE\" )  obs_df <- as.data.frame(obs_df$concat()) nrow(obs_df) #> [1] 36227903 human_cell_type_counts <- table(obs_df$cell_type) sort(human_cell_type_counts, decreasing = TRUE)[1:10] #>  #>                                                             neuron  #>                                                            2815336  #>     
                                          glutamatergic neuron  #>                                                            1563446  #>                                    CD4-positive, alpha-beta T cell  #>                                                            1243885  #>                                    CD8-positive, alpha-beta T cell  #>                                                            1197715  #> L2/3-6 intratelencephalic projecting glutamatergic cortical neuron  #>                                                            1123360  #>                                                    oligodendrocyte  #>                                                            1063874  #>                                                 classical monocyte  #>                                                            1030996  #>                                                        native cell  #>                                                            1011949  #>                                                             B cell  #>                                                             934060  #>                                                natural killer cell  #>                                                             770637 length(human_cell_type_counts) #> [1] 610"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-types-available-in-human-liver","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: cell types available in human liver","title":"Learning about the CZ CELLxGENE Census","text":"Similar example , can learn cell types available specific tissue, e.g. liver. achieve goal just need limit cell metadata tissue. use information cell metadata variable tissue_general. 
variable contains high-level tissue label cells Census: cell types cell counts human liver.","code":"obs_liver_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   column_names = \"cell_type\",   value_filter = \"is_primary_data == TRUE & tissue_general == 'liver'\" )  obs_liver_df <- as.data.frame(obs_liver_df$concat())  sort(table(obs_liver_df$cell_type), decreasing = TRUE)[1:10] #>  #>                          T cell                     hepatoblast  #>                           85739                           58447  #>                 neoplastic cell                    erythroblast  #>                           52431                           45605  #>                        monocyte                      hepatocyte  #>                           31388                           28309  #>             natural killer cell    periportal region hepatocyte  #>                           26871                           23509  #>                      macrophage centrilobular region hepatocyte  #>                           16707                           15819"},{"path":"/articles/comp_bio_census_info.html","id":"example-diseased-t-cells-in-human-tissues","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: diseased T cells in human tissues","title":"Learning about the CZ CELLxGENE Census","text":"example going get counts diseased cells annotated T cells. 
sake example focus “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”: cell counts annotated indicated disease across human tissues “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”.","code":"obs_t_cells_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   column_names = c(\"disease\", \"tissue_general\"),   value_filter = \"is_primary_data == TRUE & disease != 'normal' & cell_type %in% c('CD8-positive, alpha-beta T cell', 'CD4-positive, alpha-beta T cell')\" )  obs_t_cells_df <- as.data.frame(obs_t_cells_df$concat())  print(table(obs_t_cells_df)) #>                                        tissue_general #> disease                                 adrenal gland  blood bone marrow  brain breast #>   COVID-19                                          0 819428           0      0      0 #>   Crohn disease                                     0      0           0      0      0 #>   Down syndrome                                     0      0         181      0      0 #>   breast cancer                                     0      0           0      0   1850 #>   chronic obstructive pulmonary disease             0      0           0      0      0 #>   chronic rhinitis                                  0      0           0      0      0 #>   clear cell renal carcinoma                        0   6548           0      0      0 #>   cystic fibrosis                                   0      0           0      0      0 #>   follicular lymphoma                               0      0           0      0      0 #>   influenza                                         0   8871           0      0      0 #>   interstitial lung disease                         0      0           0      0      0 #>   kidney benign neoplasm                            0      0           0      0      0 #>   kidney oncocytoma                                 0      0           0      0      0 #>   lung adenocarcinoma                             205      0           0   
3274      0 #>   lung large cell carcinoma                         0      0           0      0      0 #>   lymphangioleiomyomatosis                          0      0           0      0      0 #>                                        tissue_general #> disease                                  colon kidney  liver   lung lymph node   nose #>   COVID-19                                   0      0      0  30578          0     13 #>   Crohn disease                          17490      0      0      0          0      0 #>   Down syndrome                              0      0      0      0          0      0 #>   breast cancer                              0      0      0      0          0      0 #>   chronic obstructive pulmonary disease      0      0      0   9382          0      0 #>   chronic rhinitis                           0      0      0      0          0    909 #>   clear cell renal carcinoma                 0  20540      0      0         36      0 #>   cystic fibrosis                            0      0      0      7          0      0 #>   follicular lymphoma                        0      0      0      0       1089      0 #>   influenza                                  0      0      0      0          0      0 #>   interstitial lung disease                  0      0      0   1803          0      0 #>   kidney benign neoplasm                     0     10      0      0          0      0 #>   kidney oncocytoma                          0   2303      0      0          0      0 #>   lung adenocarcinoma                        0      0    507 215013      24969      0 #>   lung large cell carcinoma                  0      0      0   5922          0      0 #>   lymphangioleiomyomatosis                   0      0      0    513          0      0 #>                                        tissue_general #> disease                                 pleural fluid respiratory system saliva #>   COVID-19                                          0                  4     41 #>   Crohn 
disease                                     0                  0      0 #>   Down syndrome                                     0                  0      0 #>   breast cancer                                     0                  0      0 #>   chronic obstructive pulmonary disease             0                  0      0 #>   chronic rhinitis                                  0                  0      0 #>   clear cell renal carcinoma                        0                  0      0 #>   cystic fibrosis                                   0                  0      0 #>   follicular lymphoma                               0                  0      0 #>   influenza                                         0                  0      0 #>   interstitial lung disease                         0                  0      0 #>   kidney benign neoplasm                            0                  0      0 #>   kidney oncocytoma                                 0                  0      0 #>   lung adenocarcinoma                           11558                  0      0 #>   lung large cell carcinoma                         0                  0      0 #>   lymphangioleiomyomatosis                          0                  0      0 #>                                        tissue_general #> disease                                 small intestine vasculature #>   COVID-19                                            0           0 #>   Crohn disease                                   52029           0 #>   Down syndrome                                       0           0 #>   breast cancer                                       0           0 #>   chronic obstructive pulmonary disease               0           0 #>   chronic rhinitis                                    0           0 #>   clear cell renal carcinoma                          0           0 #>   cystic fibrosis                                     0           0 #>   follicular lymphoma                                 0           
0 #>   influenza                                           0           0 #>   interstitial lung disease                           0           0 #>   kidney benign neoplasm                              0           0 #>   kidney oncocytoma                                   0           0 #>   lung adenocarcinoma                                 0           0 #>   lung large cell carcinoma                           0           0 #>   lymphangioleiomyomatosis                            0           0 #>  [ reached getOption(\"max.print\") -- omitted 8 rows ]"},{"path":"/articles/comp_bio_data_integration.html","id":"finding-and-fetching-data-from-mouse-liver-10x-genomics-and-smart-seq2","dir":"Articles","previous_headings":"","what":"Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s load packages needed notebook. Now can open Census. notebook use Tabula Muris Senis data liver contains cells 10X Genomics Smart-Seq2 technologies. Let’s query datasets table Census filtering collection_name “Tabula Muris Senis” dataset_title “liver”. Now can use values dataset_id query load Seurat object cells datasets. 
can check cell counts 10X Genomics Smart-Seq2 data looking assay metadata.","code":"library(\"cellxgene.census\") library(\"Seurat\") census <- open_soma() census_datasets <- census$get(\"census_info\")$get(\"datasets\") census_datasets <- census_datasets$read(value_filter = \"collection_name == 'Tabula Muris Senis'\") census_datasets <- as.data.frame(census_datasets$concat())  # Print rows with liver data census_datasets[grep(\"Liver\", census_datasets$dataset_title), ] #>    soma_joinid                        collection_id    collection_name #> 15         583 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis #> 36         605 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis #>               collection_doi                           dataset_id #> 15 10.1038/s41586-020-2496-1 4546e757-34d0-4d17-be06-538318925fcd #> 36 10.1038/s41586-020-2496-1 6202a243-b713-4e12-9ced-c387f8483dea #>                      dataset_version_id #> 15 0a851e26-a629-4e59-9b52-9b4d1ce4440b #> 36 70f4f091-86a9-44e3-a92a-54cee98cc223 #>                                                                                        dataset_title #> 15 Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - Smart-seq2 #> 36        Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - 10x #>                            dataset_h5ad_path dataset_total_cell_count #> 15 4546e757-34d0-4d17-be06-538318925fcd.h5ad                     2859 #> 36 6202a243-b713-4e12-9ced-c387f8483dea.h5ad                     7294 tabula_muris_liver_ids <- c(\"4546e757-34d0-4d17-be06-538318925fcd\", \"6202a243-b713-4e12-9ced-c387f8483dea\")  seurat_obj <- get_seurat(   census,   organism = \"Mus musculus\",   obs_value_filter = \"dataset_id %in% tabula_muris_liver_ids\" ) table(seurat_obj$assay) #>  #>  10x 3' v2 Smart-seq2  #>       7294       
2859"},{"path":"/articles/comp_bio_data_integration.html","id":"gene-length-normalization-of-smart-seq2-data-","dir":"Articles","previous_headings":"","what":"Gene-length normalization of Smart-Seq2 data.","title":"Integrating multi-dataset slices of data with Seurat","text":"Smart-seq2 read counts normalized gene length. Let’s first get gene lengths var.feature_length. Now can use normalize Smart-seq data. let’s split object assay. normalize Smart-seq slice using gene lengths merge back single object.","code":"smart_seq_gene_lengths <- seurat_obj$RNA[[]]$feature_length seurat_obj.list <- SplitObject(seurat_obj, split.by = \"assay\") seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts <- seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts / smart_seq_gene_lengths seurat_obj <- merge(seurat_obj.list[[1]], seurat_obj.list[[2]])"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-with-seurat","dir":"Articles","previous_headings":"","what":"Integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"use native integration capabilities Seurat. comprehensive usage best practices Seurat integration please refer doc site Seurat.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"inspecting-data-prior-to-integration","dir":"Articles","previous_headings":"Integration with Seurat","what":"Inspecting data prior to integration","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s take look strength batch effects data. perform embedding visualization via UMAP. Let’s basic data normalization variable gene selection now perform PCA UMAP   can see batch effects strong cells cluster primarily assay cell_type. 
Properly integrated embedding principle cluster primarily cell_type, assay best randomly distributed.","code":"seurat_obj <- SCTransform(seurat_obj) seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = \"vst\", nfeatures = 2000) seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(object = seurat_obj)) seurat_obj <- RunUMAP(seurat_obj, dims = 1:30) # By assay p1 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"data-integration-with-seurat","dir":"Articles","previous_headings":"Integration with Seurat","what":"Data integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"Whenever query fetch Census data multiple datasets integration needs performed evidenced batch effects observed. paramaters Seurat used notebook selected model run quickly. best practices integration single-cell data using Seurat please refer documentation page. seurat_d reading article integrated cell atlas human lung health disease Sikkema et al. perfomed integration 43 datasets Lung. focus metadata Census can batch information integration.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id","title":"Integrating multi-dataset slices of data with Seurat","text":"cells Census annotated dataset come \"dataset_id\". great place start integration. let’s run Seurat integration pipeline. First define model batch set dataset_id. Firs normalize select variable genes seperated batch key dataset_id Now perform integration. Let’s inspect results normalization UMAP visulization. plot UMAP.   Great! can see clustering longer mainly driven assay, albeit still contributing . 
Great! can see clustering longer mainly driven assay, albeit still contributing .","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run the standard workflow for visualization and clustering seurat_obj.combined <- ScaleData(seurat_obj.combined, verbose = FALSE) seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id","title":"Integrating multi-dataset slices of data with Seurat","text":"Similar dataset_id, cells Census annotated donor_id. definition donor_id depends dataset left discretion data curators. However still rich information can used batch variable integration. 
donor_id guaranteed unique across cells Census, strongly recommend concatenating dataset_id donor_id use batch separator Seurat Now perform integration. inspect new results UMAP. Plot UMAP.   can see using dataset_id donor_id batch cells now mostly cluster cell type.","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = \"donor_id\") })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") #> Finding all pairwise anchors #> Running CCA #> Merging objects #> Finding neighborhoods #> Finding anchors #>  Found 7190 anchors #> Filtering anchors #>  Retained 5063 anchors seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") #> [1] 1 #> Warning: Different cells and/or features from existing assay SCT #> Warning: Layer counts isn't present in the assay object; returning NULL #> [1] 2 #> Warning: Different cells and/or features from existing assay SCT #> Layer counts isn't present in the assay object; returning NULL #> Merging dataset 1 into 2 #> Extracting anchors for merged samples #> Finding integration vectors #> Finding integration vector weights #> Integrating data #> Warning: Layer counts isn't present in the assay object; returning NULL #> Warning: Assay integrated changing from Assay to SCTAssay #> Warning: Layer counts isn't present in the assay object; returning NULL #> Warning: Different cells and/or features from existing assay SCT DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run 
the standard workflow for visualization and clustering seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) #> 23:56:21 UMAP embedding parameters a = 0.9922 b = 1.112 #> 23:56:21 Read 10153 rows and found 30 numeric columns #> 23:56:21 Using Annoy for neighbor search, n_neighbors = 30 #> 23:56:21 Building Annoy index with metric = cosine, n_trees = 50 #> 0%   10   20   30   40   50   60   70   80   90   100% #> [----|----|----|----|----|----|----|----|----|----| #> **************************************************| #> 23:56:23 Writing NN index file to temp file /tmp/RtmpsKixI5/file1cd2c4aac6d19 #> 23:56:23 Searching Annoy index using 1 thread, search_k = 3000 #> 23:56:26 Annoy recall = 100% #> 23:56:26 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 #> 23:56:27 Initializing from normalized Laplacian + noise (using RSpectra) #> 23:56:28 Commencing optimization for 200 epochs, with 410528 positive edges #> 23:56:31 Optimization finished # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id-assay_ontology_term_id-suspension_type-","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id + assay_ontology_term_id + suspension_type.","title":"Integrating multi-dataset slices of data with Seurat","text":"cases one dataset may contain multiple assay types /multiple suspension types (cell vs nucleus), important consider metadata batches. 
Therefore, comprehensive definition batch Census can accomplished combining cell metadata dataset_id, donor_id, assay_ontology_term_id suspension_type, latter encode EFO ids assay types. example, two datasets used contain cells one assay , one suspension type . Thus make difference include metadata part batch. implementation look line","code":"# EXAMPLE, DON'T RUN.  # split the dataset into a list of seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = c(\"donor_id\", \"assay_ontology_term_id\", \"suspension_type\")) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list)  # integrate seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\")"},{"path":"/articles/comp_bio_normalizing_full_gene_sequencing.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the census","title":"Normalizing full-length gene sequencing data","text":"First open Census: can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"library(\"Seurat\") census <- cellxgene.census::open_soma()"},{"path":"/articles/comp_bio_normalizing_full_gene_sequencing.html","id":"fetching-full-length-example-sequencing-data-smart-seq","dir":"Articles","previous_headings":"","what":"Fetching full-length example sequencing data (Smart-Seq)","title":"Normalizing full-length gene sequencing data","text":"Let’s get example data, case ’ll fetch cells relatively small dataset derived Smart-Seq2 
technology performs full-length gene sequencing: Collection: Tabula Muris Senis Dataset: Liver - single-cell transcriptomic atlas characterizes ageing tissues mouse - Smart-seq2 Let’s first find dataset’s id using dataset table Census. Now can use id fetch data. Let’s make sure data contains Smart-Seq2 cells. Great! can see small dataset containing 2,859 cells. Now let’s proceed normalize gene lengths.","code":"liver_dataset <- as.data.frame(   census$get(\"census_info\")$get(\"datasets\")   $read(value_filter = \"dataset_title == 'Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - Smart-seq2'\")   $concat() ) liver_dataset #>   soma_joinid                        collection_id    collection_name #> 1         583 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis #>              collection_doi                           dataset_id #> 1 10.1038/s41586-020-2496-1 4546e757-34d0-4d17-be06-538318925fcd #>                     dataset_version_id #> 1 0a851e26-a629-4e59-9b52-9b4d1ce4440b #>                                                                                       dataset_title #> 1 Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - Smart-seq2 #>                           dataset_h5ad_path dataset_total_cell_count #> 1 4546e757-34d0-4d17-be06-538318925fcd.h5ad                     2859 liver_dataset_id <- liver_dataset[1, \"dataset_id\"] liver_seurat <- cellxgene.census::get_seurat(   census,   organism = \"Mus musculus\",   obs_value_filter = paste0(\"dataset_id == '\", liver_dataset_id, \"'\") ) table(liver_seurat$assay) #>  #> Smart-seq2  #>       2859"},{"path":"/articles/comp_bio_normalizing_full_gene_sequencing.html","id":"normalizing-expression-to-account-for-gene-length","dir":"Articles","previous_headings":"","what":"Normalizing expression to account for gene length","title":"Normalizing full-length gene sequencing data","text":"default cellxgene_census::get_seurat() fetches genes 
Census. let’s first identify genes measured dataset subset Seurat obect include . goal can use “Dataset Presence Matrix” census$get(\"census_data\")$get(\"mus_musculus\")$ms$get(\"RNA\")$get(\"feature_dataset_presence_matrix\"). boolean matrix N x M N number datasets, M number genes Census, 1 entry indicates gene measured dataset. (Note Seurat objects transposed layout M x N.) Let’s get genes measured dataset. can see genes Census 17,992 measured dataset. Now let’s normalize genes gene length. can easily Census gene lengths included gene metadata feature_length. done! can now see real numbers instead integers.","code":"liver_seurat #> An object of class Seurat  #> 52417 features across 2859 samples within 1 assay  #> Active assay: RNA (52417 features, 0 variable features) #>  2 layers present: counts, data liver_dataset_joinid <- liver_dataset$soma_joinid[1] presence_matrix <- cellxgene.census::get_presence_matrix(census, \"Mus musculus\", \"RNA\") presence_matrix <- presence_matrix$take(liver_dataset_joinid) gene_presence <- as.vector(presence_matrix$get_one_based_matrix())  liver_seurat <- liver_seurat[gene_presence, ] liver_seurat #> An object of class Seurat  #> 17992 features across 2859 samples within 1 assay  #> Active assay: RNA (17992 features, 0 variable features) #>  2 layers present: counts, data GetAssayData(liver_seurat[1:5, 1:5], slot = \"data\") #> Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0. #> i Please use the `layer` argument instead. #> This warning is displayed once every 8 hours. #> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated. #> 5 x 5 sparse Matrix of class \"dgCMatrix\" #>                    cell3959639 cell3959640 cell3959641 cell3959642 cell3959643 #> ENSMUSG00000025900           .           .           .           .           . #> ENSMUSG00000025902           .           .           .           .        2250 #> ENSMUSG00000033845           .         
559        1969           .           . #> ENSMUSG00000025903           .           .           .           .           . #> ENSMUSG00000033813           .           .         828           1          54 gene_lengths <- liver_seurat$RNA@meta.features$feature_length liver_seurat <- SetAssayData(   liver_seurat,   new.data = sweep(GetAssayData(liver_seurat, slot = \"data\"), 1, gene_lengths, \"/\") ) GetAssayData(liver_seurat[1:5, 1:5], slot = \"data\") #> 5 x 5 sparse Matrix of class \"dgCMatrix\" #>                    cell3959639 cell3959640 cell3959641  cell3959642 cell3959643 #> ENSMUSG00000025900           .  .            .         .             .          #> ENSMUSG00000025902           .  .            .         .             0.47150042 #> ENSMUSG00000033845           .  0.06586544   0.2320019 .             .          #> ENSMUSG00000025903           .  .            .         .             .          #> ENSMUSG00000033813           .  .            0.2744448 0.0003314551  0.01789857"},{"path":"/articles/comp_bio_normalizing_full_gene_sequencing.html","id":"validation-through-clustering-exploration","dir":"Articles","previous_headings":"","what":"Validation through clustering exploration","title":"Normalizing full-length gene sequencing data","text":"Let’s perform basic clustering analysis see cell types cluster expected using normalized counts. First basic filtering cells genes. normalize account sequencing depth transform data log scale. subset highly variable genes. finally scale values across gene axis. Now can proceed clustering analysis.  exceptions can see cells cell type cluster near serves sanity check gene-length normalization applied. 
Don’t forget close census.","code":"cells_per_gene <- rowSums(GetAssayData(liver_seurat, slot = \"counts\") > 0) genes_per_cell <- Matrix::colSums(liver_seurat$RNA@counts > 0) liver_seurat <- liver_seurat[cells_per_gene >= 5, genes_per_cell >= 500] liver_seurat <- Seurat::NormalizeData(   liver_seurat,   normalization.method = \"LogNormalize\",   scale.factor = 10000 ) liver_seurat <- Seurat::FindVariableFeatures(   liver_seurat,   selection.method = \"vst\",   nfeatures = 1000 ) all.genes <- rownames(liver_seurat) liver_seurat <- Seurat::ScaleData(liver_seurat, features = all.genes) liver_seurat <- RunPCA(   liver_seurat,   features = VariableFeatures(object = liver_seurat) ) liver_seurat <- FindNeighbors(liver_seurat, dims = 1:40) liver_seurat <- RunUMAP(liver_seurat, dims = 1:40) DimPlot(liver_seurat, reduction = \"umap\", group.by = \"cell_type\") census$close()"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Summarizing cell and gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). open Census, close census$close(). can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma(). can learn cellxgene.census methods accessing corresponding documentation. example ?cellxgene.census::open_soma.","code":"library(\"cellxgene.census\") census <- open_soma()"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"summarizing-cell-metadata","dir":"Articles","previous_headings":"","what":"Summarizing cell metadata","title":"Summarizing cell and gene metadata","text":"Census open can use TileDB-SOMA methods SOMACollection. can thus access metadata SOMADataFrame objects encoding cell gene metadata. Tips: can read entire SOMADataFrame R using .data.frame(soma_df$read()$concat()). Queries much faster request DataFrame columns required analysis (e.g. 
column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\")). can also refine query results using value_filter, filter census matching records.","code":""},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-all-cell-types","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize all cell types","title":"Summarizing cell and gene metadata","text":"example reads cell metadata (obs) R data frame summarize variety ways.","code":"human <- census$get(\"census_data\")$get(\"homo_sapiens\")  # Read obs into an R data frame (tibble). obs_df <- human$obs$read(column_names = c(\"cell_type\")) obs_df <- as.data.frame(obs_df$concat())  # Find all unique values in the cell_type column. unique_cell_type <- unique(obs_df$cell_type)  cat(   \"There are\",   length(unique_cell_type),   \"cell types in the Census! The first few are: \",   paste(head(unique_cell_type), collapse = \", \") ) #> There are 631 cell types in the Census! The first few are:  oligodendrocyte, oligodendrocyte precursor cell, astrocyte of the cerebral cortex, microglial cell, cerebral cortex endothelial cell, vascular leptomeningeal cell"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-a-subset-of-cell-types-selected-with-a-value_filter","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize a subset of cell types, selected with a value_filter","title":"Summarizing cell and gene metadata","text":"example utilizes SOMA “value filter” read subset cells tissue_ontology_term_id equal UBERON:0002048 (lung tissue), summarizes query result. can also define much complex value filters. 
example: combine terms & | use %% operator query multiple values","code":"# Read cell_type terms for cells which have a specific tissue term LUNG_TISSUE <- \"UBERON:0002048\"  obs_df <- human$obs$read(column_names = c(\"cell_type\"), value_filter = paste0(\"tissue_ontology_term_id == '\", LUNG_TISSUE, \"'\")) obs_df <- as.data.frame(obs_df$concat())  # Find all unique values in the cell_type column as an R data frame. unique_cell_type <- unique(obs_df$cell_type) cat(   \"There are \",   length(unique_cell_type),   \" cell types in the Census where tissue_ontology_term_id == \",   LUNG_TISSUE,   \"!\\nThe first few are:\",   paste(head(unique_cell_type), collapse = \", \"),   \"\\n\" ) #> There are  185  cell types in the Census where tissue_ontology_term_id ==  UBERON:0002048 ! #> The first few are: type II pneumocyte, neutrophil, effector CD4-positive, alpha-beta T cell, effector CD8-positive, alpha-beta T cell, mature NK T cell, blood vessel endothelial cell # Report the 10 most common top_10 <- sort(table(obs_df$cell_type), decreasing = TRUE)[1:10] cat(   \"The top 10 cell types where tissue_ontology_term_id ==\",   LUNG_TISSUE,   \"are: \",   paste(names(top_10), collapse = \", \") ) #> The top 10 cell types where tissue_ontology_term_id == UBERON:0002048 are:  native cell, alveolar macrophage, CD8-positive, alpha-beta T cell, CD4-positive, alpha-beta T cell, macrophage, type II pneumocyte, classical monocyte, natural killer cell, malignant cell, epithelial cell of lower respiratory tract # You can also do more complex queries, such as testing for inclusion in a list of values obs_df <- human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = \"tissue_ontology_term_id %in% c('UBERON:0002082', 'UBERON:OOO2084', 'UBERON:0002080')\" )  obs_df <- as.data.frame(obs_df$concat())  # Summarize top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] print(top_10) #>  #> CL:0000746 CL:0008034 CL:0002131 CL:0002548 
CL:0000115 CL:0000763 CL:0000057 CL:0000669  #>     160974      99458      96953      79733      79626      35560      33075      27515  #> CL:0000003 CL:0002144  #>      23613      18593"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"full-census-metadata-stats","dir":"Articles","previous_headings":"","what":"Full Census metadata stats","title":"Summarizing cell and gene metadata","text":"example queries organisms Census, summarizes diversity various metadata labels.","code":"cols_to_query <- c(   \"cell_type_ontology_term_id\",   \"assay_ontology_term_id\",   \"tissue_ontology_term_id\" )  total_cells <- 0 for (organism in census$get(\"census_data\")$names()) {   print(organism)    obs_df <- census$get(\"census_data\")$get(organism)$obs$read(column_names = cols_to_query)   obs_df <- as.data.frame(obs_df$concat())    total_cells <- total_cells + nrow(obs_df)   for (col in cols_to_query) {     cat(\"  Unique \", col, \" values: \", length(unique(obs_df[[col]])), \"\\n\")   } } #> [1] \"homo_sapiens\" #>   Unique  cell_type_ontology_term_id  values:  631  #>   Unique  assay_ontology_term_id  values:  20  #>   Unique  tissue_ontology_term_id  values:  230  #> [1] \"mus_musculus\" #>   Unique  cell_type_ontology_term_id  values:  248  #>   Unique  assay_ontology_term_id  values:  10  #>   Unique  tissue_ontology_term_id  values:  74 cat(\"Complete Census contains \", total_cells, \" cells.\") #> Complete Census contains  68683222  cells."},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"close-the-census","dir":"Articles","previous_headings":"Full Census metadata stats","what":"Close the census","title":"Summarizing cell and gene metadata","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. 
Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Chan Zuckerberg Initiative Foundation. Author, maintainer, copyright holder, funder.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Chan Zuckerberg Initiative Foundation (2024). cellxgene.census: CZ CELLxGENE Discover Cell Census. R package version 1.14.0, https://github.com/chanzuckerberg/cellxgene-census.","code":"@Manual{,   title = {cellxgene.census: CZ CELLxGENE Discover Cell Census},   author = {{Chan Zuckerberg Initiative Foundation}},   year = {2024},   note = {R package version 1.14.0},   url = {https://github.com/chanzuckerberg/cellxgene-census}, }"},{"path":"/index.html","id":"r-package-of-cz-cellxgene-discover-census","dir":"","previous_headings":"","what":"CZ CELLxGENE Discover Cell Census","title":"CZ CELLxGENE Discover Cell Census","text":"documentation R package cellxgene.census part CZ CELLxGENE Discover Census. full details Census data capabilities please go main Census site. cellxgene.census provides API efficiently access cloud-hosted Census single-cell data R. just seconds users can access slice Census data using cell gene filters across hundreds single-cell datasets. Census data can fetched iterative fashion bigger--memory slices data, quickly exported basic R structures, well Seurat SingleCellExperiment objects downstream analysis.","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"CZ CELLxGENE Discover Cell Census","text":"installing Ubuntu, may need install following libraries via apt install, libxml2-dev libssl-dev libcurl4-openssl-dev. addition must cmake v3.21 greater. installing MacOS, need install developer tools Xcode. Windows supported. 
R session install cellxgene.census R-Universe. able export Census data Seurat SingleCellExperiment also need install respective packages.","code":"install.packages(   \"cellxgene.census\",   repos=c('https://chanzuckerberg.r-universe.dev', 'https://cloud.r-project.org') ) # Seurat install.packages(\"Seurat\")  # SingleCellExperiment if (!require(\"BiocManager\", quietly = TRUE))     install.packages(\"BiocManager\")  BiocManager::install(\"SingleCellExperiment\")"},{"path":"/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"CZ CELLxGENE Discover Cell Census","text":"Check vignettes “Articles” section navigation bar site. highly recommend following vignettes starting point: Querying fetching single-cell data cell/gene metadata Learning CZ CELLxGENE Discover Census can also check quick start guide main Census site.","code":""},{"path":"/index.html","id":"example-seurat-and-singlecellexperiment-query","dir":"","previous_headings":"Usage","what":"Example Seurat and SingleCellExperiment query","title":"CZ CELLxGENE Discover Cell Census","text":"following creates Seurat object -demand sympathetic neurons Census filtering genes ENSG00000161798, ENSG00000188229. 
following retrieves data SingleCellExperiment object.","code":"library(\"cellxgene.census\") library(\"Seurat\")  census <- open_soma()  organism <- \"Homo sapiens\" gene_filter <- \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" cell_filter <-  \"cell_type == 'sympathetic neuron'\" cell_columns <- c(\"assay\", \"cell_type\", \"tissue\", \"tissue_general\", \"suspension_type\", \"disease\")  seurat_obj <- get_seurat(    census = census,    organism = organism,    var_value_filter = gene_filter,    obs_value_filter = cell_filter,    obs_column_names = cell_columns ) library(\"SingleCellExperiment\")  sce_obj <- get_single_cell_experiment(    census = census,    organism = organism,    var_value_filter = gene_filter,    obs_value_filter = cell_filter,    obs_column_names = cell_columns )"},{"path":"/index.html","id":"for-more-help","dir":"","previous_headings":"","what":"For More Help","title":"CZ CELLxGENE Discover Cell Census","text":"help, please go visit main Census site. believe found security issue, appreciate notification. Please send email security@chanzuckerberg.com.","code":""},{"path":"/reference/download_source_h5ad.html","id":null,"dir":"Reference","previous_headings":"","what":"Download source H5AD to local file name. — download_source_h5ad","title":"Download source H5AD to local file name. — download_source_h5ad","text":"Download source H5AD local file name.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Download source H5AD to local file name. — download_source_h5ad","text":"","code":"download_source_h5ad(   dataset_id,   file,   overwrite = FALSE,   census_version = \"stable\",   census = NULL )"},{"path":"/reference/download_source_h5ad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Download source H5AD to local file name. — download_source_h5ad","text":"dataset_id dataset_id interest. 
file Local file name store H5AD file. overwrite TRUE allow overwriting existing file. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling download_source_h5ad() multiple times.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Download source H5AD to local file name. — download_source_h5ad","text":"","code":"download_source_h5ad(\"0895c838-e550-48a3-a777-dbcd35d30272\", \"/tmp/data.h5ad\", overwrite = TRUE)"},{"path":"/reference/get_census_mirror.html","id":null,"dir":"Reference","previous_headings":"","what":"Get locator information about a Census mirror — get_census_mirror","title":"Get locator information about a Census mirror — get_census_mirror","text":"Get locator information Census mirror","code":""},{"path":"/reference/get_census_mirror.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get locator information about a Census mirror — get_census_mirror","text":"","code":"get_census_mirror(mirror)"},{"path":"/reference/get_census_mirror.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get locator information about a Census mirror — get_census_mirror","text":"mirror Name mirror.","code":""},{"path":"/reference/get_census_mirror.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get locator information about a Census mirror — get_census_mirror","text":"List mirror information","code":""},{"path":"/reference/get_census_mirror.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get locator information about a Census mirror — get_census_mirror","text":"","code":"get_census_mirror(\"AWS-S3-us-west-2\") #> $provider #> [1] \"S3\" #>  #> $base_uri #> [1] \"s3://cellxgene-census-public-us-west-2/\" #>  #> $region #> [1] 
\"us-west-2\" #>  #> $alias #> [1] \"\" #>"},{"path":"/reference/get_census_mirror_directory.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the directory of Census mirrors currently available — get_census_mirror_directory","title":"Get the directory of Census mirrors currently available — get_census_mirror_directory","text":"Get directory Census mirrors currently available","code":""},{"path":"/reference/get_census_mirror_directory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the directory of Census mirrors currently available — get_census_mirror_directory","text":"","code":"get_census_mirror_directory()"},{"path":"/reference/get_census_mirror_directory.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the directory of Census mirrors currently available — get_census_mirror_directory","text":"Nested list information available mirrors","code":""},{"path":"/reference/get_census_mirror_directory.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get the directory of Census mirrors currently available — get_census_mirror_directory","text":"","code":"get_census_mirror_directory() #> $default #> $default$provider #> [1] \"S3\" #>  #> $default$base_uri #> [1] \"s3://cellxgene-census-public-us-west-2/\" #>  #> $default$region #> [1] \"us-west-2\" #>  #> $default$alias #> [1] \"default\" #>  #>  #> $`AWS-S3-us-west-2` #> $`AWS-S3-us-west-2`$provider #> [1] \"S3\" #>  #> $`AWS-S3-us-west-2`$base_uri #> [1] \"s3://cellxgene-census-public-us-west-2/\" #>  #> $`AWS-S3-us-west-2`$region #> [1] \"us-west-2\" #>  #> $`AWS-S3-us-west-2`$alias #> [1] \"\" #>  #>"},{"path":"/reference/get_census_version_description.html","id":null,"dir":"Reference","previous_headings":"","what":"Get release description for a Census version — get_census_version_description","title":"Get release description for a Census version — 
get_census_version_description","text":"Get release description Census version","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get release description for a Census version — get_census_version_description","text":"","code":"get_census_version_description(census_version)"},{"path":"/reference/get_census_version_description.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get release description for a Census version — get_census_version_description","text":"census_version census version name.","code":""},{"path":"/reference/get_census_version_description.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get release description for a Census version — get_census_version_description","text":"List release location metadata","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get release description for a Census version — get_census_version_description","text":"","code":"as.data.frame(get_census_version_description(\"stable\")) #>   release_date release_build #> 1                 2023-12-15 #>                                                              soma.uri #> 1 s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/ #>               soma.relative_uri soma.s3_region #> 1 /cell-census/2023-12-15/soma/      us-west-2 #>                                                              h5ads.uri #> 1 s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/h5ads/ #>               h5ads.relative_uri h5ads.s3_region do_not_delete  lts  alias #> 1 /cell-census/2023-12-15/h5ads/       us-west-2          TRUE TRUE stable #>   census_version #> 1         stable"},{"path":"/reference/get_census_version_directory.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the 
directory of Census releases currently available — get_census_version_directory","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Get directory Census releases currently available","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory()"},{"path":"/reference/get_census_version_directory.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Data frame available cell census releases, including location metadata.","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory() #>            release_date release_build #> stable                     2023-12-15 #> latest                     2024-05-27 #> 2023-05-15                 2023-05-15 #> 2023-07-25                 2023-07-25 #> 2023-12-15                 2023-12-15 #> 2024-04-29                 2024-04-29 #> 2024-05-06                 2024-05-06 #> 2024-05-13                 2024-05-13 #> 2024-05-20                 2024-05-20 #> 2024-05-27                 2024-05-27 #>                                                                       soma.uri #> stable     s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/ #> latest     s3://cellxgene-census-public-us-west-2/cell-census/2024-05-27/soma/ #> 2023-05-15 s3://cellxgene-census-public-us-west-2/cell-census/2023-05-15/soma/ #> 2023-07-25 s3://cellxgene-census-public-us-west-2/cell-census/2023-07-25/soma/ #> 2023-12-15 
s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/ #> 2024-04-29 s3://cellxgene-census-public-us-west-2/cell-census/2024-04-29/soma/ #> 2024-05-06 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-06/soma/ #> 2024-05-13 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-13/soma/ #> 2024-05-20 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-20/soma/ #> 2024-05-27 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-27/soma/ #>                        soma.relative_uri soma.s3_region #> stable     /cell-census/2023-12-15/soma/      us-west-2 #> latest     /cell-census/2024-05-27/soma/      us-west-2 #> 2023-05-15 /cell-census/2023-05-15/soma/      us-west-2 #> 2023-07-25 /cell-census/2023-07-25/soma/      us-west-2 #> 2023-12-15 /cell-census/2023-12-15/soma/      us-west-2 #> 2024-04-29 /cell-census/2024-04-29/soma/      us-west-2 #> 2024-05-06 /cell-census/2024-05-06/soma/      us-west-2 #> 2024-05-13 /cell-census/2024-05-13/soma/      us-west-2 #> 2024-05-20 /cell-census/2024-05-20/soma/      us-west-2 #> 2024-05-27 /cell-census/2024-05-27/soma/      us-west-2 #>                                                                       h5ads.uri #> stable     s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/h5ads/ #> latest     s3://cellxgene-census-public-us-west-2/cell-census/2024-05-27/h5ads/ #> 2023-05-15 s3://cellxgene-census-public-us-west-2/cell-census/2023-05-15/h5ads/ #> 2023-07-25 s3://cellxgene-census-public-us-west-2/cell-census/2023-07-25/h5ads/ #> 2023-12-15 s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/h5ads/ #> 2024-04-29 s3://cellxgene-census-public-us-west-2/cell-census/2024-04-29/h5ads/ #> 2024-05-06 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-06/h5ads/ #> 2024-05-13 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-13/h5ads/ #> 2024-05-20 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-20/h5ads/ #> 2024-05-27 
s3://cellxgene-census-public-us-west-2/cell-census/2024-05-27/h5ads/ #>                        h5ads.relative_uri h5ads.s3_region do_not_delete  lts #> stable     /cell-census/2023-12-15/h5ads/       us-west-2          TRUE TRUE #> latest     /cell-census/2024-05-27/h5ads/       us-west-2         FALSE   NA #> 2023-05-15 /cell-census/2023-05-15/h5ads/       us-west-2          TRUE TRUE #> 2023-07-25 /cell-census/2023-07-25/h5ads/       us-west-2          TRUE TRUE #> 2023-12-15 /cell-census/2023-12-15/h5ads/       us-west-2          TRUE TRUE #> 2024-04-29 /cell-census/2024-04-29/h5ads/       us-west-2         FALSE   NA #> 2024-05-06 /cell-census/2024-05-06/h5ads/       us-west-2         FALSE   NA #> 2024-05-13 /cell-census/2024-05-13/h5ads/       us-west-2         FALSE   NA #> 2024-05-20 /cell-census/2024-05-20/h5ads/       us-west-2          TRUE   NA #> 2024-05-27 /cell-census/2024-05-27/h5ads/       us-west-2         FALSE   NA #>             alias #> stable     stable #> latest     latest #> 2023-05-15        #> 2023-07-25        #> 2023-12-15        #> 2024-04-29        #> 2024-05-06        #> 2024-05-13        #> 2024-05-20        #> 2024-05-27"},{"path":"/reference/get_presence_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Read the feature dataset presence matrix. — get_presence_matrix","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"Read feature dataset presence matrix.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"get_presence_matrix(census, organism, measurement_name = \"RNA\")"},{"path":"/reference/get_presence_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read the feature dataset presence matrix. 
— get_presence_matrix","text":"census census object cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA.","code":""},{"path":"/reference/get_presence_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"tiledbsoma::matrixZeroBasedView object dataset join id & feature join id dimensions, filled 1s indicating presence. sparse matrix accessed zero-based indexes since join id's may zero.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-12-15. Specify census_version = \"2023-12-15\" in future calls to open_soma() to ensure data consistency. on.exit(census$close(), add = TRUE) print(get_presence_matrix(census, \"Homo sapiens\")$dim()) #> Error in private$check_open_for_read_or_write(): Item must be open for read or write. 
s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/"},{"path":"/reference/get_seurat.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to Seurat — get_seurat","title":"Export Census slices to Seurat — get_seurat","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return Seurat object.","code":""},{"path":"/reference/get_seurat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to Seurat — get_seurat","text":"","code":"get_seurat(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\", data = NULL),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   obsm_layers = FALSE,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_seurat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to Seurat — get_seurat","text":"census census object, usually returned cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers named character X layers add Seurat assay, names names Seurat slots (counts data) values names layers within X. obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. obsm_layers Names arrays obsm add cell embeddings; pass FALSE suppress loading dimensional reductions. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. 
var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_seurat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to Seurat — get_seurat","text":"Seurat object containing sensus slice.","code":""},{"path":"/reference/get_seurat.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to Seurat — get_seurat","text":"","code":"if (FALSE) { census <- open_soma() seurat_obj <- get_seurat(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  seurat_obj  census$close() }"},{"path":"/reference/get_single_cell_experiment.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return SingleCellExperiment object.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"get_single_cell_experiment(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\"),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   obsm_layers = FALSE,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_single_cell_experiment.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"census census object, usually returned cellxgene.census::open_soma(). 
organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers character vector X layers add assays main experiment; may optionally named set name resulting assay (eg. ‘X_layers = c(counts = \"raw\")’ load X layer “‘raw’” assay “‘counts’”); default, loads X layers obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. obsm_layers Names arrays obsm add cell embeddings; pass FALSE suppress loading dimensional reductions. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"SingleCellExperiment object containing sensus slice.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"if (FALSE) { census <- open_soma() sce_obj <- get_single_cell_experiment(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  sce_obj  census$close() }"},{"path":"/reference/get_source_h5ad_uri.html","id":null,"dir":"Reference","previous_headings":"","what":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","title":"Locate source h5ad file for a dataset. 
— get_source_h5ad_uri","text":"Locate source h5ad file dataset.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(dataset_id, census_version = \"stable\", census = NULL)"},{"path":"/reference/get_source_h5ad_uri.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"dataset_id dataset_id interest. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling get_source_h5ad_uri() multiple times.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"list uri optional s3_region.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(\"0895c838-e550-48a3-a777-dbcd35d30272\") #> $uri #> [1] \"s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/h5ads/0895c838-e550-48a3-a777-dbcd35d30272.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\" #>"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":null,"dir":"Reference","previous_headings":"","what":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"Create SOMATileDBContext suitable using open_soma(). 
Typically open_soma() creates context automatically, one can created separately order set custom configuration options, share multiple open Census handles.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"new_SOMATileDBContext_for_census(   census_version_description,   mirror = \"default\",   ... )"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"census_version_description result get_census_version_description() desired Census version. mirror name intended census mirror (get_census_mirror_directory()[[name]] save lookup), NULL configure local file access. ... Custom configuration options.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"SOMATileDBContext object open_soma().","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"census_desc <- get_census_version_description(\"stable\") ctx <- new_SOMATileDBContext_for_census(census_desc, \"soma.init_buffer_bytes\" = paste(4 * 1024**3)) census <- open_soma(\"stable\", tiledbsoma_ctx = ctx) #> The stable Census release is currently 2023-12-15. Specify census_version = \"2023-12-15\" in future calls to open_soma() to ensure data consistency. 
census$close()"},{"path":"/reference/open_soma.html","id":null,"dir":"Reference","previous_headings":"","what":"Open the Census — open_soma","title":"Open the Census — open_soma","text":"Open Census","code":""},{"path":"/reference/open_soma.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Open the Census — open_soma","text":"","code":"open_soma(   census_version = \"stable\",   uri = NULL,   tiledbsoma_ctx = NULL,   mirror = NULL )"},{"path":"/reference/open_soma.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Open the Census — open_soma","text":"census_version version Census, e.g., \"stable\". uri URI containing Census SOMA objects open instead released version. (supplied, takes precedence census_version.) tiledbsoma_ctx tiledbsoma::SOMATileDBContext built using new_SOMATileDBContext_for_census(). Optional (created automatically) using census_version context need reused. mirror Census mirror access; one names(get_census_mirror_directory()).","code":""},{"path":"/reference/open_soma.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Open the Census — open_soma","text":"Top-level tiledbsoma::SOMACollection object. use, census closed release memory resources, usually .exit(census$close(), add = TRUE). Closing top-level census also close SOMA objects accessed .","code":""},{"path":"/reference/open_soma.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Open the Census — open_soma","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-12-15. Specify census_version = \"2023-12-15\" in future calls to open_soma() to ensure data consistency. 
as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #>   soma_joinid                      label      value #> 1           0      census_schema_version      1.2.0 #> 2           1          census_build_date 2023-10-23 #> 3           2     dataset_schema_version      3.1.0 #> 4           3           total_cell_count   68683222 #> 5           4          unique_cell_count   40356133 #> 6           5 number_donors_homo_sapiens      15588 #> 7           6 number_donors_mus_musculus       1990 census$close()"}]
    +[{"path":"/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2022, Chan Zuckerberg Initiative Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"/articles/census_access_maintained_embeddings.html","id":"open-census","dir":"Articles","previous_headings":"","what":"Open Census","title":"Access CELLxGENE collaboration embeddings (scVI, Geneformer)","text":"","code":"library(\"cellxgene.census\") census <- open_soma(census_version = \"2023-12-15\")"},{"path":"/articles/census_access_maintained_embeddings.html","id":"load-embeddings-as-seurat-reductions","dir":"Articles","previous_headings":"","what":"Load embeddings as Seurat reductions","title":"Access CELLxGENE collaboration embeddings (scVI, Geneformer)","text":"high-level cellxgene.census::get_seurat() function can query Census load embeddings dimensional reductions Seurat object. ask Seurat object expression data human cells tissue_general equal 'central nervous system', along scVI geneformer embeddings (obsm_layers). 
embeddings stored dimensional reductions seurat_obj, can take quick look scVI embeddings 2D scatter plot via UMAP, colored Census cell_type annotations.","code":"library(\"Seurat\")  seurat_obj <- get_seurat(   census,   organism = \"homo_sapiens\",   obs_value_filter = \"tissue_general == 'central nervous system'\",   obs_column_names = c(\"cell_type\"),   obsm_layers = c(\"scvi\", \"geneformer\") ) seurat_obj <- RunUMAP(   seurat_obj,   reduction = \"scvi\",   dims = 1:ncol(Embeddings(seurat_obj, \"scvi\")) )  DimPlot(seurat_obj, reduction = \"umap\", group.by = \"cell_type\") +   theme(legend.text = element_text(size = 8))"},{"path":"/articles/census_access_maintained_embeddings.html","id":"load-embeddings-as-singlecellexperiment-reductions","dir":"Articles","previous_headings":"","what":"Load embeddings as SingleCellExperiment reductions","title":"Access CELLxGENE collaboration embeddings (scVI, Geneformer)","text":"Similarly, cellxgene.census::get_single_cell_experiment() can query Census store embeddings dimensionality reduction results Bioconductor SingleCellExperiment object. , can view UMAP Geneformer embeddings colored cell_type.","code":"library(\"SingleCellExperiment\") sce_obj <- get_single_cell_experiment(   census,   organism = \"homo_sapiens\",   obs_value_filter = \"tissue_general == 'central nervous system'\",   obs_column_names = c(\"cell_type\"),   obsm_layers = c(\"scvi\", \"geneformer\") ) sce_obj <- scater::runUMAP(sce_obj, dimred = \"geneformer\") scater::plotReducedDim(sce_obj, dimred = \"UMAP\", colour_by = \"cell_type\")"},{"path":"/articles/census_access_maintained_embeddings.html","id":"load-embeddings-as-sparsematrix","dir":"Articles","previous_headings":"","what":"Load embeddings as sparseMatrix","title":"Access CELLxGENE collaboration embeddings (scVI, Geneformer)","text":"Lastly, can use SOMAExperimentAxisQuery lower-level access embeddings’ numerical data. can performant use cases don’t need features Seurat SingleCellExperiment. 
row embeddings sparseMatrix provides fine-tuned Geneformer model’s 512-dimensional embedding vector cell, cell soma_joinids row names. different arguments, SOMAExperimentAxisQuery$to_sparse_matrix() can also read scVI embeddings expression data. Still lower-level access available SOMAExperimentAxisQuery$read(), streams Arrow tables. methods SOMAExperimentAxisQuery can fetch metadata like cell_type: SOMAExperimentAxisQuery loads ask Census, unlike high-level get_seurat() get_single_cell_experiment() functions, eagerly populate objects based query.","code":"query <- census$get(\"census_data\")$get(\"homo_sapiens\")$axis_query(   \"RNA\",   obs_query = tiledbsoma::SOMAAxisQuery$new(value_filter = \"tissue == 'tongue'\") ) embeddings <- query$to_sparse_matrix(\"obsm\", \"geneformer\") str(embeddings) #> Formal class 'dgTMatrix' [package \"Matrix\"] with 6 slots #>   ..@ i       : int [1:190464] 0 0 0 0 0 0 0 0 0 0 ... #>   ..@ j       : int [1:190464] 0 1 2 3 4 5 6 7 8 9 ... #>   ..@ Dim     : int [1:2] 372 512 #>   ..@ Dimnames:List of 2 #>   .. ..$ : chr [1:372] \"51784858\" \"51784859\" \"51784860\" \"51784861\" ... #>   .. ..$ : chr [1:512] \"0\" \"1\" \"2\" \"3\" ... #>   ..@ x       : num [1:190464] 0.1104 -1.2031 1.0078 0.0131 1.2422 ... #>   ..@ factors : list() head(as.data.frame(query$obs(column_names = c(\"soma_joinid\", \"cell_type\"))$concat())) #>   soma_joinid  cell_type #> 1    51784858 basal cell #> 2    51784859 basal cell #> 3    51784860 fibroblast #> 4    51784861 fibroblast #> 5    51784862 basal cell #> 6    51784863 basal cell census$close()"},{"path":"/articles/census_axis_query.html","id":"axis-query-example","dir":"Articles","previous_headings":"","what":"Axis Query Example","title":"Axis Query Example","text":"Goal: demonstrate basic axis metadata handling. CZ CELLxGENE Census stores obs (cell) metadata SOMA DataFrame, can queried read R data frame. Census also convenience package simplifies opening census. R data frames -memory objects. 
Take care queries small enough results fit memory.","code":""},{"path":"/articles/census_axis_query.html","id":"opening-the-census","dir":"Articles","previous_headings":"Axis Query Example","what":"Opening the census","title":"Axis Query Example","text":"cellxgene.census R package contains convenient API open latest version Census. can learn cellxgene.census methods accessing corresponding documentation. example ?cellxgene.census::open_soma.","code":"census <- cellxgene.census::open_soma()"},{"path":"/articles/census_axis_query.html","id":"summarize-census-cell-metadata","dir":"Articles","previous_headings":"Axis Query Example","what":"Summarize Census cell metadata","title":"Axis Query Example","text":"Tips: can read entire SOMA dataframe R using .data.frame(soma_df$read()). Queries much faster request DataFrame columns required analysis (e.g. column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\")). can also refine query results using value_filter, filter census matching records.","code":""},{"path":"/articles/census_axis_query.html","id":"summarize-all-cell-types","dir":"Articles","previous_headings":"Axis Query Example > Summarize Census cell metadata","what":"Summarize all cell types","title":"Axis Query Example","text":"example reads cell metadata (obs) R data frame summarize variety ways.","code":"human <- census$get(\"census_data\")$get(\"homo_sapiens\")  # Read obs into an R data frame (tibble). obs_df <- as.data.frame(human$obs$read(   column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\") ))  # Find all unique values in the cell_type_ontology_term_id column. unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id)  cat(paste(   \"There are\",   length(unique_cell_type_ontology_term_id),   \"cell types in the Census! The first few are:\" )) #> There are 604 cell types in the Census! 
The first few are: head(unique_cell_type_ontology_term_id) #> [1] \"CL:0000540\" \"CL:0000738\" \"CL:0000763\" \"CL:0000136\" \"CL:0000235\" #> [6] \"CL:0000115\""},{"path":"/articles/census_axis_query.html","id":"summarize-a-subset-of-cell-types-selected-with-a-value_filter","dir":"Articles","previous_headings":"Axis Query Example > Summarize Census cell metadata","what":"Summarize a subset of cell types, selected with a value_filter","title":"Axis Query Example","text":"example utilizes SOMA “value filter” read subset cells tissue_ontology_term_id equal UBERON:0002048 (lung tissue), summarizes query result. can also define much complex value filters. example: combine terms use %% operator query multiple values","code":"# Read cell_type terms for cells which have a specific tissue term LUNG_TISSUE <- \"UBERON:0002048\"  obs_df <- as.data.frame(human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = paste(\"tissue_ontology_term_id == '\", LUNG_TISSUE, \"'\", sep = \"\") ))  # Find all unique values in the cell_type_ontology_term_id column as an R data frame. unique_cell_type_ontology_term_id <- unique(obs_df$cell_type_ontology_term_id) cat(paste(   \"There are \",   length(unique_cell_type_ontology_term_id),   \" cell types in the Census where tissue_ontology_term_id == \",   LUNG_TISSUE,   \"!\\nThe first few are:\",   sep = \"\" )) #> There are 185 cell types in the Census where tissue_ontology_term_id == UBERON:0002048! 
#> The first few are: head(unique_cell_type_ontology_term_id) #> [1] \"CL:0000003\" \"CL:4028004\" \"CL:0002145\" \"CL:0000625\" \"CL:0000624\" #> [6] \"CL:4028006\"  # Report the 10 most common top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] cat(paste(\"The top 10 cell types where tissue_ontology_term_id ==\", LUNG_TISSUE)) #> The top 10 cell types where tissue_ontology_term_id == UBERON:0002048 print(top_10) #>  #> CL:0000003 CL:0000583 CL:0000625 CL:0000624 CL:0000235 CL:0002063 CL:0000860  #>     562038     526859     323433     323067     254173     246279     203526  #> CL:0000623 CL:0001064 CL:0002632  #>     164944     149067     132243 # You can also do more complex queries, such as testing for inclusion in a list of values obs_df <- as.data.frame(human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = \"tissue_ontology_term_id %in% c('UBERON:0002082', 'UBERON:OOO2084', 'UBERON:0002080')\" ))  # Summarize top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] print(top_10) #>  #> CL:0000746 CL:0008034 CL:0002548 CL:0000115 CL:0002131 CL:0000763 CL:0000669  #>     159096      84750      79618      64190      61830      32088      27515  #> CL:0000003 CL:0000057 CL:0002144  #>      22707      20117      18593"},{"path":"/articles/census_axis_query.html","id":"full-census-stats","dir":"Articles","previous_headings":"Axis Query Example > Summarize Census cell metadata","what":"Full census stats","title":"Axis Query Example","text":"example queries organisms Census, summarizes diversity various metadata labels.","code":"cols_to_query <- c(   \"cell_type_ontology_term_id\",   \"assay_ontology_term_id\",   \"tissue_ontology_term_id\" )  total_cells <- 0 for (organism in census$get(\"census_data\")$names()) {   print(organism)   obs_df <- as.data.frame(     census$get(\"census_data\")$get(organism)$obs$read(column_names = cols_to_query)   )   total_cells <- total_cells + 
nrow(obs_df)   for (col in cols_to_query) {     cat(paste(\"  Unique \", col, \" values: \", length(unique(obs_df[[col]])), \"\\n\", sep = \"\"))   } } #> [1] \"homo_sapiens\" #>   Unique cell_type_ontology_term_id values: 604 #>   Unique assay_ontology_term_id values: 20 #>   Unique tissue_ontology_term_id values: 227 #> [1] \"mus_musculus\" #>   Unique cell_type_ontology_term_id values: 226 #>   Unique assay_ontology_term_id values: 9 #>   Unique tissue_ontology_term_id values: 51 cat(paste(\"Complete Census contains\", total_cells, \"cells.\")) #> Complete Census contains 60361716 cells."},{"path":"/articles/census_citation_generation.html","id":"requirements","dir":"Articles","previous_headings":"","what":"Requirements","title":"Generating citations for Census slices","text":"notebook requires: cellxgene_census Python package. Census data release schema version 1.3.0 greater.","code":""},{"path":"/articles/census_citation_generation.html","id":"generating-citation-strings","dir":"Articles","previous_headings":"","what":"Generating citation strings","title":"Generating citations for Census slices","text":"First open handle Census data. ensure open data release schema version 1.3.0 greater, use census_version=\"latest\" load dataset table contains column \"citation\" dataset included Census. 
now can use column \"dataset_id\" present dataset table Census cell metadata create citation strings Census slice.","code":"library(\"tiledb\") library(\"cellxgene.census\")  census <- open_soma(census_version = \"latest\") census_release_info <- census$get(\"census_info\")$get(\"summary\")$read()$concat() as.data.frame(census_release_info) #>   soma_joinid                      label      value #> 1           0      census_schema_version      2.0.1 #> 2           1          census_build_date 2024-05-27 #> 3           2     dataset_schema_version      5.0.0 #> 4           3           total_cell_count  115661967 #> 5           4          unique_cell_count   60703793 #> 6           5 number_donors_homo_sapiens      17671 #> 7           6 number_donors_mus_musculus       4216 datasets <- census$get(\"census_info\")$get(\"datasets\")$read()$concat() datasets <- as.data.frame(datasets) head(datasets[\"citation\"]) #>                                                                                                                                                                                                                                                                                                           citation #> 1            Publication: https://doi.org/10.1002/hep4.1854 Dataset Version: https://datasets.cellxgene.cziscience.com/fb76c95f-0391-4fac-9fb9-082ce2430b59.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/44531dd9-1388-4416-a117-af0a99de2294 #> 2   Publication: https://doi.org/10.1126/sciimmunol.abe6291 Dataset Version: https://datasets.cellxgene.cziscience.com/b6737a5e-9069-4dd6-9a57-92e17a746df9.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/3a2af25b-2338-4266-aad3-aa8d07473f50 #> 3   Publication: https://doi.org/10.1038/s41593-020-00764-7 Dataset Version: 
https://datasets.cellxgene.cziscience.com/0e02290f-b992-450b-8a19-554f73cd7f09.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/180bff9c-c8a5-4539-b13b-ddbc00d643e6 #> 4   Publication: https://doi.org/10.1038/s41467-022-29450-x Dataset Version: https://datasets.cellxgene.cziscience.com/40832710-d7b1-43fb-b2c2-1cd2255bc3ac.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/bf325905-5e8e-42e3-933d-9a9053e9af80 #> 5   Publication: https://doi.org/10.1038/s41590-021-01059-0 Dataset Version: https://datasets.cellxgene.cziscience.com/eb6c070c-ff67-4c1f-8d4d-65f9fe2119ee.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/93eebe82-d8c3-41bc-a906-63b5b5f24a9d #> 6 Publication: https://doi.org/10.1016/j.celrep.2019.12.082 Dataset Version: https://datasets.cellxgene.cziscience.com/650a47be-6666-4f70-ac47-8414c50bbd8e.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/939769a8-d8d2-4d01-abfc-55699893fd49"},{"path":"/articles/census_citation_generation.html","id":"via-cell-metadata-query","dir":"Articles","previous_headings":"Generating citation strings","what":"Via cell metadata query","title":"Generating citations for Census slices","text":"","code":"# Query cell metadata cell_metadata <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   value_filter = \"tissue == 'cardiac atrium'\",   column_names = c(\"dataset_id\", \"cell_type\") )  cell_metadata <- as.data.frame(cell_metadata$concat())  # Get a citation string for the slice slice_datasets <- datasets[datasets$dataset_id %in% cell_metadata$dataset_id, ] print(slice_datasets$citation) #> [1] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/9227d155-6f2d-4534-be73-b86c5c34d8e6.h5ad curated and 
distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [2] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/017c9ef2-a5e5-429e-a9a1-919e330c4087.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [3] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/8c189c08-4eba-45d4-925f-a5fe1a13d2ae.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [4] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/b76d37f6-0654-447f-bd1b-477be2c747f9.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [5] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/860c49d4-8ab1-4576-b67e-02d66e4a6ddd.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [6] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/b84def55-a776-4aa4-a9a6-7aab8b973086.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\""},{"path":"/articles/census_citation_generation.html","id":"via-seurat-query","dir":"Articles","previous_headings":"Generating citation strings","what":"Via Seurat query","title":"Generating citations for Census slices","text":"","code":"# Fetch a Seurat object seurat_obj <- get_seurat(   census = census,   
organism = \"homo_sapiens\",   measurement_name = \"RNA\",   obs_value_filter = \"tissue == 'cardiac atrium'\",   var_value_filter = \"feature_name == 'MYBPC3'\",   obs_column_names = c(\"dataset_id\", \"cell_type\") )  # Get a citation string for the slice slice_datasets <- datasets[datasets$dataset_id %in% seurat_obj[[]]$dataset_id, ] print(slice_datasets$citation) #> [1] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/9227d155-6f2d-4534-be73-b86c5c34d8e6.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [2] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/017c9ef2-a5e5-429e-a9a1-919e330c4087.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [3] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/8c189c08-4eba-45d4-925f-a5fe1a13d2ae.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [4] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/b76d37f6-0654-447f-bd1b-477be2c747f9.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [5] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/860c49d4-8ab1-4576-b67e-02d66e4a6ddd.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [6] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset 
Version: https://datasets.cellxgene.cziscience.com/b84def55-a776-4aa4-a9a6-7aab8b973086.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\""},{"path":"/articles/census_citation_generation.html","id":"via-singlecellexperiment-query","dir":"Articles","previous_headings":"Generating citation strings","what":"Via SingleCellExperiment query","title":"Generating citations for Census slices","text":"","code":"# Fetch a SingleCellExperiment object sce_obj <- get_single_cell_experiment(   census = census,   organism = \"homo_sapiens\",   measurement_name = \"RNA\",   obs_value_filter = \"tissue == 'cardiac atrium'\",   var_value_filter = \"feature_name == 'MYBPC3'\",   obs_column_names = c(\"dataset_id\", \"cell_type\") )  # Get a citation string for the slice slice_datasets <- datasets[datasets$dataset_id %in% sce_obj$dataset_id, ] print(slice_datasets$citation) #> [1] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/9227d155-6f2d-4534-be73-b86c5c34d8e6.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [2] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/017c9ef2-a5e5-429e-a9a1-919e330c4087.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [3] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/8c189c08-4eba-45d4-925f-a5fe1a13d2ae.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [4] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: 
https://datasets.cellxgene.cziscience.com/b76d37f6-0654-447f-bd1b-477be2c747f9.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [5] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/860c49d4-8ab1-4576-b67e-02d66e4a6ddd.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\" #> [6] \"Publication: https://doi.org/10.1126/science.abl4896 Dataset Version: https://datasets.cellxgene.cziscience.com/b84def55-a776-4aa4-a9a6-7aab8b973086.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/e5f58829-1a66-40b5-a624-9046778e74f5\""},{"path":"/articles/census_compute_over_X.html","id":"incremental-mean-calculation","dir":"Articles","previous_headings":"","what":"Incremental mean calculation","title":"Computing on X using online (incremental) algorithms","text":"Many statistics, marginal means, easy calculate incrementally. Let’s begin query X$raw sparse matrix unnormalized read counts, return results shards incrementally accumulate read count gene, divide cell count get mean reads per cell gene. First define query - case slice obs axis cells specific tissue & sex value, genes var axis. query$X() method returns iterator results, Arrow Table. table contain sparse X data obs/var coordinates, using standard SOMA names: soma_data - X values (float32) soma_dim_0 - obs coordinate (int64) soma_dim_1 - var coordinate (int64) Important: X matrices joined var/obs axis DataFrames integer join “id” (aka soma_joinid). positionally indexed, given cell gene may soma_joinid value (e.g., large integer). words, given X value, soma_dim_0 corresponds soma_joinid obs dataframe, soma_dim_1 coordinate corresponds soma_joinid var dataframe. 
convenience, query class includes utility simplify operations query slices. query$indexer indexer used wrap output query$X(), converting soma_joinids positional indexing query results. Positions [0, N), N number results query given axis. Key points: expensive query read results - rather make multiple passes data, read perform multiple computations. default, data census indexed soma_joinid positionally.","code":"library(\"tiledbsoma\") library(\"cellxgene.census\") census <- open_soma()  query <- census$get(\"census_data\")$get(\"mus_musculus\")$axis_query(   measurement_name = \"RNA\",   obs_query = SOMAAxisQuery$new(value_filter = \"tissue=='brain' && sex=='male'\") )  genes_df <- query$var(column_names = c(\"feature_id\", \"feature_name\"))$concat() genes_df <- as.data.frame(genes_df) n_genes <- nrow(genes_df)  # accumulator vector (for each gene) for the total count over all cells in X(\"raw\") raw_sum_by_gene <- numeric(n_genes) names(raw_sum_by_gene) <- genes_df$feature_id  # iterate through in-memory shards of query results tables <- query$X(\"raw\")$tables() while (!tables$read_complete()) {   table_part <- tables$read_next()   # table_part is an Arrow table with the columns mentioned above. The result   # order is not guaranteed!    # table_part$soma_dim_1 is the var/gene soma_joinid. But note that these are   # arbitrary int64 id's, and moreover each table_part may exhibit only a subset   # of the values we'll see over all query results. query$indexer helps us map   # any given soma_dim_1 values onto positions in query$var() (genes_df), that is   # the union of all values we'll see.   
gene_indexes <- query$indexer$by_var(table_part$soma_dim_1)$as_vector()   stopifnot(sum(gene_indexes >= n_genes) == 0)   # sum(table_part) group by gene, yielding a numeric vector with the gene_index   # in its names   sum_part <- tapply(as.vector(table_part$soma_data), gene_indexes, sum)   # update the accumulator vector   which_genes <- as.integer(names(sum_part)) + 1 # nb: gene_indexes is zero-based   stopifnot(sum(which_genes > n_genes) == 0)   raw_sum_by_gene[which_genes] <- raw_sum_by_gene[which_genes] + sum_part }  # Divide each sum by cell count to get mean reads per cell (for each gene), # implicitly averaging in all zero entries in X even though they weren't included # in the sparse query results. genes_df$raw_mean <- raw_sum_by_gene / query$n_obs genes_df #>            feature_id  feature_name     raw_mean #> 1  ENSMUSG00000051951          Xkr4 1.397121e+00 #> 2  ENSMUSG00000025900           Rp1 3.162902e-01 #> 3  ENSMUSG00000025902         Sox17 6.604085e+01 #> 4  ENSMUSG00000033845        Mrpl15 3.939172e+01 #> 5  ENSMUSG00000025903        Lypla1 1.986548e+01 #> 6  ENSMUSG00000033813         Tcea1 4.305924e+01 #> 7  ENSMUSG00000002459         Rgs20 3.496194e+00 #> 8  ENSMUSG00000033793       Atp6v1h 7.470932e+01 #> 9  ENSMUSG00000025905         Oprk1 4.568752e-01 #> 10 ENSMUSG00000033774        Npbwr1 1.241003e-04 #> 11 ENSMUSG00000025907        Rb1cc1 3.631679e+01 #> 12 ENSMUSG00000033740          St18 1.660110e+01 #> 13 ENSMUSG00000051285        Pcmtd1 5.410501e+01 #> 14 ENSMUSG00000025909         Sntg1 1.178725e+00 #> 15 ENSMUSG00000061024          Rrs1 2.098927e+01 #> 16 ENSMUSG00000025911        Adhfe1 1.266112e+01 #> 17 ENSMUSG00000079671 2610203C22Rik 9.474621e+00 #> 18 ENSMUSG00000025912         Mybl1 2.643129e-01 #> 19 ENSMUSG00000045210        Vcpip1 3.456668e+01 #> 20 ENSMUSG00000097893 1700034P13Rik 5.721023e-01 #> 21 ENSMUSG00000025915          Sgk3 2.012592e+01 #> 22 ENSMUSG00000098234         Snhg6 6.784314e+00 #> 23 ENSMUSG00000025916   
    Ppp1r42 2.585422e-01 #> 24 ENSMUSG00000025917         Cops5 7.909310e+01 #> 25 ENSMUSG00000056763         Cspp1 1.635604e+01 #> 26 ENSMUSG00000067851       Arfgef1 1.582897e+01 #> 27 ENSMUSG00000042501          Cpa6 1.880119e-02 #> 28 ENSMUSG00000048960         Prex2 2.283623e+01 #> 29 ENSMUSG00000057715 A830018L16Rik 9.992140e-01 #> 30 ENSMUSG00000016918         Sulf1 5.567469e+00 #> 31 ENSMUSG00000025938       Slco5a1 2.452015e-01 #> 32 ENSMUSG00000042414        Prdm14 6.142964e-03 #> 33 ENSMUSG00000005886         Ncoa2 1.707928e+01 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 52384 rows ]"},{"path":"/articles/census_compute_over_X.html","id":"counting-cells-grouped-by-dataset-and-gene","dir":"Articles","previous_headings":"","what":"Counting cells grouped by dataset and gene","title":"Computing on X using online (incremental) algorithms","text":"goal example count number cells nonzero reads, grouped gene Census dataset_id. result data frame dataset, gene, number cells nonzero reads dataset gene. multi-factor aggregation, ’ll take advantage dplyr routines instead lower-level vector indexer shown . presentation purposes, ’ll limit query four genes, can expanded genes easily. 
Don’t forget close census.","code":"library(\"dplyr\")  query <- census$get(\"census_data\")$get(\"mus_musculus\")$axis_query(   measurement_name = \"RNA\",   obs_query = SOMAAxisQuery$new(value_filter = \"tissue=='brain'\"),   var_query = SOMAAxisQuery$new(value_filter = \"feature_name %in% c('Malat1', 'Ptprd', 'Dlg2', 'Pcdh9')\") )  obs_tbl <- query$obs(column_names = c(\"soma_joinid\", \"dataset_id\"))$concat() obs_df <- data.frame(   # materialize soma_joinid as character to avoid overflowing R 32-bit integer   cell_id = as.character(obs_tbl$soma_joinid),   dataset_id = obs_tbl$dataset_id$as_vector() ) var_tbl <- query$var(column_names = c(\"soma_joinid\", \"feature_name\"))$concat() var_df <- data.frame(   gene_id = as.character(var_tbl$soma_joinid),   feature_name = var_tbl$feature_name$as_vector() )  # accumulator for # cells by dataset & gene n_cells_grouped <- data.frame(   \"dataset_id\" = character(0),   \"gene_id\" = character(0),   \"n_cells\" = numeric(0) )  # iterate through in-memory shards of query results tables <- query$X(\"raw\")$tables() while (!tables$read_complete()) {   table_part <- tables$read_next()    # prepare a (dataset,gene,1) tuple for each entry in table_part   n_cells_part <- data.frame(     \"cell_id\" = as.character(table_part$soma_dim_0),     \"gene_id\" = as.character(table_part$soma_dim_1),     \"n_cells\" = 1   )   n_cells_part <- left_join(n_cells_part, obs_df, by = \"cell_id\")   stopifnot(sum(is.null(n_cells_part$dataset_id)) == 0)    # fold those into n_cells_grouped   n_cells_grouped <- n_cells_part %>%     select(-cell_id) %>%     bind_rows(n_cells_grouped) %>%     group_by(dataset_id, gene_id) %>%     summarise(n_cells = sum(n_cells)) %>%     ungroup() }  # add gene names for display n_cells_grouped <- left_join(n_cells_grouped, var_df, by = \"gene_id\") stopifnot(sum(is.null(n_cells_grouped$feature_name)) == 0) n_cells_grouped[c(\"dataset_id\", \"feature_name\", \"n_cells\")] #> # A tibble: 21 x 3 #>    dataset_id     
                      feature_name n_cells #>                                               #>  1 3bbb6cf9-72b9-41be-b568-656de6eb18b5 Ptprd          79578 #>  2 3bbb6cf9-72b9-41be-b568-656de6eb18b5 Dlg2           79513 #>  3 3bbb6cf9-72b9-41be-b568-656de6eb18b5 Pcdh9          79476 #>  4 3bbb6cf9-72b9-41be-b568-656de6eb18b5 Malat1         79667 #>  5 58b01044-c5e5-4b0f-8a2d-6ebf951e01ff Ptprd            474 #>  6 58b01044-c5e5-4b0f-8a2d-6ebf951e01ff Dlg2              81 #>  7 58b01044-c5e5-4b0f-8a2d-6ebf951e01ff Pcdh9            125 #>  8 58b01044-c5e5-4b0f-8a2d-6ebf951e01ff Malat1         12622 #>  9 66ff82b4-9380-469c-bc4b-cfa08eacd325 Dlg2             856 #> 10 66ff82b4-9380-469c-bc4b-cfa08eacd325 Pcdh9           2910 #> # i 11 more rows census$close()"},{"path":"/articles/census_dataset_presence.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Genes measured in each cell (dataset presence matrix)","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version).","code":"library(\"cellxgene.census\") census <- open_soma()"},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-ids-of-the-census-datasets","dir":"Articles","previous_headings":"","what":"Fetching the IDs of the Census datasets","title":"Genes measured in each cell (dataset presence matrix)","text":"Let’s grab table datasets included Census use table combination presence matrix .","code":"# Grab the experiment containing human data, and the measurement therein with RNA human <- census$get(\"census_data\")$get(\"homo_sapiens\") human_rna <- human$ms$get(\"RNA\")  # The census-wide datasets datasets_df <- as.data.frame(census$get(\"census_info\")$get(\"datasets\")$read()$concat()) print(datasets_df) #>    soma_joinid                        collection_id #> 1            0 4dca242c-d302-4dba-a68f-4c61e7bad553 #> 2            1 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 3            2 
d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 4            3 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 5            4 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 6            5 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 7            6 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 8            7 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 9            8 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 10           9 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #> 11          10 d17249d2-0e6e-4500-abb8-e6c93fa1ac6f #>                                                                       collection_name #> 1                Comparative transcriptomics reveals human-specific cortical features #> 2  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 3  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 4  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 5  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 6  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 7  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 8  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 9  Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 10 Transcriptomic cytoarchitecture reveals principles of human neocortex organization #> 11 Transcriptomic cytoarchitecture reveals principles of human neocortex organization #>             collection_doi                           dataset_id #> 1  10.1126/science.ade9516 2bdd3a2c-2ff4-4314-adf3-8a06b797a33a #> 2  10.1126/science.adf6812 f5b0810c-1664-4a62-ad06-be1d9964aa8b #> 3  10.1126/science.adf6812 e4ddac12-f48f-4455-8e8d-c2a48a683437 #> 4  10.1126/science.adf6812 e2808a6e-e2ea-41b9-b38c-4a08f1677f02 #> 5  10.1126/science.adf6812 d01c9dff-abd1-4825-bf30-2eb2ba74597e #> 6  10.1126/science.adf6812 
c3aa4f95-7a18-4a7d-8dd8-ca324d714363 #> 7  10.1126/science.adf6812 be401db3-d732-408a-b0c4-71af0458b8ab #> 8  10.1126/science.adf6812 a5d5c529-8a1f-40b5-bda3-35208970070d #> 9  10.1126/science.adf6812 9c63201d-bfd9-41a8-bbbc-18d947556f3d #> 10 10.1126/science.adf6812 93cb76aa-a84b-4a92-8e6c-66a914e26d4c #> 11 10.1126/science.adf6812 8d1dd010-5cbc-43fb-83f8-e0de8e8517da #>                      dataset_version_id #> 1  7eb7f2fd-fd74-4c99-863c-97836415652e #> 2  d4427196-7876-4bdd-a929-ae4d177ec776 #> 3  3280113b-7148-4a3e-98d4-015f443aab8a #> 4  dc092185-3b8e-4fcb-ae21-1dc106d683ac #> 5  c4959ded-83dc-4442-aac7-9a59bdb47801 #> 6  0476ef54-aefe-4754-b0e9-d9fcd75adff4 #> 7  ee027704-72aa-4195-a467-0754db1ed65d #> 8  d47c0742-cea2-46c1-9e72-4d479214041c #> 9  8b09695a-1426-4867-961e-c40a1fbcc2da #> 10 98ad7381-f464-4f49-b850-5321b4f98be6 #> 11 c56683d2-452a-45dc-b402-35397e27e325 #>                                           dataset_title #> 1                               Human: Great apes study #> 2                       Dissection: Angular gyrus (AnG) #> 3                Supercluster: CGE-derived interneurons #> 4               Dissection: Primary auditory cortex(A1) #> 5  Supercluster: Deep layer (non-IT) excitatory neurons #> 6        Supercluster: IT-projecting excitatory neurons #> 7           Dissection: Anterior cingulate cortex (ACC) #> 8               Human Multiple Cortical Areas SMART-seq #> 9                Supercluster: MGE-derived interneurons #> 10        Dissection: Primary somatosensory cortex (S1) #> 11                Dissection: Primary visual cortex(V1) #>                            dataset_h5ad_path dataset_total_cell_count #> 1  2bdd3a2c-2ff4-4314-adf3-8a06b797a33a.h5ad                   156285 #> 2  f5b0810c-1664-4a62-ad06-be1d9964aa8b.h5ad                   110752 #> 3  e4ddac12-f48f-4455-8e8d-c2a48a683437.h5ad                   129495 #> 4  e2808a6e-e2ea-41b9-b38c-4a08f1677f02.h5ad                   139054 #> 5  
d01c9dff-abd1-4825-bf30-2eb2ba74597e.h5ad                    92969 #> 6  c3aa4f95-7a18-4a7d-8dd8-ca324d714363.h5ad                   638941 #> 7  be401db3-d732-408a-b0c4-71af0458b8ab.h5ad                   135462 #> 8  a5d5c529-8a1f-40b5-bda3-35208970070d.h5ad                    49417 #> 9  9c63201d-bfd9-41a8-bbbc-18d947556f3d.h5ad                   185477 #> 10 93cb76aa-a84b-4a92-8e6c-66a914e26d4c.h5ad                   153159 #> 11 8d1dd010-5cbc-43fb-83f8-e0de8e8517da.h5ad                   241077 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 640 rows ]"},{"path":"/articles/census_dataset_presence.html","id":"fetching-the-dataset-presence-matrix","dir":"Articles","previous_headings":"","what":"Fetching the dataset presence matrix","title":"Genes measured in each cell (dataset presence matrix)","text":"Now let’s fetch dataset presence matrix. convenience, read entire presence matrix (Homo sapiens) sparse matrix. convenience function providing capability: also need var dataframe, read R data frame convenient manipulation:","code":"presence_matrix <- get_presence_matrix(census, \"Homo sapiens\", \"RNA\") print(dim(presence_matrix)) #> NULL var_df <- as.data.frame(human_rna$var$read()$concat()) print(var_df) #>    soma_joinid      feature_id feature_name feature_length      nnz n_measured_obs #> 1            0 ENSG00000233576      HTR3C2P           1057    69370       19581263 #> 2            1 ENSG00000121410         A1BG           3999  5640476       62641311 #> 3            2 ENSG00000268895     A1BG-AS1           3374  3071864       61946057 #> 4            3 ENSG00000148584         A1CF           9603   734347       58195911 #> 5            4 ENSG00000175899          A2M           6318  7894261       62704378 #> 6            5 ENSG00000245105      A2M-AS1           2948  1637794       62086816 #> 7            6 ENSG00000166535        A2ML1           7156  2156616       60911688 #> 8            7 ENSG00000256069        A2MP1           4657   835384    
   23554778 #> 9            8 ENSG00000184389      A3GALT2           1023   439067       53780311 #> 10           9 ENSG00000128274       A4GALT           3358  2432348       62706770 #> 11          10 ENSG00000118017        A4GNT           1779    52430       56117399 #> 12          11 ENSG00000265544         AA06            632   220755       22545140 #> 13          12 ENSG00000081760         AACS          16039 11280800       62842909 #> 14          13 ENSG00000250420       AACSP1           3380   211588       22831831 #> 15          14 ENSG00000114771        AADAC           1632   552258       54941618 #> 16          15 ENSG00000188984      AADACL3           4055    24626       43074608 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 60648 rows ]"},{"path":"/articles/census_dataset_presence.html","id":"identifying-genes-measured-in-a-specific-dataset","dir":"Articles","previous_headings":"","what":"Identifying genes measured in a specific dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Now dataset table, genes metadata table, dataset presence matrix, can check gene set genes measured specific dataset. Important: presence matrix indexed soma_joinid, positionally indexed. words: first dimension presence matrix dataset’s soma_joinid, stored census_datasets dataframe. second dimension presence matrix feature’s soma_joinid, stored var dataframe. presence matrix method $take() lets slice soma_joinids census_datasets var. 
full presence matrix, slices , can exported regular matrix method $get_one_based_matrix() Let’s find gene \"ENSG00000286096\" measured dataset id \"97a17473-e2b1-4f31-a544-44a60773e2dd\".","code":"# Get soma_joinid for datasets and genes of interest var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"] dataset_joinid <- datasets_df$soma_joinid[datasets_df$dataset_id == \"97a17473-e2b1-4f31-a544-44a60773e2dd\"]  # Slice presence matrix with datasets and genes of interest presence_matrix_slice <- presence_matrix$take(i = dataset_joinid, j = var_joinid)  # Convert presence matrix to regular matrix presence_matrix_slice <- presence_matrix_slice$get_one_based_matrix()  # Find out if the gene is present in this dataset is_present <- presence_matrix_slice[, , drop = TRUE] cat(paste(\"Feature is\", if (is_present) \"present.\" else \"not present.\")) #> Feature is present."},{"path":"/articles/census_dataset_presence.html","id":"identifying-datasets-that-measured-specific-genes","dir":"Articles","previous_headings":"","what":"Identifying datasets that measured specific genes","title":"Genes measured in each cell (dataset presence matrix)","text":"Similarly, can determine datasets measured specific gene set genes.","code":"# Grab the feature's soma_joinid from the var dataframe var_joinid <- var_df$soma_joinid[var_df$feature_id == \"ENSG00000286096\"]  # The presence matrix is indexed by the joinids of the dataset and var dataframes, # so slice out the feature of interest by its joinid. 
presence_matrix_slice <- presence_matrix$take(j = var_joinid)$get_one_based_matrix() measured_datasets <- presence_matrix_slice[, , drop = TRUE] != 0 dataset_joinids <- datasets_df$soma_joinid[measured_datasets]  # From the datasets dataframe, slice out the datasets which have a joinid in the list print(datasets_df[dataset_joinids, ]) #>    soma_joinid                        collection_id #> 63          62 3f50314f-bdc9-40c6-8e4a-b0901ebfbe4c #> 64          63 e5f58829-1a66-40b5-a624-9046778e74f5 #> 65          64 e5f58829-1a66-40b5-a624-9046778e74f5 #> 66          65 e5f58829-1a66-40b5-a624-9046778e74f5 #> 67          66 e5f58829-1a66-40b5-a624-9046778e74f5 #> 69          68 e5f58829-1a66-40b5-a624-9046778e74f5 #> 70          69 e5f58829-1a66-40b5-a624-9046778e74f5 #> 72          71 e5f58829-1a66-40b5-a624-9046778e74f5 #> 73          72 e5f58829-1a66-40b5-a624-9046778e74f5 #> 77          76 e5f58829-1a66-40b5-a624-9046778e74f5 #> 78          77 e5f58829-1a66-40b5-a624-9046778e74f5 #>                                                                                                                             collection_name #> 63 Single-cell sequencing links multiregional immune landscapes and tissue-resident T cells in ccRCC to tumor topology and therapy efficacy #> 64                                                                                                                           Tabula Sapiens #> 65                                                                                                                           Tabula Sapiens #> 66                                                                                                                           Tabula Sapiens #> 67                                                                                                                           Tabula Sapiens #> 69                                                                                                                           Tabula Sapiens 
#> 70                                                                                                                           Tabula Sapiens #> 72                                                                                                                           Tabula Sapiens #> 73                                                                                                                           Tabula Sapiens #> 77                                                                                                                           Tabula Sapiens #> 78                                                                                                                           Tabula Sapiens #>                 collection_doi                           dataset_id #> 63 10.1016/j.ccell.2021.03.007 bd65a70f-b274-4133-b9dd-0d1431b6af34 #> 64     10.1126/science.abl4896 ff45e623-7f5f-46e3-b47d-56be0341f66b #> 65     10.1126/science.abl4896 f01bdd17-4902-40f5-86e3-240d66dd2587 #> 66     10.1126/science.abl4896 e6a11140-2545-46bc-929e-da243eed2cae #> 67     10.1126/science.abl4896 e5c63d94-593c-4338-a489-e1048599e751 #> 69     10.1126/science.abl4896 d77ec7d6-ef2e-49d6-9e79-05b7f8881484 #> 70     10.1126/science.abl4896 cee11228-9f0b-4e57-afe2-cfe15ee56312 #> 72     10.1126/science.abl4896 a2d4d33e-4c62-4361-b80a-9be53d2e50e8 #> 73     10.1126/science.abl4896 a0754256-f44b-4c4a-962c-a552e47d3fdc #> 77     10.1126/science.abl4896 6d41668c-168c-4500-b06a-4674ccf3e19d #> 78     10.1126/science.abl4896 5e5e7a2f-8f1c-42ac-90dc-b4f80f38e84c #>                      dataset_version_id #> 63 71815674-a8cf-4add-95dd-c5d5d1631597 #> 64 0b29f4ce-5e72-4356-b74b-b54714979234 #> 65 bd13c169-af97-4d8f-ba45-7588808c2e48 #> 66 47615a3d-0a9f-4a78-88ef-5cce2a84637d #> 67 ac7714f0-dce2-40ba-9912-324de6c9a77f #> 69 c7679ec2-652d-437a-bded-3ec2344829e4 #> 70 f89fa18f-c32b-4bae-9511-1a4d18f200e1 #> 72 37ada0d2-9970-4ff2-8bcd-41e80ab6e081 #> 73 1cda78aa-f0d9-4d50-96bf-8bc309318802 #> 
77 5297a910-453f-4e3f-af16-e18fd5a79090 #> 78 b783b036-c837-4290-a07d-f6b79a301f59 #>                                                                                                                               dataset_title #> 63 Single-cell sequencing links multiregional immune landscapes and tissue-resident T cells in ccRCC to tumor topology and therapy efficacy #> 64                                                                                                                Tabula Sapiens - Pancreas #> 65                                                                                                          Tabula Sapiens - Salivary_Gland #> 66                                                                                                                   Tabula Sapiens - Heart #> 67                                                                                                                 Tabula Sapiens - Bladder #> 69                                                                                                                Tabula Sapiens - Prostate #> 70                                                                                                                  Tabula Sapiens - Spleen #> 72                                                                                                             Tabula Sapiens - Vasculature #> 73                                                                                                                     Tabula Sapiens - Eye #> 77                                                                                                                   Tabula Sapiens - Liver #> 78                                                                                                                     Tabula Sapiens - Fat #>                            dataset_h5ad_path dataset_total_cell_count #> 63 bd65a70f-b274-4133-b9dd-0d1431b6af34.h5ad                   167283 #> 64 ff45e623-7f5f-46e3-b47d-56be0341f66b.h5ad        
            13497 #> 65 f01bdd17-4902-40f5-86e3-240d66dd2587.h5ad                    27199 #> 66 e6a11140-2545-46bc-929e-da243eed2cae.h5ad                    11505 #> 67 e5c63d94-593c-4338-a489-e1048599e751.h5ad                    24583 #> 69 d77ec7d6-ef2e-49d6-9e79-05b7f8881484.h5ad                    16375 #> 70 cee11228-9f0b-4e57-afe2-cfe15ee56312.h5ad                    34004 #> 72 a2d4d33e-4c62-4361-b80a-9be53d2e50e8.h5ad                    16037 #> 73 a0754256-f44b-4c4a-962c-a552e47d3fdc.h5ad                    10650 #> 77 6d41668c-168c-4500-b06a-4674ccf3e19d.h5ad                     5007 #> 78 5e5e7a2f-8f1c-42ac-90dc-b4f80f38e84c.h5ad                    20263 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 31 rows ]"},{"path":"/articles/census_dataset_presence.html","id":"identifying-all-genes-measured-in-a-dataset","dir":"Articles","previous_headings":"","what":"Identifying all genes measured in a dataset","title":"Genes measured in each cell (dataset presence matrix)","text":"Finally, can find set genes measured cells given dataset.","code":"# Slice the dataset(s) of interest, and get the joinid(s) dataset_joinids <- datasets_df$soma_joinid[datasets_df$collection_id == \"17481d16-ee44-49e5-bcf0-28c0780d8c4a\"]  # Slice the presence matrix by the first dimension, i.e., by dataset presence_matrix_slice <- presence_matrix$take(i = dataset_joinids)$get_one_based_matrix() genes_measured <- Matrix::colSums(presence_matrix_slice) > 0 var_joinids <- var_df$soma_joinid[genes_measured]  print(var_df[var_joinids, ]) #>    soma_joinid      feature_id feature_name feature_length      nnz n_measured_obs #> 1            0 ENSG00000233576      HTR3C2P           1057    69370       19581263 #> 2            1 ENSG00000121410         A1BG           3999  5640476       62641311 #> 3            2 ENSG00000268895     A1BG-AS1           3374  3071864       61946057 #> 4            3 ENSG00000148584         A1CF           9603   734347       58195911 #> 5            4 
ENSG00000175899          A2M           6318  7894261       62704378 #> 6            5 ENSG00000245105      A2M-AS1           2948  1637794       62086816 #> 9            8 ENSG00000184389      A3GALT2           1023   439067       53780311 #> 10           9 ENSG00000128274       A4GALT           3358  2432348       62706770 #> 12          11 ENSG00000265544         AA06            632   220755       22545140 #> 14          13 ENSG00000250420       AACSP1           3380   211588       22831831 #> 16          15 ENSG00000188984      AADACL3           4055    24626       43074608 #> 18          17 ENSG00000240602      AADACP1           2012    29491       23133490 #> 19          18 ENSG00000109576        AADAT           2970  4524608       61559099 #> 20          19 ENSG00000158122       PRXL2C           3098  5424472       55618144 #> 21          20 ENSG00000103591        AAGAB           4138 12427442       62843055 #> 22          21 ENSG00000115977         AAK1          24843 29280566       62664775 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 27195 rows ]"},{"path":"/articles/census_dataset_presence.html","id":"close-the-census","dir":"Articles","previous_headings":"Identifying all genes measured in a dataset","what":"Close the census","title":"Genes measured in each cell (dataset presence matrix)","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_datasets.html","id":"fetching-the-datasets-table","dir":"Articles","previous_headings":"","what":"Fetching the datasets table","title":"Census Datasets example","text":"Census contains top-level data frame itemizing datasets contained therein. 
can read SOMADataFrame Arrow Table: R data frame: sum cell counts across datasets match number cells across SOMA experiments (human, mouse).","code":"library(\"cellxgene.census\") census <- open_soma() census_datasets <- census$get(\"census_info\")$get(\"datasets\")$read()$concat() print(census_datasets) #> Table #> 651 rows x 9 columns #> $soma_joinid  #> $collection_id  #> $collection_name  #> $collection_doi  #> $dataset_id  #> $dataset_version_id  #> $dataset_title  #> $dataset_h5ad_path  #> $dataset_total_cell_count  census_datasets <- as.data.frame(census_datasets) print(census_datasets[, c(   \"dataset_id\",   \"dataset_title\",   \"dataset_total_cell_count\" )]) #>                              dataset_id #> 1  2bdd3a2c-2ff4-4314-adf3-8a06b797a33a #> 2  f5b0810c-1664-4a62-ad06-be1d9964aa8b #> 3  e4ddac12-f48f-4455-8e8d-c2a48a683437 #> 4  e2808a6e-e2ea-41b9-b38c-4a08f1677f02 #> 5  d01c9dff-abd1-4825-bf30-2eb2ba74597e #> 6  c3aa4f95-7a18-4a7d-8dd8-ca324d714363 #> 7  be401db3-d732-408a-b0c4-71af0458b8ab #> 8  a5d5c529-8a1f-40b5-bda3-35208970070d #> 9  9c63201d-bfd9-41a8-bbbc-18d947556f3d #> 10 93cb76aa-a84b-4a92-8e6c-66a914e26d4c #> 11 8d1dd010-5cbc-43fb-83f8-e0de8e8517da #> 12 716a4acc-919e-4326-9672-ebe06ede84e6 #> 13 5bdc423a-59e6-457d-aa01-debd2c9c564f #> 14 5346f9c6-755e-4336-94cc-38706ec00c2f #> 15 015c230d-650c-4527-870d-8a805849a382 #> 16 d567b692-c374-4628-a508-8008f6778f22 #> 17 cf83c98a-3791-4537-bbde-a719f6d73c13 #> 18 738942eb-ac72-44ff-a64b-8943b5ecd8d9 #> 19 f8d8b443-bca6-4c3c-9042-669dfb7f8030 #> 20 f5be4b96-f5a3-4c3d-84ac-6f69daf744d5 #> 21 dea1aa78-c0a2-413f-b375-f91cce49e4d0 #> 22 92161459-9103-4379-ae34-73a38eee1d1d #> 23 5829c7ba-697f-418e-8b98-d605b192dc48 #> 24 4dd1cd23-fc4d-4fd1-9709-602540f3ca6f #> 25 2856d06c-0ff9-4e01-bfc9-202b74d0b60f #> 26 251b1a7e-d050-4486-8d50-4c2619eb0f46 #> 27 07760522-707a-4a1c-8891-dbd1226d6b27 #> 28 9fcb0b73-c734-40a5-be9c-ace7eea401c9 #> 29 1a38e762-2465-418f-b81c-6a4bce261c34 #> 30 
f16a8f4d-bc97-43c5-a2f6-bbda952e4c5c #> 31 94c41723-b2c4-4b59-a49a-64c9b851903e #> 32 6ceeaa86-9ceb-4582-b390-6d4dd6ff0572 #> 33 9a64bf99-ebe5-4276-93a8-bee9dff1cd47 #> 34 fc0ceb80-d2d9-47c1-9d78-b0e45c64c500 #> 35 d0ea3ec4-0f3b-4649-9146-1c0b5f303a55 #> 36 b8920ef5-7d22-497b-abca-a7a9eb76d79a #> 37 b1d37bbd-9ae4-4404-b2f9-f2fe66750e4e #> 38 a4e89c26-e8d4-4471-9b06-16a1405880f0 #> 39 a190b2e9-3796-4785-9a2f-013e2a9a43e6 #> 40 9ff9f9ba-016b-4cbb-8899-45dc20860b8b #> 41 9940f951-3dc0-4579-bbb2-2392786e59a3 #> 42 74d584f0-74fc-482e-b944-e76f29c1ab85 #> 43 6f7fd0f1-a2ed-4ff1-80d3-33dde731cbc3 #> 44 6cda07c7-5d7a-41ba-9799-5bb73da25a60 #> 45 646e3e87-e46b-4b12-85b5-8d8589e26088 #> 46 6437bc9c-16cb-46c8-8f79-9a7384a0212a #> 47 58c43cc2-e00e-43c4-94eb-8501369264e1 #> 48 53bc5729-6202-4351-bc99-1f36139e9dc4 #> 49 44c83972-e5d2-4858-ac58-2df9f4bf564b #> 50 2ecc72f8-085f-4e86-8692-771f316c54f6 #> 51 2e5a9b5d-d31b-4e9f-a179-d5d70ba459fb #> 52 1c9f5c6b-73da-4d17-95de-df080ffe0df1 #> 53 100c6145-7b0e-4ba6-81c1-ffebed0d1ac4 #> 54 0ed60482-a34f-4268-b576-d69cc30210f6 #> 55 0eccaf0c-19d2-4900-9962-899378adf8be #> 56 04c94a7d-1133-42c9-bb48-c697bd302a8d #> 57 0374f03c-62e2-4859-8a14-acb00b0627d5 #> 58 03181d87-4769-41e7-8c39-d9a81835f0d2 #> 59 f171db61-e57e-4535-a06a-35d8b6ef8f2b #> 60 ecf2e08e-2032-4a9e-b466-b65b395f4a02 #> 61 74cff64f-9da9-4b2a-9b3b-8a04a1598040 #> 62 5af90777-6760-4003-9dba-8f945fec6fdf #> 63 bd65a70f-b274-4133-b9dd-0d1431b6af34 #> 64 ff45e623-7f5f-46e3-b47d-56be0341f66b #> 65 f01bdd17-4902-40f5-86e3-240d66dd2587 #> 66 e6a11140-2545-46bc-929e-da243eed2cae #> 67 e5c63d94-593c-4338-a489-e1048599e751 #> 68 d8732da6-8d1d-42d9-b625-f2416c30054b #> 69 d77ec7d6-ef2e-49d6-9e79-05b7f8881484 #> 70 cee11228-9f0b-4e57-afe2-cfe15ee56312 #> 71 a357414d-2042-4eb5-95f0-c58604a18bdd #> 72 a2d4d33e-4c62-4361-b80a-9be53d2e50e8 #> 73 a0754256-f44b-4c4a-962c-a552e47d3fdc #> 74 983d5ec9-40e8-4512-9e65-a572a9c486cb #> 75 7357cee7-9f7f-4ab0-8cec-90de8f047e38 #> 76 
6ec405bb-4727-4c6d-ab4e-01fe489af7ea #> 77 6d41668c-168c-4500-b06a-4674ccf3e19d #> 78 5e5e7a2f-8f1c-42ac-90dc-b4f80f38e84c #> 79 55cf0ea3-9d2b-4294-871e-bb4b49a79fc7 #> 80 4f1555bc-4664-46c3-a606-78d34dd10d92 #> 81 2ba40233-8576-4dec-a5f1-2adfa115e2dc #> 82 2423ce2c-3149-4cca-a2ff-cf682ea29b5f #> 83 1c9eb291-6d31-47e1-96b2-129b5e1ae64f #> 84 18eb630b-a754-4111-8cd4-c24ec80aa5ec #> 85 0d2ee4ac-05ee-40b2-afb6-ebb584caa867 #>                                                                                                                               dataset_title #> 1                                                                                                                   Human: Great apes study #> 2                                                                                                           Dissection: Angular gyrus (AnG) #> 3                                                                                                    Supercluster: CGE-derived interneurons #> 4                                                                                                   Dissection: Primary auditory cortex(A1) #> 5                                                                                      Supercluster: Deep layer (non-IT) excitatory neurons #> 6                                                                                            Supercluster: IT-projecting excitatory neurons #> 7                                                                                               Dissection: Anterior cingulate cortex (ACC) #> 8                                                                                                   Human Multiple Cortical Areas SMART-seq #> 9                                                                                                    Supercluster: MGE-derived interneurons #> 10                                                                                            Dissection: Primary somatosensory cortex (S1) #> 
11                                                                                                    Dissection: Primary visual cortex(V1) #> 12                                                                                         Dissection: Dorsolateral prefrontal cortex (DFC) #> 13                                                                                                    Dissection: Primary motor cortex (M1) #> 14                                                                                                         Supercluster: Non-neuronal cells #> 15                                                                                                  Dissection: Middle temporal gyrus (MTG) #> 16                                                                       Combined single cell and single nuclei RNA-Seq data - Heart Global #> 17                                                                                                    Global dataset of infant KMT2Ar B-ALL #> 18                                                                                     Normal immune cells landscape of infant KMT2Ar B-ALL #> 19                                                                                                      Human Human Microglia 10x scRNA-seq #> 20                                                                                                    Human Endothelial cells 10x scRNA-seq #> 21                                                                                                 Human Nurr-Negative Nuclei 10x scRNA-seq #> 22                                                                                                 Human Nurr-Positive Nuclei 10x scRNA-seq #> 23                                                                                                     Human Oligodendrocytes 10x scRNA-seq #> 24                                                                                                            Human OPC Cells 10x scRNA-seq 
#> 25                                                                                                           Human DA Neurons 10x scRNA-seq #> 26                                                                                                       Human Non-DA Neurons 10x scRNA-seq #> 27                                                                                                           Human Astrocytes 10x scRNA-seq #> 28                                                                              An Integrated Single Cell Meta-atlas of Human Periodontitis #> 29                                                                Single-cell analysis of prenatal and postnatal human cortical development #> 30                                                       All - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse #> 31                                                                                    snRNA-seq of human anterior and posterior hippocampus #> 32                                                                                                                        3-prime FGID data #> 33                                                      Single-Cell RNA Sequencing of Breast Tissues: Cell Subtypes and Cancer Risk Factors #> 34                                                                            Sst Chodl - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 35                                                                                  L6b - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 36                                                                              L5/6 NP - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 37                                                                                 Sncg - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 38                                                                                L6 CT - DLPFC: Seattle Alzheimer's Disease Atlas 
(SEA-AD) #> 39                                                                           Lamp5 Lhx6 - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 40                                                                                L4 IT - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 41                                                                      Oligodendrocyte - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 42                                                                            Astrocyte - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 43                                                                       Whole Taxonomy - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 44                                                                                L5 ET - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 45                                                                              L2/3 IT - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 46                                                                                L6 IT - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 47                                                                                  OPC - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 48                                                                                  Vip - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 49                                                                                L5 IT - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 50                                                                          Endothelial - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 51                                                                                 VLMC - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 52                                                                           L6 IT Car3 - DLPFC: Seattle Alzheimer's Disease 
Atlas (SEA-AD) #> 53                                                                        Microglia-PVM - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 54                                                                                Lamp5 - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 55                                                                                 Pax6 - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 56                                                                                Pvalb - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 57                                                                           Chandelier - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 58                                                                                  Sst - DLPFC: Seattle Alzheimer's Disease Atlas (SEA-AD) #> 59                                                                                                                   donor_p13_trophoblasts #> 60                                                                                                                  All donors trophoblasts #> 61                                                                                                     All donors all cell states (in vivo) #> 62                                                                     Single-cell transcriptomic datasets of Renal cell carcinoma patients #> 63 Single-cell sequencing links multiregional immune landscapes and tissue-resident T cells in ccRCC to tumor topology and therapy efficacy #> 64                                                                                                                Tabula Sapiens - Pancreas #> 65                                                                                                          Tabula Sapiens - Salivary_Gland #> 66                                                                                                                   
Tabula Sapiens - Heart #> 67                                                                                                                 Tabula Sapiens - Bladder #> 68                                                                                                                 Tabula Sapiens - Trachea #> 69                                                                                                                Tabula Sapiens - Prostate #> 70                                                                                                                  Tabula Sapiens - Spleen #> 71                                                                                                         Tabula Sapiens - Small_Intestine #> 72                                                                                                             Tabula Sapiens - Vasculature #> 73                                                                                                                     Tabula Sapiens - Eye #> 74                                                                                                                   Tabula Sapiens - Blood #> 75                                                                                                         Tabula Sapiens - Large_Intestine #> 76                                                                                                                  Tabula Sapiens - Uterus #> 77                                                                                                                   Tabula Sapiens - Liver #> 78                                                                                                                     Tabula Sapiens - Fat #> 79                                                                                                                  Tabula Sapiens - Tongue #> 80                                                                                                             
Tabula Sapiens - Bone_Marrow #> 81                                                                                                                 Tabula Sapiens - Mammary #> 82                                                                                                                  Tabula Sapiens - Kidney #> 83                                                                                                                  Tabula Sapiens - Muscle #> 84                                                                                                              Tabula Sapiens - Lymph_Node #> 85                                                                                                                    Tabula Sapiens - Lung #>    dataset_total_cell_count #> 1                    156285 #> 2                    110752 #> 3                    129495 #> 4                    139054 #> 5                     92969 #> 6                    638941 #> 7                    135462 #> 8                     49417 #> 9                    185477 #> 10                   153159 #> 11                   241077 #> 12                   113339 #> 13                   114605 #> 14                   108940 #> 15                   148374 #> 16                   493236 #> 17                   128588 #> 18                    36313 #> 19                    33041 #> 20                    14903 #> 21                   104097 #> 22                    80576 #> 23                   178815 #> 24                    13691 #> 25                    22048 #> 26                    91479 #> 27                    33506 #> 28                   105918 #> 29                   700391 #> 30                   356213 #> 31                   129905 #> 32                    89849 #> 33                    52681 #> 34                     1772 #> 35                    17996 #> 36                    18154 #> 37                    23640 #> 38                    27454 #> 39                    21603 #> 40           
         76195 #> 41                   136076 #> 42                    82936 #> 43                  1309414 #> 44                     3848 #> 45                   317116 #> 46                    44174 #> 47                    27670 #> 48                    95014 #> 49                    97173 #> 50                     2496 #> 51                     4619 #> 52                    13007 #> 53                    40625 #> 54                    52828 #> 55                     8984 #> 56                   109618 #> 57                    14871 #> 58                    71545 #> 59                    31497 #> 60                    67070 #> 61                   286326 #> 62                   270855 #> 63                   167283 #> 64                    13497 #> 65                    27199 #> 66                    11505 #> 67                    24583 #> 68                     9522 #> 69                    16375 #> 70                    34004 #> 71                    12467 #> 72                    16037 #> 73                    10650 #> 74                    50115 #> 75                    13680 #> 76                     7124 #> 77                     5007 #> 78                    20263 #> 79                    15020 #> 80                    12297 #> 81                    11375 #> 82                     9641 #> 83                    30746 #> 84                    53275 #> 85                    35682 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 566 rows ] census_data <- census$get(\"census_data\") all_experiments <- lapply(census_data$to_list(), function(x) census_data$get(x$name)) print(all_experiments) #> $homo_sapiens #>  #>   uri: s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/census_data/homo_sapiens  #>   arrays: obs*  #>   groups: ms*  #>  #> $mus_musculus #>  #>   uri: s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/census_data/mus_musculus  #>   arrays: obs*  #>   groups: ms* experiments_total_cells <- 
sum(sapply(all_experiments, function(x) {   nrow(x$obs$read(column_names = c(\"soma_joinid\"))$concat()) }))  print(paste(\"Found\", experiments_total_cells, \"cells in all experiments.\")) #> [1] \"Found 68683222 cells in all experiments.\" print(paste(   \"Found\", sum(as.vector(census_datasets$dataset_total_cell_count)),   \"cells in all datasets.\" )) #> [1] \"Found 68683222 cells in all datasets.\""},{"path":"/articles/census_datasets.html","id":"fetching-the-expression-data-from-a-single-dataset","dir":"Articles","previous_headings":"","what":"Fetching the expression data from a single dataset","title":"Census Datasets example","text":"Let’s pick one dataset slice census, turn Seurat -memory object. (requires Seurat package installed beforehand.) Create query mouse experiment, “RNA” measurement, dataset_id.","code":"census_datasets[census_datasets$dataset_id == \"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\", ] #>     soma_joinid                        collection_id    collection_name #> 581         580 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis #>                collection_doi                           dataset_id #> 581 10.1038/s41586-020-2496-1 0bd1a1de-3aee-40e0-b2ec-86c7a30c7149 #>                       dataset_version_id #> 581 ff352f35-58a2-4962-b716-649d1f9e9f44 #>                                                                                        dataset_title #> 581 Bone marrow - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - 10x #>                             dataset_h5ad_path dataset_total_cell_count #> 581 0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad                    40220 library(\"tiledbsoma\") obs_query <- SOMAAxisQuery$new(   value_filter = \"dataset_id == '0bd1a1de-3aee-40e0-b2ec-86c7a30c7149'\" ) expt_query <- census_data$get(\"mus_musculus\")$axis_query(   measurement_name = \"RNA\",   obs_query = obs_query ) dataset_seurat <- expt_query$to_seurat(c(counts = \"raw\")) print(dataset_seurat) #> An 
object of class Seurat  #> 52417 features across 40220 samples within 1 assay  #> Active assay: RNA (52417 features, 0 variable features) #>  2 layers present: counts, data #>  1 dimensional reduction calculated: scvi"},{"path":"/articles/census_datasets.html","id":"downloading-the-original-source-h5ad-file-of-a-dataset","dir":"Articles","previous_headings":"","what":"Downloading the original source H5AD file of a dataset","title":"Census Datasets example","text":"can use cellxgene.census::get_source_h5ad_uri() API fetch URI pointing H5AD associated dataset_id. H5AD can download CZ CELLxGENE Discover, may contain additional data-submitter provided information included Census. can fetch location cloud directly download system. local H5AD file can used R using SeuratDisk’s anndata converter.","code":"# Option 1: Direct download download_source_h5ad(   dataset_id = \"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\",   file = \"/tmp/Tabula_Muris_Senis-bone_marrow.h5ad\",   overwrite = TRUE ) # Option 2: Get location and download via preferred method get_source_h5ad_uri(\"0bd1a1de-3aee-40e0-b2ec-86c7a30c7149\") #> $uri #> [1] \"s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/h5ads/0bd1a1de-3aee-40e0-b2ec-86c7a30c7149.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\""},{"path":"/articles/census_datasets.html","id":"close-the-census","dir":"Articles","previous_headings":"Downloading the original source H5AD file of a dataset","what":"Close the census","title":"Census Datasets example","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. 
Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/census_query_extract.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"library(\"cellxgene.census\") census <- open_soma()"},{"path":"/articles/census_query_extract.html","id":"querying-cell-metadata-obs","dir":"Articles","previous_headings":"","what":"Querying cell metadata (obs)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census, RNA assays, located census$get(\"census_data\")$get(\"homo_sapiens\")$obs. SOMADataFrame can materialized R data frame (tibble) using .data.frame(obs$read()$concat()). mouse cell metadata census$get(\"census_data\")$get(\"mus_musculus\").obs. slicing cell metadata two relevant arguments can passed read(): column_names — character vector indicating metadata columns fetch. Expressions one comparisons Comparisons one       Expressions can combine comparisons using && || op one < | > | <= | >= | == | != %% learn metadata columns available fetching filtering can directly look keys cell metadata. soma_joinid special SOMADataFrame column used join operations. definition columns can found Census schema. can used fetch specific columns specific rows matching condition. latter need know values looking priori. example let’s see possible values available sex. can load cell metadata fetching column sex. can see three different values sex, \"male\", \"female\" \"unknown\". information can fetch cell metatadata specific sex value, example \"unknown\". 
can use column_names value_filter perform specific queries. example let’s fetch disease column cell_type \"B cell\" tissue_general \"lung\".","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\"          #> [22] \"raw_sum\"                                  #> [23] \"nnz\"                                      #> [24] \"raw_mean_nnz\"                             #> [25] \"raw_variance_nnz\"                         #> [26] \"n_measured_vars\" unique(as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(column_names = \"sex\")$concat())) #>             sex #> 1          male #> 224      female #> 3747640 unknown as.data.frame(census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(value_filter = \"sex == 'unknown'\")$concat()) #>   soma_joinid                           dataset_id     assay assay_ontology_term_id #> 1     3747639 
9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 2     3747640 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 3     3747641 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 4     3747642 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 5     3747643 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 6     3747644 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 7     3747645 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 8     3747646 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #> 9     3747647 9fcb0b73-c734-40a5-be9c-ace7eea401c9 10x 3' v2            EFO:0009899 #>    cell_type cell_type_ontology_term_id development_stage #> 1 fibroblast                 CL:0000057 human adult stage #> 2 fibroblast                 CL:0000057 human adult stage #> 3 fibroblast                 CL:0000057 human adult stage #> 4 fibroblast                 CL:0000057 human adult stage #> 5 fibroblast                 CL:0000057 human adult stage #> 6 fibroblast                 CL:0000057 human adult stage #> 7 fibroblast                 CL:0000057 human adult stage #> 8 fibroblast                 CL:0000057 human adult stage #> 9 fibroblast                 CL:0000057 human adult stage #>   development_stage_ontology_term_id disease disease_ontology_term_id #> 1                     HsapDv:0000087  normal             PATO:0000461 #> 2                     HsapDv:0000087  normal             PATO:0000461 #> 3                     HsapDv:0000087  normal             PATO:0000461 #> 4                     HsapDv:0000087  normal             PATO:0000461 #> 5                     HsapDv:0000087  normal             PATO:0000461 #> 6                     HsapDv:0000087  normal             PATO:0000461 #> 7                     HsapDv:0000087  normal             PATO:0000461 #> 8                     HsapDv:0000087 
 normal             PATO:0000461 #> 9                     HsapDv:0000087  normal             PATO:0000461 #>                       donor_id is_primary_data self_reported_ethnicity #> 1 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 2 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 3 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 4 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 5 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 6 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 7 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 8 Pagella_GSE161267_GSM4904134            TRUE                 unknown #> 9 Pagella_GSE161267_GSM4904134            TRUE                 unknown #>   self_reported_ethnicity_ontology_term_id     sex sex_ontology_term_id suspension_type #> 1                                  unknown unknown              unknown            cell #> 2                                  unknown unknown              unknown            cell #> 3                                  unknown unknown              unknown            cell #> 4                                  unknown unknown              unknown            cell #> 5                                  unknown unknown              unknown            cell #> 6                                  unknown unknown              unknown            cell #> 7                                  unknown unknown              unknown            cell #> 8                                  unknown unknown              unknown            cell #> 9                                  unknown unknown              unknown            cell #>    tissue tissue_ontology_term_id tissue_general tissue_general_ontology_term_id #> 1 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 2 gingiva          UBERON:0001828         mucosa                  
UBERON:0000344 #> 3 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 4 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 5 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 6 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 7 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 8 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #> 9 gingiva          UBERON:0001828         mucosa                  UBERON:0000344 #>   raw_sum  nnz raw_mean_nnz raw_variance_nnz n_measured_vars #> 1     547  329     1.662614        14.559604           31602 #> 2     982  563     1.744227         5.315247           31602 #> 3   12467 3809     3.273038       109.305683           31602 #> 4    1053  566     1.860424         7.430042           31602 #> 5     548  363     1.509642         2.410818           31602 #> 6     678  429     1.580420        11.379616           31602 #> 7     848  524     1.618321         9.437216           31602 #> 8     935  608     1.537829         4.868418           31602 #> 9     735  485     1.515464         6.213087           31602 #>  [ reached 'max' / getOption(\"max.print\") -- omitted 3301779 rows ] cell_metadata_b_cell <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   value_filter = \"cell_type == 'B cell' & tissue_general == 'lung'\",   column_names = \"disease\" )  cell_metadata_b_cell <- as.data.frame(cell_metadata_b_cell$concat())  table(cell_metadata_b_cell) #> disease #>                              COVID-19 chronic obstructive pulmonary disease  #>                                  2729                                  6369  #>          hypersensitivity pneumonitis             interstitial lung disease  #>                                    52                                   376  #>                   lung adenocarcinoma             lung large 
cell carcinoma  #>                                 62351                                  1534  #>              lymphangioleiomyomatosis         non-small cell lung carcinoma  #>                                   133                                 17484  #>   non-specific interstitial pneumonia                                normal  #>                                   231                                 25461  #>                 pleomorphic carcinoma                             pneumonia  #>                                  1210                                    50  #>                   pulmonary emphysema                    pulmonary fibrosis  #>                                  1512                                  6798  #>                 pulmonary sarcoidosis             small cell lung carcinoma  #>                                     6                                   583  #>          squamous cell lung carcinoma  #>                                 11920"},{"path":"/articles/census_query_extract.html","id":"querying-gene-metadata-var","dir":"Articles","previous_headings":"","what":"Querying gene metadata (var)","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"human gene metadata Census located census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var. Similarly cell metadata, SOMADataFrame thus can also use method read(). mouse gene metadata census$get(\"census_data\")$get(\"mus_musculus\")$ms$get(\"RNA\")$var. Let’s take look metadata available column selection row filtering. exception soma_joinid columns defined Census schema. Similarly cell metadata, can use operations learn fetch gene metadata. 
example, get feature_name feature_length genes \"ENSG00000161798\" \"ENSG00000188229\" can following.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\" \"nnz\"            #> [6] \"n_measured_obs\" var_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$read(   value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   column_names = c(\"feature_name\", \"feature_length\") )  as.data.frame(var_df$concat()) #>   feature_name feature_length #> 1         AQP5           1884 #> 2       TUBB4B           2037"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-seurat","dir":"Articles","previous_headings":"","what":"Querying expression data as Seurat","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"convenient way query fetch expression data use get_seurat method cellxgene.census API. method combines column selection value filtering described obtain slices expression data based metadata queries. method return Seurat object, takes input census object, string organism, cell gene metadata can specify filters column selection described following arguments: obs_column_names — character vector indicating columns select cell metadata. obs_value_filter — expression selection conditions fetch cells meeting criteria. var_column_names — character vector indicating columns select gene metadata. var_value_filter — expression selection conditions fetch genes meeting criteria. example want fetch expression data : Genes \"ENSG00000161798\" \"ENSG00000188229\". \"B cells\" \"lung\" \"COVID-19\". gene metadata adding sex cell metadata. 
full description refer ?cellxgene.census::get_seurat.","code":"library(\"Seurat\")  seurat_obj <- get_seurat(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) seurat_obj #> An object of class Seurat  #> 2 features across 2729 samples within 1 assay  #> Active assay: RNA (2 features, 0 variable features) #>  2 layers present: counts, data head(seurat_obj[[]]) #>                 orig.ident cell_type tissue_general  disease     sex #> cell13391229 SeuratProject    B cell           lung COVID-19    male #> cell13393737 SeuratProject    B cell           lung COVID-19 unknown #> cell13394391 SeuratProject    B cell           lung COVID-19    male #> cell13394897 SeuratProject    B cell           lung COVID-19 unknown #> cell13395941 SeuratProject    B cell           lung COVID-19    male #> cell13397408 SeuratProject    B cell           lung COVID-19 unknown head(seurat_obj$RNA[[]]) #>                 feature_name feature_length      nnz n_measured_obs #> ENSG00000161798         AQP5           1884  1029069       58250439 #> ENSG00000188229       TUBB4B           2037 21416107       62655002"},{"path":"/articles/census_query_extract.html","id":"querying-expression-data-as-singlecellexperiment","dir":"Articles","previous_headings":"","what":"Querying expression data as SingleCellExperiment","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"Similarly previous section, get_single_cell_experiment method cellxgene.census API. behaves exactly get_seurat returns SingleCellExperiment object. example, repeat query can simply following. 
full description refer ?cellxgene.census::get_single_cell_experiment.","code":"library(\"SingleCellExperiment\")  sce_obj <- get_single_cell_experiment(   census, \"Homo sapiens\",   obs_column_names = c(\"cell_type\", \"tissue_general\", \"disease\", \"sex\"),   var_value_filter = \"feature_id %in% c('ENSG00000161798', 'ENSG00000188229')\",   obs_value_filter = \"cell_type == 'B cell' & tissue_general == 'lung' & disease == 'COVID-19'\" ) sce_obj #> class: SingleCellExperiment  #> dim: 2 2729  #> metadata(0): #> assays(1): counts #> rownames(2): ENSG00000161798 ENSG00000188229 #> rowData names(4): feature_name feature_length nnz n_measured_obs #> colnames(2729): obs13391229 obs13393737 ... obs54635684 obs54635708 #> colData names(4): cell_type tissue_general disease sex #> reducedDimNames(0): #> mainExpName: RNA #> altExpNames(0): head(colData(sce_obj)) #> DataFrame with 6 rows and 4 columns #>               cell_type tissue_general     disease         sex #>                    #> obs13391229      B cell           lung    COVID-19        male #> obs13393737      B cell           lung    COVID-19     unknown #> obs13394391      B cell           lung    COVID-19        male #> obs13394897      B cell           lung    COVID-19     unknown #> obs13395941      B cell           lung    COVID-19        male #> obs13397408      B cell           lung    COVID-19     unknown head(rowData(sce_obj)) #> DataFrame with 2 rows and 4 columns #>                 feature_name feature_length       nnz n_measured_obs #>                                #> ENSG00000161798         AQP5           1884   1029069       58250439 #> ENSG00000188229       TUBB4B           2037  21416107       62655002"},{"path":"/articles/census_query_extract.html","id":"close-the-census","dir":"Articles","previous_headings":"Querying expression data as SingleCellExperiment","what":"Close the census","title":"Querying and fetching the single-cell data and cell/gene metadata","text":"use, census object closed 
release memory resources. also closes SOMA objects accessed via top-level census. Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/articles/comp_bio_census_info.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Learning about the CZ CELLxGENE Census","text":"cellxgene.census R package contains convenient open_soma() API open version Census (stable default). can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"library(\"cellxgene.census\") census <- open_soma()"},{"path":"/articles/comp_bio_census_info.html","id":"census-organization","dir":"Articles","previous_headings":"","what":"Census organization","title":"Learning about the CZ CELLxGENE Census","text":"Census schema defines structure Census. short, can think Census structured collection items stores different pieces information. items parent collection SOMA objects various types can accessed TileDB-SOMA API (documentation). cellxgene.census package contains convenient wrappers TileDB-SOMA API. example function used open Census: cellxgene_census.open_soma().","code":""},{"path":"/articles/comp_bio_census_info.html","id":"main-census-components","dir":"Articles","previous_headings":"Census organization","what":"Main Census components","title":"Learning about the CZ CELLxGENE Census","text":"command created census, SOMACollection, R6 class providing key-value associative map. get() method can access two top-level collection members, census_info census_data, instances SOMACollection.","code":""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-info","dir":"Articles","previous_headings":"Census organization","what":"Census summary info","title":"Learning about the CZ CELLxGENE Census","text":"census$get(\"census_info\")$get(\"summary\"): data frame high-level information Census, e.g. 
build date, total cell count, etc. census$get(\"census_info\")$get(\"datasets\"): data frame datasets CELLxGENE Discover used create Census. census$get(\"census_info\")$get(\"summary_cell_counts\"): data frame cell counts stratified relevant cell metadata Census data Data organism stored independent SOMAExperiment objects specialized form SOMACollection. store data matrix (cell genes), cell metadata, gene metadata, useful components covered notebook. data organized one organism – Homo sapiens: census$get(\"census_data\")$get(\"homo_sapiens\")$obs: Cell metadata census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\"): Data matrices, currently … census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$X$get(\"raw\"): matrix raw counts SOMASparseNDArray census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var: Gene Metadata","code":""},{"path":"/articles/comp_bio_census_info.html","id":"cell-metadata","dir":"Articles","previous_headings":"","what":"Cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"can obtain cell metadata variables directly querying columns corresponding SOMADataFrame. variables can used querying Census case want work specific cells. variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. dataset_id: dataset id encoded census$get(\"census_info\")$get(\"datasets\"). 
tissue_general tissue_general_ontology_term_id: high-level tissue mapping.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$obs$colnames() #>  [1] \"soma_joinid\"                              #>  [2] \"dataset_id\"                               #>  [3] \"assay\"                                    #>  [4] \"assay_ontology_term_id\"                   #>  [5] \"cell_type\"                                #>  [6] \"cell_type_ontology_term_id\"               #>  [7] \"development_stage\"                        #>  [8] \"development_stage_ontology_term_id\"       #>  [9] \"disease\"                                  #> [10] \"disease_ontology_term_id\"                 #> [11] \"donor_id\"                                 #> [12] \"is_primary_data\"                          #> [13] \"self_reported_ethnicity\"                  #> [14] \"self_reported_ethnicity_ontology_term_id\" #> [15] \"sex\"                                      #> [16] \"sex_ontology_term_id\"                     #> [17] \"suspension_type\"                          #> [18] \"tissue\"                                   #> [19] \"tissue_ontology_term_id\"                  #> [20] \"tissue_general\"                           #> [21] \"tissue_general_ontology_term_id\"          #> [22] \"raw_sum\"                                  #> [23] \"nnz\"                                      #> [24] \"raw_mean_nnz\"                             #> [25] \"raw_variance_nnz\"                         #> [26] \"n_measured_vars\""},{"path":"/articles/comp_bio_census_info.html","id":"gene-metadata","dir":"Articles","previous_headings":"","what":"Gene metadata","title":"Learning about the CZ CELLxGENE Census","text":"Similarly, can obtain gene metadata variables directly querying columns corresponding SOMADataFrame. variables can use querying Census case specific genes interested . variables defined CELLxGENE dataset schema except following: soma_joinid: SOMA-defined value use join operations. 
feature_length: length base pairs gene.","code":"census$get(\"census_data\")$get(\"homo_sapiens\")$ms$get(\"RNA\")$var$colnames() #> [1] \"soma_joinid\"    \"feature_id\"     \"feature_name\"   \"feature_length\" \"nnz\"            #> [6] \"n_measured_obs\""},{"path":"/articles/comp_bio_census_info.html","id":"census-summary-content-tables","dir":"Articles","previous_headings":"","what":"Census summary content tables","title":"Learning about the CZ CELLxGENE Census","text":"can take quick look high-level Census information looking census$get(\"census_info\")$get(\"summary\"): special interest label-value combinations : total_cell_count total number cells Census. unique_cell_count number unique cells, cells may present twice due meta-analysis consortia-like data. number_donors_homo_sapiens number_donors_mus_musculus number individuals human mouse. guaranteed unique one individual ID may present identical different datasets.","code":"as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #>   soma_joinid                      label      value #> 1           0      census_schema_version      1.2.0 #> 2           1          census_build_date 2023-10-23 #> 3           2     dataset_schema_version      3.1.0 #> 4           3           total_cell_count   68683222 #> 5           4          unique_cell_count   40356133 #> 6           5 number_donors_homo_sapiens      15588 #> 7           6 number_donors_mus_musculus       1990"},{"path":"/articles/comp_bio_census_info.html","id":"cell-counts-by-cell-metadata","dir":"Articles","previous_headings":"Census summary content tables","what":"Cell counts by cell metadata","title":"Learning about the CZ CELLxGENE Census","text":"looking census$get(\"census_info)$get(\"summary_cell_counts\") can get general idea cell counts stratified relevant cell metadata. cell metadata included table, can take look cell gene metadata available sections “Cell metadata” “Gene metadata”. 
line retrieves table casts R data frame: combination organism values category cell metadata can take look total_cell_count unique_cell_count cell counts combination. values category specified ontology_term_id label, value’s IDs labels, respectively.","code":"census_counts <- as.data.frame(census$get(\"census_info\")$get(\"summary_cell_counts\")$read()$concat()) head(census_counts) #>   soma_joinid     organism category ontology_term_id unique_cell_count total_cell_count #> 1           0 Homo sapiens      all               na          36227903         62998417 #> 2           1 Homo sapiens    assay      EFO:0008722            264166           279635 #> 3           2 Homo sapiens    assay      EFO:0008780             25652            51304 #> 4           3 Homo sapiens    assay      EFO:0008796             54753            54753 #> 5           4 Homo sapiens    assay      EFO:0008919             89477           206754 #> 6           5 Homo sapiens    assay      EFO:0008931             78750           188248 #>        label #> 1         na #> 2   Drop-seq #> 3     inDrop #> 4   MARS-seq #> 5   Seq-Well #> 6 Smart-seq2"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-metadata-included-in-the-summary-counts-table","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell metadata included in the summary counts table","title":"Learning about the CZ CELLxGENE Census","text":"get available cell metadata summary counts table can following. 
Remember cell metadata available, variables omitted creation table.","code":"t(table(census_counts$organism, census_counts$category)) #>                           #>                           Homo sapiens Mus musculus #>   all                                1            1 #>   assay                             20           10 #>   cell_type                        631          248 #>   disease                           72            5 #>   self_reported_ethnicity           30            1 #>   sex                                3            3 #>   suspension_type                    1            1 #>   tissue                           230           74 #>   tissue_general                    53           27"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-counts-for-each-sequencing-assay-in-human-data","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: cell counts for each sequencing assay in human data","title":"Learning about the CZ CELLxGENE Census","text":"get cell counts sequencing assay type human data, can perform following operations:","code":"human_assay_counts <- census_counts[census_counts$organism == \"Homo sapiens\" & census_counts$category == \"assay\", ] human_assay_counts <- human_assay_counts[order(human_assay_counts$total_cell_count, decreasing = TRUE), ]"},{"path":"/articles/comp_bio_census_info.html","id":"example-number-of-microglial-cells-in-the-census","dir":"Articles","previous_headings":"Census summary content tables > Cell counts by cell metadata","what":"Example: number of microglial cells in the Census","title":"Learning about the CZ CELLxGENE Census","text":"specific term categories shown can directly find number cells term.","code":"census_counts[census_counts$label == \"microglial cell\", ] #>      soma_joinid     organism  category ontology_term_id unique_cell_count #> 72            71 Homo sapiens cell_type       CL:0000129            359243 #> 1080      
  1079 Mus musculus cell_type       CL:0000129             48998 #>      total_cell_count           label #> 72             544977 microglial cell #> 1080            75885 microglial cell"},{"path":"/articles/comp_bio_census_info.html","id":"understanding-census-contents-beyond-the-summary-tables","dir":"Articles","previous_headings":"","what":"Understanding Census contents beyond the summary tables","title":"Learning about the CZ CELLxGENE Census","text":"using pre-computed tables census$get(\"census_info\") easy quick way understand contents Census, falls short want learn certain slices Census. example, may want learn : cell types available human liver? total number cells lung datasets stratified sequencing technology? sex distribution cells brain mouse? diseases available T cells? questions can answered directly querying cell metadata shown examples .","code":""},{"path":"/articles/comp_bio_census_info.html","id":"example-all-cell-types-available-in-human","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: all cell types available in human","title":"Learning about the CZ CELLxGENE Census","text":"exemplify process accessing slicing cell metadata summary stats, let’s start trivial example take look human cell types available Census: number rows total number cells humans. Now, wish get cell counts per cell type can work data frame. addition, focus cells marked is_primary_data=TRUE ensures de-duplicate cells appear CELLxGENE Discover. number unique cells. Now let’s look counts per cell type: shows abundant cell types “glutamatergic neuron”, “CD8-positive, alpha-beta T cell”, “CD4-positive, alpha-beta T cell”. Now let’s take look number unique cell types: total number different cell types human. information example can quickly obtained summary table census$get(\"census-info\")$get(\"summary_cell_counts\"). 
examples complex can achieved accessing cell metadata.","code":"obs_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(column_names = c(\"cell_type\", \"is_primary_data\")) as.data.frame(obs_df$concat()) #>                            cell_type is_primary_data #> 1                    oligodendrocyte           FALSE #> 2     oligodendrocyte precursor cell           FALSE #> 3   astrocyte of the cerebral cortex           FALSE #> 4   astrocyte of the cerebral cortex           FALSE #> 5   astrocyte of the cerebral cortex           FALSE #> 6     oligodendrocyte precursor cell           FALSE #> 7   astrocyte of the cerebral cortex           FALSE #> 8                    microglial cell           FALSE #> 9   astrocyte of the cerebral cortex           FALSE #> 10  astrocyte of the cerebral cortex           FALSE #> 11  astrocyte of the cerebral cortex           FALSE #> 12  astrocyte of the cerebral cortex           FALSE #> 13  astrocyte of the cerebral cortex           FALSE #> 14  astrocyte of the cerebral cortex           FALSE #> 15  astrocyte of the cerebral cortex           FALSE #> 16    oligodendrocyte precursor cell           FALSE #> 17                   oligodendrocyte           FALSE #> 18  astrocyte of the cerebral cortex           FALSE #> 19  astrocyte of the cerebral cortex           FALSE #> 20  astrocyte of the cerebral cortex           FALSE #> 21  astrocyte of the cerebral cortex           FALSE #> 22  astrocyte of the cerebral cortex           FALSE #> 23    oligodendrocyte precursor cell           FALSE #> 24  astrocyte of the cerebral cortex           FALSE #> 25  astrocyte of the cerebral cortex           FALSE #> 26    oligodendrocyte precursor cell           FALSE #> 27                   microglial cell           FALSE #> 28                   oligodendrocyte           FALSE #> 29  astrocyte of the cerebral cortex           FALSE #> 30  cerebral cortex endothelial cell           FALSE #> 31                   microglial cell       
    FALSE #> 32                   microglial cell           FALSE #> 33                   microglial cell           FALSE #> 34                   oligodendrocyte           FALSE #> 35                   oligodendrocyte           FALSE #> 36                   microglial cell           FALSE #> 37                   oligodendrocyte           FALSE #> 38                   oligodendrocyte           FALSE #> 39  astrocyte of the cerebral cortex           FALSE #> 40                   oligodendrocyte           FALSE #> 41  astrocyte of the cerebral cortex           FALSE #> 42                   oligodendrocyte           FALSE #> 43    oligodendrocyte precursor cell           FALSE #> 44                   oligodendrocyte           FALSE #> 45  astrocyte of the cerebral cortex           FALSE #> 46    oligodendrocyte precursor cell           FALSE #> 47                   oligodendrocyte           FALSE #> 48    oligodendrocyte precursor cell           FALSE #> 49  astrocyte of the cerebral cortex           FALSE #> 50  astrocyte of the cerebral cortex           FALSE #> 51  astrocyte of the cerebral cortex           FALSE #> 52                   oligodendrocyte           FALSE #> 53                   oligodendrocyte           FALSE #> 54                   oligodendrocyte           FALSE #> 55  astrocyte of the cerebral cortex           FALSE #> 56  cerebral cortex endothelial cell           FALSE #> 57                   oligodendrocyte           FALSE #> 58                   oligodendrocyte           FALSE #> 59                   oligodendrocyte           FALSE #> 60                   microglial cell           FALSE #> 61                   microglial cell           FALSE #> 62    oligodendrocyte precursor cell           FALSE #> 63    oligodendrocyte precursor cell           FALSE #> 64                   oligodendrocyte           FALSE #> 65    oligodendrocyte precursor cell           FALSE #> 66                   oligodendrocyte           FALSE #> 67  astrocyte of the 
cerebral cortex           FALSE #> 68                   oligodendrocyte           FALSE #> 69    oligodendrocyte precursor cell           FALSE #> 70                   oligodendrocyte           FALSE #> 71  astrocyte of the cerebral cortex           FALSE #> 72  astrocyte of the cerebral cortex           FALSE #> 73  astrocyte of the cerebral cortex           FALSE #> 74    oligodendrocyte precursor cell           FALSE #> 75  astrocyte of the cerebral cortex           FALSE #> 76    oligodendrocyte precursor cell           FALSE #> 77                   microglial cell           FALSE #> 78                   microglial cell           FALSE #> 79    oligodendrocyte precursor cell           FALSE #> 80                   oligodendrocyte           FALSE #> 81                   oligodendrocyte           FALSE #> 82  astrocyte of the cerebral cortex           FALSE #> 83                   oligodendrocyte           FALSE #> 84  astrocyte of the cerebral cortex           FALSE #> 85  astrocyte of the cerebral cortex           FALSE #> 86                   oligodendrocyte           FALSE #> 87  astrocyte of the cerebral cortex           FALSE #> 88                   oligodendrocyte           FALSE #> 89    oligodendrocyte precursor cell           FALSE #> 90    oligodendrocyte precursor cell           FALSE #> 91  astrocyte of the cerebral cortex           FALSE #> 92  astrocyte of the cerebral cortex           FALSE #> 93  astrocyte of the cerebral cortex           FALSE #> 94                   oligodendrocyte           FALSE #> 95  astrocyte of the cerebral cortex           FALSE #> 96  astrocyte of the cerebral cortex           FALSE #> 97                   oligodendrocyte           FALSE #> 98                   oligodendrocyte           FALSE #> 99    oligodendrocyte precursor cell           FALSE #> 100                  oligodendrocyte           FALSE #> 101                  oligodendrocyte           FALSE #> 102                  oligodendrocyte           FALSE #> 103 
astrocyte of the cerebral cortex           FALSE #> 104   oligodendrocyte precursor cell           FALSE #> 105                  oligodendrocyte           FALSE #> 106   oligodendrocyte precursor cell           FALSE #> 107                  oligodendrocyte           FALSE #> 108                  oligodendrocyte           FALSE #> 109                  oligodendrocyte           FALSE #> 110                  oligodendrocyte           FALSE #> 111   oligodendrocyte precursor cell           FALSE #> 112                  oligodendrocyte           FALSE #> 113                  oligodendrocyte           FALSE #> 114 astrocyte of the cerebral cortex           FALSE #> 115                  oligodendrocyte           FALSE #> 116 astrocyte of the cerebral cortex           FALSE #> 117                  oligodendrocyte           FALSE #> 118                  oligodendrocyte           FALSE #> 119                  oligodendrocyte           FALSE #> 120 astrocyte of the cerebral cortex           FALSE #> 121 astrocyte of the cerebral cortex           FALSE #> 122   oligodendrocyte precursor cell           FALSE #> 123                  microglial cell           FALSE #> 124 astrocyte of the cerebral cortex           FALSE #> 125 astrocyte of the cerebral cortex           FALSE #> 126                  microglial cell           FALSE #> 127 cerebral cortex endothelial cell           FALSE #> 128   oligodendrocyte precursor cell           FALSE #>  [ reached 'max' / getOption(\"max.print\") -- omitted 62998289 rows ] obs_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   column_names = \"cell_type\",   value_filter = \"is_primary_data == TRUE\" )  obs_df <- as.data.frame(obs_df$concat()) nrow(obs_df) #> [1] 36227903 human_cell_type_counts <- table(obs_df$cell_type) sort(human_cell_type_counts, decreasing = TRUE)[1:10] #>  #>                                                             neuron  #>                                                            2815336  #>     
                                          glutamatergic neuron  #>                                                            1563446  #>                                    CD4-positive, alpha-beta T cell  #>                                                            1243885  #>                                    CD8-positive, alpha-beta T cell  #>                                                            1197715  #> L2/3-6 intratelencephalic projecting glutamatergic cortical neuron  #>                                                            1123360  #>                                                    oligodendrocyte  #>                                                            1063874  #>                                                 classical monocyte  #>                                                            1030996  #>                                                        native cell  #>                                                            1011949  #>                                                             B cell  #>                                                             934060  #>                                                natural killer cell  #>                                                             770637 length(human_cell_type_counts) #> [1] 610"},{"path":"/articles/comp_bio_census_info.html","id":"example-cell-types-available-in-human-liver","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: cell types available in human liver","title":"Learning about the CZ CELLxGENE Census","text":"Similar example , can learn cell types available specific tissue, e.g. liver. achieve goal just need limit cell metadata tissue. use information cell metadata variable tissue_general. 
variable contains high-level tissue label cells Census: cell types cell counts human liver.","code":"obs_liver_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   column_names = \"cell_type\",   value_filter = \"is_primary_data == TRUE & tissue_general == 'liver'\" )  obs_liver_df <- as.data.frame(obs_liver_df$concat())  sort(table(obs_liver_df$cell_type), decreasing = TRUE)[1:10] #>  #>                          T cell                     hepatoblast  #>                           85739                           58447  #>                 neoplastic cell                    erythroblast  #>                           52431                           45605  #>                        monocyte                      hepatocyte  #>                           31388                           28309  #>             natural killer cell    periportal region hepatocyte  #>                           26871                           23509  #>                      macrophage centrilobular region hepatocyte  #>                           16707                           15819"},{"path":"/articles/comp_bio_census_info.html","id":"example-diseased-t-cells-in-human-tissues","dir":"Articles","previous_headings":"Understanding Census contents beyond the summary tables","what":"Example: diseased T cells in human tissues","title":"Learning about the CZ CELLxGENE Census","text":"example going get counts diseased cells annotated T cells. 
sake example focus “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”: cell counts annotated indicated disease across human tissues “CD8-positive, alpha-beta T cell” “CD4-positive, alpha-beta T cell”.","code":"obs_t_cells_df <- census$get(\"census_data\")$get(\"homo_sapiens\")$obs$read(   column_names = c(\"disease\", \"tissue_general\"),   value_filter = \"is_primary_data == TRUE & disease != 'normal' & cell_type %in% c('CD8-positive, alpha-beta T cell', 'CD4-positive, alpha-beta T cell')\" )  obs_t_cells_df <- as.data.frame(obs_t_cells_df$concat())  print(table(obs_t_cells_df)) #>                                        tissue_general #> disease                                 adrenal gland  blood bone marrow  brain breast #>   COVID-19                                          0 819428           0      0      0 #>   Crohn disease                                     0      0           0      0      0 #>   Down syndrome                                     0      0         181      0      0 #>   breast cancer                                     0      0           0      0   1850 #>   chronic obstructive pulmonary disease             0      0           0      0      0 #>   chronic rhinitis                                  0      0           0      0      0 #>   clear cell renal carcinoma                        0   6548           0      0      0 #>   cystic fibrosis                                   0      0           0      0      0 #>   follicular lymphoma                               0      0           0      0      0 #>   influenza                                         0   8871           0      0      0 #>   interstitial lung disease                         0      0           0      0      0 #>   kidney benign neoplasm                            0      0           0      0      0 #>   kidney oncocytoma                                 0      0           0      0      0 #>   lung adenocarcinoma                             205      0           0   
3274      0 #>   lung large cell carcinoma                         0      0           0      0      0 #>   lymphangioleiomyomatosis                          0      0           0      0      0 #>                                        tissue_general #> disease                                  colon kidney  liver   lung lymph node   nose #>   COVID-19                                   0      0      0  30578          0     13 #>   Crohn disease                          17490      0      0      0          0      0 #>   Down syndrome                              0      0      0      0          0      0 #>   breast cancer                              0      0      0      0          0      0 #>   chronic obstructive pulmonary disease      0      0      0   9382          0      0 #>   chronic rhinitis                           0      0      0      0          0    909 #>   clear cell renal carcinoma                 0  20540      0      0         36      0 #>   cystic fibrosis                            0      0      0      7          0      0 #>   follicular lymphoma                        0      0      0      0       1089      0 #>   influenza                                  0      0      0      0          0      0 #>   interstitial lung disease                  0      0      0   1803          0      0 #>   kidney benign neoplasm                     0     10      0      0          0      0 #>   kidney oncocytoma                          0   2303      0      0          0      0 #>   lung adenocarcinoma                        0      0    507 215013      24969      0 #>   lung large cell carcinoma                  0      0      0   5922          0      0 #>   lymphangioleiomyomatosis                   0      0      0    513          0      0 #>                                        tissue_general #> disease                                 pleural fluid respiratory system saliva #>   COVID-19                                          0                  4     41 #>   Crohn 
disease                                     0                  0      0 #>   Down syndrome                                     0                  0      0 #>   breast cancer                                     0                  0      0 #>   chronic obstructive pulmonary disease             0                  0      0 #>   chronic rhinitis                                  0                  0      0 #>   clear cell renal carcinoma                        0                  0      0 #>   cystic fibrosis                                   0                  0      0 #>   follicular lymphoma                               0                  0      0 #>   influenza                                         0                  0      0 #>   interstitial lung disease                         0                  0      0 #>   kidney benign neoplasm                            0                  0      0 #>   kidney oncocytoma                                 0                  0      0 #>   lung adenocarcinoma                           11558                  0      0 #>   lung large cell carcinoma                         0                  0      0 #>   lymphangioleiomyomatosis                          0                  0      0 #>                                        tissue_general #> disease                                 small intestine vasculature #>   COVID-19                                            0           0 #>   Crohn disease                                   52029           0 #>   Down syndrome                                       0           0 #>   breast cancer                                       0           0 #>   chronic obstructive pulmonary disease               0           0 #>   chronic rhinitis                                    0           0 #>   clear cell renal carcinoma                          0           0 #>   cystic fibrosis                                     0           0 #>   follicular lymphoma                                 0           
0 #>   influenza                                           0           0 #>   interstitial lung disease                           0           0 #>   kidney benign neoplasm                              0           0 #>   kidney oncocytoma                                   0           0 #>   lung adenocarcinoma                                 0           0 #>   lung large cell carcinoma                           0           0 #>   lymphangioleiomyomatosis                            0           0 #>  [ reached getOption(\"max.print\") -- omitted 8 rows ]"},{"path":"/articles/comp_bio_data_integration.html","id":"finding-and-fetching-data-from-mouse-liver-10x-genomics-and-smart-seq2","dir":"Articles","previous_headings":"","what":"Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s load packages needed notebook. Now can open Census. notebook use Tabula Muris Senis data liver contains cells 10X Genomics Smart-Seq2 technologies. Let’s query datasets table Census filtering collection_name “Tabula Muris Senis” dataset_title “liver”. Now can use values dataset_id query load Seurat object cells datasets. 
can check cell counts 10X Genomics Smart-Seq2 data looking assay metadata.","code":"library(\"cellxgene.census\") library(\"Seurat\") census <- open_soma() census_datasets <- census$get(\"census_info\")$get(\"datasets\") census_datasets <- census_datasets$read(value_filter = \"collection_name == 'Tabula Muris Senis'\") census_datasets <- as.data.frame(census_datasets$concat())  # Print rows with liver data census_datasets[grep(\"Liver\", census_datasets$dataset_title), ] #>    soma_joinid                        collection_id    collection_name #> 15         583 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis #> 36         605 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis #>               collection_doi                           dataset_id #> 15 10.1038/s41586-020-2496-1 4546e757-34d0-4d17-be06-538318925fcd #> 36 10.1038/s41586-020-2496-1 6202a243-b713-4e12-9ced-c387f8483dea #>                      dataset_version_id #> 15 0a851e26-a629-4e59-9b52-9b4d1ce4440b #> 36 70f4f091-86a9-44e3-a92a-54cee98cc223 #>                                                                                        dataset_title #> 15 Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - Smart-seq2 #> 36        Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - 10x #>                            dataset_h5ad_path dataset_total_cell_count #> 15 4546e757-34d0-4d17-be06-538318925fcd.h5ad                     2859 #> 36 6202a243-b713-4e12-9ced-c387f8483dea.h5ad                     7294 tabula_muris_liver_ids <- c(\"4546e757-34d0-4d17-be06-538318925fcd\", \"6202a243-b713-4e12-9ced-c387f8483dea\")  seurat_obj <- get_seurat(   census,   organism = \"Mus musculus\",   obs_value_filter = \"dataset_id %in% tabula_muris_liver_ids\" ) table(seurat_obj$assay) #>  #>  10x 3' v2 Smart-seq2  #>       7294       
2859"},{"path":"/articles/comp_bio_data_integration.html","id":"gene-length-normalization-of-smart-seq2-data-","dir":"Articles","previous_headings":"","what":"Gene-length normalization of Smart-Seq2 data.","title":"Integrating multi-dataset slices of data with Seurat","text":"Smart-seq2 read counts normalized gene length. Lets first get gene lengths var.feature_length. Now can use normalize Smart-seq data. let’s split object assay. normalize Smart-seq slice using gene lengths merge back single object.","code":"smart_seq_gene_lengths <- seurat_obj$RNA[[]]$feature_length seurat_obj.list <- SplitObject(seurat_obj, split.by = \"assay\") seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts <- seurat_obj.list[[\"Smart-seq2\"]][[\"RNA\"]]@counts / smart_seq_gene_lengths seurat_obj <- merge(seurat_obj.list[[1]], seurat_obj.list[[2]])"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-with-seurat","dir":"Articles","previous_headings":"","what":"Integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"use native integration capabilities Seurat. comprehensive usage best practices Seurat intergation please refer doc site Seurat.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"inspecting-data-prior-to-integration","dir":"Articles","previous_headings":"Integration with Seurat","what":"Inspecting data prior to integration","title":"Integrating multi-dataset slices of data with Seurat","text":"Let’s take look strength batch effects data. perform embedding visualization via UMAP. Let’s basic data normalization variable gene selection now perform PCA UMAP   can see batch effects strong cells cluster primarily assay cell_type. 
Properly integrated embedding principle cluster primarily cell_type, assay best randomly distributed.","code":"seurat_obj <- SCTransform(seurat_obj) seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = \"vst\", nfeatures = 2000) seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(object = seurat_obj)) seurat_obj <- RunUMAP(seurat_obj, dims = 1:30) # By assay p1 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"data-integration-with-seurat","dir":"Articles","previous_headings":"Integration with Seurat","what":"Data integration with Seurat","title":"Integrating multi-dataset slices of data with Seurat","text":"Whenever query fetch Census data multiple datasets integration needs performed evidenced batch effects observed. paramaters Seurat used notebook selected model run quickly. best practices integration single-cell data using Seurat please refer documentation page. seurat_d reading article integrated cell atlas human lung health disease Sikkema et al. perfomed integration 43 datasets Lung. focus metadata Census can batch information integration.","code":""},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id","title":"Integrating multi-dataset slices of data with Seurat","text":"cells Census annotated dataset come \"dataset_id\". great place start integration. let’s run Seurat integration pipeline. First define model batch set dataset_id. Firs normalize select variable genes seperated batch key dataset_id Now perform integration. Let’s inspect results normalization UMAP visulization. plot UMAP.   Great! can see clustering longer mainly driven assay, albeit still contributing . 
Great! can see clustering longer mainly driven assay, albeit still contributing .","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run the standard workflow for visualization and clustering seurat_obj.combined <- ScaleData(seurat_obj.combined, verbose = FALSE) seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id","title":"Integrating multi-dataset slices of data with Seurat","text":"Similar dataset_id, cells Census annotated donor_id. definition donor_id depends dataset left discretion data curators. However still rich information can used batch variable integration. 
donor_id guaranteed unique across cells Census, strongly recommend concatenating dataset_id donor_id use batch separator Seurat Now perform integration. inspect new results UMAP. Plot UMAP.   can see using dataset_id donor_id batch cells now mostly cluster cell type.","code":"# split the dataset into a list of two seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = \"donor_id\") })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list) seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") #> Finding all pairwise anchors #> Running CCA #> Merging objects #> Finding neighborhoods #> Finding anchors #>  Found 7190 anchors #> Filtering anchors #>  Retained 5063 anchors seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\") #> [1] 1 #> Warning: Different cells and/or features from existing assay SCT #> Warning: Layer counts isn't present in the assay object; returning NULL #> [1] 2 #> Warning: Different cells and/or features from existing assay SCT #> Layer counts isn't present in the assay object; returning NULL #> Merging dataset 1 into 2 #> Extracting anchors for merged samples #> Finding integration vectors #> Finding integration vector weights #> Integrating data #> Warning: Layer counts isn't present in the assay object; returning NULL #> Warning: Assay integrated changing from Assay to SCTAssay #> Warning: Layer counts isn't present in the assay object; returning NULL #> Warning: Different cells and/or features from existing assay SCT DefaultAssay(seurat_obj.combined) <- \"integrated\"  # Run 
the standard workflow for visualization and clustering seurat_obj.combined <- RunPCA(seurat_obj.combined, npcs = 30, verbose = FALSE) seurat_obj.combined <- RunUMAP(seurat_obj.combined, reduction = \"pca\", dims = 1:30) #> 20:54:47 UMAP embedding parameters a = 0.9922 b = 1.112 #> 20:54:47 Read 10153 rows and found 30 numeric columns #> 20:54:47 Using Annoy for neighbor search, n_neighbors = 30 #> 20:54:47 Building Annoy index with metric = cosine, n_trees = 50 #> 0%   10   20   30   40   50   60   70   80   90   100% #> [----|----|----|----|----|----|----|----|----|----| #> **************************************************| #> 20:54:49 Writing NN index file to temp file /tmp/RtmpNZhqvF/file114141d66b377 #> 20:54:49 Searching Annoy index using 1 thread, search_k = 3000 #> 20:54:53 Annoy recall = 100% #> 20:54:53 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 #> 20:54:54 Initializing from normalized Laplacian + noise (using RSpectra) #> 20:54:54 Commencing optimization for 200 epochs, with 410528 positive edges #> 20:54:59 Optimization finished # By assay p1 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"assay\") p1 # By cell type p2 <- DimPlot(seurat_obj.combined, reduction = \"umap\", group.by = \"cell_type\") p2"},{"path":"/articles/comp_bio_data_integration.html","id":"integration-across-datasets-using-dataset_id-and-controlling-for-batch-using-donor_id-assay_ontology_term_id-suspension_type-","dir":"Articles","previous_headings":"Integration with Seurat > Data integration with Seurat","what":"Integration across datasets using dataset_id and controlling for batch using donor_id + assay_ontology_term_id + suspension_type.","title":"Integrating multi-dataset slices of data with Seurat","text":"cases one dataset may contain multiple assay types /multiple suspension types (cell vs nucleus), important consider metadata batches. 
Therefore, comprehensive definition batch Census can accomplished combining cell metadata dataset_id, donor_id, assay_ontology_term_id suspension_type, latter encode EFO ids assay types. example, two datasets used contain cells one assay , one suspension type . Thus make difference include metadata part batch. implementation look line","code":"# EXAMPLE, DON'T RUN.  # split the dataset into a list of seurat objects for each dataset seurat_obj.list <- SplitObject(seurat_obj, split.by = \"dataset_id\")  # normalize each dataset independently controlling for batch seurat_obj.list <- lapply(X = seurat_obj.list, FUN = function(x) {   x <- SCTransform(x, vars.to.regress = c(\"donor_id\", \"assay_ontology_term_id\", \"suspension_type\")) })  # select features for integration features <- SelectIntegrationFeatures(object.list = seurat_obj.list)  # integrate seurat_obj.list <- PrepSCTIntegration(seurat_obj.list, anchor.features = features) seurat_obj.anchors <- FindIntegrationAnchors(object.list = seurat_obj.list, anchor.features = features, normalization.method = \"SCT\") seurat_obj.combined <- IntegrateData(anchorset = seurat_obj.anchors, normalization.method = \"SCT\")"},{"path":"/articles/comp_bio_normalizing_full_gene_sequencing.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the census","title":"Normalizing full-length gene sequencing data","text":"First open Census: can learn cellxgene.census methods accessing corresponding documentation, example ?cellxgene.census::open_soma.","code":"library(\"Seurat\") census <- cellxgene.census::open_soma()"},{"path":"/articles/comp_bio_normalizing_full_gene_sequencing.html","id":"fetching-full-length-example-sequencing-data-smart-seq","dir":"Articles","previous_headings":"","what":"Fetching full-length example sequencing data (Smart-Seq)","title":"Normalizing full-length gene sequencing data","text":"Let’s get example data, case ’ll fetch cells relatively small dataset derived Smart-Seq2 
technology performs full-length gene sequencing: Collection: Tabula Muris Senis Dataset: Liver - single-cell transcriptomic atlas characterizes ageing tissues mouse - Smart-seq2 Let’s first find dataset’s id using dataset table Census. Now can use id fetch data. Let’s make sure data contains Smart-Seq2 cells. Great! can see small dataset containing 2,859 cells. Now let’s proceed normalize gene lengths.","code":"liver_dataset <- as.data.frame(   census$get(\"census_info\")$get(\"datasets\")   $read(value_filter = \"dataset_title == 'Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - Smart-seq2'\")   $concat() ) liver_dataset #>   soma_joinid                        collection_id    collection_name #> 1         583 0b9d8a04-bb9d-44da-aa27-705bb65b54eb Tabula Muris Senis #>              collection_doi                           dataset_id #> 1 10.1038/s41586-020-2496-1 4546e757-34d0-4d17-be06-538318925fcd #>                     dataset_version_id #> 1 0a851e26-a629-4e59-9b52-9b4d1ce4440b #>                                                                                       dataset_title #> 1 Liver - A single-cell transcriptomic atlas characterizes ageing tissues in the mouse - Smart-seq2 #>                           dataset_h5ad_path dataset_total_cell_count #> 1 4546e757-34d0-4d17-be06-538318925fcd.h5ad                     2859 liver_dataset_id <- liver_dataset[1, \"dataset_id\"] liver_seurat <- cellxgene.census::get_seurat(   census,   organism = \"Mus musculus\",   obs_value_filter = paste0(\"dataset_id == '\", liver_dataset_id, \"'\") ) table(liver_seurat$assay) #>  #> Smart-seq2  #>       2859"},{"path":"/articles/comp_bio_normalizing_full_gene_sequencing.html","id":"normalizing-expression-to-account-for-gene-length","dir":"Articles","previous_headings":"","what":"Normalizing expression to account for gene length","title":"Normalizing full-length gene sequencing data","text":"default cellxgene_census::get_seurat() fetches genes 
Census. let’s first identify genes measured dataset subset Seurat obect include . goal can use “Dataset Presence Matrix” census$get(\"census_data\")$get(\"mus_musculus\")$ms$get(\"RNA\")$get(\"feature_dataset_presence_matrix\"). boolean matrix N x M N number datasets, M number genes Census, 1 entry indicates gene measured dataset. (Note Seurat objects transposed layout M x N.) Let’s get genes measured dataset. can see genes Census 17,992 measured dataset. Now let’s normalize genes gene length. can easily Census gene lengths included gene metadata feature_length. done! can now see real numbers instead integers.","code":"liver_seurat #> An object of class Seurat  #> 52417 features across 2859 samples within 1 assay  #> Active assay: RNA (52417 features, 0 variable features) #>  2 layers present: counts, data liver_dataset_joinid <- liver_dataset$soma_joinid[1] presence_matrix <- cellxgene.census::get_presence_matrix(census, \"Mus musculus\", \"RNA\") presence_matrix <- presence_matrix$take(liver_dataset_joinid) gene_presence <- as.vector(presence_matrix$get_one_based_matrix())  liver_seurat <- liver_seurat[gene_presence, ] liver_seurat #> An object of class Seurat  #> 17992 features across 2859 samples within 1 assay  #> Active assay: RNA (17992 features, 0 variable features) #>  2 layers present: counts, data GetAssayData(liver_seurat[1:5, 1:5], slot = \"data\") #> Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0. #> i Please use the `layer` argument instead. #> This warning is displayed once every 8 hours. #> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated. #> 5 x 5 sparse Matrix of class \"dgCMatrix\" #>                    cell3959639 cell3959640 cell3959641 cell3959642 cell3959643 #> ENSMUSG00000025900           .           .           .           .           . #> ENSMUSG00000025902           .           .           .           .        2250 #> ENSMUSG00000033845           .         
559        1969           .           . #> ENSMUSG00000025903           .           .           .           .           . #> ENSMUSG00000033813           .           .         828           1          54 gene_lengths <- liver_seurat$RNA@meta.features$feature_length liver_seurat <- SetAssayData(   liver_seurat,   new.data = sweep(GetAssayData(liver_seurat, slot = \"data\"), 1, gene_lengths, \"/\") ) GetAssayData(liver_seurat[1:5, 1:5], slot = \"data\") #> 5 x 5 sparse Matrix of class \"dgCMatrix\" #>                    cell3959639 cell3959640 cell3959641  cell3959642 cell3959643 #> ENSMUSG00000025900           .  .            .         .             .          #> ENSMUSG00000025902           .  .            .         .             0.47150042 #> ENSMUSG00000033845           .  0.06586544   0.2320019 .             .          #> ENSMUSG00000025903           .  .            .         .             .          #> ENSMUSG00000033813           .  .            0.2744448 0.0003314551  0.01789857"},{"path":"/articles/comp_bio_normalizing_full_gene_sequencing.html","id":"validation-through-clustering-exploration","dir":"Articles","previous_headings":"","what":"Validation through clustering exploration","title":"Normalizing full-length gene sequencing data","text":"Let’s perform basic clustering analysis see cell types cluster expected using normalized counts. First basic filtering cells genes. normalize account sequencing depth transform data log scale. subset highly variable genes. finally scale values across gene axis. Now can proceed clustering analysis.  exceptions can see cells cell type cluster near serves sanity check gene-length normalization applied. 
Don’t forget close census.","code":"cells_per_gene <- rowSums(GetAssayData(liver_seurat, slot = \"counts\") > 0) genes_per_cell <- Matrix::colSums(liver_seurat$RNA@counts > 0) liver_seurat <- liver_seurat[cells_per_gene >= 5, genes_per_cell >= 500] liver_seurat <- Seurat::NormalizeData(   liver_seurat,   normalization.method = \"LogNormalize\",   scale.factor = 10000 ) liver_seurat <- Seurat::FindVariableFeatures(   liver_seurat,   selection.method = \"vst\",   nfeatures = 1000 ) all.genes <- rownames(liver_seurat) liver_seurat <- Seurat::ScaleData(liver_seurat, features = all.genes) liver_seurat <- RunPCA(   liver_seurat,   features = VariableFeatures(object = liver_seurat) ) liver_seurat <- FindNeighbors(liver_seurat, dims = 1:40) liver_seurat <- RunUMAP(liver_seurat, dims = 1:40) DimPlot(liver_seurat, reduction = \"umap\", group.by = \"cell_type\") census$close()"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"opening-the-census","dir":"Articles","previous_headings":"","what":"Opening the Census","title":"Summarizing cell and gene metadata","text":"cellxgene.census R package contains convenient API open version Census (default, newest stable version). open Census, close census$close(). can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma(). can learn cellxgene.census methods accessing corresponding documentation. example ?cellxgene.census::open_soma.","code":"library(\"cellxgene.census\") census <- open_soma()"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"summarizing-cell-metadata","dir":"Articles","previous_headings":"","what":"Summarizing cell metadata","title":"Summarizing cell and gene metadata","text":"Census open can use TileDB-SOMA methods SOMACollection. can thus access metadata SOMADataFrame objects encoding cell gene metadata. Tips: can read entire SOMADataFrame R using .data.frame(soma_df$read()$concat()). Queries much faster request DataFrame columns required analysis (e.g. 
column_names = c(\"soma_joinid\", \"cell_type_ontology_term_id\")). can also refine query results using value_filter, filter census matching records.","code":""},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-all-cell-types","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize all cell types","title":"Summarizing cell and gene metadata","text":"example reads cell metadata (obs) R data frame summarize variety ways.","code":"human <- census$get(\"census_data\")$get(\"homo_sapiens\")  # Read obs into an R data frame (tibble). obs_df <- human$obs$read(column_names = c(\"cell_type\")) obs_df <- as.data.frame(obs_df$concat())  # Find all unique values in the cell_type column. unique_cell_type <- unique(obs_df$cell_type)  cat(   \"There are\",   length(unique_cell_type),   \"cell types in the Census! The first few are: \",   paste(head(unique_cell_type), collapse = \", \") ) #> There are 631 cell types in the Census! The first few are:  oligodendrocyte, oligodendrocyte precursor cell, astrocyte of the cerebral cortex, microglial cell, cerebral cortex endothelial cell, vascular leptomeningeal cell"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"example-summarize-a-subset-of-cell-types-selected-with-a-value_filter","dir":"Articles","previous_headings":"Summarizing cell metadata","what":"Example: Summarize a subset of cell types, selected with a value_filter","title":"Summarizing cell and gene metadata","text":"example utilizes SOMA “value filter” read subset cells tissue_ontology_term_id equal UBERON:0002048 (lung tissue), summarizes query result. can also define much complex value filters. 
example: combine terms & | use %% operator query multiple values","code":"# Read cell_type terms for cells which have a specific tissue term LUNG_TISSUE <- \"UBERON:0002048\"  obs_df <- human$obs$read(column_names = c(\"cell_type\"), value_filter = paste0(\"tissue_ontology_term_id == '\", LUNG_TISSUE, \"'\")) obs_df <- as.data.frame(obs_df$concat())  # Find all unique values in the cell_type column as an R data frame. unique_cell_type <- unique(obs_df$cell_type) cat(   \"There are \",   length(unique_cell_type),   \" cell types in the Census where tissue_ontology_term_id == \",   LUNG_TISSUE,   \"!\\nThe first few are:\",   paste(head(unique_cell_type), collapse = \", \"),   \"\\n\" ) #> There are  185  cell types in the Census where tissue_ontology_term_id ==  UBERON:0002048 ! #> The first few are: type II pneumocyte, neutrophil, effector CD4-positive, alpha-beta T cell, effector CD8-positive, alpha-beta T cell, mature NK T cell, blood vessel endothelial cell # Report the 10 most common top_10 <- sort(table(obs_df$cell_type), decreasing = TRUE)[1:10] cat(   \"The top 10 cell types where tissue_ontology_term_id ==\",   LUNG_TISSUE,   \"are: \",   paste(names(top_10), collapse = \", \") ) #> The top 10 cell types where tissue_ontology_term_id == UBERON:0002048 are:  native cell, alveolar macrophage, CD8-positive, alpha-beta T cell, CD4-positive, alpha-beta T cell, macrophage, type II pneumocyte, classical monocyte, natural killer cell, malignant cell, epithelial cell of lower respiratory tract # You can also do more complex queries, such as testing for inclusion in a list of values obs_df <- human$obs$read(   column_names = c(\"cell_type_ontology_term_id\"),   value_filter = \"tissue_ontology_term_id %in% c('UBERON:0002082', 'UBERON:OOO2084', 'UBERON:0002080')\" )  obs_df <- as.data.frame(obs_df$concat())  # Summarize top_10 <- sort(table(obs_df$cell_type_ontology_term_id), decreasing = TRUE)[1:10] print(top_10) #>  #> CL:0000746 CL:0008034 CL:0002131 CL:0002548 
CL:0000115 CL:0000763 CL:0000057 CL:0000669  #>     160974      99458      96953      79733      79626      35560      33075      27515  #> CL:0000003 CL:0002144  #>      23613      18593"},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"full-census-metadata-stats","dir":"Articles","previous_headings":"","what":"Full Census metadata stats","title":"Summarizing cell and gene metadata","text":"example queries organisms Census, summarizes diversity various metadata labels.","code":"cols_to_query <- c(   \"cell_type_ontology_term_id\",   \"assay_ontology_term_id\",   \"tissue_ontology_term_id\" )  total_cells <- 0 for (organism in census$get(\"census_data\")$names()) {   print(organism)    obs_df <- census$get(\"census_data\")$get(organism)$obs$read(column_names = cols_to_query)   obs_df <- as.data.frame(obs_df$concat())    total_cells <- total_cells + nrow(obs_df)   for (col in cols_to_query) {     cat(\"  Unique \", col, \" values: \", length(unique(obs_df[[col]])), \"\\n\")   } } #> [1] \"homo_sapiens\" #>   Unique  cell_type_ontology_term_id  values:  631  #>   Unique  assay_ontology_term_id  values:  20  #>   Unique  tissue_ontology_term_id  values:  230  #> [1] \"mus_musculus\" #>   Unique  cell_type_ontology_term_id  values:  248  #>   Unique  assay_ontology_term_id  values:  10  #>   Unique  tissue_ontology_term_id  values:  74 cat(\"Complete Census contains \", total_cells, \" cells.\") #> Complete Census contains  68683222  cells."},{"path":"/articles/comp_bio_summarize_axis_query.html","id":"close-the-census","dir":"Articles","previous_headings":"Full Census metadata stats","what":"Close the census","title":"Summarizing cell and gene metadata","text":"use, census object closed release memory resources. also closes SOMA objects accessed via top-level census. 
Closing can automated using .exit(census$close(), add = TRUE) immediately census <- open_soma().","code":"census$close()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Chan Zuckerberg Initiative Foundation. Author, maintainer, copyright holder, funder.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Chan Zuckerberg Initiative Foundation (2024). cellxgene.census: CZ CELLxGENE Discover Cell Census. R package version 1.14.0, https://github.com/chanzuckerberg/cellxgene-census.","code":"@Manual{,   title = {cellxgene.census: CZ CELLxGENE Discover Cell Census},   author = {{Chan Zuckerberg Initiative Foundation}},   year = {2024},   note = {R package version 1.14.0},   url = {https://github.com/chanzuckerberg/cellxgene-census}, }"},{"path":"/index.html","id":"r-package-of-cz-cellxgene-discover-census","dir":"","previous_headings":"","what":"CZ CELLxGENE Discover Cell Census","title":"CZ CELLxGENE Discover Cell Census","text":"documentation R package cellxgene.census part CZ CELLxGENE Discover Census. full details Census data capabilities please go main Census site. cellxgene.census provides API efficiently access cloud-hosted Census single-cell data R. just seconds users can access slice Census data using cell gene filters across hundreds single-cell datasets. Census data can fetched iterative fashion bigger--memory slices data, quickly exported basic R structures, well Seurat SingleCellExperiment objects downstream analysis.","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"CZ CELLxGENE Discover Cell Census","text":"installing Ubuntu, may need install following libraries via apt install, libxml2-dev libssl-dev libcurl4-openssl-dev. addition must cmake v3.21 greater. installing MacOS, need install developer tools Xcode. Windows supported. 
R session install cellxgene.census R-Universe. able export Census data Seurat SingleCellExperiment also need install respective packages.","code":"install.packages(   \"cellxgene.census\",   repos=c('https://chanzuckerberg.r-universe.dev', 'https://cloud.r-project.org') ) # Seurat install.packages(\"Seurat\")  # SingleCellExperiment if (!require(\"BiocManager\", quietly = TRUE))     install.packages(\"BiocManager\")  BiocManager::install(\"SingleCellExperiment\")"},{"path":"/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"CZ CELLxGENE Discover Cell Census","text":"Check vignettes “Articles” section navigation bar site. highly recommend following vignettes starting point: Querying fetching single-cell data cell/gene metadata Learning CZ CELLxGENE Discover Census can also check quick start guide main Census site.","code":""},{"path":"/index.html","id":"example-seurat-and-singlecellexperiment-query","dir":"","previous_headings":"Usage","what":"Example Seurat and SingleCellExperiment query","title":"CZ CELLxGENE Discover Cell Census","text":"following creates Seurat object -demand sympathetic neurons Census filtering genes ENSG00000161798, ENSG00000188229. 
following retrieves data SingleCellExperiment object.","code":"library(\"cellxgene.census\") library(\"Seurat\")  census <- open_soma()  organism <- \"Homo sapiens\" gene_filter <- \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" cell_filter <-  \"cell_type == 'sympathetic neuron'\" cell_columns <- c(\"assay\", \"cell_type\", \"tissue\", \"tissue_general\", \"suspension_type\", \"disease\")  seurat_obj <- get_seurat(    census = census,    organism = organism,    var_value_filter = gene_filter,    obs_value_filter = cell_filter,    obs_column_names = cell_columns ) library(\"SingleCellExperiment\")  sce_obj <- get_single_cell_experiment(    census = census,    organism = organism,    var_value_filter = gene_filter,    obs_value_filter = cell_filter,    obs_column_names = cell_columns )"},{"path":"/index.html","id":"for-more-help","dir":"","previous_headings":"","what":"For More Help","title":"CZ CELLxGENE Discover Cell Census","text":"help, please go visit main Census site. believe found security issue, appreciate notification. Please send email security@chanzuckerberg.com.","code":""},{"path":"/reference/download_source_h5ad.html","id":null,"dir":"Reference","previous_headings":"","what":"Download source H5AD to local file name. — download_source_h5ad","title":"Download source H5AD to local file name. — download_source_h5ad","text":"Download source H5AD local file name.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Download source H5AD to local file name. — download_source_h5ad","text":"","code":"download_source_h5ad(   dataset_id,   file,   overwrite = FALSE,   census_version = \"stable\",   census = NULL )"},{"path":"/reference/download_source_h5ad.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Download source H5AD to local file name. — download_source_h5ad","text":"dataset_id dataset_id interest. 
file Local file name store H5AD file. overwrite TRUE allow overwriting existing file. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling download_source_h5ad() multiple times.","code":""},{"path":"/reference/download_source_h5ad.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Download source H5AD to local file name. — download_source_h5ad","text":"","code":"download_source_h5ad(\"0895c838-e550-48a3-a777-dbcd35d30272\", \"/tmp/data.h5ad\", overwrite = TRUE)"},{"path":"/reference/get_census_mirror.html","id":null,"dir":"Reference","previous_headings":"","what":"Get locator information about a Census mirror — get_census_mirror","title":"Get locator information about a Census mirror — get_census_mirror","text":"Get locator information Census mirror","code":""},{"path":"/reference/get_census_mirror.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get locator information about a Census mirror — get_census_mirror","text":"","code":"get_census_mirror(mirror)"},{"path":"/reference/get_census_mirror.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get locator information about a Census mirror — get_census_mirror","text":"mirror Name mirror.","code":""},{"path":"/reference/get_census_mirror.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get locator information about a Census mirror — get_census_mirror","text":"List mirror information","code":""},{"path":"/reference/get_census_mirror.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get locator information about a Census mirror — get_census_mirror","text":"","code":"get_census_mirror(\"AWS-S3-us-west-2\") #> $provider #> [1] \"S3\" #>  #> $base_uri #> [1] \"s3://cellxgene-census-public-us-west-2/\" #>  #> $region #> [1] 
\"us-west-2\" #>  #> $alias #> [1] \"\" #>"},{"path":"/reference/get_census_mirror_directory.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the directory of Census mirrors currently available — get_census_mirror_directory","title":"Get the directory of Census mirrors currently available — get_census_mirror_directory","text":"Get directory Census mirrors currently available","code":""},{"path":"/reference/get_census_mirror_directory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the directory of Census mirrors currently available — get_census_mirror_directory","text":"","code":"get_census_mirror_directory()"},{"path":"/reference/get_census_mirror_directory.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the directory of Census mirrors currently available — get_census_mirror_directory","text":"Nested list information available mirrors","code":""},{"path":"/reference/get_census_mirror_directory.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get the directory of Census mirrors currently available — get_census_mirror_directory","text":"","code":"get_census_mirror_directory() #> $default #> $default$provider #> [1] \"S3\" #>  #> $default$base_uri #> [1] \"s3://cellxgene-census-public-us-west-2/\" #>  #> $default$region #> [1] \"us-west-2\" #>  #> $default$alias #> [1] \"default\" #>  #>  #> $`AWS-S3-us-west-2` #> $`AWS-S3-us-west-2`$provider #> [1] \"S3\" #>  #> $`AWS-S3-us-west-2`$base_uri #> [1] \"s3://cellxgene-census-public-us-west-2/\" #>  #> $`AWS-S3-us-west-2`$region #> [1] \"us-west-2\" #>  #> $`AWS-S3-us-west-2`$alias #> [1] \"\" #>  #>"},{"path":"/reference/get_census_version_description.html","id":null,"dir":"Reference","previous_headings":"","what":"Get release description for a Census version — get_census_version_description","title":"Get release description for a Census version — 
get_census_version_description","text":"Get release description Census version","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get release description for a Census version — get_census_version_description","text":"","code":"get_census_version_description(census_version)"},{"path":"/reference/get_census_version_description.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get release description for a Census version — get_census_version_description","text":"census_version census version name.","code":""},{"path":"/reference/get_census_version_description.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get release description for a Census version — get_census_version_description","text":"List release location metadata","code":""},{"path":"/reference/get_census_version_description.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get release description for a Census version — get_census_version_description","text":"","code":"as.data.frame(get_census_version_description(\"stable\")) #>   release_date release_build #> 1                 2023-12-15 #>                                                              soma.uri #> 1 s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/ #>               soma.relative_uri soma.s3_region #> 1 /cell-census/2023-12-15/soma/      us-west-2 #>                                                              h5ads.uri #> 1 s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/h5ads/ #>               h5ads.relative_uri h5ads.s3_region do_not_delete  lts  alias #> 1 /cell-census/2023-12-15/h5ads/       us-west-2          TRUE TRUE stable #>   census_version #> 1         stable"},{"path":"/reference/get_census_version_directory.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the 
directory of Census releases currently available — get_census_version_directory","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Get directory Census releases currently available","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory()"},{"path":"/reference/get_census_version_directory.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"Data frame available cell census releases, including location metadata.","code":""},{"path":"/reference/get_census_version_directory.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get the directory of Census releases currently available — get_census_version_directory","text":"","code":"get_census_version_directory() #>            release_date release_build #> stable                     2023-12-15 #> latest                     2024-05-27 #> 2023-05-15                 2023-05-15 #> 2023-07-25                 2023-07-25 #> 2023-12-15                 2023-12-15 #> 2024-04-29                 2024-04-29 #> 2024-05-06                 2024-05-06 #> 2024-05-13                 2024-05-13 #> 2024-05-20                 2024-05-20 #> 2024-05-27                 2024-05-27 #>                                                                       soma.uri #> stable     s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/ #> latest     s3://cellxgene-census-public-us-west-2/cell-census/2024-05-27/soma/ #> 2023-05-15 s3://cellxgene-census-public-us-west-2/cell-census/2023-05-15/soma/ #> 2023-07-25 s3://cellxgene-census-public-us-west-2/cell-census/2023-07-25/soma/ #> 2023-12-15 
s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/ #> 2024-04-29 s3://cellxgene-census-public-us-west-2/cell-census/2024-04-29/soma/ #> 2024-05-06 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-06/soma/ #> 2024-05-13 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-13/soma/ #> 2024-05-20 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-20/soma/ #> 2024-05-27 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-27/soma/ #>                        soma.relative_uri soma.s3_region #> stable     /cell-census/2023-12-15/soma/      us-west-2 #> latest     /cell-census/2024-05-27/soma/      us-west-2 #> 2023-05-15 /cell-census/2023-05-15/soma/      us-west-2 #> 2023-07-25 /cell-census/2023-07-25/soma/      us-west-2 #> 2023-12-15 /cell-census/2023-12-15/soma/      us-west-2 #> 2024-04-29 /cell-census/2024-04-29/soma/      us-west-2 #> 2024-05-06 /cell-census/2024-05-06/soma/      us-west-2 #> 2024-05-13 /cell-census/2024-05-13/soma/      us-west-2 #> 2024-05-20 /cell-census/2024-05-20/soma/      us-west-2 #> 2024-05-27 /cell-census/2024-05-27/soma/      us-west-2 #>                                                                       h5ads.uri #> stable     s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/h5ads/ #> latest     s3://cellxgene-census-public-us-west-2/cell-census/2024-05-27/h5ads/ #> 2023-05-15 s3://cellxgene-census-public-us-west-2/cell-census/2023-05-15/h5ads/ #> 2023-07-25 s3://cellxgene-census-public-us-west-2/cell-census/2023-07-25/h5ads/ #> 2023-12-15 s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/h5ads/ #> 2024-04-29 s3://cellxgene-census-public-us-west-2/cell-census/2024-04-29/h5ads/ #> 2024-05-06 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-06/h5ads/ #> 2024-05-13 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-13/h5ads/ #> 2024-05-20 s3://cellxgene-census-public-us-west-2/cell-census/2024-05-20/h5ads/ #> 2024-05-27 
s3://cellxgene-census-public-us-west-2/cell-census/2024-05-27/h5ads/ #>                        h5ads.relative_uri h5ads.s3_region do_not_delete  lts #> stable     /cell-census/2023-12-15/h5ads/       us-west-2          TRUE TRUE #> latest     /cell-census/2024-05-27/h5ads/       us-west-2         FALSE   NA #> 2023-05-15 /cell-census/2023-05-15/h5ads/       us-west-2          TRUE TRUE #> 2023-07-25 /cell-census/2023-07-25/h5ads/       us-west-2          TRUE TRUE #> 2023-12-15 /cell-census/2023-12-15/h5ads/       us-west-2          TRUE TRUE #> 2024-04-29 /cell-census/2024-04-29/h5ads/       us-west-2         FALSE   NA #> 2024-05-06 /cell-census/2024-05-06/h5ads/       us-west-2         FALSE   NA #> 2024-05-13 /cell-census/2024-05-13/h5ads/       us-west-2         FALSE   NA #> 2024-05-20 /cell-census/2024-05-20/h5ads/       us-west-2          TRUE   NA #> 2024-05-27 /cell-census/2024-05-27/h5ads/       us-west-2         FALSE   NA #>             alias #> stable     stable #> latest     latest #> 2023-05-15        #> 2023-07-25        #> 2023-12-15        #> 2024-04-29        #> 2024-05-06        #> 2024-05-13        #> 2024-05-20        #> 2024-05-27"},{"path":"/reference/get_presence_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Read the feature dataset presence matrix. — get_presence_matrix","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"Read feature dataset presence matrix.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"get_presence_matrix(census, organism, measurement_name = \"RNA\")"},{"path":"/reference/get_presence_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Read the feature dataset presence matrix. 
— get_presence_matrix","text":"census census object cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA.","code":""},{"path":"/reference/get_presence_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"tiledbsoma::matrixZeroBasedView object dataset join id & feature join id dimensions, filled 1s indicating presence. sparse matrix accessed zero-based indexes since join id's may zero.","code":""},{"path":"/reference/get_presence_matrix.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Read the feature dataset presence matrix. — get_presence_matrix","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-12-15. Specify census_version = \"2023-12-15\" in future calls to open_soma() to ensure data consistency. on.exit(census$close(), add = TRUE) print(get_presence_matrix(census, \"Homo sapiens\")$dim()) #> Error in private$check_open_for_read_or_write(): Item must be open for read or write. 
s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/soma/"},{"path":"/reference/get_seurat.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to Seurat — get_seurat","title":"Export Census slices to Seurat — get_seurat","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return Seurat object.","code":""},{"path":"/reference/get_seurat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to Seurat — get_seurat","text":"","code":"get_seurat(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\", data = NULL),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   obsm_layers = FALSE,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_seurat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to Seurat — get_seurat","text":"census census object, usually returned cellxgene.census::open_soma(). organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers named character X layers add Seurat assay, names names Seurat slots (counts data) values names layers within X. obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. obsm_layers Names arrays obsm add cell embeddings; pass FALSE suppress loading dimensional reductions. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. 
var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_seurat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to Seurat — get_seurat","text":"Seurat object containing census slice.","code":""},{"path":"/reference/get_seurat.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to Seurat — get_seurat","text":"","code":"if (FALSE) { census <- open_soma() seurat_obj <- get_seurat(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  seurat_obj  census$close() }"},{"path":"/reference/get_single_cell_experiment.html","id":null,"dir":"Reference","previous_headings":"","what":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"Convenience wrapper around SOMAExperimentAxisQuery, build execute query, return SingleCellExperiment object.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"get_single_cell_experiment(   census,   organism,   measurement_name = \"RNA\",   X_layers = c(counts = \"raw\"),   obs_value_filter = NULL,   obs_coords = NULL,   obs_column_names = NULL,   obsm_layers = FALSE,   var_value_filter = NULL,   var_coords = NULL,   var_column_names = NULL,   var_index = \"feature_id\" )"},{"path":"/reference/get_single_cell_experiment.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"census census object, usually returned cellxgene.census::open_soma(). 
organism organism query, usually one Homo sapiens Mus musculus measurement_name measurement object query. Defaults RNA. X_layers character vector X layers add assays main experiment; may optionally named set name resulting assay (eg. ‘X_layers = c(counts = \"raw\")’ load X layer “‘raw’” assay “‘counts’”); default, loads X layers obs_value_filter SOMA value_filter across columns obs dataframe, expressed string. obs_coords set coordinates obs dataframe index, expressed type format supported SOMADataFrame's read() method. obs_column_names Columns fetch obs data frame. obsm_layers Names arrays obsm add cell embeddings; pass FALSE suppress loading dimensional reductions. var_value_filter obs_value_filter var. var_coords obs_coords var. var_column_names Columns fetch var data frame. var_index Name column ‘var’ add feature names.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"SingleCellExperiment object containing census slice.","code":""},{"path":"/reference/get_single_cell_experiment.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Export Census slices to SingleCellExperiment — get_single_cell_experiment","text":"","code":"if (FALSE) { census <- open_soma() sce_obj <- get_single_cell_experiment(   census,   organism = \"Homo sapiens\",   obs_value_filter = \"cell_type == 'leptomeningeal cell'\",   var_value_filter = \"feature_id %in% c('ENSG00000107317', 'ENSG00000106034')\" )  sce_obj  census$close() }"},{"path":"/reference/get_source_h5ad_uri.html","id":null,"dir":"Reference","previous_headings":"","what":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","title":"Locate source h5ad file for a dataset. 
— get_source_h5ad_uri","text":"Locate source h5ad file dataset.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(dataset_id, census_version = \"stable\", census = NULL)"},{"path":"/reference/get_source_h5ad_uri.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"dataset_id dataset_id interest. census_version desired Census version. census open Census handle census_version. provided, opened closed automatically; efficient reuse handle calling get_source_h5ad_uri() multiple times.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"list uri optional s3_region.","code":""},{"path":"/reference/get_source_h5ad_uri.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Locate source h5ad file for a dataset. — get_source_h5ad_uri","text":"","code":"get_source_h5ad_uri(\"0895c838-e550-48a3-a777-dbcd35d30272\") #> $uri #> [1] \"s3://cellxgene-census-public-us-west-2/cell-census/2023-12-15/h5ads/0895c838-e550-48a3-a777-dbcd35d30272.h5ad\" #>  #> $s3_region #> [1] \"us-west-2\" #>"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":null,"dir":"Reference","previous_headings":"","what":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"Create SOMATileDBContext suitable using open_soma(). 
Typically open_soma() creates context automatically, one can created separately order set custom configuration options, share multiple open Census handles.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"new_SOMATileDBContext_for_census(   census_version_description,   mirror = \"default\",   ... )"},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"census_version_description result get_census_version_description() desired Census version. mirror name intended census mirror (get_census_mirror_directory()[[name]] save lookup), NULL configure local file access. ... Custom configuration options.","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"SOMATileDBContext object open_soma().","code":""},{"path":"/reference/new_SOMATileDBContext_for_census.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create SOMATileDBContext for Census — new_SOMATileDBContext_for_census","text":"","code":"census_desc <- get_census_version_description(\"stable\") ctx <- new_SOMATileDBContext_for_census(census_desc, \"soma.init_buffer_bytes\" = paste(4 * 1024**3)) census <- open_soma(\"stable\", tiledbsoma_ctx = ctx) #> The stable Census release is currently 2023-12-15. Specify census_version = \"2023-12-15\" in future calls to open_soma() to ensure data consistency. 
census$close()"},{"path":"/reference/open_soma.html","id":null,"dir":"Reference","previous_headings":"","what":"Open the Census — open_soma","title":"Open the Census — open_soma","text":"Open Census","code":""},{"path":"/reference/open_soma.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Open the Census — open_soma","text":"","code":"open_soma(   census_version = \"stable\",   uri = NULL,   tiledbsoma_ctx = NULL,   mirror = NULL )"},{"path":"/reference/open_soma.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Open the Census — open_soma","text":"census_version version Census, e.g., \"stable\". uri URI containing Census SOMA objects open instead released version. (supplied, takes precedence census_version.) tiledbsoma_ctx tiledbsoma::SOMATileDBContext built using new_SOMATileDBContext_for_census(). Optional (created automatically) using census_version context need reused. mirror Census mirror access; one names(get_census_mirror_directory()).","code":""},{"path":"/reference/open_soma.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Open the Census — open_soma","text":"Top-level tiledbsoma::SOMACollection object. use, census closed release memory resources, usually .exit(census$close(), add = TRUE). Closing top-level census also close SOMA objects accessed .","code":""},{"path":"/reference/open_soma.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Open the Census — open_soma","text":"","code":"census <- open_soma() #> The stable Census release is currently 2023-12-15. Specify census_version = \"2023-12-15\" in future calls to open_soma() to ensure data consistency. 
as.data.frame(census$get(\"census_info\")$get(\"summary\")$read()$concat()) #>   soma_joinid                      label      value #> 1           0      census_schema_version      1.2.0 #> 2           1          census_build_date 2023-10-23 #> 3           2     dataset_schema_version      3.1.0 #> 4           3           total_cell_count   68683222 #> 5           4          unique_cell_count   40356133 #> 6           5 number_donors_homo_sapiens      15588 #> 7           6 number_donors_mus_musculus       1990 census$close()"}]
    diff --git a/search.html b/search.html
    index 8da8d602c..76b2efc83 100644
    --- a/search.html
    +++ b/search.html
    @@ -35,6 +35,8 @@
       
         
           
    +        
    +        
             
             
             
    @@ -127,6 +129,13 @@
                 
     
                 
    +
    +
    + + + + +
    diff --git a/searchindex.js b/searchindex.js index 4c4c2f174..fc084050e 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["_autosummary/cellxgene_census.download_source_h5ad", "_autosummary/cellxgene_census.experimental.get_all_available_embeddings", "_autosummary/cellxgene_census.experimental.get_all_census_versions_with_embedding", "_autosummary/cellxgene_census.experimental.get_embedding", "_autosummary/cellxgene_census.experimental.get_embedding_metadata", "_autosummary/cellxgene_census.experimental.get_embedding_metadata_by_name", "_autosummary/cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder", "_autosummary/cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer", "_autosummary/cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe", "_autosummary/cellxgene_census.experimental.ml.pytorch.Stats", "_autosummary/cellxgene_census.experimental.ml.pytorch.experiment_dataloader", "_autosummary/cellxgene_census.experimental.pp.get_highly_variable_genes", "_autosummary/cellxgene_census.experimental.pp.highly_variable_genes", "_autosummary/cellxgene_census.experimental.pp.mean_variance", "_autosummary/cellxgene_census.get_anndata", "_autosummary/cellxgene_census.get_census_version_description", "_autosummary/cellxgene_census.get_census_version_directory", "_autosummary/cellxgene_census.get_default_soma_context", "_autosummary/cellxgene_census.get_obs", "_autosummary/cellxgene_census.get_presence_matrix", "_autosummary/cellxgene_census.get_source_h5ad_uri", "_autosummary/cellxgene_census.get_var", "_autosummary/cellxgene_census.open_soma", "articles", "articles/2023/20230808-r_api_release", "articles/2023/20230919-out_of_core_methods", "articles/2023/20231012-normalized_layer_precalc_stats", "articles/2024/20240404-categoricals", "cellxgene_census_aws_open_data", "cellxgene_census_docsite_FAQ", "cellxgene_census_docsite_data_release_info", "cellxgene_census_docsite_installation", 
"cellxgene_census_docsite_landing", "cellxgene_census_docsite_quick_start", "cellxgene_census_docsite_schema", "cellxgene_census_schema", "examples", "index", "notebooks/analysis_demo/comp_bio_census_info", "notebooks/analysis_demo/comp_bio_data_integration_scvi", "notebooks/analysis_demo/comp_bio_embedding_exploration", "notebooks/analysis_demo/comp_bio_explore_and_load_lung_data", "notebooks/analysis_demo/comp_bio_geneformer_prediction", "notebooks/analysis_demo/comp_bio_normalizing_full_gene_sequencing", "notebooks/analysis_demo/comp_bio_scvi_model_use", "notebooks/analysis_demo/comp_bio_summarize_axis_query", "notebooks/api_demo/census_access_maintained_embeddings", "notebooks/api_demo/census_citation_generation", "notebooks/api_demo/census_compute_over_X", "notebooks/api_demo/census_dataset_presence", "notebooks/api_demo/census_datasets", "notebooks/api_demo/census_duplicated_cells", "notebooks/api_demo/census_embedding", "notebooks/api_demo/census_gget_demo", "notebooks/api_demo/census_query_extract", "notebooks/api_demo/census_summary_cell_counts", "notebooks/experimental/highly_variable_genes", "notebooks/experimental/mean_variance", "notebooks/experimental/pytorch", "python-api"], "filenames": ["_autosummary/cellxgene_census.download_source_h5ad.rst", "_autosummary/cellxgene_census.experimental.get_all_available_embeddings.rst", "_autosummary/cellxgene_census.experimental.get_all_census_versions_with_embedding.rst", "_autosummary/cellxgene_census.experimental.get_embedding.rst", "_autosummary/cellxgene_census.experimental.get_embedding_metadata.rst", "_autosummary/cellxgene_census.experimental.get_embedding_metadata_by_name.rst", "_autosummary/cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder.rst", "_autosummary/cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer.rst", "_autosummary/cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe.rst", "_autosummary/cellxgene_census.experimental.ml.pytorch.Stats.rst", 
"_autosummary/cellxgene_census.experimental.ml.pytorch.experiment_dataloader.rst", "_autosummary/cellxgene_census.experimental.pp.get_highly_variable_genes.rst", "_autosummary/cellxgene_census.experimental.pp.highly_variable_genes.rst", "_autosummary/cellxgene_census.experimental.pp.mean_variance.rst", "_autosummary/cellxgene_census.get_anndata.rst", "_autosummary/cellxgene_census.get_census_version_description.rst", "_autosummary/cellxgene_census.get_census_version_directory.rst", "_autosummary/cellxgene_census.get_default_soma_context.rst", "_autosummary/cellxgene_census.get_obs.rst", "_autosummary/cellxgene_census.get_presence_matrix.rst", "_autosummary/cellxgene_census.get_source_h5ad_uri.rst", "_autosummary/cellxgene_census.get_var.rst", "_autosummary/cellxgene_census.open_soma.rst", "articles.rst", "articles/2023/20230808-r_api_release.md", "articles/2023/20230919-out_of_core_methods.md", "articles/2023/20231012-normalized_layer_precalc_stats.md", "articles/2024/20240404-categoricals.md", "cellxgene_census_aws_open_data.md", "cellxgene_census_docsite_FAQ.md", "cellxgene_census_docsite_data_release_info.md", "cellxgene_census_docsite_installation.md", "cellxgene_census_docsite_landing.md", "cellxgene_census_docsite_quick_start.md", "cellxgene_census_docsite_schema.md", "cellxgene_census_schema.md", "examples.rst", "index.rst", "notebooks/analysis_demo/comp_bio_census_info.ipynb", "notebooks/analysis_demo/comp_bio_data_integration_scvi.ipynb", "notebooks/analysis_demo/comp_bio_embedding_exploration.ipynb", "notebooks/analysis_demo/comp_bio_explore_and_load_lung_data.ipynb", "notebooks/analysis_demo/comp_bio_geneformer_prediction.ipynb", "notebooks/analysis_demo/comp_bio_normalizing_full_gene_sequencing.ipynb", "notebooks/analysis_demo/comp_bio_scvi_model_use.ipynb", "notebooks/analysis_demo/comp_bio_summarize_axis_query.ipynb", "notebooks/api_demo/census_access_maintained_embeddings.ipynb", "notebooks/api_demo/census_citation_generation.ipynb", 
"notebooks/api_demo/census_compute_over_X.ipynb", "notebooks/api_demo/census_dataset_presence.ipynb", "notebooks/api_demo/census_datasets.ipynb", "notebooks/api_demo/census_duplicated_cells.ipynb", "notebooks/api_demo/census_embedding.ipynb", "notebooks/api_demo/census_gget_demo.ipynb", "notebooks/api_demo/census_query_extract.ipynb", "notebooks/api_demo/census_summary_cell_counts.ipynb", "notebooks/experimental/highly_variable_genes.ipynb", "notebooks/experimental/mean_variance.ipynb", "notebooks/experimental/pytorch.ipynb", "python-api.rst"], "titles": ["cellxgene_census.download_source_h5ad", "cellxgene_census.experimental.get_all_available_embeddings", "cellxgene_census.experimental.get_all_census_versions_with_embedding", "cellxgene_census.experimental.get_embedding", "cellxgene_census.experimental.get_embedding_metadata", "cellxgene_census.experimental.get_embedding_metadata_by_name", "cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder", "cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer", "cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe", "cellxgene_census.experimental.ml.pytorch.Stats", "cellxgene_census.experimental.ml.pytorch.experiment_dataloader", "cellxgene_census.experimental.pp.get_highly_variable_genes", "cellxgene_census.experimental.pp.highly_variable_genes", "cellxgene_census.experimental.pp.mean_variance", "cellxgene_census.get_anndata", "cellxgene_census.get_census_version_description", "cellxgene_census.get_census_version_directory", "cellxgene_census.get_default_soma_context", "cellxgene_census.get_obs", "cellxgene_census.get_presence_matrix", "cellxgene_census.get_source_h5ad_uri", "cellxgene_census.get_var", "cellxgene_census.open_soma", "What\u2019s new?", "R package cellxgene.census V1 is out!", "Memory-efficient implementations of commonly used single-cell methods", "Introducing a normalized layer and pre-calculated cell and gene statistics in Census", "Census supports categoricals for cell 
metadata", "CZ CELLxGENE Discover Census in AWS", "FAQ", "Census data releases", "Installation", "CZ CELLxGENE Discover Census", "Quick start", "Census data and schema", "CZ CELLxGENE Discover Census Schema", "Python tutorials", "CZ CELLxGENE Discover Census", "Learning about the CZ CELLxGENE Census", "Integrating multi-dataset slices of data", "Exploring biologically relevant clusters in Census embeddings", "Exploring all data from a tissue", "Geneformer for cell class prediction and data projection", "Normalizing full-length gene sequencing data", "scVI for cell type prediction and data projection", "Summarizing cell and gene metadata", "Access CELLxGENE collaboration embeddings (scVI, Geneformer)", "Generating citations for Census slices", "Computing on X using online (incremental) algorithms", "Genes measured in each cell (dataset presence matrix)", "Exploring the Census Datasets table", "Understanding and filtering out duplicate cells", "Access CELLxGENE-hosted embeddings", "Querying data using the gget cellxgene module", "Querying and fetching the single-cell data and cell/gene metadata.", "Exploring pre-calculated summary cell counts", "Experimental Highly Variable Genes API", "Out-of-core (incremental) mean and variance calculation", "Training a PyTorch Model", "Python API"], "terms": {"dataset_id": [0, 12, 20, 25, 27, 35, 38, 40, 41, 42, 43, 44, 46, 47, 49, 50, 51, 52, 53, 54, 57], "str": [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 35, 39, 40, 42, 53], "to_path": [0, 50], "census_vers": [0, 1, 3, 5, 7, 11, 15, 16, 20, 22, 26, 27, 30, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "stabl": [0, 11, 12, 16, 20, 22, 24, 30, 31, 33, 38, 39, 41, 43, 45, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "none": [0, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 21, 22, 35, 41, 42, 48, 53], "download": [0, 25, 29, 46, 52, 59], "sourc": [0, 20, 22, 28, 31, 35, 52, 53, 58], "h5ad": [0, 15, 16, 20, 
22, 28, 35, 39, 42, 43, 47, 49, 53, 59], "dataset": [0, 6, 7, 11, 12, 19, 24, 26, 28, 30, 32, 34, 36, 37, 38, 40, 43, 44, 45, 46, 47, 48, 51, 52, 53, 54, 55], "given": [0, 1, 8, 15, 25, 28, 30, 35, 41, 46, 48, 49, 50, 52, 58], "user": [0, 11, 12, 17, 20, 24, 25, 26, 27, 28, 29, 32, 34, 37, 39, 41, 42, 43, 44, 48, 56, 58], "specifi": [0, 2, 5, 7, 8, 11, 12, 13, 16, 17, 22, 26, 28, 30, 33, 38, 39, 41, 43, 45, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "file": [0, 7, 22, 28, 29, 30, 35, 38, 42, 44, 45, 53], "name": [0, 2, 5, 6, 8, 11, 12, 15, 16, 20, 28, 30, 33, 34, 35, 38, 39, 41, 43, 45, 47, 48, 51, 52, 53, 54, 56, 59], "paramet": [0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 39, 41, 43, 52], "fetch": [0, 3, 8, 11, 12, 14, 18, 21, 24, 36, 42, 44, 46, 47, 51, 58], "origin": [0, 8, 26, 35, 40, 41, 42, 44, 51, 58], "associ": [0, 2, 5, 35, 36, 41, 42], "thi": [0, 7, 8, 9, 10, 12, 14, 16, 20, 24, 25, 26, 27, 28, 29, 30, 33, 34, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "The": [0, 1, 2, 3, 4, 5, 6, 8, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58, 59], "where": [0, 8, 13, 35, 39, 40, 41, 43, 45, 46, 48, 51, 52, 56, 57, 58], "written": [0, 11, 14, 18, 21], "must": [0, 8, 11, 12, 31, 33, 35, 40, 51], "alreadi": [0, 40, 44], "exist": [0, 20, 27, 28, 29, 32, 37, 38, 41, 42, 51], "censu": [0, 1, 2, 3, 5, 6, 7, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 31, 33, 39, 44, 46, 48, 53, 55, 56], "version": [0, 1, 2, 3, 5, 14, 15, 16, 20, 22, 24, 26, 29, 31, 38, 39, 40, 42, 44, 45, 46, 47, 49, 51, 52, 53, 54], "default": [0, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 16, 17, 18, 20, 21, 22, 27, 30, 39, 43, 48, 53, 57, 58], "rais": [0, 3, 5, 10, 11, 12, 15, 19, 20, 22, 38, 45], "valueerror": [0, 3, 5, 10, 11, 12, 15, 19, 22], "path": [0, 22, 28, 35, 42, 53], "i": [0, 2, 3, 5, 8, 9, 11, 12, 13, 14, 
16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], "e": [0, 1, 2, 3, 5, 6, 8, 12, 13, 22, 26, 28, 30, 32, 34, 35, 37, 38, 40, 41, 42, 45, 46, 48, 49, 50, 51, 52, 53, 56], "overwrit": 0, "an": [0, 1, 3, 6, 8, 10, 11, 13, 14, 16, 18, 21, 22, 24, 25, 26, 28, 30, 31, 32, 33, 35, 37, 38, 39, 40, 41, 42, 45, 47, 48, 50, 54, 57, 59], "lifecycl": [0, 3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 22], "matur": [0, 15, 16, 19, 20, 22, 38, 40], "get_source_h5ad_uri": [0, 22, 50], "look": [0, 22, 26, 38, 39, 41, 42, 43, 44, 46, 51, 52, 53, 54, 58], "up": [0, 22, 25, 44, 48, 51], "locat": [0, 17, 22, 29, 50, 52, 54], "exampl": [0, 1, 3, 4, 7, 11, 12, 14, 15, 16, 17, 19, 20, 22, 24, 26, 28, 29, 31, 33, 35, 36, 39, 40, 41, 46, 48, 52, 53, 54, 58], "8e47ed12": 0, "c658": 0, "4252": [0, 41, 49], "b126": 0, "381df8d52a3d": 0, "tmp": [0, 22], "data": [0, 3, 8, 9, 10, 11, 12, 15, 16, 20, 25, 27, 31, 33, 40, 45, 46, 47, 48, 49, 55, 56, 57, 58], "list": [1, 2, 11, 12, 14, 18, 21, 28, 32, 34, 35, 37, 38, 40, 41, 42, 44, 45, 49, 53, 54, 59], "dict": [1, 4, 5, 15, 16, 17, 22, 44], "ani": [1, 3, 4, 5, 6, 7, 8, 10, 11, 12, 14, 17, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 35, 37, 38, 40, 44, 46, 47, 48, 49, 50, 52, 55, 56, 58], "return": [1, 2, 3, 4, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 38, 40, 45, 48, 49, 53, 54, 55, 56, 57, 58], "dictionari": [1, 4, 5, 15, 16, 17, 22, 27, 38, 42, 52, 54], "all": [1, 2, 7, 8, 11, 12, 14, 16, 18, 21, 24, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 39, 40, 42, 43, 44, 46, 47, 48, 50, 51, 52, 53, 54, 55], "avail": [1, 8, 12, 14, 16, 25, 26, 29, 30, 31, 39, 40, 41, 42, 46, 52, 53, 54, 56, 59], "embed": [1, 2, 3, 4, 5, 14, 32, 37, 39, 44], "tag": [1, 3, 5, 28, 30], "g": [1, 2, 3, 5, 6, 12, 13, 22, 28, 30, 32, 34, 35, 37, 38, 40, 42, 45, 48, 50, 52, 53, 56], "2023": [1, 3, 5, 24, 25, 26, 28, 32, 33, 37, 38, 39, 40, 41, 42, 43, 
44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "12": [1, 3, 5, 15, 16, 20, 22, 26, 38, 39, 40, 41, 42, 43, 44, 46, 49, 51, 52, 54], "15": [1, 3, 5, 16, 28, 38, 39, 40, 41, 42, 43, 44, 46, 49, 51, 52, 53, 57], "A": [1, 2, 3, 4, 5, 8, 10, 12, 13, 16, 17, 18, 19, 20, 21, 22, 28, 30, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 49, 50, 51, 52, 53, 54], "each": [1, 6, 7, 8, 16, 25, 26, 27, 29, 30, 33, 34, 35, 36, 39, 40, 41, 42, 43, 45, 46, 47, 48, 50, 52, 53, 55, 56, 58, 59], "contain": [1, 2, 3, 4, 5, 7, 11, 12, 14, 15, 16, 18, 19, 20, 21, 22, 34, 35, 38, 39, 40, 41, 42, 43, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59], "metadata": [1, 4, 5, 7, 11, 14, 18, 21, 23, 25, 28, 29, 32, 34, 36, 37, 39, 40, 42, 43, 44, 46, 48, 49, 50, 51, 55, 56, 58], "describ": [1, 4, 5, 28, 34, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 54, 55, 56, 57], "experiment_nam": [1, 40, 46, 52, 57], "experiment_1": 1, "measurement_nam": [1, 6, 8, 11, 14, 19, 25, 26, 33, 42, 44, 46, 47, 48, 49, 51, 52, 56, 57, 58], "rna": [1, 6, 11, 14, 19, 25, 26, 29, 32, 33, 34, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58], "organ": [1, 2, 5, 11, 14, 18, 19, 21, 24, 25, 26, 29, 32, 33, 34, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 58], "homo_sapien": [1, 6, 7, 24, 25, 26, 27, 28, 33, 34, 35, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 58], "n_embed": [1, 52], "1000": [1, 11, 12, 14, 25, 35, 39, 43], "n_featur": [1, 52], "200": [1, 3], "uri": [1, 3, 4, 15, 16, 17, 20, 22, 28, 42, 44, 50, 59], "s3": [1, 15, 16, 17, 20, 22, 28, 29, 31, 42, 44, 46, 50, 52], "bucket": [1, 17, 22, 28, 29, 31, 35], "embedding_1": 1, "embedding_nam": [2, 5, 40, 42, 44, 46, 52], "embedding_typ": [2, 5], "obs_embed": [2, 5, 14, 40, 42, 44, 46, 52], "get": [2, 11, 15, 16, 18, 21, 22, 24, 25, 26, 27, 32, 33, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 50, 51, 52, 54], "specif": [2, 5, 22, 29, 30, 32, 34, 35, 37, 38, 40, 
45, 48, 51, 54], "scvi": [2, 5, 30, 36, 40, 52], "which": [2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 30, 34, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58], "type": [2, 8, 19, 24, 27, 28, 30, 33, 34, 36, 39, 40, 42, 43, 48, 49, 55, 58], "embedding_uri": [3, 4, 46, 52], "obs_soma_joinid": [3, 52], "ndarrai": [3, 11, 14, 18, 21, 46, 48, 52], "dtype": [3, 8, 11, 14, 18, 21, 38, 39, 40, 41, 43, 45, 46, 47, 48, 51, 52, 54, 58], "int64": [3, 8, 27, 35, 38, 39, 41, 43, 45, 48, 51, 54], "arrai": [3, 11, 14, 18, 19, 21, 29, 32, 34, 37, 41, 43, 44, 48, 49, 58], "context": [3, 4, 6, 17, 22, 26, 28, 33, 38, 41, 45, 51, 52], "somatiledbcontext": [3, 4, 17, 22, 52], "float32": [3, 35, 43, 46, 48, 52], "read": [3, 4, 8, 9, 14, 19, 25, 26, 28, 29, 30, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 50, 52, 54, 55, 56, 58], "cell": [3, 6, 7, 8, 11, 12, 15, 16, 20, 23, 28, 29, 32, 36, 37, 39, 43, 50, 56, 57, 58], "ob": [3, 7, 8, 11, 12, 13, 14, 18, 24, 28, 33, 34, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 55, 56, 57, 58], "dens": [3, 8, 29, 32, 34, 37], "numpi": [3, 19, 29, 32, 37, 39, 40, 41, 42, 44, 48, 49, 58], "without": [3, 32, 37, 40, 42, 58], "nan": [3, 40, 52, 56], "valu": [3, 7, 8, 11, 12, 13, 14, 15, 18, 19, 21, 24, 25, 26, 27, 29, 30, 34, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 51, 52, 53, 54, 56, 57, 58], "us": [3, 4, 8, 9, 10, 11, 12, 13, 14, 16, 17, 22, 23, 27, 28, 30, 31, 33, 34, 35, 38, 39, 40, 41, 43, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59], "verifi": 3, "content": [3, 7, 28, 30, 33, 34, 35, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 57, 58], "from": [3, 6, 7, 8, 13, 15, 19, 25, 26, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 43, 44, 46, 48, 49, 51, 52, 54, 55, 56, 57, 58], "same": [3, 25, 26, 33, 34, 40, 42, 43, 46, 50, 51, 52, 54, 56], "slice": [3, 11, 14, 18, 21, 25, 26, 29, 32, 36, 37, 38, 41, 46, 48, 49, 50, 52, 54], "custom": 
[3, 4, 17, 22, 28], "tiledbsoma": [3, 4, 6, 7, 8, 11, 12, 13, 14, 17, 22, 25, 26, 28, 33, 46, 48, 51, 52, 56, 57, 58, 59], "open": [3, 4, 17, 20, 22, 24, 25, 26, 28, 30, 32, 33, 37, 39, 41, 42, 47, 52, 53, 56], "soma": [3, 4, 8, 9, 11, 14, 15, 16, 17, 18, 21, 22, 24, 25, 29, 30, 32, 33, 35, 37, 38, 45, 46, 48, 49, 50, 52, 56, 57, 58, 59], "object": [3, 4, 8, 11, 14, 17, 18, 19, 20, 21, 22, 24, 26, 29, 32, 35, 37, 38, 39, 40, 41, 45, 46, 47, 50, 52, 54, 58], "option": [3, 4, 11, 12, 16, 20, 22, 28, 31, 35, 50, 53], "ar": [3, 5, 8, 10, 11, 12, 13, 16, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 52, 53, 54, 56, 57, 58], "position": [3, 48, 49], "index": [3, 8, 11, 13, 14, 18, 19, 21, 34, 35, 40, 42, 44, 48, 49, 50, 56, 57], "In": [3, 25, 26, 27, 30, 31, 32, 35, 37, 38, 39, 40, 41, 42, 46, 48, 49, 51, 52, 56, 58], "other": [3, 6, 10, 25, 26, 35, 38, 40, 43, 46, 48, 49, 50, 51, 52, 54], "word": [3, 35, 40, 48, 49, 52], "identifi": [3, 11, 12, 16, 25, 30, 40, 43], "correspond": [3, 13, 16, 26, 28, 35, 38, 41, 42, 43, 44, 45, 46, 48, 51, 52, 54], "ith": 3, "posit": [3, 8, 38, 41, 42, 48], "mismatch": 3, "obs_somaids_to_fetch": 3, "np": [3, 39, 40, 41, 42, 44, 48, 52], "10": [3, 16, 29, 30, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 57, 58], "11": [3, 16, 29, 30, 31, 38, 39, 40, 41, 42, 43, 44, 46, 49, 50, 51, 52, 53, 54, 57, 58], "emb": [3, 40, 42, 52], "shape": [3, 38, 40, 41, 46, 48, 51, 52, 58], "2": [3, 8, 15, 16, 17, 20, 22, 25, 28, 29, 30, 31, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "0": [3, 8, 9, 10, 11, 12, 13, 14, 25, 26, 27, 30, 33, 34, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58], "4": [3, 8, 25, 30, 33, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58], "02954102": 3, "1": [3, 8, 13, 17, 25, 26, 28, 30, 33, 34, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 
53, 54, 55, 56, 57, 58], "0390625": 3, "14550781": 3, "40820312": 3, "00224304": 3, "265625": 3, "05883789": 3, "7890625": 3, "python": [4, 8, 24, 25, 26, 27, 30, 32, 37, 38, 42, 45, 47, 49, 52, 53, 54], "get_experiment_metadata": 4, "If": [5, 6, 8, 10, 11, 12, 13, 16, 17, 22, 28, 29, 30, 31, 32, 35, 37, 38, 41, 45, 51, 52, 53, 58], "more": [5, 8, 12, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 37, 38, 39, 40, 42, 43, 44, 45, 46, 48, 51, 52, 53, 54, 56, 58, 59], "match": [5, 11, 42, 44, 45, 50, 52, 53, 54, 56], "queri": [5, 6, 7, 8, 11, 12, 13, 14, 18, 19, 21, 25, 30, 32, 35, 36, 37, 38, 39, 41, 44, 45, 48, 50, 51, 55, 56, 57, 58], "most": [5, 16, 25, 29, 30, 35, 38, 39, 40, 41, 42, 45, 51, 56, 58], "recent": [5, 16, 24, 30], "one": [5, 6, 11, 14, 18, 19, 21, 22, 29, 30, 34, 35, 38, 39, 40, 42, 44, 50, 51, 52, 53, 54, 58], "either": [5, 16, 20, 28, 29, 35, 58], "var_embed": [5, 14, 52], "found": [5, 19, 22, 24, 32, 37, 39, 40, 41, 43, 44, 50, 54], "class": [6, 7, 8, 9, 19, 33, 34, 36, 46, 48, 49, 52], "experi": [6, 7, 8, 11, 14, 17, 26, 34, 35, 45, 46, 49, 50, 52, 55, 56, 58], "layer_nam": 6, "raw": [6, 8, 11, 12, 13, 14, 25, 26, 33, 34, 38, 40, 41, 46, 48, 51, 52, 57, 58], "block_siz": 6, "int": [6, 7, 8, 9, 10, 11, 12, 13, 14, 18, 21, 35, 40, 44, 48], "kwarg": [6, 7], "abstract": 6, "base": [6, 8, 11, 16, 24, 25, 26, 32, 34, 35, 37, 38, 40, 41, 42, 44, 46, 51, 52, 53, 54, 56], "method": [6, 7, 8, 9, 10, 11, 12, 17, 23, 26, 27, 28, 29, 30, 38, 40, 43, 45, 46, 48, 50, 52, 54, 56, 58], "process": [6, 7, 8, 10, 25, 26, 29, 38, 42, 48, 51], "cellxgen": [6, 7, 15, 16, 20, 23, 26, 27, 29, 30, 31, 33, 34, 36, 39, 40, 41, 42, 43, 44, 47, 50, 51, 59], "experimentaxisqueri": [6, 7, 12, 13, 56, 57], "result": [6, 7, 8, 11, 12, 13, 16, 25, 30, 33, 39, 40, 44, 45, 46, 48, 52, 54, 56, 57, 58], "hug": [6, 7], "face": [6, 7], "item": [6, 7, 34, 38, 45, 50, 58], "repres": [6, 13, 24, 30, 34, 35, 41, 52, 57], "subclass": [6, 40], "implement": [6, 23, 29, 32, 35, 37, 48, 56, 58], 
"cell_item": 6, "row": [6, 8, 11, 12, 13, 19, 26, 33, 35, 38, 40, 41, 46, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58], "x": [6, 8, 11, 12, 13, 14, 26, 30, 33, 34, 36, 38, 39, 40, 41, 42, 43, 44, 51, 52, 57, 58], "layer": [6, 8, 11, 12, 13, 14, 23, 32, 35, 37, 39, 49, 53, 57], "mai": [6, 8, 11, 14, 16, 18, 21, 24, 27, 29, 30, 31, 32, 35, 37, 38, 39, 40, 48, 49, 50, 51, 52, 58], "also": [6, 8, 16, 25, 27, 28, 29, 31, 40, 42, 44, 45, 49, 50, 51, 52, 53, 54, 56, 58], "overrid": [6, 17, 22], "__init__": [6, 7, 8, 9, 48, 58], "__enter__": 6, "perform": [6, 8, 16, 25, 26, 30, 31, 32, 33, 35, 37, 38, 39, 41, 43, 48, 51, 52, 54, 57, 58], "necessari": [6, 25, 32, 37, 40], "preprocess": [6, 41], "inherit": 6, "so": [6, 8, 29, 38, 39, 40, 41, 42, 43, 44, 45, 48, 49, 58], "typic": [6, 40, 58], "usag": [6, 7, 8, 25, 28, 29, 33, 39, 51, 58], "would": [6, 8, 39, 51, 58], "import": [6, 7, 25, 26, 27, 28, 30, 33, 38, 39, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "geneformertoken": 6, "open_soma": [6, 7, 11, 14, 17, 18, 21, 24, 25, 26, 27, 28, 30, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "subclassofcelldatasetbuild": 6, "census_data": [6, 7, 24, 25, 26, 27, 28, 33, 38, 40, 41, 43, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58], "obs_queri": [6, 7, 8, 25, 26, 33, 46, 48, 51, 52, 56, 57, 58], "tilebsoma": 6, "axisqueri": [6, 7, 8, 25, 26, 33, 46, 48, 51, 52, 56, 57, 58], "defin": [6, 7, 11, 12, 29, 34, 35, 38, 45, 48, 53, 54], "some": [6, 7, 8, 10, 25, 27, 35, 38, 39, 40, 41, 42, 43, 44, 51, 53], "subset": [6, 7, 11, 27, 39, 40, 41, 42, 43, 44, 52, 57, 58], "var_queri": [6, 8, 25, 48, 58], "builder": 6, "build": [6, 7, 11, 14, 27, 28, 29, 30, 31, 34, 35, 38, 41, 46, 52, 53], "initi": [6, 33, 35, 44, 46, 51, 52], "measur": [6, 8, 11, 14, 19, 25, 26, 34, 35, 36, 41, 43, 50, 52], "number": [6, 7, 8, 10, 11, 12, 13, 16, 26, 30, 35, 41, 42, 43, 44, 46, 48, 50, 51, 52, 56, 57, 58, 59], "memori": [6, 8, 17, 23, 24, 
26, 27, 29, 31, 32, 36, 37, 45, 48, 50, 51, 53, 54, 58], "onc": [6, 11, 12, 16, 24, 30, 38, 45, 48, 58], "unspecifi": 6, "sparsendarrayread": 6, "blockwis": [6, 52], "select": [6, 11, 12, 13, 14, 18, 21, 26, 33, 35, 39, 40, 41, 42, 46, 49, 50, 51, 52, 54, 56], "pass": [6, 8, 10, 17, 39, 44, 48, 53, 54, 58], "through": [6, 31, 32, 37, 44, 52, 54, 58], "especi": 6, "attribut": [6, 7, 8, 9, 42, 46, 52, 53, 58], "obs_column_nam": [7, 8, 24, 26, 33, 58], "sequenc": [7, 8, 11, 12, 14, 18, 21, 32, 34, 36, 37, 39, 40, 41, 49, 50, 52], "obs_attribut": 7, "max_input_token": 7, "2048": 7, "token_dictionary_fil": 7, "gene_median_fil": 7, "gener": [7, 9, 11, 12, 25, 29, 30, 32, 35, 36, 37, 38, 39, 40, 52, 53], "geneform": [7, 30, 36, 40, 52], "token": 7, "human": [7, 24, 26, 33, 34, 35, 36, 39, 40, 42, 45, 46, 49, 50, 51, 54, 55], "requir": [7, 8, 28, 35, 41, 45, 46, 52, 53, 58], "packag": [7, 23, 29, 31, 32, 33, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 52, 53, 54, 56, 57], "instal": 7, "separ": [7, 35, 40, 51, 53, 56], "pip": [7, 29, 31, 53], "git": 7, "http": [7, 12, 16, 29, 31, 39, 41, 42, 43, 44, 47, 52, 53], "co": [7, 29, 32, 37], "ctheodori": 7, "8df5dc1": 7, "latest": [7, 15, 16, 22, 26, 27, 31, 38, 39, 45, 47, 49, 52, 53, 54], "set": [7, 8, 17, 22, 25, 26, 33, 39, 42, 44, 49, 56, 58], "value_filt": [7, 11, 14, 18, 21, 24, 25, 26, 28, 30, 33, 38, 39, 40, 41, 43, 46, 47, 48, 51, 52, 54, 55, 56, 57, 58], "is_primary_data": [7, 11, 25, 32, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "true": [7, 8, 11, 13, 16, 25, 31, 35, 38, 39, 40, 41, 43, 44, 45, 48, 51, 52, 53, 54, 55, 56, 57, 58], "tissue_gener": [7, 11, 14, 24, 25, 28, 33, 35, 38, 40, 41, 46, 50, 51, 52, 53, 54, 55, 56, 57, 58], "tongu": [7, 46, 49, 51, 52, 58], "soma_joinid": [7, 8, 11, 13, 14, 18, 19, 21, 25, 27, 30, 33, 35, 38, 39, 40, 41, 43, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57], "cell_type_ontology_term_id": [7, 27, 35, 38, 41, 45, 46, 50, 52, 53, 
54, 55, 57], "input_id": [7, 42], "length": [7, 35, 36, 38, 41, 42, 47], "datafram": [7, 8, 11, 12, 13, 14, 18, 19, 21, 24, 26, 27, 33, 34, 35, 38, 40, 41, 45, 46, 48, 49, 50, 52, 53, 54, 56, 57, 58], "column": [7, 8, 11, 12, 13, 14, 18, 21, 33, 34, 35, 38, 40, 41, 42, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "propag": [7, 58], "maximum": [7, 8, 11, 12, 58], "input": [7, 26, 48, 54, 58], "pickl": [7, 58], "suppli": 7, "map": [7, 24, 35, 38, 41, 42, 44, 48, 49, 50], "ensembl": [7, 42, 44, 53], "gene": [7, 8, 11, 12, 23, 24, 29, 32, 33, 34, 36, 37, 40, 42, 44, 46, 51, 52, 58], "id": [7, 35, 38, 39, 40, 42, 43, 44, 46, 48, 52, 53], "onto": 7, "median": 7, "express": [7, 26, 29, 35, 39, 40, 44, 46, 48, 52], "By": [7, 24, 25, 26, 27, 38, 43, 53], "load": [7, 10, 24, 27, 29, 32, 37, 39, 41, 44, 47, 54, 58], "x_name": [8, 11, 14, 26, 46, 52, 58], "batch_siz": [8, 10, 58], "shuffl": [8, 10, 58], "bool": [8, 13, 16, 40], "fals": [8, 13, 16, 17, 25, 27, 28, 35, 38, 39, 40, 41, 42, 44, 51, 53, 54, 55, 56, 57], "seed": [8, 39, 58], "return_sparse_x": 8, "soma_chunk_s": [8, 58], "use_eager_fetch": 8, "torchdata": [8, 10, 58], "datapip": [8, 10, 58], "iter": [8, 10, 24, 26, 28, 33, 48, 51, 58], "iterdatapip": [8, 10, 58], "upon": [8, 11, 22, 30, 38, 45, 56], "along": [8, 13, 24, 26, 42, 57, 58], "var": [8, 11, 12, 13, 14, 19, 21, 25, 33, 34, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 52, 53, 56, 57, 58], "ax": [8, 13, 58], "provid": [8, 22, 28, 29, 30, 32, 33, 34, 35, 37, 38, 39, 41, 42, 43, 44, 45, 46, 49, 50, 51, 52, 53, 55, 56, 58, 59], "over": [8, 12, 13, 22, 26, 33, 48, 52, 53, 57], "when": [8, 10, 11, 12, 27, 35, 40, 48, 52, 53, 55, 56, 58], "": [8, 12, 16, 24, 28, 29, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46, 48, 49, 51, 52, 54, 57, 58], "built": [8, 26, 32, 35, 37, 59], "function": [8, 11, 12, 26, 29, 30, 38, 48, 52, 53, 55, 56, 58, 59], "batch": [8, 11, 12, 25, 40, 42, 44, 48, 56, 58], "x_batch": [8, 58], "y_batch": [8, 58], "control": [8, 25, 56, 58], 
"tensor": [8, 58], "have": [8, 16, 24, 26, 30, 31, 32, 35, 37, 38, 39, 40, 43, 44, 45, 46, 48, 49, 52, 56, 58], "rank": [8, 11, 12, 56, 58], "2415": 8, "torch": [8, 10, 58], "encod": [8, 38, 39, 45, 46, 48, 52, 58], "For": [8, 13, 24, 26, 27, 28, 29, 31, 32, 33, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 48, 49, 51, 52, 53, 54, 56, 58, 59], "larger": [8, 29, 32, 33, 37, 40, 48], "dataload": [8, 10], "3": [8, 11, 12, 25, 29, 30, 31, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "2416": 8, "2417": 8, "whether": [8, 53], "spars": [8, 13, 19, 29, 32, 33, 34, 37, 39, 43, 48, 49, 52], "model": [8, 11, 12, 30, 32, 35, 37, 39, 40, 46, 52, 56], "support": [8, 10, 23, 28, 31, 32, 34, 37, 40, 42, 53, 57, 58], "reduc": [8, 17, 26, 41, 46, 51, 52, 58], "determin": [8, 49, 58], "first": [8, 19, 24, 25, 31, 33, 39, 41, 42, 43, 45, 46, 47, 48, 49, 51, 52, 58], "element": [8, 13, 19, 48, 49, 57], "alwai": [8, 16, 27, 30, 34, 35, 51], "panda": [8, 11, 12, 13, 18, 19, 21, 27, 29, 32, 33, 37, 38, 40, 41, 45, 48, 49, 50, 54, 55, 56, 57, 58], "equival": [8, 26, 46, 48, 52], "soma_dim_0": [8, 46, 48, 51, 52], "matrix": [8, 13, 19, 26, 29, 32, 33, 34, 36, 37, 38, 39, 40, 41, 43, 46, 52, 53], "remain": [8, 40], "string": [8, 11, 12, 27, 35, 52, 54, 58], "integ": [8, 11, 14, 18, 21, 34, 41, 43, 48, 58], "need": [8, 27, 31, 33, 35, 38, 39, 42, 44, 49, 51, 54], "can": [8, 10, 11, 12, 17, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58], "decod": [8, 52, 58], "obtain": [8, 25, 38, 39, 41, 42, 44, 51, 54, 58], "call": [8, 11, 12, 28, 30, 33, 38, 39, 41, 43, 45, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "its": [8, 17, 24, 26, 28, 30, 32, 34, 35, 37, 39, 42, 44, 45, 49, 51, 54, 58], "inverse_transform": [8, 58], "exp_data_pip": 8, "obs_encod": [8, 58], "obs_attr_nam": 8, "encoded_valu": 8, "construct": [8, 38, 40, 41, 49, 50, 52], "new": [8, 30, 32, 33, 35, 37, 39, 42, 53, 
58], "filter": [8, 11, 14, 16, 18, 21, 24, 25, 26, 29, 30, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 58], "axi": [8, 11, 13, 14, 18, 21, 25, 34, 35, 40, 41, 42, 43, 44, 45, 46, 48, 51, 52, 56, 57, 58], "veri": [8, 55], "larg": [8, 24, 38, 41, 48, 51, 52, 53, 54, 55], "featur": [8, 19, 32, 33, 34, 36, 37, 41, 42, 44, 46, 49, 52, 53, 56], "doe": [8, 20, 44, 48, 52, 58], "onli": [8, 12, 13, 16, 22, 25, 26, 30, 33, 34, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 51, 52, 54, 56, 58], "being": [8, 58], "singl": [8, 11, 12, 23, 28, 32, 35, 36, 37, 38, 39, 40, 43, 49, 51, 52, 53, 58, 59], "multipl": [8, 11, 12, 16, 32, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 54, 55], "reason": [8, 16, 40], "two": [8, 26, 28, 34, 35, 38, 39, 46, 52, 53, 54, 56], "step": [8, 25, 29, 41, 42, 58], "global": [8, 40, 41, 58], "contigu": 8, "group": [8, 35, 38, 40, 42, 55], "chunk": [8, 13, 29, 51, 58], "order": [8, 27, 30, 42, 58], "random": [8, 39, 40, 41, 42, 44, 58], "local": [8, 28, 40, 50, 58], "within": [8, 33, 35, 38, 40, 52, 58], "sinc": [8, 10, 24, 29, 30, 39, 41, 51, 53, 58], "retriev": [8, 9, 14, 22, 24, 26, 35, 36, 38, 46, 58], "keep": [8, 26, 42, 55], "fix": [8, 29, 35, 58], "size": [8, 13, 34, 35, 40, 42, 44, 52, 55, 58], "ensur": [8, 26, 29, 33, 38, 39, 41, 43, 45, 47, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "non": [8, 16, 25, 29, 34, 35, 38, 40, 41, 42, 48, 51, 52, 54], "occur": [8, 11, 12, 29, 52], "second": [8, 19, 24, 46, 49, 52, 58], "note": [8, 24, 27, 32, 34, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 58], "maintain": [8, 39, 46, 52], "proxim": [8, 41, 55], "even": [8, 27, 52], "after": [8, 29, 30, 41], "suffici": [8, 29, 58], "train": [8, 36, 39, 52], "To": [8, 17, 24, 26, 29, 30, 31, 32, 35, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 50, 51, 52, 53, 54, 58], "end": [8, 35, 39, 40, 51], "treat": 8, "hyperparamet": 8, "tune": [8, 30, 46], "nn": [8, 58], "parallel": [8, 48], 
"distributeddataparallel": 8, "partit": 8, "disjoint": [8, 34], "across": [8, 24, 29, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55], "worker": [8, 10], "As": [8, 25, 32, 33, 34, 35, 37, 39, 43, 46, 49, 52, 54, 57], "still": [8, 39], "impact": [8, 40], "aspect": 8, "behavior": 8, "util": [8, 10, 25, 29, 40, 42, 44, 45, 48, 51, 52], "better": [8, 35, 36], "granular": [8, 58], "see": [8, 12, 25, 26, 27, 28, 29, 31, 33, 34, 35, 39, 40, 41, 42, 43, 51, 52, 53, 54, 56, 58, 59], "detail": [8, 25, 27, 28, 32, 33, 37, 39, 40, 54, 58], "gib": 8, "ram": [8, 48, 53], "per": [8, 11, 12, 25, 26, 29, 35, 38, 41, 43, 49, 58], "request": [8, 28, 29, 32, 37, 42, 44, 45, 48, 55, 56, 58], "assum": [8, 12, 35, 40, 48, 58], "sparsiti": 8, "95": 8, "depend": [8, 24, 29, 31, 39, 42, 44], "next": [8, 24, 26, 28, 30, 33, 58], "immedi": 8, "previous": [8, 40, 41], "made": [8, 40], "via": [8, 9, 28, 29, 30, 31, 32, 33, 35, 37, 38, 39, 41, 42, 43, 44, 45, 50, 54, 58], "allow": [8, 24, 26, 44, 45, 51, 58], "network": 8, "filesystem": 8, "client": [8, 29], "side": 8, "potenti": [8, 32, 37, 40], "improv": 8, "overal": [8, 27, 58], "cost": [8, 29], "doubl": [8, 35], "n_ob": [9, 33, 41, 42, 44, 46, 48, 50, 52, 53, 54], "nnz": [9, 13, 26, 35, 46, 52], "elaps": 9, "n_soma_chunk": 9, "statist": [9, 13, 23, 48, 55], "about": [9, 24, 26, 29, 32, 33, 35, 36, 37, 39, 43, 45, 46, 51, 52, 53, 54], "experimentdatapip": [9, 10], "api": [9, 12, 25, 26, 30, 31, 32, 33, 35, 36, 37, 38, 39, 41, 43, 45, 49, 50, 53, 54, 58], "assess": [9, 40, 41], "throughput": 9, "attr": 9, "num_work": 10, "dataloader_kwarg": 10, "factori": 10, "safe": 10, "instanti": [10, 58], "work": [10, 24, 26, 31, 32, 37, 38], "constructor": [10, 58], "applic": [10, 52], "sampler": [10, 58], "batch_sampl": [10, 58], "collate_fn": [10, 58], "ha": [10, 11, 12, 24, 26, 32, 34, 35, 37, 38, 39, 42, 43, 46, 49, 51, 52], "been": [10, 24, 26, 30, 52], "chain": [10, 58], "main": [10, 29, 31, 34, 40, 46, 51, 
52], "addit": [10, 14, 31, 32, 35, 37, 38, 42, 44, 50, 53, 56, 57], "keyword": 10, "argument": [10, 11, 12, 17, 22, 25, 26, 53, 54, 56, 57], "except": [10, 38, 40, 43, 54], "param": [10, 22], "collect": [11, 14, 18, 19, 21, 22, 28, 30, 34, 38, 41, 42, 43, 44, 47, 49, 53], "obs_value_filt": [11, 14, 24, 25, 26, 30, 33, 39, 40, 42, 43, 44, 46, 47, 50, 51, 52, 54, 56, 57], "obs_coord": [11, 14, 40, 41], "byte": [11, 14, 18, 21], "float": [11, 12, 14, 18, 21, 26, 51, 58], "datetime64": [11, 14, 18, 21], "timestamptyp": [11, 14, 18, 21], "chunkedarrai": [11, 14, 18, 21], "var_value_filt": [11, 14, 24, 26, 33, 47, 51, 54], "var_coord": [11, 14, 41], "n_top_gen": [11, 12, 25, 39, 41, 43, 56], "flavor": [11, 12, 39, 41], "liter": [11, 12], "seurat_v3": [11, 12, 39, 41, 56], "span": [11, 12, 29, 40, 56], "batch_kei": [11, 12, 25, 39, 56], "max_loess_jitt": [11, 12], "1e": [11, 12, 58], "06": [11, 12], "batch_key_func": [11, 12], "callabl": [11, 12], "convienc": 11, "wrapper": [11, 14, 28, 38, 56], "around": [11, 14, 33, 56], "highly_variable_gen": [11, 25, 39, 41, 42, 43], "execut": [11, 14, 28, 51], "annot": [11, 12, 29, 34, 35, 38, 39, 41, 42, 44, 56], "variabl": [11, 12, 21, 26, 27, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 58], "usual": [11, 14, 18, 19, 21, 25, 29, 58], "homo": [11, 14, 18, 19, 21, 24, 25, 26, 30, 33, 34, 35, 38, 41, 42, 44, 49, 51, 54, 55], "sapien": [11, 14, 18, 19, 21, 24, 25, 26, 30, 33, 34, 35, 38, 41, 42, 44, 49, 51, 54, 55], "mu": [11, 14, 18, 19, 21, 30, 35, 38, 39, 43, 50, 55], "musculu": [11, 14, 18, 19, 21, 30, 35, 38, 39, 43, 50, 55], "syntax": [11, 14, 18, 21], "coordin": [11, 14, 18, 21, 40, 48], "fraction": [11, 12, 25, 56], "estim": [11, 12, 56], "loess": [11, 12, 56], "varianc": [11, 12, 13, 26, 35, 36, 56], "fit": [11, 12, 39, 44, 45, 48, 56], "done": [11, 12, 13, 25, 28, 35, 41, 43, 56, 58], "combin": [11, 12, 24, 29, 35, 38, 39, 40, 41, 44, 45, 48, 49, 51, 54], "kei": [11, 12, 35, 
38, 39, 40, 41, 46, 48, 52, 54], "convert": [11, 12, 24, 33, 48], "concaten": [11, 12, 33, 39, 51, 52, 57], "them": [11, 12, 24, 28, 29, 39, 42, 46, 51, 52, 54], "max_lowess_jitt": [11, 12, 56], "jitter": [11, 12, 41, 56], "add": [11, 12, 14, 26, 31, 35, 42, 43, 46, 48, 52], "case": [11, 12, 34, 35, 38, 39, 40, 43, 48, 51, 52, 56, 57, 58], "failur": [11, 12], "low": [11, 12, 29, 32, 37], "entri": [11, 12], "count": [11, 12, 25, 26, 29, 32, 33, 34, 36, 37, 39, 41, 42, 43, 45, 50, 51, 54], "creat": [11, 12, 28, 29, 32, 33, 34, 37, 38, 39, 42, 46, 47, 50, 52, 56], "receiv": [11, 12, 41], "seri": [11, 12, 27, 35, 41, 48], "paramat": [11, 39], "hvg": [11, 12, 25, 56], "lung": [11, 14, 29, 35, 38, 39, 42, 45, 49, 50, 51, 53, 54], "500": [11, 26, 29, 41, 43, 56], "anndata": [11, 14, 26, 28, 29, 32, 35, 37, 39, 40, 41, 42, 43, 44, 48, 50, 54], "top": [11, 22, 25, 35, 41, 45, 50, 55, 56], "mus_musculu": [11, 35, 43, 45, 48, 50, 51, 52, 53, 54, 56, 57], "highli": [11, 12, 29, 36, 39, 40, 41, 42, 43, 44, 58], "just": [11, 25, 29, 38, 41, 48, 51, 53], "hvg_soma_id": 11, "highly_vari": [11, 25, 41, 42, 43, 56], "adata": [11, 26, 33, 35, 39, 40, 41, 42, 44, 46, 47, 50, 51, 52, 53, 54], "get_anndata": [11, 26, 33, 39, 40, 41, 42, 43, 44, 47, 50, 51, 54, 56], "scanpi": [12, 25, 29, 33, 39, 40, 41, 42, 43, 44, 46, 50, 52, 53, 56, 59], "mimic": 12, "seurat": [12, 25, 26, 29, 31, 32, 37], "v3": [12, 25, 31, 33, 38, 41, 54], "readthedoc": [12, 39, 41, 43], "io": [12, 39, 41, 43], "en": [12, 39, 41, 43], "html": [12, 29, 39, 41, 43], "inform": [12, 26, 28, 29, 32, 34, 37, 38, 39, 40, 41, 42, 44, 50, 51, 52, 53, 54, 56, 59], "ident": [12, 38], "those": [12, 25, 35, 39, 41, 43, 48], "produc": 12, "donor_id": [12, 35, 38, 41, 46, 50, 52, 53, 54, 57], "lambda": [12, 44], "batch0": 12, "99": 12, "els": [12, 40, 49, 58], "batch1": 12, "calculate_mean": [13, 25, 57], "calculate_vari": [13, 25, 57], "ddof": [13, 57], "nnz_onli": 13, "calcul": [13, 23, 35, 36, 39, 40, 42], "mean": [13, 25, 30, 
35, 36, 56], "accumul": [13, 25, 48], "fashion": [13, 24, 25], "total": [13, 25, 29, 30, 34, 35, 38, 41, 43], "n": [13, 26, 29, 33, 34, 35, 38, 41, 43, 46, 47, 48, 52, 57], "dimens": [13, 19, 34, 46, 49, 52, 58], "wise": [13, 41], "metric": [13, 40, 44], "explicitli": [13, 26, 35, 52], "store": [13, 19, 26, 34, 35, 38, 40, 42, 45, 46, 49, 52, 53], "comput": [13, 24, 25, 29, 32, 37, 38, 57, 58], "otherwis": [13, 35, 51], "skip": 13, "delta": [13, 48, 57], "degre": [13, 40, 57], "freedom": [13, 57], "divisor": [13, 57], "x_layer": [14, 26], "obsm_lay": 14, "obsp_lay": 14, "varm_lay": 14, "varp_lay": 14, "column_nam": [14, 18, 21, 24, 26, 28, 33, 38, 40, 41, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55], "axiscolumnnam": 14, "conveni": [14, 28, 38, 45, 48, 49, 50, 54, 56], "obsm": [14, 30, 34, 39, 40, 42, 44], "slot": [14, 30], "obsp": [14, 40], "varm": [14, 34], "varp": [14, 35, 49], "part": [14, 39, 40], "get_all_available_embed": [14, 52], "experiment": [14, 17, 25, 31, 35, 36, 42, 44, 46, 52, 57, 58], "brain": [14, 26, 33, 38, 48], "tissu": [14, 24, 26, 28, 30, 33, 35, 36, 42, 43, 45, 46, 47, 48, 50, 51, 52, 54, 57], "censusversiondescript": [15, 16], "releas": [15, 16, 24, 26, 31, 33, 35, 38, 39, 41, 43, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "descript": [15, 16, 29, 32, 34, 35, 37, 52, 54, 59], "directori": [15, 16, 31], "unknown": [15, 41, 53, 54], "get_census_version_directori": 15, "entir": [15, 41, 45, 49, 58], "release_d": [15, 16], "release_build": [15, 16], "2022": [15, 16, 20, 22, 49, 50], "01": [15, 16, 20, 27, 35, 39, 43, 44, 46, 47, 52], "public": [15, 16, 20, 28, 30, 35, 42, 44, 46, 47, 50, 52, 53], "s3_region": [15, 16, 20, 50], "u": [15, 16, 17, 20, 22, 28, 29, 31, 32, 37, 41, 48, 50, 52], "west": [15, 16, 20, 22, 28, 29, 31, 50, 52], "lt": [16, 26, 28, 39, 49], "retract": 16, "current": [16, 25, 26, 30, 32, 33, 37, 38, 39, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "flag": [16, 58], "includ": [16, 25, 28, 29, 32, 
37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 57, 59], "both": [16, 24, 26, 29, 35, 39, 40, 47, 51, 52, 54, 56, 58], "long": [16, 24, 28, 32, 37, 58], "term": [16, 28, 32, 35, 37, 38, 45, 48, 53, 58], "weekli": [16, 28, 32, 37], "exclud": [16, 35, 41, 51, 58], "date": [16, 28, 30, 34, 35, 38, 52], "yyyi": [16, 30], "mm": [16, 30], "dd": [16, 30], "alias": 16, "alia": 16, "appear": [16, 35, 38, 40, 58], "time": [16, 25, 29, 30, 35, 51, 53, 58], "under": [16, 35, 41, 43], "again": [16, 53], "v": [16, 39, 48], "sequenti": 16, "increment": [16, 25, 36], "get_census_version_descript": 16, "29": [16, 41, 42, 58], "v2": [16, 38, 39, 41, 53, 57], "v1": [16, 23, 26, 34, 38, 39, 41], "30": [16, 30, 39, 41, 42, 52, 58], "mistak": 16, "happen": 16, "info_url": 16, "com": [16, 26, 29, 32, 37, 42, 44, 47, 52, 53], "errata": 16, "replaced_bi": 16, "tiledb_config": [17, 22, 28, 52], "sensibl": 17, "further": [17, 26, 40, 45, 52], "somacor": 17, "somaobject": 17, "replac": [17, 40, 42, 44], "tiledb": [17, 22, 24, 29, 30, 31, 32, 33, 37, 38, 45, 54], "configur": [17, 22, 28, 29, 58], "amount": [17, 53, 55], "o": [17, 42, 44, 53], "oper": [17, 27, 29, 33, 38, 45, 48, 54, 58], "ctx": [17, 28, 52], "py": [17, 22, 39, 41, 43, 53], "init_buffer_byt": [17, 22], "128": [17, 22, 30, 41, 56, 58], "1024": [17, 22], "c": [17, 24, 26, 31, 33, 39, 41, 42, 43, 44, 49, 50], "my": [17, 28], "privat": [17, 28], "access": [17, 20, 29, 30, 32, 33, 34, 35, 36, 37, 38, 40, 43, 45, 54, 55, 58], "copi": [17, 28, 39, 40, 41, 43, 44], "differ": [17, 29, 35, 38, 39, 40, 46, 49, 51, 52, 54], "region": [17, 20, 28, 29, 31, 52], "vf": [17, 28, 52], "no_sign_request": [17, 28, 52], "east": [17, 28], "coord": [18, 21, 52], "observ": [18, 27, 34, 35, 39, 48, 51, 53, 55], "csr_matrix": [19, 39, 43], "presenc": [19, 34, 36, 40, 41, 43], "scipi": [19, 29, 32, 37, 39, 40, 43, 49, 52], "csr_arrai": 19, "deafult": 19, "cannot": [19, 22], "321x60554": 19, "uint8": [19, 49], "6441269": 19, 
"compress": [19, 49], "format": [19, 28, 30, 35, 48, 49, 59], "censusloc": 20, "guarante": [20, 30, 32, 35, 37, 38, 39], "interest": [20, 32, 34, 37, 38, 40, 49, 51, 53], "_release_directori": 20, "keyerror": 20, "do": [20, 26, 31, 33, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 50, 52, 54, 55, 57], "cb5efdb0": 20, "f91c": 20, "4cbd": 20, "9ad4": 20, "9d4fa41c572d": 20, "mirror": 22, "suitabl": [22, 52], "chosen": 22, "automat": [22, 29, 38, 45], "take": [22, 25, 38, 39, 41, 42, 43, 46, 51, 52, 53, 54, 58], "preced": 22, "get_default_soma_context": [22, 28], "level": [22, 34, 35, 38, 42, 48, 50, 51, 53, 55, 56], "It": [22, 29, 30, 34, 35, 38, 52, 56], "manag": [22, 26, 33, 38, 45, 55, 56], "close": [22, 24, 25, 26, 33, 38, 39, 40, 41, 43, 45, 46, 47, 50, 52, 54, 55], "exit": 22, "neither": 22, "invalid": [22, 48], "updat": [22, 25, 29, 35, 39, 41, 43, 48, 52, 53], "31": [22, 41, 42, 58], "rather": [22, 41, 48], "than": [22, 24, 26, 29, 31, 32, 33, 35, 37, 38, 40, 41, 42, 48], "r": [23, 26, 27, 29, 30, 32, 37, 41], "out": [23, 26, 29, 30, 32, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 58], "effici": [23, 26, 29, 32, 36, 37, 51, 53], "commonli": [23, 53], "introduc": [23, 40, 53], "normal": [23, 28, 30, 32, 33, 34, 36, 37, 38, 40, 44, 52, 54, 56, 57], "pre": [23, 25, 29, 36, 38, 42, 51, 52], "categor": [23, 32, 37, 53], "publish": [24, 25, 26, 27, 29, 30, 32, 35, 37], "august": 24, "7th": 24, "pablo": [24, 25, 26, 27], "garcia": [24, 25, 26, 27], "nieto": [24, 25, 26, 27], "team": [24, 25, 26, 29], "pleas": [24, 26, 28, 29, 32, 37, 39, 40, 41, 42, 43, 44, 51, 53], "announc": [24, 25, 26], "come": [24, 33, 39, 41], "our": [24, 26, 29, 33, 38, 39, 40, 42, 44, 46, 52], "back": [24, 39, 42, 58], "now": [24, 25, 26, 27, 32, 33, 37, 38, 39, 41, 42, 43, 46, 47, 49, 50, 51, 52, 54, 57, 58], "biologist": 24, "largest": [24, 29], "standard": [24, 29, 32, 34, 37, 45, 48], "aggreg": 24, "compos": [24, 34], "60k": [24, 29], "With": [24, 25, 26, 
38, 40, 43, 46, 52, 54, 58], "few": [24, 25, 36, 40, 42, 43, 51, 52, 53], "hundr": 24, "bigger": 24, "quickli": [24, 30, 38, 39], "basic": [24, 39, 40, 41, 42, 43, 45, 46, 50, 52, 58], "structur": [24, 32, 35, 37, 38, 40], "downstream": [24, 25, 26, 27, 33, 35, 52], "analysi": [24, 26, 33, 35, 36, 38, 39, 40, 41, 43, 45, 51, 52], "follow": [24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 40, 41, 42, 44, 51, 52, 54, 57, 58], "instruct": [24, 29, 33], "learn": [24, 35, 39, 40, 43, 45, 51, 52, 54], "make": [24, 31, 35, 39, 41, 42, 43, 48], "sure": [24, 43], "check": [24, 27, 32, 33, 37, 39, 43, 49], "resourc": [24, 41], "quick": [24, 28, 29, 32, 36, 37, 38, 55, 58], "start": [24, 27, 28, 29, 30, 32, 36, 37, 38, 39, 41], "guid": [24, 28, 39], "refer": [24, 26, 28, 29, 32, 33, 35, 37, 39, 40, 42, 44, 54], "doc": [24, 29, 39, 58], "tutori": [24, 25, 29, 30, 32, 33, 37, 40, 41, 42, 43, 44, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58], "reli": 24, "capabl": [24, 36, 40, 49], "shown": [24, 27, 35, 38, 40, 46, 58], "section": [24, 28, 35, 38, 41, 42, 46, 51, 52], "czi": [24, 29, 32, 37, 59], "develop": [24, 30, 31, 39, 41, 53], "upgrad": [24, 29, 30, 53], "beta": [24, 38, 41, 42], "here": [24, 25, 29, 32, 33, 34, 35, 37, 39, 40, 51, 52, 53, 58], "ever": 24, "grow": 24, "cz": [24, 29, 30, 34, 36, 41, 43, 47, 50, 51], "discov": [24, 29, 30, 34, 38, 41, 42, 46, 47, 50, 51, 52, 59], "accompani": 24, "ontologi": [24, 35, 42, 53], "cl": [24, 35, 38, 41, 42, 45, 54, 55, 57], "uberon": [24, 35, 38, 41, 45, 53, 54, 55, 57], "respect": [24, 26, 31, 35, 38, 40, 53, 54], "you": [24, 26, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 48, 50, 51, 52, 53, 54, 55, 58], "find": [24, 26, 30, 32, 34, 37, 38, 40, 42, 43, 44, 45, 46, 49, 52, 56], "schema": [24, 26, 27, 28, 29, 30, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], "page": [24, 28, 29, 30, 33, 34, 39, 40, 42, 44, 46, 52], "research": [24, 26, 29, 32, 37], "directli": [24, 28, 29, 30, 36, 38, 
40, 41, 45, 50, 54, 58, 59], "session": [24, 28, 31], "librari": [24, 27, 29, 30, 31, 33, 34, 35, 38, 41, 58], "your": [24, 26, 29, 31, 32, 36, 37, 45, 50, 51, 52, 55], "navig": 24, "300k": [24, 33], "microgli": [24, 28, 33], "neuron": [24, 26, 28, 33, 38, 42, 49, 55], "femal": [24, 28, 33, 41, 51, 53, 54, 57], "donor": [24, 35, 41, 49, 50, 53], "somadatafram": [24, 33, 38, 45, 54], "cell_metadata": [24, 28, 33, 47], "arrow": [24, 26, 27, 29, 32, 33, 37], "tabl": [24, 26, 33, 34, 36, 39, 40, 41, 43, 47, 48, 49, 51], "sex": [24, 26, 28, 30, 33, 35, 38, 46, 48, 50, 51, 52, 53, 54, 57], "cell_typ": [24, 25, 26, 27, 28, 33, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 57, 58], "assai": [24, 27, 28, 30, 33, 39, 40, 43, 46, 50, 52, 53, 54, 55, 57], "suspension_typ": [24, 28, 33, 35, 38, 41, 46, 50, 52, 53, 54, 57], "diseas": [24, 28, 30, 33, 35, 39, 40, 46, 50, 51, 52, 53, 54, 57], "concat": [24, 25, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57], "tibbl": [24, 33], "frame": [24, 28, 29, 32, 33, 34, 37, 38, 49], "similarli": [24, 26, 27, 33, 38, 49, 54], "gene_filt": [24, 25, 26, 33], "feature_id": [24, 25, 26, 33, 35, 38, 41, 42, 44, 46, 48, 49, 50, 52, 53, 54, 56], "ensg00000107317": [24, 26, 33], "ensg00000106034": [24, 26, 33], "cell_filt": [24, 25, 26, 33], "leptomening": 24, "cell_column": [24, 26, 33], "seurat_obj": [24, 26, 33], "get_seurat": [24, 26, 33], "sce_obj": [24, 26, 33], "get_single_cell_experi": [24, 26, 33], "sometim": 24, "too": 24, "overview": [24, 34, 55], "septemb": 25, "18": [25, 38, 39, 41, 42, 44, 52, 57], "thrill": 25, "offici": [25, 35], "wide": [25, 28, 32, 37, 40, 49], "algorithm": [25, 36, 40, 56, 57], "line": [25, 38, 42, 44, 58], "code": [25, 26, 48, 53, 55, 58], "task": [25, 29, 40], "ten": 25, "convent": [25, 38], "laptop": 25, "8gb": 25, "below": [25, 26, 27, 33, 35, 38, 41, 42, 46, 49, 55, 58], "full": [25, 28, 32, 34, 36, 37, 39, 40, 54, 55, 58], "correct": [25, 30, 35, 58], 
"These": [25, 26, 29, 32, 35, 37, 38, 40, 41, 42, 44, 52], "interwoven": 25, "wai": [25, 38, 45, 46, 49, 51, 52, 54], "seamlessli": 25, "appli": [25, 40, 43, 44], "33m": [25, 29], "continu": [25, 33], "cellxgene_censu": [25, 26, 27, 28, 30, 33, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58, 59], "pp": [25, 39, 40, 41, 42, 43, 44, 46, 52, 56, 57], "mean_vari": [25, 57], "small": [25, 26, 35, 38, 40, 41, 43, 45, 48, 53, 54], "advantag": [25, 46, 52], "cpu": [25, 39, 42, 58], "multiprocess": 25, "speed": [25, 29], "popul": 25, "zero": [25, 26, 34, 35, 40, 44, 48, 52, 56], "futur": [25, 30, 33, 38, 39, 41, 42, 43, 45, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "we": [25, 26, 29, 30, 32, 33, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 51, 52, 53, 54, 57, 58], "enabl": [25, 29, 30, 35, 53], "easili": [25, 26, 29, 43, 46], "switch": [25, 53], "human_data": 25, "feature_nam": [25, 33, 35, 38, 40, 41, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56], "axis_queri": [25, 26, 33, 46, 48, 51, 52, 56, 57], "mean_variance_df": 25, "gene_df": 25, "to_panda": [25, 33, 38, 39, 40, 41, 43, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57], "8624": 25, "071926": 25, "5741": 25, "242485": 25, "16437": 25, "8": [25, 31, 33, 38, 39, 40, 41, 42, 43, 44, 46, 49, 51, 52, 53, 54, 56, 57, 58], "233282": 25, "452": 25, "119153": 25, "feature_length": [25, 33, 35, 38, 39, 41, 43, 46, 48, 49, 50, 52, 53, 54, 56], "ensg00000171885": 25, "5943": 25, "ensg00000133703": 25, "6845": 25, "get_highly_variable_gen": 25, "while": [25, 33, 38, 40, 42, 46, 52, 56], "account": [25, 39, 58], "effect": [25, 26, 39, 40, 52], "integr": [25, 29, 32, 37, 40, 41], "particular": [25, 27, 40, 58], "design": [25, 53], "paradigm": [25, 32, 37], "abov": [25, 29, 33, 34, 35, 38, 42, 51, 53, 54, 55], "tweak": 25, "compli": 25, "rule": 25, "thumb": 25, "good": [25, 40, 43, 52], "variances_norm": [25, 56], "003692": 25, "004627": 25, "748221": 25, "003084": 25, "003203": 25, "898657": 25, "014962": 25, 
"037395": 25, "513473": 25, "218865": 25, "547648": 25, "786928": 25, "002142": 25, "002242": 25, "894955": 25, "60659": [25, 41, 49], "000000": [25, 40, 48, 56], "60660": [25, 41, 49], "60661": [25, 41, 49], "60662": [25, 41, 49], "60663": [25, 41, 49], "octob": 26, "maximilian": 26, "lombardo": 26, "happi": 26, "introduct": 26, "tailor": 26, "empow": 26, "reflect": [26, 35, 40], "chang": [26, 30, 35], "expand": [26, 35, 40, 48], "exclus": 26, "thei": [26, 30, 35, 39, 40, 46, 48, 49, 51, 52], "invit": 26, "feedback": 26, "explor": [26, 29, 32, 36, 37, 52], "novel": [26, 41], "were": [26, 29, 34, 35, 38, 39, 40, 41, 43, 49, 51, 52], "mous": [26, 34, 35, 38, 43, 48, 50, 51, 54, 56, 57], "divid": [26, 48, 51], "sum": [26, 27, 35, 40, 41, 42, 44, 45, 48, 50, 58], "point": [26, 34, 40, 48], "precis": [26, 46, 52], "round": 26, "sigma": 26, "artifact": [26, 35, 40], "m": [26, 31, 34, 38, 41, 43, 49, 54, 56], "enrich": 26, "field": [26, 35, 52], "n_measured_ob": [26, 35, 46, 52], "wa": [26, 35, 40, 43, 44, 49, 50, 52, 53, 58], "augment": 26, "forego": 26, "common": [26, 33, 40, 45, 52, 54, 56, 58], "earli": 26, "raw_sum": [26, 35, 46, 48, 52], "deriv": [26, 42, 43, 52], "raw_mean_nnz": [26, 35, 46, 52], "averag": 26, "raw_variance_nnz": [26, 35, 46, 52], "n_measured_var": [26, 35, 46, 52], "thu": [26, 29, 32, 35, 37, 39, 42, 45, 54], "ensg00000161798": [26, 33, 54], "ensg00000188229": [26, 33, 54], "sympathet": [26, 33], "singlecellexperi": [26, 31, 32, 37], "outlin": 26, "like": [26, 27, 29, 38, 40, 41, 42, 45, 52, 58], "male": [26, 33, 41, 42, 48, 53, 54, 55, 57], "pyarrow": [26, 29, 32, 33, 37, 48, 51], "raw_slic": [26, 33], "And": [26, 28, 33, 38, 39, 41, 42, 43, 46, 47, 51, 52, 54], "somaaxisqueri": [26, 33], "read_next": [26, 33], "print": [26, 33, 40, 45, 47, 49, 50, 51, 52, 53, 58], "encourag": [26, 32, 37], "engag": 26, "share": [26, 29, 32, 37], "invalu": 26, "ongo": 26, "project": [26, 31, 36, 40], "reach": [26, 32, 37, 39], "chanzuckerberg": [26, 31, 32, 35, 
37, 53], "report": [26, 30, 40, 53], "issu": [26, 29, 30, 40], "github": [26, 29, 32, 35, 37, 52, 53], "repositori": [26, 29, 32, 35, 37, 52], "april": 27, "4th": 27, "2024": [27, 29, 32, 35, 37, 42, 44, 47], "emanuel": 27, "bezzi": 27, "04": [27, 35, 43, 44, 46, 52], "instead": [27, 39, 40, 42, 43, 53, 58], "smaller": [27, 33, 58], "footprint": 27, "howev": [27, 29, 39, 40, 41, 58], "pipelin": [27, 32, 36, 37], "explain": 27, "adapt": [27, 48, 52], "link": [27, 41, 49, 50], "value_count": [27, 38, 39, 41, 43, 45, 48, 51, 54], "categori": [27, 30, 35, 38, 41, 42, 55], "present": [27, 29, 32, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 54, 55], "groupbi": [27, 41, 44, 53, 55], "pivot": 27, "show": [27, 35, 36, 38, 40, 42, 43, 44, 48, 51, 58], "unus": 27, "factor": [27, 40], "interfac": [27, 42, 44, 46, 52, 53, 58], "inspect": [27, 46, 52, 58], "null": 27, "indic": [27, 29, 34, 35, 38, 40, 41, 43, 48, 49, 52, 54], "int16": 27, "int8": 27, "assay_ontology_term_id": [27, 35, 38, 41, 45, 46, 50, 52, 53, 54, 57], "development_stag": [27, 35, 38, 41, 46, 50, 52, 53, 54, 57], "development_stage_ontology_term_id": [27, 35, 38, 41, 46, 50, 52, 53, 54, 57], "output": [27, 33, 48, 58], "truncat": 27, "amazon": [28, 29], "web": [28, 29], "servic": [28, 29], "what": [28, 35, 38, 39, 40, 41, 51, 52, 54], "inclus": [28, 35, 45], "criteria": [28, 29, 33, 34, 35, 54], "individu": [28, 32, 35, 37, 38, 39, 43, 51], "root": [28, 35], "definit": [28, 39, 54], "publicli": [28, 29, 30, 32, 37], "host": [28, 30, 31, 32, 35, 36, 37, 40, 42, 44, 46, 59], "uniqu": [28, 29, 30, 35, 38, 39, 40, 41, 45, 48, 51], "05": [28, 42, 44, 46, 51, 53, 58], "bulk": 28, "07": [28, 33, 38, 39, 41, 43, 45, 48, 49, 50, 53, 54, 55, 56, 57, 58], "25": [28, 33, 38, 39, 40, 41, 42, 43, 45, 48, 49, 50, 53, 54, 55, 56, 57, 58], "shell": [28, 42, 44, 50], "sync": [28, 42], "sign": [28, 40, 42, 44], "recommend": [28, 29, 31, 33, 35, 39, 40, 42, 44, 51, 53], "folder": [28, 44], "should": [28, 
30, 35, 38, 39, 40, 41, 43, 45, 50, 58], "interact": [28, 32, 35, 37], "document": [28, 29, 33, 35, 38, 39, 43, 45, 52, 54], "last": [29, 30, 35], "jan": 29, "latenc": [29, 32, 37], "acceler": [29, 32, 37], "50m": 29, "mice": 29, "harmon": [29, 32, 37], "label": [29, 35, 38, 40, 41, 42, 44, 47, 51, 53, 55, 58], "multi": [29, 34, 36, 41, 52], "core": [29, 36, 39, 48], "k": [29, 40], "onlin": [29, 30, 32, 36, 37, 57], "t": [29, 35, 39, 41, 42, 43, 44, 45, 47, 50, 51, 54, 55], "covid": [29, 38, 41, 51, 54], "19": [29, 30, 38, 39, 41, 42, 44, 45, 49, 51, 52, 54], "suit": 29, "author": [29, 35], "spatial": [29, 34, 35, 39, 40, 41, 49, 50], "yet": [29, 31], "d": [29, 52], "click": [29, 33], "citat": [29, 32, 35, 36, 37], "guidelin": [29, 32, 37], "offer": [29, 32, 37, 40, 46, 52], "becaus": [29, 39, 41, 43, 51], "therefor": [29, 39, 43, 45, 51, 52], "numer": [29, 40], "incompat": [29, 35], "purpos": 29, "suggest": [29, 40], "fast": 29, "corpu": 29, "60": [29, 42, 51], "gencod": 29, "readi": [29, 42, 58], "cloud": [29, 31, 32, 37, 50], "matric": [29, 32, 33, 34, 37, 38, 40, 48], "possibl": [29, 35, 42, 54], "due": [29, 30, 38, 40, 48, 58], "free": [29, 53], "aw": [29, 31, 42, 44, 50], "ye": 29, "download_source_h5ad": [29, 50], "site": [29, 39, 41, 43], "help": [29, 33, 38, 43, 45, 52, 53, 54, 56, 58], "pattern": [29, 40], "internet": [29, 31, 53], "limit": [29, 38, 51], "bandwidth": [29, 51], "tactic": 29, "connect": [29, 31, 41, 42, 53, 55], "high": [29, 34, 35, 38, 40, 41, 42, 51, 53, 56], "ethernet": 29, "wifi": 29, "coast": 29, "ec2": [29, 31], "instanc": [29, 31, 35, 40, 45, 53], "There": [29, 31, 41, 42, 45, 46, 49, 51, 52, 56], "environ": [29, 31], "census_env": 29, "activ": [29, 31, 33, 52], "submit": [29, 32, 37], "join": [29, 32, 37, 38, 41, 48, 50, 54, 56], "scienc": [29, 32, 37, 47, 49, 59], "commun": [29, 32, 37, 40, 46, 52], "slack": [29, 32, 37], "question": [29, 38], "channel": [29, 32, 37], "inquir": 29, "accept": [29, 35, 56], "meet": [29, 33, 54, 56], 
"biolog": [29, 36, 51, 52, 58], "try": [29, 58], "old": [29, 41, 57], "persist": [29, 34], "notebook": [29, 31, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 52, 53, 58], "sh": [29, 31], "command": [29, 38, 42, 44], "restart": 29, "runtim": 29, "reload": [29, 42], "numba": [29, 48], "relat": 29, "simpli": [29, 42], "magic": 29, "similar": [29, 38, 39, 40, 41, 44, 54, 55, 56], "dbutil": 29, "restartpython": 29, "addition": [29, 39, 40], "node": [29, 38], "cluster": [29, 36, 39, 44], "0d53f00001ghvp3cap": 29, "between": [29, 35, 40, 42], "altern": [29, 58], "ad": [29, 35, 53, 54], "tab": 29, "edit": [30, 35], "decemb": 30, "15th": [30, 32, 37], "stabil": 30, "scientif": 30, "reproduc": [30, 39, 53, 55], "plan": [30, 32, 37], "regular": 30, "everi": [30, 32, 37], "six": [30, 32, 37], "month": [30, 32, 37, 57], "least": [30, 32, 35, 37], "5": [30, 31, 33, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56, 57, 58], "year": [30, 32, 37, 41], "recogn": 30, "previou": [30, 39, 41, 46, 52], "ingest": [30, 51], "hand": 30, "week": [30, 54], "651": 30, "62": [30, 41, 42, 44, 51], "998": 30, "417": 30, "684": 30, "805": 30, "36": [30, 38, 42, 58], "227": [30, 56], "903": 30, "230": 30, "588": [30, 41, 49, 50], "990": 30, "20": [30, 38, 39, 41, 42, 44, 47, 49, 52, 57], "631": 30, "248": [30, 38, 45], "stage": [30, 41, 53, 54, 57], "173": [30, 56], "72": [30, 42], "self": [30, 39, 48, 53, 58], "ethnic": [30, 53], "na": [30, 35, 38, 55, 57], "suspens": [30, 39, 53], "74": [30, 42], "53": [30, 42], "27": [30, 38, 39, 41, 42, 49, 58], "fine": [30, 46], "593": [30, 41, 49, 50], "56": [30, 41, 42], "400": 30, "873": 30, "255": 30, "245": [30, 49], "33": [30, 41, 42, 52, 58], "364": 30, "242": 30, "083": 30, "531": [30, 41], "13": [30, 38, 39, 40, 41, 42, 43, 44, 46, 51, 52], "035": 30, "9": [30, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 49, 51, 52, 53, 54, 58], "613": [30, 38, 45, 55], "164": 30, "64": [30, 38, 42], "26": [30, 38, 39, 41, 42, 49, 58], "220": [30, 
38, 45, 49], "66": [30, 38, 42, 45], "54": [30, 38, 42], "prevent": [30, 52], "analys": [30, 53], "mark": [30, 35, 38, 40, 51], "is_primari": 30, "exactli": [30, 35], "243": [30, 38, 49], "569": 30, "twice": [30, 38], "wish": [30, 38, 56], "consid": [30, 39], "duplicate_cells_census_lts_2023": 30, "csv": [30, 53], "zip": [30, 44, 48], "562": 30, "794": 30, "728": 30, "086": 30, "032": 30, "758": 30, "887": 30, "914": 30, "318": 30, "493": 30, "362": 30, "604": 30, "226": 30, "68": [30, 42], "51": [30, 41, 42], "61": [30, 42], "natur": [30, 36, 41, 42, 51, 53], "storag": [30, 35], "backend": [30, 53], "backward": [30, 58], "re": [30, 42, 49], "forward": [30, 44, 58], "older": 30, "might": [30, 40, 53], "error": [30, 38, 42, 44, 45], "aim": 30, "polici": 30, "abl": [30, 31], "until": 30, "linux": 31, "maco": 31, "system": [31, 38, 40, 46, 50, 52], "Or": 31, "tbd": 31, "16": [31, 38, 39, 41, 42, 43, 44, 46, 52, 53, 57, 58], "gb": [31, 53], "mbp": [31, 53], "increas": [31, 32, 37, 53], "virtual": 31, "conda": 31, "venv": [31, 39, 41, 43], "bin": 31, "modul": [31, 36, 39, 58], "less": [31, 32, 37, 40, 58], "complex": [31, 38, 40, 45, 48, 49], "databrick": 31, "faq": [31, 32, 37], "ubuntu": 31, "apt": 31, "libxml2": 31, "dev": 31, "libssl": 31, "libcurl4": 31, "openssl": 31, "cmake": 31, "21": [31, 39, 41, 42, 43, 44, 49, 51, 54, 57], "greater": [31, 35, 47], "tool": [31, 40, 44, 53], "xcode": 31, "window": [31, 58], "univers": [31, 40, 52], "cran": 31, "repo": [31, 59], "org": [31, 47], "export": [31, 46], "biocmanag": 31, "quietli": 31, "break": [32, 37, 51], "ve": [32, 37], "central": [32, 37, 46, 52], "hub": [32, 37], "analyz": [32, 37], "significantli": [32, 37], "minim": [32, 37, 40], "studi": [32, 37, 39, 40], "scale": [32, 37, 39, 41, 42, 43], "interoper": [32, 37, 53], "toolkit": [32, 36, 37], "smart": [32, 34, 37, 38, 41, 49, 50, 55, 57], "seq2": [32, 34, 37, 38, 41, 43, 49, 50, 55, 57], "molecul": [32, 34, 35, 37], "10x": [32, 33, 34, 37, 38, 40, 41, 44, 49, 
50, 51, 53, 54, 57], "duplic": [32, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 54, 55], "five": [32, 37], "perman": [32, 37], "ask": [32, 37], "email": [32, 37, 52], "bug": [32, 37], "believ": [32, 37], "secur": [32, 37], "disclos": [32, 37], "contact": [32, 37], "seamless": [32, 37], "pytorch": [32, 36, 37], "usabl": [32, 37, 58], "area": [32, 37], "On": [32, 37], "demand": [32, 33, 37], "rich": [32, 37, 39], "subsampl": [32, 37], "vignett": [33, 44], "soon": 33, "remind": [33, 46, 49, 52], "etc": [33, 34, 38], "consist": [33, 38, 39, 40, 41, 43, 45, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "ey": [33, 49], "379219": 33, "microwel": [33, 38, 41, 54], "seq": [33, 38, 39, 41, 54, 55], "adren": [33, 38], "gland": [33, 38, 42, 51, 52, 55], "379220": 33, "379221": 33, "379222": 33, "379223": 33, "379224": 33, "7": [33, 38, 39, 40, 41, 42, 43, 44, 46, 49, 50, 51, 52, 53, 54, 58], "n_var": [33, 41, 43, 46, 48, 49, 50, 52, 53, 54], "demonstr": [33, 36, 38, 39, 40, 44, 46, 47, 48, 50, 52, 53, 56, 58], "lazi": [33, 46, 51, 52], "evalu": 33, "well": [33, 38, 39, 41, 51, 55], "logic": [33, 41], "wrap": [33, 48, 58], "loop": 33, "r6": 33, "familiar": [33, 35, 39, 41, 43, 58], "379": 33, "224": 33, "chr": 33, "fema": 33, "6": [33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 58], "\u2139": 33, "214": 33, "4k": 33, "4744": 33, "sampl": [33, 41, 42, 44, 48], "bioconductor": 33, "ecosystem": 33, "dim": 33, "rownam": 33, "rowdata": 33, "colnam": 33, "obs48350835": 33, "obs48351829": 33, "obs52469564": 33, "obs52470190": 33, "coldata": 33, "reduceddimnam": 33, "mainexpnam": 33, "altexpnam": 33, "sparse_matrix": 33, "state": [33, 40, 41, 49, 50], "monitor": 33, "read_complet": 33, "friendli": [34, 35], "varieti": [34, 40, 45, 48, 52], "hierarchi": 34, "somacollect": [34, 38, 45], "whole": [34, 38, 41], "summary_cell_count": [34, 38, 41, 55], "stratifi": [34, 38, 42], "relev": [34, 35, 36, 38, 54], "independ": [34, 38], "somaexperi": [34, 
38, 48], "special": [34, 35, 38, 54], "form": [34, 38, 49, 58], "how": [34, 36, 38, 40, 41, 43, 47, 51, 52, 55, 58], "avialbl": 34, "feature_dataset_presence_matrix": [34, 41, 43], "boolean": [34, 35, 41, 43, 49], "adher": 34, "technologi": [34, 35, 38, 39, 41, 43], "short": [34, 38], "densendarrai": 34, "dimension": [34, 35, 40, 41], "offset": 34, "sparsendarrai": [34, 46, 52], "primari": [34, 35, 40, 42, 55], "march": 35, "NOT": [35, 48, 49], "shall": 35, "interpret": [35, 40], "bcp": 35, "14": [35, 38, 39, 41, 42, 43, 44, 46, 49, 52], "rfc2119": 35, "rfc8174": 35, "capit": 35, "hereaft": 35, "visit": [35, 40, 59], "understand": [35, 40], "reader": 35, "throughout": [35, 42, 44, 51, 52], "serv": [35, 43], "deposit": 35, "heart": [35, 49, 51, 56], "left": [35, 39, 41], "ventricl": [35, 45], "semver": 35, "major": [35, 41], "delet": 35, "modal": 35, "minor": 35, "compat": 35, "patch": 35, "editori": 35, "impos": 35, "organism_ontology_term_id": 35, "ncbitaxon": 35, "10090": 35, "9606": 35, "feature_refer": 35, "speic": 35, "AND": 35, "compris": 35, "children": 35, "efo": [35, 38, 39, 41, 54, 55, 57], "0002772": 35, "0010183": [35, 38], "nascent": 35, "elong": 35, "target": [35, 38], "manner": [35, 46, 52, 58], "doesn": [35, 41], "concurr": 35, "perturb": 35, "intend": [35, 56, 58], "primarili": [35, 39, 40, 41], "fusion": 35, "modif": 35, "mrna": [35, 38], "trna": 35, "rrna": 35, "viral": 35, "intron": 35, "ribosom": 35, "profil": [35, 38, 41], "umi": 35, "tissue_typ": 35, "equal": [35, 45], "referenc": [35, 41], "whose": [35, 41, 54], "readabl": [35, 41], "census_schema_vers": [35, 38, 47], "census_build_d": [35, 38, 47], "iso": [35, 52], "8601": 35, "dataset_schema_vers": [35, 38, 47], "total_cell_count": [35, 38, 41, 47, 55], "unique_cell_count": [35, 38, 41, 47, 55], "number_donors_homo_sapien": [35, 38, 47], "number_donors_mus_musculu": [35, 38, 47], "10000": [35, 40], "100": [35, 38, 39, 41], "collection_id": [35, 39, 43, 49, 50], "quot": 35, 
"collection_nam": [35, 39, 41, 43, 49, 50], "collection_doi": [35, 39, 43, 49, 50], "dataset_titl": [35, 39, 41, 43, 49, 50], "dataset_h5ad_path": [35, 39, 43, 49, 50], "rel": [35, 43, 57], "dataset_total_cell_count": [35, 39, 43, 49, 50], "dataset_version_id": 35, "self_reported_ethn": [35, 38, 41, 46, 50, 52, 53, 54], "ontology_term_id": [35, 38, 41, 55], "0002048": [35, 41, 45], "cell_type_a": 35, "xxxxx": 35, "cell_type_n": 35, "assay_a": 35, "assay_n": 35, "tissue_a": 35, "tissue_n": 35, "tissue_general_a": 35, "tissue_general_n": 35, "disease_a": 35, "mondo": [35, 41], "disease_n": 35, "self_reported_ethnicity_a": 35, "hancestro": [35, 54], "self_reported_ethnicity_n": 35, "sex_a": 35, "pato": [35, 41, 54, 57], "sex_n": 35, "suspension_type_a": 35, "suspension_type_n": 35, "organism_label": 35, "machin": [35, 42], "somameasur": 35, "somaindexeddatafram": 35, "fill": [35, 52], "remov": [35, 39, 41, 51], "variant": 35, "j": [35, 40, 47, 49, 50], "feature_biotyp": 35, "pin": 35, "clarifi": 35, "feature_1": 35, "feature_m": 35, "dataset_soma_joinid_1": 35, "dataset_soma_joinid_n": 35, "tissue_general_ontology_term_id": [35, 38, 41, 46, 50, 52, 53, 54, 57], "disease_ontology_term_id": [35, 38, 41, 46, 50, 52, 53, 54, 57], "observation_joinid": 35, "self_reported_ethnicity_ontology_term_id": [35, 38, 41, 46, 50, 52, 53, 54, 57], "sex_ontology_term_id": [35, 38, 41, 46, 50, 52, 53, 54, 57], "tissue_ontology_term_id": [35, 38, 41, 45, 46, 50, 52, 53, 54, 57], "handl": [35, 38, 45, 47, 51, 58], "text": 35, "cell_census_build_d": 35, "cell_census_schema_vers": 35, "renam": [35, 41], "move": [35, 58], "dataset_presence_matrix": 35, "ascii": 35, "0x22": 35, "stream": 36, "gget": 36, "workflow": [36, 42], "collabor": [36, 40, 42], "predict": [36, 40], "biologi": [36, 52], "gain": 36, "summari": [36, 47], "summar": [36, 38, 41, 55], "leverag": 36, "showcas": [38, 39, 48, 51, 52, 54], "cover": 38, "simpl": [38, 40, 44, 48, 53, 58], "sever": [38, 45, 46], "prefer": [38, 45, 
50], "34": [38, 39, 41, 42, 43, 44, 45, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "39": [38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "think": [38, 44], "piec": 38, "parent": 38, "variou": [38, 40, 45, 55], "analog": 38, "census_info": [38, 39, 41, 43, 47, 49, 50, 55], "census_obj": 38, "want": [38, 48, 51, 54, 58], "pair": [38, 48], "61656118": [38, 45, 50], "37447773": 38, "13035": 38, "1417": 38, "Of": 38, "meta": [38, 51, 53], "consortia": 38, "idea": 38, "Not": 38, "cast": 38, "census_count": 38, "33364242": [38, 55], "56400873": [38, 50, 55], "0008722": [38, 41, 55], "264166": [38, 55], "279635": [38, 55], "drop": [38, 41, 42, 48, 55], "0008780": [38, 55], "25652": [38, 41, 55], "51304": [38, 55], "indrop": [38, 41, 55], "0008919": [38, 55], "89477": [38, 55], "206754": [38, 55], "0008931": [38, 55, 57], "78750": [38, 55], "188248": [38, 55], "1357": [38, 55], "0002113": [38, 55], "179684": [38, 55], "208324": [38, 55], "kidnei": [38, 42, 49, 51, 55], "1358": [38, 55], "0002365": [38, 55], "15577": [38, 55], "31154": [38, 55], "exocrin": [38, 42, 52, 55], "1359": [38, 55], "0002367": [38, 55], "37715": [38, 55], "130135": [38, 55], "prostat": [38, 55], "1360": [38, 55], "0002368": [38, 55], "13322": [38, 55], "26644": [38, 55], "endocrin": [38, 42, 55], "1361": [38, 55], "0002371": [38, 55], "90225": [38, 55], "144962": [38, 55], "bone": [38, 42, 50, 51, 55], "marrow": [38, 50, 51, 55], "1362": [38, 55], "rememb": [38, 51], "omit": 38, "creation": 38, "sort": 38, "census_human_assai": 38, "sort_valu": [38, 42], "ascend": 38, "0009922": [38, 54], "11845077": 38, "25597563": 38, "0009899": [38, 41, 57], "7559102": 38, "12638794": 38, "0011025": 38, "3872375": 38, "6139786": 38, "0010550": 38, "4062980": 38, "5064268": 38, "sci": [38, 41], "0009900": 38, "2930054": 38, "3139770": 38, "17": [38, 39, 41, 42, 43, 44, 46, 52, 53, 57], "0030004": 38, "915037": 38, "1084235": 38, "transcript": [38, 41], "0030003": [38, 41], "744798": 
38, "811422": 38, "0030002": [38, 54], "625175": 38, "642559": 38, "0700003": 38, "146278": 38, "177276": 38, "bd": [38, 41], "rhapsodi": [38, 41], "transcriptom": [38, 39, 41, 43, 49, 50, 51], "0009901": 38, "42397": 38, "121394": 38, "58981": [38, 41], "117962": 38, "0700004": 38, "96145": 38, "0008995": 38, "29128": 38, "0008953": 38, "4693": 38, "9386": 38, "strt": 38, "0010010": 38, "3105": 38, "5244": 38, "cel": 38, "69": [38, 42], "0000129": 38, "268114": 38, "370771": 38, "1038": [38, 39, 43, 47, 49, 50], "48998": 38, "62617": 38, "easi": [38, 48, 52], "fall": [38, 39], "certain": [38, 40, 58], "distribut": [38, 39, 47], "answer": 38, "exemplifi": 38, "stat": 38, "let": [38, 39, 40, 41, 42, 43, 44, 46, 49, 50, 51, 52, 53, 54], "trivial": 38, "human_cell_typ": 38, "syncytiotrophoblast": [38, 54], "placent": [38, 54], "villou": [38, 54], "trophoblast": [38, 41, 42, 49, 50, 54], "extravil": [38, 54], "56400868": [38, 41], "pericyt": [38, 41, 42, 58], "56400869": [38, 41], "56400870": [38, 41], "56400871": [38, 41], "56400872": [38, 41], "focu": [38, 39, 40, 43], "de": 38, "human_cell_type_count": 38, "2673669": 38, "glutamaterg": [38, 42], "1541605": 38, "cd4": [38, 41, 42, 44], "alpha": [38, 41, 42], "1258976": 38, "cd8": [38, 41, 42, 44], "1235987": 38, "classic": [38, 41], "monocyt": [38, 41, 42, 44], "1030996": 38, "microfold": 38, "epithelium": 38, "intestin": [38, 42, 51], "dendrit": [38, 42, 44], "serou": 38, "bronchu": 38, "sperm": [38, 55], "enteroendocrin": 38, "599": 38, "abund": [38, 41], "That": 38, "achiev": [38, 52], "goal": [38, 39, 43, 48], "human_liver_cell_typ": 38, "85739": 38, "hepatoblast": 38, "58447": 38, "neoplast": [38, 42], "52431": 38, "erythroblast": 38, "45605": 38, "31388": 38, "pulmonari": [38, 41, 53, 54], "arteri": 38, "endotheli": [38, 41, 42, 49, 51, 58], "germin": 38, "center": 38, "b": [38, 41, 42, 44, 54], "pneumocyt": [38, 41], "innat": 38, "lymphoid": 38, "126": [38, 58], "go": 38, "sake": [38, 41, 48], "t_cells_list": 
38, "t_cells_diseas": 38, "f": [38, 39, 40, 41, 42, 43, 44, 45, 46, 49, 50, 51, 52, 57, 58], "hodgkin": 38, "lymphoma": 38, "blood": [38, 49, 51, 53, 54], "62499": 38, "819428": 38, "30578": 38, "nose": 38, "respiratori": [38, 41, 55], "saliva": 38, "41": [38, 42], "crohn": 38, "colon": 38, "17490": 38, "52029": 38, "down": 38, "syndrom": 38, "181": 38, "breast": 38, "cancer": [38, 41], "1850": 38, "chronic": [38, 41, 54], "obstruct": [38, 41, 54], "9382": 38, "rhiniti": 38, "909": 38, "clear": [38, 40, 51], "renal": [38, 41, 49, 50], "carcinoma": [38, 41, 54], "6548": 38, "20540": 38, "lymph": 38, "cystic": [38, 41], "fibrosi": [38, 41, 54], "follicular": 38, "1089": 38, "influenza": 38, "8871": 38, "interstiti": [38, 41, 42, 53, 54], "1803": 38, "benign": 38, "neoplasm": 38, "oncocytoma": 38, "2408": 38, "adenocarcinoma": [38, 41, 54], "205": 38, "3274": 38, "507": 38, "215013": 38, "24969": 38, "pleural": 38, "fluid": 38, "11558": 38, "5922": 38, "lymphangioleiomyomatosi": [38, 41, 54], "513": 38, "36573": 38, "nonpapillari": 38, "adipos": [38, 51], "4828": 38, "288": [38, 49], "clot": 38, "1717": 38, "69136": 38, "pleomorph": [38, 41, 54], "1715": 38, "pneumonia": [38, 41, 54], "856": [38, 48], "1671": 38, "disord": 38, "34301": 38, "squamou": [38, 41, 42, 54], "52053": 38, "lupu": 38, "erythematosu": 38, "355471": 38, "don": [38, 43, 45, 47, 51, 54], "forget": [38, 43, 45, 47, 54], "del": [38, 39, 40, 41], "opportun": 39, "inter": 39, "exhaust": 39, "proper": 39, "ignor": [39, 40, 41, 42, 43, 44, 46, 48, 52], "sc": [39, 40, 41, 42, 43, 44, 53], "home": [39, 41, 43], "ssm": [39, 41, 43], "lib": [39, 41, 43], "python3": [39, 41, 43], "_set": 39, "63": [39, 42], "userwarn": [39, 41, 43], "longer": 39, "run": [39, 40, 42, 44, 53, 58], "70": [39, 42], "dl_pin_memory_gpu_train": 39, "deprec": 39, "pin_memori": 39, "loader": 39, "tqdm": [39, 41, 43], "auto": [39, 41, 43], "tqdmwarn": [39, 41, 43], "iprogress": [39, 41, 43], "jupyt": [39, 41, 43], "ipywidget": [39, 
41, 43], "user_instal": [39, 41, 43], "autonotebook": [39, 41, 43], "notebook_tqdm": [39, 41, 43], "tabula": [39, 41, 43, 49, 50], "muri": [39, 43, 50], "seni": [39, 43, 50], "census_dataset": [39, 41, 49, 50], "tabula_liv": 39, "loc": [39, 49], "525": [39, 43], "0b9d8a04": [39, 43, 50], "bb9d": [39, 43, 50], "44da": [39, 43, 50], "aa27": [39, 43, 50], "705bb65b54eb": [39, 43, 50], "s41586": [39, 43, 47, 49, 50], "020": [39, 43, 49, 50], "2496": [39, 43, 50], "4546e757": [39, 43], "34d0": [39, 43], "4d17": [39, 43], "be06": [39, 43], "538318925fcd": [39, 43], "atla": [39, 41, 43, 49, 50, 51], "cha": [39, 43], "2859": [39, 43], "547": 39, "6202a243": [39, 51], "b713": [39, 51], "4e12": [39, 51], "9ced": [39, 51], "c387f8483dea": [39, 51], "7294": [39, 51], "tabula_muris_liver_id": 39, "smart_seq_gene_length": 39, "to_numpi": [39, 41, 42, 43, 46, 48, 52], "smart_seq_index": 39, "smart_seq_x": 39, "proce": [39, 43], "ceil": 39, "put": [39, 52], "omic": [39, 52], "yosef": 39, "lab": [39, 41, 49, 50, 52], "uc": [39, 40, 52], "berkelei": 39, "variat": [39, 40], "infer": [39, 58], "deep": 39, "scrna": [39, 41], "comprehens": 39, "best": [39, 40], "practic": [39, 43], "strength": 39, "bread": [39, 41], "butter": [39, 41], "neighbor": [39, 40, 41, 42, 43, 44, 46, 52], "graph": [39, 40], "visual": [39, 40, 41, 42, 44], "umap": [39, 40, 41, 42, 43, 44, 46, 52], "But": [39, 51], "save": [39, 46, 50, 52, 53, 58], "normalize_tot": [39, 40, 41, 42, 43, 44], "target_sum": [39, 40, 41, 42, 43, 44], "1e4": [39, 41, 42, 43, 44], "log1p": [39, 40, 41, 42, 43, 44], "max_valu": [39, 41, 42, 43], "Then": [39, 42, 43, 46, 47, 52, 58], "final": [39, 40, 42, 43, 46, 48, 49, 51, 52, 56, 58], "tl": [39, 40, 41, 42, 43, 44, 46, 52], "pca": [39, 41, 42, 43], "n_neighbor": [39, 40, 42, 44], "n_pc": [39, 42], "40": [39, 42], "pl": [39, 40, 41, 42, 43, 44, 46, 52, 53], "color": [39, 40, 41, 42, 43, 44, 46, 52], "plot": [39, 40, 41, 42, 43, 44, 46, 52], "_tool": [39, 41, 43], "scatterplot": [39, 
40, 41, 43], "392": [39, 41, 43], "No": [39, 41, 43], "colormap": [39, 41, 43], "cmap": [39, 41, 43], "cax": [39, 41, 43], "scatter": [39, 40, 41, 43, 44, 46, 52], "strong": [39, 41], "properli": 39, "principl": 39, "randomli": [39, 40], "whenev": 39, "evidenc": 39, "articl": 39, "health": 39, "sikkema": 39, "et": [39, 51], "al": [39, 51], "whom": 39, "perfom": 39, "43": [39, 42, 49, 56], "great": [39, 43], "place": [39, 58], "latent": [39, 40, 44], "setup_anndata": 39, "vae": 39, "n_layer": 39, "n_latent": 39, "gene_likelihood": 39, "nb": 39, "n_hidden": 39, "50": [39, 42, 46, 54], "gpu": [39, 42, 44], "tpu": 39, "tf_cpp_min_log_level": 39, "rerun": [39, 40], "info": [39, 41, 44, 53], "max_epoch": 39, "ipu": 39, "hpu": 39, "epoch": [39, 58], "00": [39, 43, 46], "15it": 39, "v_num": 39, "train_loss_step": 39, "545": 39, "train_loss_epoch": 39, "560": 39, "trainer": [39, 42], "stop": [39, 51], "17it": 39, "represent": [39, 40, 42], "x_scvi": 39, "get_latent_represent": [39, 44], "use_rep": [39, 40, 42, 44, 46, 52], "mainli": 39, "driven": [39, 40], "albeit": 39, "contribut": [39, 41, 46, 52], "discret": 39, "curat": [39, 47, 53], "dure": [39, 42], "strongli": 39, "22": [39, 41, 42, 44, 53, 55, 57], "dataset_id_donor_id": 39, "astyp": [39, 40, 42], "23": [39, 41, 42, 49, 53], "24": [39, 41, 42, 49, 57], "27it": 39, "520": 39, "550": 39, "25it": 39, "mostli": [39, 41], "nucleu": [39, 52, 54], "accomplish": [39, 41], "latter": [39, 54], "knowledg": 40, "later": [40, 42, 44, 46, 52], "journei": 40, "2d": [40, 46, 52], "involv": 40, "nonlinear": 40, "transform": [40, 41, 42, 43, 44, 52], "Such": 40, "affect": [40, 58], "manifold": 40, "overclust": 40, "reduct": [40, 51], "mind": [40, 55], "hypothes": 40, "focus": 40, "ultim": 40, "underli": [40, 58, 59], "investig": 40, "behind": 40, "One": 40, "foundat": [40, 52], "technic": 40, "often": 40, "could": [40, 44, 58], "pure": 40, "systemat": 40, "bias": [40, 41], "complic": 40, "matter": 40, "techniqu": 40, "nearest": 40, 
"themselv": 40, "amplifi": [40, 42], "rigor": 40, "benchmark": 40, "fulli": 40, "space": [40, 42], "highlight": 40, "challeng": 40, "unsolv": 40, "problem": 40, "briefli": [40, 53], "illustr": [40, 52], "capac": 40, "captur": 40, "intrigu": 40, "phenomena": 40, "disclaim": 40, "depth": [40, 41, 43], "insight": [40, 52], "glean": 40, "inaccur": 40, "leidenalg": 40, "hdbscan": 40, "scikit": 40, "warn": [40, 41, 42, 44, 46, 52], "filterwarn": [40, 42, 44, 46, 52], "def": [40, 48, 58], "remove_missing_embedding_cel": 40, "emb_nam": [40, 46], "miss": [40, 44, 48, 52], "intersect": 40, "accordingli": 40, "filt": 40, "ones": 40, "nan_row_sum": 40, "isnan": [40, 48], "total_column": 40, "generate_umaps_from_embed": 40, "euclidean": 40, "key_ad": 40, "neighbors_kei": 40, "x_emb_nam": 40, "x_": 40, "_": [40, 52], "_umap": 40, "x_umap": 40, "var_nam": [40, 41, 42, 44], "scgpt": [40, 52], "obs_df": [40, 45, 46, 48, 52, 55, 57], "n_subset_cel": 40, "150000": 40, "idx_rand": 40, "choic": [40, 42, 44, 53], "soma_joinids_subset": 40, "tolist": [40, 41, 44, 45], "799353": 40, "distinctli": 40, "oca2": 40, "marker": [40, 44], "kit": 40, "vari": 40, "immatur": 40, "clearli": 40, "slight": 40, "extens": [40, 51], "concentr": 40, "seen": 40, "satellit": 40, "signatur": 40, "probabl": [40, 42, 44, 58], "mani": [40, 48, 58], "disconnect": 40, "compon": 40, "tend": 40, "extent": 40, "versu": 40, "unclear": 40, "qualit": 40, "pronounc": 40, "basi": 40, "geneformer_umap": 40, "use_raw": 40, "scgpt_umap": 40, "uce_umap": 40, "scvi_umap": 40, "subclust": 40, "leiden": [40, 42, 44], "emploi": 40, "densiti": 40, "pairwis": 40, "distanc": [40, 48], "compar": [40, 44], "reveal": [40, 41], "distinct": [40, 58], "approach": 40, "signific": [40, 55], "agreement": 40, "mutual": 40, "nmi": 40, "score": [40, 44], "assign": [40, 48], "yield": 40, "65": [40, 42], "inher": 40, "expect": [40, 41, 43, 52], "finetun": 40, "homogen": [40, 58], "belong": 40, "underscor": 40, "draw": 40, "coupl": 40, "conclus": 
40, "lead": 40, "identif": 40, "evid": 40, "examin": [40, 58], "relianc": 40, "unjustifi": 40, "known": 40, "phenomenon": 40, "cross": [40, 41], "fuller": 40, "much": [40, 45], "hold": [40, 58], "lack": 40, "necessit": 40, "thereof": 40, "pd": [40, 41, 48, 56, 57, 58], "pdist": 40, "squareform": 40, "sklearn": [40, 44], "normalized_mutual_info_scor": 40, "adata_rbn": 40, "_connect": 40, "_leiden": 40, "pairwise_dist": 40, "_hdbscan": 40, "min_cluster_s": 40, "min_sampl": 40, "precomput": [40, 55], "fit_predict": 40, "displai": [40, 44, 45, 48, 52, 53, 58], "embedding_kei": 40, "sim_scores_leiden": 40, "len": [40, 41, 42, 44, 45, 48, 50, 51, 58], "sim_scores_hdbscan": 40, "embedding_i": 40, "enumer": 40, "embedding_j": 40, "sim_scores_leiden_t": 40, "sim_scores_hdbscan_t": 40, "seem": [40, 41], "log": [40, 41, 43, 44], "019350262700332705": 40, "10823680188668149": 40, "33544664134758767": 40, "7692425249981675": 40, "512967": 40, "699360": 40, "656060": 40, "608826": 40, "587517": 40, "816612": 40, "075175": 40, "048565": 40, "012763": 40, "286486": 40, "096839": 40, "345248": 40, "11896761": 40, "th": 40, "wherea": [40, 52], "tendenc": 40, "condit": [40, 54], "glioblastoma": 40, "pilocyt": 40, "astrocytoma": 40, "mix": 40, "outsid": 40, "53d208b0": [40, 41, 49], "2cfd": [40, 41, 49], "4366": [40, 41, 49], "9866": [40, 41, 49], "c3c6114081bc": [40, 41, 49], "smartseq": 40, "cftr": 40, "rare": 40, "recogniz": 40, "summary_t": 41, "980": [41, 56], "2907156": 41, "6011592": 41, "lung_ob": 41, "5945423": 41, "9f222629": [41, 53], "9e39": [41, 53], "47d0": [41, 53], "b83f": [41, 53], "e08d610c7479": [41, 53], "nativ": [41, 55], "0000003": [41, 45, 55], "0000461": [41, 54, 57], "5945426": 41, "ciliat": [41, 42], "columnar": [41, 42], "tracheobronchi": 41, "tree": 41, "0002145": 41, "57": [41, 42], "hsapdv": [41, 54], "0000151": 41, "0002771": 41, "0000384": [41, 57], "5945428": 41, "0000625": [41, 45], "0005097": 41, "5945432": 41, "0000624": [41, 45], "0005061": 41, 
"5945441": 41, "2907151": 41, "8c42cfd0": [41, 49, 50, 53], "0b0a": [41, 49, 50, 53], "46d5": [41, 49, 50, 53], "910c": [41, 49, 50, 53], "fc833d83c45e": [41, 49, 50, 53], "0000669": [41, 45], "0000145": 41, "0000383": [41, 57], "2907152": 41, "2907153": 41, "2907154": 41, "2907155": 41, "deeper": 41, "dive": 41, "characterist": 41, "set_index": [41, 44, 48, 50, 56, 57], "f171db61": [41, 49, 50, 54], "e57": [41, 49, 50, 54], "4535": [41, 49, 50, 54], "a06a": [41, 49, 50, 54], "35d8b6ef8f2b": [41, 49, 50, 54], "multiom": [41, 49, 50], "developm": [41, 49, 50], "donor_p13_trophoblast": [41, 49, 50], "ecf2e08": [41, 49, 50], "2032": [41, 49, 50], "4a9e": [41, 49, 50], "b466": [41, 49, 50], "b65b395f4a02": [41, 49, 50], "74cff64f": [41, 49, 50], "9da9": [41, 49, 50], "4b2a": [41, 49, 50], "9b3b": [41, 49, 50], "8a04a1598040": [41, 49, 50], "vivo": [41, 49, 50], "5af90777": [41, 49, 50], "6760": [41, 49, 50], "4003": [41, 49, 50], "9dba": [41, 49, 50], "8f945fec6fdf": [41, 49, 50], "intr": [41, 49, 50], "bd65a70f": [41, 49, 50], "b274": [41, 49, 50], "4133": [41, 49, 50], "b9dd": [41, 49, 50], "0d1431b6af34": [41, 49, 50], "multiregion": [41, 49, 50], "imm": [41, 49, 50], "f9ad5649": [41, 49, 50], "f372": [41, 49, 50], "43e1": [41, 49, 50], "a3a8": [41, 49, 50], "423383e5a8a2": [41, 49, 50], "molecular": [41, 49, 50], "character": [41, 43, 49, 50, 51], "vuln": [41, 49, 50], "456e8b9b": [41, 49, 50], "f872": [41, 49, 50], "488b": [41, 49, 50], "871d": [41, 49, 50], "94534090a865": [41, 49, 50], "peripher": [41, 49, 50], "immun": [41, 49, 50, 51], "respon": [41, 49, 50], "589": [41, 49, 50], "2adb1f8a": [41, 49, 50, 54], "a6b1": [41, 49, 50, 54], "4909": [41, 49, 50, 54], "8ee8": [41, 49, 50, 54], "484814e2d4bf": [41, 49, 50, 54], "landscap": [41, 49, 50], "sing": [41, 49, 50], "590": [41, 49, 50], "e04daea4": [41, 49, 50], "4412": [41, 49, 50], "45b5": [41, 49, 50], "989e": [41, 49, 50], "76a9be070a89": [41, 49, 50], "krasnow": [41, 49, 50], "591": [41, 49, 50], "592": 
[41, 49, 50], "append": [41, 52], "dataset_cell_count": 41, "cell_count": 41, "merg": [41, 42, 52, 56], "1e6a6ef9": 41, "7ec9": 41, "4c90": 41, "bbfb": 41, "2ad3c3165fd1": 41, "1028006": 41, "resolut": [41, 53], "luca": 41, "ex": 41, "314": 41, "784630": 41, "f7c1c579": 41, "2dc0": 41, "47e2": 41, "ba19": 41, "8165c5a0e353": 41, "217738": 41, "fetal": 41, "survei": 41, "embryon": 41, "483": 41, "d8da613f": 41, "e681": 41, "4c69": 41, "b463": 41, "e94f5e66847f": 41, "116313": 41, "lethal": 41, "80": [41, 42, 55], "576f193c": 41, "75d0": 41, "4a11": 41, "bd25": 41, "8676587e6dc2": 41, "90384": 41, "htan": 41, "msk": 41, "377": 41, "d41f45c1": 41, "1b7b": 41, "4573": 41, "a998": 41, "ac5c5acb1647": 41, "82991": 41, "reg": 41, "regulatori": 41, "58": [41, 42], "3dc61ca1": 41, "ce40": 41, "46b6": 41, "8337": 41, "f27260fd9a03": 41, "71752": 41, "uncov": 41, "proxima": 41, "325": 41, "60993": 41, "2672b679": 41, "8048": 41, "4f5e": 41, "9786": 41, "f1b196ccfd08": 41, "57019": 41, "spleen": [41, 49, 51], "parenchyma": 41, "416": 41, "9dbab10c": 41, "118d": 41, "496b": 41, "966a": 41, "67f1763a6b7d": 41, "49014": 41, "criti": 41, "482": 41, "9968be68": 41, "ab65": 41, "4a38": 41, "9e1a": 41, "c9b6abece194": 41, "47909": 41, "chart": 41, "endod": 41, "78": [41, 42], "3de0ad6d": 41, "4378": 41, "4f62": 41, "b37b": 41, "ec0b75a50d94": 41, "46500": 41, "lungmap": 41, "broad": 41, "ag": [41, 43, 51], "healthi": 41, "456": 41, "2f132ec9": 41, "24b5": 41, "422f": 41, "9be0": 41, "ccef03b4fe28": 41, "39778": 41, "sar": 41, "cov": 41, "receptor": [41, 55], "ace2": [41, 53], "tmprss2": 41, "prima": 41, "312": 41, "1e5bd3b8": 41, "6a0e": 41, "4959": 41, "8d69": 41, "cafed30fe814": 41, "35699": 41, "emphysema": [41, 54], "130": 41, "35682": [41, 49], "475": [41, 49], "1b9d8702": 41, "5af8": 41, "4142": 41, "85ed": 41, "020eb06ec4f6": 41, "35419": 41, "tiss": 41, "411": 41, "4ed927e9": 41, "c099": 41, "49af": 41, "b8ce": 41, "a2652d069333": 41, "35284": 41, "367": 41, "33698": 41, 
"4b6af54a": 41, "4a21": 41, "46e0": 41, "bc8d": 41, "673c0561a836": 41, "18386": 41, "01209dce": 41, "3575": 41, "4bed": 41, "b1df": 41, "129f57fbc031": 41, "11059": 41, "8657": 41, "f9846bb4": 41, "784d": 41, "4582": 41, "92c1": 41, "3f279e4c6f0c": 41, "176": [41, 49], "fibroblast": [41, 42, 53, 55], "smooth": 41, "muscl": [41, 42, 49, 51], "317": 41, "f64e1be1": 41, "de15": 41, "4d27": 41, "8da4": 41, "82225cd4c035": 41, "55": [41, 42, 57], "370": 41, "810ac45f": 41, "8969": 41, "4698": 41, "b42c": 41, "652f802f75c2": 41, "endothelium": 41, "320": 41, "0ba16f4b": 41, "cb87": 41, "4fa3": 41, "9363": 41, "19fc51eec6e7": 41, "myeloid": [41, 42], "326": 41, "reprens": 41, "divers": [41, 45, 49, 52], "plastic": 41, "tumor": 41, "neutrophil": 41, "subpopul": 41, "distal": 41, "gradient": 41, "differenti": [41, 42], "regul": 41, "epitheli": [41, 42, 49, 51, 55, 58], "fate": 41, "tell": 41, "1236968": 41, "702074": 41, "262323": 41, "122902": 41, "97432": 41, "65220": 41, "41852": 41, "25662": 41, "8638": 41, "8016": 41, "1164084": 41, "772120": 41, "331019": 41, "209675": 41, "120796": 41, "55254": 41, "51343": 41, "45714": 41, "31923": 41, "31792": 41, "31540": 41, "21167": 41, "17590": 41, "12374": 41, "10765": 41, "1402565": 41, "1122990": 41, "381601": 41, "2468587": 41, "438569": 41, "head": [41, 49], "alveolar": [41, 55], "macrophag": [41, 42], "291507": 41, "263362": 41, "211456": 41, "189471": 41, "154415": 41, "ii": 41, "128463": 41, "lower": [41, 56, 58], "tract": 41, "105090": 41, "102303": 41, "killer": [41, 42, 51, 53], "95953": 41, "92846": 41, "stromal": [41, 42, 49, 51], "87714": 41, "81125": 41, "malign": 41, "75917": 41, "plasma": 41, "64551": 41, "59353": 41, "45305": 41, "capillari": 41, "39416": 41, "36381": 41, "36049": 41, "35467": 41, "2576327": 41, "147410": 41, "alveolu": 41, "54085": 41, "lingula": 41, "upper": [41, 49], "lobe": 41, "right": [41, 51], "32099": 41, "17854": 41, "12880": 41, "10113": 41, "9276": 41, "7981": 41, "middl": 41, 
"3847": 41, "lung_var": 41, "ensg00000121410": [41, 49], "a1bg": [41, 49], "3999": [41, 49], "ensg00000268895": [41, 49], "as1": [41, 49], "3374": [41, 49], "ensg00000148584": [41, 49], "a1cf": [41, 49], "9603": [41, 49], "ensg00000175899": [41, 49], "a2m": [41, 49], "6318": [41, 49], "ensg00000245105": [41, 49], "2948": [41, 49], "ensg00000288719": [41, 49], "rp4": [41, 49], "669p10": [41, 49], "ensg00000288720": [41, 49], "rp11": [41, 49], "852e15": [41, 49], "7007": [41, 49], "ensg00000288721": [41, 49], "rp5": [41, 49], "973n23": [41, 49], "7765": [41, 49], "ensg00000288723": [41, 49], "553n16": [41, 49], "1015": [41, 49], "ensg00000288724": [41, 49], "rp13": [41, 49], "546i2": [41, 49], "625": [41, 49], "60664": [41, 46, 49, 52, 58], "actual": [41, 58], "mislead": 41, "know": [41, 51, 54], "presence_matrix": [41, 43, 49], "get_presence_matrix": [41, 43, 49], "a1": 41, "17811": 41, "50259": 41, "44150": 41, "34265": 41, "22447": 41, "23642": 41, "26347": 41, "20921": 41, "24672": 41, "27705": 41, "27243": 41, "26323": 41, "27181": 41, "23203": 41, "57042": 41, "32610": 41, "29620": 41, "26454": 41, "23705": 41, "38676": 41, "47307": 41, "23740": 41, "22552": 41, "20594": 41, "19952": 41, "uint64": 41, "genes_measur": 41, "var_somaid": 41, "nonzero": [41, 43], "ensg00000128274": 41, "a4galt": 41, "3358": 41, "ensg00000094914": 41, "aaa": 41, "4727": 41, "ensg00000081760": 41, "aac": 41, "16039": 41, "29951": 41, "ensg00000177272": 41, "kcna3": 41, "2476": 41, "30157": 41, "ensg00000184709": 41, "lrrc26": 41, "1209": 41, "30185": 41, "ensg00000087250": 41, "mt3": 41, "1679": 41, "30202": 41, "ensg00000136352": 41, "nkx2": 41, "3165": 41, "30512": 41, "ensg00000231439": 41, "wasir2": 41, "1054": 41, "11595": 41, "composit": 41, "infect": 41, "12k": 41, "intens": 41, "exercis": 41, "exploratori": 41, "000": 41, "lung_cell_subsampled_n": 41, "100000": 41, "lung_cell_subsampled_id": 41, "random_st": 41, "lung_gene_id": 41, "lung_adata": 41, "highest_expr_gen": 41, 
"n_top": 41, "calculate_qc_metr": 41, "percent_top": 41, "inplac": [41, 44], "violin": [41, 44], "n_genes_by_count": 41, "rotat": 41, "90": 41, "total_count": 41, "outlier": 41, "exlcud": 41, "rest": 41, "ll": [41, 43, 52, 57], "extra": 41, "_highly_variable_gen": 41, "_simpl": 41, "843": 41, "view": [41, 52, 53, 56], "view_to_actu": 41, "28": [41, 42, 53, 58], "n_cell_typ": 41, "drop_dupl": [41, 54], "randint": 41, "rang": [41, 42, 44, 46, 52, 58], "06x": 41, "0xffffff": 41, "palett": 41, "legend_loc": 41, "hard": 41, "32": [41, 42, 58], "top_cell_typ": 41, "reset_index": [41, 48], "lung_adata_top_cell_typ": 41, "cziscienc": [42, 44, 47, 52, 53], "unix": [42, 44], "mkdir": [42, 44], "p": [42, 44, 47, 48, 56], "wget": [42, 44], "nv": [42, 44], "pbmc3k_filtered_gene_bc_matric": [42, 44], "tar": [42, 44], "gz": [42, 44], "cf": [42, 44], "10xgenom": [42, 44], "exp": [42, 44], "pbmc3k": [42, 44], "xzf": [42, 44], "url": [42, 44], "7621991": [42, 44], "gt": [42, 44, 49, 53], "deatail": [42, 44], "insid": [42, 44], "geneformer_info": 42, "get_embedding_metadata_by_nam": [42, 44, 46], "model_link": [42, 44, 52], "contrib": [42, 44, 46, 52], "cli": [42, 50], "progress": [42, 44, 53], "fine_tuned_geneform": 42, "json": [42, 52], "datacollatorforcellclassif": 42, "embextractor": 42, "transcriptometoken": 42, "bertforsequenceclassif": 42, "test": [42, 45, 58], "ensembl_id": [42, 44], "ensg00000139618": 42, "suffix": 42, "n_count": [42, 44], "joinid": [42, 44, 49, 52], "write": [42, 50], "disk": 42, "read_10x_mtx": [42, 44], "filtered_gene_bc_matric": [42, 44], "hg19": [42, 44], "gene_id": [42, 44], "h5ad_dir": 42, "makedir": 42, "track": 42, "token_dir": 42, "tokenized_data": 42, "custom_attr_name_dict": 42, "tokenize_data": 42, "data_directori": 42, "output_directori": 42, "output_prefix": 42, "file_format": 42, "filter_pass": 42, "model_dir": 42, "label_mapping_dict_fil": 42, "label_to_cell_subclass": 42, "fp": 42, "label_mapping_dict": 42, "best4": 42, "cn": 42, "sensu": 
42, "vertebrata": 42, "gabaerg": 42, "abnorm": 42, "adventiti": [42, 53], "anim": 42, "cardiocyt": 42, "skelet": 42, "cuboid": 42, "contractil": 42, "defens": 42, "duct": 42, "ecto": 42, "ectoderm": 42, "endo": 42, "pancrea": [42, 49, 51], "urethra": 42, "eukaryot": 42, "fat": [42, 49], "germ": [42, 55], "glandular": 42, "35": [42, 58], "glial": 42, "37": 42, "hematopoiet": [42, 54], "38": [42, 53, 56], "precursor": 42, "hepatocyt": 42, "inflammatori": 42, "interneuron": [42, 49], "42": 42, "ionocyt": 42, "44": [42, 53], "45": [42, 56], "46": 42, "leukocyt": [42, 58], "47": 42, "lymphocyt": 42, "48": [42, 48], "49": 42, "mammari": [42, 51], "mesenchym": [42, 53], "52": [42, 44, 48], "meso": 42, "mesoderm": 42, "motor": 42, "mural": 42, "59": [42, 51], "myofibroblast": 42, "neural": 42, "termin": 42, "ovarian": 42, "surfac": 42, "67": [42, 56], "phagocyt": 42, "pigment": 42, "cultur": [42, 55], "71": 42, "primordi": 42, "progenitor": [42, 53], "73": 42, "salivari": 42, "sebac": 42, "75": [42, 49], "secretori": 42, "76": 42, "sensori": 42, "77": 42, "seromucu": 42, "secret": [42, 53], "somat": 42, "79": 42, "stem": [42, 53, 54, 57], "81": [42, 48], "82": 42, "83": [42, 48, 56], "84": 42, "transit": 42, "85": 42, "86": 42, "87": 42, "vertebr": 42, "load_from_disk": 42, "num_row": 42, "2700": 42, "dummi": [42, 44], "add_column": 42, "slow": 42, "pretrain": 42, "from_pretrain": 42, "data_col": 42, "vector": 42, "predicted_label_id": 42, "argmax": [42, 58], "predicted_logit": 42, "predicted_label": 42, "predicted_cell_subclass": 42, "predicted_cell_subclass_prob": 42, "min_mean": 42, "0125": 42, "max_mean": 42, "min_disp": 42, "svd_solver": 42, "arpack": 42, "scapi": 42, "original_cell_typ": [42, 44], "cd14": [42, 44], "fcgr3a": [42, 44], "megakaryocyt": [42, 44], "rename_categori": 42, "titl": [42, 46, 52], "shorter": 42, "panel": 42, "n_class": 42, "output_dir": 42, "geneformer_embed": 42, "embex": 42, "model_typ": 42, "cellclassifi": 42, "num_class": 42, "max_ncel": 
42, "emb_label": 42, "emb_lay": 42, "forward_batch_s": 42, "nproc": 42, "extract_emb": 42, "model_directori": 42, "input_data_fil": 42, "grab": [42, 45, 49, 52, 56], "c697eaaf": [42, 44], "a3b": [42, 44], "4251": [42, 44], "b036": [42, 44], "5f9052179e70": [42, 44], "f2a488bf": [42, 44], "782f": [42, 44], "4c20": [42, 44], "a8e5": [42, 44], "cb34d48c1f7e": [42, 44], "fa8605cf": [42, 44], "f27e": [42, 44], "44af": [42, 44], "ac2a": [42, 44], "476bee4410d3": [42, 44], "3c75a463": [42, 44], "6a87": [42, 44], "4132": [42, 44], "83a8": [42, 44], "c3002624394d": [42, 44], "adata_censu": [42, 44], "simplifi": [42, 48], "shared_gen": 42, "index_subset": [42, 44], "3000": [42, 44], "adata_join": 42, "outer": 42, "liver": [43, 51], "liver_dataset": 43, "liver_dataset_id": 43, "liver_adata": 43, "859": 43, "52392": [43, 48, 50, 56], "gene_pres": 43, "17992": 43, "992": 43, "toarrai": [43, 52], "000e": 43, "590e": 43, "02": [43, 44, 46, 47, 52], "969e": 43, "03": [43, 46, 49, 50], "280e": 43, "250e": 43, "400e": 43, "gene_length": 43, "00000000e": [43, 46], "58654413e": 43, "32001885e": 43, "74444813e": 43, "31455088e": 43, "71500419e": 43, "78985747e": 43, "real": 43, "filter_cel": 43, "min_gen": 43, "filter_gen": 43, "min_cel": 43, "saniti": 43, "prepar": 44, "pbmc": 44, "3k": 44, "scvi_info": 44, "pt": 44, "cp": [44, 50], "randomforestclassifi": 44, "unassign": 44, "model_filenam": 44, "prepare_query_anndata": 44, "is_train": 44, "trick": 44, "reprsent": 44, "vae_q": 44, "load_query_data": 44, "gene_symbol": [44, 53], "notnul": 44, "perfectli": 44, "appropri": 44, "markers_row1": 44, "il7r": 44, "lyz": 44, "ms4a1": 44, "cd8a": 44, "gnly": 44, "markers_row2": 44, "nkg7": 44, "ms4a7": 44, "fcer1a": 44, "cst3": 44, "ppbp": 44, "catch_warn": 44, "nk": 44, "label_map": 44, "adata_census_subset": 44, "adata_combin": 44, "correl": 44, "forest": 44, "classifi": 44, "rfc": 44, "predicted_cell_typ": [44, 58], "confid": 44, "predict_proba": 44, "classes_": [44, 58], 
"predicted_cell_type_prob": 44, "enough": [45, 48], "itself": 45, "tip": 45, "soma_df": 45, "faster": 45, "refin": 45, "record": 45, "_obs_": 45, "unique_cell_type_ontology_term_id": 45, "lot": 45, "top_10": 45, "nthe": 45, "0000525": [45, 54], "2000060": [45, 54], "0008036": [45, 54], "0002488": 45, "0002343": 45, "0000084": 45, "0001078": 45, "0000815": 45, "0000235": 45, "3000001": 45, "0000540": 45, "7665340": 45, "0000679": 45, "1894047": 45, "0000128": 45, "1881077": 45, "1508920": 45, "1477453": 45, "1419507": 45, "0000057": 45, "1397813": 45, "0000860": 45, "1369142": 45, "1308000": [45, 55], "4023040": 45, "1229658": 45, "occurr": 45, "lung_tissu": 45, "ntop": 45, "185": 45, "0002063": 45, "0000775": 45, "0001044": 45, "0001050": 45, "0000814": 45, "0000071": 45, "0000192": 45, "0002503": 45, "0002370": 45, "562038": 45, "0000583": 45, "526859": 45, "323985": 45, "323610": 45, "266333": 45, "255425": 45, "205013": 45, "0000623": 45, "164944": 45, "0001064": 45, "149067": 45, "0002632": 45, "132243": 45, "0002082": 45, "ooo2084": 45, "0002080": 45, "0000746": 45, "49929": 45, "0008034": 45, "33361": 45, "0002548": 45, "33180": 45, "0002131": 45, "30915": 45, "0000115": 45, "30054": 45, "18391": 45, "0000763": 45, "14408": 45, "13552": 45, "9690": 45, "0002144": 45, "9025": 45, "labl": 45, "cols_to_queri": 45, "complet": [45, 55], "df": [45, 53], "col": [45, 48, 49], "tuniqu": 45, "372": [46, 52], "axisarrai": [46, 52], "soma_dim_1": [46, 48, 51, 52], "soma_data": [46, 48, 51, 52], "bfloat16": [46, 52], "bit": [46, 52], "expon": [46, 52], "mantissa": [46, 52], "simplest": [46, 52], "nervou": [46, 52], "befor": [46, 52], "correspondong": [46, 52], "31780": [46, 52], "get_embed": [46, 52], "to_anndata": [46, 52], "obs_joinid": [46, 52], "embeddinng": [46, 52], "stand": [46, 52], "alon": [46, 52], "17187500e": 46, "82995605e": 46, "50000000e": 46, "39941406e": 46, "71606445e": 46, "39843750e": 46, "71115112e": 46, "32031250e": 46, "00781250e": 46, "55310059e": 
46, "85009766e": 46, "10156250e": 46, "42614746e": 46, "45312500e": 46, "53295898e": 46, "12915039e": 46, "84765625e": 46, "54113770e": 46, "94531250e": 46, "38281250e": 46, "03149414e": 46, "28881836e": 46, "14111328e": 46, "78125000e": 46, "15234375e": 46, "39562988e": 46, "79687500e": 46, "48388672e": 46, "19628906e": 46, "62803650e": 46, "88446045e": 46, "75694072": 47, "45846761": 47, "16292": 47, "2153": 47, "doi": [47, 52], "1002": 47, "ctm2": 47, "1356": 47, "695": 47, "696": 47, "697": 47, "1016": [47, 49, 50], "isci": 47, "698": 47, "1371": 47, "journal": 47, "699": 47, "700": 47, "cardiac": 47, "atrium": 47, "slice_dataset": 47, "isin": [47, 49], "sep": 47, "1126": [47, 49], "abl4896": [47, 49], "4866a804": 47, "37eb": 47, "436f": 47, "8c87": 47, "9cd585260061": 47, "e5f58829": [47, 49], "1a66": [47, 49], "40b5": [47, 49], "a624": [47, 49], "9046778e74f5": [47, 49], "bfd80f12": 47, "725c": 47, "4482": 47, "ad7f": 47, "1ed2b4909b0d": 47, "e6df8a57": 47, "f54f": 47, "413a": 47, "9d4d": 47, "dee03294d778": 47, "8d599205": 47, "5c51": 47, "4b50": 47, "9d48": 47, "3dec31238587": 47, "f6065c51": 47, "bd26": 47, "4aa5": 47, "a05d": 47, "2805aeea48d9": 47, "8cdbf790": 47, "4d29": 47, "4f46": 47, "9aef": 47, "21adfb2e21da": 47, "mybpc3": 47, "easier": 48, "extract": [48, 58], "experiment_queri": 48, "x_as_seri": 48, "nd": 48, "raw_n": 48, "aka": 48, "iloc": 48, "expens": 48, "var_df": [48, 49, 56], "float64": 48, "coo": 48, "arrow_tbl": 48, "var_dim": 48, "by_var": 48, "errstat": 48, "raw_mean": 48, "ensmusg00000051951": [48, 56], "xkr4": [48, 56], "6094": [48, 56], "202": 48, "032743": 48, "ensmusg00000089699": [48, 56], "gm1992": [48, 56], "250": [48, 56], "ensmusg00000102343": [48, 56], "gm37381": [48, 56], "1364": [48, 56], "ensmusg00000025900": [48, 56], "rp1": [48, 56], "12311": [48, 56], "106": 48, "236265": 48, "ensmusg00000025902": [48, 56], "sox17": [48, 56], "4772": [48, 56], "3259": 48, "991975": 48, "52387": [48, 56], "ensmusg00000081591": [48, 56], 
"btf3": [48, 56], "ps9": [48, 56], "496": [48, 56], "52388": [48, 56], "ensmusg00000118710": [48, 56], "mmu": [48, 56], "mir": [48, 56], "467a": [48, 56], "3_ensmusg00000118710": [48, 56], "52389": [48, 56], "ensmusg00000119584": [48, 56], "rn18": [48, 56], "1849": [48, 56], "52390": [48, 56], "ensmusg00000118538": [48, 56], "gm18218": [48, 56], "970": [48, 56], "52391": [48, 56], "ensmusg00000084217": [48, 56], "setd9": [48, 56], "670": [48, 56], "welford": [48, 57], "npt": 48, "onlinematrixmeanvari": 48, "n_sampl": 48, "n_variabl": 48, "axix": 48, "n_a": 48, "int32": [48, 58], "u_a": 48, "m2_a": 48, "coord_vec": 48, "value_vec": 48, "_mean_variance_upd": 48, "tupl": 48, "m2": 48, "_mean_variance_fin": 48, "max": 48, "jit": 48, "nopython": 48, "col_arr": 48, "val_arr": 48, "squar": 48, "val": 48, "u_prev": 48, "m2_prev": 48, "accont": 48, "chan": 48, "n_b": 48, "u_b": 48, "m2_b": 48, "mvn": 48, "raw_vari": 48, "848": 48, "312801": 48, "169": 48, "182975": 48, "279575": 48, "656207": 48, "malat1": 48, "ptprd": 48, "dlg2": 48, "pcdh9": 48, "n_cells_by_dataset": 48, "multiindex": 48, "from_product": 48, "n_cell": 48, "x_tbl": 48, "to_fram": 48, "get_index": 48, "pick": [48, 50], "3bbb6cf9": 48, "72b9": 48, "41be": 48, "b568": 48, "656de6eb18b5": 48, "ensmusg00000028399": 48, "79578": 48, "58b01044": 48, "c5e5": 48, "4b0f": 48, "8a2d": 48, "6ebf951e01ff": 48, "474": 48, "ensmusg00000052572": 48, "79513": 48, "98e5ea9f": [48, 57], "16d6": [48, 57], "47ec": [48, 57], "a529": [48, 57], "686e76515e39": [48, 57], "908": 48, "66ff82b4": 48, "9380": 48, "469c": 48, "bc4b": 48, "cfa08eacd325": 48, "c08f8441": 48, "4a10": 48, "4748": 48, "872a": 48, "e70c0bcccdba": 48, "ensmusg00000055421": 48, "79476": 48, "125": [48, 58], "3027": 48, "2910": 48, "117": 48, "ensmusg00000092341": 48, "79667": 48, "12622": 48, "20094": 48, "7102": 48, "12992": 48, "compil": 49, "n_dataset": 49, "therein": [49, 50], "human_rna": 49, "datasets_df": 49, "e2c257e7": [49, 50], "6f79": [49, 50], 
"487c": [49, 50], "b81c": [49, 50], "39451cd4ab3c": [49, 50], "023": [49, 50], "05869": [49, 50], "31497": [49, 50], "67070": [49, 50], "286326": [49, 50], "f7cecffa": [49, 50], "00b4": [49, 50], "4560": [49, 50], "a29a": [49, 50], "8ad626b8ee08": [49, 50], "ccell": [49, 50], "001": [49, 50], "270855": [49, 50], "3f50314f": [49, 50], "bdc9": [49, 50], "40c6": [49, 50], "8e4a": [49, 50], "b0901ebfbe4c": [49, 50], "2021": [49, 50], "007": [49, 50], "167283": [49, 50], "180bff9c": [49, 50], "c8a5": [49, 50], "4539": [49, 50], "b13b": [49, 50], "ddbc00d643e6": [49, 50], "s41593": [49, 50], "00764": [49, 50], "8168": [49, 50], "a72afd53": [49, 50], "ab92": [49, 50], "4511": [49, 50], "88da": [49, 50], "252fb0e26b9a": [49, 50], "s41591": [49, 50], "0944": [49, 50], "y": [49, 50], "44721": [49, 50], "38833785": [49, 50], "fac5": [49, 50], "48fd": [49, 50], "944a": [49, 50], "0f62a4c23ed1": [49, 50], "2157": [49, 50], "598266": [49, 50], "5d445965": [49, 50], "6f1a": [49, 50], "4b68": [49, 50], "ba3a": [49, 50], "b8f765155d3a": [49, 50], "2922": [49, 50], "9409": [49, 50], "65662": [49, 50], "593x60664": 49, "16133717": 49, "manipul": 49, "ensg00000286096": 49, "97a17473": 49, "e2b1": 49, "4f31": 49, "a544": 49, "44a60773e2dd": 49, "var_joinid": 49, "dataset_joinid": 49, "is_pres": 49, "tocoo": 49, "ff45e623": 49, "7f5f": 49, "46e3": 49, "b47d": 49, "56be0341f66b": 49, "13497": 49, "f01bdd17": 49, "4902": 49, "40f5": 49, "86e3": 49, "240d66dd2587": 49, "salivary_gland": 49, "27199": 49, "e6a11140": 49, "2545": 49, "46bc": 49, "929e": 49, "da243eed2ca": 49, "11505": 49, "e5c63d94": 49, "593c": 49, "4338": 49, "a489": 49, "e1048599e751": 49, "bladder": [49, 51], "24583": 49, "d8732da6": 49, "8d1d": 49, "42d9": 49, "b625": 49, "f2416c30054b": 49, "trachea": [49, 51], "9522": 49, "cee11228": 49, "9f0b": 49, "4e57": 49, "afe2": 49, "cfe15ee56312": 49, "34004": 49, "a357414d": 49, "2042": 49, "4eb5": 49, "95f0": 49, "c58604a18bdd": 49, "small_intestin": 49, "12467": 49, 
"a0754256": 49, "f44b": 49, "4c4a": 49, "962c": 49, "a552e47d3fdc": 49, "10650": 49, "983d5ec9": 49, "40e8": 49, "4512": 49, "9e65": 49, "a572a9c486cb": 49, "50115": 49, "5e5e7a2f": 49, "8f1c": 49, "42ac": 49, "90dc": 49, "b4f80f38e84c": 49, "20263": 49, "55cf0ea3": 49, "9d2b": 49, "4294": 49, "871e": 49, "bb4b49a79fc7": 49, "15020": [49, 58], "4f1555bc": 49, "4664": 49, "46c3": 49, "a606": 49, "78d34dd10d92": 49, "bone_marrow": [49, 50], "12297": 49, "2423ce2c": 49, "3149": 49, "4cca": 49, "a2ff": 49, "cf682ea29b5f": 49, "9641": 49, "1c9eb291": 49, "6d31": 49, "47e1": 49, "96b2": 49, "129b5e1ae64f": 49, "30746": 49, "18eb630b": 49, "a754": 49, "4111": 49, "8cd4": 49, "c24ec80aa5ec": 49, "lymph_nod": 49, "53275": 49, "0d2ee4ac": 49, "05ee": 49, "40b2": 49, "afb6": 49, "ebb584caa867": 49, "0ced5e76": 49, "6040": 49, "47ff": 49, "8a72": 49, "93847965afc0": 49, "thymu": [49, 51], "33664": 49, "283d65eb": 49, "dd53": 49, "496d": 49, "adb7": 49, "7570c7caa443": 49, "1101": [49, 52], "511898": 49, "8e10f1c4": 49, "8e98": 49, "41e5": 49, "b65f": 49, "8cd89a887122": 49, "2480956": 49, "139": 49, "fe1a73ab": 49, "a203": 49, "45fd": 49, "84e9": 49, "0f7fd19efcbd": 49, "dissect": 49, "amygdaloid": 49, "ami": 49, "basolat": 49, "35285": 49, "143": 49, "f8dda921": 49, "5fb4": 49, "4c94": 49, "a654": 49, "c6fc346bfd6d": 49, "cerebr": 49, "cortex": 49, "cx": 49, "occipitotem": 49, "31899": 49, "160": 49, "dd03ce70": 49, "3243": 49, "4c96": 49, "9561": 49, "330cc461e4d7": 49, "perirhin": 49, "23732": 49, "165": 49, "d2b5efc1": 49, "14c6": 49, "4b5f": 49, "bd98": 49, "40f9084872d7": 49, "tail": 49, "hippocampu": 49, "hit": 49, "caudal": 49, "36886": 49, "175": 49, "c4b03352": 49, "af8d": 49, "492a": 49, "8d6b": 49, "40f304e0a122": 49, "superclust": 49, "medium": 49, "spini": 49, "152189": 49, "c2aad8fc": 49, "b63b": 49, "4f9b": 49, "9cfd": 49, "baf7bc9c1771": 49, "tempor": 49, "po": 49, "37642": 49, "177": 49, "c202b243": 49, "1aa1": 49, "4b16": 49, "bc9a": 49, "b36241f3b1e3": 49, 
"amygdala": 49, "excitatori": 49, "109452": 49, "178": 49, "bdb26abd": 49, "f4ba": 49, "4ea3": 49, "8862": 49, "c2340e7a4f55": 49, "cge": 49, "227671": 49, "183": 49, "acae7679": 49, "d077": 49, "461c": 49, "b857": 49, "ee6ccfeb267f": 49, "hih": 49, "ca1": 49, "39147": 49, "196": 49, "9372df2d": 49, "13d6": 49, "4fac": 49, "980b": 49, "919a5b7eb483": 49, "midbrain": 49, "periaqueduct": 49, "grai": 49, "33794": 49, "197": 49, "93131426": 49, "0124": 49, "4ab4": 49, "a013": 49, "9dfbcd99d467": 49, "epithalamu": 49, "eth": 49, "24327": 49, "206": [49, 56], "7c1c3d47": 49, "3166": 49, "43e5": 49, "9a95": 49, "65ceb2d45f78": 49, "pon": 49, "pn": 49, "pontin": 49, "reticular": 49, "49512": 49, "208": 49, "7a0a8891": 49, "9a22": 49, "4549": 49, "a55b": 49, "c2aca23c3a2a": 49, "hippocamp": 49, "74979": 49, "5e5ab909": 49, "f73f": 49, "4b57": 49, "98a0": 49, "6d2c5662f6a4": 49, "inferior": 49, "colliculu": 49, "32306": 49, "3f56901c": 49, "dd4a": 49, "47d6": 49, "b60b": 49, "7b0c0111cfb2": 49, "37911": 49, "3a7f3ab4": 49, "a280": 49, "4b3b": 49, "b2c0": 49, "6dd05614a78c": 49, "splatter": 49, "291833": 49, "249": 49, "35c8a04c": 49, "8639": 49, "4d15": 49, "8228": 49, "765d8d93fc96": 49, "hypothalamu": 49, "hth": 49, "supraopt": 49, "16753": 49, "270": 49, "07b1d7c8": 49, "5c2e": 49, "42f7": 49, "9246": 49, "26f746cd6013": 49, "myelencephalon": 49, "medulla": 49, "oblongata": 49, "27210": 49, "273": 49, "0325478a": 49, "9b52": 49, "b40a": 49, "2e2ab0d72eb1": 49, "intratelencephal": 49, "455006": 49, "483152": 49, "476": 49, "a68b64d8": 49, "aee3": 49, "4947": 49, "81b7": 49, "36b8fe5a44d2": 49, "82478": 49, "477": 49, "c5d88abe": 49, "f23a": 49, "45fa": 49, "a534": 49, "788985e93dad": 49, "264824": 49, "478": 49, "5a11f879": 49, "d1ef": 49, "458a": 49, "9b0bdfca5ebf": 49, "31691": 49, "479": 49, "104148": 49, "17481d16": 49, "ee44": 49, "49e5": 49, "bcf0": 49, "28c0780d8c4a": 49, "58109": 49, "ensg00000277745": 49, "h2ab3": 49, "58354": 49, "ensg00000233522": 49, "fam224a": 
49, "2031": 49, "58411": 49, "ensg00000183146": 49, "prori": 49, "878": 49, "58523": 49, "ensg00000279274": 49, "533e23": 49, "58632": 49, "ensg00000277836": 49, "27211": 49, "all_experi": 50, "organism_nam": 50, "organism_experi": 50, "experiments_total_cel": 50, "num_cel": 50, "nfound": 50, "5255245": 50, "turn": 50, "toolchain": 50, "0bd1a1d": 50, "3aee": 50, "40e0": 50, "b2ec": 50, "86c7a30c7149": 50, "522": 50, "atl": 50, "40220": [50, 51], "submitt": 50, "direct": 50, "tabula_muris_seni": 50, "explan": 51, "lineag": [51, 52], "jin": 51, "tabula_muris_dataset_id": 51, "48b37086": [51, 53, 57], "25f7": [51, 53, 57], "4ecd": [51, 53, 57], "be66": [51, 53, 57], "f5bb378e3aea": [51, 53, 57], "tabula_muris_ob": 51, "35718": 51, "limb": 51, "28867": 51, "24540": 51, "21647": 51, "20680": 51, "12295": 51, "9275": 51, "lumen": 51, "8945": 51, "8613": 51, "7976": 51, "6777": 51, "6201": 51, "skin": [51, 57], "bodi": [51, 57], "4454": 51, "1887": 51, "tabula_muris_liver_dataset_id": 51, "tabula_muris_liver_ob": 51, "awar": 51, "chanc": 51, "priori": [51, 54], "sai": 51, "nk_cell": 51, "80935": 51, "repeat": 51, "nk_cells_primari": 51, "59109": 51, "aqp5": [51, 54], "adata_primari": 51, "demo": [51, 55], "awai": 51, "8448858": 51, "52812487": 51, "52812553": 51, "52812556": 51, "52812566": 51, "113": 51, "170": 51, "37033": 51, "37052": 51, "36904": 51, "36919": 51, "meaning": 52, "confirm": 52, "easiest": [52, 54], "data_typ": 52, "nmf": 52, "featu": 52, "impli": 52, "anoth": 52, "get_embedding_metadata": 52, "cxg": 52, "00506592": 52, "01348877": 52, "03173828": 52, "02331543": 52, "02404785": 52, "02441406": 52, "00595093": 52, "0065918": 52, "00070572": 52, "00187683": 52, "04663086": 52, "04614258": 52, "115722": 52, "512": [52, 56], "advanc": [52, 56], "portion": 52, "caution": 52, "quit": 52, "500_000": 52, "fail": [52, 56], "embedding_slic": 52, "emb_data": 52, "emb_joinid": 52, "reindex_disable_on_axi": 52, "embedding_presence_mask": 52, "getnnz": 52, 
"embedding_data": 52, "vstack": 52, "embedding_joinid": 52, "00762939": 52, "00076675": 52, "00047874": 52, "03588867": 52, "00405884": 52, "00239563": 52, "00982666": 52, "00946045": 52, "00473022": 52, "0135498": 52, "01049805": 52, "03051758": 52, "critic": 52, "meaningless": 52, "embedding_metadata": 52, "toward": 52, "ai": 52, "burgeon": 52, "pioneer": 52, "million": 52, "distil": 52, "concern": 52, "transfer": 52, "optim": [52, 58], "superior": 52, "primary_contact": 52, "bo": 52, "wang": 52, "bowang": 52, "vectorinstitut": 52, "affili": 52, "toronto": 52, "additional_contact": 52, "538439": 52, "additional_inform": 52, "62998417": 52, "submission_d": 52, "09": 52, "nonsens": 52, "assert": 52, "laura": 53, "luebbert": 53, "lauraluebbert": 53, "caltech": 53, "edu": 53, "genom": 53, "databas": 53, "facilit": [53, 59], "cite": 53, "googl": 53, "colab": 53, "q": 53, "setup": 53, "notic": 53, "fri": 53, "jul": 53, "succesfulli": 53, "gget_cellxgen": 53, "speci": 53, "meta_onli": 53, "verbos": 53, "sub": 53, "arg": 53, "slc5a1": 53, "ensg00000130234": 53, "ensg00000100170": 53, "ui": 53, "celltyp": 53, "mucu": 53, "neuroendocrin": 53, "canon": 53, "cellular": 53, "reus": 53, "secondari": 53, "portal": 53, "blob": 53, "9b94ccb0a2e0a8f6182b213aa4852c491f6f6aff": 53, "wmg": 53, "tissue_mapp": 53, "three": [53, 54], "abca1": 53, "minut": 53, "3679": 53, "thousand": 53, "ensg00000165029": 53, "11343": 53, "5332": 53, "9739": 53, "24539": 53, "5081": 53, "3674": 53, "3675": 53, "3676": 53, "3677": 53, "3678": 53, "retina": 53, "config": 53, "inlinebackend": 53, "figure_format": 53, "dotplot": 53, "ensmusg00000015405": 53, "047d57f2": 53, "4d14": 53, "45de": 53, "aa98": 53, "336c6f583750": 53, "97547": 53, "97548": 53, "97549": 53, "97550": 53, "97551": 53, "97552": 53, "example_adata": 53, "example_meta": 53, "querycondit": 54, "2313": 54, "2308": 54, "2309": 54, "2310": 54, "2311": 54, "2312": 54, "8626": 54, "1884": 54, "27047": 54, "tubb4b": 54, "2037": 54, "materi": 
54, "shortli": 54, "comparison": 54, "op": 54, "sex_cell_metadata": 54, "669": 54, "385437": 54, "metatadata": 54, "cell_metadata_all_unknown_sex": 54, "9th": 54, "post": 54, "fertil": 54, "0000046": 54, "decidua": 54, "basali": 54, "0000453": 54, "placenta": 54, "0001987": 54, "3251329": 54, "56274573": 54, "cord": 54, "2000095": 54, "newborn": 54, "0000082": 54, "han": 54, "chines": 54, "0027": 54, "umbil": 54, "0012168": 54, "0000178": 54, "3251330": 54, "56274574": 54, "3251331": 54, "56274575": 54, "3251332": 54, "56274576": 54, "3251333": 54, "56274577": 54, "3251334": 54, "cell_metadata_b_cel": 54, "42720": 54, "10631": 54, "8742": 54, "8187": 54, "2083": 54, "1534": 54, "1512": 54, "1474": 54, "1210": 54, "332": 54, "204": 54, "133": 54, "gene_metadata": 54, "isn": 55, "narrow": 55, "as_index": 55, "0000001": 55, "0000006": 55, "2502": 55, "0000015": 55, "621": 55, "0000019": 55, "608": 55, "4028006": 55, "38250": 55, "609": 55, "4030009": 55, "tubul": 55, "segment": 55, "777": 55, "610": 55, "4030011": 55, "989": 55, "611": 55, "4030018": 55, "princip": 55, "107": [55, 56], "612": 55, "4030023": 55, "hillock": 55, "10170": 55, "semant": 56, "maxmimum": 56, "nois": 56, "disabl": 56, "docstr": 56, "hvgs_df": 56, "highly_variable_rank": 56, "230445": 56, "116": 56, "044863": 56, "749637": 56, "287551": 56, "276809": 56, "461324": 56, "407450": 56, "363945": 56, "055626": 56, "280": 56, "958509": 56, "combined_df": [56, 57], "188": 56, "ensmusg00000026117": 56, "zap70": 56, "2992": 56, "409091": 56, "14793": 56, "026717": 56, "350": 56, "775560": 56, "233": 56, "ensmusg00000026073": 56, "il1r2": 56, "1908": 56, "764085": 56, "41918": 56, "471500": 56, "402176": 56, "ensmusg00000026185": 56, "igfbp5": 56, "6006": 56, "234876": 56, "314355": 56, "591239": 56, "156": 56, "825651": 56, "ensmusg00000026180": 56, "cxcr2": 56, "3048": 56, "379390": 56, "10491": 56, "033344": 56, "640129": 56, "30296": 56, "ensmusg00000024803": 56, "ankrd1": 56, "2886": 56, "548572": 
56, "274005": 56, "455137": 56, "741864": 56, "30313": 56, "ensmusg00000024987": 56, "cyp26a1": 56, "1983": 56, "186686": 56, "12973": 56, "622003": 56, "454": 56, "580162": 56, "30379": 56, "ensmusg00000018822": 56, "sfrp5": 56, "1900": 56, "927853": 56, "10943": 56, "645525": 56, "410": 56, "637004": 56, "32042": 56, "ensmusg00000031838": 56, "ifi30": 56, "91": 56, "676950": 56, "995276": 56, "564962": 56, "205886": 56, "33314": 56, "ensmusg00000092572": 56, "serpinb10": 56, "3490": 56, "264085": 56, "239812": 56, "487": 56, "535469": 56, "who": 56, "own": 56, "mv_df": 57, "3095357": 57, "915025": 57, "69571": 57, "774917": 57, "3095359": 57, "972801": 57, "9471": 57, "427044": 57, "3095363": 57, "169472": 57, "139042": 57, "208628": 57, "3095366": 57, "049836": 57, "24762": 57, "926397": 57, "3095368": 57, "345415": 57, "150412": 57, "440839": 57, "3278898": 57, "164319": 57, "339741": 57, "3278899": 57, "368339": 57, "930156": 57, "3278900": 57, "246049": 57, "886186": 57, "3278901": 57, "240724": 57, "307266": 57, "3278902": 57, "278420": 57, "086994": 57, "9314": 57, "keratinocyt": [57, 58], "0002337": 57, "mmusdv": 57, "0000089": 57, "18_53_m": 57, "0002097": 57, "18_47_f": 57, "basal": [57, 58], "epidermi": 57, "0002187": 57, "0000091": 57, "epiderm": 57, "0000362": 57, "logist": 58, "regress": 58, "ml": 58, "primer": 58, "census_ml": 58, "experiment_datapip": 58, "10_000": 58, "mechan": 58, "encapsul": 58, "caller": 58, "importantli": 58, "lazili": 58, "avoid": 58, "legaci": 58, "interchang": 58, "shuffler": 58, "layout": 58, "strategi": 58, "held": 58, "1gb": 58, "caus": 58, "valid": 58, "randomsplitt": 58, "train_datapip": 58, "test_datapip": 58, "random_split": 58, "weight": 58, "togeth": 58, "experiment_dataload": 58, "style": 58, "enforc": 58, "linear": 58, "logisticregress": 58, "input_dim": 58, "output_dim": 58, "super": 58, "noqa": 58, "up008": 58, "sigmoid": 58, "train_epoch": 58, "train_dataload": 58, "loss_fn": 58, "devic": 58, "train_loss": 58, 
"train_correct": 58, "train_tot": 58, "zero_grad": 58, "softmax": 58, "loss": 58, "train_accuraci": 58, "secondli": 58, "42496620": 58, "42496621": 58, "42496622": 58, "42496633": 58, "42496634": 58, "42496635": 58, "desir": 58, "cuda": 58, "is_avail": 58, "cell_type_encod": 58, "crossentropyloss": 58, "adam": 58, "lr": 58, "7f": 58, "accuraci": 58, "4f": 58, "0167253": 58, "4856": 58, "0156710": 58, "4943": 58, "0149408": 58, "4813": 58, "0144469": 58, "5040": 58, "0141749": 58, "5669": 58, "0139776": 58, "6672": 58, "0138565": 58, "7920": 58, "0138094": 58, "8088": 58, "0136689": 58, "8757": 58, "0136101": 58, "8923": 58, "invok": 58, "eval": 58, "recov": 58, "At": 58, "unpickl": 58, "vein": 58, "123": 58, "124": 58, "127": 58, "helper": 59}, "objects": {"": [[59, 0, 0, "-", "cellxgene_census"]], "cellxgene_census": [[0, 1, 1, "", "download_source_h5ad"], [14, 1, 1, "", "get_anndata"], [15, 1, 1, "", "get_census_version_description"], [16, 1, 1, "", "get_census_version_directory"], [17, 1, 1, "", "get_default_soma_context"], [18, 1, 1, "", "get_obs"], [19, 1, 1, "", "get_presence_matrix"], [20, 1, 1, "", "get_source_h5ad_uri"], [21, 1, 1, "", "get_var"], [22, 1, 1, "", "open_soma"]], "cellxgene_census.experimental": [[1, 1, 1, "", "get_all_available_embeddings"], [2, 1, 1, "", "get_all_census_versions_with_embedding"], [3, 1, 1, "", "get_embedding"], [4, 1, 1, "", "get_embedding_metadata"], [5, 1, 1, "", "get_embedding_metadata_by_name"]], "cellxgene_census.experimental.ml.huggingface": [[6, 2, 1, "", "CellDatasetBuilder"], [7, 2, 1, "", "GeneformerTokenizer"]], "cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder": [[6, 3, 1, "", "__init__"]], "cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer": [[7, 3, 1, "", "__init__"]], "cellxgene_census.experimental.ml.pytorch": [[8, 2, 1, "", "ExperimentDataPipe"], [9, 2, 1, "", "Stats"], [10, 1, 1, "", "experiment_dataloader"]], "cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe": [[8, 
3, 1, "", "__init__"]], "cellxgene_census.experimental.ml.pytorch.Stats": [[9, 3, 1, "", "__init__"]], "cellxgene_census.experimental.pp": [[11, 1, 1, "", "get_highly_variable_genes"], [12, 1, 1, "", "highly_variable_genes"], [13, 1, 1, "", "mean_variance"]]}, "objtypes": {"0": "py:module", "1": "py:function", "2": "py:class", "3": "py:method"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"], "2": ["py", "class", "Python class"], "3": ["py", "method", "Python method"]}, "titleterms": {"cellxgene_censu": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 29, 46, 52], "download_source_h5ad": 0, "experiment": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 56, 59], "get_all_available_embed": 1, "get_all_census_versions_with_embed": 2, "get_embed": 3, "get_embedding_metadata": 4, "get_embedding_metadata_by_nam": 5, "ml": [6, 7, 8, 9, 10], "huggingfac": [6, 7], "celldatasetbuild": 6, "geneformertoken": 7, "pytorch": [8, 9, 10, 58], "experimentdatapip": [8, 58], "stat": [9, 26, 45], "experiment_dataload": 10, "pp": [11, 12, 13], "get_highly_variable_gen": [11, 56], "highly_variable_gen": [12, 56], "mean_vari": 13, "get_anndata": [14, 46, 52], "get_census_version_descript": 15, "get_census_version_directori": 16, "get_default_soma_context": 17, "get_ob": 18, "get_presence_matrix": 19, "get_source_h5ad_uri": 20, "get_var": 21, "open_soma": 22, "what": [23, 29, 30], "": [23, 53], "new": [23, 26, 29, 36], "2023": [23, 30], "2024": 23, "r": [24, 28, 31, 33], "packag": [24, 30, 42], "cellxgen": [24, 28, 32, 35, 37, 38, 46, 52, 53], "censu": [24, 26, 27, 28, 29, 30, 32, 34, 35, 36, 37, 38, 40, 41, 42, 43, 45, 47, 49, 50, 51, 52, 54, 57, 58, 59], "v1": 24, "i": [24, 29, 30], "out": [24, 51, 57], "instal": [24, 29, 31, 33, 53], "usag": 24, "made": 24, "possibl": 24, "tiledbsoma": 24, "effici": [24, 25, 33], "access": [24, 26, 28, 46, 52], "singl": [24, 25, 26, 29, 34, 41, 42, 50, 54], "cell": [24, 25, 
26, 27, 30, 33, 34, 35, 38, 40, 41, 42, 44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 55, 59], "data": [24, 26, 28, 29, 30, 32, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 50, 51, 52, 53, 54, 59], "33m": 24, "from": [24, 29, 39, 40, 41, 42, 50, 53], "easi": 24, "us": [24, 25, 26, 29, 32, 36, 37, 42, 44, 48, 53], "handl": 24, "cloud": 24, "host": [24, 29, 52], "queri": [24, 26, 29, 33, 46, 47, 52, 53, 54], "read": [24, 51], "metadata": [24, 26, 27, 30, 33, 35, 38, 41, 45, 47, 52, 53, 54], "export": [24, 26, 36], "slice": [24, 33, 39, 47, 57, 59], "seurat": [24, 33], "singlecellexperi": [24, 33], "stream": 24, "increment": [24, 48, 57], "chunk": 24, "memori": [25, 33], "implement": 25, "commonli": 25, "method": 25, "calcul": [25, 26, 41, 48, 55, 57], "averag": 25, "varianc": [25, 48, 57], "gene": [25, 26, 35, 38, 39, 41, 43, 45, 48, 49, 53, 54, 56], "express": [25, 41, 43, 50, 53, 54], "across": 25, "million": 25, "how": [25, 26, 28, 29], "work": 25, "exampl": [25, 38, 42, 43, 44, 45, 51, 57], "kra": 25, "aqp4": 25, "lung": [25, 40, 41], "epitheli": 25, "highli": [25, 56], "variabl": [25, 56], "find": [25, 39], "all": [25, 38, 41, 45, 49], "human": [25, 29, 38, 41], "esophagu": 25, "introduc": 26, "normal": [26, 29, 35, 39, 41, 43], "layer": [26, 29, 41], "pre": [26, 55], "statist": 26, "descript": 26, "ad": 26, "librari": 26, "size": 26, "enhanc": 26, "featur": [26, 29, 35, 59], "exist": 26, "toolkit": 26, "via": [26, 46, 47, 52], "tiledb": [26, 28], "soma": [26, 28, 34], "util": 26, "ob": [26, 27, 35, 51, 53, 54], "var": [26, 35, 54], "help": 26, "u": 26, "improv": 26, "addit": 26, "support": [27, 29, 30], "categor": 27, "potenti": 27, "break": 27, "chang": 27, "identifi": [27, 49], "column": 27, "encod": [27, 35], "cz": [28, 32, 35, 37, 38, 53], "discov": [28, 32, 35, 37, 53], "aw": 28, "avail": [28, 38], "specif": [28, 49], "releas": [28, 30, 32, 37], "version": [28, 30, 35, 59], "cli": 28, "programat": 28, "download": [28, 42, 44, 50], "api": [28, 29, 56, 57, 59], 
"python": [28, 29, 31, 33, 36, 59], "faq": 29, "why": [29, 51], "should": 29, "contain": 29, "do": 29, "cite": [29, 32, 37], "public": 29, "doe": 29, "have": 29, "embed": [29, 36, 40, 41, 42, 46, 52, 59], "differenti": 29, "other": [29, 42], "tool": [29, 32, 37, 39], "can": 29, "mous": [29, 39], "where": 29, "ar": [29, 51], "retriev": [29, 59], "origin": [29, 50], "h5ad": [29, 50], "dataset": [29, 35, 39, 41, 42, 49, 50, 58], "which": 29, "wa": 29, "built": 29, "increas": 29, "perform": [29, 42], "my": 29, "conda": 29, "ask": 29, "contribut": 29, "get": [29, 59], "an": [29, 46, 51, 52, 53, 58], "arrayschema": 29, "error": 29, "when": [29, 51], "open": [29, 38, 43, 45, 49, 54, 58, 59], "run": 29, "import": [29, 40, 42], "databrick": 29, "long": 30, "term": 30, "lt": 30, "weekli": 30, "latest": 30, "list": 30, "12": 30, "15": 30, "inform": [30, 35], "donor": 30, "count": [30, 35, 38, 48, 55], "embbed": 30, "07": 30, "25": 30, "05": 30, "errata": 30, "duplic": [30, 51], "observ": [30, 40], "is_primary_data": 30, "true": 30, "compat": 30, "requir": [31, 40, 42, 44, 47], "capabl": [32, 37], "schema": [32, 34, 35, 37], "question": [32, 37], "feedback": [32, 37], "issu": [32, 37], "come": [32, 37], "soon": [32, 37], "project": [32, 37, 42, 44], "quick": [33, 46, 52], "start": [33, 46, 52], "obtain": 33, "anndata": [33, 46, 47, 51, 52, 53, 59], "object": [33, 34, 53], "summari": [34, 35, 38, 41, 55], "info": [34, 38], "census_info": [34, 35], "census_data": [34, 35], "includ": [34, 35, 38], "overview": 35, "definit": [35, 40], "speci": 35, "multi": [35, 39], "constraint": 35, "assai": [35, 38, 41], "full": [35, 43, 45], "sequenc": [35, 38, 43], "matrix": [35, 49, 59], "type": [35, 38, 41, 44, 45, 53], "sampl": [35, 40], "repeat": 35, "organ": [35, 38], "census_obj": 35, "somacollect": 35, "somadatafram": 35, "tabl": [35, 38, 50], "summary_cell_count": 35, "somaexperi": 35, "raw": 35, "m": 35, "rna": 35, "x": [35, 48], "somasparsendarrai": 35, "presenc": [35, 49, 59], 
"feature_dataset_presence_matrix": 35, "changelog": 35, "2": 35, "0": 35, "1": 35, "3": 35, "tutori": 36, "integr": [36, 39], "model": [36, 42, 44, 58], "understand": [36, 38, 51], "analyz": 36, "scalabl": 36, "comput": [36, 48], "machin": [36, 59], "learn": [36, 38, 41, 59], "about": [38, 41], "main": 38, "compon": 38, "content": [38, 52], "each": [38, 49], "number": 38, "microgli": 38, "beyond": [38, 55], "liver": [38, 39], "diseas": [38, 41], "t": 38, "tissu": [38, 40, 41, 53], "fetch": [39, 40, 41, 43, 49, 50, 52, 53, 54, 55], "10x": [39, 42], "genom": 39, "smart": [39, 43], "seq2": 39, "length": [39, 43], "scvi": [39, 44, 46], "inspect": [39, 42], "prior": 39, "batch": 39, "defin": [39, 58], "dataset_id": [39, 48], "donor_id": 39, "assay_ontology_term_id": 39, "suspension_typ": 39, "explor": [40, 41, 43, 50, 55], "biolog": 40, "relev": 40, "cluster": [40, 43], "background": [40, 52], "function": 40, "melanocyt": 40, "ey": 40, "150k": 40, "retin": 40, "bipolar": 40, "neuron": 40, "dopaminerg": 40, "brain": 40, "pulmonari": 40, "ionocyt": 40, "tabula": [40, 51], "sapien": 40, "sex": 41, "v": 41, "nucleu": 41, "sub": 41, "qc": 41, "metric": 41, "creat": [41, 51, 55, 58], "geneform": [42, 46], "class": [42, 58], "predict": [42, 44, 58], "system": [42, 44], "fine": 42, "tune": 42, "prepar": 42, "subclass": 42, "infer": [42, 44], "load": [42, 46, 52], "token": 42, "result": 42, "gener": [42, 47], "pbmc": 42, "3k": 42, "join": 42, "seq": 43, "account": 43, "valid": 43, "through": 43, "train": [44, 58], "pretrain": 44, "summar": 45, "subset": 45, "select": [45, 53], "value_filt": 45, "collabor": 46, "storag": [46, 52], "format": [46, 52], "associ": [46, 52], "obsm": [46, 52], "slot": [46, 52], "experimentaxisqueri": [46, 52], "dens": [46, 52], "numpi": [46, 52], "arrai": [46, 52], "citat": 47, "string": 47, "onlin": 48, "algorithm": 48, "mean": [48, 57], "per": 48, "group": 48, "measur": 49, "id": 49, "sourc": 50, "file": 50, "filter": 51, "muri": 51, "seni": 51, 
"frame": 51, "core": [51, 57], "oper": 51, "gget": 53, "modul": 53, "set": 53, "up": 53, "plot": 53, "dot": 53, "similar": 53, "those": 53, "shown": 53, "onli": 53, "correspond": 53, "command": 53, "line": 53, "census_summary_cell_count": 55, "datafram": 55, "valu": 55, "The": 57, "explain": 58, "paramet": 58, "split": 58, "dataload": 58, "make": 58, "build": 59, "process": 59}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx": 57}, "alltitles": {"cellxgene_census.download_source_h5ad": [[0, "cellxgene-census-download-source-h5ad"]], "cellxgene_census.experimental.get_all_available_embeddings": [[1, "cellxgene-census-experimental-get-all-available-embeddings"]], "cellxgene_census.experimental.get_all_census_versions_with_embedding": [[2, "cellxgene-census-experimental-get-all-census-versions-with-embedding"]], "cellxgene_census.experimental.get_embedding": [[3, "cellxgene-census-experimental-get-embedding"]], "cellxgene_census.experimental.get_embedding_metadata": [[4, "cellxgene-census-experimental-get-embedding-metadata"]], "cellxgene_census.experimental.get_embedding_metadata_by_name": [[5, "cellxgene-census-experimental-get-embedding-metadata-by-name"]], "cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder": [[6, "cellxgene-census-experimental-ml-huggingface-celldatasetbuilder"]], "cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer": [[7, "cellxgene-census-experimental-ml-huggingface-geneformertokenizer"]], "cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe": [[8, "cellxgene-census-experimental-ml-pytorch-experimentdatapipe"]], "cellxgene_census.experimental.ml.pytorch.Stats": [[9, "cellxgene-census-experimental-ml-pytorch-stats"]], 
"cellxgene_census.experimental.ml.pytorch.experiment_dataloader": [[10, "cellxgene-census-experimental-ml-pytorch-experiment-dataloader"]], "cellxgene_census.experimental.pp.get_highly_variable_genes": [[11, "cellxgene-census-experimental-pp-get-highly-variable-genes"]], "cellxgene_census.experimental.pp.highly_variable_genes": [[12, "cellxgene-census-experimental-pp-highly-variable-genes"]], "cellxgene_census.experimental.pp.mean_variance": [[13, "cellxgene-census-experimental-pp-mean-variance"]], "cellxgene_census.get_anndata": [[14, "cellxgene-census-get-anndata"]], "cellxgene_census.get_census_version_description": [[15, "cellxgene-census-get-census-version-description"]], "cellxgene_census.get_census_version_directory": [[16, "cellxgene-census-get-census-version-directory"]], "cellxgene_census.get_default_soma_context": [[17, "cellxgene-census-get-default-soma-context"]], "cellxgene_census.get_obs": [[18, "cellxgene-census-get-obs"]], "cellxgene_census.get_presence_matrix": [[19, "cellxgene-census-get-presence-matrix"]], "cellxgene_census.get_source_h5ad_uri": [[20, "cellxgene-census-get-source-h5ad-uri"]], "cellxgene_census.get_var": [[21, "cellxgene-census-get-var"]], "cellxgene_census.open_soma": [[22, "cellxgene-census-open-soma"]], "What\u2019s new?": [[23, "what-s-new"]], "2023": [[23, "id1"]], "2024": [[23, "id2"]], "R package cellxgene.census V1 is out!": [[24, "r-package-cellxgene-census-v1-is-out"]], "Installation and usage": [[24, "installation-and-usage"]], "Census R package is made possible by tiledbsoma": [[24, "census-r-package-is-made-possible-by-tiledbsoma"]], "Efficient access to single-cell data for >33M cells from R": [[24, "efficient-access-to-single-cell-data-for-33m-cells-from-r"]], "Easy-to-use handles to the cloud-hosted Census data": [[24, "easy-to-use-handles-to-the-cloud-hosted-census-data"]], "Querying and reading single-cell metadata from Census": [[24, "querying-and-reading-single-cell-metadata-from-census"]], "Exporting Census 
slices to Seurat and SingleCellExperiment": [[24, "exporting-census-slices-to-seurat-and-singlecellexperiment"]], "Streaming data incrementally in chunks": [[24, "streaming-data-incrementally-in-chunks"]], "Memory-efficient implementations of commonly used single-cell methods": [[25, "memory-efficient-implementations-of-commonly-used-single-cell-methods"]], "Efficient calculation of average and variance gene expression across millions of cells": [[25, "efficient-calculation-of-average-and-variance-gene-expression-across-millions-of-cells"]], "How it works": [[25, "how-it-works"], [25, "id1"]], "Example: KRAS and AQP4 average and variance expression in lung epithelial cells": [[25, "example-kras-and-aqp4-average-and-variance-expression-in-lung-epithelial-cells"]], "Efficient calculation of highly variable genes across millions of cells": [[25, "efficient-calculation-of-highly-variable-genes-across-millions-of-cells"]], "Example: Finding highly variable genes for all cells of the human esophagus": [[25, "example-finding-highly-variable-genes-for-all-cells-of-the-human-esophagus"]], "Introducing a normalized layer and pre-calculated cell and gene statistics in Census": [[26, "introducing-a-normalized-layer-and-pre-calculated-cell-and-gene-statistics-in-census"]], "Description of new data added to Census": [[26, "description-of-new-data-added-to-census"]], "Added a new library-size normalized layer": [[26, "added-a-new-library-size-normalized-layer"]], "Enhanced gene metadata": [[26, "enhanced-gene-metadata"]], "Enhanced cell metadata": [[26, "enhanced-cell-metadata"]], "How to use the new features": [[26, "how-to-use-the-new-features"]], "Exporting the normalized data to existing single-cell toolkits": [[26, "exporting-the-normalized-data-to-existing-single-cell-toolkits"]], "Accessing library-size normalized data layer via TileDB-SOMA": [[26, "accessing-library-size-normalized-data-layer-via-tiledb-soma"]], "Utilizing pre-calculated stats for querying obs and var": 
[[26, "utilizing-pre-calculated-stats-for-querying-obs-and-var"]], "Help us improve these data additions": [[26, "help-us-improve-these-data-additions"]], "Census supports categoricals for cell metadata": [[27, "census-supports-categoricals-for-cell-metadata"]], "Potential breaking changes": [[27, "potential-breaking-changes"]], "Identifying the obs columns encoded as categorical": [[27, "identifying-the-obs-columns-encoded-as-categorical"]], "CZ CELLxGENE Discover Census in AWS": [[28, "cz-cellxgene-discover-census-in-aws"]], "Census data available in AWS": [[28, "census-data-available-in-aws"]], "Data specifications": [[28, "data-specifications"]], "Data release versioning": [[28, "data-release-versioning"]], "How to access AWS Census data": [[28, "how-to-access-aws-census-data"]], "AWS CLI for programatic downloads": [[28, "aws-cli-for-programatic-downloads"]], "CELLxGENE Census API (Python and R)": [[28, "cellxgene-census-api-python-and-r"]], "TileDB-SOMA API (Python and R)": [[28, "tiledb-soma-api-python-and-r"]], "FAQ": [[29, "faq"]], "Why should I use the Census?": [[29, "why-should-i-use-the-census"]], "What data is contained in the Census?": [[29, "what-data-is-contained-in-the-census"]], "How do I cite the use of the Census for a publication?": [[29, "how-do-i-cite-the-use-of-the-census-for-a-publication"]], "Why does the Census not have a normalized layer or embeddings?": [[29, "why-does-the-census-not-have-a-normalized-layer-or-embeddings"]], "How does the Census differentiate from other tools?": [[29, "how-does-the-census-differentiate-from-other-tools"]], "Can I query human and mouse data in a single query?": [[29, "can-i-query-human-and-mouse-data-in-a-single-query"]], "Where are the Census data hosted?": [[29, "where-are-the-census-data-hosted"]], "Can I retrieve the original H5AD datasets from which the Census was built?": [[29, "can-i-retrieve-the-original-h5ad-datasets-from-which-the-census-was-built"]], "How can I increase the performance of my 
queries?": [[29, "how-can-i-increase-the-performance-of-my-queries"]], "Can I use conda to install the Census Python API?": [[29, "can-i-use-conda-to-install-the-census-python-api"]], "How can I ask for support?": [[29, "how-can-i-ask-for-support"]], "How can I ask for new features?": [[29, "how-can-i-ask-for-new-features"]], "How can I contribute my data to the Census?": [[29, "how-can-i-contribute-my-data-to-the-census"]], "Why do I get an ArraySchema error when opening the Census?": [[29, "why-do-i-get-an-arrayschema-error-when-opening-the-census"]], "Why do I get an error when running import cellxgene_census on Databricks?": [[29, "why-do-i-get-an-error-when-running-import-cellxgene-census-on-databricks"]], "Census data releases": [[30, "census-data-releases"]], "What is a Census data release?": [[30, "what-is-a-census-data-release"]], "Long-term supported (LTS) Census releases": [[30, "long-term-supported-lts-census-releases"]], "Weekly Census releases (latest)": [[30, "weekly-census-releases-latest"]], "List of LTS Census data releases": [[30, "list-of-lts-census-data-releases"]], "LTS 2023-12-15": [[30, "lts-2023-12-15"]], "Version information": [[30, "version-information"], [30, "id1"], [30, "id4"]], "Cell and donor counts": [[30, "cell-and-donor-counts"], [30, "id2"], [30, "id5"]], "Cell metadata": [[30, "cell-metadata"], [30, "id3"], [30, "id6"], [38, "Cell-metadata"]], "Cell embbedings": [[30, "cell-embbedings"]], "LTS 2023-07-25": [[30, "lts-2023-07-25"]], "LTS 2023-05-15": [[30, "lts-2023-05-15"]], "\ud83d\udd34 Errata \ud83d\udd34": [[30, "errata"]], "Duplicate observations with is_primary_data = True": [[30, "duplicate-observations-with-is-primary-data-true"]], "Compatibility with package versions": [[30, "compatibility-with-package-versions"]], "Installation": [[31, "installation"], [33, "installation"]], "Requirements": [[31, "requirements"], [40, "Requirements"], [42, "Requirements"], [44, "Requirements"], [47, "Requirements"]], "Python": [[31, 
"python"]], "R": [[31, "r"]], "CZ CELLxGENE Discover Census": [[32, "cz-cellxgene-discover-census"], [37, "cz-cellxgene-discover-census"]], "Citing Census": [[32, "citing-census"], [37, "citing-census"]], "Census Capabilities": [[32, "census-capabilities"], [37, "census-capabilities"]], "Census Data and Schema": [[32, "census-data-and-schema"], [37, "census-data-and-schema"]], "Census Data Releases": [[32, "census-data-releases"], [37, "census-data-releases"]], "Questions, Feedback and Issues": [[32, "questions-feedback-and-issues"], [37, "questions-feedback-and-issues"]], "Coming Soon!": [[32, "coming-soon"], [37, "coming-soon"]], "Projects and Tools Using Census": [[32, "projects-and-tools-using-census"], [37, "projects-and-tools-using-census"]], "Quick start": [[33, "quick-start"], [46, "Quick-start"], [52, "Quick-start"]], "Python quick start": [[33, "python-quick-start"]], "Querying a slice of cell metadata": [[33, "querying-a-slice-of-cell-metadata"], [33, "id1"]], "Obtaining a slice as AnnData": [[33, "obtaining-a-slice-as-anndata"]], "Memory-efficient queries": [[33, "memory-efficient-queries"], [33, "id2"]], "R quick start": [[33, "r-quick-start"]], "Obtaining a slice as a Seurat or SingleCellExperiment object": [[33, "obtaining-a-slice-as-a-seurat-or-singlecellexperiment-object"]], "Census data and schema": [[34, "census-data-and-schema"]], "Schema": [[34, "schema"], [35, "schema"]], "Census summary info \"census_info\"": [[34, "census-summary-info-census-info"]], "Census single-cell data \"census_data\"": [[34, "census-single-cell-data-census-data"]], "Data included in the Census": [[34, "data-included-in-the-census"]], "SOMA objects": [[34, "soma-objects"]], "CZ CELLxGENE Discover Census Schema": [[35, "cz-cellxgene-discover-census-schema"]], "Census overview": [[35, "census-overview"]], "Definitions": [[35, "definitions"]], "Census Schema versioning": [[35, "census-schema-versioning"]], "Data included": [[35, "data-included"]], "Species": [[35, 
"species"]], "Multi-species data constraints": [[35, "multi-species-data-constraints"]], "Assays": [[35, "assays"], [41, "Assays"]], "Full-gene sequencing assays": [[35, "full-gene-sequencing-assays"]], "Data matrix types": [[35, "data-matrix-types"]], "Sample types": [[35, "sample-types"]], "Repeated data": [[35, "repeated-data"]], "Data encoding and organization": [[35, "data-encoding-and-organization"]], "Census information census_obj[\"census_info\"] - SOMACollection": [[35, "census-information-census-obj-census-info-somacollection"]], "Census metadata \u2013 census_obj\u200b\u200b[\"census_info\"][\"summary\"] \u2013 SOMADataFrame": [[35, "census-metadata-census-obj-census-info-summary-somadataframe"]], "Census table of CELLxGENE Discover datasets \u2013 census_obj[\"census_info\"][\"datasets\"] \u2013 SOMADataFrame": [[35, "census-table-of-cellxgene-discover-datasets-census-obj-census-info-datasets-somadataframe"]], "Census summary cell counts \u2013 census_obj[\"census_info\"][\"summary_cell_counts\"] \u2013 SOMADataframe": [[35, "census-summary-cell-counts-census-obj-census-info-summary-cell-counts-somadataframe"]], "Census table of organisms \u2013 census_obj[\"census_info\"][\"organisms\"] \u2013 SOMADataframe": [[35, "census-table-of-organisms-census-obj-census-info-organisms-somadataframe"]], "Census Data \u2013 census_obj[\"census_data\"][organism] \u2013 SOMAExperiment": [[35, "census-data-census-obj-census-data-organism-somaexperiment"]], "Matrix Data, count (raw) matrix \u2013 census_obj[\"census_data\"][organism].ms[\"RNA\"].X[\"raw\"] \u2013 SOMASparseNDArray": [[35, "matrix-data-count-raw-matrix-census-obj-census-data-organism-ms-rna-x-raw-somasparsendarray"]], "Matrix Data, normalized count matrix \u2013 census_obj[\"census_data\"][organism].ms[\"RNA\"].X[\"normalized\"] \u2013 SOMASparseNDArray": [[35, "matrix-data-normalized-count-matrix-census-obj-census-data-organism-ms-rna-x-normalized-somasparsendarray"]], "Feature metadata \u2013 
census_obj[\"census_data\"][organism].ms[\"RNA\"].var \u2013 SOMADataFrame": [[35, "feature-metadata-census-obj-census-data-organism-ms-rna-var-somadataframe"]], "Feature dataset presence matrix \u2013 census_obj[\"census_data\"][organism].ms[\"RNA\"][\"feature_dataset_presence_matrix\"] \u2013 SOMASparseNDArray": [[35, "feature-dataset-presence-matrix-census-obj-census-data-organism-ms-rna-feature-dataset-presence-matrix-somasparsendarray"]], "Cell metadata \u2013 census_obj[\"census_data\"][organism].obs \u2013 SOMADataFrame": [[35, "cell-metadata-census-obj-census-data-organism-obs-somadataframe"]], "Changelog": [[35, "changelog"]], "Version 2.0.0": [[35, "version-2-0-0"]], "Version 1.3.0": [[35, "version-1-3-0"]], "Version 1.2.0": [[35, "version-1-2-0"]], "Version 1.1.0": [[35, "version-1-1-0"]], "Version 1.0.0": [[35, "version-1-0-0"]], "Version 0.1.1": [[35, "version-0-1-1"]], "Version 0.1.0": [[35, "version-0-1-0"]], "Version 0.0.1": [[35, "version-0-0-1"]], "Python tutorials": [[36, "python-tutorials"]], "Exporting data": [[36, "exporting-data"]], "[NEW! 
\ud83d\ude80] Using integrated embeddings and models": [[36, "new-using-integrated-embeddings-and-models"]], "Understanding Census data": [[36, "understanding-census-data"]], "Analyzing Census data": [[36, "analyzing-census-data"]], "Scalable computing": [[36, "scalable-computing"]], "Scalable machine learning": [[36, "scalable-machine-learning"]], "Learning about the CZ CELLxGENE Census": [[38, "Learning-about-the-CZ-CELLxGENE-Census"]], "Opening the Census": [[38, "Opening-the-Census"], [45, "Opening-the-Census"], [49, "Opening-the-Census"]], "Census organization": [[38, "Census-organization"]], "Main Census components": [[38, "Main-Census-components"]], "Census summary info": [[38, "Census-summary-info"]], "Census data": [[38, "Census-data"]], "Gene metadata": [[38, "Gene-metadata"]], "Census summary content tables": [[38, "Census-summary-content-tables"]], "Cell counts by cell metadata": [[38, "Cell-counts-by-cell-metadata"]], "Example: cell metadata included in the summary counts table": [[38, "Example:-cell-metadata-included-in-the-summary-counts-table"]], "Example: cell counts for each sequencing assay in human data": [[38, "Example:-cell-counts-for-each-sequencing-assay-in-human-data"]], "Example: number of microglial cells in the Census": [[38, "Example:-number-of-microglial-cells-in-the-Census"]], "Understanding Census contents beyond the summary tables": [[38, "Understanding-Census-contents-beyond-the-summary-tables"]], "Example: all cell types available in human": [[38, "Example:-all-cell-types-available-in-human"]], "Example: cell types available in human liver": [[38, "Example:-cell-types-available-in-human-liver"]], "Example: diseased T cells in human tissues": [[38, "Example:-diseased-T-cells-in-human-tissues"]], "Integrating multi-dataset slices of data": [[39, "Integrating-multi-dataset-slices-of-data"]], "Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)": [[39, 
"Finding-and-fetching-data-from-mouse-liver-(10X-Genomics-and-Smart-Seq2)"]], "Gene-length normalization of Smart-Seq2 data.": [[39, "Gene-length-normalization-of-Smart-Seq2-data."]], "Integration with scvi-tools": [[39, "Integration-with-scvi-tools"]], "Inspecting data prior to integration": [[39, "Inspecting-data-prior-to-integration"]], "Data integration with scVI": [[39, "Data-integration-with-scVI"]], "Integration with batch defined as dataset_id": [[39, "Integration-with-batch-defined-as-dataset_id"]], "Integration with batch defined as dataset_id + donor_id": [[39, "Integration-with-batch-defined-as-dataset_id-+-donor_id"]], "Integration with batch defined as dataset_id + donor_id + assay_ontology_term_id + suspension_type": [[39, "Integration-with-batch-defined-as-dataset_id-+-donor_id-+-assay_ontology_term_id-+-suspension_type"]], "Exploring biologically relevant clusters in Census embeddings": [[40, "Exploring-biologically-relevant-clusters-in-Census-embeddings"]], "Background": [[40, "Background"], [52, "Background"]], "Imports and function definitions": [[40, "Imports-and-function-definitions"]], "Melanocytes in eye": [[40, "Melanocytes-in-eye"]], "Sample and fetch 150k cells from eye tissue": [[40, "Sample-and-fetch-150k-cells-from-eye-tissue"]], "Observations": [[40, "Observations"], [40, "id1"], [40, "id2"]], "Retinal bipolar neurons in eye": [[40, "Retinal-bipolar-neurons-in-eye"]], "Dopaminergic neurons in brain": [[40, "Dopaminergic-neurons-in-brain"]], "Sample and fetch 150k cells from brain tissue": [[40, "Sample-and-fetch-150k-cells-from-brain-tissue"]], "Pulmonary ionocytes in lung (Tabula Sapiens)": [[40, "Pulmonary-ionocytes-in-lung-(Tabula-Sapiens)"]], "Fetch lung cells from Tabula Sapiens": [[40, "Fetch-lung-cells-from-Tabula-Sapiens"]], "Exploring all data from a tissue": [[41, "Exploring-all-data-from-a-tissue"]], "Learning about the lung data in the Census": [[41, "Learning-about-the-lung-data-in-the-Census"]], "Learning about cells of 
lung data": [[41, "Learning-about-cells-of-lung-data"]], "Datasets": [[41, "Datasets"]], "Disease": [[41, "Disease"]], "Sex": [[41, "Sex"]], "Cell vs nucleus": [[41, "Cell-vs-nucleus"]], "Cell types": [[41, "Cell-types"]], "Sub-tissues": [[41, "Sub-tissues"]], "Learning about genes of lung data": [[41, "Learning-about-genes-of-lung-data"]], "Summary of lung metadata": [[41, "Summary-of-lung-metadata"]], "Fetching all single-cell human lung data from the Census": [[41, "Fetching-all-single-cell-human-lung-data-from-the-Census"]], "Calculating QC metrics of the lung data": [[41, "Calculating-QC-metrics-of-the-lung-data"]], "Creating a normalized expression layer and embeddings": [[41, "Creating-a-normalized-expression-layer-and-embeddings"]], "Geneformer for cell class prediction and data projection": [[42, "Geneformer-for-cell-class-prediction-and-data-projection"]], "System requirements": [[42, "System-requirements"], [44, "System-requirements"]], "Downloading example data": [[42, "Downloading-example-data"], [44, "Downloading-example-data"]], "Downloading the fine-tuned Geneformer model": [[42, "Downloading-the-fine-tuned-Geneformer-model"]], "Importing required packages": [[42, "Importing-required-packages"]], "Preparing data and model": [[42, "Preparing-data-and-model"]], "Preparing single-cell data": [[42, "Preparing-single-cell-data"]], "Preparing data from model": [[42, "Preparing-data-from-model"]], "Using the Geneformer fine-tuned model for cell subclass inference": [[42, "Using-the-Geneformer-fine-tuned-model-for-cell-subclass-inference"]], "Loading tokenized data": [[42, "Loading-tokenized-data"]], "Performing inference of cell subclass": [[42, "Performing-inference-of-cell-subclass"]], "Inspecting inference results": [[42, "Inspecting-inference-results"]], "Using the Geneformer fine-tuned model for data projection": [[42, "Using-the-Geneformer-fine-tuned-model-for-data-projection"]], "Generating Geneformer embeddings for 10X PBMC 3K data": [[42, 
"Generating-Geneformer-embeddings-for-10X-PBMC-3K-data"]], "Joining Geneformer embeddings from 10X PBMC 3K data with other Census datasets": [[42, "Joining-Geneformer-embeddings-from-10X-PBMC-3K-data-with-other-Census-datasets"]], "Normalizing full-length gene sequencing data": [[43, "Normalizing-full-length-gene-sequencing-data"]], "Opening the census": [[43, "Opening-the-census"], [54, "Opening-the-census"]], "Fetching full-length example sequencing data (Smart-Seq)": [[43, "Fetching-full-length-example-sequencing-data-(Smart-Seq)"]], "Normalizing expression to account for gene length": [[43, "Normalizing-expression-to-account-for-gene-length"]], "Validation through clustering exploration": [[43, "Validation-through-clustering-exploration"]], "scVI for cell type prediction and data projection": [[44, "scVI-for-cell-type-prediction-and-data-projection"]], "Downloading the trained scVI model": [[44, "Downloading-the-trained-scVI-model"]], "Using the scVI pretrained model for data projection": [[44, "Using-the-scVI-pretrained-model-for-data-projection"]], "Using the scVI pretrained model for cell cell type inference.": [[44, "Using-the-scVI-pretrained-model-for-cell-cell-type-inference."]], "Summarizing cell and gene metadata": [[45, "Summarizing-cell-and-gene-metadata"]], "Summarizing cell metadata": [[45, "Summarizing-cell-metadata"]], "Example: Summarize all cell types": [[45, "Example:-Summarize-all-cell-types"]], "Example: Summarize a subset of cell types, selected with a value_filter": [[45, "Example:-Summarize-a-subset-of-cell-types,-selected-with-a-value_filter"]], "Full Census metadata stats": [[45, "Full-Census-metadata-stats"]], "Access CELLxGENE collaboration embeddings (scVI, Geneformer)": [[46, "Access-CELLxGENE-collaboration-embeddings-(scVI,-Geneformer)"]], "Storage format": [[46, "Storage-format"], [52, "Storage-format"]], "Query cells and load associated embeddings": [[46, "Query-cells-and-load-associated-embeddings"], [52, 
"Query-cells-and-load-associated-embeddings"]], "Loading embeddings into an AnnData obsm slot": [[46, "Loading-embeddings-into-an-AnnData-obsm-slot"]], "AnnData embeddings via cellxgene_census.get_anndata()": [[46, "AnnData-embeddings-via-cellxgene_census.get_anndata()"], [52, "AnnData-embeddings-via-cellxgene_census.get_anndata()"]], "AnnData embeddings via ExperimentAxisQuery": [[46, "AnnData-embeddings-via-ExperimentAxisQuery"], [52, "AnnData-embeddings-via-ExperimentAxisQuery"]], "Load an embedding into a dense NumPy array": [[46, "Load-an-embedding-into-a-dense-NumPy-array"], [52, "Load-an-embedding-into-a-dense-NumPy-array"]], "Generating citations for Census slices": [[47, "Generating-citations-for-Census-slices"]], "Generating citation strings": [[47, "Generating-citation-strings"]], "Via cell metadata query": [[47, "Via-cell-metadata-query"]], "Via AnnData query": [[47, "Via-AnnData-query"]], "Computing on X using online (incremental) algorithms": [[48, "Computing-on-X-using-online-(incremental)-algorithms"]], "Incremental count and mean calculation.": [[48, "Incremental-count-and-mean-calculation."]], "Incremental variance calculation": [[48, "Incremental-variance-calculation"]], "Counting cells per gene, grouped by dataset_id": [[48, "Counting-cells-per-gene,-grouped-by-dataset_id"]], "Genes measured in each cell (dataset presence matrix)": [[49, "Genes-measured-in-each-cell-(dataset-presence-matrix)"]], "Fetching the IDs of the Census datasets": [[49, "Fetching-the-IDs-of-the-Census-datasets"]], "Fetching the dataset presence matrix": [[49, "Fetching-the-dataset-presence-matrix"]], "Identifying genes measured in a specific dataset.": [[49, "Identifying-genes-measured-in-a-specific-dataset."]], "Identifying datasets that measured specific genes": [[49, "Identifying-datasets-that-measured-specific-genes"]], "Identifying all genes measured in a dataset": [[49, "Identifying-all-genes-measured-in-a-dataset"]], "Exploring the Census Datasets table": [[50, 
"Exploring-the-Census-Datasets-table"]], "Fetching the datasets table": [[50, "Fetching-the-datasets-table"]], "Fetching the expression data from a single dataset": [[50, "Fetching-the-expression-data-from-a-single-dataset"]], "Downloading the original source H5AD file of a dataset.": [[50, "Downloading-the-original-source-H5AD-file-of-a-dataset."]], "Understanding and filtering out duplicate cells": [[51, "Understanding-and-filtering-out-duplicate-cells"]], "Why are there duplicate cells in the Census?": [[51, "Why-are-there-duplicate-cells-in-the-Census?"]], "An example: duplicate cells in the Tabula Muris Senis data": [[51, "An-example:-duplicate-cells-in-the-Tabula-Muris-Senis-data"]], "Filtering out duplicate cells": [[51, "Filtering-out-duplicate-cells"]], "Filtering out duplicate cells when reading the obs data frame.": [[51, "Filtering-out-duplicate-cells-when-reading-the-obs-data-frame."]], "Filtering out duplicate cells when creating an AnnData": [[51, "Filtering-out-duplicate-cells-when-creating-an-AnnData"]], "Filtering out duplicate cells for out-of-core operations.": [[51, "Filtering-out-duplicate-cells-for-out-of-core-operations."]], "Access CELLxGENE-hosted embeddings": [[52, "Access-CELLxGENE-hosted-embeddings"]], "Contents": [[52, "Contents"]], "Load an embedding into an AnnData obsm slot": [[52, "Load-an-embedding-into-an-AnnData-obsm-slot"]], "Load embeddings and fetch associated Census data": [[52, "Load-embeddings-and-fetch-associated-Census-data"]], "Embedding Metadata": [[52, "Embedding-Metadata"]], "Querying data using the gget cellxgene module": [[53, "Querying-data-using-the-gget-cellxgene-module"]], "Install gget and set up cellxgene module": [[53, "Install-gget-and-set-up-cellxgene-module"]], "Fetch an AnnData object by selecting gene(s), tissue(s) and cell type(s)": [[53, "Fetch-an-AnnData-object-by-selecting-gene(s),-tissue(s)-and-cell-type(s)"]], "Plot a dot plot similar to those shown on the CZ CELLxGENE Discover Gene Expression": 
[[53, "Plot-a-dot-plot-similar-to-those-shown-on-the-CZ-CELLxGENE-Discover-Gene-Expression"]], "Fetch only cell metadata (corresponds to AnnData.obs)": [[53, "Fetch-only-cell-metadata-(corresponds-to-AnnData.obs)"]], "Use gget cellxgene from the command line": [[53, "Use-gget-cellxgene-from-the-command-line"]], "Querying and fetching the single-cell data and cell/gene metadata.": [[54, "Querying-and-fetching-the-single-cell-data-and-cell/gene-metadata."]], "Querying expression data": [[54, "Querying-expression-data"]], "Querying cell metadata (obs)": [[54, "Querying-cell-metadata-(obs)"]], "Querying gene metadata (var)": [[54, "Querying-gene-metadata-(var)"]], "Exploring pre-calculated summary cell counts": [[55, "Exploring-pre-calculated-summary-cell-counts"]], "Fetching the census_summary_cell_counts dataframe": [[55, "Fetching-the-census_summary_cell_counts-dataframe"]], "Creating summary counts beyond pre-calculated values.": [[55, "Creating-summary-counts-beyond-pre-calculated-values."]], "Experimental Highly Variable Genes API": [[56, "Experimental-Highly-Variable-Genes-API"]], "get_highly_variable_genes": [[56, "get_highly_variable_genes"]], "highly_variable_genes": [[56, "highly_variable_genes"]], "Out-of-core (incremental) mean and variance calculation": [[57, "Out-of-core-(incremental)-mean-and-variance-calculation"]], "The mean and variance API": [[57, "The-mean-and-variance-API"]], "Example: calculate mean and variance for a slice of the Census": [[57, "Example:-calculate-mean-and-variance-for-a-slice-of-the-Census"]], "Training a PyTorch Model": [[58, "Training-a-PyTorch-Model"]], "Open the Census": [[58, "Open-the-Census"]], "Create an ExperimentDataPipe": [[58, "Create-an-ExperimentDataPipe"]], "ExperimentDataPipe class explained": [[58, "ExperimentDataPipe-class-explained"]], "ExperimentDataPipe parameters explained": [[58, "ExperimentDataPipe-parameters-explained"]], "Split the dataset": [[58, "Split-the-dataset"]], "Create the DataLoader": [[58, 
"Create-the-DataLoader"]], "Define the model": [[58, "Define-the-model"]], "Train the model": [[58, "Train-the-model"]], "Make predictions with the model": [[58, "Make-predictions-with-the-model"]], "Python API": [[59, "module-cellxgene_census"]], "Open/retrieve Cell Census data": [[59, "open-retrieve-cell-census-data"]], "Get slice as AnnData": [[59, "get-slice-as-anndata"]], "Feature presence matrix": [[59, "feature-presence-matrix"]], "Versioning of Cell Census builds": [[59, "versioning-of-cell-census-builds"]], "Experimental: Machine Learning": [[59, "experimental-machine-learning"]], "Experimental: Processing": [[59, "experimental-processing"]], "Experimental: Embeddings": [[59, "experimental-embeddings"]]}, "indexentries": {"download_source_h5ad() (in module cellxgene_census)": [[0, "cellxgene_census.download_source_h5ad"]], "get_all_available_embeddings() (in module cellxgene_census.experimental)": [[1, "cellxgene_census.experimental.get_all_available_embeddings"]], "get_all_census_versions_with_embedding() (in module cellxgene_census.experimental)": [[2, "cellxgene_census.experimental.get_all_census_versions_with_embedding"]], "get_embedding() (in module cellxgene_census.experimental)": [[3, "cellxgene_census.experimental.get_embedding"]], "get_embedding_metadata() (in module cellxgene_census.experimental)": [[4, "cellxgene_census.experimental.get_embedding_metadata"]], "get_embedding_metadata_by_name() (in module cellxgene_census.experimental)": [[5, "cellxgene_census.experimental.get_embedding_metadata_by_name"]], "celldatasetbuilder (class in cellxgene_census.experimental.ml.huggingface)": [[6, "cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder"]], "__init__() (cellxgene_census.experimental.ml.huggingface.celldatasetbuilder method)": [[6, "cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder.__init__"]], "geneformertokenizer (class in cellxgene_census.experimental.ml.huggingface)": [[7, 
"cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer"]], "__init__() (cellxgene_census.experimental.ml.huggingface.geneformertokenizer method)": [[7, "cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer.__init__"]], "experimentdatapipe (class in cellxgene_census.experimental.ml.pytorch)": [[8, "cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe"]], "__init__() (cellxgene_census.experimental.ml.pytorch.experimentdatapipe method)": [[8, "cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe.__init__"]], "stats (class in cellxgene_census.experimental.ml.pytorch)": [[9, "cellxgene_census.experimental.ml.pytorch.Stats"]], "__init__() (cellxgene_census.experimental.ml.pytorch.stats method)": [[9, "cellxgene_census.experimental.ml.pytorch.Stats.__init__"]], "experiment_dataloader() (in module cellxgene_census.experimental.ml.pytorch)": [[10, "cellxgene_census.experimental.ml.pytorch.experiment_dataloader"]], "get_highly_variable_genes() (in module cellxgene_census.experimental.pp)": [[11, "cellxgene_census.experimental.pp.get_highly_variable_genes"]], "highly_variable_genes() (in module cellxgene_census.experimental.pp)": [[12, "cellxgene_census.experimental.pp.highly_variable_genes"]], "mean_variance() (in module cellxgene_census.experimental.pp)": [[13, "cellxgene_census.experimental.pp.mean_variance"]], "get_anndata() (in module cellxgene_census)": [[14, "cellxgene_census.get_anndata"]], "get_census_version_description() (in module cellxgene_census)": [[15, "cellxgene_census.get_census_version_description"]], "get_census_version_directory() (in module cellxgene_census)": [[16, "cellxgene_census.get_census_version_directory"]], "get_default_soma_context() (in module cellxgene_census)": [[17, "cellxgene_census.get_default_soma_context"]], "get_obs() (in module cellxgene_census)": [[18, "cellxgene_census.get_obs"]], "get_presence_matrix() (in module cellxgene_census)": [[19, "cellxgene_census.get_presence_matrix"]], 
"get_source_h5ad_uri() (in module cellxgene_census)": [[20, "cellxgene_census.get_source_h5ad_uri"]], "get_var() (in module cellxgene_census)": [[21, "cellxgene_census.get_var"]], "open_soma() (in module cellxgene_census)": [[22, "cellxgene_census.open_soma"]], "cellxgene_census": [[59, "module-cellxgene_census"]], "module": [[59, "module-cellxgene_census"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["_autosummary/cellxgene_census.download_source_h5ad", "_autosummary/cellxgene_census.experimental.get_all_available_embeddings", "_autosummary/cellxgene_census.experimental.get_all_census_versions_with_embedding", "_autosummary/cellxgene_census.experimental.get_embedding", "_autosummary/cellxgene_census.experimental.get_embedding_metadata", "_autosummary/cellxgene_census.experimental.get_embedding_metadata_by_name", "_autosummary/cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder", "_autosummary/cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer", "_autosummary/cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe", "_autosummary/cellxgene_census.experimental.ml.pytorch.Stats", "_autosummary/cellxgene_census.experimental.ml.pytorch.experiment_dataloader", "_autosummary/cellxgene_census.experimental.pp.get_highly_variable_genes", "_autosummary/cellxgene_census.experimental.pp.highly_variable_genes", "_autosummary/cellxgene_census.experimental.pp.mean_variance", "_autosummary/cellxgene_census.get_anndata", "_autosummary/cellxgene_census.get_census_version_description", "_autosummary/cellxgene_census.get_census_version_directory", "_autosummary/cellxgene_census.get_default_soma_context", "_autosummary/cellxgene_census.get_obs", "_autosummary/cellxgene_census.get_presence_matrix", "_autosummary/cellxgene_census.get_source_h5ad_uri", "_autosummary/cellxgene_census.get_var", "_autosummary/cellxgene_census.open_soma", "articles", "articles/2023/20230808-r_api_release", "articles/2023/20230919-out_of_core_methods", 
"articles/2023/20231012-normalized_layer_precalc_stats", "articles/2024/20240404-categoricals", "cellxgene_census_aws_open_data", "cellxgene_census_docsite_FAQ", "cellxgene_census_docsite_data_release_info", "cellxgene_census_docsite_installation", "cellxgene_census_docsite_landing", "cellxgene_census_docsite_quick_start", "cellxgene_census_docsite_schema", "cellxgene_census_schema", "examples", "index", "notebooks/analysis_demo/comp_bio_census_info", "notebooks/analysis_demo/comp_bio_data_integration_scvi", "notebooks/analysis_demo/comp_bio_embedding_exploration", "notebooks/analysis_demo/comp_bio_explore_and_load_lung_data", "notebooks/analysis_demo/comp_bio_geneformer_prediction", "notebooks/analysis_demo/comp_bio_normalizing_full_gene_sequencing", "notebooks/analysis_demo/comp_bio_scvi_model_use", "notebooks/analysis_demo/comp_bio_summarize_axis_query", "notebooks/api_demo/census_access_maintained_embeddings", "notebooks/api_demo/census_citation_generation", "notebooks/api_demo/census_compute_over_X", "notebooks/api_demo/census_dataset_presence", "notebooks/api_demo/census_datasets", "notebooks/api_demo/census_duplicated_cells", "notebooks/api_demo/census_embedding", "notebooks/api_demo/census_gget_demo", "notebooks/api_demo/census_query_extract", "notebooks/api_demo/census_summary_cell_counts", "notebooks/experimental/highly_variable_genes", "notebooks/experimental/mean_variance", "notebooks/experimental/pytorch", "python-api"], "filenames": ["_autosummary/cellxgene_census.download_source_h5ad.rst", "_autosummary/cellxgene_census.experimental.get_all_available_embeddings.rst", "_autosummary/cellxgene_census.experimental.get_all_census_versions_with_embedding.rst", "_autosummary/cellxgene_census.experimental.get_embedding.rst", "_autosummary/cellxgene_census.experimental.get_embedding_metadata.rst", "_autosummary/cellxgene_census.experimental.get_embedding_metadata_by_name.rst", "_autosummary/cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder.rst", 
"_autosummary/cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer.rst", "_autosummary/cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe.rst", "_autosummary/cellxgene_census.experimental.ml.pytorch.Stats.rst", "_autosummary/cellxgene_census.experimental.ml.pytorch.experiment_dataloader.rst", "_autosummary/cellxgene_census.experimental.pp.get_highly_variable_genes.rst", "_autosummary/cellxgene_census.experimental.pp.highly_variable_genes.rst", "_autosummary/cellxgene_census.experimental.pp.mean_variance.rst", "_autosummary/cellxgene_census.get_anndata.rst", "_autosummary/cellxgene_census.get_census_version_description.rst", "_autosummary/cellxgene_census.get_census_version_directory.rst", "_autosummary/cellxgene_census.get_default_soma_context.rst", "_autosummary/cellxgene_census.get_obs.rst", "_autosummary/cellxgene_census.get_presence_matrix.rst", "_autosummary/cellxgene_census.get_source_h5ad_uri.rst", "_autosummary/cellxgene_census.get_var.rst", "_autosummary/cellxgene_census.open_soma.rst", "articles.rst", "articles/2023/20230808-r_api_release.md", "articles/2023/20230919-out_of_core_methods.md", "articles/2023/20231012-normalized_layer_precalc_stats.md", "articles/2024/20240404-categoricals.md", "cellxgene_census_aws_open_data.md", "cellxgene_census_docsite_FAQ.md", "cellxgene_census_docsite_data_release_info.md", "cellxgene_census_docsite_installation.md", "cellxgene_census_docsite_landing.md", "cellxgene_census_docsite_quick_start.md", "cellxgene_census_docsite_schema.md", "cellxgene_census_schema.md", "examples.rst", "index.rst", "notebooks/analysis_demo/comp_bio_census_info.ipynb", "notebooks/analysis_demo/comp_bio_data_integration_scvi.ipynb", "notebooks/analysis_demo/comp_bio_embedding_exploration.ipynb", "notebooks/analysis_demo/comp_bio_explore_and_load_lung_data.ipynb", "notebooks/analysis_demo/comp_bio_geneformer_prediction.ipynb", "notebooks/analysis_demo/comp_bio_normalizing_full_gene_sequencing.ipynb", 
"notebooks/analysis_demo/comp_bio_scvi_model_use.ipynb", "notebooks/analysis_demo/comp_bio_summarize_axis_query.ipynb", "notebooks/api_demo/census_access_maintained_embeddings.ipynb", "notebooks/api_demo/census_citation_generation.ipynb", "notebooks/api_demo/census_compute_over_X.ipynb", "notebooks/api_demo/census_dataset_presence.ipynb", "notebooks/api_demo/census_datasets.ipynb", "notebooks/api_demo/census_duplicated_cells.ipynb", "notebooks/api_demo/census_embedding.ipynb", "notebooks/api_demo/census_gget_demo.ipynb", "notebooks/api_demo/census_query_extract.ipynb", "notebooks/api_demo/census_summary_cell_counts.ipynb", "notebooks/experimental/highly_variable_genes.ipynb", "notebooks/experimental/mean_variance.ipynb", "notebooks/experimental/pytorch.ipynb", "python-api.rst"], "titles": ["cellxgene_census.download_source_h5ad", "cellxgene_census.experimental.get_all_available_embeddings", "cellxgene_census.experimental.get_all_census_versions_with_embedding", "cellxgene_census.experimental.get_embedding", "cellxgene_census.experimental.get_embedding_metadata", "cellxgene_census.experimental.get_embedding_metadata_by_name", "cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder", "cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer", "cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe", "cellxgene_census.experimental.ml.pytorch.Stats", "cellxgene_census.experimental.ml.pytorch.experiment_dataloader", "cellxgene_census.experimental.pp.get_highly_variable_genes", "cellxgene_census.experimental.pp.highly_variable_genes", "cellxgene_census.experimental.pp.mean_variance", "cellxgene_census.get_anndata", "cellxgene_census.get_census_version_description", "cellxgene_census.get_census_version_directory", "cellxgene_census.get_default_soma_context", "cellxgene_census.get_obs", "cellxgene_census.get_presence_matrix", "cellxgene_census.get_source_h5ad_uri", "cellxgene_census.get_var", "cellxgene_census.open_soma", "What\u2019s new?", "R 
package cellxgene.census V1 is out!", "Memory-efficient implementations of commonly used single-cell methods", "Introducing a normalized layer and pre-calculated cell and gene statistics in Census", "Census supports categoricals for cell metadata", "CZ CELLxGENE Discover Census in AWS", "FAQ", "Census data releases", "Installation", "CZ CELLxGENE Discover Census", "Quick start", "Census data and schema", "CZ CELLxGENE Discover Census Schema", "Python tutorials", "CZ CELLxGENE Discover Census", "Learning about the CZ CELLxGENE Census", "Integrating multi-dataset slices of data", "Exploring biologically relevant clusters in Census embeddings", "Exploring all data from a tissue", "Geneformer for cell class prediction and data projection", "Normalizing full-length gene sequencing data", "scVI for cell type prediction and data projection", "Summarizing cell and gene metadata", "Access CELLxGENE collaboration embeddings (scVI, Geneformer)", "Generating citations for Census slices", "Computing on X using online (incremental) algorithms", "Genes measured in each cell (dataset presence matrix)", "Exploring the Census Datasets table", "Understanding and filtering out duplicate cells", "Access CELLxGENE-hosted embeddings", "Querying data using the gget cellxgene module", "Querying and fetching the single-cell data and cell/gene metadata.", "Exploring pre-calculated summary cell counts", "Experimental Highly Variable Genes API", "Out-of-core (incremental) mean and variance calculation", "Training a PyTorch Model", "Python API"], "terms": {"dataset_id": [0, 12, 20, 25, 27, 35, 38, 40, 41, 42, 43, 44, 46, 47, 49, 50, 51, 52, 53, 54, 57], "str": [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 35, 39, 40, 42, 53], "to_path": [0, 50], "census_vers": [0, 1, 3, 5, 7, 11, 15, 16, 20, 22, 26, 27, 30, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "stabl": [0, 11, 12, 16, 20, 22, 24, 30, 31, 33, 38, 39, 41, 43, 45, 
48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "progress_bar": 0, "bool": [0, 8, 13, 16, 40], "true": [0, 7, 8, 11, 13, 16, 25, 31, 35, 38, 39, 40, 41, 43, 44, 45, 48, 51, 52, 53, 54, 55, 56, 57, 58], "none": [0, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 21, 22, 35, 41, 42, 48, 53], "download": [0, 25, 29, 46, 52, 59], "sourc": [0, 20, 22, 28, 31, 35, 52, 53, 58], "h5ad": [0, 15, 16, 20, 22, 28, 35, 39, 42, 43, 47, 49, 53, 59], "dataset": [0, 6, 7, 11, 12, 19, 24, 26, 28, 30, 32, 34, 36, 37, 38, 40, 43, 44, 45, 46, 47, 48, 51, 52, 53, 54, 55], "given": [0, 1, 8, 15, 25, 28, 30, 35, 41, 46, 48, 49, 50, 52, 58], "user": [0, 11, 12, 17, 20, 24, 25, 26, 27, 28, 29, 32, 34, 37, 39, 41, 42, 43, 44, 48, 56, 58], "specifi": [0, 2, 5, 7, 8, 11, 12, 13, 16, 17, 22, 26, 28, 30, 33, 38, 39, 41, 43, 45, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "file": [0, 7, 22, 28, 29, 30, 35, 38, 42, 44, 45, 53], "name": [0, 2, 5, 6, 8, 11, 12, 15, 16, 20, 28, 30, 33, 34, 35, 38, 39, 41, 43, 45, 47, 48, 51, 52, 53, 54, 56, 59], "paramet": [0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 39, 41, 43, 52], "fetch": [0, 3, 8, 11, 12, 14, 18, 21, 24, 36, 42, 44, 46, 47, 51, 58], "origin": [0, 8, 26, 35, 40, 41, 42, 44, 51, 58], "associ": [0, 2, 5, 35, 36, 41, 42], "thi": [0, 7, 8, 9, 10, 12, 14, 16, 20, 24, 25, 26, 27, 28, 29, 30, 33, 34, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "The": [0, 1, 2, 3, 4, 5, 6, 8, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58, 59], "where": [0, 8, 13, 35, 39, 40, 41, 43, 45, 46, 48, 51, 52, 56, 57, 58], "written": [0, 11, 14, 18, 21], "must": [0, 8, 11, 12, 31, 33, 35, 40, 51], "alreadi": [0, 40, 44], "exist": [0, 20, 27, 28, 29, 32, 37, 38, 41, 42, 51], "censu": [0, 1, 2, 3, 5, 6, 7, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 31, 33, 39, 44, 46, 48, 53, 55, 56], 
"version": [0, 1, 2, 3, 5, 14, 15, 16, 20, 22, 24, 26, 29, 31, 38, 39, 40, 42, 44, 45, 46, 47, 49, 51, 52, 53, 54], "default": [0, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 16, 17, 18, 20, 21, 22, 27, 30, 39, 43, 48, 53, 57, 58], "whether": [0, 8, 53], "displai": [0, 40, 44, 45, 48, 52, 53, 58], "progress": [0, 42, 44, 53], "bar": 0, "rais": [0, 3, 5, 10, 11, 12, 15, 19, 20, 22, 38, 45], "valueerror": [0, 3, 5, 10, 11, 12, 15, 19, 22], "path": [0, 22, 28, 35, 42, 53], "i": [0, 2, 3, 5, 8, 9, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], "e": [0, 1, 2, 3, 5, 6, 8, 12, 13, 22, 26, 28, 30, 32, 34, 35, 37, 38, 40, 41, 42, 45, 46, 48, 49, 50, 51, 52, 53, 56], "overwrit": 0, "an": [0, 1, 3, 6, 8, 10, 11, 13, 14, 16, 18, 21, 22, 24, 25, 26, 28, 30, 31, 32, 33, 35, 37, 38, 39, 40, 41, 42, 45, 47, 48, 50, 54, 57, 59], "lifecycl": [0, 3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 22], "matur": [0, 15, 16, 19, 20, 22, 38, 40], "get_source_h5ad_uri": [0, 22, 50], "look": [0, 22, 26, 38, 39, 41, 42, 43, 44, 46, 51, 52, 53, 54, 58], "up": [0, 22, 25, 44, 48, 51], "locat": [0, 17, 22, 29, 50, 52, 54], "exampl": [0, 1, 3, 4, 7, 11, 12, 14, 15, 16, 17, 19, 20, 22, 24, 26, 28, 29, 31, 33, 35, 36, 39, 40, 41, 46, 48, 52, 53, 54, 58], "8e47ed12": 0, "c658": 0, "4252": [0, 41, 49], "b126": 0, "381df8d52a3d": 0, "tmp": [0, 22], "data": [0, 3, 8, 9, 10, 11, 12, 15, 16, 20, 25, 27, 31, 33, 40, 45, 46, 47, 48, 49, 55, 56, 57, 58], "list": [1, 2, 11, 12, 14, 18, 21, 28, 32, 34, 35, 37, 38, 40, 41, 42, 44, 45, 49, 53, 54, 59], "dict": [1, 4, 5, 15, 16, 17, 22, 44], "ani": [1, 3, 4, 5, 6, 7, 8, 10, 11, 12, 14, 17, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 35, 37, 38, 40, 44, 46, 47, 48, 49, 50, 52, 55, 56, 58], "return": [1, 2, 3, 4, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 38, 40, 45, 48, 49, 53, 54, 55, 56, 57, 58], "dictionari": [1, 4, 5, 15, 16, 
17, 22, 27, 38, 42, 52, 54], "all": [1, 2, 7, 8, 11, 12, 14, 16, 18, 21, 24, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 39, 40, 42, 43, 44, 46, 47, 48, 50, 51, 52, 53, 54, 55], "avail": [1, 8, 12, 14, 16, 25, 26, 29, 30, 31, 39, 40, 41, 42, 46, 52, 53, 54, 56, 59], "embed": [1, 2, 3, 4, 5, 14, 32, 37, 39, 44], "tag": [1, 3, 5, 28, 30], "g": [1, 2, 3, 5, 6, 12, 13, 22, 28, 30, 32, 34, 35, 37, 38, 40, 42, 45, 48, 50, 52, 53, 56], "2023": [1, 3, 5, 24, 25, 26, 28, 32, 33, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "12": [1, 3, 5, 15, 16, 20, 22, 26, 38, 39, 40, 41, 42, 43, 44, 46, 49, 51, 52, 54], "15": [1, 3, 5, 16, 28, 38, 39, 40, 41, 42, 43, 44, 46, 49, 51, 52, 53, 57], "A": [1, 2, 3, 4, 5, 8, 10, 12, 13, 16, 17, 18, 19, 20, 21, 22, 28, 30, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 49, 50, 51, 52, 53, 54], "each": [1, 6, 7, 8, 16, 25, 26, 27, 29, 30, 33, 34, 35, 36, 39, 40, 41, 42, 43, 45, 46, 47, 48, 50, 52, 53, 55, 56, 58, 59], "contain": [1, 2, 3, 4, 5, 7, 11, 12, 14, 15, 16, 18, 19, 20, 21, 22, 34, 35, 38, 39, 40, 41, 42, 43, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59], "metadata": [1, 4, 5, 7, 11, 14, 18, 21, 23, 25, 28, 29, 32, 34, 36, 37, 39, 40, 42, 43, 44, 46, 48, 49, 50, 51, 55, 56, 58], "describ": [1, 4, 5, 28, 34, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 54, 55, 56, 57], "experiment_nam": [1, 40, 46, 52, 57], "experiment_1": 1, "measurement_nam": [1, 6, 8, 11, 14, 19, 25, 26, 33, 42, 44, 46, 47, 48, 49, 51, 52, 56, 57, 58], "rna": [1, 6, 11, 14, 19, 25, 26, 29, 32, 33, 34, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58], "organ": [1, 2, 5, 11, 14, 18, 19, 21, 24, 25, 26, 29, 32, 33, 34, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 58], "homo_sapien": [1, 6, 7, 24, 25, 26, 27, 28, 33, 34, 35, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 58], "n_embed": [1, 52], "1000": [1, 11, 12, 14, 25, 35, 39, 43], 
"n_featur": [1, 52], "200": [1, 3], "uri": [1, 3, 4, 15, 16, 17, 20, 22, 28, 42, 44, 50, 59], "s3": [1, 15, 16, 17, 20, 22, 28, 29, 31, 42, 44, 46, 50, 52], "bucket": [1, 17, 22, 28, 29, 31, 35], "embedding_1": 1, "embedding_nam": [2, 5, 40, 42, 44, 46, 52], "embedding_typ": [2, 5], "obs_embed": [2, 5, 14, 40, 42, 44, 46, 52], "get": [2, 11, 15, 16, 18, 21, 22, 24, 25, 26, 27, 32, 33, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 50, 51, 52, 54], "specif": [2, 5, 22, 29, 30, 32, 34, 35, 37, 38, 40, 45, 48, 51, 54], "scvi": [2, 5, 30, 36, 40, 52], "which": [2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 30, 34, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58], "type": [2, 8, 19, 24, 27, 28, 30, 33, 34, 36, 39, 40, 42, 43, 48, 49, 55, 58], "embedding_uri": [3, 4, 46, 52], "obs_soma_joinid": [3, 52], "ndarrai": [3, 11, 14, 18, 21, 46, 48, 52], "dtype": [3, 8, 11, 14, 18, 21, 38, 39, 40, 41, 43, 45, 46, 47, 48, 51, 52, 54, 58], "int64": [3, 8, 27, 35, 38, 39, 41, 43, 45, 48, 51, 54], "arrai": [3, 11, 14, 18, 19, 21, 29, 32, 34, 37, 41, 43, 44, 48, 49, 58], "context": [3, 4, 6, 17, 22, 26, 28, 33, 38, 41, 45, 51, 52], "somatiledbcontext": [3, 4, 17, 22, 52], "float32": [3, 35, 43, 46, 48, 52], "read": [3, 4, 8, 9, 14, 19, 25, 26, 28, 29, 30, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 50, 52, 54, 55, 56, 58], "cell": [3, 6, 7, 8, 11, 12, 15, 16, 20, 23, 28, 29, 32, 36, 37, 39, 43, 50, 56, 57, 58], "ob": [3, 7, 8, 11, 12, 13, 14, 18, 24, 28, 33, 34, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 55, 56, 57, 58], "dens": [3, 8, 29, 32, 34, 37], "numpi": [3, 19, 29, 32, 37, 39, 40, 41, 42, 44, 48, 49, 58], "without": [3, 32, 37, 40, 42, 58], "nan": [3, 40, 52, 56], "valu": [3, 7, 8, 11, 12, 13, 14, 15, 18, 19, 21, 24, 25, 26, 27, 29, 30, 34, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 51, 52, 53, 54, 56, 57, 58], "us": [3, 4, 8, 9, 10, 11, 12, 13, 14, 16, 17, 22, 23, 27, 28, 30, 
31, 33, 34, 35, 38, 39, 40, 41, 43, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59], "verifi": 3, "content": [3, 7, 28, 30, 33, 34, 35, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 57, 58], "from": [3, 6, 7, 8, 13, 15, 19, 25, 26, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 43, 44, 46, 48, 49, 51, 52, 54, 55, 56, 57, 58], "same": [3, 25, 26, 33, 34, 40, 42, 43, 46, 50, 51, 52, 54, 56], "slice": [3, 11, 14, 18, 21, 25, 26, 29, 32, 36, 37, 38, 41, 46, 48, 49, 50, 52, 54], "custom": [3, 4, 17, 22, 28], "tiledbsoma": [3, 4, 6, 7, 8, 11, 12, 13, 14, 17, 22, 25, 26, 28, 33, 46, 48, 51, 52, 56, 57, 58, 59], "open": [3, 4, 17, 20, 22, 24, 25, 26, 28, 30, 32, 33, 37, 39, 41, 42, 47, 52, 53, 56], "soma": [3, 4, 8, 9, 11, 14, 15, 16, 17, 18, 21, 22, 24, 25, 29, 30, 32, 33, 35, 37, 38, 45, 46, 48, 49, 50, 52, 56, 57, 58, 59], "object": [3, 4, 8, 11, 14, 17, 18, 19, 20, 21, 22, 24, 26, 29, 32, 35, 37, 38, 39, 40, 41, 45, 46, 47, 50, 52, 54, 58], "option": [3, 4, 11, 12, 16, 20, 22, 28, 31, 35, 50, 53], "ar": [3, 5, 8, 10, 11, 12, 13, 16, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 52, 53, 54, 56, 57, 58], "position": [3, 48, 49], "index": [3, 8, 11, 13, 14, 18, 19, 21, 34, 35, 40, 42, 44, 48, 49, 50, 56, 57], "In": [3, 25, 26, 27, 30, 31, 32, 35, 37, 38, 39, 40, 41, 42, 46, 48, 49, 51, 52, 56, 58], "other": [3, 6, 10, 25, 26, 35, 38, 40, 43, 46, 48, 49, 50, 51, 52, 54], "word": [3, 35, 40, 48, 49, 52], "identifi": [3, 11, 12, 16, 25, 30, 40, 43], "correspond": [3, 13, 16, 26, 28, 35, 38, 41, 42, 43, 44, 45, 46, 48, 51, 52, 54], "ith": 3, "posit": [3, 8, 38, 41, 42, 48], "mismatch": 3, "obs_somaids_to_fetch": 3, "np": [3, 39, 40, 41, 42, 44, 48, 52], "10": [3, 16, 29, 30, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 57, 58], "11": [3, 16, 29, 30, 31, 38, 39, 40, 41, 42, 43, 44, 46, 49, 50, 51, 52, 53, 54, 57, 58], "emb": [3, 40, 42, 52], "shape": [3, 38, 40, 41, 46, 48, 51, 52, 58], "2": 
[3, 8, 15, 16, 17, 20, 22, 25, 28, 29, 30, 31, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "0": [3, 8, 9, 10, 11, 12, 13, 14, 25, 26, 27, 30, 33, 34, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58], "4": [3, 8, 25, 30, 33, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58], "02954102": 3, "1": [3, 8, 13, 17, 25, 26, 28, 30, 33, 34, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "0390625": 3, "14550781": 3, "40820312": 3, "00224304": 3, "265625": 3, "05883789": 3, "7890625": 3, "python": [4, 8, 24, 25, 26, 27, 30, 32, 37, 38, 42, 45, 47, 49, 52, 53, 54], "get_experiment_metadata": 4, "If": [5, 6, 8, 10, 11, 12, 13, 16, 17, 22, 28, 29, 30, 31, 32, 35, 37, 38, 41, 45, 51, 52, 53, 58], "more": [5, 8, 12, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 37, 38, 39, 40, 42, 43, 44, 45, 46, 48, 51, 52, 53, 54, 56, 58, 59], "match": [5, 11, 42, 44, 45, 50, 52, 53, 54, 56], "queri": [5, 6, 7, 8, 11, 12, 13, 14, 18, 19, 21, 25, 30, 32, 35, 36, 37, 38, 39, 41, 44, 45, 48, 50, 51, 55, 56, 57, 58], "most": [5, 16, 25, 29, 30, 35, 38, 39, 40, 41, 42, 45, 51, 56, 58], "recent": [5, 16, 24, 30], "one": [5, 6, 11, 14, 18, 19, 21, 22, 29, 30, 34, 35, 38, 39, 40, 42, 44, 50, 51, 52, 53, 54, 58], "either": [5, 16, 20, 28, 29, 35, 58], "var_embed": [5, 14, 52], "found": [5, 19, 22, 24, 32, 37, 39, 40, 41, 43, 44, 50, 54], "class": [6, 7, 8, 9, 19, 33, 34, 36, 46, 48, 49, 52], "experi": [6, 7, 8, 11, 14, 17, 26, 34, 35, 45, 46, 49, 50, 52, 55, 56, 58], "layer_nam": 6, "raw": [6, 8, 11, 12, 13, 14, 25, 26, 33, 34, 38, 40, 41, 46, 48, 51, 52, 57, 58], "block_siz": 6, "int": [6, 7, 8, 9, 10, 11, 12, 13, 14, 18, 21, 35, 40, 44, 48], "kwarg": [6, 7], "abstract": 6, "base": [6, 8, 11, 16, 24, 25, 26, 32, 34, 35, 37, 38, 40, 41, 42, 44, 46, 51, 52, 53, 54, 56], "method": [6, 7, 8, 9, 10, 11, 12, 17, 23, 26, 27, 28, 29, 30, 38, 40, 43, 45, 46, 48, 50, 
52, 54, 56, 58], "process": [6, 7, 8, 10, 25, 26, 29, 38, 42, 48, 51], "cellxgen": [6, 7, 15, 16, 20, 23, 26, 27, 29, 30, 31, 33, 34, 36, 39, 40, 41, 42, 43, 44, 47, 50, 51, 59], "experimentaxisqueri": [6, 7, 12, 13, 56, 57], "result": [6, 7, 8, 11, 12, 13, 16, 25, 30, 33, 39, 40, 44, 45, 46, 48, 52, 54, 56, 57, 58], "hug": [6, 7], "face": [6, 7], "item": [6, 7, 34, 38, 45, 50, 58], "repres": [6, 13, 24, 30, 34, 35, 41, 52, 57], "subclass": [6, 40], "implement": [6, 23, 29, 32, 35, 37, 48, 56, 58], "cell_item": 6, "row": [6, 8, 11, 12, 13, 19, 26, 33, 35, 38, 40, 41, 46, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58], "x": [6, 8, 11, 12, 13, 14, 26, 30, 33, 34, 36, 38, 39, 40, 41, 42, 43, 44, 51, 52, 57, 58], "layer": [6, 8, 11, 12, 13, 14, 23, 32, 35, 37, 39, 49, 53, 57], "mai": [6, 8, 11, 14, 16, 18, 21, 24, 27, 29, 30, 31, 32, 35, 37, 38, 39, 40, 48, 49, 50, 51, 52, 58], "also": [6, 8, 16, 25, 27, 28, 29, 31, 40, 42, 44, 45, 49, 50, 51, 52, 53, 54, 56, 58], "overrid": [6, 17, 22], "__init__": [6, 7, 8, 9, 48, 58], "__enter__": 6, "perform": [6, 8, 16, 25, 26, 30, 31, 32, 33, 35, 37, 38, 39, 41, 43, 48, 51, 52, 54, 57, 58], "necessari": [6, 25, 32, 37, 40], "preprocess": [6, 41], "inherit": 6, "so": [6, 8, 29, 38, 39, 40, 41, 42, 43, 44, 45, 48, 49, 58], "typic": [6, 40, 58], "usag": [6, 7, 8, 25, 28, 29, 33, 39, 51, 58], "would": [6, 8, 39, 51, 58], "import": [6, 7, 25, 26, 27, 28, 30, 33, 38, 39, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "geneformertoken": 6, "open_soma": [6, 7, 11, 14, 17, 18, 21, 24, 25, 26, 27, 28, 30, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "subclassofcelldatasetbuild": 6, "census_data": [6, 7, 24, 25, 26, 27, 28, 33, 38, 40, 41, 43, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58], "obs_queri": [6, 7, 8, 25, 26, 33, 46, 48, 51, 52, 56, 57, 58], "tilebsoma": 6, "axisqueri": [6, 7, 8, 25, 26, 33, 46, 48, 51, 52, 56, 57, 58], "defin": [6, 7, 11, 12, 29, 34, 35, 38, 45, 
48, 53, 54], "some": [6, 7, 8, 10, 25, 27, 35, 38, 39, 40, 41, 42, 43, 44, 51, 53], "subset": [6, 7, 11, 27, 39, 40, 41, 42, 43, 44, 52, 57, 58], "var_queri": [6, 8, 25, 48, 58], "builder": 6, "build": [6, 7, 11, 14, 27, 28, 29, 30, 31, 34, 35, 38, 41, 46, 52, 53], "initi": [6, 33, 35, 44, 46, 51, 52], "measur": [6, 8, 11, 14, 19, 25, 26, 34, 35, 36, 41, 43, 50, 52], "number": [6, 7, 8, 10, 11, 12, 13, 16, 26, 30, 35, 41, 42, 43, 44, 46, 48, 50, 51, 52, 56, 57, 58, 59], "memori": [6, 8, 17, 23, 24, 26, 27, 29, 31, 32, 36, 37, 45, 48, 50, 51, 53, 54, 58], "onc": [6, 11, 12, 16, 24, 30, 38, 45, 48, 58], "unspecifi": 6, "sparsendarrayread": 6, "blockwis": [6, 52], "select": [6, 11, 12, 13, 14, 18, 21, 26, 33, 35, 39, 40, 41, 42, 46, 49, 50, 51, 52, 54, 56], "pass": [6, 8, 10, 17, 39, 44, 48, 53, 54, 58], "through": [6, 31, 32, 37, 44, 52, 54, 58], "especi": 6, "attribut": [6, 7, 8, 9, 42, 46, 52, 53, 58], "obs_column_nam": [7, 8, 24, 26, 33, 58], "sequenc": [7, 8, 11, 12, 14, 18, 21, 32, 34, 36, 37, 39, 40, 41, 49, 50, 52], "obs_attribut": 7, "max_input_token": 7, "2048": 7, "token_dictionary_fil": 7, "gene_median_fil": 7, "gener": [7, 9, 11, 12, 25, 29, 30, 32, 35, 36, 37, 38, 39, 40, 52, 53], "geneform": [7, 30, 36, 40, 52], "token": 7, "human": [7, 24, 26, 33, 34, 35, 36, 39, 40, 42, 45, 46, 49, 50, 51, 54, 55], "requir": [7, 8, 28, 35, 41, 45, 46, 52, 53, 58], "packag": [7, 23, 29, 31, 32, 33, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 52, 53, 54, 56, 57], "instal": 7, "separ": [7, 35, 40, 51, 53, 56], "pip": [7, 29, 31, 53], "git": 7, "http": [7, 12, 16, 29, 31, 39, 41, 42, 43, 44, 47, 52, 53], "co": [7, 29, 32, 37], "ctheodori": 7, "8df5dc1": 7, "latest": [7, 15, 16, 22, 26, 27, 31, 38, 39, 45, 47, 49, 52, 53, 54], "set": [7, 8, 17, 22, 25, 26, 33, 39, 42, 44, 49, 56, 58], "value_filt": [7, 11, 14, 18, 21, 24, 25, 26, 28, 30, 33, 38, 39, 40, 41, 43, 46, 47, 48, 51, 52, 54, 55, 56, 57, 58], "is_primary_data": [7, 11, 25, 32, 34, 35, 37, 38, 39, 40, 41, 42, 
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "tissue_gener": [7, 11, 14, 24, 25, 28, 33, 35, 38, 40, 41, 46, 50, 51, 52, 53, 54, 55, 56, 57, 58], "tongu": [7, 46, 49, 51, 52, 58], "soma_joinid": [7, 8, 11, 13, 14, 18, 19, 21, 25, 27, 30, 33, 35, 38, 39, 40, 41, 43, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57], "cell_type_ontology_term_id": [7, 27, 35, 38, 41, 45, 46, 50, 52, 53, 54, 55, 57], "input_id": [7, 42], "length": [7, 35, 36, 38, 41, 42, 47], "datafram": [7, 8, 11, 12, 13, 14, 18, 19, 21, 24, 26, 27, 33, 34, 35, 38, 40, 41, 45, 46, 48, 49, 50, 52, 53, 54, 56, 57, 58], "column": [7, 8, 11, 12, 13, 14, 18, 21, 33, 34, 35, 38, 40, 41, 42, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "propag": [7, 58], "maximum": [7, 8, 11, 12, 58], "input": [7, 26, 48, 54, 58], "pickl": [7, 58], "suppli": 7, "map": [7, 24, 35, 38, 41, 42, 44, 48, 49, 50], "ensembl": [7, 42, 44, 53], "gene": [7, 8, 11, 12, 23, 24, 29, 32, 33, 34, 36, 37, 40, 42, 44, 46, 51, 52, 58], "id": [7, 35, 38, 39, 40, 42, 43, 44, 46, 48, 52, 53], "onto": 7, "median": 7, "express": [7, 26, 29, 35, 39, 40, 44, 46, 48, 52], "By": [7, 24, 25, 26, 27, 38, 43, 53], "load": [7, 10, 24, 27, 29, 32, 37, 39, 41, 44, 47, 54, 58], "x_name": [8, 11, 14, 26, 46, 52, 58], "batch_siz": [8, 10, 58], "shuffl": [8, 10, 58], "fals": [8, 13, 16, 17, 25, 27, 28, 35, 38, 39, 40, 41, 42, 44, 51, 53, 54, 55, 56, 57], "seed": [8, 39, 58], "return_sparse_x": 8, "soma_chunk_s": [8, 58], "use_eager_fetch": 8, "torchdata": [8, 10, 58], "datapip": [8, 10, 58], "iter": [8, 10, 24, 26, 28, 33, 48, 51, 58], "iterdatapip": [8, 10, 58], "upon": [8, 11, 22, 30, 38, 45, 56], "along": [8, 13, 24, 26, 42, 57, 58], "var": [8, 11, 12, 13, 14, 19, 21, 25, 33, 34, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 52, 53, 56, 57, 58], "ax": [8, 13, 58], "provid": [8, 22, 28, 29, 30, 32, 33, 34, 35, 37, 38, 39, 41, 42, 43, 44, 45, 46, 49, 50, 51, 52, 53, 55, 56, 58, 59], "over": [8, 12, 13, 22, 26, 33, 48, 52, 53, 57], 
"when": [8, 10, 11, 12, 27, 35, 40, 48, 52, 53, 55, 56, 58], "": [8, 12, 16, 24, 28, 29, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46, 48, 49, 51, 52, 54, 57, 58], "built": [8, 26, 32, 35, 37, 59], "function": [8, 11, 12, 26, 29, 30, 38, 48, 52, 53, 55, 56, 58, 59], "batch": [8, 11, 12, 25, 40, 42, 44, 48, 56, 58], "x_batch": [8, 58], "y_batch": [8, 58], "control": [8, 25, 56, 58], "tensor": [8, 58], "have": [8, 16, 24, 26, 30, 31, 32, 35, 37, 38, 39, 40, 43, 44, 45, 46, 48, 49, 52, 56, 58], "rank": [8, 11, 12, 56, 58], "2415": 8, "torch": [8, 10, 58], "encod": [8, 38, 39, 45, 46, 48, 52, 58], "For": [8, 13, 24, 26, 27, 28, 29, 31, 32, 33, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 48, 49, 51, 52, 53, 54, 56, 58, 59], "larger": [8, 29, 32, 33, 37, 40, 48], "dataload": [8, 10], "3": [8, 11, 12, 25, 29, 30, 31, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "2416": 8, "2417": 8, "spars": [8, 13, 19, 29, 32, 33, 34, 37, 39, 43, 48, 49, 52], "model": [8, 11, 12, 30, 32, 35, 37, 39, 40, 46, 52, 56], "support": [8, 10, 23, 28, 31, 32, 34, 37, 40, 42, 53, 57, 58], "reduc": [8, 17, 26, 41, 46, 51, 52, 58], "determin": [8, 49, 58], "first": [8, 19, 24, 25, 31, 33, 39, 41, 42, 43, 45, 46, 47, 48, 49, 51, 52, 58], "element": [8, 13, 19, 48, 49, 57], "alwai": [8, 16, 27, 30, 34, 35, 51], "panda": [8, 11, 12, 13, 18, 19, 21, 27, 29, 32, 33, 37, 38, 40, 41, 45, 48, 49, 50, 54, 55, 56, 57, 58], "equival": [8, 26, 46, 48, 52], "soma_dim_0": [8, 46, 48, 51, 52], "matrix": [8, 13, 19, 26, 29, 32, 33, 34, 36, 37, 38, 39, 40, 41, 43, 46, 52, 53], "remain": [8, 40], "string": [8, 11, 12, 27, 35, 52, 54, 58], "integ": [8, 11, 14, 18, 21, 34, 41, 43, 48, 58], "need": [8, 27, 31, 33, 35, 38, 39, 42, 44, 49, 51, 54], "can": [8, 10, 11, 12, 17, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58], "decod": [8, 52, 58], "obtain": [8, 25, 38, 39, 41, 42, 44, 51, 54, 58], "call": [8, 11, 
12, 28, 30, 33, 38, 39, 41, 43, 45, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "its": [8, 17, 24, 26, 28, 30, 32, 34, 35, 37, 39, 42, 44, 45, 49, 51, 54, 58], "inverse_transform": [8, 58], "exp_data_pip": 8, "obs_encod": [8, 58], "obs_attr_nam": 8, "encoded_valu": 8, "construct": [8, 38, 40, 41, 49, 50, 52], "new": [8, 30, 32, 33, 35, 37, 39, 42, 53, 58], "filter": [8, 11, 14, 16, 18, 21, 24, 25, 26, 29, 30, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 58], "axi": [8, 11, 13, 14, 18, 21, 25, 34, 35, 40, 41, 42, 43, 44, 45, 46, 48, 51, 52, 56, 57, 58], "veri": [8, 55], "larg": [8, 24, 38, 41, 48, 51, 52, 53, 54, 55], "featur": [8, 19, 32, 33, 34, 36, 37, 41, 42, 44, 46, 49, 52, 53, 56], "doe": [8, 20, 44, 48, 52, 58], "onli": [8, 12, 13, 16, 22, 25, 26, 30, 33, 34, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 51, 52, 54, 56, 58], "being": [8, 58], "singl": [8, 11, 12, 23, 28, 32, 35, 36, 37, 38, 39, 40, 43, 49, 51, 52, 53, 58, 59], "multipl": [8, 11, 12, 16, 32, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 54, 55], "reason": [8, 16, 40], "two": [8, 26, 28, 34, 35, 38, 39, 46, 52, 53, 54, 56], "step": [8, 25, 29, 41, 42, 58], "global": [8, 40, 41, 58], "contigu": 8, "group": [8, 35, 38, 40, 42, 55], "chunk": [8, 13, 29, 51, 58], "order": [8, 27, 30, 42, 58], "random": [8, 39, 40, 41, 42, 44, 58], "local": [8, 28, 40, 50, 58], "within": [8, 33, 35, 38, 40, 52, 58], "sinc": [8, 10, 24, 29, 30, 39, 41, 51, 53, 58], "retriev": [8, 9, 14, 22, 24, 26, 35, 36, 38, 46, 58], "keep": [8, 26, 42, 55], "fix": [8, 29, 35, 58], "size": [8, 13, 34, 35, 40, 42, 44, 52, 55, 58], "ensur": [8, 26, 29, 33, 38, 39, 41, 43, 45, 47, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "non": [8, 16, 25, 29, 34, 35, 38, 40, 41, 42, 48, 51, 52, 54], "occur": [8, 11, 12, 29, 52], "second": [8, 19, 24, 46, 49, 52, 58], "note": [8, 24, 27, 32, 34, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 58], "maintain": [8, 
39, 46, 52], "proxim": [8, 41, 55], "even": [8, 27, 52], "after": [8, 29, 30, 41], "suffici": [8, 29, 58], "train": [8, 36, 39, 52], "To": [8, 17, 24, 26, 29, 30, 31, 32, 35, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 50, 51, 52, 53, 54, 58], "end": [8, 35, 39, 40, 51], "treat": 8, "hyperparamet": 8, "tune": [8, 30, 46], "nn": [8, 58], "parallel": [8, 48], "distributeddataparallel": 8, "partit": 8, "disjoint": [8, 34], "across": [8, 24, 29, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55], "worker": [8, 10], "As": [8, 25, 32, 33, 34, 35, 37, 39, 43, 46, 49, 52, 54, 57], "still": [8, 39], "impact": [8, 40], "aspect": 8, "behavior": 8, "util": [8, 10, 25, 29, 40, 42, 44, 45, 48, 51, 52], "better": [8, 35, 36], "granular": [8, 58], "see": [8, 12, 25, 26, 27, 28, 29, 31, 33, 34, 35, 39, 40, 41, 42, 43, 51, 52, 53, 54, 56, 58, 59], "detail": [8, 25, 27, 28, 32, 33, 37, 39, 40, 54, 58], "gib": 8, "ram": [8, 48, 53], "per": [8, 11, 12, 25, 26, 29, 35, 38, 41, 43, 49, 58], "request": [8, 28, 29, 32, 37, 42, 44, 45, 48, 55, 56, 58], "assum": [8, 12, 35, 40, 48, 58], "sparsiti": 8, "95": 8, "depend": [8, 24, 29, 31, 39, 42, 44], "next": [8, 24, 26, 28, 30, 33, 58], "immedi": 8, "previous": [8, 40, 41], "made": [8, 40], "via": [8, 9, 28, 29, 30, 31, 32, 33, 35, 37, 38, 39, 41, 42, 43, 44, 45, 50, 54, 58], "allow": [8, 24, 26, 44, 45, 51, 58], "network": 8, "filesystem": 8, "client": [8, 29], "side": 8, "potenti": [8, 32, 37, 40], "improv": 8, "overal": [8, 27, 58], "cost": [8, 29], "doubl": [8, 35], "n_ob": [9, 33, 41, 42, 44, 46, 48, 50, 52, 53, 54], "nnz": [9, 13, 26, 35, 46, 52], "elaps": 9, "n_soma_chunk": 9, "statist": [9, 13, 23, 48, 55], "about": [9, 24, 26, 29, 32, 33, 35, 36, 37, 39, 43, 45, 46, 51, 52, 53, 54], "experimentdatapip": [9, 10], "api": [9, 12, 25, 26, 30, 31, 32, 33, 35, 36, 37, 38, 39, 41, 43, 45, 49, 50, 53, 54, 58], "assess": [9, 40, 41], "throughput": 9, "attr": 9, "num_work": 10, "dataloader_kwarg": 10, "factori": 
10, "safe": 10, "instanti": [10, 58], "work": [10, 24, 26, 31, 32, 37, 38], "constructor": [10, 58], "applic": [10, 52], "sampler": [10, 58], "batch_sampl": [10, 58], "collate_fn": [10, 58], "ha": [10, 11, 12, 24, 26, 32, 34, 35, 37, 38, 39, 42, 43, 46, 49, 51, 52], "been": [10, 24, 26, 30, 52], "chain": [10, 58], "main": [10, 29, 31, 34, 40, 46, 51, 52], "addit": [10, 14, 31, 32, 35, 37, 38, 42, 44, 50, 53, 56, 57], "keyword": 10, "argument": [10, 11, 12, 17, 22, 25, 26, 53, 54, 56, 57], "except": [10, 38, 40, 43, 54], "param": [10, 22], "collect": [11, 14, 18, 19, 21, 22, 28, 30, 34, 38, 41, 42, 43, 44, 47, 49, 53], "obs_value_filt": [11, 14, 24, 25, 26, 30, 33, 39, 40, 42, 43, 44, 46, 47, 50, 51, 52, 54, 56, 57], "obs_coord": [11, 14, 40, 41], "byte": [11, 14, 18, 21], "float": [11, 12, 14, 18, 21, 26, 51, 58], "datetime64": [11, 14, 18, 21], "timestamptyp": [11, 14, 18, 21], "chunkedarrai": [11, 14, 18, 21], "var_value_filt": [11, 14, 24, 26, 33, 47, 51, 54], "var_coord": [11, 14, 41], "n_top_gen": [11, 12, 25, 39, 41, 43, 56], "flavor": [11, 12, 39, 41], "liter": [11, 12], "seurat_v3": [11, 12, 39, 41, 56], "span": [11, 12, 29, 40, 56], "batch_kei": [11, 12, 25, 39, 56], "max_loess_jitt": [11, 12], "1e": [11, 12, 58], "06": [11, 12], "batch_key_func": [11, 12], "callabl": [11, 12], "convienc": 11, "wrapper": [11, 14, 28, 38, 56], "around": [11, 14, 33, 56], "highly_variable_gen": [11, 25, 39, 41, 42, 43], "execut": [11, 14, 28, 51], "annot": [11, 12, 29, 34, 35, 38, 39, 41, 42, 44, 56], "variabl": [11, 12, 21, 26, 27, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 58], "usual": [11, 14, 18, 19, 21, 25, 29, 58], "homo": [11, 14, 18, 19, 21, 24, 25, 26, 30, 33, 34, 35, 38, 41, 42, 44, 49, 51, 54, 55], "sapien": [11, 14, 18, 19, 21, 24, 25, 26, 30, 33, 34, 35, 38, 41, 42, 44, 49, 51, 54, 55], "mu": [11, 14, 18, 19, 21, 30, 35, 38, 39, 43, 50, 55], "musculu": [11, 14, 18, 19, 21, 30, 35, 38, 39, 43, 50, 55], 
"syntax": [11, 14, 18, 21], "coordin": [11, 14, 18, 21, 40, 48], "fraction": [11, 12, 25, 56], "estim": [11, 12, 56], "loess": [11, 12, 56], "varianc": [11, 12, 13, 26, 35, 36, 56], "fit": [11, 12, 39, 44, 45, 48, 56], "done": [11, 12, 13, 25, 28, 35, 41, 43, 56, 58], "combin": [11, 12, 24, 29, 35, 38, 39, 40, 41, 44, 45, 48, 49, 51, 54], "kei": [11, 12, 35, 38, 39, 40, 41, 46, 48, 52, 54], "convert": [11, 12, 24, 33, 48], "concaten": [11, 12, 33, 39, 51, 52, 57], "them": [11, 12, 24, 28, 29, 39, 42, 46, 51, 52, 54], "max_lowess_jitt": [11, 12, 56], "jitter": [11, 12, 41, 56], "add": [11, 12, 14, 26, 31, 35, 42, 43, 46, 48, 52], "case": [11, 12, 34, 35, 38, 39, 40, 43, 48, 51, 52, 56, 57, 58], "failur": [11, 12], "low": [11, 12, 29, 32, 37], "entri": [11, 12], "count": [11, 12, 25, 26, 29, 32, 33, 34, 36, 37, 39, 41, 42, 43, 45, 50, 51, 54], "creat": [11, 12, 28, 29, 32, 33, 34, 37, 38, 39, 42, 46, 47, 50, 52, 56], "receiv": [11, 12, 41], "seri": [11, 12, 27, 35, 41, 48], "paramat": [11, 39], "hvg": [11, 12, 25, 56], "lung": [11, 14, 29, 35, 38, 39, 42, 45, 49, 50, 51, 53, 54], "500": [11, 26, 29, 41, 43, 56], "anndata": [11, 14, 26, 28, 29, 32, 35, 37, 39, 40, 41, 42, 43, 44, 48, 50, 54], "top": [11, 22, 25, 35, 41, 45, 50, 55, 56], "mus_musculu": [11, 35, 43, 45, 48, 50, 51, 52, 53, 54, 56, 57], "highli": [11, 12, 29, 36, 39, 40, 41, 42, 43, 44, 58], "just": [11, 25, 29, 38, 41, 48, 51, 53], "hvg_soma_id": 11, "highly_vari": [11, 25, 41, 42, 43, 56], "adata": [11, 26, 33, 35, 39, 40, 41, 42, 44, 46, 47, 50, 51, 52, 53, 54], "get_anndata": [11, 26, 33, 39, 40, 41, 42, 43, 44, 47, 50, 51, 54, 56], "scanpi": [12, 25, 29, 33, 39, 40, 41, 42, 43, 44, 46, 50, 52, 53, 56, 59], "mimic": 12, "seurat": [12, 25, 26, 29, 31, 32, 37], "v3": [12, 25, 31, 33, 38, 41, 54], "readthedoc": [12, 39, 41, 43], "io": [12, 39, 41, 43], "en": [12, 39, 41, 43], "html": [12, 29, 39, 41, 43], "inform": [12, 26, 28, 29, 32, 34, 37, 38, 39, 40, 41, 42, 44, 50, 51, 52, 53, 54, 56, 59], 
"ident": [12, 38], "those": [12, 25, 35, 39, 41, 43, 48], "produc": 12, "donor_id": [12, 35, 38, 41, 46, 50, 52, 53, 54, 57], "lambda": [12, 44], "batch0": 12, "99": 12, "els": [12, 40, 49, 58], "batch1": 12, "calculate_mean": [13, 25, 57], "calculate_vari": [13, 25, 57], "ddof": [13, 57], "nnz_onli": 13, "calcul": [13, 23, 35, 36, 39, 40, 42], "mean": [13, 25, 30, 35, 36, 56], "accumul": [13, 25, 48], "fashion": [13, 24, 25], "total": [13, 25, 29, 30, 34, 35, 38, 41, 43], "n": [13, 26, 29, 33, 34, 35, 38, 41, 43, 46, 47, 48, 52, 57], "dimens": [13, 19, 34, 46, 49, 52, 58], "wise": [13, 41], "metric": [13, 40, 44], "explicitli": [13, 26, 35, 52], "store": [13, 19, 26, 34, 35, 38, 40, 42, 45, 46, 49, 52, 53], "comput": [13, 24, 25, 29, 32, 37, 38, 57, 58], "otherwis": [13, 35, 51], "skip": 13, "delta": [13, 48, 57], "degre": [13, 40, 57], "freedom": [13, 57], "divisor": [13, 57], "x_layer": [14, 26], "obsm_lay": 14, "obsp_lay": 14, "varm_lay": 14, "varp_lay": 14, "column_nam": [14, 18, 21, 24, 26, 28, 33, 38, 40, 41, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55], "axiscolumnnam": 14, "conveni": [14, 28, 38, 45, 48, 49, 50, 54, 56], "obsm": [14, 30, 34, 39, 40, 42, 44], "slot": [14, 30], "obsp": [14, 40], "varm": [14, 34], "varp": [14, 35, 49], "part": [14, 39, 40], "get_all_available_embed": [14, 52], "experiment": [14, 17, 25, 31, 35, 36, 42, 44, 46, 52, 57, 58], "brain": [14, 26, 33, 38, 48], "tissu": [14, 24, 26, 28, 30, 33, 35, 36, 42, 43, 45, 46, 47, 48, 50, 51, 52, 54, 57], "censusversiondescript": [15, 16], "releas": [15, 16, 24, 26, 31, 33, 35, 38, 39, 41, 43, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "descript": [15, 16, 29, 32, 34, 35, 37, 52, 54, 59], "directori": [15, 16, 31], "unknown": [15, 41, 53, 54], "get_census_version_directori": 15, "entir": [15, 41, 45, 49, 58], "release_d": [15, 16], "release_build": [15, 16], "2022": [15, 16, 20, 22, 49, 50], "01": [15, 16, 20, 27, 35, 39, 43, 44, 46, 47, 52], "public": [15, 16, 20, 28, 30, 35, 42, 44, 
46, 47, 50, 52, 53], "s3_region": [15, 16, 20, 50], "u": [15, 16, 17, 20, 22, 28, 29, 31, 32, 37, 41, 48, 50, 52], "west": [15, 16, 20, 22, 28, 29, 31, 50, 52], "lt": [16, 26, 28, 39, 49], "retract": 16, "current": [16, 25, 26, 30, 32, 33, 37, 38, 39, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "flag": [16, 58], "includ": [16, 25, 28, 29, 32, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 57, 59], "both": [16, 24, 26, 29, 35, 39, 40, 47, 51, 52, 54, 56, 58], "long": [16, 24, 28, 32, 37, 58], "term": [16, 28, 32, 35, 37, 38, 45, 48, 53, 58], "weekli": [16, 28, 32, 37], "exclud": [16, 35, 41, 51, 58], "date": [16, 28, 30, 34, 35, 38, 52], "yyyi": [16, 30], "mm": [16, 30], "dd": [16, 30], "alias": 16, "alia": 16, "appear": [16, 35, 38, 40, 58], "time": [16, 25, 29, 30, 35, 51, 53, 58], "under": [16, 35, 41, 43], "again": [16, 53], "v": [16, 39, 48], "sequenti": 16, "increment": [16, 25, 36], "get_census_version_descript": 16, "29": [16, 41, 42, 58], "v2": [16, 38, 39, 41, 53, 57], "v1": [16, 23, 26, 34, 38, 39, 41], "30": [16, 30, 39, 41, 42, 52, 58], "mistak": 16, "happen": 16, "info_url": 16, "com": [16, 26, 29, 32, 37, 42, 44, 47, 52, 53], "errata": 16, "replaced_bi": 16, "tiledb_config": [17, 22, 28, 52], "sensibl": 17, "further": [17, 26, 40, 45, 52], "somacor": 17, "somaobject": 17, "replac": [17, 40, 42, 44], "tiledb": [17, 22, 24, 29, 30, 31, 32, 33, 37, 38, 45, 54], "configur": [17, 22, 28, 29, 58], "amount": [17, 53, 55], "o": [17, 42, 44, 53], "oper": [17, 27, 29, 33, 38, 45, 48, 54, 58], "ctx": [17, 28, 52], "py": [17, 22, 39, 41, 43, 53], "init_buffer_byt": [17, 22], "128": [17, 22, 30, 41, 56, 58], "1024": [17, 22], "c": [17, 24, 26, 31, 33, 39, 41, 42, 43, 44, 49, 50], "my": [17, 28], "privat": [17, 28], "access": [17, 20, 29, 30, 32, 33, 34, 35, 36, 37, 38, 40, 43, 45, 54, 55, 58], "copi": [17, 28, 39, 40, 41, 43, 44], "differ": [17, 29, 35, 38, 39, 40, 46, 49, 51, 52, 54], "region": [17, 20, 28, 29, 31, 
52], "vf": [17, 28, 52], "no_sign_request": [17, 28, 52], "east": [17, 28], "coord": [18, 21, 52], "observ": [18, 27, 34, 35, 39, 48, 51, 53, 55], "csr_matrix": [19, 39, 43], "presenc": [19, 34, 36, 40, 41, 43], "scipi": [19, 29, 32, 37, 39, 40, 43, 49, 52], "csr_arrai": 19, "deafult": 19, "cannot": [19, 22], "321x60554": 19, "uint8": [19, 49], "6441269": 19, "compress": [19, 49], "format": [19, 28, 30, 35, 48, 49, 59], "censusloc": 20, "guarante": [20, 30, 32, 35, 37, 38, 39], "interest": [20, 32, 34, 37, 38, 40, 49, 51, 53], "_release_directori": 20, "keyerror": 20, "do": [20, 26, 31, 33, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 50, 52, 54, 55, 57], "cb5efdb0": 20, "f91c": 20, "4cbd": 20, "9ad4": 20, "9d4fa41c572d": 20, "mirror": 22, "suitabl": [22, 52], "chosen": 22, "automat": [22, 29, 38, 45], "take": [22, 25, 38, 39, 41, 42, 43, 46, 51, 52, 53, 54, 58], "preced": 22, "get_default_soma_context": [22, 28], "level": [22, 34, 35, 38, 42, 48, 50, 51, 53, 55, 56], "It": [22, 29, 30, 34, 35, 38, 52, 56], "manag": [22, 26, 33, 38, 45, 55, 56], "close": [22, 24, 25, 26, 33, 38, 39, 40, 41, 43, 45, 46, 47, 50, 52, 54, 55], "exit": 22, "neither": 22, "invalid": [22, 48], "updat": [22, 25, 29, 35, 39, 41, 43, 48, 52, 53], "31": [22, 41, 42, 58], "rather": [22, 41, 48], "than": [22, 24, 26, 29, 31, 32, 33, 35, 37, 38, 40, 41, 42, 48], "r": [23, 26, 27, 29, 30, 32, 37, 41], "out": [23, 26, 29, 30, 32, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 58], "effici": [23, 26, 29, 32, 36, 37, 51, 53], "commonli": [23, 53], "introduc": [23, 40, 53], "normal": [23, 28, 30, 32, 33, 34, 36, 37, 38, 40, 44, 52, 54, 56, 57], "pre": [23, 25, 29, 36, 38, 42, 51, 52], "categor": [23, 32, 37, 53], "publish": [24, 25, 26, 27, 29, 30, 32, 35, 37], "august": 24, "7th": 24, "pablo": [24, 25, 26, 27], "garcia": [24, 25, 26, 27], "nieto": [24, 25, 26, 27], "team": [24, 25, 26, 29], "pleas": [24, 26, 28, 29, 32, 37, 39, 40, 41, 42, 43, 44, 51, 53], "announc": [24, 
25, 26], "come": [24, 33, 39, 41], "our": [24, 26, 29, 33, 38, 39, 40, 42, 44, 46, 52], "back": [24, 39, 42, 58], "now": [24, 25, 26, 27, 32, 33, 37, 38, 39, 41, 42, 43, 46, 47, 49, 50, 51, 52, 54, 57, 58], "biologist": 24, "largest": [24, 29], "standard": [24, 29, 32, 34, 37, 45, 48], "aggreg": 24, "compos": [24, 34], "60k": [24, 29], "With": [24, 25, 26, 38, 40, 43, 46, 52, 54, 58], "few": [24, 25, 36, 40, 42, 43, 51, 52, 53], "hundr": 24, "bigger": 24, "quickli": [24, 30, 38, 39], "basic": [24, 39, 40, 41, 42, 43, 45, 46, 50, 52, 58], "structur": [24, 32, 35, 37, 38, 40], "downstream": [24, 25, 26, 27, 33, 35, 52], "analysi": [24, 26, 33, 35, 36, 38, 39, 40, 41, 43, 45, 51, 52], "follow": [24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 40, 41, 42, 44, 51, 52, 54, 57, 58], "instruct": [24, 29, 33], "learn": [24, 35, 39, 40, 43, 45, 51, 52, 54], "make": [24, 31, 35, 39, 41, 42, 43, 48], "sure": [24, 43], "check": [24, 27, 32, 33, 37, 39, 43, 49], "resourc": [24, 41], "quick": [24, 28, 29, 32, 36, 37, 38, 55, 58], "start": [24, 27, 28, 29, 30, 32, 36, 37, 38, 39, 41], "guid": [24, 28, 39], "refer": [24, 26, 28, 29, 32, 33, 35, 37, 39, 40, 42, 44, 54], "doc": [24, 29, 39, 58], "tutori": [24, 25, 29, 30, 32, 33, 37, 40, 41, 42, 43, 44, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58], "reli": 24, "capabl": [24, 36, 40, 49], "shown": [24, 27, 35, 38, 40, 46, 58], "section": [24, 28, 35, 38, 41, 42, 46, 51, 52], "czi": [24, 29, 32, 37, 59], "develop": [24, 30, 31, 39, 41, 53], "upgrad": [24, 29, 30, 53], "beta": [24, 38, 41, 42], "here": [24, 25, 29, 32, 33, 34, 35, 37, 39, 40, 51, 52, 53, 58], "ever": 24, "grow": 24, "cz": [24, 29, 30, 34, 36, 41, 43, 47, 50, 51], "discov": [24, 29, 30, 34, 38, 41, 42, 46, 47, 50, 51, 52, 59], "accompani": 24, "ontologi": [24, 35, 42, 53], "cl": [24, 35, 38, 41, 42, 45, 54, 55, 57], "uberon": [24, 35, 38, 41, 45, 53, 54, 55, 57], "respect": [24, 26, 31, 35, 38, 40, 53, 54], "you": [24, 26, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 41, 42, 
43, 44, 45, 46, 48, 50, 51, 52, 53, 54, 55, 58], "find": [24, 26, 30, 32, 34, 37, 38, 40, 42, 43, 44, 45, 46, 49, 52, 56], "schema": [24, 26, 27, 28, 29, 30, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], "page": [24, 28, 29, 30, 33, 34, 39, 40, 42, 44, 46, 52], "research": [24, 26, 29, 32, 37], "directli": [24, 28, 29, 30, 36, 38, 40, 41, 45, 50, 54, 58, 59], "session": [24, 28, 31], "librari": [24, 27, 29, 30, 31, 33, 34, 35, 38, 41, 58], "your": [24, 26, 29, 31, 32, 36, 37, 45, 50, 51, 52, 55], "navig": 24, "300k": [24, 33], "microgli": [24, 28, 33], "neuron": [24, 26, 28, 33, 38, 42, 49, 55], "femal": [24, 28, 33, 41, 51, 53, 54, 57], "donor": [24, 35, 41, 49, 50, 53], "somadatafram": [24, 33, 38, 45, 54], "cell_metadata": [24, 28, 33, 47], "arrow": [24, 26, 27, 29, 32, 33, 37], "tabl": [24, 26, 33, 34, 36, 39, 40, 41, 43, 47, 48, 49, 51], "sex": [24, 26, 28, 30, 33, 35, 38, 46, 48, 50, 51, 52, 53, 54, 57], "cell_typ": [24, 25, 26, 27, 28, 33, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 57, 58], "assai": [24, 27, 28, 30, 33, 39, 40, 43, 46, 50, 52, 53, 54, 55, 57], "suspension_typ": [24, 28, 33, 35, 38, 41, 46, 50, 52, 53, 54, 57], "diseas": [24, 28, 30, 33, 35, 39, 40, 46, 50, 51, 52, 53, 54, 57], "concat": [24, 25, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57], "tibbl": [24, 33], "frame": [24, 28, 29, 32, 33, 34, 37, 38, 49], "similarli": [24, 26, 27, 33, 38, 49, 54], "gene_filt": [24, 25, 26, 33], "feature_id": [24, 25, 26, 33, 35, 38, 41, 42, 44, 46, 48, 49, 50, 52, 53, 54, 56], "ensg00000107317": [24, 26, 33], "ensg00000106034": [24, 26, 33], "cell_filt": [24, 25, 26, 33], "leptomening": 24, "cell_column": [24, 26, 33], "seurat_obj": [24, 26, 33], "get_seurat": [24, 26, 33], "sce_obj": [24, 26, 33], "get_single_cell_experi": [24, 26, 33], "sometim": 24, "too": 24, "overview": [24, 34, 55], "septemb": 25, "18": [25, 38, 39, 41, 42, 44, 52, 57], "thrill": 25, "offici": [25, 
35], "wide": [25, 28, 32, 37, 40, 49], "algorithm": [25, 36, 40, 56, 57], "line": [25, 38, 42, 44, 58], "code": [25, 26, 48, 53, 55, 58], "task": [25, 29, 40], "ten": 25, "convent": [25, 38], "laptop": 25, "8gb": 25, "below": [25, 26, 27, 33, 35, 38, 41, 42, 46, 49, 55, 58], "full": [25, 28, 32, 34, 36, 37, 39, 40, 54, 55, 58], "correct": [25, 30, 35, 58], "These": [25, 26, 29, 32, 35, 37, 38, 40, 41, 42, 44, 52], "interwoven": 25, "wai": [25, 38, 45, 46, 49, 51, 52, 54], "seamlessli": 25, "appli": [25, 40, 43, 44], "33m": [25, 29], "continu": [25, 33], "cellxgene_censu": [25, 26, 27, 28, 30, 33, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58, 59], "pp": [25, 39, 40, 41, 42, 43, 44, 46, 52, 56, 57], "mean_vari": [25, 57], "small": [25, 26, 35, 38, 40, 41, 43, 45, 48, 53, 54], "advantag": [25, 46, 52], "cpu": [25, 39, 42, 58], "multiprocess": 25, "speed": [25, 29], "popul": 25, "zero": [25, 26, 34, 35, 40, 44, 48, 52, 56], "futur": [25, 30, 33, 38, 39, 41, 42, 43, 45, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "we": [25, 26, 29, 30, 32, 33, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 51, 52, 53, 54, 57, 58], "enabl": [25, 29, 30, 35, 53], "easili": [25, 26, 29, 43, 46], "switch": [25, 53], "human_data": 25, "feature_nam": [25, 33, 35, 38, 40, 41, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56], "axis_queri": [25, 26, 33, 46, 48, 51, 52, 56, 57], "mean_variance_df": 25, "gene_df": 25, "to_panda": [25, 33, 38, 39, 40, 41, 43, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57], "8624": 25, "071926": 25, "5741": 25, "242485": 25, "16437": 25, "8": [25, 31, 33, 38, 39, 40, 41, 42, 43, 44, 46, 49, 51, 52, 53, 54, 56, 57, 58], "233282": 25, "452": 25, "119153": 25, "feature_length": [25, 33, 35, 38, 39, 41, 43, 46, 48, 49, 50, 52, 53, 54, 56], "ensg00000171885": 25, "5943": 25, "ensg00000133703": 25, "6845": 25, "get_highly_variable_gen": 25, "while": [25, 33, 38, 40, 42, 46, 52, 56], "account": [25, 39, 58], "effect": [25, 26, 39, 40, 52], "integr": 
[25, 29, 32, 37, 40, 41], "particular": [25, 27, 40, 58], "design": [25, 53], "paradigm": [25, 32, 37], "abov": [25, 29, 33, 34, 35, 38, 42, 51, 53, 54, 55], "tweak": 25, "compli": 25, "rule": 25, "thumb": 25, "good": [25, 40, 43, 52], "variances_norm": [25, 56], "003692": 25, "004627": 25, "748221": 25, "003084": 25, "003203": 25, "898657": 25, "014962": 25, "037395": 25, "513473": 25, "218865": 25, "547648": 25, "786928": 25, "002142": 25, "002242": 25, "894955": 25, "60659": [25, 41, 49], "000000": [25, 40, 48, 56], "60660": [25, 41, 49], "60661": [25, 41, 49], "60662": [25, 41, 49], "60663": [25, 41, 49], "octob": 26, "maximilian": 26, "lombardo": 26, "happi": 26, "introduct": 26, "tailor": 26, "empow": 26, "reflect": [26, 35, 40], "chang": [26, 30, 35], "expand": [26, 35, 40, 48], "exclus": 26, "thei": [26, 30, 35, 39, 40, 46, 48, 49, 51, 52], "invit": 26, "feedback": 26, "explor": [26, 29, 32, 36, 37, 52], "novel": [26, 41], "were": [26, 29, 34, 35, 38, 39, 40, 41, 43, 49, 51, 52], "mous": [26, 34, 35, 38, 43, 48, 50, 51, 54, 56, 57], "divid": [26, 48, 51], "sum": [26, 27, 35, 40, 41, 42, 44, 45, 48, 50, 58], "point": [26, 34, 40, 48], "precis": [26, 46, 52], "round": 26, "sigma": 26, "artifact": [26, 35, 40], "m": [26, 31, 34, 38, 41, 43, 49, 54, 56], "enrich": 26, "field": [26, 35, 52], "n_measured_ob": [26, 35, 46, 52], "wa": [26, 35, 40, 43, 44, 49, 50, 52, 53, 58], "augment": 26, "forego": 26, "common": [26, 33, 40, 45, 52, 54, 56, 58], "earli": 26, "raw_sum": [26, 35, 46, 48, 52], "deriv": [26, 42, 43, 52], "raw_mean_nnz": [26, 35, 46, 52], "averag": 26, "raw_variance_nnz": [26, 35, 46, 52], "n_measured_var": [26, 35, 46, 52], "thu": [26, 29, 32, 35, 37, 39, 42, 45, 54], "ensg00000161798": [26, 33, 54], "ensg00000188229": [26, 33, 54], "sympathet": [26, 33], "singlecellexperi": [26, 31, 32, 37], "outlin": 26, "like": [26, 27, 29, 38, 40, 41, 42, 45, 52, 58], "male": [26, 33, 41, 42, 48, 53, 54, 55, 57], "pyarrow": [26, 29, 32, 33, 37, 48, 51], 
"raw_slic": [26, 33], "And": [26, 28, 33, 38, 39, 41, 42, 43, 46, 47, 51, 52, 54], "somaaxisqueri": [26, 33], "read_next": [26, 33], "print": [26, 33, 40, 45, 47, 49, 50, 51, 52, 53, 58], "encourag": [26, 32, 37], "engag": 26, "share": [26, 29, 32, 37], "invalu": 26, "ongo": 26, "project": [26, 31, 36, 40], "reach": [26, 32, 37, 39], "chanzuckerberg": [26, 31, 32, 35, 37, 53], "report": [26, 30, 40, 53], "issu": [26, 29, 30, 40], "github": [26, 29, 32, 35, 37, 52, 53], "repositori": [26, 29, 32, 35, 37, 52], "april": 27, "4th": 27, "2024": [27, 29, 32, 35, 37, 42, 44, 47], "emanuel": 27, "bezzi": 27, "04": [27, 35, 43, 44, 46, 52], "instead": [27, 39, 40, 42, 43, 53, 58], "smaller": [27, 33, 58], "footprint": 27, "howev": [27, 29, 39, 40, 41, 58], "pipelin": [27, 32, 36, 37], "explain": 27, "adapt": [27, 48, 52], "link": [27, 41, 49, 50], "value_count": [27, 38, 39, 41, 43, 45, 48, 51, 54], "categori": [27, 30, 35, 38, 41, 42, 55], "present": [27, 29, 32, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 54, 55], "groupbi": [27, 41, 44, 53, 55], "pivot": 27, "show": [27, 35, 36, 38, 40, 42, 43, 44, 48, 51, 58], "unus": 27, "factor": [27, 40], "interfac": [27, 42, 44, 46, 52, 53, 58], "inspect": [27, 46, 52, 58], "null": 27, "indic": [27, 29, 34, 35, 38, 40, 41, 43, 48, 49, 52, 54], "int16": 27, "int8": 27, "assay_ontology_term_id": [27, 35, 38, 41, 45, 46, 50, 52, 53, 54, 57], "development_stag": [27, 35, 38, 41, 46, 50, 52, 53, 54, 57], "development_stage_ontology_term_id": [27, 35, 38, 41, 46, 50, 52, 53, 54, 57], "output": [27, 33, 48, 58], "truncat": 27, "amazon": [28, 29], "web": [28, 29], "servic": [28, 29], "what": [28, 35, 38, 39, 40, 41, 51, 52, 54], "inclus": [28, 35, 45], "criteria": [28, 29, 33, 34, 35, 54], "individu": [28, 32, 35, 37, 38, 39, 43, 51], "root": [28, 35], "definit": [28, 39, 54], "publicli": [28, 29, 30, 32, 37], "host": [28, 30, 31, 32, 35, 36, 37, 40, 42, 44, 46, 59], "uniqu": [28, 29, 30, 35, 38, 39, 40, 41, 45, 48, 
51], "05": [28, 42, 44, 46, 51, 53, 58], "bulk": 28, "07": [28, 33, 38, 39, 41, 43, 45, 48, 49, 50, 53, 54, 55, 56, 57, 58], "25": [28, 33, 38, 39, 40, 41, 42, 43, 45, 48, 49, 50, 53, 54, 55, 56, 57, 58], "shell": [28, 42, 44, 50], "sync": [28, 42], "sign": [28, 40, 42, 44], "recommend": [28, 29, 31, 33, 35, 39, 40, 42, 44, 51, 53], "folder": [28, 44], "should": [28, 30, 35, 38, 39, 40, 41, 43, 45, 50, 58], "interact": [28, 32, 35, 37], "document": [28, 29, 33, 35, 38, 39, 43, 45, 52, 54], "last": [29, 30, 35], "jan": 29, "latenc": [29, 32, 37], "acceler": [29, 32, 37], "50m": 29, "mice": 29, "harmon": [29, 32, 37], "label": [29, 35, 38, 40, 41, 42, 44, 47, 51, 53, 55, 58], "multi": [29, 34, 36, 41, 52], "core": [29, 36, 39, 48], "k": [29, 40], "onlin": [29, 30, 32, 36, 37, 57], "t": [29, 35, 39, 41, 42, 43, 44, 45, 47, 50, 51, 54, 55], "covid": [29, 38, 41, 51, 54], "19": [29, 30, 38, 39, 41, 42, 44, 45, 49, 51, 52, 54], "suit": 29, "author": [29, 35], "spatial": [29, 34, 35, 39, 40, 41, 49, 50], "yet": [29, 31], "d": [29, 52], "click": [29, 33], "citat": [29, 32, 35, 36, 37], "guidelin": [29, 32, 37], "offer": [29, 32, 37, 40, 46, 52], "becaus": [29, 39, 41, 43, 51], "therefor": [29, 39, 43, 45, 51, 52], "numer": [29, 40], "incompat": [29, 35], "purpos": 29, "suggest": [29, 40], "fast": 29, "corpu": 29, "60": [29, 42, 51], "gencod": 29, "readi": [29, 42, 58], "cloud": [29, 31, 32, 37, 50], "matric": [29, 32, 33, 34, 37, 38, 40, 48], "possibl": [29, 35, 42, 54], "due": [29, 30, 38, 40, 48, 58], "free": [29, 53], "aw": [29, 31, 42, 44, 50], "ye": 29, "download_source_h5ad": [29, 50], "site": [29, 39, 41, 43], "help": [29, 33, 38, 43, 45, 52, 53, 54, 56, 58], "pattern": [29, 40], "internet": [29, 31, 53], "limit": [29, 38, 51], "bandwidth": [29, 51], "tactic": 29, "connect": [29, 31, 41, 42, 53, 55], "high": [29, 34, 35, 38, 40, 41, 42, 51, 53, 56], "ethernet": 29, "wifi": 29, "coast": 29, "ec2": [29, 31], "instanc": [29, 31, 35, 40, 45, 53], "There": [29, 31, 41, 
42, 45, 46, 49, 51, 52, 56], "environ": [29, 31], "census_env": 29, "activ": [29, 31, 33, 52], "submit": [29, 32, 37], "join": [29, 32, 37, 38, 41, 48, 50, 54, 56], "scienc": [29, 32, 37, 47, 49, 59], "commun": [29, 32, 37, 40, 46, 52], "slack": [29, 32, 37], "question": [29, 38], "channel": [29, 32, 37], "inquir": 29, "accept": [29, 35, 56], "meet": [29, 33, 54, 56], "biolog": [29, 36, 51, 52, 58], "try": [29, 58], "old": [29, 41, 57], "persist": [29, 34], "notebook": [29, 31, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 52, 53, 58], "sh": [29, 31], "command": [29, 38, 42, 44], "restart": 29, "runtim": 29, "reload": [29, 42], "numba": [29, 48], "relat": 29, "simpli": [29, 42], "magic": 29, "similar": [29, 38, 39, 40, 41, 44, 54, 55, 56], "dbutil": 29, "restartpython": 29, "addition": [29, 39, 40], "node": [29, 38], "cluster": [29, 36, 39, 44], "0d53f00001ghvp3cap": 29, "between": [29, 35, 40, 42], "altern": [29, 58], "ad": [29, 35, 53, 54], "tab": 29, "edit": [30, 35], "decemb": 30, "15th": [30, 32, 37], "stabil": 30, "scientif": 30, "reproduc": [30, 39, 53, 55], "plan": [30, 32, 37], "regular": 30, "everi": [30, 32, 37], "six": [30, 32, 37], "month": [30, 32, 37, 57], "least": [30, 32, 35, 37], "5": [30, 31, 33, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56, 57, 58], "year": [30, 32, 37, 41], "recogn": 30, "previou": [30, 39, 41, 46, 52], "ingest": [30, 51], "hand": 30, "week": [30, 54], "651": 30, "62": [30, 41, 42, 44, 51], "998": 30, "417": 30, "684": 30, "805": 30, "36": [30, 38, 42, 58], "227": [30, 56], "903": 30, "230": 30, "588": [30, 41, 49, 50], "990": 30, "20": [30, 38, 39, 41, 42, 44, 47, 49, 52, 57], "631": 30, "248": [30, 38, 45], "stage": [30, 41, 53, 54, 57], "173": [30, 56], "72": [30, 42], "self": [30, 39, 48, 53, 58], "ethnic": [30, 53], "na": [30, 35, 38, 55, 57], "suspens": [30, 39, 53], "74": [30, 42], "53": [30, 42], "27": [30, 38, 39, 41, 42, 49, 58], "fine": [30, 46], "593": [30, 41, 49, 50], "56": [30, 41, 
42], "400": 30, "873": 30, "255": 30, "245": [30, 49], "33": [30, 41, 42, 52, 58], "364": 30, "242": 30, "083": 30, "531": [30, 41], "13": [30, 38, 39, 40, 41, 42, 43, 44, 46, 51, 52], "035": 30, "9": [30, 33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 49, 51, 52, 53, 54, 58], "613": [30, 38, 45, 55], "164": 30, "64": [30, 38, 42], "26": [30, 38, 39, 41, 42, 49, 58], "220": [30, 38, 45, 49], "66": [30, 38, 42, 45], "54": [30, 38, 42], "prevent": [30, 52], "analys": [30, 53], "mark": [30, 35, 38, 40, 51], "is_primari": 30, "exactli": [30, 35], "243": [30, 38, 49], "569": 30, "twice": [30, 38], "wish": [30, 38, 56], "consid": [30, 39], "duplicate_cells_census_lts_2023": 30, "csv": [30, 53], "zip": [30, 44, 48], "562": 30, "794": 30, "728": 30, "086": 30, "032": 30, "758": 30, "887": 30, "914": 30, "318": 30, "493": 30, "362": 30, "604": 30, "226": 30, "68": [30, 42], "51": [30, 41, 42], "61": [30, 42], "natur": [30, 36, 41, 42, 51, 53], "storag": [30, 35], "backend": [30, 53], "backward": [30, 58], "re": [30, 42, 49], "forward": [30, 44, 58], "older": 30, "might": [30, 40, 53], "error": [30, 38, 42, 44, 45], "aim": 30, "polici": 30, "abl": [30, 31], "until": 30, "linux": 31, "maco": 31, "system": [31, 38, 40, 46, 50, 52], "Or": 31, "tbd": 31, "16": [31, 38, 39, 41, 42, 43, 44, 46, 52, 53, 57, 58], "gb": [31, 53], "mbp": [31, 53], "increas": [31, 32, 37, 53], "virtual": 31, "conda": 31, "venv": [31, 39, 41, 43], "bin": 31, "modul": [31, 36, 39, 58], "less": [31, 32, 37, 40, 58], "complex": [31, 38, 40, 45, 48, 49], "databrick": 31, "faq": [31, 32, 37], "ubuntu": 31, "apt": 31, "libxml2": 31, "dev": 31, "libssl": 31, "libcurl4": 31, "openssl": 31, "cmake": 31, "21": [31, 39, 41, 42, 43, 44, 49, 51, 54, 57], "greater": [31, 35, 47], "tool": [31, 40, 44, 53], "xcode": 31, "window": [31, 58], "univers": [31, 40, 52], "cran": 31, "repo": [31, 59], "org": [31, 47], "export": [31, 46], "biocmanag": 31, "quietli": 31, "break": [32, 37, 51], "ve": [32, 37], "central": [32, 37, 46, 
52], "hub": [32, 37], "analyz": [32, 37], "significantli": [32, 37], "minim": [32, 37, 40], "studi": [32, 37, 39, 40], "scale": [32, 37, 39, 41, 42, 43], "interoper": [32, 37, 53], "toolkit": [32, 36, 37], "smart": [32, 34, 37, 38, 41, 49, 50, 55, 57], "seq2": [32, 34, 37, 38, 41, 43, 49, 50, 55, 57], "molecul": [32, 34, 35, 37], "10x": [32, 33, 34, 37, 38, 40, 41, 44, 49, 50, 51, 53, 54, 57], "duplic": [32, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 54, 55], "five": [32, 37], "perman": [32, 37], "ask": [32, 37], "email": [32, 37, 52], "bug": [32, 37], "believ": [32, 37], "secur": [32, 37], "disclos": [32, 37], "contact": [32, 37], "seamless": [32, 37], "pytorch": [32, 36, 37], "usabl": [32, 37, 58], "area": [32, 37], "On": [32, 37], "demand": [32, 33, 37], "rich": [32, 37, 39], "subsampl": [32, 37], "vignett": [33, 44], "soon": 33, "remind": [33, 46, 49, 52], "etc": [33, 34, 38], "consist": [33, 38, 39, 40, 41, 43, 45, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "ey": [33, 49], "379219": 33, "microwel": [33, 38, 41, 54], "seq": [33, 38, 39, 41, 54, 55], "adren": [33, 38], "gland": [33, 38, 42, 51, 52, 55], "379220": 33, "379221": 33, "379222": 33, "379223": 33, "379224": 33, "7": [33, 38, 39, 40, 41, 42, 43, 44, 46, 49, 50, 51, 52, 53, 54, 58], "n_var": [33, 41, 43, 46, 48, 49, 50, 52, 53, 54], "demonstr": [33, 36, 38, 39, 40, 44, 46, 47, 48, 50, 52, 53, 56, 58], "lazi": [33, 46, 51, 52], "evalu": 33, "well": [33, 38, 39, 41, 51, 55], "logic": [33, 41], "wrap": [33, 48, 58], "loop": 33, "r6": 33, "familiar": [33, 35, 39, 41, 43, 58], "379": 33, "224": 33, "chr": 33, "fema": 33, "6": [33, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 58], "\u2139": 33, "214": 33, "4k": 33, "4744": 33, "sampl": [33, 41, 42, 44, 48], "bioconductor": 33, "ecosystem": 33, "dim": 33, "rownam": 33, "rowdata": 33, "colnam": 33, "obs48350835": 33, "obs48351829": 33, "obs52469564": 33, "obs52470190": 33, "coldata": 33, "reduceddimnam": 33, 
"mainexpnam": 33, "altexpnam": 33, "sparse_matrix": 33, "state": [33, 40, 41, 49, 50], "monitor": 33, "read_complet": 33, "friendli": [34, 35], "varieti": [34, 40, 45, 48, 52], "hierarchi": 34, "somacollect": [34, 38, 45], "whole": [34, 38, 41], "summary_cell_count": [34, 38, 41, 55], "stratifi": [34, 38, 42], "relev": [34, 35, 36, 38, 54], "independ": [34, 38], "somaexperi": [34, 38, 48], "special": [34, 35, 38, 54], "form": [34, 38, 49, 58], "how": [34, 36, 38, 40, 41, 43, 47, 51, 52, 55, 58], "avialbl": 34, "feature_dataset_presence_matrix": [34, 41, 43], "boolean": [34, 35, 41, 43, 49], "adher": 34, "technologi": [34, 35, 38, 39, 41, 43], "short": [34, 38], "densendarrai": 34, "dimension": [34, 35, 40, 41], "offset": 34, "sparsendarrai": [34, 46, 52], "primari": [34, 35, 40, 42, 55], "march": 35, "NOT": [35, 48, 49], "shall": 35, "interpret": [35, 40], "bcp": 35, "14": [35, 38, 39, 41, 42, 43, 44, 46, 49, 52], "rfc2119": 35, "rfc8174": 35, "capit": 35, "hereaft": 35, "visit": [35, 40, 59], "understand": [35, 40], "reader": 35, "throughout": [35, 42, 44, 51, 52], "serv": [35, 43], "deposit": 35, "heart": [35, 49, 51, 56], "left": [35, 39, 41], "ventricl": [35, 45], "semver": 35, "major": [35, 41], "delet": 35, "modal": 35, "minor": 35, "compat": 35, "patch": 35, "editori": 35, "impos": 35, "organism_ontology_term_id": 35, "ncbitaxon": 35, "10090": 35, "9606": 35, "feature_refer": 35, "speic": 35, "AND": 35, "compris": 35, "children": 35, "efo": [35, 38, 39, 41, 54, 55, 57], "0002772": 35, "0010183": [35, 38], "nascent": 35, "elong": 35, "target": [35, 38], "manner": [35, 46, 52, 58], "doesn": [35, 41], "concurr": 35, "perturb": 35, "intend": [35, 56, 58], "primarili": [35, 39, 40, 41], "fusion": 35, "modif": 35, "mrna": [35, 38], "trna": 35, "rrna": 35, "viral": 35, "intron": 35, "ribosom": 35, "profil": [35, 38, 41], "umi": 35, "tissue_typ": 35, "equal": [35, 45], "referenc": [35, 41], "whose": [35, 41, 54], "readabl": [35, 41], "census_schema_vers": [35, 38, 
47], "census_build_d": [35, 38, 47], "iso": [35, 52], "8601": 35, "dataset_schema_vers": [35, 38, 47], "total_cell_count": [35, 38, 41, 47, 55], "unique_cell_count": [35, 38, 41, 47, 55], "number_donors_homo_sapien": [35, 38, 47], "number_donors_mus_musculu": [35, 38, 47], "10000": [35, 40], "100": [35, 38, 39, 41], "collection_id": [35, 39, 43, 49, 50], "quot": 35, "collection_nam": [35, 39, 41, 43, 49, 50], "collection_doi": [35, 39, 43, 49, 50], "dataset_titl": [35, 39, 41, 43, 49, 50], "dataset_h5ad_path": [35, 39, 43, 49, 50], "rel": [35, 43, 57], "dataset_total_cell_count": [35, 39, 43, 49, 50], "dataset_version_id": 35, "self_reported_ethn": [35, 38, 41, 46, 50, 52, 53, 54], "ontology_term_id": [35, 38, 41, 55], "0002048": [35, 41, 45], "cell_type_a": 35, "xxxxx": 35, "cell_type_n": 35, "assay_a": 35, "assay_n": 35, "tissue_a": 35, "tissue_n": 35, "tissue_general_a": 35, "tissue_general_n": 35, "disease_a": 35, "mondo": [35, 41], "disease_n": 35, "self_reported_ethnicity_a": 35, "hancestro": [35, 54], "self_reported_ethnicity_n": 35, "sex_a": 35, "pato": [35, 41, 54, 57], "sex_n": 35, "suspension_type_a": 35, "suspension_type_n": 35, "organism_label": 35, "machin": [35, 42], "somameasur": 35, "somaindexeddatafram": 35, "fill": [35, 52], "remov": [35, 39, 41, 51], "variant": 35, "j": [35, 40, 47, 49, 50], "feature_biotyp": 35, "pin": 35, "clarifi": 35, "feature_1": 35, "feature_m": 35, "dataset_soma_joinid_1": 35, "dataset_soma_joinid_n": 35, "tissue_general_ontology_term_id": [35, 38, 41, 46, 50, 52, 53, 54, 57], "disease_ontology_term_id": [35, 38, 41, 46, 50, 52, 53, 54, 57], "observation_joinid": 35, "self_reported_ethnicity_ontology_term_id": [35, 38, 41, 46, 50, 52, 53, 54, 57], "sex_ontology_term_id": [35, 38, 41, 46, 50, 52, 53, 54, 57], "tissue_ontology_term_id": [35, 38, 41, 45, 46, 50, 52, 53, 54, 57], "handl": [35, 38, 45, 47, 51, 58], "text": 35, "cell_census_build_d": 35, "cell_census_schema_vers": 35, "renam": [35, 41], "move": [35, 58], 
"dataset_presence_matrix": 35, "ascii": 35, "0x22": 35, "stream": 36, "gget": 36, "workflow": [36, 42], "collabor": [36, 40, 42], "predict": [36, 40], "biologi": [36, 52], "gain": 36, "summari": [36, 47], "summar": [36, 38, 41, 55], "leverag": 36, "showcas": [38, 39, 48, 51, 52, 54], "cover": 38, "simpl": [38, 40, 44, 48, 53, 58], "sever": [38, 45, 46], "prefer": [38, 45, 50], "34": [38, 39, 41, 42, 43, 44, 45, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58], "39": [38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58], "think": [38, 44], "piec": 38, "parent": 38, "variou": [38, 40, 45, 55], "analog": 38, "census_info": [38, 39, 41, 43, 47, 49, 50, 55], "census_obj": 38, "want": [38, 48, 51, 54, 58], "pair": [38, 48], "61656118": [38, 45, 50], "37447773": 38, "13035": 38, "1417": 38, "Of": 38, "meta": [38, 51, 53], "consortia": 38, "idea": 38, "Not": 38, "cast": 38, "census_count": 38, "33364242": [38, 55], "56400873": [38, 50, 55], "0008722": [38, 41, 55], "264166": [38, 55], "279635": [38, 55], "drop": [38, 41, 42, 48, 55], "0008780": [38, 55], "25652": [38, 41, 55], "51304": [38, 55], "indrop": [38, 41, 55], "0008919": [38, 55], "89477": [38, 55], "206754": [38, 55], "0008931": [38, 55, 57], "78750": [38, 55], "188248": [38, 55], "1357": [38, 55], "0002113": [38, 55], "179684": [38, 55], "208324": [38, 55], "kidnei": [38, 42, 49, 51, 55], "1358": [38, 55], "0002365": [38, 55], "15577": [38, 55], "31154": [38, 55], "exocrin": [38, 42, 52, 55], "1359": [38, 55], "0002367": [38, 55], "37715": [38, 55], "130135": [38, 55], "prostat": [38, 55], "1360": [38, 55], "0002368": [38, 55], "13322": [38, 55], "26644": [38, 55], "endocrin": [38, 42, 55], "1361": [38, 55], "0002371": [38, 55], "90225": [38, 55], "144962": [38, 55], "bone": [38, 42, 50, 51, 55], "marrow": [38, 50, 51, 55], "1362": [38, 55], "rememb": [38, 51], "omit": 38, "creation": 38, "sort": 38, "census_human_assai": 38, "sort_valu": [38, 42], "ascend": 38, "0009922": [38, 54], "11845077": 
38, "25597563": 38, "0009899": [38, 41, 57], "7559102": 38, "12638794": 38, "0011025": 38, "3872375": 38, "6139786": 38, "0010550": 38, "4062980": 38, "5064268": 38, "sci": [38, 41], "0009900": 38, "2930054": 38, "3139770": 38, "17": [38, 39, 41, 42, 43, 44, 46, 52, 53, 57], "0030004": 38, "915037": 38, "1084235": 38, "transcript": [38, 41], "0030003": [38, 41], "744798": 38, "811422": 38, "0030002": [38, 54], "625175": 38, "642559": 38, "0700003": 38, "146278": 38, "177276": 38, "bd": [38, 41], "rhapsodi": [38, 41], "transcriptom": [38, 39, 41, 43, 49, 50, 51], "0009901": 38, "42397": 38, "121394": 38, "58981": [38, 41], "117962": 38, "0700004": 38, "96145": 38, "0008995": 38, "29128": 38, "0008953": 38, "4693": 38, "9386": 38, "strt": 38, "0010010": 38, "3105": 38, "5244": 38, "cel": 38, "69": [38, 42], "0000129": 38, "268114": 38, "370771": 38, "1038": [38, 39, 43, 47, 49, 50], "48998": 38, "62617": 38, "easi": [38, 48, 52], "fall": [38, 39], "certain": [38, 40, 58], "distribut": [38, 39, 47], "answer": 38, "exemplifi": 38, "stat": 38, "let": [38, 39, 40, 41, 42, 43, 44, 46, 49, 50, 51, 52, 53, 54], "trivial": 38, "human_cell_typ": 38, "syncytiotrophoblast": [38, 54], "placent": [38, 54], "villou": [38, 54], "trophoblast": [38, 41, 42, 49, 50, 54], "extravil": [38, 54], "56400868": [38, 41], "pericyt": [38, 41, 42, 58], "56400869": [38, 41], "56400870": [38, 41], "56400871": [38, 41], "56400872": [38, 41], "focu": [38, 39, 40, 43], "de": 38, "human_cell_type_count": 38, "2673669": 38, "glutamaterg": [38, 42], "1541605": 38, "cd4": [38, 41, 42, 44], "alpha": [38, 41, 42], "1258976": 38, "cd8": [38, 41, 42, 44], "1235987": 38, "classic": [38, 41], "monocyt": [38, 41, 42, 44], "1030996": 38, "microfold": 38, "epithelium": 38, "intestin": [38, 42, 51], "dendrit": [38, 42, 44], "serou": 38, "bronchu": 38, "sperm": [38, 55], "enteroendocrin": 38, "599": 38, "abund": [38, 41], "That": 38, "achiev": [38, 52], "goal": [38, 39, 43, 48], "human_liver_cell_typ": 38, 
"85739": 38, "hepatoblast": 38, "58447": 38, "neoplast": [38, 42], "52431": 38, "erythroblast": 38, "45605": 38, "31388": 38, "pulmonari": [38, 41, 53, 54], "arteri": 38, "endotheli": [38, 41, 42, 49, 51, 58], "germin": 38, "center": 38, "b": [38, 41, 42, 44, 54], "pneumocyt": [38, 41], "innat": 38, "lymphoid": 38, "126": [38, 58], "go": 38, "sake": [38, 41, 48], "t_cells_list": 38, "t_cells_diseas": 38, "f": [38, 39, 40, 41, 42, 43, 44, 45, 46, 49, 50, 51, 52, 57, 58], "hodgkin": 38, "lymphoma": 38, "blood": [38, 49, 51, 53, 54], "62499": 38, "819428": 38, "30578": 38, "nose": 38, "respiratori": [38, 41, 55], "saliva": 38, "41": [38, 42], "crohn": 38, "colon": 38, "17490": 38, "52029": 38, "down": 38, "syndrom": 38, "181": 38, "breast": 38, "cancer": [38, 41], "1850": 38, "chronic": [38, 41, 54], "obstruct": [38, 41, 54], "9382": 38, "rhiniti": 38, "909": 38, "clear": [38, 40, 51], "renal": [38, 41, 49, 50], "carcinoma": [38, 41, 54], "6548": 38, "20540": 38, "lymph": 38, "cystic": [38, 41], "fibrosi": [38, 41, 54], "follicular": 38, "1089": 38, "influenza": 38, "8871": 38, "interstiti": [38, 41, 42, 53, 54], "1803": 38, "benign": 38, "neoplasm": 38, "oncocytoma": 38, "2408": 38, "adenocarcinoma": [38, 41, 54], "205": 38, "3274": 38, "507": 38, "215013": 38, "24969": 38, "pleural": 38, "fluid": 38, "11558": 38, "5922": 38, "lymphangioleiomyomatosi": [38, 41, 54], "513": 38, "36573": 38, "nonpapillari": 38, "adipos": [38, 51], "4828": 38, "288": [38, 49], "clot": 38, "1717": 38, "69136": 38, "pleomorph": [38, 41, 54], "1715": 38, "pneumonia": [38, 41, 54], "856": [38, 48], "1671": 38, "disord": 38, "34301": 38, "squamou": [38, 41, 42, 54], "52053": 38, "lupu": 38, "erythematosu": 38, "355471": 38, "don": [38, 43, 45, 47, 51, 54], "forget": [38, 43, 45, 47, 54], "del": [38, 39, 40, 41], "opportun": 39, "inter": 39, "exhaust": 39, "proper": 39, "ignor": [39, 40, 41, 42, 43, 44, 46, 48, 52], "sc": [39, 40, 41, 42, 43, 44, 53], "home": [39, 41, 43], "ssm": [39, 41, 
43], "lib": [39, 41, 43], "python3": [39, 41, 43], "_set": 39, "63": [39, 42], "userwarn": [39, 41, 43], "longer": 39, "run": [39, 40, 42, 44, 53, 58], "70": [39, 42], "dl_pin_memory_gpu_train": 39, "deprec": 39, "pin_memori": 39, "loader": 39, "tqdm": [39, 41, 43], "auto": [39, 41, 43], "tqdmwarn": [39, 41, 43], "iprogress": [39, 41, 43], "jupyt": [39, 41, 43], "ipywidget": [39, 41, 43], "user_instal": [39, 41, 43], "autonotebook": [39, 41, 43], "notebook_tqdm": [39, 41, 43], "tabula": [39, 41, 43, 49, 50], "muri": [39, 43, 50], "seni": [39, 43, 50], "census_dataset": [39, 41, 49, 50], "tabula_liv": 39, "loc": [39, 49], "525": [39, 43], "0b9d8a04": [39, 43, 50], "bb9d": [39, 43, 50], "44da": [39, 43, 50], "aa27": [39, 43, 50], "705bb65b54eb": [39, 43, 50], "s41586": [39, 43, 47, 49, 50], "020": [39, 43, 49, 50], "2496": [39, 43, 50], "4546e757": [39, 43], "34d0": [39, 43], "4d17": [39, 43], "be06": [39, 43], "538318925fcd": [39, 43], "atla": [39, 41, 43, 49, 50, 51], "cha": [39, 43], "2859": [39, 43], "547": 39, "6202a243": [39, 51], "b713": [39, 51], "4e12": [39, 51], "9ced": [39, 51], "c387f8483dea": [39, 51], "7294": [39, 51], "tabula_muris_liver_id": 39, "smart_seq_gene_length": 39, "to_numpi": [39, 41, 42, 43, 46, 48, 52], "smart_seq_index": 39, "smart_seq_x": 39, "proce": [39, 43], "ceil": 39, "put": [39, 52], "omic": [39, 52], "yosef": 39, "lab": [39, 41, 49, 50, 52], "uc": [39, 40, 52], "berkelei": 39, "variat": [39, 40], "infer": [39, 58], "deep": 39, "scrna": [39, 41], "comprehens": 39, "best": [39, 40], "practic": [39, 43], "strength": 39, "bread": [39, 41], "butter": [39, 41], "neighbor": [39, 40, 41, 42, 43, 44, 46, 52], "graph": [39, 40], "visual": [39, 40, 41, 42, 44], "umap": [39, 40, 41, 42, 43, 44, 46, 52], "But": [39, 51], "save": [39, 46, 50, 52, 53, 58], "normalize_tot": [39, 40, 41, 42, 43, 44], "target_sum": [39, 40, 41, 42, 43, 44], "1e4": [39, 41, 42, 43, 44], "log1p": [39, 40, 41, 42, 43, 44], "max_valu": [39, 41, 42, 43], "Then": [39, 
42, 43, 46, 47, 52, 58], "final": [39, 40, 42, 43, 46, 48, 49, 51, 52, 56, 58], "tl": [39, 40, 41, 42, 43, 44, 46, 52], "pca": [39, 41, 42, 43], "n_neighbor": [39, 40, 42, 44], "n_pc": [39, 42], "40": [39, 42], "pl": [39, 40, 41, 42, 43, 44, 46, 52, 53], "color": [39, 40, 41, 42, 43, 44, 46, 52], "plot": [39, 40, 41, 42, 43, 44, 46, 52], "_tool": [39, 41, 43], "scatterplot": [39, 40, 41, 43], "392": [39, 41, 43], "No": [39, 41, 43], "colormap": [39, 41, 43], "cmap": [39, 41, 43], "cax": [39, 41, 43], "scatter": [39, 40, 41, 43, 44, 46, 52], "strong": [39, 41], "properli": 39, "principl": 39, "randomli": [39, 40], "whenev": 39, "evidenc": 39, "articl": 39, "health": 39, "sikkema": 39, "et": [39, 51], "al": [39, 51], "whom": 39, "perfom": 39, "43": [39, 42, 49, 56], "great": [39, 43], "place": [39, 58], "latent": [39, 40, 44], "setup_anndata": 39, "vae": 39, "n_layer": 39, "n_latent": 39, "gene_likelihood": 39, "nb": 39, "n_hidden": 39, "50": [39, 42, 46, 54], "gpu": [39, 42, 44], "tpu": 39, "tf_cpp_min_log_level": 39, "rerun": [39, 40], "info": [39, 41, 44, 53], "max_epoch": 39, "ipu": 39, "hpu": 39, "epoch": [39, 58], "00": [39, 43, 46], "15it": 39, "v_num": 39, "train_loss_step": 39, "545": 39, "train_loss_epoch": 39, "560": 39, "trainer": [39, 42], "stop": [39, 51], "17it": 39, "represent": [39, 40, 42], "x_scvi": 39, "get_latent_represent": [39, 44], "use_rep": [39, 40, 42, 44, 46, 52], "mainli": 39, "driven": [39, 40], "albeit": 39, "contribut": [39, 41, 46, 52], "discret": 39, "curat": [39, 47, 53], "dure": [39, 42], "strongli": 39, "22": [39, 41, 42, 44, 53, 55, 57], "dataset_id_donor_id": 39, "astyp": [39, 40, 42], "23": [39, 41, 42, 49, 53], "24": [39, 41, 42, 49, 57], "27it": 39, "520": 39, "550": 39, "25it": 39, "mostli": [39, 41], "nucleu": [39, 52, 54], "accomplish": [39, 41], "latter": [39, 54], "knowledg": 40, "later": [40, 42, 44, 46, 52], "journei": 40, "2d": [40, 46, 52], "involv": 40, "nonlinear": 40, "transform": [40, 41, 42, 43, 44, 52], "Such": 
40, "affect": [40, 58], "manifold": 40, "overclust": 40, "reduct": [40, 51], "mind": [40, 55], "hypothes": 40, "focus": 40, "ultim": 40, "underli": [40, 58, 59], "investig": 40, "behind": 40, "One": 40, "foundat": [40, 52], "technic": 40, "often": 40, "could": [40, 44, 58], "pure": 40, "systemat": 40, "bias": [40, 41], "complic": 40, "matter": 40, "techniqu": 40, "nearest": 40, "themselv": 40, "amplifi": [40, 42], "rigor": 40, "benchmark": 40, "fulli": 40, "space": [40, 42], "highlight": 40, "challeng": 40, "unsolv": 40, "problem": 40, "briefli": [40, 53], "illustr": [40, 52], "capac": 40, "captur": 40, "intrigu": 40, "phenomena": 40, "disclaim": 40, "depth": [40, 41, 43], "insight": [40, 52], "glean": 40, "inaccur": 40, "leidenalg": 40, "hdbscan": 40, "scikit": 40, "warn": [40, 41, 42, 44, 46, 52], "filterwarn": [40, 42, 44, 46, 52], "def": [40, 48, 58], "remove_missing_embedding_cel": 40, "emb_nam": [40, 46], "miss": [40, 44, 48, 52], "intersect": 40, "accordingli": 40, "filt": 40, "ones": 40, "nan_row_sum": 40, "isnan": [40, 48], "total_column": 40, "generate_umaps_from_embed": 40, "euclidean": 40, "key_ad": 40, "neighbors_kei": 40, "x_emb_nam": 40, "x_": 40, "_": [40, 52], "_umap": 40, "x_umap": 40, "var_nam": [40, 41, 42, 44], "scgpt": [40, 52], "obs_df": [40, 45, 46, 48, 52, 55, 57], "n_subset_cel": 40, "150000": 40, "idx_rand": 40, "choic": [40, 42, 44, 53], "soma_joinids_subset": 40, "tolist": [40, 41, 44, 45], "799353": 40, "distinctli": 40, "oca2": 40, "marker": [40, 44], "kit": 40, "vari": 40, "immatur": 40, "clearli": 40, "slight": 40, "extens": [40, 51], "concentr": 40, "seen": 40, "satellit": 40, "signatur": 40, "probabl": [40, 42, 44, 58], "mani": [40, 48, 58], "disconnect": 40, "compon": 40, "tend": 40, "extent": 40, "versu": 40, "unclear": 40, "qualit": 40, "pronounc": 40, "basi": 40, "geneformer_umap": 40, "use_raw": 40, "scgpt_umap": 40, "uce_umap": 40, "scvi_umap": 40, "subclust": 40, "leiden": [40, 42, 44], "emploi": 40, "densiti": 40, 
"pairwis": 40, "distanc": [40, 48], "compar": [40, 44], "reveal": [40, 41], "distinct": [40, 58], "approach": 40, "signific": [40, 55], "agreement": 40, "mutual": 40, "nmi": 40, "score": [40, 44], "assign": [40, 48], "yield": 40, "65": [40, 42], "inher": 40, "expect": [40, 41, 43, 52], "finetun": 40, "homogen": [40, 58], "belong": 40, "underscor": 40, "draw": 40, "coupl": 40, "conclus": 40, "lead": 40, "identif": 40, "evid": 40, "examin": [40, 58], "relianc": 40, "unjustifi": 40, "known": 40, "phenomenon": 40, "cross": [40, 41], "fuller": 40, "much": [40, 45], "hold": [40, 58], "lack": 40, "necessit": 40, "thereof": 40, "pd": [40, 41, 48, 56, 57, 58], "pdist": 40, "squareform": 40, "sklearn": [40, 44], "normalized_mutual_info_scor": 40, "adata_rbn": 40, "_connect": 40, "_leiden": 40, "pairwise_dist": 40, "_hdbscan": 40, "min_cluster_s": 40, "min_sampl": 40, "precomput": [40, 55], "fit_predict": 40, "embedding_kei": 40, "sim_scores_leiden": 40, "len": [40, 41, 42, 44, 45, 48, 50, 51, 58], "sim_scores_hdbscan": 40, "embedding_i": 40, "enumer": 40, "embedding_j": 40, "sim_scores_leiden_t": 40, "sim_scores_hdbscan_t": 40, "seem": [40, 41], "log": [40, 41, 43, 44], "019350262700332705": 40, "10823680188668149": 40, "33544664134758767": 40, "7692425249981675": 40, "512967": 40, "699360": 40, "656060": 40, "608826": 40, "587517": 40, "816612": 40, "075175": 40, "048565": 40, "012763": 40, "286486": 40, "096839": 40, "345248": 40, "11896761": 40, "th": 40, "wherea": [40, 52], "tendenc": 40, "condit": [40, 54], "glioblastoma": 40, "pilocyt": 40, "astrocytoma": 40, "mix": 40, "outsid": 40, "53d208b0": [40, 41, 49], "2cfd": [40, 41, 49], "4366": [40, 41, 49], "9866": [40, 41, 49], "c3c6114081bc": [40, 41, 49], "smartseq": 40, "cftr": 40, "rare": 40, "recogniz": 40, "summary_t": 41, "980": [41, 56], "2907156": 41, "6011592": 41, "lung_ob": 41, "5945423": 41, "9f222629": [41, 53], "9e39": [41, 53], "47d0": [41, 53], "b83f": [41, 53], "e08d610c7479": [41, 53], "nativ": [41, 55], 
"0000003": [41, 45, 55], "0000461": [41, 54, 57], "5945426": 41, "ciliat": [41, 42], "columnar": [41, 42], "tracheobronchi": 41, "tree": 41, "0002145": 41, "57": [41, 42], "hsapdv": [41, 54], "0000151": 41, "0002771": 41, "0000384": [41, 57], "5945428": 41, "0000625": [41, 45], "0005097": 41, "5945432": 41, "0000624": [41, 45], "0005061": 41, "5945441": 41, "2907151": 41, "8c42cfd0": [41, 49, 50, 53], "0b0a": [41, 49, 50, 53], "46d5": [41, 49, 50, 53], "910c": [41, 49, 50, 53], "fc833d83c45e": [41, 49, 50, 53], "0000669": [41, 45], "0000145": 41, "0000383": [41, 57], "2907152": 41, "2907153": 41, "2907154": 41, "2907155": 41, "deeper": 41, "dive": 41, "characterist": 41, "set_index": [41, 44, 48, 50, 56, 57], "f171db61": [41, 49, 50, 54], "e57": [41, 49, 50, 54], "4535": [41, 49, 50, 54], "a06a": [41, 49, 50, 54], "35d8b6ef8f2b": [41, 49, 50, 54], "multiom": [41, 49, 50], "developm": [41, 49, 50], "donor_p13_trophoblast": [41, 49, 50], "ecf2e08": [41, 49, 50], "2032": [41, 49, 50], "4a9e": [41, 49, 50], "b466": [41, 49, 50], "b65b395f4a02": [41, 49, 50], "74cff64f": [41, 49, 50], "9da9": [41, 49, 50], "4b2a": [41, 49, 50], "9b3b": [41, 49, 50], "8a04a1598040": [41, 49, 50], "vivo": [41, 49, 50], "5af90777": [41, 49, 50], "6760": [41, 49, 50], "4003": [41, 49, 50], "9dba": [41, 49, 50], "8f945fec6fdf": [41, 49, 50], "intr": [41, 49, 50], "bd65a70f": [41, 49, 50], "b274": [41, 49, 50], "4133": [41, 49, 50], "b9dd": [41, 49, 50], "0d1431b6af34": [41, 49, 50], "multiregion": [41, 49, 50], "imm": [41, 49, 50], "f9ad5649": [41, 49, 50], "f372": [41, 49, 50], "43e1": [41, 49, 50], "a3a8": [41, 49, 50], "423383e5a8a2": [41, 49, 50], "molecular": [41, 49, 50], "character": [41, 43, 49, 50, 51], "vuln": [41, 49, 50], "456e8b9b": [41, 49, 50], "f872": [41, 49, 50], "488b": [41, 49, 50], "871d": [41, 49, 50], "94534090a865": [41, 49, 50], "peripher": [41, 49, 50], "immun": [41, 49, 50, 51], "respon": [41, 49, 50], "589": [41, 49, 50], "2adb1f8a": [41, 49, 50, 54], "a6b1": [41, 
49, 50, 54], "4909": [41, 49, 50, 54], "8ee8": [41, 49, 50, 54], "484814e2d4bf": [41, 49, 50, 54], "landscap": [41, 49, 50], "sing": [41, 49, 50], "590": [41, 49, 50], "e04daea4": [41, 49, 50], "4412": [41, 49, 50], "45b5": [41, 49, 50], "989e": [41, 49, 50], "76a9be070a89": [41, 49, 50], "krasnow": [41, 49, 50], "591": [41, 49, 50], "592": [41, 49, 50], "append": [41, 52], "dataset_cell_count": 41, "cell_count": 41, "merg": [41, 42, 52, 56], "1e6a6ef9": 41, "7ec9": 41, "4c90": 41, "bbfb": 41, "2ad3c3165fd1": 41, "1028006": 41, "resolut": [41, 53], "luca": 41, "ex": 41, "314": 41, "784630": 41, "f7c1c579": 41, "2dc0": 41, "47e2": 41, "ba19": 41, "8165c5a0e353": 41, "217738": 41, "fetal": 41, "survei": 41, "embryon": 41, "483": 41, "d8da613f": 41, "e681": 41, "4c69": 41, "b463": 41, "e94f5e66847f": 41, "116313": 41, "lethal": 41, "80": [41, 42, 55], "576f193c": 41, "75d0": 41, "4a11": 41, "bd25": 41, "8676587e6dc2": 41, "90384": 41, "htan": 41, "msk": 41, "377": 41, "d41f45c1": 41, "1b7b": 41, "4573": 41, "a998": 41, "ac5c5acb1647": 41, "82991": 41, "reg": 41, "regulatori": 41, "58": [41, 42], "3dc61ca1": 41, "ce40": 41, "46b6": 41, "8337": 41, "f27260fd9a03": 41, "71752": 41, "uncov": 41, "proxima": 41, "325": 41, "60993": 41, "2672b679": 41, "8048": 41, "4f5e": 41, "9786": 41, "f1b196ccfd08": 41, "57019": 41, "spleen": [41, 49, 51], "parenchyma": 41, "416": 41, "9dbab10c": 41, "118d": 41, "496b": 41, "966a": 41, "67f1763a6b7d": 41, "49014": 41, "criti": 41, "482": 41, "9968be68": 41, "ab65": 41, "4a38": 41, "9e1a": 41, "c9b6abece194": 41, "47909": 41, "chart": 41, "endod": 41, "78": [41, 42], "3de0ad6d": 41, "4378": 41, "4f62": 41, "b37b": 41, "ec0b75a50d94": 41, "46500": 41, "lungmap": 41, "broad": 41, "ag": [41, 43, 51], "healthi": 41, "456": 41, "2f132ec9": 41, "24b5": 41, "422f": 41, "9be0": 41, "ccef03b4fe28": 41, "39778": 41, "sar": 41, "cov": 41, "receptor": [41, 55], "ace2": [41, 53], "tmprss2": 41, "prima": 41, "312": 41, "1e5bd3b8": 41, "6a0e": 41, 
"4959": 41, "8d69": 41, "cafed30fe814": 41, "35699": 41, "emphysema": [41, 54], "130": 41, "35682": [41, 49], "475": [41, 49], "1b9d8702": 41, "5af8": 41, "4142": 41, "85ed": 41, "020eb06ec4f6": 41, "35419": 41, "tiss": 41, "411": 41, "4ed927e9": 41, "c099": 41, "49af": 41, "b8ce": 41, "a2652d069333": 41, "35284": 41, "367": 41, "33698": 41, "4b6af54a": 41, "4a21": 41, "46e0": 41, "bc8d": 41, "673c0561a836": 41, "18386": 41, "01209dce": 41, "3575": 41, "4bed": 41, "b1df": 41, "129f57fbc031": 41, "11059": 41, "8657": 41, "f9846bb4": 41, "784d": 41, "4582": 41, "92c1": 41, "3f279e4c6f0c": 41, "176": [41, 49], "fibroblast": [41, 42, 53, 55], "smooth": 41, "muscl": [41, 42, 49, 51], "317": 41, "f64e1be1": 41, "de15": 41, "4d27": 41, "8da4": 41, "82225cd4c035": 41, "55": [41, 42, 57], "370": 41, "810ac45f": 41, "8969": 41, "4698": 41, "b42c": 41, "652f802f75c2": 41, "endothelium": 41, "320": 41, "0ba16f4b": 41, "cb87": 41, "4fa3": 41, "9363": 41, "19fc51eec6e7": 41, "myeloid": [41, 42], "326": 41, "reprens": 41, "divers": [41, 45, 49, 52], "plastic": 41, "tumor": 41, "neutrophil": 41, "subpopul": 41, "distal": 41, "gradient": 41, "differenti": [41, 42], "regul": 41, "epitheli": [41, 42, 49, 51, 55, 58], "fate": 41, "tell": 41, "1236968": 41, "702074": 41, "262323": 41, "122902": 41, "97432": 41, "65220": 41, "41852": 41, "25662": 41, "8638": 41, "8016": 41, "1164084": 41, "772120": 41, "331019": 41, "209675": 41, "120796": 41, "55254": 41, "51343": 41, "45714": 41, "31923": 41, "31792": 41, "31540": 41, "21167": 41, "17590": 41, "12374": 41, "10765": 41, "1402565": 41, "1122990": 41, "381601": 41, "2468587": 41, "438569": 41, "head": [41, 49], "alveolar": [41, 55], "macrophag": [41, 42], "291507": 41, "263362": 41, "211456": 41, "189471": 41, "154415": 41, "ii": 41, "128463": 41, "lower": [41, 56, 58], "tract": 41, "105090": 41, "102303": 41, "killer": [41, 42, 51, 53], "95953": 41, "92846": 41, "stromal": [41, 42, 49, 51], "87714": 41, "81125": 41, "malign": 41, 
"75917": 41, "plasma": 41, "64551": 41, "59353": 41, "45305": 41, "capillari": 41, "39416": 41, "36381": 41, "36049": 41, "35467": 41, "2576327": 41, "147410": 41, "alveolu": 41, "54085": 41, "lingula": 41, "upper": [41, 49], "lobe": 41, "right": [41, 51], "32099": 41, "17854": 41, "12880": 41, "10113": 41, "9276": 41, "7981": 41, "middl": 41, "3847": 41, "lung_var": 41, "ensg00000121410": [41, 49], "a1bg": [41, 49], "3999": [41, 49], "ensg00000268895": [41, 49], "as1": [41, 49], "3374": [41, 49], "ensg00000148584": [41, 49], "a1cf": [41, 49], "9603": [41, 49], "ensg00000175899": [41, 49], "a2m": [41, 49], "6318": [41, 49], "ensg00000245105": [41, 49], "2948": [41, 49], "ensg00000288719": [41, 49], "rp4": [41, 49], "669p10": [41, 49], "ensg00000288720": [41, 49], "rp11": [41, 49], "852e15": [41, 49], "7007": [41, 49], "ensg00000288721": [41, 49], "rp5": [41, 49], "973n23": [41, 49], "7765": [41, 49], "ensg00000288723": [41, 49], "553n16": [41, 49], "1015": [41, 49], "ensg00000288724": [41, 49], "rp13": [41, 49], "546i2": [41, 49], "625": [41, 49], "60664": [41, 46, 49, 52, 58], "actual": [41, 58], "mislead": 41, "know": [41, 51, 54], "presence_matrix": [41, 43, 49], "get_presence_matrix": [41, 43, 49], "a1": 41, "17811": 41, "50259": 41, "44150": 41, "34265": 41, "22447": 41, "23642": 41, "26347": 41, "20921": 41, "24672": 41, "27705": 41, "27243": 41, "26323": 41, "27181": 41, "23203": 41, "57042": 41, "32610": 41, "29620": 41, "26454": 41, "23705": 41, "38676": 41, "47307": 41, "23740": 41, "22552": 41, "20594": 41, "19952": 41, "uint64": 41, "genes_measur": 41, "var_somaid": 41, "nonzero": [41, 43], "ensg00000128274": 41, "a4galt": 41, "3358": 41, "ensg00000094914": 41, "aaa": 41, "4727": 41, "ensg00000081760": 41, "aac": 41, "16039": 41, "29951": 41, "ensg00000177272": 41, "kcna3": 41, "2476": 41, "30157": 41, "ensg00000184709": 41, "lrrc26": 41, "1209": 41, "30185": 41, "ensg00000087250": 41, "mt3": 41, "1679": 41, "30202": 41, "ensg00000136352": 41, "nkx2": 
41, "3165": 41, "30512": 41, "ensg00000231439": 41, "wasir2": 41, "1054": 41, "11595": 41, "composit": 41, "infect": 41, "12k": 41, "intens": 41, "exercis": 41, "exploratori": 41, "000": 41, "lung_cell_subsampled_n": 41, "100000": 41, "lung_cell_subsampled_id": 41, "random_st": 41, "lung_gene_id": 41, "lung_adata": 41, "highest_expr_gen": 41, "n_top": 41, "calculate_qc_metr": 41, "percent_top": 41, "inplac": [41, 44], "violin": [41, 44], "n_genes_by_count": 41, "rotat": 41, "90": 41, "total_count": 41, "outlier": 41, "exlcud": 41, "rest": 41, "ll": [41, 43, 52, 57], "extra": 41, "_highly_variable_gen": 41, "_simpl": 41, "843": 41, "view": [41, 52, 53, 56], "view_to_actu": 41, "28": [41, 42, 53, 58], "n_cell_typ": 41, "drop_dupl": [41, 54], "randint": 41, "rang": [41, 42, 44, 46, 52, 58], "06x": 41, "0xffffff": 41, "palett": 41, "legend_loc": 41, "hard": 41, "32": [41, 42, 58], "top_cell_typ": 41, "reset_index": [41, 48], "lung_adata_top_cell_typ": 41, "cziscienc": [42, 44, 47, 52, 53], "unix": [42, 44], "mkdir": [42, 44], "p": [42, 44, 47, 48, 56], "wget": [42, 44], "nv": [42, 44], "pbmc3k_filtered_gene_bc_matric": [42, 44], "tar": [42, 44], "gz": [42, 44], "cf": [42, 44], "10xgenom": [42, 44], "exp": [42, 44], "pbmc3k": [42, 44], "xzf": [42, 44], "url": [42, 44], "7621991": [42, 44], "gt": [42, 44, 49, 53], "deatail": [42, 44], "insid": [42, 44], "geneformer_info": 42, "get_embedding_metadata_by_nam": [42, 44, 46], "model_link": [42, 44, 52], "contrib": [42, 44, 46, 52], "cli": [42, 50], "fine_tuned_geneform": 42, "json": [42, 52], "datacollatorforcellclassif": 42, "embextractor": 42, "transcriptometoken": 42, "bertforsequenceclassif": 42, "test": [42, 45, 58], "ensembl_id": [42, 44], "ensg00000139618": 42, "suffix": 42, "n_count": [42, 44], "joinid": [42, 44, 49, 52], "write": [42, 50], "disk": 42, "read_10x_mtx": [42, 44], "filtered_gene_bc_matric": [42, 44], "hg19": [42, 44], "gene_id": [42, 44], "h5ad_dir": 42, "makedir": 42, "track": 42, "token_dir": 42, 
"tokenized_data": 42, "custom_attr_name_dict": 42, "tokenize_data": 42, "data_directori": 42, "output_directori": 42, "output_prefix": 42, "file_format": 42, "filter_pass": 42, "model_dir": 42, "label_mapping_dict_fil": 42, "label_to_cell_subclass": 42, "fp": 42, "label_mapping_dict": 42, "best4": 42, "cn": 42, "sensu": 42, "vertebrata": 42, "gabaerg": 42, "abnorm": 42, "adventiti": [42, 53], "anim": 42, "cardiocyt": 42, "skelet": 42, "cuboid": 42, "contractil": 42, "defens": 42, "duct": 42, "ecto": 42, "ectoderm": 42, "endo": 42, "pancrea": [42, 49, 51], "urethra": 42, "eukaryot": 42, "fat": [42, 49], "germ": [42, 55], "glandular": 42, "35": [42, 58], "glial": 42, "37": 42, "hematopoiet": [42, 54], "38": [42, 53, 56], "precursor": 42, "hepatocyt": 42, "inflammatori": 42, "interneuron": [42, 49], "42": 42, "ionocyt": 42, "44": [42, 53], "45": [42, 56], "46": 42, "leukocyt": [42, 58], "47": 42, "lymphocyt": 42, "48": [42, 48], "49": 42, "mammari": [42, 51], "mesenchym": [42, 53], "52": [42, 44, 48], "meso": 42, "mesoderm": 42, "motor": 42, "mural": 42, "59": [42, 51], "myofibroblast": 42, "neural": 42, "termin": 42, "ovarian": 42, "surfac": 42, "67": [42, 56], "phagocyt": 42, "pigment": 42, "cultur": [42, 55], "71": 42, "primordi": 42, "progenitor": [42, 53], "73": 42, "salivari": 42, "sebac": 42, "75": [42, 49], "secretori": 42, "76": 42, "sensori": 42, "77": 42, "seromucu": 42, "secret": [42, 53], "somat": 42, "79": 42, "stem": [42, 53, 54, 57], "81": [42, 48], "82": 42, "83": [42, 48, 56], "84": 42, "transit": 42, "85": 42, "86": 42, "87": 42, "vertebr": 42, "load_from_disk": 42, "num_row": 42, "2700": 42, "dummi": [42, 44], "add_column": 42, "slow": 42, "pretrain": 42, "from_pretrain": 42, "data_col": 42, "vector": 42, "predicted_label_id": 42, "argmax": [42, 58], "predicted_logit": 42, "predicted_label": 42, "predicted_cell_subclass": 42, "predicted_cell_subclass_prob": 42, "min_mean": 42, "0125": 42, "max_mean": 42, "min_disp": 42, "svd_solver": 42, "arpack": 
42, "scapi": 42, "original_cell_typ": [42, 44], "cd14": [42, 44], "fcgr3a": [42, 44], "megakaryocyt": [42, 44], "rename_categori": 42, "titl": [42, 46, 52], "shorter": 42, "panel": 42, "n_class": 42, "output_dir": 42, "geneformer_embed": 42, "embex": 42, "model_typ": 42, "cellclassifi": 42, "num_class": 42, "max_ncel": 42, "emb_label": 42, "emb_lay": 42, "forward_batch_s": 42, "nproc": 42, "extract_emb": 42, "model_directori": 42, "input_data_fil": 42, "grab": [42, 45, 49, 52, 56], "c697eaaf": [42, 44], "a3b": [42, 44], "4251": [42, 44], "b036": [42, 44], "5f9052179e70": [42, 44], "f2a488bf": [42, 44], "782f": [42, 44], "4c20": [42, 44], "a8e5": [42, 44], "cb34d48c1f7e": [42, 44], "fa8605cf": [42, 44], "f27e": [42, 44], "44af": [42, 44], "ac2a": [42, 44], "476bee4410d3": [42, 44], "3c75a463": [42, 44], "6a87": [42, 44], "4132": [42, 44], "83a8": [42, 44], "c3002624394d": [42, 44], "adata_censu": [42, 44], "simplifi": [42, 48], "shared_gen": 42, "index_subset": [42, 44], "3000": [42, 44], "adata_join": 42, "outer": 42, "liver": [43, 51], "liver_dataset": 43, "liver_dataset_id": 43, "liver_adata": 43, "859": 43, "52392": [43, 48, 50, 56], "gene_pres": 43, "17992": 43, "992": 43, "toarrai": [43, 52], "000e": 43, "590e": 43, "02": [43, 44, 46, 47, 52], "969e": 43, "03": [43, 46, 49, 50], "280e": 43, "250e": 43, "400e": 43, "gene_length": 43, "00000000e": [43, 46], "58654413e": 43, "32001885e": 43, "74444813e": 43, "31455088e": 43, "71500419e": 43, "78985747e": 43, "real": 43, "filter_cel": 43, "min_gen": 43, "filter_gen": 43, "min_cel": 43, "saniti": 43, "prepar": 44, "pbmc": 44, "3k": 44, "scvi_info": 44, "pt": 44, "cp": [44, 50], "randomforestclassifi": 44, "unassign": 44, "model_filenam": 44, "prepare_query_anndata": 44, "is_train": 44, "trick": 44, "reprsent": 44, "vae_q": 44, "load_query_data": 44, "gene_symbol": [44, 53], "notnul": 44, "perfectli": 44, "appropri": 44, "markers_row1": 44, "il7r": 44, "lyz": 44, "ms4a1": 44, "cd8a": 44, "gnly": 44, "markers_row2": 
44, "nkg7": 44, "ms4a7": 44, "fcer1a": 44, "cst3": 44, "ppbp": 44, "catch_warn": 44, "nk": 44, "label_map": 44, "adata_census_subset": 44, "adata_combin": 44, "correl": 44, "forest": 44, "classifi": 44, "rfc": 44, "predicted_cell_typ": [44, 58], "confid": 44, "predict_proba": 44, "classes_": [44, 58], "predicted_cell_type_prob": 44, "enough": [45, 48], "itself": 45, "tip": 45, "soma_df": 45, "faster": 45, "refin": 45, "record": 45, "_obs_": 45, "unique_cell_type_ontology_term_id": 45, "lot": 45, "top_10": 45, "nthe": 45, "0000525": [45, 54], "2000060": [45, 54], "0008036": [45, 54], "0002488": 45, "0002343": 45, "0000084": 45, "0001078": 45, "0000815": 45, "0000235": 45, "3000001": 45, "0000540": 45, "7665340": 45, "0000679": 45, "1894047": 45, "0000128": 45, "1881077": 45, "1508920": 45, "1477453": 45, "1419507": 45, "0000057": 45, "1397813": 45, "0000860": 45, "1369142": 45, "1308000": [45, 55], "4023040": 45, "1229658": 45, "occurr": 45, "lung_tissu": 45, "ntop": 45, "185": 45, "0002063": 45, "0000775": 45, "0001044": 45, "0001050": 45, "0000814": 45, "0000071": 45, "0000192": 45, "0002503": 45, "0002370": 45, "562038": 45, "0000583": 45, "526859": 45, "323985": 45, "323610": 45, "266333": 45, "255425": 45, "205013": 45, "0000623": 45, "164944": 45, "0001064": 45, "149067": 45, "0002632": 45, "132243": 45, "0002082": 45, "ooo2084": 45, "0002080": 45, "0000746": 45, "49929": 45, "0008034": 45, "33361": 45, "0002548": 45, "33180": 45, "0002131": 45, "30915": 45, "0000115": 45, "30054": 45, "18391": 45, "0000763": 45, "14408": 45, "13552": 45, "9690": 45, "0002144": 45, "9025": 45, "labl": 45, "cols_to_queri": 45, "complet": [45, 55], "df": [45, 53], "col": [45, 48, 49], "tuniqu": 45, "372": [46, 52], "axisarrai": [46, 52], "soma_dim_1": [46, 48, 51, 52], "soma_data": [46, 48, 51, 52], "bfloat16": [46, 52], "bit": [46, 52], "expon": [46, 52], "mantissa": [46, 52], "simplest": [46, 52], "nervou": [46, 52], "befor": [46, 52], "correspondong": [46, 52], "31780": [46, 
52], "get_embed": [46, 52], "to_anndata": [46, 52], "obs_joinid": [46, 52], "embeddinng": [46, 52], "stand": [46, 52], "alon": [46, 52], "17187500e": 46, "82995605e": 46, "50000000e": 46, "39941406e": 46, "71606445e": 46, "39843750e": 46, "71115112e": 46, "32031250e": 46, "00781250e": 46, "55310059e": 46, "85009766e": 46, "10156250e": 46, "42614746e": 46, "45312500e": 46, "53295898e": 46, "12915039e": 46, "84765625e": 46, "54113770e": 46, "94531250e": 46, "38281250e": 46, "03149414e": 46, "28881836e": 46, "14111328e": 46, "78125000e": 46, "15234375e": 46, "39562988e": 46, "79687500e": 46, "48388672e": 46, "19628906e": 46, "62803650e": 46, "88446045e": 46, "75694072": 47, "45846761": 47, "16292": 47, "2153": 47, "doi": [47, 52], "1002": 47, "ctm2": 47, "1356": 47, "695": 47, "696": 47, "697": 47, "1016": [47, 49, 50], "isci": 47, "698": 47, "1371": 47, "journal": 47, "699": 47, "700": 47, "cardiac": 47, "atrium": 47, "slice_dataset": 47, "isin": [47, 49], "sep": 47, "1126": [47, 49], "abl4896": [47, 49], "4866a804": 47, "37eb": 47, "436f": 47, "8c87": 47, "9cd585260061": 47, "e5f58829": [47, 49], "1a66": [47, 49], "40b5": [47, 49], "a624": [47, 49], "9046778e74f5": [47, 49], "bfd80f12": 47, "725c": 47, "4482": 47, "ad7f": 47, "1ed2b4909b0d": 47, "e6df8a57": 47, "f54f": 47, "413a": 47, "9d4d": 47, "dee03294d778": 47, "8d599205": 47, "5c51": 47, "4b50": 47, "9d48": 47, "3dec31238587": 47, "f6065c51": 47, "bd26": 47, "4aa5": 47, "a05d": 47, "2805aeea48d9": 47, "8cdbf790": 47, "4d29": 47, "4f46": 47, "9aef": 47, "21adfb2e21da": 47, "mybpc3": 47, "easier": 48, "extract": [48, 58], "experiment_queri": 48, "x_as_seri": 48, "nd": 48, "raw_n": 48, "aka": 48, "iloc": 48, "expens": 48, "var_df": [48, 49, 56], "float64": 48, "coo": 48, "arrow_tbl": 48, "var_dim": 48, "by_var": 48, "errstat": 48, "raw_mean": 48, "ensmusg00000051951": [48, 56], "xkr4": [48, 56], "6094": [48, 56], "202": 48, "032743": 48, "ensmusg00000089699": [48, 56], "gm1992": [48, 56], "250": [48, 56], 
"ensmusg00000102343": [48, 56], "gm37381": [48, 56], "1364": [48, 56], "ensmusg00000025900": [48, 56], "rp1": [48, 56], "12311": [48, 56], "106": 48, "236265": 48, "ensmusg00000025902": [48, 56], "sox17": [48, 56], "4772": [48, 56], "3259": 48, "991975": 48, "52387": [48, 56], "ensmusg00000081591": [48, 56], "btf3": [48, 56], "ps9": [48, 56], "496": [48, 56], "52388": [48, 56], "ensmusg00000118710": [48, 56], "mmu": [48, 56], "mir": [48, 56], "467a": [48, 56], "3_ensmusg00000118710": [48, 56], "52389": [48, 56], "ensmusg00000119584": [48, 56], "rn18": [48, 56], "1849": [48, 56], "52390": [48, 56], "ensmusg00000118538": [48, 56], "gm18218": [48, 56], "970": [48, 56], "52391": [48, 56], "ensmusg00000084217": [48, 56], "setd9": [48, 56], "670": [48, 56], "welford": [48, 57], "npt": 48, "onlinematrixmeanvari": 48, "n_sampl": 48, "n_variabl": 48, "axix": 48, "n_a": 48, "int32": [48, 58], "u_a": 48, "m2_a": 48, "coord_vec": 48, "value_vec": 48, "_mean_variance_upd": 48, "tupl": 48, "m2": 48, "_mean_variance_fin": 48, "max": 48, "jit": 48, "nopython": 48, "col_arr": 48, "val_arr": 48, "squar": 48, "val": 48, "u_prev": 48, "m2_prev": 48, "accont": 48, "chan": 48, "n_b": 48, "u_b": 48, "m2_b": 48, "mvn": 48, "raw_vari": 48, "848": 48, "312801": 48, "169": 48, "182975": 48, "279575": 48, "656207": 48, "malat1": 48, "ptprd": 48, "dlg2": 48, "pcdh9": 48, "n_cells_by_dataset": 48, "multiindex": 48, "from_product": 48, "n_cell": 48, "x_tbl": 48, "to_fram": 48, "get_index": 48, "pick": [48, 50], "3bbb6cf9": 48, "72b9": 48, "41be": 48, "b568": 48, "656de6eb18b5": 48, "ensmusg00000028399": 48, "79578": 48, "58b01044": 48, "c5e5": 48, "4b0f": 48, "8a2d": 48, "6ebf951e01ff": 48, "474": 48, "ensmusg00000052572": 48, "79513": 48, "98e5ea9f": [48, 57], "16d6": [48, 57], "47ec": [48, 57], "a529": [48, 57], "686e76515e39": [48, 57], "908": 48, "66ff82b4": 48, "9380": 48, "469c": 48, "bc4b": 48, "cfa08eacd325": 48, "c08f8441": 48, "4a10": 48, "4748": 48, "872a": 48, "e70c0bcccdba": 48, 
"ensmusg00000055421": 48, "79476": 48, "125": [48, 58], "3027": 48, "2910": 48, "117": 48, "ensmusg00000092341": 48, "79667": 48, "12622": 48, "20094": 48, "7102": 48, "12992": 48, "compil": 49, "n_dataset": 49, "therein": [49, 50], "human_rna": 49, "datasets_df": 49, "e2c257e7": [49, 50], "6f79": [49, 50], "487c": [49, 50], "b81c": [49, 50], "39451cd4ab3c": [49, 50], "023": [49, 50], "05869": [49, 50], "31497": [49, 50], "67070": [49, 50], "286326": [49, 50], "f7cecffa": [49, 50], "00b4": [49, 50], "4560": [49, 50], "a29a": [49, 50], "8ad626b8ee08": [49, 50], "ccell": [49, 50], "001": [49, 50], "270855": [49, 50], "3f50314f": [49, 50], "bdc9": [49, 50], "40c6": [49, 50], "8e4a": [49, 50], "b0901ebfbe4c": [49, 50], "2021": [49, 50], "007": [49, 50], "167283": [49, 50], "180bff9c": [49, 50], "c8a5": [49, 50], "4539": [49, 50], "b13b": [49, 50], "ddbc00d643e6": [49, 50], "s41593": [49, 50], "00764": [49, 50], "8168": [49, 50], "a72afd53": [49, 50], "ab92": [49, 50], "4511": [49, 50], "88da": [49, 50], "252fb0e26b9a": [49, 50], "s41591": [49, 50], "0944": [49, 50], "y": [49, 50], "44721": [49, 50], "38833785": [49, 50], "fac5": [49, 50], "48fd": [49, 50], "944a": [49, 50], "0f62a4c23ed1": [49, 50], "2157": [49, 50], "598266": [49, 50], "5d445965": [49, 50], "6f1a": [49, 50], "4b68": [49, 50], "ba3a": [49, 50], "b8f765155d3a": [49, 50], "2922": [49, 50], "9409": [49, 50], "65662": [49, 50], "593x60664": 49, "16133717": 49, "manipul": 49, "ensg00000286096": 49, "97a17473": 49, "e2b1": 49, "4f31": 49, "a544": 49, "44a60773e2dd": 49, "var_joinid": 49, "dataset_joinid": 49, "is_pres": 49, "tocoo": 49, "ff45e623": 49, "7f5f": 49, "46e3": 49, "b47d": 49, "56be0341f66b": 49, "13497": 49, "f01bdd17": 49, "4902": 49, "40f5": 49, "86e3": 49, "240d66dd2587": 49, "salivary_gland": 49, "27199": 49, "e6a11140": 49, "2545": 49, "46bc": 49, "929e": 49, "da243eed2ca": 49, "11505": 49, "e5c63d94": 49, "593c": 49, "4338": 49, "a489": 49, "e1048599e751": 49, "bladder": [49, 51], "24583": 
49, "d8732da6": 49, "8d1d": 49, "42d9": 49, "b625": 49, "f2416c30054b": 49, "trachea": [49, 51], "9522": 49, "cee11228": 49, "9f0b": 49, "4e57": 49, "afe2": 49, "cfe15ee56312": 49, "34004": 49, "a357414d": 49, "2042": 49, "4eb5": 49, "95f0": 49, "c58604a18bdd": 49, "small_intestin": 49, "12467": 49, "a0754256": 49, "f44b": 49, "4c4a": 49, "962c": 49, "a552e47d3fdc": 49, "10650": 49, "983d5ec9": 49, "40e8": 49, "4512": 49, "9e65": 49, "a572a9c486cb": 49, "50115": 49, "5e5e7a2f": 49, "8f1c": 49, "42ac": 49, "90dc": 49, "b4f80f38e84c": 49, "20263": 49, "55cf0ea3": 49, "9d2b": 49, "4294": 49, "871e": 49, "bb4b49a79fc7": 49, "15020": [49, 58], "4f1555bc": 49, "4664": 49, "46c3": 49, "a606": 49, "78d34dd10d92": 49, "bone_marrow": [49, 50], "12297": 49, "2423ce2c": 49, "3149": 49, "4cca": 49, "a2ff": 49, "cf682ea29b5f": 49, "9641": 49, "1c9eb291": 49, "6d31": 49, "47e1": 49, "96b2": 49, "129b5e1ae64f": 49, "30746": 49, "18eb630b": 49, "a754": 49, "4111": 49, "8cd4": 49, "c24ec80aa5ec": 49, "lymph_nod": 49, "53275": 49, "0d2ee4ac": 49, "05ee": 49, "40b2": 49, "afb6": 49, "ebb584caa867": 49, "0ced5e76": 49, "6040": 49, "47ff": 49, "8a72": 49, "93847965afc0": 49, "thymu": [49, 51], "33664": 49, "283d65eb": 49, "dd53": 49, "496d": 49, "adb7": 49, "7570c7caa443": 49, "1101": [49, 52], "511898": 49, "8e10f1c4": 49, "8e98": 49, "41e5": 49, "b65f": 49, "8cd89a887122": 49, "2480956": 49, "139": 49, "fe1a73ab": 49, "a203": 49, "45fd": 49, "84e9": 49, "0f7fd19efcbd": 49, "dissect": 49, "amygdaloid": 49, "ami": 49, "basolat": 49, "35285": 49, "143": 49, "f8dda921": 49, "5fb4": 49, "4c94": 49, "a654": 49, "c6fc346bfd6d": 49, "cerebr": 49, "cortex": 49, "cx": 49, "occipitotem": 49, "31899": 49, "160": 49, "dd03ce70": 49, "3243": 49, "4c96": 49, "9561": 49, "330cc461e4d7": 49, "perirhin": 49, "23732": 49, "165": 49, "d2b5efc1": 49, "14c6": 49, "4b5f": 49, "bd98": 49, "40f9084872d7": 49, "tail": 49, "hippocampu": 49, "hit": 49, "caudal": 49, "36886": 49, "175": 49, "c4b03352": 49, 
"af8d": 49, "492a": 49, "8d6b": 49, "40f304e0a122": 49, "superclust": 49, "medium": 49, "spini": 49, "152189": 49, "c2aad8fc": 49, "b63b": 49, "4f9b": 49, "9cfd": 49, "baf7bc9c1771": 49, "tempor": 49, "po": 49, "37642": 49, "177": 49, "c202b243": 49, "1aa1": 49, "4b16": 49, "bc9a": 49, "b36241f3b1e3": 49, "amygdala": 49, "excitatori": 49, "109452": 49, "178": 49, "bdb26abd": 49, "f4ba": 49, "4ea3": 49, "8862": 49, "c2340e7a4f55": 49, "cge": 49, "227671": 49, "183": 49, "acae7679": 49, "d077": 49, "461c": 49, "b857": 49, "ee6ccfeb267f": 49, "hih": 49, "ca1": 49, "39147": 49, "196": 49, "9372df2d": 49, "13d6": 49, "4fac": 49, "980b": 49, "919a5b7eb483": 49, "midbrain": 49, "periaqueduct": 49, "grai": 49, "33794": 49, "197": 49, "93131426": 49, "0124": 49, "4ab4": 49, "a013": 49, "9dfbcd99d467": 49, "epithalamu": 49, "eth": 49, "24327": 49, "206": [49, 56], "7c1c3d47": 49, "3166": 49, "43e5": 49, "9a95": 49, "65ceb2d45f78": 49, "pon": 49, "pn": 49, "pontin": 49, "reticular": 49, "49512": 49, "208": 49, "7a0a8891": 49, "9a22": 49, "4549": 49, "a55b": 49, "c2aca23c3a2a": 49, "hippocamp": 49, "74979": 49, "5e5ab909": 49, "f73f": 49, "4b57": 49, "98a0": 49, "6d2c5662f6a4": 49, "inferior": 49, "colliculu": 49, "32306": 49, "3f56901c": 49, "dd4a": 49, "47d6": 49, "b60b": 49, "7b0c0111cfb2": 49, "37911": 49, "3a7f3ab4": 49, "a280": 49, "4b3b": 49, "b2c0": 49, "6dd05614a78c": 49, "splatter": 49, "291833": 49, "249": 49, "35c8a04c": 49, "8639": 49, "4d15": 49, "8228": 49, "765d8d93fc96": 49, "hypothalamu": 49, "hth": 49, "supraopt": 49, "16753": 49, "270": 49, "07b1d7c8": 49, "5c2e": 49, "42f7": 49, "9246": 49, "26f746cd6013": 49, "myelencephalon": 49, "medulla": 49, "oblongata": 49, "27210": 49, "273": 49, "0325478a": 49, "9b52": 49, "b40a": 49, "2e2ab0d72eb1": 49, "intratelencephal": 49, "455006": 49, "483152": 49, "476": 49, "a68b64d8": 49, "aee3": 49, "4947": 49, "81b7": 49, "36b8fe5a44d2": 49, "82478": 49, "477": 49, "c5d88abe": 49, "f23a": 49, "45fa": 49, "a534": 49, 
"788985e93dad": 49, "264824": 49, "478": 49, "5a11f879": 49, "d1ef": 49, "458a": 49, "9b0bdfca5ebf": 49, "31691": 49, "479": 49, "104148": 49, "17481d16": 49, "ee44": 49, "49e5": 49, "bcf0": 49, "28c0780d8c4a": 49, "58109": 49, "ensg00000277745": 49, "h2ab3": 49, "58354": 49, "ensg00000233522": 49, "fam224a": 49, "2031": 49, "58411": 49, "ensg00000183146": 49, "prori": 49, "878": 49, "58523": 49, "ensg00000279274": 49, "533e23": 49, "58632": 49, "ensg00000277836": 49, "27211": 49, "all_experi": 50, "organism_nam": 50, "organism_experi": 50, "experiments_total_cel": 50, "num_cel": 50, "nfound": 50, "5255245": 50, "turn": 50, "toolchain": 50, "0bd1a1d": 50, "3aee": 50, "40e0": 50, "b2ec": 50, "86c7a30c7149": 50, "522": 50, "atl": 50, "40220": [50, 51], "submitt": 50, "direct": 50, "tabula_muris_seni": 50, "explan": 51, "lineag": [51, 52], "jin": 51, "tabula_muris_dataset_id": 51, "48b37086": [51, 53, 57], "25f7": [51, 53, 57], "4ecd": [51, 53, 57], "be66": [51, 53, 57], "f5bb378e3aea": [51, 53, 57], "tabula_muris_ob": 51, "35718": 51, "limb": 51, "28867": 51, "24540": 51, "21647": 51, "20680": 51, "12295": 51, "9275": 51, "lumen": 51, "8945": 51, "8613": 51, "7976": 51, "6777": 51, "6201": 51, "skin": [51, 57], "bodi": [51, 57], "4454": 51, "1887": 51, "tabula_muris_liver_dataset_id": 51, "tabula_muris_liver_ob": 51, "awar": 51, "chanc": 51, "priori": [51, 54], "sai": 51, "nk_cell": 51, "80935": 51, "repeat": 51, "nk_cells_primari": 51, "59109": 51, "aqp5": [51, 54], "adata_primari": 51, "demo": [51, 55], "awai": 51, "8448858": 51, "52812487": 51, "52812553": 51, "52812556": 51, "52812566": 51, "113": 51, "170": 51, "37033": 51, "37052": 51, "36904": 51, "36919": 51, "meaning": 52, "confirm": 52, "easiest": [52, 54], "data_typ": 52, "nmf": 52, "featu": 52, "impli": 52, "anoth": 52, "get_embedding_metadata": 52, "cxg": 52, "00506592": 52, "01348877": 52, "03173828": 52, "02331543": 52, "02404785": 52, "02441406": 52, "00595093": 52, "0065918": 52, "00070572": 52, 
"00187683": 52, "04663086": 52, "04614258": 52, "115722": 52, "512": [52, 56], "advanc": [52, 56], "portion": 52, "caution": 52, "quit": 52, "500_000": 52, "fail": [52, 56], "embedding_slic": 52, "emb_data": 52, "emb_joinid": 52, "reindex_disable_on_axi": 52, "embedding_presence_mask": 52, "getnnz": 52, "embedding_data": 52, "vstack": 52, "embedding_joinid": 52, "00762939": 52, "00076675": 52, "00047874": 52, "03588867": 52, "00405884": 52, "00239563": 52, "00982666": 52, "00946045": 52, "00473022": 52, "0135498": 52, "01049805": 52, "03051758": 52, "critic": 52, "meaningless": 52, "embedding_metadata": 52, "toward": 52, "ai": 52, "burgeon": 52, "pioneer": 52, "million": 52, "distil": 52, "concern": 52, "transfer": 52, "optim": [52, 58], "superior": 52, "primary_contact": 52, "bo": 52, "wang": 52, "bowang": 52, "vectorinstitut": 52, "affili": 52, "toronto": 52, "additional_contact": 52, "538439": 52, "additional_inform": 52, "62998417": 52, "submission_d": 52, "09": 52, "nonsens": 52, "assert": 52, "laura": 53, "luebbert": 53, "lauraluebbert": 53, "caltech": 53, "edu": 53, "genom": 53, "databas": 53, "facilit": [53, 59], "cite": 53, "googl": 53, "colab": 53, "q": 53, "setup": 53, "notic": 53, "fri": 53, "jul": 53, "succesfulli": 53, "gget_cellxgen": 53, "speci": 53, "meta_onli": 53, "verbos": 53, "sub": 53, "arg": 53, "slc5a1": 53, "ensg00000130234": 53, "ensg00000100170": 53, "ui": 53, "celltyp": 53, "mucu": 53, "neuroendocrin": 53, "canon": 53, "cellular": 53, "reus": 53, "secondari": 53, "portal": 53, "blob": 53, "9b94ccb0a2e0a8f6182b213aa4852c491f6f6aff": 53, "wmg": 53, "tissue_mapp": 53, "three": [53, 54], "abca1": 53, "minut": 53, "3679": 53, "thousand": 53, "ensg00000165029": 53, "11343": 53, "5332": 53, "9739": 53, "24539": 53, "5081": 53, "3674": 53, "3675": 53, "3676": 53, "3677": 53, "3678": 53, "retina": 53, "config": 53, "inlinebackend": 53, "figure_format": 53, "dotplot": 53, "ensmusg00000015405": 53, "047d57f2": 53, "4d14": 53, "45de": 53, "aa98": 
53, "336c6f583750": 53, "97547": 53, "97548": 53, "97549": 53, "97550": 53, "97551": 53, "97552": 53, "example_adata": 53, "example_meta": 53, "querycondit": 54, "2313": 54, "2308": 54, "2309": 54, "2310": 54, "2311": 54, "2312": 54, "8626": 54, "1884": 54, "27047": 54, "tubb4b": 54, "2037": 54, "materi": 54, "shortli": 54, "comparison": 54, "op": 54, "sex_cell_metadata": 54, "669": 54, "385437": 54, "metatadata": 54, "cell_metadata_all_unknown_sex": 54, "9th": 54, "post": 54, "fertil": 54, "0000046": 54, "decidua": 54, "basali": 54, "0000453": 54, "placenta": 54, "0001987": 54, "3251329": 54, "56274573": 54, "cord": 54, "2000095": 54, "newborn": 54, "0000082": 54, "han": 54, "chines": 54, "0027": 54, "umbil": 54, "0012168": 54, "0000178": 54, "3251330": 54, "56274574": 54, "3251331": 54, "56274575": 54, "3251332": 54, "56274576": 54, "3251333": 54, "56274577": 54, "3251334": 54, "cell_metadata_b_cel": 54, "42720": 54, "10631": 54, "8742": 54, "8187": 54, "2083": 54, "1534": 54, "1512": 54, "1474": 54, "1210": 54, "332": 54, "204": 54, "133": 54, "gene_metadata": 54, "isn": 55, "narrow": 55, "as_index": 55, "0000001": 55, "0000006": 55, "2502": 55, "0000015": 55, "621": 55, "0000019": 55, "608": 55, "4028006": 55, "38250": 55, "609": 55, "4030009": 55, "tubul": 55, "segment": 55, "777": 55, "610": 55, "4030011": 55, "989": 55, "611": 55, "4030018": 55, "princip": 55, "107": [55, 56], "612": 55, "4030023": 55, "hillock": 55, "10170": 55, "semant": 56, "maxmimum": 56, "nois": 56, "disabl": 56, "docstr": 56, "hvgs_df": 56, "highly_variable_rank": 56, "230445": 56, "116": 56, "044863": 56, "749637": 56, "287551": 56, "276809": 56, "461324": 56, "407450": 56, "363945": 56, "055626": 56, "280": 56, "958509": 56, "combined_df": [56, 57], "188": 56, "ensmusg00000026117": 56, "zap70": 56, "2992": 56, "409091": 56, "14793": 56, "026717": 56, "350": 56, "775560": 56, "233": 56, "ensmusg00000026073": 56, "il1r2": 56, "1908": 56, "764085": 56, "41918": 56, "471500": 56, 
"402176": 56, "ensmusg00000026185": 56, "igfbp5": 56, "6006": 56, "234876": 56, "314355": 56, "591239": 56, "156": 56, "825651": 56, "ensmusg00000026180": 56, "cxcr2": 56, "3048": 56, "379390": 56, "10491": 56, "033344": 56, "640129": 56, "30296": 56, "ensmusg00000024803": 56, "ankrd1": 56, "2886": 56, "548572": 56, "274005": 56, "455137": 56, "741864": 56, "30313": 56, "ensmusg00000024987": 56, "cyp26a1": 56, "1983": 56, "186686": 56, "12973": 56, "622003": 56, "454": 56, "580162": 56, "30379": 56, "ensmusg00000018822": 56, "sfrp5": 56, "1900": 56, "927853": 56, "10943": 56, "645525": 56, "410": 56, "637004": 56, "32042": 56, "ensmusg00000031838": 56, "ifi30": 56, "91": 56, "676950": 56, "995276": 56, "564962": 56, "205886": 56, "33314": 56, "ensmusg00000092572": 56, "serpinb10": 56, "3490": 56, "264085": 56, "239812": 56, "487": 56, "535469": 56, "who": 56, "own": 56, "mv_df": 57, "3095357": 57, "915025": 57, "69571": 57, "774917": 57, "3095359": 57, "972801": 57, "9471": 57, "427044": 57, "3095363": 57, "169472": 57, "139042": 57, "208628": 57, "3095366": 57, "049836": 57, "24762": 57, "926397": 57, "3095368": 57, "345415": 57, "150412": 57, "440839": 57, "3278898": 57, "164319": 57, "339741": 57, "3278899": 57, "368339": 57, "930156": 57, "3278900": 57, "246049": 57, "886186": 57, "3278901": 57, "240724": 57, "307266": 57, "3278902": 57, "278420": 57, "086994": 57, "9314": 57, "keratinocyt": [57, 58], "0002337": 57, "mmusdv": 57, "0000089": 57, "18_53_m": 57, "0002097": 57, "18_47_f": 57, "basal": [57, 58], "epidermi": 57, "0002187": 57, "0000091": 57, "epiderm": 57, "0000362": 57, "logist": 58, "regress": 58, "ml": 58, "primer": 58, "census_ml": 58, "experiment_datapip": 58, "10_000": 58, "mechan": 58, "encapsul": 58, "caller": 58, "importantli": 58, "lazili": 58, "avoid": 58, "legaci": 58, "interchang": 58, "shuffler": 58, "layout": 58, "strategi": 58, "held": 58, "1gb": 58, "caus": 58, "valid": 58, "randomsplitt": 58, "train_datapip": 58, "test_datapip": 58, 
"random_split": 58, "weight": 58, "togeth": 58, "experiment_dataload": 58, "style": 58, "enforc": 58, "linear": 58, "logisticregress": 58, "input_dim": 58, "output_dim": 58, "super": 58, "noqa": 58, "up008": 58, "sigmoid": 58, "train_epoch": 58, "train_dataload": 58, "loss_fn": 58, "devic": 58, "train_loss": 58, "train_correct": 58, "train_tot": 58, "zero_grad": 58, "softmax": 58, "loss": 58, "train_accuraci": 58, "secondli": 58, "42496620": 58, "42496621": 58, "42496622": 58, "42496633": 58, "42496634": 58, "42496635": 58, "desir": 58, "cuda": 58, "is_avail": 58, "cell_type_encod": 58, "crossentropyloss": 58, "adam": 58, "lr": 58, "7f": 58, "accuraci": 58, "4f": 58, "0167253": 58, "4856": 58, "0156710": 58, "4943": 58, "0149408": 58, "4813": 58, "0144469": 58, "5040": 58, "0141749": 58, "5669": 58, "0139776": 58, "6672": 58, "0138565": 58, "7920": 58, "0138094": 58, "8088": 58, "0136689": 58, "8757": 58, "0136101": 58, "8923": 58, "invok": 58, "eval": 58, "recov": 58, "At": 58, "unpickl": 58, "vein": 58, "123": 58, "124": 58, "127": 58, "helper": 59}, "objects": {"": [[59, 0, 0, "-", "cellxgene_census"]], "cellxgene_census": [[0, 1, 1, "", "download_source_h5ad"], [14, 1, 1, "", "get_anndata"], [15, 1, 1, "", "get_census_version_description"], [16, 1, 1, "", "get_census_version_directory"], [17, 1, 1, "", "get_default_soma_context"], [18, 1, 1, "", "get_obs"], [19, 1, 1, "", "get_presence_matrix"], [20, 1, 1, "", "get_source_h5ad_uri"], [21, 1, 1, "", "get_var"], [22, 1, 1, "", "open_soma"]], "cellxgene_census.experimental": [[1, 1, 1, "", "get_all_available_embeddings"], [2, 1, 1, "", "get_all_census_versions_with_embedding"], [3, 1, 1, "", "get_embedding"], [4, 1, 1, "", "get_embedding_metadata"], [5, 1, 1, "", "get_embedding_metadata_by_name"]], "cellxgene_census.experimental.ml.huggingface": [[6, 2, 1, "", "CellDatasetBuilder"], [7, 2, 1, "", "GeneformerTokenizer"]], "cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder": [[6, 3, 1, "", 
"__init__"]], "cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer": [[7, 3, 1, "", "__init__"]], "cellxgene_census.experimental.ml.pytorch": [[8, 2, 1, "", "ExperimentDataPipe"], [9, 2, 1, "", "Stats"], [10, 1, 1, "", "experiment_dataloader"]], "cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe": [[8, 3, 1, "", "__init__"]], "cellxgene_census.experimental.ml.pytorch.Stats": [[9, 3, 1, "", "__init__"]], "cellxgene_census.experimental.pp": [[11, 1, 1, "", "get_highly_variable_genes"], [12, 1, 1, "", "highly_variable_genes"], [13, 1, 1, "", "mean_variance"]]}, "objtypes": {"0": "py:module", "1": "py:function", "2": "py:class", "3": "py:method"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"], "2": ["py", "class", "Python class"], "3": ["py", "method", "Python method"]}, "titleterms": {"cellxgene_censu": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 29, 46, 52], "download_source_h5ad": 0, "experiment": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 56, 59], "get_all_available_embed": 1, "get_all_census_versions_with_embed": 2, "get_embed": 3, "get_embedding_metadata": 4, "get_embedding_metadata_by_nam": 5, "ml": [6, 7, 8, 9, 10], "huggingfac": [6, 7], "celldatasetbuild": 6, "geneformertoken": 7, "pytorch": [8, 9, 10, 58], "experimentdatapip": [8, 58], "stat": [9, 26, 45], "experiment_dataload": 10, "pp": [11, 12, 13], "get_highly_variable_gen": [11, 56], "highly_variable_gen": [12, 56], "mean_vari": 13, "get_anndata": [14, 46, 52], "get_census_version_descript": 15, "get_census_version_directori": 16, "get_default_soma_context": 17, "get_ob": 18, "get_presence_matrix": 19, "get_source_h5ad_uri": 20, "get_var": 21, "open_soma": 22, "what": [23, 29, 30], "": [23, 53], "new": [23, 26, 29, 36], "2023": [23, 30], "2024": 23, "r": [24, 28, 31, 33], "packag": [24, 30, 42], "cellxgen": [24, 28, 32, 35, 37, 38, 46, 52, 53], "censu": [24, 26, 27, 28, 29, 30, 32, 34, 35, 36, 
37, 38, 40, 41, 42, 43, 45, 47, 49, 50, 51, 52, 54, 57, 58, 59], "v1": 24, "i": [24, 29, 30], "out": [24, 51, 57], "instal": [24, 29, 31, 33, 53], "usag": 24, "made": 24, "possibl": 24, "tiledbsoma": 24, "effici": [24, 25, 33], "access": [24, 26, 28, 46, 52], "singl": [24, 25, 26, 29, 34, 41, 42, 50, 54], "cell": [24, 25, 26, 27, 30, 33, 34, 35, 38, 40, 41, 42, 44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 55, 59], "data": [24, 26, 28, 29, 30, 32, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 50, 51, 52, 53, 54, 59], "33m": 24, "from": [24, 29, 39, 40, 41, 42, 50, 53], "easi": 24, "us": [24, 25, 26, 29, 32, 36, 37, 42, 44, 48, 53], "handl": 24, "cloud": 24, "host": [24, 29, 52], "queri": [24, 26, 29, 33, 46, 47, 52, 53, 54], "read": [24, 51], "metadata": [24, 26, 27, 30, 33, 35, 38, 41, 45, 47, 52, 53, 54], "export": [24, 26, 36], "slice": [24, 33, 39, 47, 57, 59], "seurat": [24, 33], "singlecellexperi": [24, 33], "stream": 24, "increment": [24, 48, 57], "chunk": 24, "memori": [25, 33], "implement": 25, "commonli": 25, "method": 25, "calcul": [25, 26, 41, 48, 55, 57], "averag": 25, "varianc": [25, 48, 57], "gene": [25, 26, 35, 38, 39, 41, 43, 45, 48, 49, 53, 54, 56], "express": [25, 41, 43, 50, 53, 54], "across": 25, "million": 25, "how": [25, 26, 28, 29], "work": 25, "exampl": [25, 38, 42, 43, 44, 45, 51, 57], "kra": 25, "aqp4": 25, "lung": [25, 40, 41], "epitheli": 25, "highli": [25, 56], "variabl": [25, 56], "find": [25, 39], "all": [25, 38, 41, 45, 49], "human": [25, 29, 38, 41], "esophagu": 25, "introduc": 26, "normal": [26, 29, 35, 39, 41, 43], "layer": [26, 29, 41], "pre": [26, 55], "statist": 26, "descript": 26, "ad": 26, "librari": 26, "size": 26, "enhanc": 26, "featur": [26, 29, 35, 59], "exist": 26, "toolkit": 26, "via": [26, 46, 47, 52], "tiledb": [26, 28], "soma": [26, 28, 34], "util": 26, "ob": [26, 27, 35, 51, 53, 54], "var": [26, 35, 54], "help": 26, "u": 26, "improv": 26, "addit": 26, "support": [27, 29, 30], "categor": 27, "potenti": 27, "break": 27, "chang": 
27, "identifi": [27, 49], "column": 27, "encod": [27, 35], "cz": [28, 32, 35, 37, 38, 53], "discov": [28, 32, 35, 37, 53], "aw": 28, "avail": [28, 38], "specif": [28, 49], "releas": [28, 30, 32, 37], "version": [28, 30, 35, 59], "cli": 28, "programat": 28, "download": [28, 42, 44, 50], "api": [28, 29, 56, 57, 59], "python": [28, 29, 31, 33, 36, 59], "faq": 29, "why": [29, 51], "should": 29, "contain": 29, "do": 29, "cite": [29, 32, 37], "public": 29, "doe": 29, "have": 29, "embed": [29, 36, 40, 41, 42, 46, 52, 59], "differenti": 29, "other": [29, 42], "tool": [29, 32, 37, 39], "can": 29, "mous": [29, 39], "where": 29, "ar": [29, 51], "retriev": [29, 59], "origin": [29, 50], "h5ad": [29, 50], "dataset": [29, 35, 39, 41, 42, 49, 50, 58], "which": 29, "wa": 29, "built": 29, "increas": 29, "perform": [29, 42], "my": 29, "conda": 29, "ask": 29, "contribut": 29, "get": [29, 59], "an": [29, 46, 51, 52, 53, 58], "arrayschema": 29, "error": 29, "when": [29, 51], "open": [29, 38, 43, 45, 49, 54, 58, 59], "run": 29, "import": [29, 40, 42], "databrick": 29, "long": 30, "term": 30, "lt": 30, "weekli": 30, "latest": 30, "list": 30, "12": 30, "15": 30, "inform": [30, 35], "donor": 30, "count": [30, 35, 38, 48, 55], "embbed": 30, "07": 30, "25": 30, "05": 30, "errata": 30, "duplic": [30, 51], "observ": [30, 40], "is_primary_data": 30, "true": 30, "compat": 30, "requir": [31, 40, 42, 44, 47], "capabl": [32, 37], "schema": [32, 34, 35, 37], "question": [32, 37], "feedback": [32, 37], "issu": [32, 37], "come": [32, 37], "soon": [32, 37], "project": [32, 37, 42, 44], "quick": [33, 46, 52], "start": [33, 46, 52], "obtain": 33, "anndata": [33, 46, 47, 51, 52, 53, 59], "object": [33, 34, 53], "summari": [34, 35, 38, 41, 55], "info": [34, 38], "census_info": [34, 35], "census_data": [34, 35], "includ": [34, 35, 38], "overview": 35, "definit": [35, 40], "speci": 35, "multi": [35, 39], "constraint": 35, "assai": [35, 38, 41], "full": [35, 43, 45], "sequenc": [35, 38, 43], "matrix": [35, 49, 
59], "type": [35, 38, 41, 44, 45, 53], "sampl": [35, 40], "repeat": 35, "organ": [35, 38], "census_obj": 35, "somacollect": 35, "somadatafram": 35, "tabl": [35, 38, 50], "summary_cell_count": 35, "somaexperi": 35, "raw": 35, "m": 35, "rna": 35, "x": [35, 48], "somasparsendarrai": 35, "presenc": [35, 49, 59], "feature_dataset_presence_matrix": 35, "changelog": 35, "2": 35, "0": 35, "1": 35, "3": 35, "tutori": 36, "integr": [36, 39], "model": [36, 42, 44, 58], "understand": [36, 38, 51], "analyz": 36, "scalabl": 36, "comput": [36, 48], "machin": [36, 59], "learn": [36, 38, 41, 59], "about": [38, 41], "main": 38, "compon": 38, "content": [38, 52], "each": [38, 49], "number": 38, "microgli": 38, "beyond": [38, 55], "liver": [38, 39], "diseas": [38, 41], "t": 38, "tissu": [38, 40, 41, 53], "fetch": [39, 40, 41, 43, 49, 50, 52, 53, 54, 55], "10x": [39, 42], "genom": 39, "smart": [39, 43], "seq2": 39, "length": [39, 43], "scvi": [39, 44, 46], "inspect": [39, 42], "prior": 39, "batch": 39, "defin": [39, 58], "dataset_id": [39, 48], "donor_id": 39, "assay_ontology_term_id": 39, "suspension_typ": 39, "explor": [40, 41, 43, 50, 55], "biolog": 40, "relev": 40, "cluster": [40, 43], "background": [40, 52], "function": 40, "melanocyt": 40, "ey": 40, "150k": 40, "retin": 40, "bipolar": 40, "neuron": 40, "dopaminerg": 40, "brain": 40, "pulmonari": 40, "ionocyt": 40, "tabula": [40, 51], "sapien": 40, "sex": 41, "v": 41, "nucleu": 41, "sub": 41, "qc": 41, "metric": 41, "creat": [41, 51, 55, 58], "geneform": [42, 46], "class": [42, 58], "predict": [42, 44, 58], "system": [42, 44], "fine": 42, "tune": 42, "prepar": 42, "subclass": 42, "infer": [42, 44], "load": [42, 46, 52], "token": 42, "result": 42, "gener": [42, 47], "pbmc": 42, "3k": 42, "join": 42, "seq": 43, "account": 43, "valid": 43, "through": 43, "train": [44, 58], "pretrain": 44, "summar": 45, "subset": 45, "select": [45, 53], "value_filt": 45, "collabor": 46, "storag": [46, 52], "format": [46, 52], "associ": [46, 52], 
"obsm": [46, 52], "slot": [46, 52], "experimentaxisqueri": [46, 52], "dens": [46, 52], "numpi": [46, 52], "arrai": [46, 52], "citat": 47, "string": 47, "onlin": 48, "algorithm": 48, "mean": [48, 57], "per": 48, "group": 48, "measur": 49, "id": 49, "sourc": 50, "file": 50, "filter": 51, "muri": 51, "seni": 51, "frame": 51, "core": [51, 57], "oper": 51, "gget": 53, "modul": 53, "set": 53, "up": 53, "plot": 53, "dot": 53, "similar": 53, "those": 53, "shown": 53, "onli": 53, "correspond": 53, "command": 53, "line": 53, "census_summary_cell_count": 55, "datafram": 55, "valu": 55, "The": 57, "explain": 58, "paramet": 58, "split": 58, "dataload": 58, "make": 58, "build": 59, "process": 59}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx": 57}, "alltitles": {"cellxgene_census.download_source_h5ad": [[0, "cellxgene-census-download-source-h5ad"]], "cellxgene_census.experimental.get_all_available_embeddings": [[1, "cellxgene-census-experimental-get-all-available-embeddings"]], "cellxgene_census.experimental.get_all_census_versions_with_embedding": [[2, "cellxgene-census-experimental-get-all-census-versions-with-embedding"]], "cellxgene_census.experimental.get_embedding": [[3, "cellxgene-census-experimental-get-embedding"]], "cellxgene_census.experimental.get_embedding_metadata": [[4, "cellxgene-census-experimental-get-embedding-metadata"]], "cellxgene_census.experimental.get_embedding_metadata_by_name": [[5, "cellxgene-census-experimental-get-embedding-metadata-by-name"]], "cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder": [[6, "cellxgene-census-experimental-ml-huggingface-celldatasetbuilder"]], "cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer": [[7, 
"cellxgene-census-experimental-ml-huggingface-geneformertokenizer"]], "cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe": [[8, "cellxgene-census-experimental-ml-pytorch-experimentdatapipe"]], "cellxgene_census.experimental.ml.pytorch.Stats": [[9, "cellxgene-census-experimental-ml-pytorch-stats"]], "cellxgene_census.experimental.ml.pytorch.experiment_dataloader": [[10, "cellxgene-census-experimental-ml-pytorch-experiment-dataloader"]], "cellxgene_census.experimental.pp.get_highly_variable_genes": [[11, "cellxgene-census-experimental-pp-get-highly-variable-genes"]], "cellxgene_census.experimental.pp.highly_variable_genes": [[12, "cellxgene-census-experimental-pp-highly-variable-genes"]], "cellxgene_census.experimental.pp.mean_variance": [[13, "cellxgene-census-experimental-pp-mean-variance"]], "cellxgene_census.get_anndata": [[14, "cellxgene-census-get-anndata"]], "cellxgene_census.get_census_version_description": [[15, "cellxgene-census-get-census-version-description"]], "cellxgene_census.get_census_version_directory": [[16, "cellxgene-census-get-census-version-directory"]], "cellxgene_census.get_default_soma_context": [[17, "cellxgene-census-get-default-soma-context"]], "cellxgene_census.get_obs": [[18, "cellxgene-census-get-obs"]], "cellxgene_census.get_presence_matrix": [[19, "cellxgene-census-get-presence-matrix"]], "cellxgene_census.get_source_h5ad_uri": [[20, "cellxgene-census-get-source-h5ad-uri"]], "cellxgene_census.get_var": [[21, "cellxgene-census-get-var"]], "cellxgene_census.open_soma": [[22, "cellxgene-census-open-soma"]], "What\u2019s new?": [[23, "what-s-new"]], "2023": [[23, "id1"]], "2024": [[23, "id2"]], "R package cellxgene.census V1 is out!": [[24, "r-package-cellxgene-census-v1-is-out"]], "Installation and usage": [[24, "installation-and-usage"]], "Census R package is made possible by tiledbsoma": [[24, "census-r-package-is-made-possible-by-tiledbsoma"]], "Efficient access to single-cell data for >33M cells from R": [[24, 
"efficient-access-to-single-cell-data-for-33m-cells-from-r"]], "Easy-to-use handles to the cloud-hosted Census data": [[24, "easy-to-use-handles-to-the-cloud-hosted-census-data"]], "Querying and reading single-cell metadata from Census": [[24, "querying-and-reading-single-cell-metadata-from-census"]], "Exporting Census slices to Seurat and SingleCellExperiment": [[24, "exporting-census-slices-to-seurat-and-singlecellexperiment"]], "Streaming data incrementally in chunks": [[24, "streaming-data-incrementally-in-chunks"]], "Memory-efficient implementations of commonly used single-cell methods": [[25, "memory-efficient-implementations-of-commonly-used-single-cell-methods"]], "Efficient calculation of average and variance gene expression across millions of cells": [[25, "efficient-calculation-of-average-and-variance-gene-expression-across-millions-of-cells"]], "How it works": [[25, "how-it-works"], [25, "id1"]], "Example: KRAS and AQP4 average and variance expression in lung epithelial cells": [[25, "example-kras-and-aqp4-average-and-variance-expression-in-lung-epithelial-cells"]], "Efficient calculation of highly variable genes across millions of cells": [[25, "efficient-calculation-of-highly-variable-genes-across-millions-of-cells"]], "Example: Finding highly variable genes for all cells of the human esophagus": [[25, "example-finding-highly-variable-genes-for-all-cells-of-the-human-esophagus"]], "Introducing a normalized layer and pre-calculated cell and gene statistics in Census": [[26, "introducing-a-normalized-layer-and-pre-calculated-cell-and-gene-statistics-in-census"]], "Description of new data added to Census": [[26, "description-of-new-data-added-to-census"]], "Added a new library-size normalized layer": [[26, "added-a-new-library-size-normalized-layer"]], "Enhanced gene metadata": [[26, "enhanced-gene-metadata"]], "Enhanced cell metadata": [[26, "enhanced-cell-metadata"]], "How to use the new features": [[26, "how-to-use-the-new-features"]], "Exporting the 
normalized data to existing single-cell toolkits": [[26, "exporting-the-normalized-data-to-existing-single-cell-toolkits"]], "Accessing library-size normalized data layer via TileDB-SOMA": [[26, "accessing-library-size-normalized-data-layer-via-tiledb-soma"]], "Utilizing pre-calculated stats for querying obs and var": [[26, "utilizing-pre-calculated-stats-for-querying-obs-and-var"]], "Help us improve these data additions": [[26, "help-us-improve-these-data-additions"]], "Census supports categoricals for cell metadata": [[27, "census-supports-categoricals-for-cell-metadata"]], "Potential breaking changes": [[27, "potential-breaking-changes"]], "Identifying the obs columns encoded as categorical": [[27, "identifying-the-obs-columns-encoded-as-categorical"]], "CZ CELLxGENE Discover Census in AWS": [[28, "cz-cellxgene-discover-census-in-aws"]], "Census data available in AWS": [[28, "census-data-available-in-aws"]], "Data specifications": [[28, "data-specifications"]], "Data release versioning": [[28, "data-release-versioning"]], "How to access AWS Census data": [[28, "how-to-access-aws-census-data"]], "AWS CLI for programatic downloads": [[28, "aws-cli-for-programatic-downloads"]], "CELLxGENE Census API (Python and R)": [[28, "cellxgene-census-api-python-and-r"]], "TileDB-SOMA API (Python and R)": [[28, "tiledb-soma-api-python-and-r"]], "FAQ": [[29, "faq"]], "Why should I use the Census?": [[29, "why-should-i-use-the-census"]], "What data is contained in the Census?": [[29, "what-data-is-contained-in-the-census"]], "How do I cite the use of the Census for a publication?": [[29, "how-do-i-cite-the-use-of-the-census-for-a-publication"]], "Why does the Census not have a normalized layer or embeddings?": [[29, "why-does-the-census-not-have-a-normalized-layer-or-embeddings"]], "How does the Census differentiate from other tools?": [[29, "how-does-the-census-differentiate-from-other-tools"]], "Can I query human and mouse data in a single query?": [[29, 
"can-i-query-human-and-mouse-data-in-a-single-query"]], "Where are the Census data hosted?": [[29, "where-are-the-census-data-hosted"]], "Can I retrieve the original H5AD datasets from which the Census was built?": [[29, "can-i-retrieve-the-original-h5ad-datasets-from-which-the-census-was-built"]], "How can I increase the performance of my queries?": [[29, "how-can-i-increase-the-performance-of-my-queries"]], "Can I use conda to install the Census Python API?": [[29, "can-i-use-conda-to-install-the-census-python-api"]], "How can I ask for support?": [[29, "how-can-i-ask-for-support"]], "How can I ask for new features?": [[29, "how-can-i-ask-for-new-features"]], "How can I contribute my data to the Census?": [[29, "how-can-i-contribute-my-data-to-the-census"]], "Why do I get an ArraySchema error when opening the Census?": [[29, "why-do-i-get-an-arrayschema-error-when-opening-the-census"]], "Why do I get an error when running import cellxgene_census on Databricks?": [[29, "why-do-i-get-an-error-when-running-import-cellxgene-census-on-databricks"]], "Census data releases": [[30, "census-data-releases"]], "What is a Census data release?": [[30, "what-is-a-census-data-release"]], "Long-term supported (LTS) Census releases": [[30, "long-term-supported-lts-census-releases"]], "Weekly Census releases (latest)": [[30, "weekly-census-releases-latest"]], "List of LTS Census data releases": [[30, "list-of-lts-census-data-releases"]], "LTS 2023-12-15": [[30, "lts-2023-12-15"]], "Version information": [[30, "version-information"], [30, "id1"], [30, "id4"]], "Cell and donor counts": [[30, "cell-and-donor-counts"], [30, "id2"], [30, "id5"]], "Cell metadata": [[30, "cell-metadata"], [30, "id3"], [30, "id6"], [38, "Cell-metadata"]], "Cell embbedings": [[30, "cell-embbedings"]], "LTS 2023-07-25": [[30, "lts-2023-07-25"]], "LTS 2023-05-15": [[30, "lts-2023-05-15"]], "\ud83d\udd34 Errata \ud83d\udd34": [[30, "errata"]], "Duplicate observations with is_primary_data = True": [[30, 
"duplicate-observations-with-is-primary-data-true"]], "Compatibility with package versions": [[30, "compatibility-with-package-versions"]], "Installation": [[31, "installation"], [33, "installation"]], "Requirements": [[31, "requirements"], [40, "Requirements"], [42, "Requirements"], [44, "Requirements"], [47, "Requirements"]], "Python": [[31, "python"]], "R": [[31, "r"]], "CZ CELLxGENE Discover Census": [[32, "cz-cellxgene-discover-census"], [37, "cz-cellxgene-discover-census"]], "Citing Census": [[32, "citing-census"], [37, "citing-census"]], "Census Capabilities": [[32, "census-capabilities"], [37, "census-capabilities"]], "Census Data and Schema": [[32, "census-data-and-schema"], [37, "census-data-and-schema"]], "Census Data Releases": [[32, "census-data-releases"], [37, "census-data-releases"]], "Questions, Feedback and Issues": [[32, "questions-feedback-and-issues"], [37, "questions-feedback-and-issues"]], "Coming Soon!": [[32, "coming-soon"], [37, "coming-soon"]], "Projects and Tools Using Census": [[32, "projects-and-tools-using-census"], [37, "projects-and-tools-using-census"]], "Quick start": [[33, "quick-start"], [46, "Quick-start"], [52, "Quick-start"]], "Python quick start": [[33, "python-quick-start"]], "Querying a slice of cell metadata": [[33, "querying-a-slice-of-cell-metadata"], [33, "id1"]], "Obtaining a slice as AnnData": [[33, "obtaining-a-slice-as-anndata"]], "Memory-efficient queries": [[33, "memory-efficient-queries"], [33, "id2"]], "R quick start": [[33, "r-quick-start"]], "Obtaining a slice as a Seurat or SingleCellExperiment object": [[33, "obtaining-a-slice-as-a-seurat-or-singlecellexperiment-object"]], "Census data and schema": [[34, "census-data-and-schema"]], "Schema": [[34, "schema"], [35, "schema"]], "Census summary info \"census_info\"": [[34, "census-summary-info-census-info"]], "Census single-cell data \"census_data\"": [[34, "census-single-cell-data-census-data"]], "Data included in the Census": [[34, 
"data-included-in-the-census"]], "SOMA objects": [[34, "soma-objects"]], "CZ CELLxGENE Discover Census Schema": [[35, "cz-cellxgene-discover-census-schema"]], "Census overview": [[35, "census-overview"]], "Definitions": [[35, "definitions"]], "Census Schema versioning": [[35, "census-schema-versioning"]], "Data included": [[35, "data-included"]], "Species": [[35, "species"]], "Multi-species data constraints": [[35, "multi-species-data-constraints"]], "Assays": [[35, "assays"], [41, "Assays"]], "Full-gene sequencing assays": [[35, "full-gene-sequencing-assays"]], "Data matrix types": [[35, "data-matrix-types"]], "Sample types": [[35, "sample-types"]], "Repeated data": [[35, "repeated-data"]], "Data encoding and organization": [[35, "data-encoding-and-organization"]], "Census information census_obj[\"census_info\"] - SOMACollection": [[35, "census-information-census-obj-census-info-somacollection"]], "Census metadata \u2013 census_obj\u200b\u200b[\"census_info\"][\"summary\"] \u2013 SOMADataFrame": [[35, "census-metadata-census-obj-census-info-summary-somadataframe"]], "Census table of CELLxGENE Discover datasets \u2013 census_obj[\"census_info\"][\"datasets\"] \u2013 SOMADataFrame": [[35, "census-table-of-cellxgene-discover-datasets-census-obj-census-info-datasets-somadataframe"]], "Census summary cell counts \u2013 census_obj[\"census_info\"][\"summary_cell_counts\"] \u2013 SOMADataframe": [[35, "census-summary-cell-counts-census-obj-census-info-summary-cell-counts-somadataframe"]], "Census table of organisms \u2013 census_obj[\"census_info\"][\"organisms\"] \u2013 SOMADataframe": [[35, "census-table-of-organisms-census-obj-census-info-organisms-somadataframe"]], "Census Data \u2013 census_obj[\"census_data\"][organism] \u2013 SOMAExperiment": [[35, "census-data-census-obj-census-data-organism-somaexperiment"]], "Matrix Data, count (raw) matrix \u2013 census_obj[\"census_data\"][organism].ms[\"RNA\"].X[\"raw\"] \u2013 SOMASparseNDArray": [[35, 
"matrix-data-count-raw-matrix-census-obj-census-data-organism-ms-rna-x-raw-somasparsendarray"]], "Matrix Data, normalized count matrix \u2013 census_obj[\"census_data\"][organism].ms[\"RNA\"].X[\"normalized\"] \u2013 SOMASparseNDArray": [[35, "matrix-data-normalized-count-matrix-census-obj-census-data-organism-ms-rna-x-normalized-somasparsendarray"]], "Feature metadata \u2013 census_obj[\"census_data\"][organism].ms[\"RNA\"].var \u2013 SOMADataFrame": [[35, "feature-metadata-census-obj-census-data-organism-ms-rna-var-somadataframe"]], "Feature dataset presence matrix \u2013 census_obj[\"census_data\"][organism].ms[\"RNA\"][\"feature_dataset_presence_matrix\"] \u2013 SOMASparseNDArray": [[35, "feature-dataset-presence-matrix-census-obj-census-data-organism-ms-rna-feature-dataset-presence-matrix-somasparsendarray"]], "Cell metadata \u2013 census_obj[\"census_data\"][organism].obs \u2013 SOMADataFrame": [[35, "cell-metadata-census-obj-census-data-organism-obs-somadataframe"]], "Changelog": [[35, "changelog"]], "Version 2.0.0": [[35, "version-2-0-0"]], "Version 1.3.0": [[35, "version-1-3-0"]], "Version 1.2.0": [[35, "version-1-2-0"]], "Version 1.1.0": [[35, "version-1-1-0"]], "Version 1.0.0": [[35, "version-1-0-0"]], "Version 0.1.1": [[35, "version-0-1-1"]], "Version 0.1.0": [[35, "version-0-1-0"]], "Version 0.0.1": [[35, "version-0-0-1"]], "Python tutorials": [[36, "python-tutorials"]], "Exporting data": [[36, "exporting-data"]], "[NEW! 
\ud83d\ude80] Using integrated embeddings and models": [[36, "new-using-integrated-embeddings-and-models"]], "Understanding Census data": [[36, "understanding-census-data"]], "Analyzing Census data": [[36, "analyzing-census-data"]], "Scalable computing": [[36, "scalable-computing"]], "Scalable machine learning": [[36, "scalable-machine-learning"]], "Learning about the CZ CELLxGENE Census": [[38, "Learning-about-the-CZ-CELLxGENE-Census"]], "Opening the Census": [[38, "Opening-the-Census"], [45, "Opening-the-Census"], [49, "Opening-the-Census"]], "Census organization": [[38, "Census-organization"]], "Main Census components": [[38, "Main-Census-components"]], "Census summary info": [[38, "Census-summary-info"]], "Census data": [[38, "Census-data"]], "Gene metadata": [[38, "Gene-metadata"]], "Census summary content tables": [[38, "Census-summary-content-tables"]], "Cell counts by cell metadata": [[38, "Cell-counts-by-cell-metadata"]], "Example: cell metadata included in the summary counts table": [[38, "Example:-cell-metadata-included-in-the-summary-counts-table"]], "Example: cell counts for each sequencing assay in human data": [[38, "Example:-cell-counts-for-each-sequencing-assay-in-human-data"]], "Example: number of microglial cells in the Census": [[38, "Example:-number-of-microglial-cells-in-the-Census"]], "Understanding Census contents beyond the summary tables": [[38, "Understanding-Census-contents-beyond-the-summary-tables"]], "Example: all cell types available in human": [[38, "Example:-all-cell-types-available-in-human"]], "Example: cell types available in human liver": [[38, "Example:-cell-types-available-in-human-liver"]], "Example: diseased T cells in human tissues": [[38, "Example:-diseased-T-cells-in-human-tissues"]], "Integrating multi-dataset slices of data": [[39, "Integrating-multi-dataset-slices-of-data"]], "Finding and fetching data from mouse liver (10X Genomics and Smart-Seq2)": [[39, 
"Finding-and-fetching-data-from-mouse-liver-(10X-Genomics-and-Smart-Seq2)"]], "Gene-length normalization of Smart-Seq2 data.": [[39, "Gene-length-normalization-of-Smart-Seq2-data."]], "Integration with scvi-tools": [[39, "Integration-with-scvi-tools"]], "Inspecting data prior to integration": [[39, "Inspecting-data-prior-to-integration"]], "Data integration with scVI": [[39, "Data-integration-with-scVI"]], "Integration with batch defined as dataset_id": [[39, "Integration-with-batch-defined-as-dataset_id"]], "Integration with batch defined as dataset_id + donor_id": [[39, "Integration-with-batch-defined-as-dataset_id-+-donor_id"]], "Integration with batch defined as dataset_id + donor_id + assay_ontology_term_id + suspension_type": [[39, "Integration-with-batch-defined-as-dataset_id-+-donor_id-+-assay_ontology_term_id-+-suspension_type"]], "Exploring biologically relevant clusters in Census embeddings": [[40, "Exploring-biologically-relevant-clusters-in-Census-embeddings"]], "Background": [[40, "Background"], [52, "Background"]], "Imports and function definitions": [[40, "Imports-and-function-definitions"]], "Melanocytes in eye": [[40, "Melanocytes-in-eye"]], "Sample and fetch 150k cells from eye tissue": [[40, "Sample-and-fetch-150k-cells-from-eye-tissue"]], "Observations": [[40, "Observations"], [40, "id1"], [40, "id2"]], "Retinal bipolar neurons in eye": [[40, "Retinal-bipolar-neurons-in-eye"]], "Dopaminergic neurons in brain": [[40, "Dopaminergic-neurons-in-brain"]], "Sample and fetch 150k cells from brain tissue": [[40, "Sample-and-fetch-150k-cells-from-brain-tissue"]], "Pulmonary ionocytes in lung (Tabula Sapiens)": [[40, "Pulmonary-ionocytes-in-lung-(Tabula-Sapiens)"]], "Fetch lung cells from Tabula Sapiens": [[40, "Fetch-lung-cells-from-Tabula-Sapiens"]], "Exploring all data from a tissue": [[41, "Exploring-all-data-from-a-tissue"]], "Learning about the lung data in the Census": [[41, "Learning-about-the-lung-data-in-the-Census"]], "Learning about cells of 
lung data": [[41, "Learning-about-cells-of-lung-data"]], "Datasets": [[41, "Datasets"]], "Disease": [[41, "Disease"]], "Sex": [[41, "Sex"]], "Cell vs nucleus": [[41, "Cell-vs-nucleus"]], "Cell types": [[41, "Cell-types"]], "Sub-tissues": [[41, "Sub-tissues"]], "Learning about genes of lung data": [[41, "Learning-about-genes-of-lung-data"]], "Summary of lung metadata": [[41, "Summary-of-lung-metadata"]], "Fetching all single-cell human lung data from the Census": [[41, "Fetching-all-single-cell-human-lung-data-from-the-Census"]], "Calculating QC metrics of the lung data": [[41, "Calculating-QC-metrics-of-the-lung-data"]], "Creating a normalized expression layer and embeddings": [[41, "Creating-a-normalized-expression-layer-and-embeddings"]], "Geneformer for cell class prediction and data projection": [[42, "Geneformer-for-cell-class-prediction-and-data-projection"]], "System requirements": [[42, "System-requirements"], [44, "System-requirements"]], "Downloading example data": [[42, "Downloading-example-data"], [44, "Downloading-example-data"]], "Downloading the fine-tuned Geneformer model": [[42, "Downloading-the-fine-tuned-Geneformer-model"]], "Importing required packages": [[42, "Importing-required-packages"]], "Preparing data and model": [[42, "Preparing-data-and-model"]], "Preparing single-cell data": [[42, "Preparing-single-cell-data"]], "Preparing data from model": [[42, "Preparing-data-from-model"]], "Using the Geneformer fine-tuned model for cell subclass inference": [[42, "Using-the-Geneformer-fine-tuned-model-for-cell-subclass-inference"]], "Loading tokenized data": [[42, "Loading-tokenized-data"]], "Performing inference of cell subclass": [[42, "Performing-inference-of-cell-subclass"]], "Inspecting inference results": [[42, "Inspecting-inference-results"]], "Using the Geneformer fine-tuned model for data projection": [[42, "Using-the-Geneformer-fine-tuned-model-for-data-projection"]], "Generating Geneformer embeddings for 10X PBMC 3K data": [[42, 
"Generating-Geneformer-embeddings-for-10X-PBMC-3K-data"]], "Joining Geneformer embeddings from 10X PBMC 3K data with other Census datasets": [[42, "Joining-Geneformer-embeddings-from-10X-PBMC-3K-data-with-other-Census-datasets"]], "Normalizing full-length gene sequencing data": [[43, "Normalizing-full-length-gene-sequencing-data"]], "Opening the census": [[43, "Opening-the-census"], [54, "Opening-the-census"]], "Fetching full-length example sequencing data (Smart-Seq)": [[43, "Fetching-full-length-example-sequencing-data-(Smart-Seq)"]], "Normalizing expression to account for gene length": [[43, "Normalizing-expression-to-account-for-gene-length"]], "Validation through clustering exploration": [[43, "Validation-through-clustering-exploration"]], "scVI for cell type prediction and data projection": [[44, "scVI-for-cell-type-prediction-and-data-projection"]], "Downloading the trained scVI model": [[44, "Downloading-the-trained-scVI-model"]], "Using the scVI pretrained model for data projection": [[44, "Using-the-scVI-pretrained-model-for-data-projection"]], "Using the scVI pretrained model for cell cell type inference.": [[44, "Using-the-scVI-pretrained-model-for-cell-cell-type-inference."]], "Summarizing cell and gene metadata": [[45, "Summarizing-cell-and-gene-metadata"]], "Summarizing cell metadata": [[45, "Summarizing-cell-metadata"]], "Example: Summarize all cell types": [[45, "Example:-Summarize-all-cell-types"]], "Example: Summarize a subset of cell types, selected with a value_filter": [[45, "Example:-Summarize-a-subset-of-cell-types,-selected-with-a-value_filter"]], "Full Census metadata stats": [[45, "Full-Census-metadata-stats"]], "Access CELLxGENE collaboration embeddings (scVI, Geneformer)": [[46, "Access-CELLxGENE-collaboration-embeddings-(scVI,-Geneformer)"]], "Storage format": [[46, "Storage-format"], [52, "Storage-format"]], "Query cells and load associated embeddings": [[46, "Query-cells-and-load-associated-embeddings"], [52, 
"Query-cells-and-load-associated-embeddings"]], "Loading embeddings into an AnnData obsm slot": [[46, "Loading-embeddings-into-an-AnnData-obsm-slot"]], "AnnData embeddings via cellxgene_census.get_anndata()": [[46, "AnnData-embeddings-via-cellxgene_census.get_anndata()"], [52, "AnnData-embeddings-via-cellxgene_census.get_anndata()"]], "AnnData embeddings via ExperimentAxisQuery": [[46, "AnnData-embeddings-via-ExperimentAxisQuery"], [52, "AnnData-embeddings-via-ExperimentAxisQuery"]], "Load an embedding into a dense NumPy array": [[46, "Load-an-embedding-into-a-dense-NumPy-array"], [52, "Load-an-embedding-into-a-dense-NumPy-array"]], "Generating citations for Census slices": [[47, "Generating-citations-for-Census-slices"]], "Generating citation strings": [[47, "Generating-citation-strings"]], "Via cell metadata query": [[47, "Via-cell-metadata-query"]], "Via AnnData query": [[47, "Via-AnnData-query"]], "Computing on X using online (incremental) algorithms": [[48, "Computing-on-X-using-online-(incremental)-algorithms"]], "Incremental count and mean calculation.": [[48, "Incremental-count-and-mean-calculation."]], "Incremental variance calculation": [[48, "Incremental-variance-calculation"]], "Counting cells per gene, grouped by dataset_id": [[48, "Counting-cells-per-gene,-grouped-by-dataset_id"]], "Genes measured in each cell (dataset presence matrix)": [[49, "Genes-measured-in-each-cell-(dataset-presence-matrix)"]], "Fetching the IDs of the Census datasets": [[49, "Fetching-the-IDs-of-the-Census-datasets"]], "Fetching the dataset presence matrix": [[49, "Fetching-the-dataset-presence-matrix"]], "Identifying genes measured in a specific dataset.": [[49, "Identifying-genes-measured-in-a-specific-dataset."]], "Identifying datasets that measured specific genes": [[49, "Identifying-datasets-that-measured-specific-genes"]], "Identifying all genes measured in a dataset": [[49, "Identifying-all-genes-measured-in-a-dataset"]], "Exploring the Census Datasets table": [[50, 
"Exploring-the-Census-Datasets-table"]], "Fetching the datasets table": [[50, "Fetching-the-datasets-table"]], "Fetching the expression data from a single dataset": [[50, "Fetching-the-expression-data-from-a-single-dataset"]], "Downloading the original source H5AD file of a dataset.": [[50, "Downloading-the-original-source-H5AD-file-of-a-dataset."]], "Understanding and filtering out duplicate cells": [[51, "Understanding-and-filtering-out-duplicate-cells"]], "Why are there duplicate cells in the Census?": [[51, "Why-are-there-duplicate-cells-in-the-Census?"]], "An example: duplicate cells in the Tabula Muris Senis data": [[51, "An-example:-duplicate-cells-in-the-Tabula-Muris-Senis-data"]], "Filtering out duplicate cells": [[51, "Filtering-out-duplicate-cells"]], "Filtering out duplicate cells when reading the obs data frame.": [[51, "Filtering-out-duplicate-cells-when-reading-the-obs-data-frame."]], "Filtering out duplicate cells when creating an AnnData": [[51, "Filtering-out-duplicate-cells-when-creating-an-AnnData"]], "Filtering out duplicate cells for out-of-core operations.": [[51, "Filtering-out-duplicate-cells-for-out-of-core-operations."]], "Access CELLxGENE-hosted embeddings": [[52, "Access-CELLxGENE-hosted-embeddings"]], "Contents": [[52, "Contents"]], "Load an embedding into an AnnData obsm slot": [[52, "Load-an-embedding-into-an-AnnData-obsm-slot"]], "Load embeddings and fetch associated Census data": [[52, "Load-embeddings-and-fetch-associated-Census-data"]], "Embedding Metadata": [[52, "Embedding-Metadata"]], "Querying data using the gget cellxgene module": [[53, "Querying-data-using-the-gget-cellxgene-module"]], "Install gget and set up cellxgene module": [[53, "Install-gget-and-set-up-cellxgene-module"]], "Fetch an AnnData object by selecting gene(s), tissue(s) and cell type(s)": [[53, "Fetch-an-AnnData-object-by-selecting-gene(s),-tissue(s)-and-cell-type(s)"]], "Plot a dot plot similar to those shown on the CZ CELLxGENE Discover Gene Expression": 
[[53, "Plot-a-dot-plot-similar-to-those-shown-on-the-CZ-CELLxGENE-Discover-Gene-Expression"]], "Fetch only cell metadata (corresponds to AnnData.obs)": [[53, "Fetch-only-cell-metadata-(corresponds-to-AnnData.obs)"]], "Use gget cellxgene from the command line": [[53, "Use-gget-cellxgene-from-the-command-line"]], "Querying and fetching the single-cell data and cell/gene metadata.": [[54, "Querying-and-fetching-the-single-cell-data-and-cell/gene-metadata."]], "Querying expression data": [[54, "Querying-expression-data"]], "Querying cell metadata (obs)": [[54, "Querying-cell-metadata-(obs)"]], "Querying gene metadata (var)": [[54, "Querying-gene-metadata-(var)"]], "Exploring pre-calculated summary cell counts": [[55, "Exploring-pre-calculated-summary-cell-counts"]], "Fetching the census_summary_cell_counts dataframe": [[55, "Fetching-the-census_summary_cell_counts-dataframe"]], "Creating summary counts beyond pre-calculated values.": [[55, "Creating-summary-counts-beyond-pre-calculated-values."]], "Experimental Highly Variable Genes API": [[56, "Experimental-Highly-Variable-Genes-API"]], "get_highly_variable_genes": [[56, "get_highly_variable_genes"]], "highly_variable_genes": [[56, "highly_variable_genes"]], "Out-of-core (incremental) mean and variance calculation": [[57, "Out-of-core-(incremental)-mean-and-variance-calculation"]], "The mean and variance API": [[57, "The-mean-and-variance-API"]], "Example: calculate mean and variance for a slice of the Census": [[57, "Example:-calculate-mean-and-variance-for-a-slice-of-the-Census"]], "Training a PyTorch Model": [[58, "Training-a-PyTorch-Model"]], "Open the Census": [[58, "Open-the-Census"]], "Create an ExperimentDataPipe": [[58, "Create-an-ExperimentDataPipe"]], "ExperimentDataPipe class explained": [[58, "ExperimentDataPipe-class-explained"]], "ExperimentDataPipe parameters explained": [[58, "ExperimentDataPipe-parameters-explained"]], "Split the dataset": [[58, "Split-the-dataset"]], "Create the DataLoader": [[58, 
"Create-the-DataLoader"]], "Define the model": [[58, "Define-the-model"]], "Train the model": [[58, "Train-the-model"]], "Make predictions with the model": [[58, "Make-predictions-with-the-model"]], "Python API": [[59, "module-cellxgene_census"]], "Open/retrieve Cell Census data": [[59, "open-retrieve-cell-census-data"]], "Get slice as AnnData": [[59, "get-slice-as-anndata"]], "Feature presence matrix": [[59, "feature-presence-matrix"]], "Versioning of Cell Census builds": [[59, "versioning-of-cell-census-builds"]], "Experimental: Machine Learning": [[59, "experimental-machine-learning"]], "Experimental: Processing": [[59, "experimental-processing"]], "Experimental: Embeddings": [[59, "experimental-embeddings"]]}, "indexentries": {"download_source_h5ad() (in module cellxgene_census)": [[0, "cellxgene_census.download_source_h5ad"]], "get_all_available_embeddings() (in module cellxgene_census.experimental)": [[1, "cellxgene_census.experimental.get_all_available_embeddings"]], "get_all_census_versions_with_embedding() (in module cellxgene_census.experimental)": [[2, "cellxgene_census.experimental.get_all_census_versions_with_embedding"]], "get_embedding() (in module cellxgene_census.experimental)": [[3, "cellxgene_census.experimental.get_embedding"]], "get_embedding_metadata() (in module cellxgene_census.experimental)": [[4, "cellxgene_census.experimental.get_embedding_metadata"]], "get_embedding_metadata_by_name() (in module cellxgene_census.experimental)": [[5, "cellxgene_census.experimental.get_embedding_metadata_by_name"]], "celldatasetbuilder (class in cellxgene_census.experimental.ml.huggingface)": [[6, "cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder"]], "__init__() (cellxgene_census.experimental.ml.huggingface.celldatasetbuilder method)": [[6, "cellxgene_census.experimental.ml.huggingface.CellDatasetBuilder.__init__"]], "geneformertokenizer (class in cellxgene_census.experimental.ml.huggingface)": [[7, 
"cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer"]], "__init__() (cellxgene_census.experimental.ml.huggingface.geneformertokenizer method)": [[7, "cellxgene_census.experimental.ml.huggingface.GeneformerTokenizer.__init__"]], "experimentdatapipe (class in cellxgene_census.experimental.ml.pytorch)": [[8, "cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe"]], "__init__() (cellxgene_census.experimental.ml.pytorch.experimentdatapipe method)": [[8, "cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe.__init__"]], "stats (class in cellxgene_census.experimental.ml.pytorch)": [[9, "cellxgene_census.experimental.ml.pytorch.Stats"]], "__init__() (cellxgene_census.experimental.ml.pytorch.stats method)": [[9, "cellxgene_census.experimental.ml.pytorch.Stats.__init__"]], "experiment_dataloader() (in module cellxgene_census.experimental.ml.pytorch)": [[10, "cellxgene_census.experimental.ml.pytorch.experiment_dataloader"]], "get_highly_variable_genes() (in module cellxgene_census.experimental.pp)": [[11, "cellxgene_census.experimental.pp.get_highly_variable_genes"]], "highly_variable_genes() (in module cellxgene_census.experimental.pp)": [[12, "cellxgene_census.experimental.pp.highly_variable_genes"]], "mean_variance() (in module cellxgene_census.experimental.pp)": [[13, "cellxgene_census.experimental.pp.mean_variance"]], "get_anndata() (in module cellxgene_census)": [[14, "cellxgene_census.get_anndata"]], "get_census_version_description() (in module cellxgene_census)": [[15, "cellxgene_census.get_census_version_description"]], "get_census_version_directory() (in module cellxgene_census)": [[16, "cellxgene_census.get_census_version_directory"]], "get_default_soma_context() (in module cellxgene_census)": [[17, "cellxgene_census.get_default_soma_context"]], "get_obs() (in module cellxgene_census)": [[18, "cellxgene_census.get_obs"]], "get_presence_matrix() (in module cellxgene_census)": [[19, "cellxgene_census.get_presence_matrix"]], 
"get_source_h5ad_uri() (in module cellxgene_census)": [[20, "cellxgene_census.get_source_h5ad_uri"]], "get_var() (in module cellxgene_census)": [[21, "cellxgene_census.get_var"]], "open_soma() (in module cellxgene_census)": [[22, "cellxgene_census.open_soma"]], "cellxgene_census": [[59, "module-cellxgene_census"]], "module": [[59, "module-cellxgene_census"]]}}) \ No newline at end of file

    cellxgene_census.experimental.get_embedding