improved seurat

Matiss Ozols committed Jun 18, 2024
1 parent e85cabe commit 3003d72
Showing 7 changed files with 404 additions and 136 deletions.
269 changes: 153 additions & 116 deletions bin/2.integrate.R

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions bin/2.process_donor_data_for_integration.R
@@ -108,13 +108,13 @@ for (donor in names(sobj_per_donor)) {

# Check number of PCs
dims2 <- dim(sobj_per_donor[[donor]]@assays$RNA$counts)
- num_pcs <- min(20, dims2[2] - 1)
- if (num_pcs < 20) {
+ num_pcs <- min(50, dims2[2] - 1)
+ if (num_pcs < 50) {
donors_to_drop <- append(donors_to_drop, donor)
}

# Run PCA on all variable genes
- sobj_per_donor[[donor]] <- RunPCA(sobj_per_donor[[donor]], dims = 1:10, npcs = 20, verbose = FALSE)
+ sobj_per_donor[[donor]] <- RunPCA(sobj_per_donor[[donor]], dims = 1:10, npcs = num_pcs, verbose = FALSE)
p2 <- DimPlot(sobj_per_donor[[donor]]) + ggtitle('Before cell cycle correction (all genes)')
sobj_per_donor[[donor]]@project.name <- paste0(sample_name, '-', donor)
}
139 changes: 139 additions & 0 deletions bin/totalVI.py
@@ -0,0 +1,139 @@
#!/usr/bin/env python

__date__ = '2020-05-26'
__version__ = '0.0.1'

import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import gridspec
import numpy as np
import pandas as pd
import scipy as sci
import scanpy as sc
import copy
import scvi
import argparse
import glob



# This script runs totalVI on the normalised single-cell object together with the CITE-seq protein data,
# then writes an integrated AnnData object and UMAP plots.
parser = argparse.ArgumentParser(
description="""
Collect inputs
"""
)

parser.add_argument(
'-h5ad_file', '--h5ad_file',
action='store',
dest='h5ad_file',
required=True,
help='Input h5ad_file after normalisation'
)



options = parser.parse_args()

sc.set_figure_params(figsize=(5,5), dpi=150)

#import single cell data and CITE-seq data
# SLEmap = sc.read('adata-normalized.h5ad')
SLEmap = sc.read(options.h5ad_file)

all_cite_files = glob.glob("./*/*.matrix.csv")
CITE = pd.DataFrame()
for f1 in all_cite_files:
c1 = pd.read_csv(f1,index_col=0)
CITE=pd.concat([CITE,c1])

# Merge the CITE-seq data into the obs of the single-cell object
SLEmap.obs["Barcode"] = SLEmap.obs.index.str.split('__').str[0]
SLEmap.obs = SLEmap.obs.merge(CITE, left_on=['Barcode'],right_index=True,how='left', indicator=True)


# Store the CITE-seq data in obsm of the single-cell object: required for totalVI
CITE_2 = SLEmap.obs[CITE.columns].copy()
SLEmap.obsm['protein_expression'] = CITE_2

# keep only cells passing QC and highly variable genes
SLEmap = SLEmap[SLEmap.obs["cell_passes_qc"],:]
SLEmap = SLEmap[SLEmap.obs["cell_passes_hard_filters"],:]
SLEmap = SLEmap[:,SLEmap.var["highly_variable"]]
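# (Assumption: cell_passes_qc and cell_passes_hard_filters are boolean columns in .obs,
# and highly_variable is a boolean column in .var, as produced by the upstream QC step.)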

#run totalVI
SLEmap = SLEmap.copy()
scvi.model.TOTALVI.setup_anndata(SLEmap, protein_expression_obsm_key="protein_expression",batch_key="experiment_id")
model = scvi.model.TOTALVI(SLEmap,latent_distribution="normal",n_layers_decoder=2)
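# batch_key="experiment_id" registers the experiment as totalVI's batch covariate,
# so cells from different experiments are integrated in the shared latent space.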

model.train()
model.save("./scvi_model_300samples",adata=SLEmap, overwrite=True)


# Get latent expression from model: used for UMAP calculations
SLEmap.obsm["X_totalVI"] = model.get_latent_representation()


# Get denoised protein values: this can fail on low memory (use a minimum of 400 for n_samples=25).
# If it keeps failing, lower n_samples, or skip this step and use the dsb values instead.
rna, protein = model.get_normalized_expression(n_samples=25,return_mean=True)
SLEmap.obsm["denoised_protein"] = protein

sc.pp.neighbors(SLEmap, use_rep="X_totalVI",n_neighbors=15)
sc.tl.umap(SLEmap)


sc.set_figure_params(figsize=(5,5), dpi=150)

def panel_grid(hspace, wspace, ncols, num_panels):
"""Init plot."""
n_panels_x = min(ncols, num_panels)
n_panels_y = np.ceil(num_panels / n_panels_x).astype(int)
if wspace is None:
# try to set a wspace that is not too large or too small given the
# current figure size
wspace = 0.75 / rcParams['figure.figsize'][0] + 0.02
# each panel will have the size of rcParams['figure.figsize']
fig = plt.figure(
figsize=(
n_panels_x * rcParams['figure.figsize'][0] * (1 + wspace),
n_panels_y * rcParams['figure.figsize'][1],
)
)
left = 0.2 / n_panels_x
bottom = 0.13 / n_panels_y
gs = gridspec.GridSpec(
nrows=n_panels_y,
ncols=n_panels_x,
left=left,
right=1 - (n_panels_x - 1) * left - 0.01 / n_panels_x,
bottom=bottom,
top=1 - (n_panels_y - 1) * bottom - 0.1 / n_panels_y,
hspace=hspace,
wspace=wspace
)
return fig, gs

fig, grid = panel_grid(
hspace=0.125*2,
wspace=None,
ncols=4,
num_panels=2
)

sc.pl.umap(
SLEmap,
color=["Azimuth:predicted.celltype.l1","Azimuth:predicted.celltype.l2"],
# Set a smaller point size to prevent overlap
size=0.7,save='plots.pdf'
)

fig.savefig(
'umap.png',
#dpi=300,
bbox_inches='tight'
)

SLEmap.write("./totalVI_integrated.h5ad")
36 changes: 33 additions & 3 deletions modules/nf-core/modules/citeseq/main.nf
@@ -75,8 +75,7 @@ process DSB {
"""
}

-process DSB_INTEGRATE{
-
+process COMBINE_INTEGRATIONS{
label 'process_medium'
tag "${sample_name}"
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
@@ -91,6 +90,36 @@ process DSB_INTEGRATE{
output:
// path("out"), emit: outdir
path("*all_samples_integrated.RDS"), emit: tmp_rds_file

input:
path(rds_files)

script:

"""
echo 'running1'
2.combine.R
"""

}


process DSB_INTEGRATE{

label 'process_high'
tag "${sample_name}"
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://yascp.cog.sanger.ac.uk/public/singularity_images/azimuth_dsb_6_03_2024.sif"
} else {
container "mercury/azimuth_dsb:6_03_2024"
}

publishDir path: "${params.outdir}/citeseq/all_data_integrated", mode: "${params.copy_mode}",
overwrite: "true"

output:
// path("out"), emit: outdir
path("*__integration_*.RDS"), emit: tmp_rds_file
path("figures__*/*"), emit: figs


@@ -103,6 +132,7 @@ process DSB_INTEGRATE{
val(ndim_sct)
val(ndim_citeBgRemoved)
val(ndim_cite_integrated)
+val(method)
script:

if (vars_to_regress == ''){
@@ -111,7 +141,7 @@ process DSB_INTEGRATE{

"""
echo 'running1'
-2.integrate.R ${vars_to_regress} ${k_anchor} ${dims} ${ndim_sct} ${ndim_citeBgRemoved} ${ndim_cite_integrated}
+2.integrate.R ${vars_to_regress} ${k_anchor} ${dims} ${ndim_sct} ${ndim_citeBgRemoved} ${ndim_cite_integrated} ${method}
"""

}
29 changes: 29 additions & 0 deletions modules/nf-core/modules/totalVi/main.nf
@@ -0,0 +1,29 @@
process TOTAL_VI_INTEGRATION{
label 'process_medium'
if (params.utilise_gpu){
label 'gpu'
}else{
label 'process_medium'
// label 'process_low'
}

if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "/lustre/scratch123/hgi/teams/hgi/mo11/tmp_projects/sle_project/5.sle_noCB_fullRun_MAD_perPool/yascp_totalvi_v1.sif"
} else {
container "wtsihgi/nf_scrna_qc:6bb6af5"
}

input:
path(adata)
path(citedata)

// output:
// val(outdir, emit: outdir)

script:

"""
totalVI.py -h5ad_file ${adata}
"""

}
47 changes: 37 additions & 10 deletions subworkflows/qc.nf
@@ -14,7 +14,8 @@ include {UMAP; UMAP as UMAP_HARMONY; UMAP as UMAP_BBKNN;} from "$projectDir/modu
include {CLUSTERING; CLUSTERING as CLUSTERING_HARMONY; CLUSTERING as CLUSTERING_BBKNN;} from "$projectDir/modules/nf-core/modules/clustering/main"
include {CELL_HARD_FILTERS} from "$projectDir/modules/nf-core/modules/cell_hard_filters/main"
include {DONT_INTEGRATE} from "$projectDir/modules/nf-core/modules/reduce_dims/main"
-include { DSB_PROCESS; PREPROCESS_PROCESS; DSB_INTEGRATE; MULTIMODAL_INTEGRATION; VDJ_INTEGRATION } from '../modules/nf-core/modules/citeseq/main'
+include {TOTAL_VI_INTEGRATION} from "$projectDir/modules/nf-core/modules/totalVi/main"
+include { DSB_PROCESS; PREPROCESS_PROCESS; DSB_INTEGRATE as DSB_INTEGRATE_SCT;DSB_INTEGRATE as DSB_INTEGRATE_CITE;DSB_INTEGRATE as DSB_INTEGRATE_cite_bgRemoved; MULTIMODAL_INTEGRATION; VDJ_INTEGRATION } from '../modules/nf-core/modules/citeseq/main'

workflow qc {
take:
@@ -86,6 +87,10 @@ workflow qc {
NORMALISE_AND_PCA.out.sample_QCd_adata.flatten().map{sample -> tuple("${sample}".replaceFirst(/___sample_QCd_adata.h5ad/,"").replaceFirst(/.*\//,""),sample)}.set{alt_input}
channel_dsb2 = channel_dsb.combine(alt_input, by: 0)
DSB_PROCESS(channel_dsb2)
+// SCT,CITE,cite_bgRemoved
+if(params.totalVi.run_process){
+TOTAL_VI_INTEGRATION(NORMALISE_AND_PCA.out.anndata,DSB_PROCESS.out.citeseq_rsd.collect())
+}

DSB_PROCESS.out.ch_for_norm.subscribe { println "1:: DSB_PROCESS.out.ch_for_norm: $it" }

@@ -100,24 +105,46 @@ workflow qc {
matched_donors.subscribe { println "1:: matched_donors $it" }
PREPROCESS_PROCESS(inp4,params.reduced_dims.vars_to_regress.value)

DSB_INTEGRATE(
DSB_INTEGRATE_SCT(
PREPROCESS_PROCESS.out.tmp_rsd.collect(),
params.reduced_dims.vars_to_regress.value,
params.reduced_dims.seurat_integration.k_anchor,
params.reduced_dims.seurat_integration.dims,
params.reduced_dims.seurat_integration.ndim_sct,
params.reduced_dims.seurat_integration.ndim_citeBgRemoved,
params.reduced_dims.seurat_integration.ndim_cite_integrated
)

MULTIMODAL_INTEGRATION(
DSB_INTEGRATE.out.tmp_rds_file,
params.reduced_dims.seurat_integration.ndim_cite_integrated,
'SCT'
)

VDJ_INTEGRATION(
chanel_cr_outs.collect(),
MULTIMODAL_INTEGRATION.out.wnn_integrated_file
DSB_INTEGRATE_CITE(
PREPROCESS_PROCESS.out.tmp_rsd.collect(),
params.reduced_dims.vars_to_regress.value,
params.reduced_dims.seurat_integration.k_anchor,
params.reduced_dims.seurat_integration.dims,
params.reduced_dims.seurat_integration.ndim_sct,
params.reduced_dims.seurat_integration.ndim_citeBgRemoved,
params.reduced_dims.seurat_integration.ndim_cite_integrated,
'CITE'
)
DSB_INTEGRATE_cite_bgRemoved(
PREPROCESS_PROCESS.out.tmp_rsd.collect(),
params.reduced_dims.vars_to_regress.value,
params.reduced_dims.seurat_integration.k_anchor,
params.reduced_dims.seurat_integration.dims,
params.reduced_dims.seurat_integration.ndim_sct,
params.reduced_dims.seurat_integration.ndim_citeBgRemoved,
params.reduced_dims.seurat_integration.ndim_cite_integrated,
'cite_bgRemoved'
)

// MULTIMODAL_INTEGRATION(
// DSB_INTEGRATE.out.tmp_rds_file,
// )

// VDJ_INTEGRATION(
// chanel_cr_outs.collect(),
// MULTIMODAL_INTEGRATION.out.wnn_integrated_file
// )
}


14 changes: 10 additions & 4 deletions workflows/yascp.nf
@@ -201,10 +201,16 @@ workflow YASCP {

if (params.citeseq){

-vireo_paths = params.outdir
-? Channel.fromPath("${params.outdir}/deconvolution/vireo/*/vireo_*", checkIfExists:true, type: 'dir')
-: Channel.fromPath("${launchDir}/${params.outdir}/deconvolution/vireo/*/vireo_*", type: 'dir')

+// vireo_paths1 = params.outdir
+// ? Channel.fromPath("${params.outdir}/deconvolution/vireo/*/vireo_*", checkIfExists:true, type: 'dir')
+// : Channel.fromPath("${launchDir}/${params.outdir}/deconvolution/vireo/*/vireo_*", checkIfExists:true, type: 'dir')
+vireo_paths2 = params.outdir
+? Channel.fromPath("${params.outdir}/deconvolution/vireo/vireo_*", checkIfExists:true, type: 'dir')
+: Channel.fromPath("${launchDir}/${params.outdir}/deconvolution/vireo/vireo_*", checkIfExists:true, type: 'dir')
+vireo_paths3 = params.outdir
+? Channel.fromPath("${params.outdir}/deconvolution/vireo/*/*/vireo_*", checkIfExists:true, type: 'dir')
+: Channel.fromPath("${launchDir}/${params.outdir}/deconvolution/vireo/*/*/vireo_*", checkIfExists:true, type: 'dir')
+vireo_paths = vireo_paths3.mix(vireo_paths2)//.mix(vireo_paths3)
GENOTYPE_MATCHER(vireo_paths.collect())
matched_donors = GENOTYPE_MATCHER.out.matched_donors
}else{
