Merge branch 'devel'

compbiomed · Jul 26, 2023 · 990e89e · 990e89e
2 parents 8833314 + 2a6b81c
commit 990e89e
Show file tree

Hide file tree

Showing 367 changed files with 6,783 additions and 4,036 deletions.
diff --git a/.github/workflows/BioC-check.yaml b/.github/workflows/BioC-check.yaml
@@ -25,11 +25,11 @@ jobs:
       R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
 
-      - uses: r-lib/actions/setup-pandoc@v1
+      - uses: r-lib/actions/setup-pandoc@v2
 
-      - uses: r-lib/actions/setup-r@v1
+      - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.config.r }}
           http-user-agent: ${{ matrix.config.http-user-agent }}

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -1,5 +1,7 @@
 # Workflow derived from https://github.com/r-lib/actions/tree/master/examples
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+
+# R CMD check for singleCellTK
 on:
   push:
     branches: [master, devel]

diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,4 @@ vignettes/articles/raw_data/*
 raw_data/*
 *.rds
 .RDataTmp
+docs
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: singleCellTK
 Type: Package
 Title: Comprehensive and Interactive Analysis of Single Cell RNA-Seq Data
-Version: 2.10.0
+Version: 2.10.1
 Authors@R: c(person(given="Yichen", family="Wang", email="[email protected]", role=c("aut", "cre"),
                     comment = c(ORCID = "0000-0003-4347-5199")),
              person(given="Irzam", family="Sarfraz", email="[email protected]", role=c("aut"),
@@ -21,6 +21,7 @@ Authors@R: c(person(given="Yichen", family="Wang", email="[email protected]", role=
              person(given="Zhe", family="Wang", email="[email protected]", role=c("aut")),
              person(given=c("W.", "Evan"), family="Johnson", email="[email protected]", role=c("aut"),
                     comment = c(ORCID = "0000-0002-6247-6595")),
+             person(given="Ming", family="Liu", email="[email protected]", role=c("aut")),
              person(given=c("Joshua", "David"), family="Campbell", email="[email protected]", role=c("aut"))
              )
 Depends:
@@ -37,6 +38,7 @@ biocViews: SingleCell, GeneExpression, DifferentialExpression, Alignment,
 LazyData: FALSE
 Imports:
     ape,
+    anndata,
     AnnotationHub,
     batchelor,
     BiocParallel,
@@ -87,7 +89,7 @@ Imports:
     reshape2,
     shinyalert,
     circlize,
-    enrichR,
+    enrichR (>= 3.2),
     celda,
     shinycssloaders,
     DropletUtils,

diff --git a/Dockerfile b/Dockerfile
@@ -1,16 +1,51 @@
-FROM rocker/shiny-verse:4.0.3
+FROM rocker/shiny-verse:latest
 
-MAINTAINER David Jenkins <[email protected]>
+#Install dependencies on Ubuntu
+RUN buildDeps='libpq-dev build-essential libcurl4-openssl-dev libxml2-dev libssl-dev libssh2-1-dev python3-pip libv8-dev pandoc' apt-get update && apt-get install -y \
+	libpq-dev \
+	libgeos-dev \
+	build-essential \
+	libcurl4-openssl-dev \
+	libxml2-dev \
+	libssl-dev \
+	libssh2-1-dev \
+	libv8-dev \
+	libmagick++-dev \
+	libcairo2-dev \
+	pandoc \
+	python3-pip && apt-get purge -y --auto-remove $buildDeps && apt-get install -y curl && echo
 
-COPY . /sctk
+RUN export CFLAGS="-O3 -march=nehalem" && pip3 install --upgrade pip && pip3 install numpy llvmlite scrublet virtualenv scanpy
+RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg  add - && apt-get update -y && apt-get install google-cloud-cli -y
 
-RUN apt-get -y update -qq \ 
-  && apt-get install -y --no-install-recommends \
-    libjpeg-dev libv8-dev libbz2-dev liblzma-dev libglpk-dev libmagick++-6.q16-dev \
-  && R -e "devtools::install_deps('/sctk', dependencies = TRUE)" \
-  && R -e "devtools::build('/sctk')" \
-  && R -e "install.packages('singleCellTK_1.7.5.tar.gz', repos = NULL, type = 'source')"
+#Add singleCellTK directory and script to docker
+RUN mkdir -p /SCTK_docker/ && mkdir /SCTK_docker/script && mkdir /SCTK_docker/modes 
 
-EXPOSE 3838
+#ADD ./install_packages.R /SCTK_docker/script
+ADD ./exec/SCTK_runQC.R /SCTK_docker/script
 
-CMD ["R", "-e", "shiny::runApp('/sctk/inst/shiny', port = 3838, host = '0.0.0.0')"]
+#Install necessary R packages
+RUN R -e "install.packages('BiocManager')"
+RUN R -e "BiocManager::install('edgeR')"
+RUN R -e "install.packages('SeuratObject')"
+RUN R -e "install.packages('scran')"
+RUN R -e "install.packages('Seurat')"
+#RUN R -e "install.packages('shiny')"
+RUN R -e "install.packages('RCurl')"
+RUN R -e "install.packages('rversions')"
+RUN R -e "install.packages('usethis')"
+RUN R -e "install.packages('optparse', dependencies = TRUE)"
+RUN R -e "install.packages('optparse')"
+RUN R -e "install.packages('kableExtra')"
+RUN R -e "BiocManager::install('TENxPBMCData')"
+RUN R -e "BiocManager::install('scRNAseq')"
+RUN R -e "BiocManager::install('celda')"
+#RUN R -e "devtools::install_github('wleepang/shiny-directory-input')"
+RUN R -e "options(timeout=360000)" \
+	&& R -e "devtools::install_github('mingl1997/singleCellTK', ref = 'devel', force = TRUE, dependencies = TRUE)"
+
+RUN R -e "install.packages('reticulate')"
+RUN R -e "Sys.setenv(RETICULATE_PYTHON = '/usr/bin/python3')"
+RUN R -e "reticulate::py_config()"
+
+ENTRYPOINT ["Rscript", "/usr/local/lib/R/site-library/singleCellTK/exec/SCTK_runQC.R"]
diff --git a/NAMESPACE b/NAMESPACE
@@ -80,6 +80,7 @@ export(plotBarcodeRankScatter)
 export(plotBatchCorrCompare)
 export(plotBatchVariance)
 export(plotBcdsResults)
+export(plotBubble)
 export(plotClusterAbundance)
 export(plotCxdsResults)
 export(plotDEGHeatmap)
@@ -167,6 +168,7 @@ export(runBBKNN)
 export(runBarcodeRankDrops)
 export(runBcds)
 export(runCellQC)
+export(runClusterSummaryMetrics)
 export(runComBatSeq)
 export(runCxds)
 export(runCxdsBcdsHybrid)
@@ -295,6 +297,7 @@ importFrom(dplyr,group_by)
 importFrom(dplyr,summarize)
 importFrom(magrittr,"%>%")
 importFrom(methods,slot)
+importFrom(reshape2,melt)
 importFrom(reticulate,import)
 importFrom(reticulate,py_module_available)
 importFrom(reticulate,py_set_seed)

diff --git a/NEWS.md b/NEWS.md
@@ -1,8 +1,17 @@
+Changes in Version 2.10.1 (2023-07-26)
+================================================================================
+* Added function for bubble plot
+* In SCTK-QC pipeline, added support for batch processing multiple inputs
+* In SCTK-QC pipeline, added support for importing and exporting AnnData objects
+* In SCTK-QC pipeline, fixed a bug causing YAML output files to be empty
+* Updated the SCTK-QC tutorial
+* Fixed bug in combineSCE causing it to create multiple copies of row or column data
+
 Changes in Version 2.10.0 (2023-04-25)
 ================================================================================
 * Updated version to match Bioconductor 3.17
 
-Changes in Version 2.8.1 (2022-03-10)
+Changes in Version 2.8.1 (2023-03-10)
 ================================================================================
 * Added scanpy wrapper functions for use from console
 * Added scanpy UI curated workflow

diff --git a/R/combineSCE.R b/R/combineSCE.R
@@ -70,30 +70,36 @@
 }
 
 
-.mergeRowDataSCE <- function(sceList, by.r) {
-  feList <- lapply(sceList, function(x){
-    rw <- SummarizedExperiment::rowData(x)
-    rw[['rownames']] <- rownames(rw)
-    return(rw)
-  })
-
-  ## Get merged rowData
-  by.r <- unique(c('rownames', by.r))
-  unionFe <- Reduce(function(r1, r2) merge(r1, r2, by=by.r, all=TRUE), feList)
-  allGenes <- unique(unlist(lapply(feList, rownames)))
-
-  ## rowData
-  newFe <- unionFe
-  if (nrow(newFe) != length(allGenes)) {
-    warning("Conflicts were found when merging two rowData. ",
-            "Resolved the conflicts by choosing the first entries.",
-            "To avoid conflicts, please provide the 'by.r' arguments to ",
-            "specify columns in rowData that does not have conflict between two singleCellExperiment object. ")
-    newFe <- newFe[!duplicated(newFe$rownames), ]
-  }
-  rownames(newFe) <- newFe[['rownames']]
-  newFe <- newFe[allGenes,]
-  return(newFe)
+.mergeRowDataSCE <- function(sce.list, by.r) {
+    feList <- lapply(sce.list, function(x){
+        rw <- SummarizedExperiment::rowData(x)
+        rw[['rownames']] <- rownames(rw)
+        return(rw)
+    })
+
+    ## Get merged rowData
+    if (is.null(by.r)) {
+        by.r <- unique(c('rownames', by.r))
+        unionFe <- Reduce(function(r1, r2) merge(r1, r2, by=unique(c(by.r, intersect(names(r1), names(r2)))), all=TRUE), feList)
+    }
+    else {
+      by.r <- unique(c('rownames', by.r))
+      unionFe <- Reduce(function(r1, r2) merge(r1, r2, by=by.r, all=TRUE), feList)
+    }
+    allGenes <- unique(unlist(lapply(feList, rownames)))
+
+    ## rowData
+    newFe <- unionFe
+    if (nrow(newFe) != length(allGenes)) {
+        warning("Conflicts were found when merging two rowData. ",
+                "Resolved the conflicts by choosing the first entries.",
+                "To avoid conflicts, please provide the 'by.r' arguments to ",
+                "specify columns in rowData that does not have conflict between two singleCellExperiment object. ")
+        newFe <- newFe[!duplicated(newFe$rownames), ]
+    }
+    rownames(newFe) <- newFe[['rownames']]
+    newFe <- newFe[allGenes,]
+    return(newFe)
 }
 
 .mergeColDataSCE <- function(sceList, by.c) {
@@ -103,8 +109,15 @@
     return(cD)
   })
 
-  by.c <- unique(c("rownames", by.c))
-  unionCb <- Reduce(function(c1, c2) merge(c1, c2, by=by.c, all=TRUE), cbList)
+  # Merge columns
+  if (is.null(by.c)) {
+    by.c <- unique(c("rownames", by.c))
+    unionCb <- Reduce(function(c1, c2) merge(c1, c2, by=unique(c(by.c, intersect(names(c1), names(c2)))), all=TRUE), cbList)
+  }
+  else {
+    by.c <- unique(c("rownames", by.c))
+    unionCb <- Reduce(function(c1, c2) merge(c1, c2, by=by.c, all=TRUE), cbList)
+  }
   rownames(unionCb) <- unionCb[['rownames']]
   newCbList <- list()
   for (i in seq_along(sceList)) {
@@ -265,6 +278,9 @@ combineSCE <- function(sceList, by.r = NULL, by.c = NULL, combined = TRUE){
   if (length(sceList) == 1) {
     return(sceList[[1]])
   }
+  if (typeof(sceList) != "list") {
+    stop("Error in combineSCE: input must be a list of SCE objects")
+  }
   ##  rowData
   newFeList <- .mergeRowDataSCE(sceList, by.r)
   ## colData

diff --git a/R/exportSCEtoAnndata.R b/R/exportSCEtoAnndata.R
@@ -55,10 +55,10 @@ exportSCEtoAnnData <- function(sce,
   if (file.exists(filePath) && !isTRUE(overwrite)) {
     stop(paste0(path, " already exists. Change 'outputDir' or set 'overwrite' to TRUE."))
   }
-  if (isTRUE(forceDense)) {
-    forceDense <- "True"
-  } else if (isFALSE(forceDense)) {
-    forceDense <- "False"
+  if (isFALSE(forceDense)) {
+    forceDense <- NULL
+  } else if (isTRUE(forceDense)) {
+    forceDense <- "X"
   } else {
     stop("Argument `forceDense` should be `TRUE` or `FALSE`")
   }
@@ -69,8 +69,17 @@ exportSCEtoAnnData <- function(sce,
     }
   }
   annData <- .sce2adata(sce, useAssay)
-  annData$write_h5ad(filePath,
-                     compression = compression,
-                     compression_opts = compressionOpts,
-                     force_dense = forceDense)
+  if (is.null(forceDense)) {
+    anndata::write_h5ad(annData, 
+                      filePath,
+                      compression = compression,
+                      compression_opts = compressionOpts)
+  }
+  else {
+    anndata::write_h5ad(annData, 
+                      filePath,
+                      compression = compression,
+                      compression_opts = compressionOpts,
+                      as.dense = forceDense)
+  }
 }
diff --git a/R/importAnnData.R b/R/importAnnData.R
@@ -28,7 +28,7 @@
                               colData = sce_coldata)
   colnames(sce) <- paste0(sampleName,"_",colnames(sce))
 
-  multi_Assay <- reticulate::py_to_r(anndata$layers$as_dict())
+  multi_Assay <- reticulate::py_to_r(reticulate::dict(anndata$layers))
   for(assay_name in names(multi_Assay)){
     tryCatch({
       SummarizedExperiment::assay(sce, assay_name, withDimnames = FALSE) <- t(reticulate::py_to_r(multi_Assay[[assay_name]]))
@@ -126,6 +126,8 @@ importAnnData <- function(sampleDirs = NULL,
                           rowNamesDedup = TRUE) {
 
   if (length(sampleDirs)!=length(sampleNames)){
+    print(length(sampleDirs))
+    print(length(sampleNames))
     stop("Number of sampleDirs must be equal to number of SampleNames. Please provide sample names for all input directories")
   }
 
@@ -152,16 +154,3 @@ importAnnData <- function(sampleDirs = NULL,
 
   return(sce)
 }
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/R/importCellRanger.R b/R/importCellRanger.R
@@ -170,7 +170,7 @@
         }
 
         if (gzipped != "auto") {
-            if (length(gzipped) != sampleLength & length(gzipped) != 1) {
+            if (length(gzipped) != sampleLength && length(gzipped) != 1) {
                 stop("'sampleDirs' and 'gzipped' have unequal lengths!")
             }
         }
@@ -235,7 +235,7 @@
             }
 
             if (gzipped != "auto") {
-                if (sampleLength != length(gzipped) & length(gzipped) != 1) {
+                if (sampleLength != length(gzipped) && length(gzipped) != 1) {
                     stop("The length of 'gzipped' does not match",
                         " length of",
                         " subdirectories in 'cellRangerDirs'!")
@@ -294,7 +294,7 @@
             }
 
             if (gzipped != "auto") {
-                if (length(gzipped) != sampleLength & length(gzipped) != 1) {
+                if (length(gzipped) != sampleLength && length(gzipped) != 1) {
                     stop("'gzipped' and 'unlist(sampleDirs)'",
                         " have unequal lengths!")
                 }
@@ -642,7 +642,7 @@ importCellRangerV2 <- function(
     dataTypeV2 <- match.arg(dataTypeV2)
 
     if (is.null(cellRangerOutsV2)) {
-        if (is.null(reference) | is.null(dataTypeV2)) {
+        if (is.null(reference) || is.null(dataTypeV2)) {
             stop("'reference' and 'dataTypeV2' are required ",
                  "when 'cellRangerOutsV2' is not specified!")
         }
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,3 +13,4 @@ vignettes/articles/raw_data/* @@
     raw_data/*
     *.rds
     .RDataTmp
+    docs