addressing reviewer comments

ctlab · Oct 16, 2023 · b1e0251 · b1e0251
1 parent f1ce286
commit b1e0251
Show file tree

Hide file tree

Showing 40 changed files with 480 additions and 394 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,15 @@
 Package: phantasusLite
 Type: Package
 Title: Loading and annotating RNA-Seq counts matrices 
-Version: 0.99.1
+Version: 0.99.2
 Authors@R: c(person("Rita", "Sablina", role = "aut"),        
              person("Maxim", "Kleverov", role = "aut"),        
              person("Alexey", "Sergushichev", email = "[email protected]", role = c("aut", "cre")))
 Description: PhantasusLite – a lightweight package with helper functions of general interest 
     extracted from phantasus package. In particular, it simplifies working with public
     RNA-seq datasets from GEO by providing access to the remote
     HSDS repository with the precomputed gene counts from ARCHS4 and DEE2 projects.
-Depends: R (>= 4.2)
+Depends: R (>= 4.3)
 Imports: 
     data.table, 
     rhdf5client(>= 1.21.5), 
@@ -28,6 +28,9 @@ Suggests:
     testthat (>= 3.0.0), 
     knitr, 
     rmarkdown, 
+    BiocStyle,
     GEOquery
 VignetteBuilder: knitr
 Config/testthat/edition: 3
+URL: https://github.com/ctlab/phantasusLite/
+BugReports: https://github.com/ctlab/phantasusLite/issues
diff --git a/NAMESPACE b/NAMESPACE
@@ -4,8 +4,8 @@ export(getHSDSFileList)
 export(inferCondition)
 export(loadCountsFromH5FileHSDS)
 export(loadCountsFromHSDS)
-export(read.gct)
-export(write.gct)
+export(readGct)
+export(writeGct)
 import(Biobase)
 import(data.table)
 import(rhdf5)

diff --git a/R/gct.R b/R/gct.R
@@ -7,17 +7,17 @@
 #' @return ExpressionSet object
 #'
 #' @examples
-#' es <- read.gct(system.file("testdata/gct/test.gct", package="phantasusLite"))
+#' es <- readGct(system.file("extdata/testdata/gct/test.gct", package="phantasusLite"))
 #' @export
-read.gct <- function(gct) {
+readGct <- function(gct) {
     meta <- readLines(gct, n = 3)
     version <- meta[1]
     size <- as.numeric(unlist(strsplit(meta[2], "\t")))
-    
+
     if (grepl("^#1.3", version)) {
         # number of column annotations = number of additional rows
         ann.col <- size[4]
-        
+
         # number of row annotations = number of additional columns
         ann.row <- size[3]
     } else if (grepl("^#1.2", version)) {
@@ -26,7 +26,7 @@ read.gct <- function(gct) {
     } else {
         stop("Unsupported version of gct: use 1.2 or 1.3")
     }
-    
+
     colNames <- unlist(strsplit(meta[3], "\t"))
     if (grepl("/", colNames[1])) {
         rowIdField <- sub("(.*)/(.*)", "\\1", colNames[1])
@@ -35,46 +35,46 @@ read.gct <- function(gct) {
         rowIdField <- "id"
         colIdField <- "id"
     }
-    
+
     colNames[1] <- rowIdField
-    
-    t <- fread(gct, 
-               sep="\t", col.names = colNames, 
+
+    t <- fread(gct,
+               sep="\t", col.names = colNames,
                skip = 2 + 1 + ann.col)
-    
-    
+
+
     rn <- t[[1]]
-    
+
     if (any(duplicated(rn))) {
         warning(sprintf("duplicated row IDs: %s; they were renamed",
                         paste0(rn[head(which(duplicated(rn)))], collapse = " ")))
         rn <- make.unique(rn)
     }
-    
+
     exp <- as.matrix(t[, (ann.row + 2):ncol(t)])
     rownames(exp) <- rn
-    
-    
-    
+
+
+
     fdata <- makeAnnotated(t[, seq_len(ann.row + 1), with=FALSE])
     rownames(fdata) <- rn
-    
-    
+
+
     if (ann.col > 0) {
-        pdata.raw <- t(fread(gct, skip = 2, nrows = ann.col + 1, header=FALSE, 
+        pdata.raw <- t(fread(gct, skip = 2, nrows = ann.col + 1, header=FALSE,
                    colClasses = "character"))
         pdata <- data.frame(pdata.raw[seq_len(ncol(exp)) + 1 + ann.row,],
                             stringsAsFactors = FALSE)
         colnames(pdata) <- pdata.raw[1, ]
         colnames(pdata)[1] <- colIdField
         rownames(pdata) <- colnames(exp)
         pdata <- makeAnnotated(pdata)
-        
+
         res <- ExpressionSet(exp, featureData = fdata, phenoData = pdata)
     } else {
         res <- ExpressionSet(exp, featureData = fdata)
     }
-    
+
     res
 }
 
@@ -85,39 +85,39 @@ read.gct <- function(gct) {
 #' @param gzip Whether to apply gzip-compression to the output file
 #' @return Result of closing the file (as in the `close()` function)
 #' @examples
-#' es <- read.gct(system.file("testdata/gct/test.gct", package="phantasusLite"))
+#' es <- readGct(system.file("extdata/testdata/gct/test.gct", package="phantasusLite"))
 #' out <- tempfile(fileext = ".gct.gz")
-#' write.gct(es, out, gzip=TRUE)
+#' writeGct(es, out, gzip=TRUE)
 #' @import Biobase
 #' @export
-write.gct <- function(es, file, gzip=FALSE) {
+writeGct <- function(es, file, gzip=FALSE) {
     if (gzip) {
         con <- gzfile(file)
     } else {
         con <- file(file)
     }
     open(con, open="w")
     writeLines("#1.3", con)
-    
+
     pd <- pData(es)
     fd <- fData(es)
-    
+
     ann.col <- ncol(pData(es))
     ann.row <- ncol(fData(es))
-    writeLines(sprintf("%s\t%s\t%s\t%s", 
-                       nrow(es), ncol(es), 
+    writeLines(sprintf("%s\t%s\t%s\t%s",
+                       nrow(es), ncol(es),
                        ann.row-1, ann.col-1), con)
-    
+
     if (ann.col == 0 && ann.row == 0) {
         stop("There should be at least one row and one column annotation")
     }
-    
+
     idCols <- c(head(colnames(fd), 1), head(colnames(pd), 1))
     idCols <- unique(idCols)
     idCols <- paste0(idCols, collapse="/")
-        
+
     writeLines(paste0(c(idCols, tail(colnames(fd), -1), pd[[1]]), collapse="\t"), con)
-    
+
     ann.col.table <- t(as.matrix(pd[, tail(seq_along(pd), -1), drop=FALSE]))
     ann.col.table <- cbind(
         tail(colnames(pd), -1),
@@ -132,6 +132,7 @@ write.gct <- function(es, file, gzip=FALSE) {
 makeAnnotated <- function(data) {
     meta <- data.frame(labelDescription = colnames(data))
     rownames(meta) <- colnames(data)
-
+
+    methods::new("AnnotatedDataFrame", data = data, varMeta = meta)
     methods::new("AnnotatedDataFrame", data = data, varMeta = meta)
 }
diff --git a/R/getCondition.R b/R/getCondition.R
@@ -1,6 +1,7 @@
 #' Removes repeated words from conditions
 #' @param titles, contains titles
 #' @return titles without repeated words
+#' @keywords internal
 removeRepeatWords <- function(titles) {
   titles_without_repeat_words <- titles
   repeat_words <- regmatches(titles, regexpr("(?![-+}\\]\\)])(\\W|_)*\\w*$", titles, ignore.case = TRUE, perl = TRUE))
@@ -15,6 +16,7 @@ removeRepeatWords <- function(titles) {
 #' Creates condition from the samples titles
 #' @param gse_titles, contains titles
 #' @return List of conditions and replicates
+#' @keywords internal
 inferConditionImpl <- function(gse_titles) {
   inferCondition <- gse_titles
   rep_num <- NULL