Skip to content

Commit

Permalink
addressing reviewer comments
Browse files Browse the repository at this point in the history
  • Loading branch information
assaron committed Oct 16, 2023
1 parent f1ce286 commit b1e0251
Show file tree
Hide file tree
Showing 40 changed files with 480 additions and 394 deletions.
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
Package: phantasusLite
Type: Package
Title: Loading and annotating RNA-Seq counts matrices
Version: 0.99.1
Version: 0.99.2
Authors@R: c(person("Rita", "Sablina", role = "aut"),
person("Maxim", "Kleverov", role = "aut"),
person("Alexey", "Sergushichev", email = "[email protected]", role = c("aut", "cre")))
Description: PhantasusLite – a lightweight package with helper functions of general interest
extracted from phantasus package. In particular it simplifies working with public
RNA-seq datasets from GEO by providing access to the remote
HSDS repository with the precomputed gene counts from ARCHS4 and DEE2 projects.
Depends: R (>= 4.2)
Depends: R (>= 4.3)
Imports:
data.table,
rhdf5client(>= 1.21.5),
Expand All @@ -28,6 +28,9 @@ Suggests:
testthat (>= 3.0.0),
knitr,
rmarkdown,
BiocStyle,
GEOquery
VignetteBuilder: knitr
Config/testthat/edition: 3
URL: https://github.com/ctlab/phantasusLite/
BugReports: https://github.com/ctlab/phantasusLite/issues
4 changes: 2 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ export(getHSDSFileList)
export(inferCondition)
export(loadCountsFromH5FileHSDS)
export(loadCountsFromHSDS)
export(read.gct)
export(write.gct)
export(readGct)
export(writeGct)
import(Biobase)
import(data.table)
import(rhdf5)
Expand Down
67 changes: 34 additions & 33 deletions R/gct.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@
#' @return ExpressionSet object
#'
#' @examples
#' es <- read.gct(system.file("testdata/gct/test.gct", package="phantasusLite"))
#' es <- readGct(system.file("extdata/testdata/gct/test.gct", package="phantasusLite"))
#' @export
read.gct <- function(gct) {
readGct <- function(gct) {
meta <- readLines(gct, n = 3)
version <- meta[1]
size <- as.numeric(unlist(strsplit(meta[2], "\t")))

if (grepl("^#1.3", version)) {
# number of column annotations = number of additional rows
ann.col <- size[4]

# number of row annotations = number of additional columns
ann.row <- size[3]
} else if (grepl("^#1.2", version)) {
Expand All @@ -26,7 +26,7 @@ read.gct <- function(gct) {
} else {
stop("Unsupported version of gct: use 1.2 or 1.3")
}

colNames <- unlist(strsplit(meta[3], "\t"))
if (grepl("/", colNames[1])) {
rowIdField <- sub("(.*)/(.*)", "\\1", colNames[1])
Expand All @@ -35,46 +35,46 @@ read.gct <- function(gct) {
rowIdField <- "id"
colIdField <- "id"
}

colNames[1] <- rowIdField
t <- fread(gct,
sep="\t", col.names = colNames,

t <- fread(gct,
sep="\t", col.names = colNames,
skip = 2 + 1 + ann.col)


rn <- t[[1]]

if (any(duplicated(rn))) {
warning(sprintf("duplicated row IDs: %s; they were renamed",
paste0(rn[head(which(duplicated(rn)))], collapse = " ")))
rn <- make.unique(rn)
}

exp <- as.matrix(t[, (ann.row + 2):ncol(t)])
rownames(exp) <- rn



fdata <- makeAnnotated(t[, seq_len(ann.row + 1), with=FALSE])
rownames(fdata) <- rn


if (ann.col > 0) {
pdata.raw <- t(fread(gct, skip = 2, nrows = ann.col + 1, header=FALSE,
pdata.raw <- t(fread(gct, skip = 2, nrows = ann.col + 1, header=FALSE,
colClasses = "character"))
pdata <- data.frame(pdata.raw[seq_len(ncol(exp)) + 1 + ann.row,],
stringsAsFactors = FALSE)
colnames(pdata) <- pdata.raw[1, ]
colnames(pdata)[1] <- colIdField
rownames(pdata) <- colnames(exp)
pdata <- makeAnnotated(pdata)

res <- ExpressionSet(exp, featureData = fdata, phenoData = pdata)
} else {
res <- ExpressionSet(exp, featureData = fdata)
}

res
}

Expand All @@ -85,39 +85,39 @@ read.gct <- function(gct) {
#' @param gzip Whether to apply gzip-compression to the output file
#'
#' @return Result of closing the file (as in the `close()` function)
#' @examples
#' es <- read.gct(system.file("testdata/gct/test.gct", package="phantasusLite"))
#' es <- readGct(system.file("extdata/testdata/gct/test.gct", package="phantasusLite"))
#' out <- tempfile(fileext = ".gct.gz")
#' write.gct(es, out, gzip=TRUE)
#' writeGct(es, out, gzip=TRUE)
#' @import Biobase
#' @export
write.gct <- function(es, file, gzip=FALSE) {
writeGct <- function(es, file, gzip=FALSE) {
if (gzip) {
con <- gzfile(file)
} else {
con <- file(file)
}
open(con, open="w")
writeLines("#1.3", con)

pd <- pData(es)
fd <- fData(es)

ann.col <- ncol(pData(es))
ann.row <- ncol(fData(es))
writeLines(sprintf("%s\t%s\t%s\t%s",
nrow(es), ncol(es),
writeLines(sprintf("%s\t%s\t%s\t%s",
nrow(es), ncol(es),
ann.row-1, ann.col-1), con)

if (ann.col == 0 && ann.row == 0) {
stop("There should be at least one row and one column annotation")
}

idCols <- c(head(colnames(fd), 1), head(colnames(pd), 1))
idCols <- unique(idCols)
idCols <- paste0(idCols, collapse="/")

writeLines(paste0(c(idCols, tail(colnames(fd), -1), pd[[1]]), collapse="\t"), con)

ann.col.table <- t(as.matrix(pd[, tail(seq_along(pd), -1), drop=FALSE]))
ann.col.table <- cbind(
tail(colnames(pd), -1),
Expand All @@ -132,6 +132,7 @@ write.gct <- function(es, file, gzip=FALSE) {
# Wrap a table into an "AnnotatedDataFrame" object.
#
# data: table whose column names are reused both as the row names of the
#   variable-metadata table and as its `labelDescription` values.
# Returns the object created by methods::new("AnnotatedDataFrame", ...).
makeAnnotated <- function(data) {
# One metadata row per column of `data`, labelled with the column's own name.
meta <- data.frame(labelDescription = colnames(data))
rownames(meta) <- colnames(data)


# NOTE(review): the two identical calls below look like the removed/added pair
# of a whitespace-only diff line -- confirm. As written, only the value of the
# last call is returned.
methods::new("AnnotatedDataFrame", data = data, varMeta = meta)
methods::new("AnnotatedDataFrame", data = data, varMeta = meta)
}
2 changes: 2 additions & 0 deletions R/getCondition.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' Removes repeated words from conditions
#' @param titles, contains titles
#' @return titles without repeated words
#' @keywords internal
removeRepeatWords <- function(titles) {
titles_without_repeat_words <- titles
repeat_words <- regmatches(titles, regexpr("(?![-+}\\]\\)])(\\W|_)*\\w*$", titles, ignore.case = TRUE, perl = TRUE))
Expand All @@ -15,6 +16,7 @@ removeRepeatWords <- function(titles) {
#' Creates condition from the samples titles
#' @param gse_titles, contains titles
#' @return List of conditions and replicates
#' @keywords internal
inferConditionImpl <- function(gse_titles) {
inferCondition <- gse_titles
rep_num <- NULL
Expand Down
Loading

0 comments on commit b1e0251

Please sign in to comment.