From 6fda9b4b3603e72ce28798dee54fb8a4bbd04668 Mon Sep 17 00:00:00 2001 From: phauchamps Date: Tue, 16 Jan 2024 11:19:28 +0100 Subject: [PATCH] - corrected formatting bug in documentation of channelSummaryStats.Rd() - corrected bug in channelSummaryStats (when channels are not explicitly provided) - bumped version to 0.99.8 --- DESCRIPTION | 2 +- NEWS.md | 2 ++ R/stats.R | 14 +++++++++++--- man/channelSummaryStats.Rd | 18 +++--------------- tests/testthat/test-stats.R | 29 +++++++++++++++++++++++++++-- 5 files changed, 44 insertions(+), 21 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6e4f46d..3a30ad4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: CytoMDS Title: Low Dimensions projection of cytometry samples -Version: 0.99.7 +Version: 0.99.8 Authors@R: c(person(given = "Philippe", family = "Hauchamps", diff --git a/NEWS.md b/NEWS.md index ee0f899..c3f2612 100644 --- a/NEWS.md +++ b/NEWS.md @@ -45,6 +45,8 @@ for not loading the whole flowSet in memory at once. unidimensional histograms and store them instead of recalculating them each time a distance between 2 samples is calculated. This improves CPU time and memory consumption. + +### CytoMDS 0.99.8 - renamed `getChannelSummaryStats()` into `channelSummaryStats()` - in `channelSummaryStats(), added support for `BiocParallel`, and allowed for not loading the whole flowSet in memory at once. diff --git a/R/stats.R b/R/stats.R index 1673e14..7a57991 100644 --- a/R/stats.R +++ b/R/stats.R @@ -1106,6 +1106,10 @@ pairwiseEMDDist <- function( packages = c("flowCore"))) { #browser() + if (!is.numeric(nSamples) || nSamples < 1) { + stop("nSamples should be a numeric >= 1") + } + nStats <- length(statFUNs) if (nStats < 1) { stop("At least one stat function should be provided for calculation") @@ -1241,7 +1245,7 @@ pairwiseEMDDist <- function( # rearrange outputs - nCh <- length(channels) + nCh <- ncol(statMatBlockList[[1]][[1]]) chStats <- list() for (s in seq_along(statFUNs)){ chStats[[s]] <- matrix( @@ -1266,8 +1270,9 @@ pairwiseEMDDist <- function( chStats } -#' @title Calculate a summary statistic of some channels of -#' all flowFrames of a flowSet +#' @title Summary statistics per channel computation +#' @description Computation of summary statistic for selected channels, +#' for all flowFrames of a flowSet. #' This method provides two different input modes: #' - the user provides directly a flowSet loaded in memory (RAM). #' - the user provides (1.) a number of samples `nSamples`; (2.) an ad-hoc @@ -1377,6 +1382,9 @@ channelSummaryStats <- function( return(fs[[ffIndex]]) } nSamples <- length(x) + if (nSamples < 1) { + stop("empty flowSet passed") + } chStats <- .channelSummaryStats( nSamples = nSamples, loadFlowFrameFUN = getFF, diff --git a/man/channelSummaryStats.Rd b/man/channelSummaryStats.Rd index 0dc5c49..45a0922 100644 --- a/man/channelSummaryStats.Rd +++ b/man/channelSummaryStats.Rd @@ -2,19 +2,7 @@ % Please edit documentation in R/stats.R \name{channelSummaryStats} \alias{channelSummaryStats} -\title{Calculate a summary statistic of some channels of -all flowFrames of a flowSet -This method provides two different input modes: -\itemize{ -\item the user provides directly a flowSet loaded in memory (RAM). -\item the user provides (1.) a number of samples \code{nSamples}; (2.) an ad-hoc -function that takes as input an index between 1 and \code{nSamples}, and codes -the method to load the corresponding flowFrame in memory; -Optional row and column ranges can be provided to limit the calculation -to a specific rectangle of the matrix. These i.e. can be specified as a way -to split heavy calculations of large distance matrices -on several computation nodes. -}} +\title{Summary statistics per channel computation} \usage{ channelSummaryStats( x, @@ -81,8 +69,8 @@ Exception: if only one stat function (and not a list) is passed in \code{statFUNs}, the return value is simplified to the stat matrix itself. } \description{ -Calculate a summary statistic of some channels of -all flowFrames of a flowSet +Computation of summary statistic for selected channels, +for all flowFrames of a flowSet. This method provides two different input modes: \itemize{ \item the user provides directly a flowSet loaded in memory (RAM). diff --git a/tests/testthat/test-stats.R b/tests/testthat/test-stats.R index 5b0dcbd..0646c80 100644 --- a/tests/testthat/test-stats.R +++ b/tests/testthat/test-stats.R @@ -617,7 +617,7 @@ test_that("channelSummaryStats works", { fsAll, channels = channelsOrMarkers, statFUNs = list("mean" = mean, "std.dev" = stats::sd), - verbose = TRUE) + verbose = FALSE) expect_equal(names(ret), c("mean", "std.dev")) expect_equal(unname(rownames(ret[[1]])), flowCore::sampleNames(fsAll)) @@ -676,6 +676,31 @@ test_that("channelSummaryStats works", { expect_equal(unname(ret[1,1]), 1.900298) expect_equal(unname(ret[1,2]), 1.39186533) expect_equal(unname(ret[1,3]), 1.8544648) + + # case where no channels is provided + ret <- channelSummaryStats( + fsAll, + statFUNs = list("mean" = mean, "std.dev" = stats::sd), + verbose = FALSE + ) + + allSignalChannels <- + flowCore::colnames(fsAll)[CytoPipeline::areSignalCols(fsAll)] + nSignalCh <- length(allSignalChannels) + + allSignalChannelNames <- allSignalChannels + for (i in seq_along(allSignalChannels)) { + channelMarker <- + flowCore::getChannelMarker(fsAll[[1]], allSignalChannels[i])$desc + if (!is.null(channelMarker) && !is.na(channelMarker)){ + allSignalChannelNames[i] <- channelMarker + } + } + + expect_equal(unname(rownames(ret[[1]])), + c("Donor1", "Donor2", "Agg1", "Agg2", "Agg3")) + expect_equal(unname(colnames(ret[[1]])), allSignalChannelNames) + }) test_that("channelSummaryStats dynamic memory loading simulation", { @@ -690,7 +715,7 @@ test_that("channelSummaryStats dynamic memory loading simulation", { channelsOrMarkers <- c("FSC-A", "SSC-A", "BV785 - CD3") nSamples <- 10 - verbose <- TRUE + verbose <- FALSE ret <- CytoMDS::channelSummaryStats( x = nSamples, loadFlowFrameFUN = simulMemoryLoad,