diff --git a/DESCRIPTION b/DESCRIPTION index 923d0f0..88d8a47 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: w4mclassfilter -Version: 0.98.6 -Date: 2017-12-08 +Version: 0.98.7 +Date: 2018-01-20 Title: W4M Class Filter Description: Filter Workflow4Metabolomics dataMatrix, sampleMetadata, and variableMetadata files by sample-class or variable-attribute range, imputing zero for NA values and eliminating zero-variance rows and columns from the data-matrix. Author: Arthur C Eschenlauer diff --git a/R/ClassFilter.R b/R/ClassFilter.R index 205f3b4..5d0626d 100644 --- a/R/ClassFilter.R +++ b/R/ClassFilter.R @@ -149,11 +149,11 @@ w4m__nonzero_var <- function(m) { utils::str(x) stop("matrix has no columns") } - if ( is.numeric(x) ) { # exclude any rows with zero variance row.vars <- w4m__var_by_rank_or_file(x, dim = 1) nonzero.row.vars <- row.vars > 0 nonzero.rows <- row.vars[nonzero.row.vars] + if ( is.numeric(x) ) { if ( length(rownames(x)) != length(rownames(nonzero.rows)) ) { row.names <- attr(nonzero.rows,"names") x <- x[ row.names, , drop = FALSE ] @@ -219,20 +219,21 @@ w4m__nonzero_var <- function(m) { #' #' Please see the package vignette for further details. 
#' -#' @param dataMatrix_in input data matrix (rows are feature names, columns are sample names -#' @param sampleMetadata_in input sample metadata (rows are sample names, one column's name matches class_column) -#' @param variableMetadata_in input variable metadata (rows are variable names) -#' @param dataMatrix_out output data matrix (rows are feature names, columns are sample names -#' @param sampleMetadata_out output sample metadata (rows are sample names, one column's name matches class_column) -#' @param variableMetadata_out output variable metadata (rows are variable names) -#' @param classes character array: names of sample classes to include or exclude; default is an empty array -#' @param include logical: TRUE, include named sample classes; FALSE (the default), exclude named sample classes -#' @param class_column character: name of "class" column, defaults to "class" -#' @param samplename_column character: name of column with sample name, defaults to "sampleMetadata" -#' @param name_varmetadata_col1 logical: TRUE, name column 1 of variable metadata as "variableMetadata"; FALSE, no change; default is TRUE -#' @param variable_range_filter character array: array of filters specified as 'variableMetadataColumnName:min:max'; default is empty array -#' @param data_imputation function(m): default imputation method for 'intb' data, where intensities have background subtracted - impute zero for NA -#' @param failure_action function(x, ...): action to take upon failure - defaults to 'print(x,...)' +#' @param dataMatrix_in input data matrix (rows are feature names, columns are sample names) +#' @param sampleMetadata_in input sample metadata (rows are sample names, one column's name matches class_column) +#' @param variableMetadata_in input variable metadata (rows are variable names) +#' @param dataMatrix_out output data matrix (rows are feature names, columns are sample names) +#' @param sampleMetadata_out output sample metadata (rows are sample names, one column's 
name matches class_column) +#' @param variableMetadata_out output variable metadata (rows are variable names) +#' @param classes character array: names of sample classes to include or exclude; default is an empty array +#' @param include logical: TRUE, include named sample classes; FALSE (the default), exclude named sample classes +#' @param class_column character: name of "class" column, defaults to "class" +#' @param samplename_column character: name of column with sample name, defaults to "sampleMetadata" +#' @param name_varmetadata_col1 logical: TRUE, name column 1 of variable metadata as "variableMetadata"; FALSE, no change; default is TRUE +#' @param name_smplmetadata_col1 logical: TRUE, name column 1 of sample metadata as "sampleMetadata"; FALSE, no change; default is TRUE +#' @param variable_range_filter character array: array of filters specified as 'variableMetadataColumnName:min:max'; default is empty array +#' @param data_imputation function(m): default imputation method for 'intb' data, where intensities have background subtracted - impute zero for NA +#' @param failure_action function(x, ...): action to take upon failure - defaults to 'print(x,...)' #' #' @return logical: TRUE only if filtration succeeded #' @@ -284,6 +285,7 @@ w4m_filter_by_sample_class <- function( , class_column = "class" # character: name of "class" column, defaults to "class" , samplename_column = "sampleMetadata" # character: name of column with sample name, defaults to "sampleMetadata" , name_varmetadata_col1 = TRUE # logical: TRUE, name column 1 of variable metadata as "variableMetadata"; FALSE, no change; default is TRUE +, name_smplmetadata_col1 = TRUE # logical: TRUE, name column 1 of sample metadata as "sampleMetadata"; FALSE, no change; default is TRUE , variable_range_filter = c() # character array: array of filters specified as 'variableMetadataColumnName:min:max'; default is empty array , data_imputation = w4m_filter_imputation # function(m): default imputation 
method is for 'intb' data, where intensities have background subtracted - impute zero for NA , failure_action = print # function(x, ...): action to take upon failure - defaults to 'print(x,...)' @@ -435,7 +437,10 @@ w4m_filter_by_sample_class <- function( } # extract rownames - rownames(smpl_metadata) <- smpl_metadata[,samplename_column] + if (name_smplmetadata_col1) { + colnames(smpl_metadata)[1] <- "sampleMetadata" + } + rownames(smpl_metadata) <- smpl_metadata[ , samplename_column] if (nchar(class_column) > 0 && length(classes) > 0) { # select the first column of the rows indicated by classes, include, & class_column, but don't drop dimension @@ -449,7 +454,7 @@ w4m_filter_by_sample_class <- function( , Reduce( `|` , lapply(X = classes, FUN = function(pattern) { - grepl(pattern = pattern, x = smpl_metadata[,class_column]) + grepl(pattern = pattern, x = smpl_metadata[ , class_column]) }) ) ) @@ -485,8 +490,8 @@ w4m_filter_by_sample_class <- function( err.env$msg <- "no message setting vrbl_metadata rownames" tryCatch( expr = { - rownames(vrbl_metadata) <- make.names( vrbl_metadata[,1], unique = TRUE ) - vrbl_metadata[,1] <- rownames(vrbl_metadata) + rownames(vrbl_metadata) <- make.names( vrbl_metadata[ , 1 ], unique = TRUE ) + vrbl_metadata[ , 1 ] <- rownames(vrbl_metadata) if (name_varmetadata_col1) { colnames(vrbl_metadata)[1] <- "variableMetadata" } diff --git a/README.md b/README.md index 6bcf41d..bc8af85 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,17 @@ SOFTWARE. ## NEWS +### CHANGES IN VERSION 0.98.7 + +#### NEW FEATURES + +* First column of sample metadata is by default renamed to "sampleMetadata" unless + argument 'name_smplmetadata_col1' is supplied and set to FALSE. 
+ +#### INTERNAL MODIFICATIONS + +* none + ### CHANGES IN VERSION 0.98.6 #### NEW FEATURES diff --git a/inst/NEWS b/inst/NEWS index 1fb03bc..048cf66 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,10 @@ +CHANGES IN VERSION 0.98.7 +------------------------------ + +SIGNIFICANT USER-VISIBLE CHANGES + + o First column of sample metadata is by default renamed to "sampleMetadata" unless argument 'name_smplmetadata_col1' is supplied and set to FALSE. + CHANGES IN VERSION 0.98.6 ------------------------------ diff --git a/man/w4m_filter_by_sample_class.Rd b/man/w4m_filter_by_sample_class.Rd index 0d591b3..323102b 100644 --- a/man/w4m_filter_by_sample_class.Rd +++ b/man/w4m_filter_by_sample_class.Rd @@ -8,8 +8,8 @@ w4m_filter_by_sample_class(dataMatrix_in, sampleMetadata_in, variableMetadata_in, dataMatrix_out, sampleMetadata_out, variableMetadata_out, classes = c(), include = FALSE, class_column = "class", samplename_column = "sampleMetadata", name_varmetadata_col1 = TRUE, - variable_range_filter = c(), data_imputation = w4m_filter_imputation, - failure_action = print) + name_smplmetadata_col1 = TRUE, variable_range_filter = c(), + data_imputation = w4m_filter_imputation, failure_action = print) } \arguments{ \item{dataMatrix_in}{input data matrix (rows are feature names, columns are sample names} @@ -34,6 +34,8 @@ w4m_filter_by_sample_class(dataMatrix_in, sampleMetadata_in, \item{name_varmetadata_col1}{logical: TRUE, name column 1 of variable metadata as "variableMetadata"; FALSE, no change; default is TRUE} +\item{name_smplmetadata_col1}{logical: TRUE, name column 1 of sample metadata as "sampleMetadata"; FALSE, no change; default is TRUE} + \item{variable_range_filter}{character array: array of filters specified as 'variableMetadataColumnName:min:max'; default is empty array} \item{data_imputation}{function(m): default imputation method for 'intb' data, where intensities have background subtracted - impute zero for NA} diff --git 
a/tests/testthat/input_sampleMetadata.tsv b/tests/testthat/input_sampleMetadata.tsv index 00e90a4..ddeb3da 100755 --- a/tests/testthat/input_sampleMetadata.tsv +++ b/tests/testthat/input_sampleMetadata.tsv @@ -1,4 +1,4 @@ -sampleMetadata injectionOrder mode age bmi gender +smplMetadata injectionOrder mode age bmi gender HU_017 2 pos 41 23.03 M HU_028 7 pos 41 23.92 F HU_034 9 pos 52 23.37 M diff --git a/vignettes/w4mclassfilter.Rmd b/vignettes/w4mclassfilter.Rmd index 36139cb..f52221b 100644 --- a/vignettes/w4mclassfilter.Rmd +++ b/vignettes/w4mclassfilter.Rmd @@ -33,7 +33,8 @@ or to address several data issues that may impede downstream statistical analysi * Features may be eliminated by specifying minimum or maximum intensity (or both) allowable in columns of `dataMatrix` for at least one sample for each feature ("range of row-maximum for each feature"). * Missing values in `dataMatrix` are imputed either to zero (by default) or to the result of a user-supplied imputation function. * Features and samples that have zero variance are eliminated. -* Samples and features are sorted alphabetically in rows and columns of `variableMetadata`, `sampleMetadata`, and `dataMatrix` +* Samples and features are sorted alphabetically in rows and columns of `variableMetadata`, `sampleMetadata`, and `dataMatrix` +* By default, the names of the first columns of `variableMetadata` and `sampleMetadata` are set respectively to `"variableMetadata"` and `"sampleMetadata"` ### How the `w4m_filter_by_sample_class` function is used @@ -264,3 +265,10 @@ R package v0.98.6 - Support eliminating features whose attributes fall outside specified ranges. For more detail, see "Feature- and Sample-Elimination" above. - Sort sample names and feature names because some statistical tools expect the same order in `dataMatrix` row and column names as in the corresponding metadata files. 
+ +### New in release v0.98.7 - rename first column of sample metadata + +R package v0.98.7 + + - First column of sample metadata is by default renamed to "sampleMetadata" unless + argument 'name_smplmetadata_col1' is supplied and set to FALSE.