diff --git a/DESCRIPTION b/DESCRIPTION index cc32c64..b8c5d64 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,4 +52,3 @@ Suggests: knitr, testthat, covr - diff --git a/NAMESPACE b/NAMESPACE index bed7071..6147233 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -70,8 +70,10 @@ importFrom(grDevices,colorRamp) importFrom(grDevices,rgb) importFrom(keras,load_model_hdf5) importFrom(keras,predict_classes) +importFrom(stats,IQR) importFrom(stats,approx) importFrom(stats,median) +importFrom(stats,quantile) importFrom(stats,sd) importFrom(utils,data) importFrom(utils,read.csv) diff --git a/R/IL_functions.R b/R/IL_functions.R index ee45177..1bf73c5 100644 --- a/R/IL_functions.R +++ b/R/IL_functions.R @@ -1,206 +1,220 @@ -#'@title generateIL -#'@description Groups the SOIs generated in the previous step and sets them up -#' in an inclusion list ready for the MS/MS analysis. It takes into account the -#' instrument precursor filter mz range and groups the SOIs accordingly. The -#' user can also specify whether they want to use all SOIs or prioritize SOIs -#' with a certain annotation (M+H adducts, prioritize SOIs with adequate -#' isotopic patterns, etc.). -#'@param struct The RHermesExp object in which to save the resulting IL -#'@param id The SOI list ID from which to generate the IL -#'@param par An inclusion list parameter object, generated with ILParam() -#'@return An RHermes object with the IL inside a new MS2Exp entry, inside the -#' data slot -#'@examples -#'if(FALSE){ -#' par <- ILParam(filtermz = 0.5, priorization = "full") -#' myHermes <- generateIL(myHermes, 1, par) -#'} -#'@importFrom dplyr between -#'@export -setGeneric("generateIL", function(struct, id, par) { - standardGeneric("generateIL") -}) -#' @rdname generateIL -setMethod("generateIL", c("RHermesExp", "numeric", "ANY"), - function(struct, id, par = ILParam()) { - validObject(struct) - validObject(par) - if (!is(par, "ILParam")) { - stop("Please enter a valid IL parameter set with ILParam()") - } - if (id > length(struct@data@SOI)) { - stop("The entered ID is greater than the number of SOI lists") - } - message("Now processing the IL:") - struct@data@MS2Exp <- c(struct@data@MS2Exp, - RHermesMS2Exp( - IL = inclusionList(struct, par, id), - MS2Data = list(), - Ident = list() - )) - message("Done!") - return(struct) -}) - -inclusionList <- function(struct, params, id) { - SoiList <- struct@data@SOI[[id]] - ppm <- struct@metadata@ExpParam@ppm - GL <- SoiList@SOIList - rtmargin <- params@rtmargin - priorization <- params@priorization - adduct <- params@ad - filtermz <- params@filtermz - filterrt <- params@filterrt - if (priorization == "yes") { - GL <- GLprior(GL, adduct, rtmargin, ppm) - low <- which(GL$MaxInt < 50000) - rare <- which(vapply(GL$ad[low], function(x) { - !any(unlist(x) %in% c("M+H", "M+NH4", "M+Na", "M+K", "M+", "M-H", - "M+Cl", "M+Br", "M+H2O-H")) - }, logical(1))) - if (length(rare) != 0) {GL <- GL[-low[rare], ]} - } - GL <- GL[, c("start", "end", "formula", "mass", "MaxInt", "anot")] - GL$originalSOI <- seq_len(nrow(GL)) - - GL <- lapply(seq_len(nrow(GL)), function(x) { - cur <- GL[x, ] - df <- as.data.frame(matrix(unlist(strsplit(cur$anot[[1]], "_")), - ncol = 2, byrow = TRUE)) - dplyr::bind_cols(cur[rep(1, nrow(df)), ], df) - }) - GL <- do.call(rbind, GL) - colnames(GL)[c(8, 9)] <- c("f", "ad") - GL$f <- as.character(GL$f) - GL$ad <- as.character(GL$ad) - - ##Adduct Priorization - if (priorization == "only") { - message(paste("Selecting only", adduct)) - GL <- GL[which(GL$ad %in% unlist(adduct)), ] - } - - #Group 
entries by similar characterization attributes - GL <- GLgroup(GL, rtmargin, ppm) - - #Tidy the entries, sort them by relevance and store them into a internal DF - #Keep the important info for MS2 acquisition (rti, rtf and mass for - #planning injections) - GL <- GLtidy(GL, filterrt, filtermz) - - GL$entrynames <- vapply(GL$jointentries, function(entry) { - res <- lapply(entry$metadata, function(subentry) { - return(paste(unique(paste(subentry$f, subentry$ad, sep = "$"), - collapse = "#"))) - }) - return(paste(unique(unlist(res)), sep = "$", collapse = "#")) - }, character(1)) - GL$entrynames <- unlist(GL$entrynames) - - PL_id <- which(struct@metadata@filenames == SoiList@filename)[1] - raw <- PL(struct, PL_id)@raw - if(nrow(raw) != 0){ - GL$XIC <- lapply(seq_len(nrow(GL)), function(x){ - cur <- GL[x,] - mzs <- c(min(cur$mass) * (1 - ppm * 1e-6), - max(cur$mass) * (1 + ppm * 1e-6)) - calculate_XIC_estimation(raw, - mzs, - c(cur$start, cur$end)) - }) - } - GL <- RHermesIL(IL = as.data.table(GL[, -5]), annotation = GL$jointentries, - SOInum = id, ILParam = params) - return(GL) -} - -GLprior <- function(GL, ad, rtmargin, ppm) { - if (!"adrows" %in% colnames(GL)) { - warning("No adduct similarity annotation has been found on this SOI list - Adduct priorization failed. - Please use filterSOI() before priorization.") - return(GL) - } - for (i in ad) { - message(paste("Prioritizing", i)) - priorityentries <- GL[unlist(vapply(GL$ad, function(x) { - any(x %in% i) - }, FUN.VALUE = logical(1))), ] - if (nrow(priorityentries) == 0) { - next - } - connections <- unlist(lapply(priorityentries$adrows, function(ads) { - ads[[i]] - })) - toremove <- which(GL$originalID %in% connections) - for (j in toremove) { - st <- GL$start[j] - end <- GL$end[j] - m <- GL$mass[j] - idx <- which(between(GL$start, st - rtmargin, end + rtmargin) & - between(GL$end, st - rtmargin, end + rtmargin) & - between(GL$mass, m - m * 2*ppm/1e+06, m + m * 2 * - ppm/1e+06)) - toremove <- c(toremove, idx) - } - if (length(toremove) == 0) {next} - GL <- GL[-unique(toremove), ] - } - return(GL) -} - -GLgroup <- function(GL, rtmargin, ppm) { - message("Grouping the entries") - out <- list() - repeat { - cur <- GL[1, ] - st <- cur$start - end <- cur$end - m <- cur$mass - idx <- which(between(GL$start, st - rtmargin, end + rtmargin) & - between(GL$end, st - rtmargin, end + rtmargin) & - between(GL$mass, m - m * ppm/1e+06, m + m * ppm/1e+06)) - out <- c(out, list(GL[idx, ])) - GL <- GL[-idx, ] - if (nrow(GL) == 0) { - break - } - } - return(out) -} - -GLtidy <- function(GL, filterrt, filtermz) { - message("Tidying the entries") - GL <- lapply(GL, function(entry) { - # idx <- order(-entry$adfound, -entry$isofound) - idx <- order(-entry$MaxInt) - entry <- entry[idx, ] - chosen <- entry[1, c("start", "end", "mass", "MaxInt")] - chosen$metadata <- list(list(entry)) - return(chosen) - }) - GL <- do.call(rbind, GL) - GL <- GL[order(GL$mass), ] - out <- list() - repeat { - cur <- GL[1, ] - st <- cur$start - end <- cur$end - m <- cur$mass - idx <- which((between(GL$start, st - filterrt, end + filterrt) | - between(GL$end, st - filterrt, end + filterrt)) & - between(GL$mass, m - filtermz, m + filtermz)) - uniondf <- data.frame(start = min(GL[idx, "start"]), - end = max(GL[idx, "end"]), - mass = mean(unlist(GL[idx, "mass"])), - MaxInt = max(GL[idx, "MaxInt"])) - uniondf$jointentries <- list(GL[idx, ]) - out <- c(out, list(uniondf)) - GL <- GL[-idx, ] - if (nrow(GL) == 0) { - break - } - } - GL <- do.call(rbind, out) - return(GL) -} +#'@title 
generateIL +#'@description Groups the SOIs generated in the previous step and sets them up +#' in an inclusion list ready for the MS/MS analysis. It takes into account the +#' instrument precursor filter mz range and groups the SOIs accordingly. The +#' user can also specify whether they want to use all SOIs or prioritize SOIs +#' with a certain annotation (M+H adducts, prioritize SOIs with adequate +#' isotopic patterns, etc.). +#'@param struct The RHermesExp object in which to save the resulting IL +#'@param id The SOI list ID from which to generate the IL +#'@param par An inclusion list parameter object, generated with ILParam() +#'@return An RHermes object with the IL inside a new MS2Exp entry, inside the +#' data slot +#'@examples +#'if(FALSE){ +#' par <- ILParam(filtermz = 0.5, priorization = "full") +#' myHermes <- generateIL(myHermes, 1, par) +#'} +#'@importFrom dplyr between +#'@export +setGeneric("generateIL", function(struct, id, par) { + standardGeneric("generateIL") +}) +#' @rdname generateIL +setMethod("generateIL", c("RHermesExp", "numeric", "ANY"), + function(struct, id, par = ILParam()) { + validObject(struct) + validObject(par) + if (!is(par, "ILParam")) { + stop("Please enter a valid IL parameter set with ILParam()") + } + if (id > length(struct@data@SOI)) { + stop("The entered ID is greater than the number of SOI lists") + } + message("Now processing the IL:") + struct@data@MS2Exp <- c(struct@data@MS2Exp, + RHermesMS2Exp( + IL = inclusionList(struct, par, id), + MS2Data = list(), + Ident = list() + )) + message("Done!") + return(struct) +}) + +inclusionList <- function(struct, params, id) { + SoiList <- struct@data@SOI[[id]] + ppm <- struct@metadata@ExpParam@ppm + oSoi <- SoiList@SOIList + rtmargin <- params@rtmargin + priorization <- params@priorization + adduct <- params@ad + filtermz <- params@filtermz + filterrt <- params@filterrt + if (priorization == "yes") { + GL <- GLprior(oSoi, adduct, rtmargin, ppm) + low <- which(GL$MaxInt < 50000) + rare <- which(vapply(GL$ad[low], function(x) { + !any(unlist(x) %in% c("M+H", "M+NH4", "M+Na", "M+K", "M+", "M-H", + "M+Cl", "M+Br", "M+H2O-H")) + }, logical(1))) + if (length(rare) != 0) {GL <- GL[-low[rare], ]} + GL <- GL[, c("start", "end", "formula", "mass", "MaxInt", "anot")] + GL$originalSOI <- unlist(sapply(seq(nrow(GL)),function(j){ + y <- GL[j,] + which(oSoi$start==y$start & + oSoi$end==y$end & + oSoi$mass==y$mass & + oSoi$formula==y$formula) + })) + }else{ + GL <- oSoi[, c("start", "end", "formula", "mass", "MaxInt", "anot")] + GL$originalSOI <- seq_len(nrow(GL)) + } + # GL$originalSOI <- seq_len(nrow(GL)) + + GL <- lapply(seq_len(nrow(GL)), function(x) { + cur <- GL[x, ] + df <- as.data.frame(matrix(unlist(strsplit(cur$anot[[1]], "_")), + ncol = 2, byrow = TRUE)) + dplyr::bind_cols(cur[rep(1, nrow(df)), ], df) + }) + GL <- do.call(rbind, GL) + colnames(GL)[c(8, 9)] <- c("f", "ad") + GL$f <- as.character(GL$f) + GL$ad <- as.character(GL$ad) + + ##Adduct Priorization + if (priorization == "only") { + message(paste("Selecting only", adduct)) + GL <- GL[which(GL$ad %in% unlist(adduct)), ] + } + + #Group entries by similar characterization attributes + GL <- GLgroup(GL, rtmargin, ppm) + + #Tidy the entries, sort them by relevance and store them into a internal DF + #Keep the important info for MS2 acquisition (rti, rtf and mass for + #planning injections) + GL <- GLtidy(GL, filterrt, filtermz) + + GL$entrynames <- vapply(GL$jointentries, function(entry) { + res <- lapply(entry$metadata, function(subentry) { + 
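# Illustrative sketch (not package code) of the grouping test GLgroup() applies:
# two entries end up in the same group when both retention-time bounds lie
# within `rtmargin` of the reference entry and the masses agree within the ppm
# tolerance. The toy data frame and tolerance values below are assumptions
# chosen only for demonstration.
library(dplyr)
toy <- data.frame(start = c(100, 102, 300),
                  end   = c(130, 128, 330),
                  mass  = c(180.0634, 180.0637, 255.2319))
rtmargin <- 5
ppm <- 10
ref <- toy[1, ]
same_group <- between(toy$start, ref$start - rtmargin, ref$end + rtmargin) &
    between(toy$end,   ref$start - rtmargin, ref$end + rtmargin) &
    between(toy$mass,  ref$mass * (1 - ppm / 1e6), ref$mass * (1 + ppm / 1e6))
toy[same_group, ]   # rows 1 and 2 group together; row 3 stays separate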
return(paste(unique(paste(subentry$f, subentry$ad, sep = "$"), + collapse = "#"))) + }) + return(paste(unique(unlist(res)), sep = "$", collapse = "#")) + }, character(1)) + GL$entrynames <- unlist(GL$entrynames) + + PL_id <- which(struct@metadata@filenames == SoiList@filename)[1] + raw <- PL(struct, PL_id)@raw + if(nrow(raw) != 0){ + GL$XIC <- lapply(seq_len(nrow(GL)), function(x){ + cur <- GL[x,] + # mzs <- c(min(cur$mass) * (1 - ppm * 1e-6), + # max(cur$mass) * (1 + ppm * 1e-6)) + # mzs <- c(min(cur$mass) - filtermz, + # max(cur$mass) + filtermz) + mzs <- c(min(cur$jointentries[[1]]$mass) * (1 - ppm * 1e-6), + max(cur$jointentries[[1]]$mass) * (1 + ppm * 1e-6)) + calculate_XIC_estimation(raw, + mzs, + c(cur$start, cur$end)) + }) + } + GL <- RHermesIL(IL = as.data.table(GL[, -5]), annotation = GL$jointentries, + SOInum = id, ILParam = params) + return(GL) +} + +GLprior <- function(GL, ad, rtmargin, ppm) { + if (!"adrows" %in% colnames(GL)) { + warning("No adduct similarity annotation has been found on this SOI list + Adduct priorization failed. + Please use filterSOI() before priorization.") + return(GL) + } + for (i in ad) { + message(paste("Prioritizing", i)) + priorityentries <- GL[unlist(vapply(GL$ad, function(x) { + any(x %in% i) + }, FUN.VALUE = logical(1))), ] + if (nrow(priorityentries) == 0) { + next + } + connections <- unlist(lapply(priorityentries$adrows, function(ads) { + ads[[i]] + })) + toremove <- which(GL$originalID %in% connections) + for (j in toremove) { + st <- GL$start[j] + end <- GL$end[j] + m <- GL$mass[j] + idx <- which(between(GL$start, st - rtmargin, end + rtmargin) & + between(GL$end, st - rtmargin, end + rtmargin) & + between(GL$mass, m - m * 2*ppm/1e+06, m + m * 2 * + ppm/1e+06)) + toremove <- c(toremove, idx) + } + if (length(toremove) == 0) {next} + GL <- GL[-unique(toremove), ] + } + return(GL) +} + +GLgroup <- function(GL, rtmargin, ppm) { + message("Grouping the entries") + out <- list() + repeat { + cur <- GL[1, ] + st <- cur$start + end <- cur$end + m <- cur$mass + idx <- which(between(GL$start, st - rtmargin, end + rtmargin) & + between(GL$end, st - rtmargin, end + rtmargin) & + between(GL$mass, m - m * ppm/1e+06, m + m * ppm/1e+06)) + out <- c(out, list(GL[idx, ])) + GL <- GL[-idx, ] + if (nrow(GL) == 0) { + break + } + } + return(out) +} + +GLtidy <- function(GL, filterrt, filtermz) { + message("Tidying the entries") + GL <- lapply(GL, function(entry) { + # idx <- order(-entry$adfound, -entry$isofound) + idx <- order(-entry$MaxInt) + entry <- entry[idx, ] + chosen <- entry[1, c("start", "end", "mass", "MaxInt")] + chosen$metadata <- list(list(entry)) + return(chosen) + }) + GL <- do.call(rbind, GL) + GL <- GL[order(GL$mass), ] + out <- list() + repeat { + cur <- GL[1, ] + st <- cur$start + end <- cur$end + m <- cur$mass + idx <- which((between(GL$start, st - filterrt, end + filterrt) | + between(GL$end, st - filterrt, end + filterrt)) & + between(GL$mass, m - filtermz, m + filtermz)) + uniondf <- data.frame(start = min(GL[idx, "start"]), + end = max(GL[idx, "end"]), + mass = mean(unlist(GL[idx, "mass"])), + MaxInt = max(GL[idx, "MaxInt"])) + uniondf$jointentries <- list(GL[idx, ]) + out <- c(out, list(uniondf)) + GL <- GL[-idx, ] + if (nrow(GL) == 0) { + break + } + } + GL <- do.call(rbind, out) + return(GL) +} diff --git a/R/IL_methods.R b/R/IL_methods.R index 69e1a84..f28c0c0 100644 --- a/R/IL_methods.R +++ b/R/IL_methods.R @@ -1,324 +1,334 @@ -#'@title filterIL -#'@description Filters out IL entries lower than a specified intensity in a -#' given 
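# Illustrative sketch (not package code): how a ppm tolerance around an IL
# entry mass translates into the absolute m/z window used when estimating the
# XIC above, and how the XIC itself is a per-scan intensity sum inside that
# window. `raw` is assumed to be a data frame with columns mz, rt and rtiv,
# as in the RHermes peaklists; all numbers are made up for the example.
ppm  <- 5
mass <- 180.0634
mz_window <- c(mass * (1 - ppm * 1e-6), mass * (1 + ppm * 1e-6))
raw <- data.frame(mz   = c(180.0633, 180.0636, 181.0001),
                  rt   = c(120.1, 120.6, 120.6),
                  rtiv = c(2e5, 3e5, 1e4))
inside <- raw$mz >= mz_window[1] & raw$mz <= mz_window[2]
xic <- aggregate(rtiv ~ rt, data = raw[inside, ], FUN = sum)
xic   # one summed intensity per retention time, as in calculate_XIC_estimation()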
RT interval. -#'@param struct The RHermesExp object. -#'@param id The IL ID in the RHermesExp object. The IDs are assigned by the -#' order in which the IL are generated. -#'@param rts Time interval to filter (two numeric values - start, end), -#' in seconds -#'@param minint Minimum entry intensity to be retained. All entries -#'<= minint will be removed in the specified rt interval. Defaults to -#'infinity, so all IL entries in the range are removed. -#'@return Nothing. As a side effect, it generates one/multiple .csv -#' files with the inclusion list data -#'@examples -#'if(FALSE){ -#' filterIL(myHermes, 1, c(0,200), minint = 1e6) -#'} -#'@export -setGeneric("filterIL", function(struct, id, rts, minint = Inf) { - standardGeneric("filterIL") -}) -#' @rdname filterIL -setMethod("filterIL", c("RHermesExp", "numeric", "numeric", "ANY"), -function(struct, id, rts, minint = Inf) { - if (length(rts) != 2) { - stop("Please input just two RT values corresponding to the starting RT - and ending RT you want to filter") - } - IL <- struct@data@MS2Exp[[id]]@IL@IL - anot <- struct@data@MS2Exp[[id]]@IL@annotation - which_to_remove <- which(IL$start >= rts[1] & IL$end <= rts[2] & - IL$MaxInt < minint) - if (length(which_to_remove) != 0) { - struct@data@MS2Exp[[id]]@IL@IL <- IL[-which_to_remove] - struct@data@MS2Exp[[id]]@IL@annotation <- anot[-which_to_remove] - struct <- setTime(struct, paste("IL entry", id, "filtered between", - rts[1], "and", rts[2], "taking", minint, - "as minimum intensity")) - } - return(struct) -}) - - -#'@title exportIL -#'@md -#'@description Organizes the IL entries into multiple injections -#' taking into account the user-specified parameters. Outputs a -#' single or multiple csv files that serve as input for the MS -#' to performed MSMS analysis. -#' -#'@param struct The RHermesExp object. -#'@param id The IL ID in the RHermesExp object. The IDs are -#' assigned by the order in which the IL are generated. -#'@param folder A string containing the folder to save the IL -#' csv/s into. By default will be your working directory -#'@param maxOver Numeric, very important. It's the number of -#' mz-rt segments that can be monitored at the same time by the -#' MS instrument. Higher numbers lead to less injections but the -#' number of scans for each IL entry will be reduced and gives -#' problems when deconvoluting the MS2 spectras. -#'@param sepFiles Logical, whether to generate a single csv file -#' or multiple csvs, each corresponding to each -#' injection/chromatographic run. From our experience with an -#' Orbitrap Fusion, separate csvs will simplify the task. -#'@return Nothing. 
As a side effect, it generates one/multiple -#' .csv files with the inclusion list data -#'@examples -#'if(FALSE){ -#' exportIL(myHermes, 1, 'C:/SomeFolder', maxOver = 5, sepFiles = FALSE) -#'} -#'@export -setGeneric("exportIL", function(struct, id, file = "./InclusionList", - mode = "both", maxOver = 5, defaultIT = 100, - sepFiles = FALSE, maxInjections = 9999) { - standardGeneric("exportIL") -}) - -#'@rdname exportIL -setMethod("exportIL", c("RHermesExp", "numeric", "ANY", "ANY", "ANY", "ANY"), -function(struct, id, file = "./InclusionList", mode = "both", maxOver = 5, - defaultIT = 100, sepFiles = FALSE, maxInjections = 9999) { - validObject(struct) - if (length(struct@data@MS2Exp) == 0) { - stop("This object doesn't have any ILs") - } - if (!between(id, 1, length(struct@data@MS2Exp))) { - stop("Please enter a valid IL number") - } - IL <- struct@data@MS2Exp[[id]]@IL@IL - IL$IT <- defaultIT - plan <- injectionPlanner(IL, 10, maxOver, byMaxInt = TRUE, - injections = maxInjections, - mode = mode) - if (sepFiles) { - for (x in seq_along(plan)) { - p <- plan[[x]] - p$IT[p$IT > 1500] <- 1500 - p$f <- character(nrow(p)) - p$ad <- character(nrow(p)) - p$z <- 1 - p <- p[, c("f", "ad", "mass", "z", "start", "end", "IT")] - #Setting column style for Thermo Xcalibur import - colnames(p) <- c("Formula", "Adduct", "m/z", "z", "t start (min)", - "t stop (min)", "Maximum Injection time (ms)") - p[, 5] <- p[, 5]/60 - p[, 6] <- p[, 6]/60 - #Added as result of Michi's comment - p <- cbind(data.frame(Compound = seq_len(nrow(p))), p) - write.csv(p, file = paste0(file, "_Injection_", x, ".csv"), - row.names = FALSE) - } - } else { - plandf <- do.call(rbind, lapply(seq_along(plan), function(x) { - p <- plan[[x]] - p$ID <- x - return(p[,c("start", "end", "mass", "MaxInt", "ID")]) - })) - write.csv(plandf, paste0(file, "_complete.csv"), row.names = FALSE) - } - return() -}) - - -injectionPlanner <- function(IL, injections, maxover, byMaxInt = TRUE, - mode = "continuous", returnAll = FALSE) { - if (returnAll) {injections <- 1e4} - if (byMaxInt) {IL <- IL[order(-IL$MaxInt), ]} - idx <- which(is.na(IL$start) | is.na(IL$end)) #NA depuration - if (length(idx) != 0) {IL <- IL[-idx, ]} - plan <- list() - - if(mode != "continuous"){ - message("Calculating high IT scans") - deep_IL <- calculate_deep_IL(IL) - } else { - deep_IL <- data.frame() - } - - mint <- min(IL$start) - maxt <- max(IL$end) - - if(mode %in% c("continuous", "both")){ - while (nrow(IL) != 0 & injections > 0) { - timeInt <- seq(mint, - maxt, - by = 0.005) - OL <- rep(0, length(timeInt)) - ok_entries <- c() - for (i in seq_len(nrow(IL))) { - timeidx <- which(timeInt >= IL$start[i] & timeInt <= IL$end[i]) - if (any(OL[timeidx] >= maxover)) {next} - ok_entries <- c(ok_entries, i) - OL[timeidx] <- OL[timeidx] + 1 - } - curinj <- IL[ok_entries, ] - IL <- IL[-ok_entries, ] - - if(mode == "both" & any(OL == 0)){ - deep_ok_entries <- c() - for (i in seq_len(nrow(deep_IL))) { - timeidx <- which(timeInt >= deep_IL$start[i] & - timeInt <= deep_IL$end[i]) - if (any(OL[timeidx] >= 1)) {next} - deep_ok_entries <- c(deep_ok_entries, i) - OL[timeidx] <- OL[timeidx] + 1 - } - if(length(deep_ok_entries) != 0){ - deep_curinj <- deep_IL[deep_ok_entries, ] - deep_IL <- deep_IL[-deep_ok_entries, ] - curinj <- rbind(curinj, deep_curinj, fill = TRUE) - } - } - - plan <- c(plan, list(curinj)) - injections <- injections - 1 - } - if(mode == "both" & nrow(deep_IL) != 0){ - message(paste(nrow(deep_IL), "high IT scans could not be planned", - "within the continuous MS2 
injections.", - "Adding them separately.")) - while (nrow(deep_IL) != 0 & injections > 0) { - timeInt <- seq(min(deep_IL$start, na.rm = TRUE), - max(deep_IL$end, na.rm = TRUE), - by = 0.005) - OL <- rep(0, length(timeInt)) - deep_ok_entries <- c() - for (i in seq_len(nrow(deep_IL))) { - timeidx <- which(timeInt >= deep_IL$start[i] & - timeInt <= deep_IL$end[i]) - if (any(OL[timeidx] >= 1)) {next} - deep_ok_entries <- c(deep_ok_entries, i) - OL[timeidx] <- OL[timeidx] + 1 - } - deep_curinj <- deep_IL[deep_ok_entries, ] - deep_IL <- deep_IL[-deep_ok_entries, ] - plan <- c(plan, list(deep_curinj)) - injections <- injections - 1 - } - } - } else { - while (nrow(deep_IL) != 0 & injections > 0) { - timeInt <- seq(min(deep_IL$start, na.rm = TRUE), - max(deep_IL$end, na.rm = TRUE), - by = 0.005) - OL <- rep(0, length(timeInt)) - deep_ok_entries <- c() - for (i in seq_len(nrow(deep_IL))) { - timeidx <- which(timeInt >= deep_IL$start[i] & - timeInt <= deep_IL$end[i]) - if (any(OL[timeidx] >= 1)) {next} - deep_ok_entries <- c(deep_ok_entries, i) - OL[timeidx] <- OL[timeidx] + 1 - } - deep_curinj <- deep_IL[deep_ok_entries, ] - deep_IL <- deep_IL[-deep_ok_entries, ] - plan <- c(plan, list(deep_curinj)) - injections <- injections - 1 - } - } - - - if (nrow(IL) != 0) { - message(paste0(nrow(IL), "ILs haven't been added to the injection plan", - " due to lack of space. Try again with more injections,", - " more maxover or returnAll = TRUE")) - } - # print(plot(cumsum(sapply(plan, nrow)))) - # hist_data <- lapply(seq_along(plan), function(x){ - # loc_plan <- plan[[x]] - # p <- hist(x = loc_plan$MaxInt %>% log10, - # breaks = seq(4,10,0.5))$counts - # # p <- p / nrow(loc_plan) * 100 - # data.frame(sample = x, int = seq(4.25,10,0.5), p = p) - # }) - # hist_data <- do.call(rbind, hist_data) - # print(ggplotly(ggplot(hist_data) + geom_tile(aes(x=sample, y = int, fill = p)))) - return(plan) -} - - -#'@export -#'@rdname RHermesIL-class -#' @param object An RHermesIL object -setMethod("show", "RHermesIL", function(object){ - message("Info about the IL:") - message(paste("\tIL entries:", nrow(object@IL))) - message(paste("\tSOI index:", object@SOInum)) -}) - -calculate_XIC_estimation <- function(raw, mzs, rts){ - points <- filter(raw, between(mz, mzs[1], mzs[2])) - points <- filter(points, between(rt, rts[1], rts[2])) - xic <- sapply(unique(points$rt), function(rt){ - sum(points$rtiv[points$rt == rt]) - }) - xic <- data.frame(rt = unique(points$rt), int = xic) - return(xic) -} - - -calculate_deep_IL <- function(IL, intThr = 1e4){ - IL_deep <- IL %>% - filter(.data$MaxInt > intThr) - IL_deep <- lapply(seq_len(nrow(IL_deep)),function(entry){ - cur <- IL_deep[entry,] - scans <- calculate_best_interval(cur$XIC[[1]], objective = 3e4) - scans <- as.data.frame(scans) - if(nrow(scans) == 0){return()} - colnames(scans) <- c("start", "end") - scans$mass <- cur$mass - scans$MaxInt <- cur$MaxInt - scans$entrynames <- cur$entrynames - scans$XIC <- cur$XIC - return(scans) - }) - IL_deep <- do.call(rbind, IL_deep) - IL_deep$IT <- (IL_deep$end - IL_deep$start) * 1000 - return(IL_deep) -} - -calculate_apex <- function(scans){ - scans$rt[which.max(scans$int)] -} - -calculate_best_interval <- function(scans, objective = 1e6, maxIT = 2000){ - if(nrow(scans) > 10){ - apexes <- scans$rt[inflect(scans$int, 4)] - if(length(apexes) == 0){apexes <- calculate_apex(scans)} - } else { - apexes <- calculate_apex(scans) - } - intervals <- lapply(apexes, function(apex){ - maxt <- min(max(apex - min(scans$rt), max(scans$rt) - apex), maxIT/1000) 
- scans <- approx(scans$rt, scans$int, n = 1000) %>% do.call(rbind, .) %>% - t %>% as.data.frame %>% rename(rt = x, rtiv = y) - for(t in seq(0.1, maxt, 0.01)){ - integral <- calculate_integral(filter(scans, - between(rt, apex-t, apex+t))) - if(integral > objective){return(c(apex-t, apex+t))} - } - return(c(apex-maxt, apex+maxt)) - }) - return(do.call(rbind, intervals)) -} - -calculate_integral <- function(scans){ - sum(diff(scans[[1]]) * (scans[[2]][-nrow(scans)] + diff(scans[[2]])/2)) -} - - -# Authorship Evan Friedland, Stackoverflow #6836409 -inflect <- function(x, threshold = 1){ - up <- sapply(1:threshold, function(n) c(x[-(seq(n))], rep(NA, n))) - down <- sapply(-1:-threshold, function(n){ - c(rep(NA, abs(n)), - x[-seq(length(x), length(x) - abs(n) + 1)]) - }) - a <- cbind(x,up,down) - which(apply(a, 1, max) == a[,1]) -} - - - - +#'@title filterIL +#'@description Filters out IL entries lower than a specified intensity in a +#' given RT interval. +#'@param struct The RHermesExp object. +#'@param id The IL ID in the RHermesExp object. The IDs are assigned by the +#' order in which the IL are generated. +#'@param rts Time interval to filter (two numeric values - start, end), +#' in seconds +#'@param minint Minimum entry intensity to be retained. All entries +#'<= minint will be removed in the specified rt interval. Defaults to +#'infinity, so all IL entries in the range are removed. +#'@return Nothing. As a side effect, it generates one/multiple .csv +#' files with the inclusion list data +#'@examples +#'if(FALSE){ +#' filterIL(myHermes, 1, c(0,200), minint = 1e6) +#'} +#'@export +setGeneric("filterIL", function(struct, id, rts, minint = Inf) { + standardGeneric("filterIL") +}) +#' @rdname filterIL +setMethod("filterIL", c("RHermesExp", "numeric", "numeric", "ANY"), +function(struct, id, rts, minint = Inf) { + if (length(rts) != 2) { + stop("Please input just two RT values corresponding to the starting RT + and ending RT you want to filter") + } + IL <- struct@data@MS2Exp[[id]]@IL@IL + anot <- struct@data@MS2Exp[[id]]@IL@annotation + which_to_remove <- which(IL$start >= rts[1] & IL$end <= rts[2] & + IL$MaxInt < minint) + if (length(which_to_remove) != 0) { + struct@data@MS2Exp[[id]]@IL@IL <- IL[-which_to_remove] + struct@data@MS2Exp[[id]]@IL@annotation <- anot[-which_to_remove] + struct <- setTime(struct, paste("IL entry", id, "filtered between", + rts[1], "and", rts[2], "taking", minint, + "as minimum intensity")) + } + return(struct) +}) + + +#'@title exportIL +#'@md +#'@description Organizes the IL entries into multiple injections taking into +#' account the user-specified parameters. Outputs a single or multiple csv +#' files that serve as input for the MS to performed MSMS analysis. +#' +#'@param struct The RHermesExp object. +#'@param id The IL ID in the RHermesExp object. The IDs are assigned by the +#' order in which the IL are generated. +#'@param file A string containing the folder to save the IL csv/s into and the +#' basename for the file. By default will be your working directory and the +#' default name is 'InclusionList' as in './InclusionList'. +#'@param mode Whether to plan set of continuous MS2 entries (PRM), adaptative +#' injection time scans at the apexes of the entries' peaks or both (which +#' optimizes instrument free time). Options are: "continuous", "deep" or +#' "both". +#'@param maxOver Numeric, very important. It's the number of mz-rt segments that +#' can be monitored at the same time by the MS instrument. 
Higher numbers lead +#' to less injections but the number of scans for each IL entry will be reduced +#' and gives problems when deconvoluting the MS2 spectras. It is ignored if +#' mode = "deep". Defaults to 5. +#'@param defaultIT Numeric, the default IT in ms for continuous MS2 scans (only +#' aplicable in Orbitrap instruments and for "continuous" and "both" modes). +#' Defaults to 100ms. +#'@param maxInjections Numeric, the maximum number of planned injections to +#' export. Defaults to 9999 to export all of them. +#'@param sepFiles Logical, whether to generate a single csv file or multiple +#' csvs, each corresponding to each injection/chromatographic run. From our +#' experience with an Orbitrap Fusion, separate csvs will simplify the task. +#'@return Nothing. As a side effect, it generates one/multiple .csv files with +#' the inclusion list data +#'@examples +#'if(FALSE){ +#' exportIL(myHermes, 1, 'C:/SomeFolder', maxOver = 5, sepFiles = FALSE) +#'} +#'@export +setGeneric("exportIL", function(struct, id, file = "./InclusionList", + mode = "both", maxOver = 5, defaultIT = 100, + sepFiles = FALSE, maxInjections = 9999) { + standardGeneric("exportIL") +}) + +#'@rdname exportIL +setMethod("exportIL", c("RHermesExp", "numeric", "ANY", "ANY", "ANY", "ANY"), +function(struct, id, file = "./InclusionList", mode = "both", maxOver = 5, + defaultIT = 100, sepFiles = FALSE, maxInjections = 9999) { + validObject(struct) + if (length(struct@data@MS2Exp) == 0) { + stop("This object doesn't have any ILs") + } + if (!between(id, 1, length(struct@data@MS2Exp))) { + stop("Please enter a valid IL number") + } + IL <- struct@data@MS2Exp[[id]]@IL@IL + IL$IT <- defaultIT + plan <- injectionPlanner(IL, 10, maxOver, byMaxInt = TRUE, + injections = maxInjections, + mode = mode) + if (sepFiles) { + for (x in seq_along(plan)) { + p <- plan[[x]] + p$IT[p$IT > 1500] <- 1500 + p$f <- character(nrow(p)) + p$ad <- character(nrow(p)) + p$z <- 1 + p <- p[, c("f", "ad", "mass", "z", "start", "end", "IT")] + #Setting column style for Thermo Xcalibur import + colnames(p) <- c("Formula", "Adduct", "m/z", "z", "t start (min)", + "t stop (min)", "Maximum Injection Time (ms)") + p[, 5] <- floor(p[, 5]*100/60)/100 + p[, 6] <- ceiling(p[, 6]*100/60)/100 + # JC: need to consider max absolute RT + #Added as result of Michi's comment + p <- cbind(data.frame(Compound = seq_len(nrow(p))), p) + write.csv(p, file = paste0(file, "_Injection_", x, ".csv"), + quote=F, row.names = FALSE) + } + } else { + plandf <- do.call(rbind, lapply(seq_along(plan), function(x) { + p <- plan[[x]] + p$ID <- x + return(p[,c("start", "end", "mass", "MaxInt", "ID")]) + })) + write.csv(plandf, paste0(file, "_complete.csv"), row.names = FALSE) + } + return() +}) + + +injectionPlanner <- function(IL, injections, maxover, byMaxInt = TRUE, + mode = "continuous", returnAll = FALSE) { + if (returnAll) {injections <- 1e4} + if (byMaxInt) {IL <- IL[order(-IL$MaxInt), ]} + idx <- which(is.na(IL$start) | is.na(IL$end)) #NA depuration + if (length(idx) != 0) {IL <- IL[-idx, ]} + plan <- list() + + if(mode != "continuous"){ + message("Calculating high IT scans") + deep_IL <- calculate_deep_IL(IL) + } else { + deep_IL <- data.frame() + } + + mint <- min(IL$start) + maxt <- max(IL$end) + + if(mode %in% c("continuous", "both")){ + while (nrow(IL) != 0 & injections > 0) { + timeInt <- seq(mint, + maxt, + by = 0.005) + OL <- rep(0, length(timeInt)) + ok_entries <- c() + for (i in seq_len(nrow(IL))) { + timeidx <- which(timeInt >= IL$start[i] & timeInt <= IL$end[i]) + if 
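# Illustrative sketch (not package code) of the RT unit conversion written to
# the Xcalibur-style CSV above: start/stop times are converted from seconds to
# minutes and rounded *outwards* to two decimals (floor for the start, ceiling
# for the stop) so the exported window never becomes narrower than planned.
start_s <- 123.4; stop_s <- 151.7
start_min <- floor(start_s * 100 / 60) / 100     # 2.05
stop_min  <- ceiling(stop_s * 100 / 60) / 100    # 2.53
c(start_min, stop_min)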
(any(OL[timeidx] >= maxover)) {next} + ok_entries <- c(ok_entries, i) + OL[timeidx] <- OL[timeidx] + 1 + } + curinj <- IL[ok_entries, ] + IL <- IL[-ok_entries, ] + + if(mode == "both" & any(OL == 0)){ + deep_ok_entries <- c() + for (i in seq_len(nrow(deep_IL))) { + timeidx <- which(timeInt >= deep_IL$start[i] & + timeInt <= deep_IL$end[i]) + if (any(OL[timeidx] >= 1)) {next} + deep_ok_entries <- c(deep_ok_entries, i) + OL[timeidx] <- OL[timeidx] + 1 + } + if(length(deep_ok_entries) != 0){ + deep_curinj <- deep_IL[deep_ok_entries, ] + deep_IL <- deep_IL[-deep_ok_entries, ] + curinj <- rbind(curinj, deep_curinj, fill = TRUE) + } + } + + plan <- c(plan, list(curinj)) + injections <- injections - 1 + } + if(mode == "both" & nrow(deep_IL) != 0){ + message(paste(nrow(deep_IL), "high IT scans could not be planned", + "within the continuous MS2 injections.", + "Adding them separately.")) + while (nrow(deep_IL) != 0 & injections > 0) { + timeInt <- seq(min(deep_IL$start, na.rm = TRUE), + max(deep_IL$end, na.rm = TRUE), + by = 0.005) + OL <- rep(0, length(timeInt)) + deep_ok_entries <- c() + for (i in seq_len(nrow(deep_IL))) { + timeidx <- which(timeInt >= deep_IL$start[i] & + timeInt <= deep_IL$end[i]) + if (any(OL[timeidx] >= 1)) {next} + deep_ok_entries <- c(deep_ok_entries, i) + OL[timeidx] <- OL[timeidx] + 1 + } + deep_curinj <- deep_IL[deep_ok_entries, ] + deep_IL <- deep_IL[-deep_ok_entries, ] + plan <- c(plan, list(deep_curinj)) + injections <- injections - 1 + } + } + } else { + while (nrow(deep_IL) != 0 & injections > 0) { + timeInt <- seq(min(deep_IL$start, na.rm = TRUE), + max(deep_IL$end, na.rm = TRUE), + by = 0.005) + OL <- rep(0, length(timeInt)) + deep_ok_entries <- c() + for (i in seq_len(nrow(deep_IL))) { + timeidx <- which(timeInt >= deep_IL$start[i] & + timeInt <= deep_IL$end[i]) + if (any(OL[timeidx] >= 1)) {next} + deep_ok_entries <- c(deep_ok_entries, i) + OL[timeidx] <- OL[timeidx] + 1 + } + deep_curinj <- deep_IL[deep_ok_entries, ] + deep_IL <- deep_IL[-deep_ok_entries, ] + plan <- c(plan, list(deep_curinj)) + injections <- injections - 1 + } + } + + + if (nrow(IL) != 0) { + message(paste0(nrow(IL), "ILs haven't been added to the injection plan", + " due to lack of space. 
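# Illustrative sketch (not package code) of the capacity test used by
# injectionPlanner(): candidate entries are laid on a 5 ms time grid and an
# entry is accepted for the current injection only if no grid point it covers
# already holds `maxover` concurrent windows. The toy windows are assumptions.
windows <- data.frame(start = c(10, 11, 12, 40),
                      end   = c(20, 19, 18, 50))
maxover <- 2
grid <- seq(min(windows$start), max(windows$end), by = 0.005)
occupancy <- rep(0L, length(grid))
accepted <- integer(0)
for (i in seq_len(nrow(windows))) {
    covered <- which(grid >= windows$start[i] & grid <= windows$end[i])
    if (any(occupancy[covered] >= maxover)) next   # would exceed capacity
    occupancy[covered] <- occupancy[covered] + 1L
    accepted <- c(accepted, i)
}
accepted   # entries 1, 2 and 4 fit; entry 3 overlaps two others and is deferred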
Try again with more injections,", + " more maxover or returnAll = TRUE")) + } + # print(plot(cumsum(sapply(plan, nrow)))) + # hist_data <- lapply(seq_along(plan), function(x){ + # loc_plan <- plan[[x]] + # p <- hist(x = loc_plan$MaxInt %>% log10, + # breaks = seq(4,10,0.5))$counts + # # p <- p / nrow(loc_plan) * 100 + # data.frame(sample = x, int = seq(4.25,10,0.5), p = p) + # }) + # hist_data <- do.call(rbind, hist_data) + # print(ggplotly(ggplot(hist_data) + geom_tile(aes(x=sample, y = int, fill = p)))) + return(plan) +} + + +#'@export +#'@rdname RHermesIL-class +#' @param object An RHermesIL object +setMethod("show", "RHermesIL", function(object){ + message("Info about the IL:") + message(paste("\tIL entries:", nrow(object@IL))) + message(paste("\tSOI index:", object@SOInum)) +}) + +calculate_XIC_estimation <- function(raw, mzs, rts){ + points <- filter(raw, between(.data$mz, mzs[1], mzs[2])) + points <- filter(points, between(.data$rt, rts[1], rts[2])) + xic <- sapply(unique(points$rt), function(rt){ + sum(points$rtiv[points$rt == rt]) + }) + xic <- data.frame(rt = unique(points$rt), int = xic) + return(xic) +} + + +calculate_deep_IL <- function(IL, intThr = 1e4){ + IL_deep <- IL %>% + filter(.data$MaxInt > intThr) + IL_deep <- lapply(seq_len(nrow(IL_deep)),function(entry){ + cur <- IL_deep[entry,] + scans <- calculate_best_interval(cur$XIC[[1]], objective = 3e4) + scans <- as.data.frame(scans) + if(nrow(scans) == 0){return()} + colnames(scans) <- c("start", "end") + scans$mass <- cur$mass + scans$MaxInt <- cur$MaxInt + scans$entrynames <- cur$entrynames + scans$XIC <- cur$XIC + return(scans) + }) + IL_deep <- do.call(rbind, IL_deep) + IL_deep$IT <- (IL_deep$end - IL_deep$start) * 1000 + return(IL_deep) +} + +calculate_apex <- function(scans){ + scans$rt[which.max(scans$int)] +} + +calculate_best_interval <- function(scans, objective = 1e6, maxIT = 2000){ + if(nrow(scans) > 10){ + apexes <- scans$rt[inflect(scans$int, 4)] + if(length(apexes) == 0){apexes <- calculate_apex(scans)} + } else { + apexes <- calculate_apex(scans) + } + intervals <- lapply(apexes, function(apex){ + maxt <- min(max(apex - min(scans$rt), max(scans$rt) - apex), maxIT/1000) + scans <- approx(scans$rt, scans$int, n = 1000) %>% do.call(rbind, .) 
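# Illustrative sketch (not package code) of the idea behind
# calculate_best_interval() and calculate_integral(): grow a symmetric window
# around a peak apex until the trapezoid-integrated intensity reaches an
# objective, so that high injection-time ("deep") scans are centred where the
# signal is. The Gaussian-shaped toy XIC and the objective are assumptions.
xic <- data.frame(rt = seq(100, 140, by = 0.5))
xic$int <- 1e5 * exp(-((xic$rt - 120)^2) / (2 * 4^2))
trapz <- function(x, y) sum(diff(x) * (y[-length(y)] + diff(y) / 2))
apex <- xic$rt[which.max(xic$int)]
objective <- 5e5
half_width <- NA
for (t in seq(0.1, 10, by = 0.01)) {
    inside <- xic$rt >= apex - t & xic$rt <= apex + t
    if (trapz(xic$rt[inside], xic$int[inside]) > objective) {
        half_width <- t
        break
    }
}
c(start = apex - half_width, end = apex + half_width)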
%>% + t %>% as.data.frame + for(t in seq(0.1, maxt, 0.01)){ + integral <- calculate_integral(filter(scans, + between(.data$x, + apex-t, apex+t))) + if(integral > objective){return(c(apex-t, apex+t))} + } + return(c(apex-maxt, apex+maxt)) + }) + return(do.call(rbind, intervals)) +} + +calculate_integral <- function(scans){ + sum(diff(scans[[1]]) * (scans[[2]][-nrow(scans)] + diff(scans[[2]])/2)) +} + + +# Authorship Evan Friedland, Stackoverflow #6836409 +inflect <- function(x, threshold = 1){ + up <- sapply(1:threshold, function(n) c(x[-(seq(n))], rep(NA, n))) + down <- sapply(-1:-threshold, function(n){ + c(rep(NA, abs(n)), + x[-seq(length(x), length(x) - abs(n) + 1)]) + }) + a <- cbind(x,up,down) + which(apply(a, 1, max) == a[,1]) +} + + + + diff --git a/R/MS1_functions.R b/R/MS1_functions.R index c3bd790..2d0672a 100644 --- a/R/MS1_functions.R +++ b/R/MS1_functions.R @@ -91,6 +91,7 @@ import_and_filter <- function(lf, minpks = 20, noise = 1000) { fileml <- mzR::openMSfile(lf) plist <- mzR::peaks(fileml) h <- mzR::header(fileml) + #Filtering header h <- h[, -which(vapply(h, function(x) all(x == 0), FUN.VALUE = logical(1)))] if (any(h$peaksCount < minpks)) { @@ -98,6 +99,11 @@ import_and_filter <- function(lf, minpks = 20, noise = 1000) { plist <- plist[-which(h$peaksCount < minpks)] h <- h[-which(h$peaksCount < minpks), ] } + #Removing all MS>1 scans + if(any(h$msLevel != 1)){ + plist <- plist[-which(h$msLevel != 1)] + h <- h[-which(h$msLevel != 1), ] + } raw <- lapply(seq_along(plist), function(x) { #Extracting raw data into a DT rpeaks <- plist[[x]] @@ -110,11 +116,9 @@ import_and_filter <- function(lf, minpks = 20, noise = 1000) { } IonicForm <- function(F_DB, Ad_DB) { - suppressWarnings({ - RES <- bplapply(seq_len(nrow(F_DB)), calculate_ionic_forms, - BPPARAM = bpparam(), F_DB = F_DB, - Ad_DB = Ad_DB) - }) + RES <- lapply(seq_len(nrow(F_DB)), calculate_ionic_forms, + F_DB = F_DB, + Ad_DB = Ad_DB) db <- do.call(rbind, lapply(RES, function(x) {x[[1]]})) db <- db[!duplicated(db[, 1]), ] connections <- do.call(rbind, lapply(RES, function(x) {x[[2]]})) @@ -297,12 +301,10 @@ IsoCalc <- function(DB, FWHM, intTHR, kTHR, instr = "Orbitrap", refm = 200) { rm(DB) #Free up space for Windows SOCK users message("") message("Calculating isotopes given the instrumental resolution: ") - - suppressWarnings({ - testres <- bplapply(testiso, - isocalc_parallel, kTHR, resol_factor, - isotopecode, isOrbi, BPPARAM = bpparam()) - }) #Suppress warnings to avoid the split() "object not multiple of ..." 
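# Illustrative sketch (assumes the Bioconductor mzR package and a local mzML
# file; the file path is a placeholder): read a raw file and keep only MS1
# scans, mirroring the msLevel filter added to import_and_filter() above.
library(mzR)
fileml <- openMSfile("some_file.mzML")   # placeholder path
plist  <- peaks(fileml)
h      <- header(fileml)
is_ms1 <- h$msLevel == 1
plist  <- plist[is_ms1]
h      <- h[is_ms1, ]
nrow(h)   # number of MS1 scans retained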
+ + testres <- lapply(testiso, + isocalc_parallel, kTHR, resol_factor, + isotopecode, isOrbi) #All different isopologues detected in the ionic formula set and their #deltaM with respect to M0 @@ -381,80 +383,65 @@ OptScanSearch <- function(DB, raw, ppm, IsoList, labelled = FALSE, else{return(0)} })) } - - ncores <- ifelse(is.numeric(bpparam()$workers[[1]]), - yes = bpparam()$workers, no = 1) - - #Splitting the formulas into a list (with l = number of workers) to reduce - #time loss associated with variable loading (in SOCK only) - flist <- split(DB, f = seq_len(ncores)) - PLresults <- bplapply(seq_along(flist), PLparallelfun, flist, raw, IsoList, - labelled, ppm, minhit, BPPARAM = bpparam()) - PLresults <- do.call(rbind, PLresults) - - #Output coherence with PLProcesser input - return(PLresults[, c(3, 2, 4, 5, 1)]) -} - - -PLparallelfun <- function(gr, flist, raw, IsoList, labelled, ppm, minhit){ - curDB <- flist[[gr]] pmz <- as.matrix(raw[, 1]) - curiso <- IsoList[[1]][curDB$f] - localRES <- lapply(seq_len(nrow(curDB)), function(i) { - n <- curDB$f[i] - mass <- curDB$m[i] + curiso <- IsoList[[1]][DB$f] + PLresults <- lapply(seq_len(nrow(DB)), function(i) { + n <- DB$f[i] + mass <- DB$m[i] formula <- as.character(n) if (!labelled) { - regularProc(curDB, mass, formula, pmz, curiso, ppm, - IsoList, minhit, i, raw) + regularProc(DB, mass, formula, pmz, curiso, ppm, + IsoList, minhit, i, raw) } else { - numC <- curDB$numC[i] - labelledProc(curDB, mass, formula, pmz, curiso, ppm, - IsoList, minhit, i, raw, numC) + numC <- DB$numC[i] + labelledProc(DB, mass, formula, pmz, curiso, ppm, + IsoList, minhit, i, raw, numC) } }) - return(do.call(rbind, localRES)) + PLresults <- rbindlist(PLresults) + + #Output coherence with PLProcesser input + return(PLresults[, c(3, 2, 4, 5, 1)]) } + regularProc <- function(curDB, mass, formula, pmz, curiso, ppm, IsoList, minhit, i, raw){ if (mass < pmz[1, 1] | mass > pmz[nrow(pmz), 1]) {return()} ss <- binarySearch(pmz, mass, ppm) if (length(ss) < minhit) {return()} #Return nothing if no M0 hit - isofactors <- curiso[[i]] #If hit, let's find the isotopologues isodf <- IsoList[[2]][isofactors, ] ch <- curDB[i, 3] #Charge to normalize isotope deltam's scanid <- raw[ss] - scanid$formv <- formula scanid$isov <- "M0" - isom <- mass + (isodf$deltam/abs(ch[[1]])) - isoss <- lapply(isom, function(m) { - if (m < pmz[1, 1] | m > pmz[nrow(pmz), 1]) {return()} - #Added small multiplicative factor to ppm. We've seen that - #isotope peaks may have a bit more error than M0 - binarySearch(pmz, m, ppm * 1.5) - }) - isol <- vapply(isoss, length, FUN.VALUE = numeric(1)) - isoidx <- which(isol != 0) - if (length(isoidx) != 0) { - isoentries <- do.call(rbind, - lapply(isoidx, - function(x) { - isoid <- raw[isoss[[x]]] - isoid$formv <- formula - isoid$isov <- as.character(isodf$ID[x]) - return(isoid) - } + valid <- isom >= pmz[1, 1] & isom <= pmz[nrow(pmz), 1] + if(any(valid)){ + isoss <- lapply(isom[valid], function(m) { + #Added small multiplicative factor to ppm. 
We've seen that + #isotope peaks may have a bit more error than M0 + binarySearch(pmz, m, ppm * 1.5) + }) + isol <- vapply(isoss, function(x) length(x) != 0, FUN.VALUE = T) + + if (any(isol)) { + isoentries <- do.call(rbind, + lapply(which(isol), + function(x) { + isoid <- raw[isoss[[x]]] + isoid$isov <- as.character(isodf$ID[valid][x]) + return(isoid) + } + ) ) - ) - isoentries <- isoentries[isoentries$rt %in% unique(scanid$rt), ] - scanid <- rbind(scanid, isoentries) + isoentries <- isoentries[isoentries$rt %in% unique(scanid$rt), ] + scanid <- rbind(scanid, isoentries) + } } + scanid$formv <- formula return(scanid) } @@ -512,14 +499,12 @@ labelledProc <- function(curDB, mass, formula, pmz, curiso, ppm, } binarySearch <- function(plist, m, ppm) { - i <- round((m - plist[1, 1])/(plist[nrow(plist), 1] - plist[1,1]) * - nrow(plist))[[1]] #Biased start based on target - if (i == 0) {i <- 1} + i <- floor(nrow(plist)/2) ub <- nrow(plist) lb <- 1 cycles <- 1 out <- FALSE - while (cycles < 50) { + while (cycles < 30) { dist <- (plist[i, 1] - m)/m * 1e+06 if (i == ub) { break @@ -567,5 +552,3 @@ binarySearch <- function(plist, m, ppm) { return(bot:top) } - - diff --git a/R/MS2_functions.R b/R/MS2_functions.R index 5d9fce8..4ffe133 100644 --- a/R/MS2_functions.R +++ b/R/MS2_functions.R @@ -79,8 +79,10 @@ function(struct, id, MS2files, sstype = "regular", useDB = FALSE, purifiedSpectra <- CliqueMSMS(MS2Exp, idx, sstype = sstype) if (!useDB) { message("No spectral matching was performed. Done!") - purifiedSpectra$results <- rep("No DB matching", - times = nrow(purifiedSpectra)) + purifiedSpectra$results <- rep( + "No MS2 Spectral DB matching", + times = nrow(purifiedSpectra) + ) MS2Exp@Ident <- list(MS2Features = purifiedSpectra, DatabaseSpectra = list(), MS2_correspondance = list()) diff --git a/R/Metadata_methods.R b/R/Metadata_methods.R index 460c45f..16caee4 100644 --- a/R/Metadata_methods.R +++ b/R/Metadata_methods.R @@ -164,7 +164,7 @@ function(struct, db = "hmdb", adcharge = 1, admult = 1, DBfile = "", row.names(struct@metadata@ExpParam@adlist) <- NULL if(all(!is.na(adlist))){ struct@metadata@ExpParam@adlist <- filter(adlist(struct), - adduct %in% adlist) + .data$adduct %in% adlist) if(nrow(adlist(struct)) == 0){ warning("No adducts remaining, please check the adduct names.") } diff --git a/R/SOI_functions.R b/R/SOI_functions.R index 2b2552d..85df955 100644 --- a/R/SOI_functions.R +++ b/R/SOI_functions.R @@ -157,10 +157,11 @@ PLprocesser <- function(PL, ExpParam, SOIParam, blankPL = NA, filename) { } Groups <- distinct(Groups) + ## Initial Peak Retrieval message("Initial peak retrieval:") - peakscol <- bplapply(seq_len(nrow(Groups)), retrievePeaks, - Groups, DataPL, BPPARAM = bpparam()) + peakscol <- lapply(seq_len(nrow(Groups)), retrievePeaks, + Groups, DataPL) Groups$peaks <- peakscol ## Group Characterization message("") @@ -217,13 +218,13 @@ PLprocesser <- function(PL, ExpParam, SOIParam, blankPL = NA, filename) { message("Calculating chaos:") Groups$chaos <- lapply(Groups$peaks, function(x) { - rho_chaos(x, nlevels = 20, fillGaps = TRUE) + rho_chaos(x, nlevels = 10, fillGaps = TRUE) }) %>% as.numeric() setkeyv(Groups, c("formula")) message("Generating peaklist for plotting:") - plist <- bplapply(unique(Groups$formula), preparePlottingDF, - Groups, BPPARAM = bpparam()) + plist <- lapply(unique(Groups$formula), preparePlottingDF, + Groups) plist <- do.call(rbind, plist) plist$isov <- rep("M0", nrow(plist)) @@ -246,15 +247,15 @@ densityProc <- function(x, DataPL, h){ #would be included). 
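# Illustrative sketch (not the package implementation): locate all entries of
# a sorted m/z vector within a ppm tolerance of a target mass. binarySearch()
# above does this with an explicit bisection loop; findInterval() gives the
# same hits here and is used only to keep the example short.
ppm_hits <- function(mz_sorted, target, ppm) {
    lo <- target * (1 - ppm / 1e6)
    hi <- target * (1 + ppm / 1e6)
    from <- findInterval(lo, mz_sorted) + 1   # first index past values below lo
    to   <- findInterval(hi, mz_sorted)       # last index with mz <= hi
    if (from > to) return(integer(0))
    from:to
}
mz <- sort(c(180.0630, 180.0634, 180.0680, 181.0001))
ppm_hits(mz, 180.0634, 5)   # indices of the two peaks within 5 ppm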
Added a 1 for robustness. cutoff[cutoff < 1] <- median(c(cutoff[cutoff > 1], 1)) message("Running Density Interpreter") - nwork <- bpparam()$workers - if (!is.numeric(nwork)) nwork <- 1 - uf <- unique(DataPL$formv) - suppressWarnings({uf <- split(uf, seq_len(nwork))}) - id <- cumsum(vapply(c(0, uf), length, numeric(1))) - 1 - RES <- bplapply(seq_along(uf), parallelInterpreter, uf, cutoff, - BinRes, id, BPPARAM = bpparam()) + uf <- unique(DataPL$formv) + RES <- lapply(BinRes[-c(1,2)], parallelInterpreter, cutoff) + RES <- RES[vapply(RES, is.data.frame, logical(1))] + if(length(RES) == 0){return()} + RES <- lapply(names(RES), function(anot){ + RES[[anot]]$formula <- anot + RES[[anot]] + }) RES <- do.call(rbind, RES) - RES$formula <- unlist(uf)[RES$formula] #Changing from index to corresponding RT RES[, 1] <- (RES[, 1] * rtbin) - rtbin + floor(min(h$retentionTime)) + shift @@ -288,8 +289,8 @@ groupGrouper <- function(GR, i, Groups){ setkeyv(matched, "yid") ## Grouping entries and choosing lowest start and highest end time - res <- bplapply(unique(matched$yid), resolveGroup, Groups, - matched, first_df, BPPARAM = bpparam()) + res <- lapply(unique(matched$yid), resolveGroup, Groups, + matched, first_df) res <- do.call(rbind, res) end <- NULL; start <- NULL #To appease R CMD Check "no visible binding" @@ -351,8 +352,8 @@ groupShort <- function(Groups, maxlen, BPPARAM = bpparam()){ message("Shortening and selecting long groups:") SG <- filter(Groups, length <= maxlen) LG <- filter(Groups, length > maxlen) - LG <- bplapply(seq_len(nrow(LG)), parallelGroupShort, LG, - maxlen, BPPARAM = BPPARAM) + LG <- lapply(seq_len(nrow(LG)), parallelGroupShort, LG, + maxlen) LG <- do.call(rbind, LG) return(rbind(SG, LG)) } @@ -437,8 +438,8 @@ blankSubstraction <- function(Groups, blankPL){ setkeyv(blankPL, c("formv", "rt")) message("First cleaning") - toKeep <- bplapply(seq_len(nrow(Groups)), firstCleaning, Groups, blankPL, - BPPARAM = bpparam()) %>% unlist() + toKeep <- lapply(seq_len(nrow(Groups)), firstCleaning, Groups, blankPL) %>% + unlist() sure <- Groups[which(toKeep), ] if (any(toKeep)) {Groups <- Groups[-which(toKeep),]} reticulate::py_available(initialize = TRUE) @@ -449,8 +450,7 @@ blankSubstraction <- function(Groups, blankPL){ setkeyv(blankPL, "formv") message("Preparing input for ANN") - RES <- bplapply(seq_len(nrow(Groups)), prepareNetInput, Groups, blankPL, - BPPARAM = bpparam()) + RES <- lapply(seq_len(nrow(Groups)), prepareNetInput, Groups, blankPL) Groups$MLdata <- RES NAgroups <- do.call(rbind, lapply(RES, function(x){is.na(x[1])})) @@ -471,12 +471,13 @@ blankSubstraction <- function(Groups, blankPL){ Groups <- Groups[, -c("MLdata")] } } else { - warning(paste0("A Keras installation was not found and blank", + warning(paste("A Keras installation was not found and ANN blank", "substraction was not performed")) } Groups <- rbind(sure, Groups) } +#'@importFrom stats IQR quantile firstCleaning <- function(i, Groups, blankPL){ cur <- Groups[i, ] st <- cur$start @@ -496,6 +497,8 @@ firstCleaning <- function(i, Groups, blankPL){ blankCV <- IQR(blankpks$rtiv) / (quantile(blankpks$rtiv, 0.25) + quantile(blankpks$rtiv, 0.75)) + if(is.na(sampleCV) | is.na(blankCV)){return(FALSE)} + sampleMax <- max(peaks$rtiv) # blankMax <- max(blankpks$rtiv) q90_ratio <- quantile(peaks$rtiv,0.9) / quantile(blankpks$rtiv,0.9) @@ -519,31 +522,33 @@ prepareNetInput <- function(i, Groups, blankPL){ if (nrow(peaks) <= 2 | length(unique(peaks$rt)) <= 2) { return(NA) #No signal } - smooth_pks <- data.frame(approx(x = peaks, 
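# Illustrative sketch (not package code): the blank-subtraction classifier
# needs every SOI trace and its blank trace resampled to the same fixed length,
# so both are linearly interpolated onto a common grid. `ties = min` (the
# change made above) tells approx() how to collapse duplicated retention times
# instead of emitting a warning. The toy trace below is an assumption.
peaks <- data.frame(rt   = c(100, 100, 101, 103, 105),   # note duplicated rt
                    rtiv = c(1e4, 1.2e4, 5e4, 8e4, 2e4))
Npoints <- 10
grid <- seq(min(peaks$rt), max(peaks$rt), length.out = Npoints)
smooth_pks <- data.frame(approx(x = peaks, xout = grid, rule = 1, ties = min))
smooth_pks$y[is.na(smooth_pks$y)] <- 0
round(smooth_pks$y)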
xout = seq(from = st, to = end, - length.out = Npoints), - rule = 1)) #Interpolate to N points - smooth_pks[is.na(smooth_pks[, "y"]), "y"] <- 0 - blankpks <- blankPL[.(f)] %>% filter(., .data$rt >= st - deltat & - .data$rt <= end + deltat & - .data$isov == "M0") - blankpks <- distinct(blankpks[, c(1, 2)]) - if (length(unique(blankpks$rt)) < 2) { - blankpks <- data.frame(rt = c(st, end), rtiv = c(0, 0)) - smooth_blankpks <- data.frame( - approx(x = blankpks, - xout = seq(from = st, to = end, length.out = Npoints), - rule = 1)) - } else { - smooth_blankpks <- data.frame( - approx(x = blankpks, - xout = seq(from = st, to = end, length.out = Npoints), - rule = 1)) - smooth_blankpks[is.na(smooth_blankpks[, "y"]), "y"] <- 0 - } - return(as.matrix(rbind(smooth_pks[, 2], smooth_blankpks[,2])) / - max(smooth_pks[, 2])) - }, error = function(cond) { - return(NA) + #Interpolate to N points + smooth_pks <- data.frame(approx(x = peaks, + xout = seq(from = st, to = end, + length.out = Npoints), + rule = 1, ties = min)) + smooth_pks[is.na(smooth_pks[, "y"]), "y"] <- 0 + blankpks <- blankPL[.(f)] %>% filter(., .data$rt >= st - deltat & + .data$rt <= end + deltat & + .data$isov == "M0") + blankpks <- distinct(blankpks[, c(1, 2)]) + if (length(unique(blankpks$rt)) < 2) { + blankpks <- data.frame(rt = c(st, end), rtiv = c(0, 0)) + smooth_blankpks <- data.frame( + approx(x = blankpks, + xout = seq(from = st, to = end, length.out = Npoints), + rule = 1, ties = min)) + } else { + smooth_blankpks <- data.frame( + approx(x = blankpks, + xout = seq(from = st, to = end, length.out = Npoints), + rule = 1, ties = min)) + smooth_blankpks[is.na(smooth_blankpks[, "y"]), "y"] <- 0 + } + return(as.matrix(rbind(smooth_pks[, 2], smooth_blankpks[,2])) / + max(smooth_pks[, 2])) + }, error = function(cond) { + return(NA) }) } @@ -564,66 +569,47 @@ preparePlottingDF <- function(i, Groups){ return(res) } -parallelFilter <- function(j, ScanResults, bins, timebin){ - lapply(j, function(i) { - data <- as.vector(ScanResults[.(i), "rt"]) - mint <- min(data) - maxt <- max(data) - res <- rep(0, length(bins)) - goodbins <- bins[between(bins, mint - timebin, maxt + timebin)] - if (length(goodbins) == 0) { - return(res) - } - l <- mapply(function(b1, b2) { - return(length(which(data > b1 & data < b2))) - }, goodbins[-length(goodbins)], goodbins[-1]) #n? of entries in the time bin - if (length(l) == 0) { - return(res) - } - st <- which(bins == goodbins[1]) - res[seq(st, (st + length(goodbins) - 2))] <- l +parallelFilter <- function(anot, ScanResults, bins, timebin){ + data <- as.vector(ScanResults[.(anot), "rt"]) + mint <- min(data) + maxt <- max(data) + res <- rep(0, length(bins)) + + #Shortcut to get in which bin each point is + l <- table(ceiling((data - bins[1]) / timebin)) + if (length(l) == 0) return(res) + res[as.numeric(names(l))] <- l return(res) - }) } densityFilter <- function(ScanResults, h, timebin, iso = "M0", tshift = 0) { #Setting time bins rtmin <- floor(min(h$retentionTime)) rtmax <- ceiling(max(h$retentionTime)) - bins <- seq(from = rtmin + tshift, to = rtmax + tshift, by = timebin) + bins <- seq(from = rtmin - tshift, to = rtmax + tshift, by = timebin) #Getting how many scans were taken on each bin scans <- lapply(bins, function(curbin) { return(h %>% filter(., .data$retentionTime > curbin & - .data$retentionTime <= curbin + timebin) %>% - dim(.) %>% .[1]) - }) + .data$retentionTime <= curbin + timebin) %>% + dim(.) 
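# Illustrative sketch (not package code) of the binning shortcut introduced in
# parallelFilter() above: instead of looping over bins, each scan time is
# assigned to its bin index with ceiling((rt - bins[1]) / timebin) and the
# per-bin counts are obtained with table(). Toy values are assumptions.
bins    <- seq(0, 10, by = 2)        # bin edges: 0, 2, 4, 6, 8, 10
timebin <- 2
rt      <- c(0.5, 1.9, 2.1, 5.5, 5.9, 9.0)
counts  <- rep(0, length(bins))
tab     <- table(ceiling((rt - bins[1]) / timebin))
counts[as.numeric(names(tab))] <- tab
counts   # 2 points in (0,2], 1 in (2,4], 2 in (4,6], 1 in (8,10]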
%>% .[1]) + }) #Counting how many scan entries are on each bin - nwork <- bpparam()$workers - if (!is.numeric(nwork)) nwork <- 1 - - idx <- split(unique(ScanResults$formv), seq_len(nwork) * 5) setkeyv(ScanResults, c("formv", "rt")) - RES <- bplapply(idx, parallelFilter, ScanResults, bins, timebin, - BPPARAM = bpparam()) - - RES <- unlist(RES, recursive = FALSE) - names(RES) <- unlist(idx) + RES <- lapply(unique(ScanResults$formv), parallelFilter, ScanResults, + bins, timebin) + names(RES) <- unique(ScanResults$formv) RES <- c(list(unlist(scans)), list(bins), RES) names(RES)[c(1,2)] <- c("Bins","Scans") return(RES) } -parallelInterpreter <- function(x, uf, cutoff, BinRes, id) { - do.call(rbind, lapply((seq_along(uf[[x]])) + id[x] + 2, function(i) { - bin <- BinRes[[i]] - interlist <- densityInterpreter(bin, cutoff) - if (length(interlist[[1]]) == 0) {return()} - df1 <- data.frame(start = interlist[[1]], end = interlist[[2]], - formula = rep(i - 2, times = length(interlist[[1]]))) - return(df1) - })) +parallelInterpreter <- function(x, cutoff) { + interlist <- densityInterpreter(x, cutoff) + if (length(interlist[[1]]) == 0) {return()} + res <- data.frame(start = interlist[[1]], end = interlist[[2]]) + return(res) } densityInterpreter <- function(list, cutoff) { @@ -671,21 +657,23 @@ rho_chaos <- function(data, nlevels = 20, fillGaps = TRUE){ #### filterSOI-related #### -#' @title filterSOI -#' @author Roger Gine -#' @description Performs a series of filters and quality checks to a given SOI -#' list, removing unwanted SOIs in the process. -#' @inheritParams findSOI -#' @param id ID of the SOI list to be filtered/checked. -#' @param minint Minimun SOI intensity. All SOIs below this value will be -#' removed from the SOI list -#' @param isofidelity Boolean. Whether to perform an isotopic fidelity check. -#' @return A filtered SOI list. +#'@title filterSOI +#'@author Roger Gine +#'@description Performs a series of filters and quality checks to a given SOI +#' list, removing unwanted SOIs in the process. +#'@inheritParams findSOI +#'@param id ID of the SOI list to be filtered/checked. +#'@param minint Minimun SOI intensity. All SOIs below this value will be removed +#' from the SOI list +#'@param isofidelity Boolean. Whether to perform an isotopic fidelity check. +#'@param minscore Numeric. Minimum value (between 0 and 1) of isofidelity to +#' retain an entry. Defaults to 0.8. +#'@return A filtered SOI list. 
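# Illustrative sketch (not package code) of what the isotopic-fidelity score
# thresholded by `minscore` represents: a cosine similarity between the
# observed isotopologue intensities of a SOI and the theoretical pattern, so
# values near 1 indicate a good match. The intensity vectors are made up.
cosine <- function(a, b) sum(a * b) / sqrt(sum(a^2) * sum(b^2))
theoretical <- c(M0 = 1.00, M1 = 0.065, M2 = 0.011)   # made-up theoretical pattern
observed    <- c(M0 = 1.00, M1 = 0.060, M2 = 0.015)
cosine(theoretical, observed)   # close to 1 -> passes minscore = 0.8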
#' @examples #'\dontshow{struct <- readRDS(system.file("extdata", "exampleObject.rds", #' package = "RHermes"))} #' struct <- filterSOI(struct, id = 1, minint = 10000, isofidelity = TRUE) -#' @export +#'@export setGeneric("filterSOI", function(struct, id, minint = 1e4, isofidelity, minscore = 0.8) { standardGeneric("filterSOI") }) @@ -718,9 +706,8 @@ setMethod("filterSOI", signature = c("RHermesExp", "numeric", "ANY", "ANY", "ANY # Isotopic pattern similarity message("Calculating isotopic fidelity metrics:") - isodata <- bplapply(with_isos, plotFidelity, struct = struct, - id = id, plot = FALSE, - BPPARAM = SerialParam(progressbar = FALSE)) + isodata <- lapply(with_isos, plotFidelity, struct = struct, + id = id, plot = FALSE) cos <- vapply(isodata, function(x){x[[3]]}, numeric(1)) soilist$isofidelity <- cos @@ -759,8 +746,7 @@ setMethod("filterSOI", signature = c("RHermesExp", "numeric", "ANY", "ANY", "ANY ##Recalculate peaklist for plotting setkeyv(soilist, c("formula")) message("Recalculating peaklist for plotting:") - plist <- bplapply(unique(soilist$formula), recalculateDF, soilist, - BPPARAM = bpparam()) + plist <- lapply(unique(soilist$formula), recalculateDF, soilist) plist <- do.call(rbind, plist) plist$isov <- rep("M0", nrow(plist)) @@ -787,16 +773,12 @@ recalculateDF <- function(i, soilist){ return(res) } - - - isoCos <- function(soilist, PL, isothr = 0.99) { PL <- PL@peaklist setkeyv(PL, c("formv")) message("Calculating isotope similarity:") - clist <- bplapply(seq_len(nrow(soilist)), parallelIsoCos, soilist, PL, - isothr, - BPPARAM = bpparam()) + clist <- lapply(seq_len(nrow(soilist)), parallelIsoCos, soilist, PL, + isothr) hits <- lapply(clist, function(x) {return(x[[1]])}) hitdf <- lapply(clist, function(x) {return(x[[2]])}) soilist$isofound <- as.numeric(hits) @@ -816,8 +798,8 @@ adCos <- function(soilist, FATable, adthr = 0.8) { vapply(x, function(y) {y[[2]]}, character(1)) }) setkey(FATable, "f") - clist <- bplapply(seq_len(nrow(soilist)), parallelAdCos, soilist, FATable, - adthr, BPPARAM = bpparam()) + clist <- lapply(seq_len(nrow(soilist)), parallelAdCos, soilist, FATable, + adthr) soilist$adrows <- clist return(soilist) } @@ -1122,8 +1104,8 @@ removeISF <- function(struct, id, DBpath = "D:/MS2ID_B2R_20201113_083214.rds", setkeyv(SL, "formula") #Recalculate plotting DF - plist <- bplapply(unique(SL$formula), preparePlottingDF, - SL, BPPARAM = bpparam()) + plist <- lapply(unique(SL$formula), preparePlottingDF, + SL) plist <- do.call(rbind, plist) plist$isov <- rep("M0", nrow(plist)) diff --git a/README.Rmd b/README.Rmd index faa2967..f887500 100644 --- a/README.Rmd +++ b/README.Rmd @@ -31,7 +31,8 @@ comes with an easy to use GUI that will guide you every step of the way. You are in control of your metabolites: whether it's natural products, biomedical or enviormental samples, `RHermes` has you covered. By restricting the formula database, -you can focus on just the compounds you are interested in. +you can focus on just the compounds you are interested in and achieve greater +metabolome coverage depth. Have you ever wished you could just **see** the metabolites in your data? With `RHermes` you can do that and much more. Say goodbye to manually calculating m/z's @@ -46,8 +47,7 @@ and our article preprint [here](https://www.biorxiv.org/content/10.1101/2021.03. 
 The recommended system requirements are:
 
-* At least a 4 core processor
-* 8-16 GB of RAM or more
+* At least 8-16 GB of RAM
 * An internet connection to perform KEGG queries
 
 ## Installation
 
@@ -64,7 +64,7 @@ devtools::install_github("RogerGinBer/RHermes")
 ## Setup
 `RHermes` can perform almost all its functions after installation, but the SOI
 Blank Substraction step requires a valid `keras` and `tensorflow` installation
-(which rely on Python code).
+(which rely on Python).
 
 ### Option 1: Default installation
 For most users, both `keras` and `tensorflow` can be configured with:
@@ -77,7 +77,8 @@ tensorflow::install_tensorflow()
 After which you can check the following:
 ```{r, eval = FALSE}
 tensorflow::tf_config()
-model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5", package = "RHermes"))
+model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5",
+                                            package = "RHermes"))
 is(model, "python.builtin.object") #Gives TRUE if the loading is successful.
 ```
 If both commands don't give any error (the "Your CPU supports ..." warning is
@@ -100,7 +101,8 @@ When finished, type in R:
 ```{r, eval = FALSE}
 reticulate::use_condaenv("newenv", required = TRUE)
 tensorflow::tf_config()
-model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5", package = "RHermes"))
+model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5",
+                                            package = "RHermes"))
 ```
 
 Everything should run smoothly. If not, try manually installing Anaconda from
@@ -125,10 +127,14 @@ example <- processMS1(example,
 #Generate SOIs
 example <- findSOI(example, getSOIpar(), 1)
 
-#Generate an IL
+#Generate an IL (Inclusion List)
 example <- generateIL(example, 1, ILParam())
 ```
 
+You can export the generated inclusion list and run a Parallel Reaction
+Monitoring (PRM) MS2 experiment to reveal coeluting isomers, or use any other
+MS2 mode you see fit.
+
 Or start the interactive GUI typing:
 ```{r, eval = FALSE}
 RHermesGUI()
diff --git a/README.md b/README.md
index af2915c..7c1e21d 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ the way.
 You are in control of your metabolites: whether it’s natural
 products, biomedical or enviormental samples, `RHermes` has you
 covered. By restricting the formula database, you can focus on just the compounds
-you are interested in.
+you are interested in and achieve greater metabolome coverage depth.
 
 Have you ever wished you could just **see** the metabolites in your
 data? With `RHermes` you can do that and much more. Say goodbye to
@@ -39,8 +39,7 @@ For more info, check out the documentation
 
 The recommended system requirements are:
 
-- At least a 4 core processor
-- 8-16 GB of RAM or more
+- At least 8-16 GB of RAM
 - An internet connection to perform KEGG queries
 
 ## Installation
 
@@ -58,7 +57,7 @@ devtools::install_github("RogerGinBer/RHermes")
 
 `RHermes` can perform almost all its functions after installation, but
 the SOI Blank Substraction step requires a valid `keras` and
-`tensorflow` installation (which rely on Python code).
+`tensorflow` installation (which rely on Python).
 
 ### Option 1: Default installation
 
@@ -74,7 +73,8 @@ After which you can check the following:
 
 ``` r
 tensorflow::tf_config()
-model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5", package = "RHermes"))
+model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5",
+                                            package = "RHermes"))
 is(model, "python.builtin.object") #Gives TRUE if the loading is successful.
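# Editor's note (hedged addition, not part of the original README or patch):
# if the model load fails, reticulate::py_config() reports which Python
# installation and conda environment the session is bound to, which helps
# spot environment mix-ups.
reticulate::py_config()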
 ```
 
@@ -101,7 +101,8 @@ When finished, type in R:
 ``` r
 reticulate::use_condaenv("newenv", required = TRUE)
 tensorflow::tf_config()
-model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5", package = "RHermes"))
+model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5",
+                                            package = "RHermes"))
 ```
 
 Everything should run smoothly. If not, try manually installing Anaconda
@@ -131,10 +132,14 @@ example <- processMS1(example,
 #Generate SOIs
 example <- findSOI(example, getSOIpar(), 1)
 
-#Generate an IL
+#Generate an IL (Inclusion List)
 example <- generateIL(example, 1, ILParam())
 ```
 
+You can export the generated inclusion list and run a Parallel Reaction
+Monitoring (PRM) MS2 experiment to reveal coeluting isomers, or use any
+other MS2 mode you see fit.
+
 Or start the interactive GUI typing:
 
 ``` r
diff --git a/inst/app/PL_UI.R b/inst/app/PL_UI.R
index 452a077..79b3148 100644
--- a/inst/app/PL_UI.R
+++ b/inst/app/PL_UI.R
@@ -227,7 +227,7 @@ PLServer <- function(id, struct){
             struct$dataset <- setDB(struct$dataset, db = input$db,
                                     adcharge = as.numeric(input$adcharge),
                                     admult = as.numeric(input$admult),
-                                    filename = loadpath(), keggpath = keggpath)
+                                    DBfile = loadpath(), keggpath = keggpath)
             sel <- struct$dataset@metadata@ExpParam@adlist$adduct %in% input$ads
             ads <- struct$dataset@metadata@ExpParam@adlist[sel,]
             struct$dataset@metadata@ExpParam@adlist <- ads
diff --git a/inst/app/www/BigLogo.png b/inst/app/www/BigLogo.png
deleted file mode 100644
index 462719e..0000000
Binary files a/inst/app/www/BigLogo.png and /dev/null differ
diff --git a/inst/extdata/exampleObject.rds b/inst/extdata/exampleObject.rds
index 8ba9c26..911ae98 100644
Binary files a/inst/extdata/exampleObject.rds and b/inst/extdata/exampleObject.rds differ
diff --git a/man/exportIL.Rd b/man/exportIL.Rd
index 7766699..d1fb3f2 100644
--- a/man/exportIL.Rd
+++ b/man/exportIL.Rd
@@ -5,39 +5,68 @@
 \alias{exportIL,RHermesExp,numeric-method}
 \title{exportIL}
 \usage{
-exportIL(struct, id, file = "./InclusionList", maxOver = 5, sepFiles = FALSE)
+exportIL(
+  struct,
+  id,
+  file = "./InclusionList",
+  mode = "both",
+  maxOver = 5,
+  defaultIT = 100,
+  sepFiles = FALSE,
+  maxInjections = 9999
+)
 
-\S4method{exportIL}{RHermesExp,numeric}(struct, id, file = "./InclusionList", maxOver = 5, sepFiles = FALSE)
+\S4method{exportIL}{RHermesExp,numeric}(
+  struct,
+  id,
+  file = "./InclusionList",
+  mode = "both",
+  maxOver = 5,
+  defaultIT = 100,
+  sepFiles = FALSE,
+  maxInjections = 9999
+)
 }
 \arguments{
 \item{struct}{The RHermesExp object.}
 
-\item{id}{The IL ID in the RHermesExp object. The IDs are
-assigned by the order in which the IL are generated.}
+\item{id}{The IL ID in the RHermesExp object. The IDs are assigned by the
+order in which the IL are generated.}
 
-\item{maxOver}{Numeric, very important. It's the number of
-mz-rt segments that can be monitored at the same time by the
-MS instrument. Higher numbers lead to less injections but the
-number of scans for each IL entry will be reduced and gives
-problems when deconvoluting the MS2 spectras.}
+\item{file}{A string containing the folder to save the IL csv/s into and the
+basename for the file. By default it will be your working directory and the
+default name is 'InclusionList', as in './InclusionList'.}
 
-\item{sepFiles}{Logical, whether to generate a single csv file
-or multiple csvs, each corresponding to each
-injection/chromatographic run. From our experience with an
-Orbitrap Fusion, separate csvs will simplify the task.}
+\item{mode}{Whether to plan a set of continuous MS2 entries (PRM), adaptive
+injection time scans at the apexes of the entries' peaks, or both (which
+optimizes instrument free time). Options are: "continuous", "deep" or
+"both".}
 
-\item{folder}{A string containing the folder to save the IL
-csv/s into. By default will be your working directory}
+\item{maxOver}{Numeric, very important. It's the number of mz-rt segments that
+can be monitored at the same time by the MS instrument. Higher numbers lead
+to fewer injections, but the number of scans for each IL entry will be reduced,
+which causes problems when deconvoluting the MS2 spectra. It is ignored if
+mode = "deep". Defaults to 5.}
+
+\item{defaultIT}{Numeric, the default IT in ms for continuous MS2 scans (only
+applicable to Orbitrap instruments and for "continuous" and "both" modes).
+Defaults to 100 ms.}
+
+\item{sepFiles}{Logical, whether to generate a single csv file or multiple
+csvs, each corresponding to each injection/chromatographic run. In our
+experience with an Orbitrap Fusion, separate csvs will simplify the task.}
+
+\item{maxInjections}{Numeric, the maximum number of planned injections to
+export. Defaults to 9999 to export all of them.}
 }
 \value{
-Nothing. As a side effect, it generates one/multiple
-.csv files with the inclusion list data
+Nothing. As a side effect, it generates one/multiple .csv files with
+the inclusion list data.
 }
 \description{
-Organizes the IL entries into multiple injections
-taking into account the user-specified parameters. Outputs a
-single or multiple csv files that serve as input for the MS
-to performed MSMS analysis.
+Organizes the IL entries into multiple injections taking into
+account the user-specified parameters. Outputs a single or multiple csv
+files that serve as input for the MS to perform MS/MS analysis.
 }
 \examples{
 if(FALSE){
diff --git a/man/filterSOI.Rd b/man/filterSOI.Rd
index a57cc5c..159c2a8 100644
--- a/man/filterSOI.Rd
+++ b/man/filterSOI.Rd
@@ -5,26 +5,29 @@
 \alias{filterSOI,RHermesExp,numeric-method}
 \title{filterSOI}
 \usage{
-filterSOI(struct, id, minint = 10000, isofidelity)
+filterSOI(struct, id, minint = 10000, isofidelity, minscore = 0.8)
 
-\S4method{filterSOI}{RHermesExp,numeric}(struct, id, minint = 10000, isofidelity)
+\S4method{filterSOI}{RHermesExp,numeric}(struct, id, minint = 10000, isofidelity, minscore = 0.8)
 }
 \arguments{
 \item{struct}{The RHermesExp object to which the SOI list will be saved.}
 
 \item{id}{ID of the SOI list to be filtered/checked.}
 
-\item{minint}{Minimun SOI intensity. All SOIs below this value will be
-removed from the SOI list}
+\item{minint}{Minimum SOI intensity. All SOIs below this value will be removed
+from the SOI list}
 
 \item{isofidelity}{Boolean. Whether to perform an isotopic fidelity check.}
+
+\item{minscore}{Numeric. Minimum value (between 0 and 1) of isofidelity to
+retain an entry. Defaults to 0.8.}
 }
 \value{
 A filtered SOI list.
 }
 \description{
 Performs a series of filters and quality checks to a given SOI
-    list, removing unwanted SOIs in the process.
+    list, removing unwanted SOIs in the process.
 }
 \examples{
 \dontshow{struct <- readRDS(system.file("extdata", "exampleObject.rds",
diff --git a/tests/testthat/test-PL.R b/tests/testthat/test-PL.R
index 6ccb191..7fc34bd 100644
--- a/tests/testthat/test-PL.R
+++ b/tests/testthat/test-PL.R
@@ -32,7 +32,7 @@ test_that("Labelled proc works",{
                                         package = "RHermes"),
                            labelled = TRUE)
 
-    expect_equal(nrow(myHermes@data@PL[[2]]@peaklist), 1534)
+    expect_equal(nrow(myHermes@data@PL[[2]]@peaklist), 1378)
 })
 
 test_that("PL plot works", {
diff --git a/tests/testthat/test-SOI.R b/tests/testthat/test-SOI.R
index 0d61120..827d405 100644
--- a/tests/testthat/test-SOI.R
+++ b/tests/testthat/test-SOI.R
@@ -110,7 +110,7 @@ test_that("SOI are filtered correctly", {
                                package = "RHermes"))
     myHermes <- filterSOI(myHermes, 1, 20000, TRUE)
     #Performs equal to the precalculated version
-    expect_equal(nrow(myHermes@data@SOI[[1]]@SOIList), 9)
+    expect_equal(nrow(myHermes@data@SOI[[1]]@SOIList), 8)
 })
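Below is a hedged usage sketch (an editorial illustration, not part of the patch) that ties together the updated `filterSOI()` and `exportIL()` signatures documented above. It assumes the bundled `exampleObject.rds` already contains a SOI list and a generated inclusion list; if the IL is missing, run `generateIL()` first.

``` r
library(RHermes)

# Bundled example object, the same file the man pages and tests load
struct <- readRDS(system.file("extdata", "exampleObject.rds",
                              package = "RHermes"))

# Filter SOI list 1, keeping entries with an isotopic fidelity score
# of at least 0.8 (the new `minscore` argument)
struct <- filterSOI(struct, id = 1, minint = 10000, isofidelity = TRUE,
                    minscore = 0.8)

# Export inclusion list 1 with the new planning options: "both" mixes
# continuous PRM scans with deep (adaptive injection time) scans,
# defaultIT is the injection time (ms) for the continuous scans, and
# maxInjections caps how many planned injections are written out.
exportIL(struct, id = 1, file = "./InclusionList", mode = "both",
         maxOver = 5, defaultIT = 100, sepFiles = TRUE, maxInjections = 9999)
```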