From 9b4f85c926316f59fe64845f0717c306f1b50382 Mon Sep 17 00:00:00 2001 From: mattssca Date: Wed, 10 Jan 2024 13:34:49 -0800 Subject: [PATCH 1/5] New helper: purify_chr --- R/purify_chr.R | 55 +++++++++++++++++++++++++++++++++++++++++++++++ man/purify_chr.Rd | 34 +++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 R/purify_chr.R create mode 100644 man/purify_chr.Rd diff --git a/R/purify_chr.R b/R/purify_chr.R new file mode 100644 index 0000000..9af6ad4 --- /dev/null +++ b/R/purify_chr.R @@ -0,0 +1,55 @@ +#' @title Purify Chromosomes. +#' +#' @description Helper function for dealing with chromosome prefixes based on +#' the selected projection. +#' +#' @details This function accepts a data frame with a column to check for +#' prefixes. The function will add or remove prefixes based on the selected +#' projection. This function expects a column named "chrom" to be in the +#' incoming data frame. +#' +#' @param projection Required parameter, needed to determine if chromosomes +#' should be prefixed or not. +#' @param incoming_table Required parameter, a data frame to check for prefixes. +#' +#' @return A data frame with the same columns as the incoming data frame, but +#' with the prefixes added or removed based on the selected projection. +#' +#' @import dplyr stringr +#' +#' @export +#' +#' @examples +#' #Example 1 - Add prefixes to a data frame +#' my_data = data.frame(chrom = c("1", "2", "3")) +#' my_data = purify_chr(projection = "hg38", incoming_table = my_data) +#' +purify_chr <- function(projection = NULL, + incoming_table = NULL) { + + #checks + if(is.null(projection)){ + stop("You must provide a valid projection. 
+ Available projections are hg38 and grch37.") + } + + if(is.null(incoming_table)){ + stop("You must provide a data table with `incoming_table`.") + } + + #deal with prefixes + if(projection == "hg38"){ + if(all(!str_detect(incoming_table$chrom, "chr"))){ + incoming_table = dplyr::mutate(incoming_table, chrom = paste0("chr", chrom)) + } + }else if(projection == "grch37"){ + if(all(str_detect(incoming_table$chrom, "chr"))){ + incoming_table = dplyr::mutate(incoming_table, chrom = gsub("chr", "", chrom)) + } + }else{ + stop(paste0("This function supports the following projections; hg38 and + grch37. The provided projection is: ", projection)) + } + + return(incoming_table) +} diff --git a/man/purify_chr.Rd b/man/purify_chr.Rd new file mode 100644 index 0000000..d0d6819 --- /dev/null +++ b/man/purify_chr.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/purify_chr.R +\name{purify_chr} +\alias{purify_chr} +\title{Purify Chromosomes.} +\usage{ +purify_chr(projection = NULL, incoming_table = NULL) +} +\arguments{ +\item{projection}{Required parameter, needed to determine if chromosomes +should be prefixed or not.} + +\item{incoming_table}{Required parameter, a data frame to check for prefixes.} +} +\value{ +A data frame with the same columns as the incoming data frame, but +with the prefixes added or removed based on the selected projection. +} +\description{ +Helper function for dealing with chromosome prefixes based on +the selected projection. +} +\details{ +This function accepts a data frame with a column to check for +prefixes. The function will add or remove prefixes based on the selected +projection. This function expects a column named "chrom" to be in the +incoming data frame. 
+} +\examples{ +#Example 1 - Add prefixes to a data frame +my_data = data.frame(chrom = c("1", "2", "3")) +my_data = purify_chr(projection = "hg38", incoming_table = my_data) + +} From 9b811869d7854b4243898045d4543eb7ba4bb140 Mon Sep 17 00:00:00 2001 From: mattssca Date: Wed, 10 Jan 2024 13:35:22 -0800 Subject: [PATCH 2/5] Updated unit test for get_gene_info --- tests/testthat/test-get_gene_info.R | 41 +++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tests/testthat/test-get_gene_info.R diff --git a/tests/testthat/test-get_gene_info.R b/tests/testthat/test-get_gene_info.R new file mode 100644 index 0000000..5736524 --- /dev/null +++ b/tests/testthat/test-get_gene_info.R @@ -0,0 +1,41 @@ +#load packages +library(testthat) + + +test_that("Check for rows and column consistencies", { + expect_equal(nrow(get_gene_info(these_genes = "MYC", + projection = "grch37", + raw = TRUE)), 1) + + expect_equal(nrow(get_gene_info(these_genes = "MYC", + projection = "hg38", + raw = TRUE)), 1) + + expect_equal(ncol(get_gene_info(these_genes = "MYC", + projection = "grch37", + raw = TRUE)), 26) + + expect_equal(ncol(get_gene_info(these_genes = "MYC", + projection = "hg38", + raw = TRUE)), 26) + + expect_equal(ncol(get_gene_info(these_genes = "MYC", + projection = "grch37", + raw = FALSE)), 19) + + expect_equal(ncol(get_gene_info(these_genes = "MYC", + projection = "hg38", + raw = FALSE)), 19) +}) + + +test_that("Expected to fail", { + expect_error(get_gene_info(raw = TRUE)) + expect_error(get_gene_info(these_genes = "MYC", + projection = "hg19")) +}) + + +test_that("Check the type of the return", { + expect_true(is.data.frame(get_gene_info(these_genes = c("BCL2", "MYC")))) +}) From 8ddde8b9982c656f929cefa15ac1b373e5484a5f Mon Sep 17 00:00:00 2001 From: mattssca Date: Wed, 10 Jan 2024 13:35:47 -0800 Subject: [PATCH 3/5] Touch new unit test (purify_regions) --- tests/testthat/test-purify_regions.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) create 
mode 100644 tests/testthat/test-purify_regions.R diff --git a/tests/testthat/test-purify_regions.R b/tests/testthat/test-purify_regions.R new file mode 100644 index 0000000..e69de29 From 3dabc43feb032c2788cd09dd2a44c805b8428656 Mon Sep 17 00:00:00 2001 From: mattssca Date: Wed, 10 Jan 2024 13:36:31 -0800 Subject: [PATCH 4/5] Package docs updated --- NAMESPACE | 1 + R/purify_regions.R | 19 ++++--------------- man/purify_regions.Rd | 2 +- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d6ef2af..5b3c464 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(gene_ranger) export(get_gene_info) +export(purify_chr) export(purify_regions) export(region_ranger) import(data.table, except = c("last", "first", "between", "transpose")) diff --git a/R/purify_regions.R b/R/purify_regions.R index 5e95ff3..91b5a78 100644 --- a/R/purify_regions.R +++ b/R/purify_regions.R @@ -1,6 +1,6 @@ #' @title Purify Regions. #' -#' @description Helper function for cleaning and standaradize regions. +#' @description Helper function for cleaning and standardizing regions. #' #' @details This function accepts a variety of incoming regions. #' Either, regions can be provided as a data frame with `these_regions`. @@ -103,20 +103,9 @@ purify_regions <- function(these_regions = NULL, `qchrom`, `qstart`, and `qend`") } - #TODO: Turn this into a helper function: - #deal with chr prefixes based on selected projection - if(projection == "hg38"){ - if(all(!str_detect(region_table$chrom, "chr"))){ - region_table = dplyr::mutate(region_table, chrom = paste0("chr", chrom)) - } - }else if(projection == "grch37"){ - if(all(str_detect(region_table$chrom, "chr"))){ - region_table = dplyr::mutate(region_table, chrom = gsub("chr", "", chrom)) - } - }else{ - stop(paste0("This function supports the following projections; hg38 and - grch37. 
The provided projection is: ", projection)) - } + #run helper function to deal with prefixes + region_table = purify_chr(projection = projection, + incoming_table = region_table) #enforce data types region_table$chrom = as.character(region_table$chrom) diff --git a/man/purify_regions.Rd b/man/purify_regions.Rd index ee33cde..aab9a87 100644 --- a/man/purify_regions.Rd +++ b/man/purify_regions.Rd @@ -31,7 +31,7 @@ Available projections are hg38 and grch37. Default is hg38.} A data table with three columns, chrom, start, end. } \description{ -Helper function for cleaning and standaradize regions. +Helper function for cleaning and standardizing regions. } \details{ This function accepts a variety of incoming regions. From 0dcc7ca8106bea54a5ce0ff711f6f1c78aa5ec51 Mon Sep 17 00:00:00 2001 From: mattssca Date: Wed, 10 Jan 2024 13:37:10 -0800 Subject: [PATCH 5/5] removing old unit test (get_gene_info) --- tests/testthat/test-get_gene_infoR.R | 36 ---------------------------- 1 file changed, 36 deletions(-) delete mode 100644 tests/testthat/test-get_gene_infoR.R diff --git a/tests/testthat/test-get_gene_infoR.R b/tests/testthat/test-get_gene_infoR.R deleted file mode 100644 index d13b2f5..0000000 --- a/tests/testthat/test-get_gene_infoR.R +++ /dev/null @@ -1,36 +0,0 @@ -#load pacakges -library(testthat) - - -test_that("Check for rows and column consistencies", { - expect_equal(nrow(get_gene_info(these_genes = "MYC", - projection = "grch37", - raw = TRUE)), 1) - - expect_equal(nrow(get_gene_info(these_genes = "MYC", - projection = "hg38", - raw = TRUE)), 1) - - expect_equal(ncol(get_gene_info(these_genes = "MYC", - projection = "grch37", - raw = TRUE)), 26) - - expect_equal(ncol(get_gene_info(these_genes = "MYC", - projection = "hg38", - raw = TRUE)), 26) - - expect_equal(ncol(get_gene_info(these_genes = "MYC", - projection = "grch37", - raw = FALSE)), 19) - - expect_equal(ncol(get_gene_info(these_genes = "MYC", - projection = "hg38", - raw = FALSE)), 19) -}) - - 
-test_that("Expected to fail", { - expect_error(get_gene_info(raw = TRUE)) - expect_error(get_gene_info(these_genes = "MYC", - projection = "hg19")) -})