-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path007_extract-score-pdna-batch-read-counts.R
50 lines (40 loc) · 1.35 KB
/
007_extract-score-pdna-batch-read-counts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Prepare the pDNA batch read counts for the Score data set.
if (basename(getwd()) == "munge") {
setwd("..")
}
source(".Rprofile")
library(tidyverse)
source("munge/munge_functions.R")
extract_pdna_batch_read_counts <- function(replicate_id, p_dna_batch, read_count_file) {
read_ct_df <- read_score_count_file(read_count_file) %>%
select(sgrna_id, tidyselect::matches(p_dna_batch)) %>%
add_column(p_dna_batch = !!p_dna_batch) %>%
rename(read_counts = !!p_dna_batch)
return(read_ct_df)
}
reads_per_million <- function(df) {
df %>%
group_by(p_dna_batch) %>%
mutate(rpm = ((read_counts / sum(read_counts)) * 1e6) + 1) %>%
ungroup()
}
score_read_counts_dir <- snakemake@params[["raw_counts_dir"]]
replicate_map_path <- snakemake@input[["replicate_map"]]
output_file <- snakemake@output[["score_pdna"]]
# Select a single replicate for each pDNA batch and extract the pDNA reads from
# that counts file.
read_score_replicate_map(replicate_map_path) %>%
left_join(
map_read_count_files_to_replicate_id(score_read_counts_dir),
by = "replicate_id"
) %>%
check_no_missing_count_files(read_count_file) %>%
group_by(p_dna_batch) %>%
slice(1) %>%
ungroup() %>%
select(replicate_id, p_dna_batch, read_count_file) %>%
purrr::pmap_dfr(
extract_pdna_batch_read_counts
) %>%
reads_per_million() %>%
write_csv(output_file)