-
Notifications
You must be signed in to change notification settings - Fork 2
/
PoolCollections.GCL.r
96 lines (64 loc) · 3.63 KB
/
PoolCollections.GCL.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
PoolCollections.GCL <- function(collections, loci = LocusControl$locusnames, IDs = NULL, newname = paste(collections, collapse = ".")){
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This function combines "*.gcl" objects into a new one called "newname.gcl".
#
# Inputs~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# collections - a character vector of silly codes without the ".gcl" extention (e.g. collections <- c("KQUART06","KQUART08","KQUART10")).
# Collections can be a single silly if you want create a new ".gcl" with only fish supplied in IDs.
#
# loci - a character vector of locus names
#
# IDs - a named list of FK_FISH_ID vectors (either character or numeric), each vector is associated with and named after a member of "collections".
# These will be used to subset each collection before pooling. If no IDs are supplied all individuals from each collection are used.
#
# newname - is the name of the new "*.gcl" created. Do not provide ".gcl" extention. If no name supplied then the newname defaults to
# the collection names collapsed with a period between each name (e.g. "KQUART06.KQUART08.KQUART09")
#
# Outputs~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Assigns a new "pooled collection" to your workspace
#
# Example~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# load("V:/Analysis/2_Central/Chinook/Cook Inlet/2019/2019_UCI_Chinook_baseline_hap_data/2019_UCI_Chinook_baseline_hap_data_test.RData")
#
# PoolCollections.GCL(collections = c("KQUART06","KQUART08","KQUART10"), loci = loci557, IDs = list(KQUART06 = 3:12, KQUART08 = 1:10, KQUART10 = 1:4), newname = "QuartzCr")
#
# Note~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This function is also useful for producing "pooled mixture" objects for mixed stock analysis.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(!exists("LocusControl")){
stop("'LocusControl' not yet built.")
}
if(!require("pacman")) install.packages("pacman"); library(pacman); pacman::p_load(tidyverse) # Install packages, if not in library and then load them.
if(nchar(newname) > 200){
newname <- substr(newname, start = 1, stop = 200)
}
if(!all(loci %in% LocusControl$locusnames)){
stop(paste0("The following `loci` were not found in `LocusControl`:\n", paste(setdiff(loci, LocusControl$locusnames), collapse = "\n")))
}
ncollections <- length(collections)
if(is.null(IDs)){
IDs <- sapply(collections, function(collection){
get(paste0(collection, ".gcl"), pos = 1)$FK_FISH_ID
}, simplify = FALSE)
}
if(!is.list(IDs)){
stop("'IDs' must be a list")
}
if(ncollections != length(IDs)){
stop("'IDs' must be same length as 'collections'")
}
IDs <- purrr::set_names(IDs, collections) # Making sure IDs has names
SubsetLoci <- c(loci, paste0(loci, ".1")) %>% sort() # These are the locus score headers for subsetting by loci.
output <- lapply(collections, function(collection){
my.gcl <- get(paste0(collection, ".gcl"), pos = 1)
attr <- my.gcl[ , 1:19] %>%
names() # The attribute names
my.gcl %>%
dplyr::filter(FK_FISH_ID %in% IDs[[collection]]) %>%
dplyr::select(tidyselect::all_of(attr), tidyselect::all_of(SubsetLoci))
}) %>%
dplyr::bind_rows() %>%
dplyr::mutate(FK_FISH_ID = seq(length(unlist(IDs))), SILLY_CODE = newname)
assign(paste0(newname, ".gcl"), output, pos = 1, envir = .GlobalEnv)
}