-
Notifications
You must be signed in to change notification settings - Fork 2
/
CombineLoci.GCL.r
166 lines (113 loc) · 6.48 KB
/
CombineLoci.GCL.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
CombineLoci.GCL <- function(sillyvec, markerset, update = TRUE, delim = c(".", "_")[1]){
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This function combines a set of markers into a single marker set.
#
# Inputs~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# sillyvec - a character vector of silly codes (e.g. sillyvec <- c("KQUART06","KQUART08","KQUART09"))
#
# markerset - is a vector of the set of loci you wish to combine (e.g., c("Ots_vatf-251", "Ots_ZR-575"))
#
# update - is a logical switch. If TRUE, the "LocusControl" object is updated and all "*.gcl" objects in "sillyvec" will be updated with the new marker.
# If FALSE, the "LocusControl" object is not updated and a temporary object called "*.temp.gcl" with the updated data is created.
#
# delim - specifies the separator between combined loci, either a period (.) which is the default or an underscore (_) so locus names will work in SPAM
#
# Example~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# password = "************"
#
# CreateLocusControl.GCL(markersuite = "Sockeye2011_96SNPs", username = "awbarclay", password = password)
#
# LOKI2R.GCL(sillyvec = c("SLARS11O", "SSKWEN07", "SSKK594L", "SMOOT92"), username = "awbarclay", password = password)
#
# CombineLoci.GCL(sillyvec = c("SLARS11O", "SSKWEN07", "SSKK594L", "SMOOT92"), markerset = c("One_Cytb_17", "One_CO1","One_Cytb_26"), update = TRUE, delim = c(".","_")[1])
# CombineLoci.GCL(sillyvec = c("SLARS11O", "SSKWEN07", "SSKK594L", "SMOOT92"), markerset = c("One_MHC2_251", "One_MHC2_190"), update = TRUE, delim = c(".","_")[1])
#
# Note~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# This function requires a LocusControl object. Run CreateLocusControl.GCL prior to this function.
# This function also requires dplyr version 1.0.0 or higher
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(!all(markerset %in% LocusControl$locusnames)){
stop(paste0("'", setdiff(markerset, LocusControl$locusnames), "' from argument 'markerset' not found in 'LocusControl' object!!!"))
}
if(!require("pacman")) install.packages("pacman"); library(pacman); pacman::p_load(tidyverse) # Install packages, if not in library and then load them.
nmarkers <- length(markerset)
myploidy <- LocusControl$ploidy[markerset]
if(sum(myploidy == myploidy[1]) != nmarkers){
stop("'markerset' has different ploidies!!!")
}
MarkerSuite <- LocusControl$MarkerSuite %>%
unique()
locusnames <- LocusControl$locusnames
newmarkername <- paste(markerset, collapse = delim)
existnewmarker <- newmarkername %in% locusnames
loci <- unique(c(locusnames, newmarkername))
nloci <- length(loci)
Publishedlocusnames <- LocusControl$Publishedlocusnames
Publishedlocusnames <- c(Publishedlocusnames, purrr::set_names(NA, newmarkername))
newalleles <- AllPossiblePhenotypes.GCL(markerset)
maxchar <- max(nchar(newalleles))
alleles <- LocusControl$alleles[locusnames]
nalleles <- LocusControl$nalleles[locusnames]
ploidy <- LocusControl$ploidy[locusnames]
if(!existnewmarker){
alleles[[length(locusnames) + 1]] <- tibble::tibble(allele = seq_along(newalleles), call = newalleles)
nalleles <- c(nalleles, length(newalleles))
ploidy <- c(ploidy, 1)
}
names(alleles) <- names(nalleles) <- names(ploidy) <- loci
for(silly in sillyvec){
my.gcl <- get(paste0(silly, ".gcl"), pos = 1)
if(newmarkername %in% names(my.gcl)){
warning(paste0("'", newmarkername, "'"," already created in silly '", silly,"'!!!"))
next()
}
newmarkername_1 <- paste0(newmarkername, ".1") # Allele 2 name
# Combine haploid
if(unique(myploidy) == 1){
new.gcl <- my.gcl %>%
tidyr::unite(col = {{newmarkername}}, tidyselect::all_of(markerset), sep = '', remove = FALSE, na.rm = TRUE) %>% # Had to add the {{}} around the col object for this to work.
mutate(!!rlang::sym(newmarkername) := dplyr::case_when(nchar(!!rlang::sym(newmarkername)) < maxchar ~ NA_character_, # Added this mutate case_when to replace any genotypes with less than maxchar with NA's. Maybe there is a better way?
TRUE ~ !!rlang::sym(newmarkername)),
!!rlang::sym(newmarkername_1) := NA_character_) %>%
dplyr::relocate(!!rlang::sym(newmarkername), .after = tidyselect::last_col()) %>%
dplyr::relocate(!!rlang::sym(newmarkername_1), .after = tidyselect::last_col()) # Had to relocate alleles separately or they get reordered
}
# Combine diploid
if(unique(myploidy)==2){
sel_var <- lapply(markerset, function(mkr){
c(mkr,paste0(mkr, ".1"))
}) %>% unlist() #Setting up order of variable to unite so the markers are in the same order as markerset
new.gcl <- my.gcl %>%
tidyr::unite(col = {{newmarkername}}, tidyselect::all_of(sel_var), sep = '', remove = FALSE, na.rm = TRUE) %>% # Had to add the {{}} around the col object for this to work.
mutate(!!rlang::sym(newmarkername) := dplyr::case_when(nchar(!!rlang::sym(newmarkername)) < maxchar ~ NA_character_, # Added this mutate case_when to replace any genotypes with less than maxchar with NA's. Maybe there is a better way?
TRUE ~ !!rlang::sym(newmarkername)),
!!rlang::sym(newmarkername_1) := NA_character_) %>%
dplyr::relocate(!!rlang::sym(newmarkername), .after = tidyselect::last_col()) %>%
dplyr::relocate(!!rlang::sym(newmarkername_1), .after = tidyselect::last_col()) # Had to relocate alleles separately or they get reordered
}
if(update){
assign(paste0(silly, ".gcl"), new.gcl, pos = 1)
}
if(!update){
assign(paste0(silly, ".temp.gcl"), new.gcl, pos = 1)
}
}
if(update){
assign(
x = "LocusControl",
value = tibble::tibble(
MarkerSuite = MarkerSuite,
locusnames = loci,
Publishedlocusnames = Publishedlocusnames,
nalleles = nalleles,
ploidy = ploidy,
alleles = alleles
),
pos = 1
)
}
}