-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcount_links.R
64 lines (52 loc) · 1.99 KB
/
count_links.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env Rscript
# takes a labelled tree as well as the annotation file with reconstructed locations and counts how often each location transmits to a new one
library("ape")
library("plyr")
args = commandArgs(trailingOnly=TRUE)
phylo_file = args[1]
annotation_file = args[2]
#output_file = args[3]
#phylo_file <- "/home/sreimering/repos/VirusTracker/augur_adjustment/cov2020_reconstructed.phy"
#annotation_file <- "/home/sreimering/repos/VirusTracker/augur_adjustment/cov2020_reconstructed.annotation.txt"
#output_file <- "/home/sreimering/repos/VirusTracker/augur_adjustment/cov2020_links.txt"
tree <- read.tree(phylo_file)
annotation <- read.table(annotation_file, sep=" ", header=TRUE, stringsAsFactors=FALSE)
# get all labels in the order of the ids
all_label <- c(tree$tip.label, tree$node.label)
# get all locations
all_locations <- unique(annotation$location)
changes <- list()
num_changes <- 0
# go through all edges in the tree
for (i in 1:nrow(tree$edge)){
# nodes in the edge matrix are represented by ids; use the id to get the corresponding labels stored in all_label
origin_label <- all_label[tree$edge[i, 1]]
destination_label <- all_label[tree$edge[i, 2]]
# get the location assigned to the origin and destination nodes
origin <- annotation$location[which(annotation$label == origin_label)]
destination <- annotation$location[which(annotation$label == destination_label)]
local <- c()
for (j in 1:nchar(origin)) # have the same length by definition
{
pre <- substr(origin,j,j)
post <- substr(destination,j,j)
if (pre != post)
{
local <- c(local, paste(pre, j, post, sep=""))
num_changes <- num_changes + 1
}
}
if (is.null(local))
{
changes[[i]] <- c("")
}
else
{
changes[[i]] <- local
}
names(changes)[i] <- paste(c(origin_label, destination_label), collapse=' ')
}
print(num_changes)
print(changes)
#changes.df = do.call("rbind", lapply(changes, as.data.frame))
#write.table(changes, file = output_file, sep='\t')