-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoutgroup_f3.heatmap.OG2.R
110 lines (99 loc) · 4.77 KB
/
outgroup_f3.heatmap.OG2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# title: "Heatmap of outgroup f3 on GW data"
# author: "Dang Liu 25.Jul.2019"
# Last updated: 12.Aug.2019
# Use libraries
library(tidyverse)
library(pheatmap)
library(RColorBrewer)
library(gtools)
library(data.table)
library(ggrepel)
library(Rmisc)
# load data from a huge f3 comparing matrix
load("/mnt/scratch/dang/Vietnam/outgroup_f3/HO.ancient.outgroup.v2.Mbuti_f3.Rdata")
head(f3_res)
info <- read.table("/mnt/scratch/dang/Vietnam/map/all.map.am.geo.info2", header=T)
# Prepare data set for annotation and order
info <- info[!info$Pop %in% c("Mbuti","French"),] %>% select(Pop, Country)
d <- f3_res %>% select(A, B ,f3)
colnames(d) <- c("Pop", "Pop2", "f3")
d <- info %>% left_join(d)
d$Country <- factor(d$Country, levels=c("Taiwan","Mongolia","China","Vietnam","Cambodia","Laos","Thailand","Myanmar","Malaysia","Indonesia","Philippines","India"), ordered=T)
d <- d[order(d$Country),]
# Exclude pops here
pop_exclude <- c("Atayal1","Ami1","Mamanwa1")
d <- d[!d$Pop%in%pop_exclude,]
d <- d[!d$Pop2%in%pop_exclude,]
# Make a matrix for heatmap function
d2 <- f3_res[!f3_res$A%in%pop_exclude,]
d2 <- d2[!d2$B%in%pop_exclude,]
d2.matrix <- d2 %>% select(A, B, f3) %>% dcast(A ~ B, value.var = "f3") %>% remove_rownames %>% column_to_rownames(var="A") %>% data.matrix()
head(d2.matrix)
# Subsets
Vietnam <- c("Cham","Ede","Giarai","KhoMu","Kinh","Mang","Muong","BoY","CoLao","LaChi","Nung","Tay","Thai","Dao","Hmong","PaThen","Cong","HaNhi","LaHu","LoLo","PhuLa","Sila")
Near_pop <- c("GujaratiA","GujaratiB","GujaratiC","GujaratiD","Lodhi","Brahmin_Tiwari","Jew_Cochin","Vishwabrahmin","Mala","Kharia","Onge","Dai","She","Miao","Lahu","Naxi","Yi","Tujia","Han","Han_NC","Xibo","Hezhen","Oroqen","Tu","Daur","Mongola","Ami","Atayal","Mamanwa","Borneo","Semende","Thai_T","Mlabri","Htin_Mal","Cambodian")
Ancient <- c("P-Tianyuan","Ho-Pha_Faen","Ho-Gua_Cha","N-Gua_Cha","N-Man_Bac","N-Nam_Tun","N-Mai_Da_Dieu","N-Hon_Hai_Co_Tien","N-Tam_Pa_Ling","N-Tam_Hang","N-Oakaie","N-Loyang_Ujung","BA-Nui_Nap","IA-Vat_Komnou","IA-Long_Long_Rak","Hi-Hon_Hai_Co_Tien","Hi-Supu_Hujung","Hi-Kinabatagan")
all <- c(Vietnam, Near_pop, Ancient)
# Order the matrix
#d2.matrix <- d2.matrix[unique(d$Pop),unique(d$Pop)]
#d3.matrix <- d2.matrix[Vietnam,Near_pop]
#d3.matrix <- d2.matrix[Vietnam,Ancient]
#d3.matrix <- d2.matrix[Vietnam,Vietnam]
d3.matrix <- d2.matrix[Vietnam,all]
# Deal with 0 (same pops vs. each other will result in outgroup f3=0; substitute it to maximum + 0.01)
norm_data <- d3.matrix
n <- 0
c <- 0
while(n<nrow(d3.matrix)){
n <- n+1
while(c<ncol(d3.matrix)){
c <- c+1
if (norm_data[n,c] == 0){norm_data[n,c] = NA}
#norm_data[,n] <- 1-(norm_data[,n]+(0-min(data[,n])))/max(data[,n]+(0-min(data[,n])))
}
c <- 0
}
# Normalizing by columns
#n <- 0
#while(n<ncol(norm_data)){
# n <- n+1
# norm_data[,n] <- (norm_data[,n]+(0-min(norm_data[,n],na.rm=T)))/max(norm_data[,n]+(0-min(norm_data[,n],na.rm=T)),na.rm=T)
#}
# Normalizing by rows
#n <- 0
#while(n<nrow(norm_data)){
# n <- n+1
# norm_data[n,] <- (norm_data[n,]+(0-min(norm_data[n,],na.rm=T)))/max(norm_data[n,]+(0-min(norm_data[n,],na.rm=T)),na.rm=T)
#}
# annotaion here
info2 <- read_csv("/r1/people/dang_liu/Projects/Vietnam/Vietnam.metadata.csv")
#annotation <- info[!duplicated(info), ] %>% remove_rownames %>% column_to_rownames(var="Pop")
annotation_col <- info2 %>% select(Pop, Language, Period, Country) %>% distinct(Pop, .keep_all = TRUE) %>% remove_rownames %>% column_to_rownames(var="Pop")
annotation_row <- info2 %>% select(Pop, Language) %>% distinct(Pop, .keep_all = TRUE) %>% remove_rownames %>% column_to_rownames(var="Pop")
# Specify colors
ann_colors = list(
Country = c(Taiwan="#A6CEE3",Mongolia="#CC9900",China="#1F78B4",Vietnam="#B2DF8A",Cambodia="#33A02C",Laos="#FB9A99",Thailand="#E31A1C",Myanmar="#FDBF6F",Malaysia="#FF7F00",Indonesia="#CAB2D6",Philippines="#6A3D9A",India="#B15928"),
Period = c(Present="#7FC97F",Historical="#006633",Iron_Age="#FF3300",Bronze_Age="#FF9933",Neolithic="#386CB0",Hoabinhian="#F0027F",Paleolithic="#660000"),
Language = c(Austronesian="#CC6633","Austro-Asiatic"="#9966CC","Hmong-Mien"="#FFCC33","Sino-Tibetan"="#66CC99","Tai-Kadai"="#CC0033",
"Indo-European"="#FFCCFF",Dravidian="#CC0099",Andamanese="#660033",Mongolic="#0000FF",Tungusic="#66CCFF",Ancient="#000000")
)
# plot heatmap
res <- pheatmap(
color= colorRampPalette(c("#4575B4", "#66CCFF", "#FFEDA0", "#D73027"))(25),
# Alternative color choice: "#FFFFCC", "#FEB24C", "#E31A1C"
fontsize_number=14,
fontsize_row=16,
fontsize_col=16,
#cellwidth=20,
#cellheight=20,
norm_data,
#display_numbers=as.matrix(df.mat),
#annotation=annotation,
#angle_col = "45",
gaps_col=c(22,57),
annotation_col=annotation_col,
annotation_row=annotation_row,
annotation_colors=ann_colors,
na_col="white",
cluster_rows=FALSE, cluster_cols=FALSE
)