-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2
132 lines (111 loc) · 4.13 KB
/
2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
library(dplyr)
setwd('D:\\prrc\\TCGA\\GDC\\mutation\\lnc')
# 多条件语句替换
rt3<-rt2 %>%mutate(fustat=case_when(fustat == "Alive" ~ 0,
fustat == "Dead" ~ 1))
rt4<-rt3 %>% mutate(M = replace(M, M =="", NA),N = replace(N, N =="", NA),
T = replace(T, T =="", NA),
cacer_status = replace(cacer_status, cacer_status =="", NA))
# 批量改变字符型至factor因子
for (i in c(5,7:10)){
rt3[,i] <- as.factor(rt3[,i])
}
lnc<-read.table('lncRNA.txt',sep = '\t',header = T,check.names = F)
# Lnct<-Lnc[,-c(2:52)] #必要时去除正常样本
Lnct<- na.omit(lnc)#删除有空值的行
Lnct%>%filter(rowSums(Lnct[,2:490])>1)->rr
data3 <-aggregate( .~id,data=rr, max)
rownames(data3)<-data3$id
data3$id<-NULL
write.table(data3,'lncRNA.csv',quote=F, sep = "\t")
cou<-read.table('mutCount.txt',sep = '\t',header = T,check.names = F)
quantile(cou$Count)
cou1<-cou%>%filter(Count<=26|Count>=44)
arrange(cou1,Count)->cou1
sum(cou1$Count<=26)
sum(cou1$Count>=44)
gid<-substr(cou1$id,1,16)
data3[,which(substr(colnames(data3),1,16)%in%gid[1:129])]->GS
data3[,which(substr(colnames(data3),1,16)%in%gid[130:262])]->GU
lncRNA.mutGroup<-cbind(GS,GU)
write.csv(lncRNA.mutGroup,'lncRNA.mutGroup.csv',quote=F)
data<-lncRNA.mutGroup
dim(data)
library(tidyverse)
library(DESeq2)
library(limma)
library(pheatmap)
fdrFilter=0.05 #fdr临界值
logFCfilter=1 #logFC临界值
conNum=124 #低突变组(GS)样品数目
treatNum=127 #高突变组(GS)样品数目
inputFile="lncRNA.mutGroup.txt" #输入文件
outTab=data.frame()
grade=c(rep(1,conNum),rep(2,treatNum))
rt=read.table(inputFile, header=T, sep="\t", check.names=F)
rt=as.matrix(rt)
rownames(rt)=rt[,1]
exp=rt[,2:ncol(rt)]
dimnames=list(rownames(exp),colnames(exp))
data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
data=avereps(data)
data=data[rowMeans(data)>0.4,]
#差异分析
for(i in row.names(data)){
lncName=unlist(strsplit(i,"\\|",))[1]
lncName=gsub("\\/", "_", lncName)
rt=rbind(expression=data[i,],grade=grade)
rt=as.matrix(t(rt))
wilcoxTest=wilcox.test(expression ~ grade, data=rt)
conLncMeans=mean(data[i,1:conNum])
treatLncMeans=mean(data[i,(conNum+1):ncol(data)])
logFC=log2(treatLncMeans)-log2(conLncMeans)
pvalue=wilcoxTest$p.value
conMed=median(data[i,1:conNum])
treatMed=median(data[i,(conNum+1):ncol(data)])
diffMed=treatMed-conMed
if( ((logFC>0) & (diffMed>0)) | ((logFC<0) & (diffMed<0)) ){
outTab=rbind(outTab,cbind(lnc=i,conMean=conLncMeans,treatMean=treatLncMeans,logFC=logFC,pValue=pvalue))
}
}
pValue=outTab[,"pValue"]
fdr=p.adjust(as.numeric(as.vector(pValue)), method="fdr")
outTab=cbind(outTab,fdr=fdr)
write.table(outTab,file="all.xls",sep="\t",row.names=F,quote=F)
#输出所有lncRNA的差异情况
write.table(outTab,file="all.xls",sep="\t",row.names=F,quote=F)
#输出差异表格
outDiff=outTab[( abs(as.numeric(as.vector(outTab$logFC)))>logFCfilter & as.numeric(as.vector(outTab$fdr))<fdrFilter),]
write.table(outDiff,file="diff.xls",sep="\t",row.names=F,quote=F)
write.table(outDiff,file="diff.txt",sep="\t",row.names=F,quote=F)
#输出差异lncRNA的表达文件
heatmap=rbind(ID=colnames(data[as.vector(outDiff[,1]),]),data[as.vector(outDiff[,1]),])
write.table(heatmap,file="diffLncExp.txt",sep="\t",col.names=F,quote=F)
#绘制差异lncRNA热图
lncNum=20
diffSig=outDiff[order(as.numeric(as.vector(outDiff$logFC))),]
diffLncName=as.vector(diffSig[,1])
diffLength=length(diffLncName)
hmLnc=c()
if(diffLength>(lncNum*2) ){
hmLnc=diffLncName[c(1:lncNum,(diffLength-lncNum+1):diffLength)]
}else{
hmLnc=diffLncName
}
hmExp=data[hmLnc,]
hmExp=log2(hmExp+0.01)
Type=c(rep("genomic stable",conNum),rep("genomic unstable",treatNum))
names(Type)=colnames(data)
Type=as.data.frame(Type)
# data_df = separate_dt(cluster,V1,c('sample','type'),'-08')
pdf(file="heatmap.pdf",height=5,width=7)
pheatmap(hmExp,
annotation=Type,
color = colorRampPalette(c("blue", "white", "red"))(50),
cluster_cols =F,
show_colnames = F,
scale="row",
fontsize = 8,
fontsize_row=6,
fontsize_col=8)
dev.off()