-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRNAseq_coverage_HSPCs_CohesinKD.sh
216 lines (171 loc) · 7.29 KB
/
RNAseq_coverage_HSPCs_CohesinKD.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#bin/bash
#by Alex F, Jan 2024
################################################################################
# RNAseq - average coverage tracks for CD34 HSPC groups #
################################################################################
DIR_SOFT=/misc/software
DIR_DATA=/misc/data
PROJDIR=$DIR_DATA/analysis/project_cohesin/CD34
BWDIR=$DIR_DATA/processedData/bigWig/RNA/GRCh38/RNAseq/CD34 #contains individual bigWigs
AVBWDIR=$PROJDIR/RNAseq/AverageRNAcoverageTracks
CHROMSIZES_HG38=$DIR_SOFT/viewer/IGV/IGVTools_2.3.98/genomes/GRCh38.PRI_p10.chrom.sizes
TMPDIR=/loctmp/
mkdir $AVBWDIR
KDMETA=$PROJDIR/RNAseq/Metadata/Metadata_RNAseq_HSPCs_cohesin_KD.txt #contains only IDs used in study
# Define an array containing the RNA sample ids including suffix
ALLsamps=()
while IFS= read -r sn; do
name=${sn}.Unique.for.bigwig
ALLsamps+=("$name")
done < <(tail -n +2 ${KDMETA} | awk -F '\t' '{print $1}')
echo ${ALLsamps[@]}
# Print the array elements to check if everything is there
for sn in "${ALLsamps[@]}"; do
echo "$sn"
done
#look at complete array
echo ${ALLsamps[@]}
#The forward bigwigs were the wrong way around ... need to be inverted
##NOTE: (wrong option in mapping pipeline used, invert strand option needs to be set at 0 for Illumina TrueSeq mRNA )
cd $BWDIR
for sn in "${ALLsamps[@]}"; do
bigWigToBedGraph $sn ${TMPDIR}/$sn.bg
awk 'BEGIN{FS=OFS="\t"}{print $1,$2,$3,$4*(-1)}' ${TMPDIR}/$sn.bg > ${TMPDIR}/$sn.inv.bg
bedGraphToBigWig ${TMPDIR}/$sn.inv.bg ${CHROMSIZES_HG38} $BWDIR/$sn
rm ${TMPDIR}/$sn.bg ${TMPDIR}/$sn.inv.bg
done
# Get only SA2 KD files
STAG2KDBW=()
for element in "${ALLsamps[@]}"; do # Loop through the elements of ALLTAGDIRS
# Check if the element contains the string SA2 but not SA1 (to avoid the double KD samples)
if [[ $element == *_SA2_* ]] && [[ $element != *_SA1_* ]]; then
# If it does, add it to the new array
STAG2KDBW+=("$element")
fi
done
echo ${STAG2KDBW[@]}
# Get only SA1 KD files
STAG1KDBW=()
for element in "${ALLsamps[@]}"; do # Loop through the elements of ALLTAGDIRS
# Check if the element contains the string SA1 but not SA2 (to avoid the double KD samples)
if [[ $element == *_SA1_* ]] && [[ $element != *_SA2_* ]]; then
# If it does, add it to the new array
STAG1KDBW+=("$element")
fi
done
echo ${STAG1KDBW[@]}
# Get only SA1+SA2 KD files
STAG12KDBW=()
for element in "${ALLsamps[@]}"; do # Loop through the elements of ALLTAGDIRS
# Check if the element contains the string SA1 but not SA2 (to avoid the double KD samples)
if [[ $element == *_SA1_* ]] && [[ $element == *_SA2_* ]]; then
# If it does, add it to the new array
STAG12KDBW+=("$element")
fi
done
echo ${STAG12KDBW[@]}
# Get only RAD21 KD files
RAD21KDBW=()
for element in "${ALLsamps[@]}"; do # Loop through the elements of ALLTAGDIRS
# Check if the element contains the string
if [[ $element == *RAD21* ]]; then
# If it does, add it to the new array
RAD21KDBW+=("$element")
fi
done
echo ${RAD21KDBW[@]}
# Get only CTRL files (all types of ctrls combined!)
ctrlBW=()
for element in "${ALLsamps[@]}"; do # Loop through the elements of ALLTAGDIRS
# Check if the element contains the string
if [[ $element == *siCtrl* ]] || [[ $element == *Mock* ]] || [[ $element == *untreated* ]]; then
# If it does, add it to the new array
ctrlBW+=("$element")
fi
done
echo ${ctrlBW[@]}
for sn in ${ctrlBW[@]};
do
if test -f "${sn}"; then
echo "${sn} exists."
fi
done
# Calculate average bigwigs
cd $BWDIR
myAverageBigWig.pl -bw ${ctrlBW[@]} -chr ${CHROMSIZES_HG38} -o ${AVBWDIR}/ave.CD34_CTRL_RNA.Unique.for.bigwig
myAverageBigWig.pl -bw ${STAG2KDBW[@]} -chr ${CHROMSIZES_HG38} -o ${AVBWDIR}/ave.CD34_STAG2KD_RNA.Unique.for.bigwig
myAverageBigWig.pl -bw ${STAG1KDBW[@]} -chr ${CHROMSIZES_HG38} -o ${AVBWDIR}/ave.CD34_STAG1KD_RNA.Unique.for.bigwig
myAverageBigWig.pl -bw ${STAG12KDBW[@]} -chr ${CHROMSIZES_HG38} -o ${AVBWDIR}/ave.CD34_STAG1_2KD_RNA.Unique.for.bigwig
myAverageBigWig.pl -bw ${RAD21KDBW[@]} -chr ${CHROMSIZES_HG38} -o ${AVBWDIR}/ave.CD34_RAD21KD_RNA.Unique.for.bigwig
#--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##for reverse bigwigs
# Define an array containing the RNA sample ids including suffix
ALLsamps1=()
while IFS= read -r sn; do
name=${sn}.Unique.rev.bigwig
ALLsamps1+=("$name")
done < <(tail -n +2 ${KDMETA} | awk -F '\t' '{print $1}')
echo ${ALLsamps1[@]}
cd $BWDIR
for sn in "${ALLsamps1[@]}"; do
bigWigToBedGraph $sn ${TMPDIR}/$sn.bg
awk 'BEGIN{FS=OFS="\t"}{print $1,$2,$3,$4*(-1)}' ${TMPDIR}/$sn.bg > ${TMPDIR}/$sn.inv.bg
bedGraphToBigWig ${TMPDIR}/$sn.inv.bg ${CHROMSIZES_HG38} $BWDIR/$sn
rm ${TMPDIR}/$sn.bg ${TMPDIR}/$sn.inv.bg
done
# Get only SA2 KD files
STAG2KDBW=()
for element in "${ALLsamps1[@]}"; do # Loop through the elements of ALLTAGDIRS
# Check if the element contains the string SA2 but not SA1 (to avoid the double KD samples)
if [[ $element == *_SA2_* ]] && [[ $element != *_SA1_* ]]; then
# If it does, add it to the new array
STAG2KDBW+=("$element")
fi
done
echo ${STAG2KDBW[@]}
# Get only SA1 KD files
STAG1KDBW=()
for element in "${ALLsamps1[@]}"; do # Loop through the elements of ALLTAGDIRS
# Check if the element contains the string SA1 but not SA2 (to avoid the double KD samples)
if [[ $element == *_SA1_* ]] && [[ $element != *_SA2_* ]]; then
# If it does, add it to the new array
STAG1KDBW+=("$element")
fi
done
echo ${STAG1KDBW[@]}
# Get only SA1+SA2 KD files
STAG12KDBW=()
for element in "${ALLsamps1[@]}"; do # Loop through the elements of ALLTAGDIRS
# Check if the element contains the string SA1 but not SA2 (to avoid the double KD samples)
if [[ $element == *_SA1_* ]] && [[ $element == *_SA2_* ]]; then
# If it does, add it to the new array
STAG12KDBW+=("$element")
fi
done
echo ${STAG12KDBW[@]}
# Get only RAD21 KD files
RAD21KDBW=()
for element in "${ALLsamps1[@]}"; do # Loop through the elements of ALLTAGDIRS
# Check if the element contains the string
if [[ $element == *RAD21* ]]; then
# If it does, add it to the new array
RAD21KDBW+=("$element")
fi
done
echo ${RAD21KDBW[@]}
# Get only CTRL files (all types of ctrls combined!)
ctrlBW=()
for element in "${ALLsamps1[@]}"; do # Loop through the elements of ALLTAGDIRS
# Check if the element contains the string
if [[ $element == *siCtrl* ]] || [[ $element == *Mock* ]] || [[ $element == *untreated* ]]; then
# If it does, add it to the new array
ctrlBW+=("$element")
fi
done
# Calculate average bigwigs
cd $BWDIR
myAverageBigWig.pl -bw ${ctrlBW[@]} -chr ${CHROMSIZES_HG38} -minus -o ${AVBWDIR}/ave.CD34_CTRL_RNA.Unique.rev.bigwig
myAverageBigWig.pl -bw ${STAG2KDBW[@]} -chr ${CHROMSIZES_HG38} -minus -o ${AVBWDIR}/ave.CD34_STAG2KD_RNA.Unique.rev.bigwig
myAverageBigWig.pl -bw ${STAG1KDBW[@]} -chr ${CHROMSIZES_HG38} -minus -o ${AVBWDIR}/ave.CD34_STAG1KD_RNA.Unique.rev.bigwig
myAverageBigWig.pl -bw ${STAG12KDBW[@]} -chr ${CHROMSIZES_HG38} -minus -o ${AVBWDIR}/ave.CD34_STAG1_2KD_RNA.Unique.rev.bigwig
myAverageBigWig.pl -bw ${RAD21KDBW[@]} -chr ${CHROMSIZES_HG38} -minus -o ${AVBWDIR}/ave.CD34_RAD21KD_RNA.Unique.rev.bigwig