-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbam2reproducibility
executable file
·107 lines (94 loc) · 3.04 KB
/
bam2reproducibility
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/bin/bash
#PBS -l nodes=1:ppn=4
PROCESSOR=1
CONFIG_FILTER="qual"
#### usage ####
usage() {
echo Program: "bam2reproducibility (compute reproducibility between replicates)"
echo Author: BRIC, University of Copenhagen, Denmark
echo Version: 1.0
echo Contact: [email protected]
echo "Usage: bam2reproducibility -i <files> -o <dir> [OPTIONS]"
echo "Options:"
echo " -i <file> [input BAM files (atleast two)]"
echo " [if multiple separate them by a comma]"
echo " **OR**"
echo " [input configuration file containing bam file information]"
echo " [<id> <bam file> (id should start with qual)]"
echo " -o <dir> [output directory]"
echo "[OPTIONS]"
echo " -c [input BAM files are for ChIP-seq]"
echo " -p <int> [number of processors (default: 1)]"
echo " -f <string> [filter bam files from configuration file based on input indentifier (default: qual)]"
echo " -h [help]"
echo
exit 0
}
#### parse options ####
while getopts i:o:cp:f:h ARG; do
case "$ARG" in
i) BAMFILE=$OPTARG;;
o) OUTDIR=$OPTARG;;
c) CHIPSEQ=1;;
p) PROCESSOR=$OPTARG;;
f) CONFIG_FILTER=$OPTARG;;
h) HELP=1;;
esac
done
## usage, if necessary file and directories and not given/exist
if [ ! "$BAMFILE" -o ! "$OUTDIR" -o "$HELP" ]; then
usage
fi
## create appropriate directory structure
echo
echo -n "Create output directory structure.. "
if [ ! -d "$OUTDIR" ]; then
mkdir -p $OUTDIR
fi
echo "done"
## check if input is BAM files or configuration file containing BAM file information
INPUT=$(echo $BAMFILE | perl -ane '$_=~s/\,.*//g; print $_;')
if [ "$(samtools view -H $INPUT | wc -l)" -le 0 ]; then
## read configuration file
INPUT=$(cat $BAMFILE | perl -ane '
if($_=~/^'$CONFIG_FILTER'/) {
$file.="$F[1],";
} END {
$file=~s/\,$//g;
print "$file\n";
}'
)
BAMFILE=$INPUT
fi
###############################################
echo -n "Check, if atleast two input BAM files are provided.. "
IFS=","
BAMFILES=($BAMFILE)
BAMFILES_COUNT=${#BAMFILES[@]}
IFS=" "
if [ "$BAMFILES_COUNT" -lt 2 ]; then
echo
echo "Atleast two input BAM files are required"
echo
usage
fi
echo "done"
echo -n "Index input BAM files, if it does not exist.. "
for (( i=0; i<$BAMFILES_COUNT; i++ )); do
if [ ! -e "${BAMFILES[$i]}.bai" ]; then
samtools index ${BAMFILES[$i]}
fi
done
echo "done"
###############################################
echo -n "Start analysis.. "
ID=$(echo $BAMFILE | perl -an -F'/\,/' -e '$_=~s/\,.*//g; $_=~s/^.*\///g; $_=~s/\..*//g; chomp($_); print "$_";')
BAMFILES=$(echo $BAMFILE | sed 's/\,/ /g')
if [ ! -z "$CHIPSEQ" ]; then
multiBamSummary bins --bamfiles $BAMFILES -out $OUTDIR/$ID.covMat --ignoreDuplicates -p $PROCESSOR
else
multiBamSummary bins --bamfiles $BAMFILES -out $OUTDIR/$ID.covMat -p $PROCESSOR
fi
plotCorrelation -in $OUTDIR/$ID.covMat -o $OUTDIR/$ID.reproducibility_heatmap.pdf -c spearman -p heatmap --plotNumbers
echo "done"
exit