-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbed2direction
executable file
·95 lines (85 loc) · 5.55 KB
/
bed2direction
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/bin/bash
#PBS -l nodes=1:ppn=4
GENOME="mm9"
WINDOW=500
#### usage ####
usage() {
echo
echo Program: "bed2direction (perform direction analysis around BED coordinates using H3K4me1 and H3K4me3 modifications)"
echo Author: BRIC, University of Copenhagen, Denmark
echo Version: 1.0
echo Contact: [email protected]
echo "Usage: bed2direction -i <file> -o <file> -a <file> -b <file> -c <file> -d <file> [OPTIONS]"
echo "Options:"
echo " -i <file> [input file in BED format (can be stdin)]"
echo " -o <file> [output file]"
echo " -a <file> [input BAM file corresponding to H3K4me1 (replicate 1)]"
echo " -b <file> [input BAM file corresponding to H3K4me1 (replicate 2)]"
echo " -c <file> [input BAM file corresponding to H3K4me3 (replicate 1)]"
echo " -d <file> [input BAM file corresponding to H3K4me3 (replicate 2)]"
echo "[OPTIONS]"
echo " -m <file> [SVM model session file]"
echo " [if given, predictions based on the input model will also be performed]"
echo " -g <string> [genome (default: mm9)]"
echo " -w <int> [window size (default: 500)]"
echo " -h [help]"
echo
exit 0
}
#### parse options ####
while getopts i:o:a:b:c:d:m:g:w:h ARG; do
case "$ARG" in
i) BEDFILE=$OPTARG;;
o) OUTFILE=$OPTARG;;
a) BAMFILE_ME1_REP1=$OPTARG;;
b) BAMFILE_ME1_REP2=$OPTARG;;
c) BAMFILE_ME3_REP1=$OPTARG;;
d) BAMFILE_ME3_REP2=$OPTARG;;
m) SVM_MODEL=$OPTARG;;
g) GENOME=$OPTARG;;
w) WINDOW=$OPTARG;;
h) HELP=1;;
esac
done
## usage, if necessary file and directories are given/exist
if [ -z "$BEDFILE" -o -z "$OUTFILE" -o ! -f "$BAMFILE_ME1_REP1" -o ! -f "$BAMFILE_ME1_REP2" -o ! -f "$BAMFILE_ME3_REP1" -o ! -f "$BAMFILE_ME3_REP2" -o "$HELP" ]; then
usage
fi
###################
#helperfunction
function wait_for_jobs_to_finish {
for job in `jobs -p`
do
echo $job
wait $job
done
echo $1
}
###############
## populating files based on input genome
if [ "$GENOME" == "mm9" ]; then
GENOME_FILE="/home/pundhir/project/genome_annotations/mouse.mm9.genome"
elif [ "$GENOME" == "hg19" ]; then
GENOME_FILE="/home/pundhir/project/genome_annotations/human.hg19.genome"
else
echo "Presently the program only support analysis for mm9 or hg19"
echo
usage
fi
## create temporary BED file if input is from stdin
if [ "$BEDFILE" == "stdin" ]; then
TMP=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1)
while read LINE; do
echo ${LINE}
done | perl -ane '$line=""; foreach(@F) { $line.="$_\t"; } $line=~s/\t$//g; print "$line\n";' > $TMP
BEDFILE=$TMP
fi
#paste <(cut -f 1-6 $BEDFILE) <(paste <(cut -f 1-6 $BEDFILE | bed2window -i stdin -w 0 -s | bed2window -i stdin -w $WINDOW -l -s | bed2expr -i stdin -j $BAMFILE_ME1_REP1,$BAMFILE_ME1_REP2,$BAMFILE_ME3_REP1,$BAMFILE_ME3_REP2 -m -d -g $GENOME -v 1 | perl -ane '$start=scalar(@F)-2; $end=scalar(@F)-1; $line=""; foreach(@F[$start..$end]) { $line.="$_\t"; } $line=~s/\t$//g; print "$line\n";') <(cut -f 1-6 $BEDFILE | bed2window -i stdin -w 0 -s | bed2window -i stdin -w $WINDOW -r -s | bed2expr -i stdin -j $BAMFILE_ME1_REP1,$BAMFILE_ME1_REP2,$BAMFILE_ME3_REP1,$BAMFILE_ME3_REP2 -m -d -g $GENOME -v 1 | perl -ane '$start=scalar(@F)-2; $end=scalar(@F)-1; $line=""; foreach(@F[$start..$end]) { $line.="$_\t"; } $line=~s/\t$//g; print "$line\n";') <(cut -f 1-6 $BEDFILE | bed2window -i stdin -w $WINDOW -s | bed2expr -i stdin -j $BAMFILE_ME1_REP1,$BAMFILE_ME1_REP2,$BAMFILE_ME3_REP1,$BAMFILE_ME3_REP2 -m -d -g $GENOME -v 1 | perl -ane '$start=scalar(@F)-2; $end=scalar(@F)-1; $line=""; foreach(@F[$start..$end]) { $line.="$_\t"; } $line=~s/\t$//g; print "$line\n";')) | perl -ane '$F[6]=sqrt($F[6]+0.00001); $F[7]=sqrt($F[7]+0.00001); $F[8]=sqrt($F[8]+0.00001); $F[9]=sqrt($F[9]+0.00001); $d_me1=sprintf("%0.5f", ($F[6]-$F[8])/($F[6]+$F[8])); $d_me3=sprintf("%0.5f", ($F[7]-$F[9])/($F[7]+$F[9])); $F[10]=$F[10]; $F[11]=$F[11]; print "$F[0]\t$F[1]\t$F[2]\t$F[3]\t$F[4]\t$F[5]\t$d_me1\t$d_me3\t$F[10]\t$F[11]\n";' > $OUTFILE
paste <(cut -f 1-6 $BEDFILE) <(paste <(cut -f 1-6 $BEDFILE | bed2window -i stdin -w 0 -s | bed2window -i stdin -w $WINDOW -l -s | bed2coverage -i stdin -j $BAMFILE_ME1_REP1,$BAMFILE_ME1_REP2,$BAMFILE_ME3_REP1,$BAMFILE_ME3_REP2 -m -d -g $GENOME -v 1 | perl -ane '$start=scalar(@F)-2; $end=scalar(@F)-1; $line=""; foreach(@F[$start..$end]) { $line.="$_\t"; } $line=~s/\t$//g; print "$line\n";') <(cut -f 1-6 $BEDFILE | bed2window -i stdin -w 0 -s | bed2window -i stdin -w $WINDOW -r -s | bed2coverage -i stdin -j $BAMFILE_ME1_REP1,$BAMFILE_ME1_REP2,$BAMFILE_ME3_REP1,$BAMFILE_ME3_REP2 -m -d -g $GENOME -v 1 | perl -ane '$start=scalar(@F)-2; $end=scalar(@F)-1; $line=""; foreach(@F[$start..$end]) { $line.="$_\t"; } $line=~s/\t$//g; print "$line\n";') <(cut -f 1-6 $BEDFILE | bed2window -i stdin -w $WINDOW -s | bed2coverage -i stdin -j $BAMFILE_ME1_REP1,$BAMFILE_ME1_REP2,$BAMFILE_ME3_REP1,$BAMFILE_ME3_REP2 -m -d -g $GENOME -v 1 | perl -ane '$start=scalar(@F)-2; $end=scalar(@F)-1; $line=""; foreach(@F[$start..$end]) { $line.="$_\t"; } $line=~s/\t$//g; print "$line\n";')) | perl -ane '$F[6]=($F[6]+0.00001); $F[7]=($F[7]+0.00001); $F[8]=($F[8]+0.00001); $F[9]=($F[9]+0.00001); $d_me1=sprintf("%0.5f", ($F[6]-$F[8])/($F[6]+$F[8])); $d_me3=sprintf("%0.5f", ($F[7]-$F[9])/($F[7]+$F[9])); $F[10]=$F[10]; $F[11]=$F[11]; print "$F[0]\t$F[1]\t$F[2]\t$F[3]\t$F[4]\t$F[5]\t$d_me1\t$d_me3\t$F[6]\t$F[7]\t$F[8]\t$F[9]\t$F[10]\t$F[11]\n";' > $OUTFILE
if [ ! -z "$SVM_MODEL" ]; then
bed2direction.R -i $OUTFILE -j $SVM_MODEL -o "$OUTFILE".tmp
mv "$OUTFILE".tmp $OUTFILE
fi
if [ ! -z "$TMP" ]; then
rm $TMP
fi