-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathextractMotifs
executable file
·124 lines (112 loc) · 3.59 KB
/
extractMotifs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/bin/bash
#PBS -l nodes=1:ppn=4
GENOME="mm9";
usage() {
echo Program: "extractMotifs (extract motifs of interest from a motif file)"
echo Author: BRIC, University of Copenhagen, Denmark
echo Version: 1.0
echo Contact: [email protected]
echo "Usage: extractMotifs -i <file> -j <string> [OPTIONS]"
echo "Options:"
echo " -i <file> [input file containing motifs (or stdin)]"
echo " -j <string> [name of motifs needs to be extracted]"
echo " [if multiple, separate them by a comma]"
echo "[OPTIONS]"
echo " -S [soft match to motif name, regexp based (default: hard match)]"
echo " [id before a '/', eg. E2F1_MOUSE.H10MO.A in E2F1_MOUSE.H10MO.A/hocomoco]"
echo " -h [help]"
echo
exit 0
}
#### parse options ####
while getopts i:j:Sh ARG; do
case "$ARG" in
i) MOTIFFILE=$OPTARG;;
j) MOTIF=$OPTARG;;
S) SOFTMATCH=1;;
h) HELP=1;;
esac
done
## usage, if necessary file and directories are given/exist
if [ -z "$MOTIFFILE" -o -z "$MOTIF" -o "$HELP" ]; then
usage
fi
###################
#helperfunction
function wait_for_jobs_to_finish {
for job in `jobs -p`
do
echo $job
wait $job
done
echo $1
}
###############
if [ "$GENOME" == "mm9" ]; then
GENOME_FILE="/home/pundhir/project/genome_annotations/mouse.mm9.genome"
REPEAT_FILE="/home/pundhir/project/genome_annotations/mouse.mm9.simpleRepeat.gz"
GENOME_MOTIF="mm9r"
elif [ "$GENOME" == "hg19" ]; then
GENOME_FILE="/home/pundhir/project/genome_annotations/human.hg19.genome"
REPEAT_FILE="/home/pundhir/project/genome_annotations/human.hg19.simpleRepeat.gz"
GENOME_MOTIF="hg19r"
elif [ "$GENOME" == "danRer7" ]; then
GENOME_FILE="/home/pundhir/project/genome_annotations/zebrafish.danRer7.genome"
REPEAT_FILE="/home/pundhir/project/genome_annotations/zebrafish.danRer7.simpleRepeat.gz"
GENOME_MOTIF="danRer7r"
else
echo "Presently the program only support analysis for mm9, hg19 or danRer7."
echo
usage
fi
## create temporary BED file if input is from stdin
if [ "$MOTIFFILE" == "stdin" ]; then
TMP=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1)
while read LINE; do
echo ${LINE}
done | perl -ane '$line=""; foreach(@F) { $line.="$_\t"; } $line=~s/\t$//g; print "$line\n";' > $TMP
MOTIFFILE=$TMP
fi
oIFS=$IFS
IFS=","
MOTIFS=($MOTIF)
IFS=$oIFS
if [ -z "$SOFTMATCH" ]; then
for (( i=0; i<${#MOTIFS[@]}; i++ )); do
MOTIFS[$i]=$(echo ${MOTIFS[$i]} | perl -ane 's/\//\\\//g; print $_;')
zless $MOTIFFILE | perl -ane '
if($_=~/'${MOTIFS[$i]}'\//) {
@t=split(/\s+/,$_);
$t[1]=~s/^.*\(/'${MOTIFS[$i]}'(/g;
$line=();
foreach(@t) { $line.="$_\t"; }
$line=~s/\t$//g;
print "$line\n"; $start=1;
}
elsif($_=~/^\>/) { $start=0; }
elsif($start) { print $_; }
'
done
else
for (( i=0; i<${#MOTIFS[@]}; i++ )); do
MOTIFS[$i]=$(echo ${MOTIFS[$i]} | perl -ane 's/\//\\\//g; print $_;')
zless $MOTIFFILE | perl -ane '
if($_=~/'${MOTIFS[$i]}'.*\//) {
@t=split(/\s+/,$_);
$t[1]=~s/^.*\(/'${MOTIFS[$i]}'(/g;
$line=();
foreach(@t) { $line.="$_\t"; }
$line=~s/\t$//g;
print "$line\n"; $start=1;
}
elsif($_=~/^\>/) { $start=0; }
elsif($start) { print $_; }
'
done
fi
if [ ! -z "$TMP" ]; then
rm $TMP
fi
#wait
#wait_for_jobs_to_finish "Wait for jobs to finish... "
#echo "done"