-
Notifications
You must be signed in to change notification settings - Fork 7
/
giraffe_accuracy_primary_kubernetes
executable file
·115 lines (114 loc) · 5.69 KB
/
giraffe_accuracy_primary_kubernetes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Launch the "xhchang-giraffe-primary" Kubernetes Job.
#
# Any previous Job with the same name is deleted first (a missing Job is not
# an error), then the manifest below is applied. The heredoc delimiter is
# quoted so that the local shell performs no expansion: every ${...} in the
# manifest is evaluated by bash inside the container.
#
# The container script maps simulated reads with `vg giraffe` against
# primary-path graphs, scores the mappings with `vg gamcompare`, and uploads
# two files to S3:
#   report.tsv        one summary row per graph/GBWT/read set/pairing
#   roc_stats.tsv.gz  one row per compared read (correct, mq, score, aligner)
kubectl delete job xhchang-giraffe-primary --ignore-not-found && kubectl apply -f - <<'EOF'
apiVersion: batch/v1
kind: Job
metadata:
  name: xhchang-giraffe-primary
spec:
  # Keep the finished Job around for 24 hours (86400 s) before cleanup.
  ttlSecondsAfterFinished: 86400
  template:
    spec:
      containers:
      - name: xhchang-giraffe-primary
        imagePullPolicy: Always
        image: xhchang/vg:giraffe-paper
        command:
        - /bin/bash
        - -c
        - |
          # Fail fast and trace every command. pipefail makes a crash in any
          # pipeline stage (e.g. vg giraffe, vg gamcompare) abort the run
          # instead of being masked by the last stage's exit status.
          set -ex -o pipefail
          cd /tmp
          # Column headers for the two output tables.
          printf 'graph\tgbwt\treads\tpairing\tspeed\tcorrect\tmapq60\twrong_mapq60\tscore\n' > report.tsv
          printf 'correct\tmq\tscore\taligner\n' > roc_stats.tsv
          for SPECIES in human ; do
            # Pick the graphs, simulated read sets and GBWT variants to test.
            case "${SPECIES}" in
              yeast)
                GRAPHS=(S288C)
                READSETS=(DBVPG6044 DBVPG6765 N44 UWOPS034614 UWOPS919171 Y12 YPS138)
                GBWTS=(raw)
                ;;
              human)
                GRAPHS=(1000gp hgsvc)
                READSETS=(novaseq6000 hiseqxten hiseq2500)
                # We really only need one GBWT because we have no variation
                GBWTS=(cover)
                ;;
              *)
                # Without this arm a typo would silently reuse stale settings.
                echo "Unknown species: ${SPECIES}" >&2
                exit 1
                ;;
            esac
            for GRAPH in "${GRAPHS[@]}" ; do
              for READS in "${READSETS[@]}" ; do
                # Resolve the graph file prefix and fetch the matching reads.
                # Both human graph names use the same primary graph prefix;
                # only the simulated reads differ.
                case "${GRAPH}" in
                  1000gp)
                    GRAPH_BASE=s3://vg-k8s/profiling/graphs/v2-2/generic/primary/hs38d1/primaryhs38d1
                    aws s3 cp "s3://vg-k8s/profiling/reads/sim/for-NA19239/1000gp/hs38d1/liftover_nosegdups/${READS}/out_sim_gbwt/sim.gam" ./sim.gam
                    ;;
                  hgsvc)
                    GRAPH_BASE=s3://vg-k8s/profiling/graphs/v2-2/generic/primary/hs38d1/primaryhs38d1
                    aws s3 cp "s3://vg-k8s/profiling/reads/sim/for-NA19240/hgsvc/grch38/${READS}/out_sim_gbwt/sim.gam" ./sim.gam
                    ;;
                  S288C)
                    GRAPH_BASE=s3://vg-k8s/profiling/graphs/v2/generic/primary/S288C/primaryS288C
                    aws s3 cp "s3://vg-k8s/profiling/reads/sim/yeast/sim-${READS}.gam" ./sim.gam
                    ;;
                  *)
                    # Otherwise GRAPH_BASE and sim.gam from a previous
                    # iteration would be reused and mislabeled.
                    echo "Unknown graph: ${GRAPH}" >&2
                    exit 1
                    ;;
                esac
                aws s3 cp "${GRAPH_BASE}.xg" ./graph.xg
                aws s3 cp "${GRAPH_BASE}.dist" ./graph.dist
                for GBWT in "${GBWTS[@]}" ; do
                  # Drop index files left over from the previous iteration.
                  rm -f graph.gbwt graph.gg graph.min
                  # "raw" indexes carry no GBWT-name infix in their file names.
                  if [[ "${GBWT}" == "raw" ]] ; then
                    INDEX_BASE="${GRAPH_BASE}"
                  else
                    INDEX_BASE="${GRAPH_BASE}.${GBWT}"
                  fi
                  aws s3 cp "${INDEX_BASE}.gbwt" ./graph.gbwt
                  aws s3 cp "${INDEX_BASE}.gg" ./graph.gg
                  aws s3 cp "${INDEX_BASE}.min" ./graph.min
                  for PAIRING in single paired ; do
                    # Extra giraffe arguments: -i is passed for paired mode
                    # only (presumably "interleaved input" -- confirm against
                    # vg giraffe --help).
                    PAIRED=()
                    if [[ "${PAIRING}" == "paired" ]] ; then
                      PAIRED=(-i)
                    fi
                    # Mapped reads go to mapped.gam; stderr is scanned for the
                    # line containing "speed" and reduced to digits and dots.
                    # The grep is guarded because "no match" is not an error.
                    SPEED="$(vg giraffe -x graph.xg -H graph.gbwt -g graph.gg -m graph.min -d graph.dist -G sim.gam "${PAIRED[@]}" -t 22 -p 2>&1 >mapped.gam | { grep speed || true ; } | sed 's/[^0-9\.]//g')"
                    # Compare annotated mappings against the simulated truth.
                    # gamcompare's stderr is captured in "count"; its first
                    # two lines are parsed below.
                    vg gamcompare -r 100 -s <(vg annotate -m -x graph.xg -a mapped.gam) sim.gam 2>count | vg view -aj - > compared.json
                    CORRECT_COUNT="$(sed -n '1p' count | sed 's/[^0-9]//g')"
                    SCORE="$(sed -n '2p' count | sed 's/[^0-9\.]//g')"
                    # grep -c prints 0 but exits 1 when nothing matches, so the
                    # status is neutralised to keep set -e from aborting.
                    MAPQ="$(grep -c 'mapping_quality": 60' compared.json || true)"
                    # MAPQ-60 reads among the lines lacking "correctly_mapped".
                    MAPQ60="$({ grep -v correctly_mapped compared.json || true ; } | grep -c 'mapping_quality": 60' || true)"
                    # Mean identity; only logged, not written to report.tsv.
                    IDENTITY="$(jq '.identity' compared.json | awk '{sum+=$1} END {print sum/NR}')"
                    echo "${GRAPH} ${GBWT} ${READS} ${PAIRING} ${SPEED} ${CORRECT_COUNT} ${MAPQ} ${MAPQ60} ${IDENTITY} ${SCORE}"
                    # Values are passed as arguments, never as the format.
                    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' "${GRAPH}" "${GBWT}" "${READS}" "${PAIRING}" "${SPEED}" "${CORRECT_COUNT}" "${MAPQ}" "${MAPQ60}" "${SCORE}" >> report.tsv
                    # One ROC row per read: correct flag, mapping quality,
                    # score, then an aligner label naming this condition.
                    jq -r '(if .correctly_mapped then 1 else 0 end|tostring) + "," + (.mapping_quality|tostring) + "," + (.score|tostring)' compared.json | sed -e 's/,/\t/g' -e "s/$/\tgiraffe_primary_${GRAPH}${GBWT}${READS}${PAIRING}/" >> roc_stats.tsv
                  done
                done
              done
            done
          done
          # Normalise labels (drop "single", "paired" -> "-pe") and replace
          # jq's "null" for missing fields with 0.
          sed -i 's/single//g ; s/paired/-pe/g ; s/null/0/g' roc_stats.tsv
          gzip roc_stats.tsv
          aws s3 cp report.tsv s3://vg-k8s/users/xhchang/giraffe_experiments_2/liftover/nosegdups/report_giraffe_primary.tsv
          aws s3 cp roc_stats.tsv.gz s3://vg-k8s/users/xhchang/giraffe_experiments_2/liftover/nosegdups/roc_stats_giraffe_primary.tsv.gz
        volumeMounts:
        # Scratch space: the script works in /tmp.
        - mountPath: /tmp
          name: scratch-volume
        # AWS CLI credentials supplied from a cluster secret.
        - mountPath: /root/.aws
          name: s3-credentials
        resources:
          requests:
            cpu: 24
            memory: "120Gi"
            ephemeral-storage: "150Gi"
          limits:
            cpu: 24
            memory: "120Gi"
            ephemeral-storage: "150Gi"
      restartPolicy: Never
      volumes:
      - name: scratch-volume
        emptyDir: {}
      - name: s3-credentials
        secret:
          secretName: shared-s3-credentials
  # Never retry a failed pod.
  backoffLimit: 0
EOF