-
Notifications
You must be signed in to change notification settings - Fork 7
/
giraffe_control_accuracy_kubernetes
executable file
·86 lines (80 loc) · 4.22 KB
/
giraffe_control_accuracy_kubernetes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Launch (or relaunch) the Giraffe "control" accuracy benchmark as a Kubernetes Job.
# Any existing Job with the same name is deleted first; a failed delete (for
# example, when no such Job exists) is ignored so `kubectl apply` still runs.
# The heredoc delimiter is quoted, so nothing inside the manifest is expanded by
# the launching shell; all ${...} references are evaluated inside the container.
(kubectl delete job xhchang-giraffe-control || true) && kubectl apply -f - <<'EOF'
apiVersion: batch/v1
kind: Job
metadata:
  name: xhchang-giraffe-control
spec:
  # 86400 s = 24 h: the finished Job is cleaned up one day after completion.
  ttlSecondsAfterFinished: 86400
  template:
    spec:
      containers:
        - name: xhchang-giraffe
          imagePullPolicy: Always
          image: xhchang/vg:giraffe-paper
          command:
            - /bin/bash
            - -c
            - |
              set -x
              cd /tmp
              # Headers for the per-run summary (report.tsv) and the per-read ROC table (roc_stats.tsv).
              printf "graph\tgbwt\treads\tpairing\tspeed\tcorrect\tmapq60\twrong_mapq60\tidentity\tscore\n" > report.tsv
              printf "correct\tmq\tscore\taligner\n" > roc_stats.tsv
              # Mapping speed is not measured by this script; the "speed" column is written empty.
              SPEED=""
              for GRAPH in hgsvc ; do
                # Fetch the graph indexes once per graph. READS_BASE is the S3 prefix under which
                # each simulated read set lives, and GBWT is the label written to the "gbwt"
                # report column (taken from the name of the GBWT file downloaded below).
                if [[ "${GRAPH}" == 1000gp ]] ; then
                  GRAPH_BASE=s3://vg-k8s/profiling/graphs/v3/for-NA19239/1000gplons/hs38d1/1000GPlons_hs38d1_NA19239_sample_withref
                  READS_BASE=s3://vg-k8s/profiling/reads/sim/for-NA19239/1000gp/hs38d1/liftover_nosegdups
                  GBWT=force.augment
                  aws s3 cp ${GRAPH_BASE}.force.augment.gbwt ./graph.gbwt
                  aws s3 cp ${GRAPH_BASE}.dist graph.dist
                  aws s3 cp ${GRAPH_BASE}.xg ./graph.xg
                elif [[ "${GRAPH}" == hgsvc ]] ; then
                  # NOTE(review): assumes the hgsvc read sets follow the same
                  # <prefix>/<READS>/out_sim_gbwt/sim.gam layout as 1000gp — confirm in S3.
                  READS_BASE=s3://vg-k8s/profiling/reads/sim/for-NA19240/hgsvc/grch38
                  GBWT=onlyNA19240.augment
                  aws s3 cp s3://vg-k8s/profiling/graphs/v2/for-NA19240/hgsvc/hs38d1/HGSVC_hs38d1.annotation.xg ./graph.xg
                  aws s3 cp s3://vg-k8s/profiling/graphs/v2/for-NA19240/hgsvc/hs38d1/HGSVC_hs38d1.onlyNA19240.augment.gbwt ./graph.gbwt
                  aws s3 cp s3://vg-k8s/profiling/graphs/v2/for-NA19240/hgsvc/hs38d1/HGSVC_hs38d1.annotation.dist graph.dist
                fi
                for READS in novaseq6000 hiseqxten hiseq2500 ; do
                  # Download the simulated reads for THIS read set. Fetching them once per graph
                  # (outside this loop) would make every READS iteration map the same file.
                  aws s3 cp ${READS_BASE}/${READS}/out_sim_gbwt/sim.gam ./sim.gam
                  for PAIRING in single paired ; do
                    if [[ ${PAIRING} == "paired" ]] ; then
                      PAIRED="-i"
                    elif [[ ${PAIRING} == "single" ]] ; then
                      PAIRED=""
                    fi
                    # ${PAIRED} is deliberately unquoted so that it disappears entirely when empty.
                    # NOTE(review): the numeric options presumably lift Giraffe's default caps
                    # (cf. the "nocaps" aligner label below) — confirm against `vg giraffe --help`.
                    vg giraffe -x graph.xg -H graph.gbwt -d graph.dist -G sim.gam ${PAIRED} -c 100 -C 5000 -F 1.0 -e 10000 -a 1000 -s 0 -u 0 -v 0 -w 0 -t 22 > mapped.gam
                    # gamcompare's stderr is captured in ./count: line 1 yields the correct-read
                    # count and line 2 the score, as parsed below.
                    vg gamcompare -r 100 -s <(vg annotate -m -x graph.xg -a mapped.gam) sim.gam 2>count | vg view -aj - > compared.json
                    CORRECT_COUNT="$(sed -n '1p' count | sed 's/[^0-9]//g')"
                    SCORE="$(sed -n '2p' count | sed 's/[^0-9\.]//g')"
                    # Reads reported at MAPQ 60, and the subset of those not flagged correctly_mapped.
                    MAPQ="$(grep -c 'mapping_quality": 60' compared.json)"
                    MAPQ60="$(grep -v correctly_mapped compared.json | grep -c 'mapping_quality": 60')"
                    IDENTITY="$(jq '.identity' compared.json | awk '{sum+=$1} END {print sum/NR}')"
                    # One field per header column; values are passed as arguments, never as the format.
                    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' "${GRAPH}_control" "${GBWT}" "${READS}" "${PAIRING}" "${SPEED}" "${CORRECT_COUNT}" "${MAPQ}" "${MAPQ60}" "${IDENTITY}" "${SCORE}" >> report.tsv
                    # Per-read rows: correct flag, MAPQ, score, then an aligner label for this condition.
                    jq -r '(if .correctly_mapped then 1 else 0 end|tostring) + "," + (.mapping_quality|tostring) + "," + (.score|tostring)' compared.json | sed 's/,/\t/g' | sed "s/$/\tgiraffe_control-nocaps_${GRAPH}${READS}${PAIRING}/" >> roc_stats.tsv
                  done
                done
              done
              # Normalise aligner labels (drop "single", "paired" -> "-pe") and turn jq "null"s into 0.
              sed -i 's/single//g ; s/paired/-pe/g ; s/null/0/g' roc_stats.tsv
              gzip roc_stats.tsv
              aws s3 cp report.tsv s3://vg-k8s/users/xhchang/giraffe_experiments_2/liftover/nosegdups/report_giraffe_control.tsv
              # NOTE(review): the object is gzip-compressed but its key ends in .tsv; the key is kept
              # as-is so existing consumers of this path are not broken.
              aws s3 cp roc_stats.tsv.gz s3://vg-k8s/users/xhchang/giraffe_experiments_2/liftover/nosegdups/roc_stats_giraffe_control.tsv
          volumeMounts:
            - mountPath: /tmp
              name: scratch-volume
            - mountPath: /root/.aws
              name: s3-credentials
          resources:
            requests:
              cpu: 24
              memory: "250Gi"
              ephemeral-storage: "250Gi"
            limits:
              cpu: 24
              memory: "250Gi"
              ephemeral-storage: "250Gi"
      restartPolicy: Never
      volumes:
        - name: scratch-volume
          emptyDir: {}
        - name: s3-credentials
          secret:
            secretName: shared-s3-credentials
  # Never retry: a failed benchmark run is left failed.
  backoffLimit: 0
EOF