forked from MRCIEU/godmc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
17a-setup.sh
executable file
·71 lines (47 loc) · 1.64 KB
/
17a-setup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/bash
#
# 17a-setup.sh
#
# Prepares the inputs for section 17:
#   1. downloads lists_17.tgz and its md5sum over sftp and verifies it,
#   2. unpacks the archive into ${phase2_list_17},
#   3. copies every row matching snplist.txt out of each genetic chunk
#      (${tabfile}.tab.1 .. ${tabfile}.tab.N) into ${snpfile17}, preceded by
#      the first line of chunk 1,
#   4. runs plink --freq restricted to the same SNP list.
#
# Variables read from ./config:
#   section_17a_logfile, sftp_username, sftp_address, sftp_path,
#   phase2_list_17, tabfile, genetic_chunks, snpfile17, plink, bfile
# print_version is not defined in this script; presumably ./config supplies
# it - TODO confirm.
#
# Exit status: 0 on success, non-zero on any failure (including a mismatch
# between the number of chunk files and genetic_chunks).

set -e
source ./config

# From here on, stdout and stderr are both copied into the section log file.
exec &> >(tee "${section_17a_logfile}")
print_version

# Extract the SNP-CpG lists

echo "Downloading list of putative associations"

# A failed `get` is not detected by this command itself; the md5sum check
# below stops the script if either file is missing or does not verify.
sftp "${sftp_username}@${sftp_address}:${sftp_path}/resources/phase2" <<EOF
get lists_17.tgz
get lists_17.tgz.md5sum
EOF

echo "Checking download integrity"
md5sum -c lists_17.tgz.md5sum

echo "Extracting"
# ${var:?} aborts the script rather than expanding to nothing when the
# config left the directory unset or empty.
rm -rf -- "${phase2_list_17:?}"
mkdir -p -- "${phase2_list_17}"
tar xzf lists_17.tgz -C "${phase2_list_17}"
rm -- lists_17.tgz*

echo "Creating genetic files"

# Count the chunk files with a glob instead of parsing `ls -l | wc -l`.
# When nothing matches, the array holds the literal pattern (or is empty),
# which the -e test turns into a count of 0.
genetic_batch_files=("${tabfile}".tab.*)
if [ -e "${genetic_batch_files[0]}" ]
then
  n_genetic_batch=${#genetic_batch_files[@]}
else
  n_genetic_batch=0
fi

echo "Genetic data is split into ${n_genetic_batch} chunks"
if [ "${n_genetic_batch}" != "${genetic_chunks}" ]
then
  echo "Problem: Genetic data has been split into ${n_genetic_batch}, but the number of batches specified in the config file is ${genetic_chunks}"
  echo "Please either change the 'genetic_chunks' variable in the config file to ${n_genetic_batch} or re-run script 02b"
  # Explicit non-zero status: a bare `exit` would report the status of the
  # echo above (0) and make this failure look like success.
  exit 1
fi

# Matched rows are collected here first, then joined with the header below.
snp_rows="${snpfile17}_temp"
: > "${snp_rows}"

for ((i = 1; i <= n_genetic_batch; i++))
do
  echo "Extracting SNPs from batch ${i} of ${n_genetic_batch}"
  # grep exits 1 when a chunk simply contains none of the SNPs; that is fine
  # and must not trip `set -e`. Anything above 1 is a real error (for example
  # an unreadable chunk) and would silently truncate the output if ignored.
  grep_status=0
  grep -F -w -f "${phase2_list_17}/snplist.txt" "${tabfile}.tab.${i}" >> "${snp_rows}" || grep_status=$?
  if [ "${grep_status}" -gt 1 ]
  then
    echo "Problem: could not search ${tabfile}.tab.${i} (grep exit status ${grep_status})" >&2
    exit "${grep_status}"
  fi
done

# Final file = first line of chunk 1 followed by all matched rows.
{
  head -n 1 "${tabfile}.tab.1"
  cat "${snp_rows}"
} > "${snpfile17}"

c1=$(wc -l < "${phase2_list_17}/snplist.txt")
c2=$(wc -l < "${snp_rows}")
echo "Found ${c2} out of ${c1} SNPs"

rm -- "${snp_rows}"

echo "Creating SNP info file"

# ${plink} is intentionally left unquoted in case the config value carries
# extra arguments - TODO confirm and quote if it is always a bare path.
${plink} --bfile "${bfile}" \
  --freq gz \
  --extract "${phase2_list_17}/snplist.txt" \
  --out "${phase2_list_17}/snpinfo"

echo "Successfully completed script 17a"