-
Notifications
You must be signed in to change notification settings - Fork 1
/
mapping.R
128 lines (115 loc) · 2.98 KB
/
mapping.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
### script to pre-process and map/align sequencing reads
# load libs
source('functions.R')
# load parameters
conf <- load_mapping_config()
# load tool locations
tool_path <- load_tool_location()
# set max cores
max_cores <- get_max_cores() # from functions.R
# get args
get_dir <- commandArgs(trailingOnly = TRUE)
# check input dir
in_path <- parse_dir(get_dir[1])
if(!dir.exists(in_path)){
stop(
paste0(in_path, ' does not exist!')
)
}
# check if fasta files
in_files <- list.files(
path = in_path,
pattern = '.fastq',
ignore.case = TRUE
)
if(length(in_files) < 1){
stop(
paste0('No FASTQ file(s) found!')
)
}
# check output dir
out_path <- parse_dir(get_dir[2])
if(!dir.exists(out_path)){
dir.create(out_path)
}
if(!dir.exists(paste0(out_path,'mapping/'))){
dir.create(paste0(out_path,'mapping/'))
}
if(!dir.exists(paste0(out_path,'bowtie_index/'))){
dir.create(paste0(out_path,'bowtie_index/'))
}
if(!dir.exists(paste0(out_path,'adapterRemoval/'))){
dir.create(paste0(out_path,'adapterRemoval/'))
}
# check reference fasta input
fa_file <- get_dir[3]
if(!file.exists(fa_file)){
stop('No FASTA file given!')
} else if(grep(".(fa|fasta|fna|mfa)$", fa_file, ignore.case = TRUE) != 1){
stop('Given FASTA file NOT with fasta | fa | mfa | fna file end!')
}
### adapter removal
# activate 5' adapter removal if set
if(conf['cutadapt_5prime_adapter'] != ''){
five_prime <- paste0(' -g ',conf['cutadapt_5prime_adapter'])
} else {
five_prime <- ''
}
# remove adapters
cat('Adapter removal: \n')
for(i in 1:length(in_files)){
# status print
cat(paste0(' ',in_files[i]," ... \n"))
# run
system(
paste0(
tool_path['cutadapt'],
' --cores ',max_cores,
' -m ',conf['cutadapt_min_read_length'],
' -q ',conf['cutadapt_quality_trimming'],
' -a ',conf['cutadapt_3prime_adapter'],
five_prime,
' -o ',out_path,'adapterRemoval/',in_files[i],
' ',in_path,in_files[i],
' > ',out_path,'adapterRemoval/',gsub(".fastq.*","",in_files[i]),'_info.txt'
)
)
# status print
cat('DONE \n')
}
### read mapping / alignment
# create index if not present
cat('Creating index for mapping \n')
system(
paste0(
tool_path['bowtie'],'-build ',
' -q --threads ',max_cores,
' -f ',fa_file,
' ',out_path,'bowtie_index/index'
)
)
# mapping
cat('Mapping sequencing reads: \n')
for(i in 1:length(in_files)){
# status print
cat(paste0(' ',in_files[i],' ... '))
# run
system(
paste0(
tool_path['bowtie'],
' -t',
' -p ',max_cores,
' -v ',conf['bowtie_v'],
' -m ',conf['bowtie_m'],
' -S ',out_path,'bowtie_index/index',
' ',out_path,'adapterRemoval/',in_files[i],
' 2> ',out_path,'mapping/',gsub(".fastq.*","",in_files[i]),'_info.txt',
' | samtools sort -O bam',
' -@ ',max_cores,
' -T ',out_path,'mapping/',gsub(".fastq.*","",in_files[i]),'.sort',
' -o ',out_path,'mapping/',gsub(".fastq.*","",in_files[i]),'.sort.bam'
)
)
# status print
cat('DONE \n')
}