This repository has been archived by the owner on Sep 21, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ITS_MRDNA_alpha.sh
executable file
·52 lines (39 loc) · 2.05 KB
/
ITS_MRDNA_alpha.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env zsh
# Prototyping ITS processing script using:
# cutadapt
# USEARCH v9
# QIIME
echo "Enter project codename that will be given to the output files as an idenfitier:"
read project
echo $(date +%Y-%m-%d\ %H:%M) "Project name: $project"
# Cut adaptor based on the sequences MRDNA used
# Make sure these are the primer sequences in the data!w
for i in *.fastq; do
cutadapt -g CTTGGTCATTTAGAGGAAGTTAA -a GCATCGATGAAGAACGCAGC -n 2 $i -o ${i}.trimmed | tee ${i}.log;
done
# 1. Filter FASTQ for representative sequence picking and
# 2. convert FASTQ to unfiltered FASTA to map back onto the rep seq
for i in *.trimmed; do
sampleID=$(echo "$i" | cut -d "." -f1)
usearch9 -fastq_filter $i -fastq_maxee 1.0 -fastqout "${sampleID}_filtered.fastq" -sample $sampleID -log ${sampleID}_filtered.log
usearch9 -fastq_filter $i -fastaout "${sampleID}_unfiltered.fasta" -sample $sampleID -log ${sampleID}_unfiltered.log
done
# Concatenate the filtered reads into a single FASTQ file
for file in *_filtered.fastq; do
cat $file >> "${project}_sequence.fastq"
done
# Concatenate the unfiltered reads into a single FASTA file
for file in *_unfiltered.fasta; do
cat $file >> "${project}_unfiltered_seq.fasta"
done
# Dereplicate
usearch9 -fastx_uniques "${project}_sequence.fastq" -sizeout -fastaout "${project}_uniques.fasta" -log uniuqes.log
# Some filtering
mothur "#trim.seqs(fasta="${project}_uniques.fasta", minlength=200, maxlength=500, maxhomop=20)"
# OTU clustering
usearch9 -cluster_otus "${project}_uniques.trim.fasta" -minsize 2 -otus "${project}_rep_numbered.fasta" -relabel OTU
# Generating readmap and biom file
usearch9 -usearch_global "${project}_unfiltered_seq.fasta" -db "${project}_rep_numbered.fasta" -id 0.97 -strand plus -uc "${project}_readmap.uc" -biomout "${project}.biom"
# USEARCH SINTAX taxonomy classification
# The output can be integrated into analysis using RDPUtils R package
usearch9 -sintax "${project}_rep_numbered.fasta" -db ~/Bioinformatics/RDP_ITS/rdp_its_v2.fa -strand both -tabbedout "${project}_rdp_sintax.txt" -log "${project}_rdp_sintax.log"