Skip to content

Commit

Permalink
Esearch input (#47)
Browse files Browse the repository at this point in the history
* Add genomes (#45) (#46)

* Corynebacterium diphtheriae

* added Bifidobacterium adolescentis

* replaced S. enterica IIIa; Added hops (Humulus lupulus)

* added a Citrobacter species

* m

* replaced repressed genome accession for B. faecium

* remove random single quotes

* bump version

* helpful log messages

* v5.6.3

* make symlink to avoid naming mistakes

* check whether taxonkit is loaded

* use efetch -input

* fix tr bug
  • Loading branch information
lskatz authored Jun 5, 2024
1 parent c830a08 commit ee349d5
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 16 deletions.
8 changes: 8 additions & 0 deletions bin/buildKraken1.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,15 @@ cp -rv $TAXDIR $DB/taxonomy

# Make --add-to-library more efficient with
# concatenated fasta files
export nl=$'\n'
# Feed the fasta.gz files under $SRC to kraken-build in batches of up to 100:
# each batch is decompressed into $tmpfile, its sequence headers are logged,
# and the concatenated file is added to the library in one call.
# The script below is single-quoted, so $tmpfile, $DB and $nl are expanded by
# the child bash and must be present in its environment
# (NOTE(review): tmpfile and DB are assumed to be exported elsewhere - confirm).
find $SRC -name '*.fasta.gz' | \
xargs -n 100 -P 1 bash -c '
  # The word after this script ("addToLibrary") becomes $0, so "$@" holds
  # every file name of the batch. Without that placeholder the first file
  # of each batch would be consumed as $0 and never added to the library.
  for i in "$@"; do
    gzip -cd "$i"
  done > "$tmpfile"
  echo -ne "ADDING to library:\n "
  # Print the deflines of the batch on a single line
  zgrep "^>" "$tmpfile" | sed "s/^>//" | tr "$nl" " "
  echo "^^ contents of $tmpfile ^^"
  kraken-build --db "$DB" --add-to-library "$tmpfile"
' addToLibrary

Expand All @@ -35,3 +39,7 @@ kraken-build --db $DB --build --threads 1
# Reduce the size of the database
kraken-build --db $DB --clean


# Make kalamari-kraken1 available as a symlink to kalamari-kraken inside
# $sharedir, unless something already occupies that name.
# -e follows symlinks and is therefore false for a dangling link; the extra
# -L test keeps ln from failing with "File exists" in that situation.
if [ ! -e "$sharedir/kalamari-kraken1" ] && [ ! -L "$sharedir/kalamari-kraken1" ]; then
  ln -sv kalamari-kraken "$sharedir/kalamari-kraken1"
fi
26 changes: 12 additions & 14 deletions bin/downloadKalamari.pl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
use IO::Compress::Gzip;
use version 0.77;

our $VERSION = version->parse("5.6.0");
our $VERSION = version->parse("5.7.0");

use threads;

Expand Down Expand Up @@ -167,27 +167,25 @@ sub downloadEntries{
my $numEntries = scalar(@$entries);
my @acc = map{$$_{nuccoreAcc}} @$entries;
logmsg "Downloading ".scalar(@acc)." accessions";
my $queryArg = join("[accession] OR ", sort(@acc))."[accession]";
my $dir = tempdir("download.XXXXXX", DIR=>$$settings{tempdir});

# Make the input file for efetch
my $inputAcc = "$dir/input.acc";
open(my $fh, ">", $inputAcc) or die "ERROR: could not write to $inputAcc: $!";
print $fh join("\n", @acc)."\n";
close $fh;

# Accessions that had errors
my @err;

# Get the esearch xml in place for at least one downstream query
my $esearchXml = "$dir/esearch.xml";
my $esearchCmd = "esearch -db nuccore -query '$queryArg' > $esearchXml";
command($esearchCmd);
# Get started on the comprehensive assembly file
my $outfile = "$dir/all.fasta";
logmsg "Downloading all accessions to $outfile using input accessions in $inputAcc";
command("efetch -db nuccore -input $inputAcc -format fasta > $dir/all.fasta");
if($?){
die "ERROR running: $esearchCmd: $!";
die "ERROR: could not download all accessions";

Check failure on line 186 in bin/downloadKalamari.pl

View workflow job for this annotation

GitHub Actions / Yersinia Perl 5.32 on ubuntu-20.04

Thread 1 terminated abnormally: ERROR: could not download all accessions

Check failure on line 186 in bin/downloadKalamari.pl

View workflow job for this annotation

GitHub Actions / Perl 5.32 on ubuntu-20.04

Thread 1 terminated abnormally: ERROR: could not download all accessions

Check failure on line 186 in bin/downloadKalamari.pl

View workflow job for this annotation

GitHub Actions / chunk 0 Perl 5.32 on ubuntu-20.04

Thread 1 terminated abnormally: ERROR: could not download all accessions

Check failure on line 186 in bin/downloadKalamari.pl

View workflow job for this annotation

GitHub Actions / chunk 1 Perl 5.32 on ubuntu-20.04

Thread 1 terminated abnormally: ERROR: could not download all accessions

Check failure on line 186 in bin/downloadKalamari.pl

View workflow job for this annotation

GitHub Actions / Listeria Perl 5.32 on ubuntu-20.04

Thread 1 terminated abnormally: ERROR: could not download all accessions
}

# Get started on the assembly file
my $outfile = "$dir/all.fasta";

# Main query: efetch
my $efetchCmd = "cat $esearchXml | efetch -format fasta > $outfile";
system($efetchCmd);

my $seqsWithVersion = readSeqs($outfile);
my $seqs = {};
while(my($acc, $seq) = each(%$seqsWithVersion)){
Expand Down
3 changes: 3 additions & 0 deletions bin/filterTaxonomy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

set -eu

# Check for dependencies.
# command -v is the POSIX way to look up an executable; unlike `which` it
# behaves the same everywhere, and the explicit message avoids a silent
# exit under `set -e` when taxonkit is missing. Nothing is written to stdout.
if ! command -v taxonkit >/dev/null 2>&1; then
  echo "ERROR: taxonkit was not found in PATH" >&2
  exit 1
fi

thisdir=$(dirname $0)
thisfile=$(basename $0)
KALAMARI_VER=$(downloadKalamari.pl --version)
Expand Down
4 changes: 2 additions & 2 deletions src/plasmids.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -2964,7 +2964,7 @@ Rickettsia CP015014 780 33988
Rickettsia CP010970 780 33988
Onion yellows phytoplasma AB480166 100379 85620
Onion yellows phytoplasma AB479509 100379 85620
'Brassica napus' phytoplasma HQ637382 469009 85620
Brassica napus phytoplasma HQ637382 469009 85620
Candidatus Phytoplasma FJ905104 33926 2146
Candidatus Phytoplasma KF801472 33926 2146
Onion yellows phytoplasma AB479513 100379 2146
Expand All @@ -2986,7 +2986,7 @@ Paulownia witches'-broom phytoplasma EF426472 39647 85620
Paulownia witches'-broom phytoplasma EF426473 39647 85620
Periwinkle little leaf phytoplasma JN835187 137854 85635
Rice orange leaf phytoplasma KY086101 146897 85635
'Catharanthus roseus' aster yellows phytoplasma CP035950 1193712 85620
Catharanthus roseus aster yellows phytoplasma CP035950 1193712 85620
Bacillus thuringiensis CP016196 1428 85620
Bacillus sp. BS98 CP043831 2608254 185979
Bacillus CP009595 1386 185979
Expand Down

0 comments on commit ee349d5

Please sign in to comment.