From 529458cf71aba5698a9cc03c7792b48223f4fbee Mon Sep 17 00:00:00 2001 From: eliberg Date: Tue, 6 Feb 2024 15:40:55 +0100 Subject: [PATCH 1/9] Add genomes on server functionality --- tools/ncbi_blast_plus/ncbi_makeblastdb.xml | 263 +++++++++++---------- 1 file changed, 140 insertions(+), 123 deletions(-) diff --git a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml index 3eaeb0fa..99c57cd7 100644 --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml @@ -17,14 +17,19 @@ python $__tool_directory__/check_no_duplicates.py ##makeblastdb -in <(gunzip -c gzipped_fasta_file) ##therefore we're cramming everything ##into a single cat command below -cat -#for i in $input_file: - #if $i.is_of_type('fasta.gz') and $i.ext != "fasta": - <(gunzip -c ${i}) - #else: - ${i} - #end if -#end for +## insert genome on server - option +#if str($input_selection.source) == "history": + cat + #for i in $input_file: + #if $i.is_of_type('fasta.gz') and $i.ext != "fasta": + <(gunzip -c ${i}) + #else: + ${i} + #end if + #end for +#else: + '$input_selection.ifile.fields.path' +#end if | makeblastdb -out '${os.path.join($outfile.files_path, "blastdb")}' -blastdb_version 4 $parse_seqids @@ -60,52 +65,64 @@ $hash_index > '$outfile' ]]> - - - + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + **What it does** From 1192c32ab5e886bff5f3a3980ea4631652cbddf2 Mon Sep 17 00:00:00 2001 From: eliberg Date: Tue, 13 Feb 2024 11:15:59 +0100 Subject: [PATCH 2/9] Add new new option for nucl database - Genome on server --- 
tools/ncbi_blast_plus/ncbi_makeblastdb.xml | 87 ++++++++++++++-------- 1 file changed, 56 insertions(+), 31 deletions(-) diff --git a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml index 99c57cd7..be0d5f28 100644 --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml @@ -11,52 +11,70 @@ python $__tool_directory__/check_no_duplicates.py ##First check for duplicates (since BLAST+ 2.2.28 fails to do so) ##and abort (via the ampersand ampersand trick) if any are found. -#for i in $input_file#'${i}' #end for# + +#if $input_selection.dbtype == "prot": + #for i in $input_selection.input_file#'${i}' #end for# +#elif $input_selection.dbtype == "nucl": + #for i in $input_selection.nucl_cond.input_file#'${i}' #end for# +#end if && ##makeblastdb does not like input redirects of the sort ##makeblastdb -in <(gunzip -c gzipped_fasta_file) ##therefore we're cramming everything ##into a single cat command below -## insert genome on server - option -#if str($input_selection.source) == "history": + ## insert genome on server - option + +#if $input_selection.dbtype == "prot": cat - #for i in $input_file: + #for i in $input_selection.input_file: + #if $i.is_of_type('fasta.gz') and $i.ext != "fasta": + <(gunzip -c ${i}) + #else: + ${i} + #end if + #end for +#elif $input_selection.dbtype == "nucl": + #if $input_selection.nucl_cond.source == "history": + cat + #for i in $input_selection.nucl_cond.input_file: #if $i.is_of_type('fasta.gz') and $i.ext != "fasta": <(gunzip -c ${i}) #else: ${i} #end if - #end for -#else: - '$input_selection.ifile.fields.path' + #end for + #else: + '$input_selection.nucl_cond.ifile.fields.path' + #end if #end if | makeblastdb -out '${os.path.join($outfile.files_path, "blastdb")}' -blastdb_version 4 $parse_seqids $hash_index -in - +-dbtype $input_selection.dbtype #if $title: --title '${title}' + -title '${title}' #else: ##Would default to being based on the cryptic Galaxy 
filenames, which is unhelpful --title 'BLAST Database' + -title 'BLAST Database' #end if --dbtype $dbtype + ## -------------------------------------------------------------------- ## Masking ## -------------------------------------------------------------------- ## HACK: If no mask files, evaluates as a list with just None in it: ## See Trello issue https://trello.com/c/lp5YmA1O #if ' '.join( map(str, $mask_data_file) ) != 'None': -#for i in $mask_data_file: --mask_data '${i}' -#end for + #for i in $mask_data_file: + -mask_data '${i}' + #end for #end if ## -------------------------------------------------------------------- ## Taxonomy ## -------------------------------------------------------------------- #if $tax.taxselect == 'id': --taxid $tax.taxid + -taxid $tax.taxid ## #else if $tax.taxselect == 'map': ## -taxid_map $tax.taxmap #end if @@ -65,26 +83,33 @@ $hash_index > '$outfile' ]]> - - - - - - - - - + + + - + + + - - - - + + + + + + + + + + + + + + + @@ -117,7 +142,7 @@ $hash_index - + From 6d5d22f78746661b21122ae2c65cc3a900361411 Mon Sep 17 00:00:00 2001 From: eliberg Date: Tue, 13 Feb 2024 11:54:09 +0100 Subject: [PATCH 3/9] Adjust Tests --- tools/ncbi_blast_plus/ncbi_makeblastdb.xml | 31 +++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml index be0d5f28..b1a35300 100644 --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml @@ -144,8 +144,8 @@ $hash_index - - + + @@ -157,8 +157,10 @@ $hash_index With and without the taxid the only real difference is in the *.phr file. 
--> - - + + + + @@ -174,8 +176,10 @@ $hash_index - - + + + + @@ -193,8 +197,10 @@ $hash_index - - + + + + @@ -211,8 +217,13 @@ $hash_index - - + + + + + + + From 07cde164b3c9c72d20f2582a0249e2c6489a4635 Mon Sep 17 00:00:00 2001 From: eliberg Date: Tue, 20 Feb 2024 10:31:37 +0100 Subject: [PATCH 4/9] test corrected and further changes by @wm75 --- tools/ncbi_blast_plus/ncbi_makeblastdb.xml | 402 +++++++++++---------- 1 file changed, 220 insertions(+), 182 deletions(-) diff --git a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml index b1a35300..6116ea93 100644 --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml @@ -8,73 +8,70 @@ python '$outfile' ]]> - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + - - - - + + - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + **What it does** From f07b1b70564ef881ab26e6ace01e9fe1aa1f80ca Mon Sep 17 00:00:00 2001 From: eliberg Date: Tue, 20 Feb 2024 17:18:30 +0100 Subject: [PATCH 5/9] Add fifth test for data table function --- test-data/all_fasta.loc | 3 +++ test-data/three_human_mRNA.fasta.gz | Bin 3771 -> 3780 bytes test-data/tool_data_table_conf.xml.test | 4 ++++ tool-data/all_fasta.loc.sample | 18 ++++++++++++++++++ tool-data/tool_data_table_conf.xml.sample | 4 ++++ tools/ncbi_blast_plus/ncbi_makeblastdb.xml | 13 ++++++------- 6 files changed, 35 insertions(+), 7 deletions(-) create mode 100644 test-data/all_fasta.loc create mode 100644 tool-data/all_fasta.loc.sample diff --git 
a/test-data/all_fasta.loc b/test-data/all_fasta.loc new file mode 100644 index 00000000..cf664bae --- /dev/null +++ b/test-data/all_fasta.loc @@ -0,0 +1,3 @@ +# +# +three_human_mRNA thmRNA Three-Human-mRANs ${__HERE__}/three_human_mRNA.fasta diff --git a/test-data/three_human_mRNA.fasta.gz b/test-data/three_human_mRNA.fasta.gz index d09c4123bf059c82e15a1e5e5d770cc4dd5bcaf8..9383756cfec99644a19a862a02ca21a8996797f4 100644 GIT binary patch delta 28 jcmdljdqh@PzMF$XqUK5}19L>3twWAwp((>gu8n*EeD4Tv delta 19 acmX>iyIYn^zMF&L>Qu&P2Ih^z8~Fe|D+R&; diff --git a/test-data/tool_data_table_conf.xml.test b/test-data/tool_data_table_conf.xml.test index b141c04e..3effa19d 100644 --- a/test-data/tool_data_table_conf.xml.test +++ b/test-data/tool_data_table_conf.xml.test @@ -12,4 +12,8 @@ value, name, path + + value, dbkey, name, path + +
diff --git a/tool-data/all_fasta.loc.sample b/tool-data/all_fasta.loc.sample new file mode 100644 index 00000000..1a5a28d5 --- /dev/null +++ b/tool-data/all_fasta.loc.sample @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# diff --git a/tool-data/tool_data_table_conf.xml.sample b/tool-data/tool_data_table_conf.xml.sample index cbb45443..9aa1cc4a 100644 --- a/tool-data/tool_data_table_conf.xml.sample +++ b/tool-data/tool_data_table_conf.xml.sample @@ -11,4 +11,8 @@ value, name, path + + value, dbkey, name, path + +
diff --git a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml index 6116ea93..62cbb498 100644 --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml @@ -51,7 +51,7 @@ $hash_index ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful -title 'BLAST Database' #end if --dbtype +-dbtypne #if $input.type == "protein": prot #else: @@ -231,8 +231,7 @@ $hash_index - - + @@ -252,13 +251,13 @@ $hash_index
- + **What it does** From 2aeaaf818bb2d7edb3881df13870fe519653b438 Mon Sep 17 00:00:00 2001 From: Peter Cock Date: Wed, 21 Feb 2024 16:54:24 +0000 Subject: [PATCH 6/9] Update tools/ncbi_blast_plus/ncbi_makeblastdb.xml Spotted by @wm75 during review Co-authored-by: Wolfgang Maier --- tools/ncbi_blast_plus/ncbi_makeblastdb.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml index 62cbb498..012e2339 100644 --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml @@ -51,7 +51,7 @@ $hash_index ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful -title 'BLAST Database' #end if --dbtypne +-dbtype #if $input.type == "protein": prot #else: From 134eac040a7c88d0cd8be2c41e7817cfd78c401b Mon Sep 17 00:00:00 2001 From: eliberg Date: Thu, 22 Feb 2024 10:53:21 +0100 Subject: [PATCH 7/9] Update Version-Suffix and README --- tools/ncbi_blast_plus/README.rst | 3 +++ tools/ncbi_blast_plus/ncbi_macros.xml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/ncbi_blast_plus/README.rst b/tools/ncbi_blast_plus/README.rst index e77d1f62..d3bcf23d 100644 --- a/tools/ncbi_blast_plus/README.rst +++ b/tools/ncbi_blast_plus/README.rst @@ -136,6 +136,9 @@ a galaxy specific suffix which gets reset to zero with each new BLAST version: ============== =============================================================== Version Changes -------------- --------------------------------------------------------------- +2.14.1+galaxy2 - Add usage of genome FASTA files on the gaalxy server with + ``makeblastdb`` (contribution from Wolfgang Mayer and + Elischa Berger) 2.14.1+galaxy0 - Updated for NCBI BLAST+ 2.14.1 release. 2.10.1+galaxy3 - Silenced ``deltablast`` warning about using ``-num_threads`` with ``--subject`` (i.e. FASTA file from your history). 
diff --git a/tools/ncbi_blast_plus/ncbi_macros.xml b/tools/ncbi_blast_plus/ncbi_macros.xml index a1ee2d07..44551750 100644 --- a/tools/ncbi_blast_plus/ncbi_macros.xml +++ b/tools/ncbi_blast_plus/ncbi_macros.xml @@ -1,6 +1,6 @@ 2.14.1 - 1 + 2 16.10 From e1d7de50bc13709efb1c35d94813e1f975228f2d Mon Sep 17 00:00:00 2001 From: eliberg Date: Thu, 22 Feb 2024 11:01:13 +0100 Subject: [PATCH 8/9] Add missing README doc message for VERSION-SUFFIX 1 --- tools/ncbi_blast_plus/README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/ncbi_blast_plus/README.rst b/tools/ncbi_blast_plus/README.rst index d3bcf23d..fa9e4ac3 100644 --- a/tools/ncbi_blast_plus/README.rst +++ b/tools/ncbi_blast_plus/README.rst @@ -139,6 +139,7 @@ Version Changes 2.14.1+galaxy2 - Add usage of genome FASTA files on the gaalxy server with ``makeblastdb`` (contribution from Wolfgang Mayer and Elischa Berger) +2.14.1+galaxy1 - Fix for get_species_taxids 2.14.1+galaxy0 - Updated for NCBI BLAST+ 2.14.1 release. 2.10.1+galaxy3 - Silenced ``deltablast`` warning about using ``-num_threads`` with ``--subject`` (i.e. FASTA file from your history). 
From 5c4825a0fab430e50d26c5e3086428f688aa6d2c Mon Sep 17 00:00:00 2001 From: eliberg Date: Thu, 22 Feb 2024 11:10:23 +0100 Subject: [PATCH 9/9] Type corrections --- tools/ncbi_blast_plus/README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ncbi_blast_plus/README.rst b/tools/ncbi_blast_plus/README.rst index fa9e4ac3..ad2e8365 100644 --- a/tools/ncbi_blast_plus/README.rst +++ b/tools/ncbi_blast_plus/README.rst @@ -136,8 +136,8 @@ a galaxy specific suffix which gets reset to zero with each new BLAST version: ============== =============================================================== Version Changes -------------- --------------------------------------------------------------- -2.14.1+galaxy2 - Add usage of genome FASTA files on the gaalxy server with - ``makeblastdb`` (contribution from Wolfgang Mayer and +2.14.1+galaxy2 - Add usage of genome FASTA files on the Galaxy server with + ``makeblastdb`` (contribution from Wolfgang Maier and Elischa Berger) 2.14.1+galaxy1 - Fix for get_species_taxids 2.14.1+galaxy0 - Updated for NCBI BLAST+ 2.14.1 release.