From 85791f977520c9e334baa7c229e7296c009e081d Mon Sep 17 00:00:00 2001 From: Danilo Di Leo Date: Mon, 18 Mar 2024 11:37:50 +0100 Subject: [PATCH 1/3] updated code for collecting data from gtdb --- bin/taxref_reformat_gtdb.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/taxref_reformat_gtdb.sh b/bin/taxref_reformat_gtdb.sh index 14146a214..a389bb657 100755 --- a/bin/taxref_reformat_gtdb.sh +++ b/bin/taxref_reformat_gtdb.sh @@ -10,7 +10,7 @@ for f in *.tar.gz; do done # Write the assignTaxonomy() fasta file: assignTaxonomy.fna -cat ar122*.fna bac120*.fna | sed '/^>/s/>[^ ]\+ \([^[]\+\) \[.*/>\1/' | sed '/^>/s/ \[.*//' | sed 's/[a-z]__//g' > assignTaxonomy.fna +cat ar*.fna bac*.fna | sed '/^>/s/>[^ ]\+ \([^[]\+\) \[.*/>\1/' | sed '/^>/s/ \[.*//' | sed 's/[a-z]__//g' > assignTaxonomy.fna # Write the addSpecies() fasta file: addSpecies.fna -cat ar122*.fna bac120*.fna | sed '/^>/s/>\([^ ]\+\) .*;s__\([^[]\+\) \[.*/>\1 \2/' > addSpecies.fna +cat ar*.fna bac*.fna | sed '/^>/s/>\([^ ]\+\) .*;s__\([^[]\+\) \[.*/>\1 \2/' > addSpecies.fna From 48d02b8dcb79a9246620f106a0844697c16e4269 Mon Sep 17 00:00:00 2001 From: Danilo Di Leo Date: Mon, 18 Mar 2024 11:41:35 +0100 Subject: [PATCH 2/3] updated comment --- bin/taxref_reformat_gtdb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/taxref_reformat_gtdb.sh b/bin/taxref_reformat_gtdb.sh index a389bb657..f2e65b56f 100755 --- a/bin/taxref_reformat_gtdb.sh +++ b/bin/taxref_reformat_gtdb.sh @@ -1,6 +1,6 @@ #!/bin/sh -# Reads the ar122 and bac120 SSU fasta files from GTDB (after first untarring) +# Reads the ar* and bac* SSU fasta files from GTDB (after first untarring) # and outputs two new fasta files, one suitable for DADA2's assignTaxonomy() # and addSpecies() functions. 
From 0f4346c3b6aad0d491ee1cb368d519f251764de2 Mon Sep 17 00:00:00 2001 From: Danilo Di Leo Date: Mon, 18 Mar 2024 11:46:01 +0100 Subject: [PATCH 3/3] updated CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 723d4a835..eb0a2e326 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#677](https://github.com/nf-core/ampliseq/pull/677) - Added cut_its information to SDBI export +- [#711](https://github.com/nf-core/ampliseq/pull/711) - Changed the file patterns in `taxref_reformat_gtdb.sh` from `ar122*`/`bac120*` to `ar*`/`bac*` so that it picks up both the Bacteria and Archaea GTDB files. See issue [#708](https://github.com/nf-core/ampliseq/issues/708) for more info. ### `Fixed`