From 450eb61f1d70825401ce2e254aa4c52d914a5549 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Tue, 20 Aug 2024 13:29:59 +0200
Subject: [PATCH] added actions and tests

---
 rescript/bv_brc.py            | 1346 ++++++++++++++++++++++++++++++++-
 rescript/plugin_setup.py      |  136 +++-
 rescript/testing.py           |   15 +
 rescript/tests/test_bv_brc.py |  450 ++++++++++-
 4 files changed, 1900 insertions(+), 47 deletions(-)
 create mode 100644 rescript/testing.py

diff --git a/rescript/bv_brc.py b/rescript/bv_brc.py
index 07df950..37fa422 100644
--- a/rescript/bv_brc.py
+++ b/rescript/bv_brc.py
@@ -6,55 +6,1333 @@
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
 from io import StringIO
-
+import os
 import qiime2
 import pandas as pd
 import requests
-from q2_types.feature_data import MixedCaseDNAFASTAFormat
+from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat, TSVTaxonomyDirectoryFormat
+from q2_types.genome_data import GenomeSequencesDirectoryFormat
+
+from rescript.ncbi import _allowed_ranks, _default_ranks
+import json
+
+
+def fetch_genomes_bv_brc(
+        rql_query: str = None,
+        genome_ids: list = None
+) -> GenomeSequencesDirectoryFormat:
+    """Download BV-BRC genome sequences and write one FASTA file per genome."""
+    # Exactly one of rql_query / genome_ids may be given; IDs become an in(...) query
+    rql_query = id_list_handling(rql_query=rql_query,
+                                 ids=genome_ids,
+                                 parameter_name="genome_ids",
+                                 data_field="genome_id"
+                                 )
+
+    # Define output format
+    genomes = GenomeSequencesDirectoryFormat()
+
+    # Get requests response
+    response = download_data(
+        url=f"https://www.bv-brc.org/api/genome_sequence/?{rql_query}",
+        data_type="genome_sequence",
+    )
+
+    # Group the JSON records by genome_id and write <genome_id>.fasta files
+    json_to_fasta(response.json(), str(genomes))
+
+    return genomes
+
+
+def fetch_metadata_bv_brc(data_type: str, rql_query: str) -> qiime2.Metadata:
+    """Query the BV-BRC ``data_type`` endpoint as TSV and return it as Metadata."""
+    # Get requests response
+    response = download_data(
+        url=f"https://www.bv-brc.org/api/{data_type}/?{rql_query}&http_accept=text/tsv",
+        data_type=data_type
+    )
+    # No index_col is passed, so pandas' default positional index becomes "id"
+    tsv_data = StringIO(response.text)
+    metadata = pd.read_csv(tsv_data, sep='\t')
+    metadata.index.name = "id"
+    metadata.index = metadata.index.astype(str)
+
+    return qiime2.Metadata(metadata)
+
+
+def fetch_taxonomy_bv_brc(
+        rql_query: str = None,
+        ranks: list = None,
+        taxon_ids: list = None,
+) -> TSVTaxonomyDirectoryFormat:
+    """Download BV-BRC taxonomy records and write them to ``taxonomy.tsv``."""
+    # Exactly one of rql_query / taxon_ids may be given, hence both default to None
+    rql_query = id_list_handling(rql_query=rql_query,
+                                 ids=taxon_ids,
+                                 parameter_name="taxon_ids",
+                                 data_field="taxon_id"
+                                 )
+
+    # Define output format
+    directory = TSVTaxonomyDirectoryFormat()
+
+    # Get requests response
+    response = download_data(
+        url=f"https://www.bv-brc.org/api/taxonomy/?{rql_query}&http_accept=text/tsv",
+        data_type="taxonomy"
+    )
+
+    # Convert to data frame
+    tsv_data = StringIO(response.text)
+    metadata = pd.read_csv(tsv_data, sep='\t')
+
+    # Transform metadata to TSVTaxonomyFormat
+    taxonomy = transform_taxonomy_df(df=metadata, ranks=ranks)
+    taxonomy.to_csv(os.path.join(str(directory), "taxonomy.tsv"), sep="\t")
+
+    return directory
+
+
+def parse_lineage_names_with_ranks(lineage_names, lineage_ranks, ranks):
+    """Build a '; '-joined, rank-prefixed lineage restricted to ``ranks``."""
+    # Set ranks to default if no list is specified
+    if not ranks:
+        ranks = _default_ranks
+
+    # Split the lineage names and ranks by ';'
+    lineage_split = lineage_names.split(';')
+    rank_split = lineage_ranks.split(';')
+
+    # Map each requested rank to its prefix, keeping the order of `ranks`; ranks
+    # unknown to _allowed_ranks are skipped instead of raising a KeyError
+    rank_to_prefix = {k: _allowed_ranks[k] for k in ranks if k in _allowed_ranks}
+
+    # Initialize the list for the parsed lineage
+    parsed_lineage = []
+
+    # Loop over each rank and assign the corresponding prefix and name
+    for rank, name in zip(rank_split, lineage_split):
+        prefix = rank_to_prefix.get(rank, None)
+        if prefix:
+            parsed_lineage.append(f"{prefix}{name}")
 
+    # Ensure all taxonomic levels are covered (fill in missing levels with just the
+    # prefix)
+    final_lineage = []
+    for required_prefix in rank_to_prefix.values():
+        # Check if any parsed_lineage item starts with the required prefix
+        match = next(
+            (item for item in parsed_lineage if item.startswith(required_prefix)), None)
+        if match:
+            final_lineage.append(match)
+        else:
+            final_lineage.append(required_prefix)
 
-def json_to_fasta(json: dict):
-    fasta_output = []
+    # Join the parsed lineage names with '; '
+    return '; '.join(final_lineage)
+
+
+def transform_taxonomy_df(df, ranks):
+    """Add 'Taxon' to ``df`` (in place) and return a 'Feature ID' -> 'Taxon' frame."""
+    df['Taxon'] = df.apply(
+        lambda row: parse_lineage_names_with_ranks(lineage_names=row['lineage_names'],
+                                                   lineage_ranks=row['lineage_ranks'],
+                                                   ranks=ranks), axis=1)
+
+    # taxon_id becomes the 'Feature ID' index; every column except 'Taxon' is dropped
+    df = df.rename(columns={'taxon_id': 'Feature ID'})
+    df = df[['Feature ID', 'Taxon']]
+    df = df.set_index('Feature ID')
+    return df
+
+
+def fetch_genome_features_bv_brc(
+        rql_query: str = None,
+        feature_ids: list = None,
+) -> (MixedCaseDNAFASTAFormat, ProteinFASTAFormat):
+    """Download BV-BRC genome features as a DNA FASTA and a protein FASTA file."""
+    # Exactly one of rql_query / feature_ids may be given; IDs become an in(...) query
+    rql_query = id_list_handling(rql_query=rql_query,
+                                 ids=feature_ids,
+                                 parameter_name="feature_ids",
+                                 data_field="feature_id")
+
+    # Define output formats
+    genes = MixedCaseDNAFASTAFormat()
+    proteins = ProteinFASTAFormat()
+
+    # Same query for both downloads; only the http_accept media type differs
+    base_url = "https://www.bv-brc.org/api/genome_feature/?"
+    genes_url = base_url + f"{rql_query}&http_accept=application/dna+fasta"
+    proteins_url = base_url + f"{rql_query}&http_accept=application/protein+fasta"
+
+    # Get requests response for genes and proteins
+    response_genes = download_data(url=genes_url, data_type="genome_feature")
+    response_proteins = download_data(url=proteins_url, data_type="genome_feature")
+
+    # The response bodies are written to the output files unchanged
+    fasta_genes = response_genes.text
+    with genes.open() as file:
+        file.write(fasta_genes)
+
+    fasta_proteins = response_proteins.text
+    with proteins.open() as file:
+        file.write(fasta_proteins)
+
+    return genes, proteins
+
+
+def json_to_fasta(json, output_dir):
+    """Write the sequence records in ``json`` to ``output_dir``, one FASTA per genome."""
+    # Dictionary to hold sequences grouped by genome_id
+    fasta_files = {}
+    # `json` is iterated as record dicts (the name shadows the json module in here)
     for entry in json:
-        header = (f">accn|{entry['sequence_id']}   {entry['description']}   "
-                  f"[{entry['genome_name']} | {entry['genome_id']}]")
-        fasta_output.append(f"{header}\n{entry['sequence']}")
-    return "\n".join(fasta_output)
+        genome_id = entry['genome_id']
+        if genome_id not in fasta_files:
+            fasta_files[genome_id] = []
+
+        # Construct FASTA format to be identical to BV-BRC FASTA headers
+        header = (f">accn|{entry['accession']}   {entry['description']}   "
+                  f"[{entry['genome_name']} | {genome_id}]")
 
+        fasta_files[genome_id].append(f"{header}\n{entry['sequence'].upper()}")
 
-def fetch_genomes_bv_brc(rql_query: str) -> (MixedCaseDNAFASTAFormat, qiime2.Metadata):
-    genomes = MixedCaseDNAFASTAFormat()
+    # Write each genome's records to <genome_id>.fasta (no trailing newline is added)
+    for genome_id, sequences in fasta_files.items():
+        fasta_content = "\n".join(sequences)
+        fasta_filename = os.path.join(output_dir, f"{genome_id}.fasta")
 
-    # Make the GET request for metadata
-    url_metadata = f"https://www.bv-brc.org/api/genome/{rql_query}&http_accept=text/tsv"
-    response_metadata = requests.get(url_metadata)
+        with open(fasta_filename, 'w') as fasta_file:
+            fasta_file.write(fasta_content)
 
-    if response_metadata.status_code == 200:
-        # Convert TSV data to dataframe
-        tsv_data = StringIO(response_metadata.text)
-        metadata = pd.read_csv(tsv_data, sep='\t', index_col="genome_id")
 
-        metadata.index.name = "id"
-        metadata.index = metadata.index.astype(str)
+def download_data(url, data_type):
+    """GET ``url`` and return the response on HTTP 200; otherwise raise an error."""
+    # NOTE(review): no timeout is passed, so a stalled server blocks indefinitely
+    response = requests.get(url)
 
-        # Extract all genome_ids out of dataframe
-        genome_ids = metadata.index.tolist()
+    # If response is correct return it
+    if response.status_code == 200:
+        return response
+    # 400 responses get a tailored message; every branch of error_handling raises
+    elif response.status_code == 400:
+        error_handling(response, data_type)
     else:
-        raise ValueError("Error")
+        raise ValueError(response.text)
+
+
+def error_handling(response, data_type):
+    """Turn a failed BV-BRC response into a ValueError; every branch raises."""
+    # No data found for query or incorrect RQL query
+    if response.text == "[]":
+        raise ValueError("No data could be retrieved. Either because of an "
+                         "incorrect RQL query or because no data exists for the "
+                         "query.")
+
+    elif response.text.startswith("A Database Error Occured:"):
+        # The JSON payload is taken from the first '{' in the response text onwards
+        json_str = response.text[response.text.find('{'):]
+        response_dict = json.loads(json_str)
 
-    # Make the GET request for sequences
-    url_sequences = (f"https://www.bv-brc.org/api/genome_sequence/"
-                     f"?in(genome_id,({','.join(genome_ids)}))")
-    response_sequences = requests.get(url_sequences)
+        # Incorrect RQL operator (tested first: it also matches the prefix below)
+        if response_dict['msg'].startswith("undefined field object"):
+            raise ValueError(
+                f"Error code {response_dict['code']}: {response_dict['msg']}. "
+                f"Incorrect RQL query operator."
+            )
 
-    if response_sequences.status_code == 200:
-        # Convert JSON to FASTA
-        fasta = json_to_fasta(response_sequences.json())
+        # Incorrect field for data type
+        elif response_dict['msg'].startswith("undefined field"):
+            raise ValueError(
+                f"Error code {response_dict['code']}: {response_dict['msg']}. \n"
+                f"Allowed fields for data type {data_type}: \n{data_fields[data_type]}"
+            )
+
+        else:
+            raise ValueError(
+                f"Error code {response_dict['code']}: {response_dict['msg']}."
+            )
 
-        # Write FASTA format to file
-        with genomes.open() as file:
-            file.write(fasta)
     else:
-        raise ValueError("Error")
+        raise ValueError(response.text)
+
+
+def id_list_handling(rql_query: str, ids: list, parameter_name: str, data_field: str):
+    """Return ``rql_query``, or an ``in(field,(ids))`` query built from ``ids``."""
+    # Error if rql_query and ids parameters are given
+    if rql_query and ids:
+        raise ValueError(f"Parameters rql_query and {parameter_name} can't be used "
+                         "simultaneously.")
+    # Error if rql_query and ids parameters are not given
+    elif not rql_query and not ids:
+        raise ValueError("At least one of the parameters rql_query and "
+                         f"{parameter_name} has to be given.")
+
+    # Only ids were given: build the query; each id is passed through str() first
+    elif ids:
+        rql_query = f"in({data_field},({','.join(map(str, ids))}))"
+
+    return rql_query
+
 
-    return genomes, qiime2.Metadata(metadata)
+data_fields = {
+    "antibiotics": [
+        "_version_",
+        "antibiotic_name",
+        "atc_classification",
+        "canonical_smiles",
+        "cas_id",
+        "date_inserted",
+        "date_modified",
+        "description",
+        "drugbank_interactions",
+        "inchi_key",
+        "isomeric_smiles",
+        "mechanism_of_action",
+        "molecular_formula",
+        "molecular_weight",
+        "pharmacological_classes",
+        "pharmacology",
+        "pubchem_cid",
+        "pubchem_cid_i",
+        "synonyms"
+    ],
+    "enzyme_class_ref": [
+        "_version_",
+        "date_inserted",
+        "date_modified",
+        "ec_description",
+        "ec_number",
+        "go"
+    ],
+    "epitope": [
+        "_version_",
+        "assay_results",
+        "bcell_assays",
+        "comments",
+        "date_inserted",
+        "date_modified",
+        "end",
+        "epitope_id",
+        "epitope_sequence",
+        "epitope_type",
+        "host_name",
+        "mhc_assays",
+        "organism",
+        "protein_accession",
+        "protein_id",
+        "protein_name",
+        "start",
+        "taxon_id",
+        "taxon_lineage_ids",
+        "taxon_lineage_names",
+        "tcell_assays",
+        "total_assays"
+    ],
+    "epitope_assay": [
+        "_version_",
+        "assay_group",
+        "assay_id",
+        "assay_measurement",
+        "assay_measurement_unit",
+        "assay_method",
+        "assay_result",
+        "assay_type",
+        "authors",
+        "date_inserted",
+        "date_modified",
+        "end",
+        "epitope_id",
+        "epitope_sequence",
+        "epitope_type",
+        "host_name",
+        "host_taxon_id",
+        "mhc_allele",
+        "mhc_allele_class",
+        "organism",
+        "pdb_id",
+        "pmid",
+        "protein_accession",
+        "protein_id",
+        "protein_name",
+        "start",
+        "taxon_id",
+        "taxon_lineage_ids",
+        "taxon_lineage_names",
+        "title"
+    ],
+    "experiment": [
+        "_version_",
+        "additional_data",
+        "additional_metadata",
+        "biosets",
+        "date_inserted",
+        "date_modified",
+        "detection_instrument",
+        "doi",
+        "exp_description",
+        "exp_id",
+        "exp_name",
+        "exp_poc",
+        "exp_protocol",
+        "exp_title",
+        "exp_type",
+        "experimenters",
+        "genome_id",
+        "measurement_technique",
+        "organism",
+        "pmid",
+        "public_identifier",
+        "public_repository",
+        "samples",
+        "strain",
+        "study_description",
+        "study_institution",
+        "study_name",
+        "study_pi",
+        "study_title",
+        "taxon_id",
+        "taxon_lineage_ids",
+        "treatment_amount",
+        "treatment_duration",
+        "treatment_name",
+        "treatment_type"
+    ],
+    "bioset": [
+        "_version_",
+        "additional_data",
+        "additional_metadata",
+        "analysis_group_1",
+        "analysis_group_2",
+        "analysis_method",
+        "bioset_criteria",
+        "bioset_description",
+        "bioset_id",
+        "bioset_name",
+        "bioset_result",
+        "bioset_type",
+        "date_inserted",
+        "date_modified",
+        "entity_count",
+        "entity_type",
+        "exp_id",
+        "exp_name",
+        "exp_title",
+        "exp_type",
+        "genome_id",
+        "organism",
+        "protocol",
+        "result_type",
+        "strain",
+        "study_description",
+        "study_institution",
+        "study_name",
+        "study_pi",
+        "study_title",
+        "taxon_id",
+        "taxon_lineage_ids",
+        "treatment_amount",
+        "treatment_duration",
+        "treatment_name",
+        "treatment_type"
+    ],
+    "bioset_result": [
+        "_version_",
+        "bioset_description",
+        "bioset_id",
+        "bioset_name",
+        "bioset_type",
+        "counts",
+        "date_inserted",
+        "date_modified",
+        "entity_id",
+        "entity_name",
+        "entity_type",
+        "exp_id",
+        "exp_name",
+        "exp_title",
+        "exp_type",
+        "feature_id",
+        "fpkm",
+        "gene",
+        "gene_id",
+        "genome_id",
+        "id",
+        "locus_tag",
+        "log2_fc",
+        "organism",
+        "other_ids",
+        "other_value",
+        "p_value",
+        "patric_id",
+        "product",
+        "protein_id",
+        "result_type",
+        "strain",
+        "taxon_id",
+        "tpm",
+        "treatment_amount",
+        "treatment_duration",
+        "treatment_name",
+        "treatment_type",
+        "uniprot_id",
+        "z_score"
+    ],
+    "gene_ontology_ref": [
+        "_version_",
+        "date_inserted",
+        "date_modified",
+        "definition",
+        "go_id",
+        "go_name",
+        "ontology"
+    ],
+    "genome": [
+        "_version_",
+        "additional_metadata",
+        "altitude",
+        "antimicrobial_resistance",
+        "antimicrobial_resistance_evidence",
+        "assembly_accession",
+        "assembly_method",
+        "authors",
+        "bioproject_accession",
+        "biosample_accession",
+        "biovar",
+        "body_sample_site",
+        "body_sample_subsite",
+        "cds",
+        "cds_ratio",
+        "cell_shape",
+        "checkm_completeness",
+        "checkm_contamination",
+        "chromosomes",
+        "clade",
+        "class",
+        "coarse_consistency",
+        "collection_date",
+        "collection_year",
+        "comments",
+        "common_name",
+        "completion_date",
+        "contig_l50",
+        "contig_n50",
+        "contigs",
+        "core_families",
+        "core_family_ratio",
+        "culture_collection",
+        "date_inserted",
+        "date_modified",
+        "depth",
+        "disease",
+        "family",
+        "fine_consistency",
+        "gc_content",
+        "genbank_accessions",
+        "genome_id",
+        "genome_length",
+        "genome_name",
+        "genome_quality",
+        "genome_quality_flags",
+        "genome_status",
+        "genus",
+        "geographic_group",
+        "geographic_location",
+        "gram_stain",
+        "h1_clade_global",
+        "h1_clade_us",
+        "h3_clade",
+        "h5_clade",
+        "h_type",
+        "habitat",
+        "host_age",
+        "host_common_name",
+        "host_gender",
+        "host_group",
+        "host_health",
+        "host_name",
+        "host_scientific_name",
+        "hypothetical_cds",
+        "hypothetical_cds_ratio",
+        "isolation_comments",
+        "isolation_country",
+        "isolation_site",
+        "isolation_source",
+        "kingdom",
+        "lab_host",
+        "latitude",
+        "lineage",
+        "longitude",
+        "mat_peptide",
+        "missing_core_family_ids",
+        "mlst",
+        "motility",
+        "n_type",
+        "ncbi_project_id",
+        "nearest_genomes",
+        "optimal_temperature",
+        "order",
+        "organism_name",
+        "other_clinical",
+        "other_environmental",
+        "other_names",
+        "other_typing",
+        "outgroup_genomes",
+        "owner",
+        "oxygen_requirement",
+        "p2_genome_id",
+        "partial_cds",
+        "partial_cds_ratio",
+        "passage",
+        "pathovar",
+        "patric_cds",
+        "ph1n1_like",
+        "phenotype",
+        "phylum",
+        "plasmids",
+        "plfam_cds",
+        "plfam_cds_ratio",
+        "public",
+        "publication",
+        "reference_genome",
+        "refseq_accessions",
+        "refseq_cds",
+        "refseq_project_id",
+        "rrna",
+        "salinity",
+        "season",
+        "segment",
+        "segments",
+        "sequencing_centers",
+        "sequencing_depth",
+        "sequencing_platform",
+        "sequencing_status",
+        "serovar",
+        "species",
+        "sporulation",
+        "sra_accession",
+        "state_province",
+        "strain",
+        "subclade",
+        "subtype",
+        "superkingdom",
+        "taxon_id",
+        "taxon_lineage_ids",
+        "taxon_lineage_names",
+        "temperature_range",
+        "trna",
+        "type_strain",
+        "user_read",
+        "user_write"
+    ],
+    "strain": [
+        "1_pb2",
+        "2_pb1",
+        "3_pa",
+        "4_ha",
+        "5_np",
+        "6_na",
+        "7_mp",
+        "8_ns",
+        "_version_",
+        "collection_date",
+        "collection_year",
+        "date_inserted",
+        "date_modified",
+        "family",
+        "genbank_accessions",
+        "genome_ids",
+        "genus",
+        "geographic_group",
+        "h_type",
+        "host_common_name",
+        "host_group",
+        "host_name",
+        "id",
+        "isolation_country",
+        "l",
+        "lab_host",
+        "m",
+        "n_type",
+        "other_segments",
+        "owner",
+        "passage",
+        "public",
+        "s",
+        "season",
+        "segment_count",
+        "species",
+        "status",
+        "strain",
+        "subtype",
+        "taxon_id",
+        "taxon_lineage_ids",
+        "taxon_lineage_names",
+        "user_read",
+        "user_write"
+    ],
+    "genome_amr": [
+        "_version_",
+        "antibiotic",
+        "computational_method",
+        "computational_method_performance",
+        "computational_method_version",
+        "date_inserted",
+        "date_modified",
+        "evidence",
+        "genome_id",
+        "genome_name",
+        "id",
+        "laboratory_typing_method",
+        "laboratory_typing_method_version",
+        "laboratory_typing_platform",
+        "measurement",
+        "measurement_sign",
+        "measurement_unit",
+        "measurement_value",
+        "owner",
+        "pmid",
+        "public",
+        "resistant_phenotype",
+        "source",
+        "taxon_id",
+        "testing_standard",
+        "testing_standard_year",
+        "user_read",
+        "user_write",
+        "vendor"
+    ],
+    "feature_sequence": [
+        "_version_",
+        "date_inserted",
+        "date_modified",
+        "md5",
+        "sequence",
+        "sequence_type"
+    ],
+    "genome_feature": [
+        "aa_length",
+        "aa_sequence_md5",
+        "accession",
+        "alt_locus_tag",
+        "annotation",
+        "brc_id",
+        "classifier_round",
+        "classifier_score",
+        "codon_start",
+        "date_inserted",
+        "date_modified",
+        "end",
+        "feature_id",
+        "feature_type",
+        "figfam_id",
+        "gene",
+        "gene_id",
+        "genome_id",
+        "genome_name",
+        "go",
+        "location",
+        "na_length",
+        "na_sequence_md5",
+        "notes",
+        "og_id",
+        "owner",
+        "p2_feature_id",
+        "patric_id",
+        "pdb_accession",
+        "pgfam_id",
+        "plfam_id",
+        "product",
+        "property",
+        "protein_id",
+        "public",
+        "refseq_locus_tag",
+        "segments",
+        "sequence_id",
+        "sog_id",
+        "start",
+        "strand",
+        "taxon_id",
+        "uniprotkb_accession",
+        "user_read",
+        "user_write"
+    ],
+    "genome_sequence": [
+        "_version_",
+        "accession",
+        "chromosome",
+        "date_inserted",
+        "date_modified",
+        "description",
+        "gc_content",
+        "genome_id",
+        "genome_name",
+        "gi",
+        "length",
+        "mol_type",
+        "owner",
+        "p2_sequence_id",
+        "plasmid",
+        "public",
+        "release_date",
+        "segment",
+        "sequence",
+        "sequence_id",
+        "sequence_md5",
+        "sequence_status",
+        "sequence_type",
+        "taxon_id",
+        "topology",
+        "user_read",
+        "user_write",
+        "version"
+    ],
+    "id_ref": [
+        "_version_",
+        "date_inserted",
+        "date_modified",
+        "id",
+        "id_type",
+        "id_value",
+        "uniprotkb_accession"
+    ],
+    "misc_niaid_sgc": [
+        "_version_",
+        "date_inserted",
+        "date_modified",
+        "gene_symbol_collection",
+        "genus",
+        "has_clones",
+        "has_proteins",
+        "selection_criteria",
+        "species",
+        "strain",
+        "target_id",
+        "target_status"
+    ],
+    "pathway": [
+        "_version_",
+        "accession",
+        "alt_locus_tag",
+        "annotation",
+        "date_inserted",
+        "date_modified",
+        "ec_description",
+        "ec_number",
+        "feature_id",
+        "gene",
+        "genome_ec",
+        "genome_id",
+        "genome_name",
+        "id",
+        "owner",
+        "pathway_class",
+        "pathway_ec",
+        "pathway_id",
+        "pathway_name",
+        "patric_id",
+        "product",
+        "public",
+        "refseq_locus_tag",
+        "sequence_id",
+        "taxon_id",
+        "user_read",
+        "user_write"
+    ],
+    "pathway_ref": [
+        "_version_",
+        "date_inserted",
+        "date_modified",
+        "ec_description",
+        "ec_number",
+        "id",
+        "map_location",
+        "map_name",
+        "map_type",
+        "occurrence",
+        "pathway_class",
+        "pathway_id",
+        "pathway_name"
+    ],
+    "ppi": [
+        "_version_",
+        "category",
+        "date_inserted",
+        "date_modified",
+        "detection_method",
+        "domain_a",
+        "domain_b",
+        "evidence",
+        "feature_id_a",
+        "feature_id_b",
+        "gene_a",
+        "gene_b",
+        "genome_id_a",
+        "genome_id_b",
+        "genome_name_a",
+        "genome_name_b",
+        "id",
+        "interaction_type",
+        "interactor_a",
+        "interactor_b",
+        "interactor_desc_a",
+        "interactor_desc_b",
+        "interactor_type_a",
+        "interactor_type_b",
+        "pmid",
+        "refseq_locus_tag_a",
+        "refseq_locus_tag_b",
+        "score",
+        "source_db",
+        "source_id",
+        "taxon_id_a",
+        "taxon_id_b"
+    ],
+    "protein_family_ref": [
+        "_version_",
+        "date_inserted",
+        "date_modified",
+        "family_id",
+        "family_product",
+        "family_type"
+    ],
+    "sequence_feature": [
+        "aa_sequence_md5",
+        "aa_variant",
+        "additional_metadata",
+        "comments",
+        "date_inserted",
+        "date_modified",
+        "end",
+        "evidence_code",
+        "feature_id",
+        "genbank_accession",
+        "gene",
+        "genome_id",
+        "genome_name",
+        "id",
+        "length",
+        "patric_id",
+        "product",
+        "publication",
+        "refseq_locus_tag",
+        "segment",
+        "segments",
+        "sf_category",
+        "sf_id",
+        "sf_name",
+        "sf_sequence",
+        "sf_sequence_md5",
+        "source",
+        "source_aa_sequence",
+        "source_id",
+        "source_sf_location",
+        "source_strain",
+        "start",
+        "subtype",
+        "taxon_id",
+        "variant_types"
+    ],
+    "sequence_feature_vt": [
+        "additional_metadata",
+        "comments",
+        "date_inserted",
+        "date_modified",
+        "id",
+        "sf_category",
+        "sf_id",
+        "sf_name",
+        "sf_sequence",
+        "sf_sequence_md5",
+        "sfvt_genome_count",
+        "sfvt_genome_ids",
+        "sfvt_id",
+        "sfvt_sequence",
+        "sfvt_sequence_md5",
+        "sfvt_variations"
+    ],
+    "sp_gene": [
+        "_version_",
+        "alt_locus_tag",
+        "antibiotics",
+        "antibiotics_class",
+        "classification",
+        "date_inserted",
+        "date_modified",
+        "e_value",
+        "evidence",
+        "feature_id",
+        "function",
+        "gene",
+        "genome_id",
+        "genome_name",
+        "id",
+        "identity",
+        "organism",
+        "owner",
+        "patric_id",
+        "pmid",
+        "product",
+        "property",
+        "property_source",
+        "public",
+        "query_coverage",
+        "refseq_locus_tag",
+        "same_genome",
+        "same_genus",
+        "same_species",
+        "source",
+        "source_id",
+        "subject_coverage",
+        "taxon_id",
+        "user_read",
+        "user_write"
+    ],
+    "sp_gene_ref": [
+        "_version_",
+        "antibiotics",
+        "antibiotics_class",
+        "assertion",
+        "classification",
+        "date_inserted",
+        "date_modified",
+        "function",
+        "gene_id",
+        "gene_name",
+        "genus",
+        "gi",
+        "id",
+        "locus_tag",
+        "organism",
+        "pmid",
+        "product",
+        "property",
+        "source",
+        "source_id",
+        "species"
+    ],
+    "spike_lineage": [
+        "_version_",
+        "country",
+        "date_inserted",
+        "date_modified",
+        "growth_rate",
+        "id",
+        "lineage",
+        "lineage_count",
+        "lineage_of_concern",
+        "month",
+        "prevalence",
+        "region",
+        "sequence_features",
+        "total_isolates"
+    ],
+    "spike_variant": [
+        "_version_",
+        "aa_variant",
+        "country",
+        "date_inserted",
+        "date_modified",
+        "growth_rate",
+        "id",
+        "lineage_count",
+        "month",
+        "prevalence",
+        "region",
+        "sequence_features",
+        "total_isolates"
+    ],
+    "structured_assertion": [
+        "_version_",
+        "comment",
+        "date_inserted",
+        "date_modified",
+        "evidence_code",
+        "feature_id",
+        "id",
+        "owner",
+        "patric_id",
+        "pmid",
+        "property",
+        "public",
+        "refseq_locus_tag",
+        "score",
+        "source",
+        "user_read",
+        "user_write",
+        "value"
+    ],
+    "subsystem": [
+        "_version_",
+        "active",
+        "class",
+        "date_inserted",
+        "date_modified",
+        "feature_id",
+        "gene",
+        "genome_id",
+        "genome_name",
+        "id",
+        "owner",
+        "patric_id",
+        "product",
+        "public",
+        "refseq_locus_tag",
+        "role_id",
+        "role_name",
+        "subclass",
+        "subsystem_id",
+        "subsystem_name",
+        "superclass",
+        "taxon_id",
+        "user_read",
+        "user_write"
+    ],
+    "subsystem_ref": [
+        "_version_",
+        "class",
+        "date_inserted",
+        "date_modified",
+        "description",
+        "id",
+        "notes",
+        "pmid",
+        "role_id",
+        "role_name",
+        "subclass",
+        "subsystem_id",
+        "subsystem_name",
+        "superclass"
+    ],
+    "taxonomy": [
+        "_version_",
+        "cds_mean",
+        "cds_sd",
+        "core_families",
+        "core_family_ids",
+        "description",
+        "division",
+        "genetic_code",
+        "genome_count",
+        "genome_length_mean",
+        "genome_length_sd",
+        "genomes",
+        "genomes_f",
+        "hypothetical_cds_ratio_mean",
+        "hypothetical_cds_ratio_sd",
+        "lineage",
+        "lineage_ids",
+        "lineage_names",
+        "lineage_ranks",
+        "other_names",
+        "parent_id",
+        "plfam_cds_ratio_mean",
+        "plfam_cds_ratio_sd",
+        "taxon_id",
+        "taxon_id_i",
+        "taxon_name",
+        "taxon_rank"
+    ],
+    "protein_structure": [
+        "alignments",
+        "authors",
+        "date_inserted",
+        "date_modified",
+        "feature_id",
+        "file_path",
+        "gene",
+        "genome_id",
+        "institution",
+        "method",
+        "organism_name",
+        "patric_id",
+        "pdb_id",
+        "pmid",
+        "product",
+        "release_date",
+        "resolution",
+        "sequence",
+        "sequence_md5",
+        "taxon_id",
+        "taxon_lineage_ids",
+        "taxon_lineage_names",
+        "title",
+        "uniprotkb_accession"
+    ],
+    "protein_feature": [
+        "aa_sequence_md5",
+        "classification",
+        "comments",
+        "date_inserted",
+        "date_modified",
+        "description",
+        "e_value",
+        "end",
+        "evidence",
+        "feature_id",
+        "feature_type",
+        "gene",
+        "genome_id",
+        "genome_name",
+        "id",
+        "interpro_description",
+        "interpro_id",
+        "length",
+        "patric_id",
+        "product",
+        "publication",
+        "refseq_locus_tag",
+        "score",
+        "segments",
+        "sequence",
+        "source",
+        "source_id",
+        "start",
+        "taxon_id"
+    ],
+    "surveillance": [
+        "additional_metadata",
+        "alcohol_or_other_drug_dependence",
+        "breastfeeding",
+        "chest_imaging_interpretation",
+        "chronic_conditions",
+        "collection_city",
+        "collection_country",
+        "collection_date",
+        "collection_latitude",
+        "collection_longitude",
+        "collection_poi",
+        "collection_season",
+        "collection_state_province",
+        "collection_year",
+        "collector_institution",
+        "collector_name",
+        "comments",
+        "contact_email_address",
+        "contributing_institution",
+        "date_inserted",
+        "date_modified",
+        "daycare_attendance",
+        "days_elapsed_to_disease_status",
+        "days_elapsed_to_sample_collection",
+        "days_elapsed_to_vaccination",
+        "diagnosis",
+        "dialysis",
+        "disease_severity",
+        "disease_status",
+        "duration_of_exposure",
+        "duration_of_treatment",
+        "ecmo",
+        "education",
+        "embargo_end_date",
+        "exposure",
+        "exposure_type",
+        "genome_id",
+        "geographic_group",
+        "hospitalization_duration",
+        "hospitalized",
+        "host_age",
+        "host_capture_status",
+        "host_common_name",
+        "host_ethnicity",
+        "host_group",
+        "host_habitat",
+        "host_health",
+        "host_height",
+        "host_id_type",
+        "host_identifier",
+        "host_natural_state",
+        "host_race",
+        "host_sex",
+        "host_species",
+        "host_weight",
+        "human_leukocyte_antigens",
+        "id",
+        "infections_within_five_years",
+        "influenza_like_illness_over_the_past_year",
+        "initiation_of_treatment",
+        "intensive_care_unit",
+        "last_update_date",
+        "longitudinal_study",
+        "maintenance_medication",
+        "nursing_home_residence",
+        "onset_hours",
+        "other_vaccinations",
+        "oxygen_saturation",
+        "packs_per_day_for_how_many_years",
+        "pathogen_test_interpretation",
+        "pathogen_test_result",
+        "pathogen_test_type",
+        "pathogen_type",
+        "post_visit_medications",
+        "pre_visit_medications",
+        "pregnancy",
+        "primary_living_situation",
+        "profession",
+        "project_identifier",
+        "sample_accession",
+        "sample_identifier",
+        "sample_material",
+        "sample_receipt_date",
+        "sample_transport_medium",
+        "sequence_accession",
+        "source_of_vaccine_information",
+        "species",
+        "strain",
+        "submission_date",
+        "subtype",
+        "sudden_onset",
+        "symptoms",
+        "taxon_lineage_ids",
+        "tobacco_use",
+        "travel_history",
+        "treatment",
+        "treatment_dosage",
+        "treatment_type",
+        "trimester_of_pregnancy",
+        "types_of_allergies",
+        "use_of_personal_protective_equipment",
+        "vaccination_type",
+        "vaccine_dosage",
+        "vaccine_lot_number",
+        "vaccine_manufacturer",
+        "ventilation"
+    ],
+    "serology": [
+        "additional_metadata",
+        "collection_city",
+        "collection_country",
+        "collection_date",
+        "collection_state",
+        "collection_year",
+        "comments",
+        "contributing_institution",
+        "date_inserted",
+        "date_modified",
+        "genbank_accession",
+        "geographic_group",
+        "host_age",
+        "host_age_group",
+        "host_common_name",
+        "host_health",
+        "host_identifier",
+        "host_sex",
+        "host_species",
+        "host_type",
+        "id",
+        "positive_definition",
+        "project_identifier",
+        "sample_accession",
+        "sample_identifier",
+        "serotype",
+        "strain",
+        "taxon_lineage_ids",
+        "test_antigen",
+        "test_interpretation",
+        "test_pathogen",
+        "test_result",
+        "test_type",
+        "virus_identifier"
+    ]
+}
diff --git a/rescript/plugin_setup.py b/rescript/plugin_setup.py
index 8e7bbc4..5f1393c 100644
--- a/rescript/plugin_setup.py
+++ b/rescript/plugin_setup.py
@@ -8,14 +8,15 @@
 
 import importlib
 
-from q2_types.genome_data import GenomeData, Loci, Proteins
+from q2_types.genome_data import GenomeData, Loci, Proteins, Genes, DNASequence
 from q2_types.metadata import ImmutableMetadata
 from qiime2.core.type import TypeMatch
 from qiime2.plugin import (Str, Plugin, Choices, List, Citations, Range, Int,
                            Float, Visualization, Bool, TypeMap, Metadata,
                            MetadataColumn, Categorical)
 
-from .bv_brc import fetch_genomes_bv_brc
+from .bv_brc import fetch_genomes_bv_brc, fetch_metadata_bv_brc, \
+    fetch_genome_features_bv_brc, fetch_taxonomy_bv_brc
 from .subsample import subsample_fasta
 from .trim_alignment import trim_alignment
 from .merge import merge_taxa
@@ -1230,21 +1231,142 @@
     ]
 )
 
+datatypes_metadata = [  # choices offered for the 'data_type' parameter
+    "antibiotics",
+    "enzyme_class_ref",
+    "epitope",
+    "epitope_assay",
+    "experiment",
+    "bioset",
+    "bioset_result",
+    "gene_ontology_ref",
+    "genome",
+    "strain",
+    "genome_amr",
+    "feature_sequence",
+    "genome_feature",
+    "genome_sequence",
+    "id_ref",
+    "misc_niaid_sgc",
+    "pathway",
+    "pathway_ref",
+    "ppi",
+    "protein_family_ref",
+    "sequence_feature",
+    "sequence_feature_vt",
+    "sp_gene",
+    "sp_gene_ref",
+    "spike_lineage",
+    "spike_variant",
+    "structured_assertion",
+    "subsystem",
+    "subsystem_ref",
+    "taxonomy",
+    "protein_structure",
+    "protein_feature",
+    "surveillance",
+    "serology"
+]
+
 plugin.methods.register_function(
     function=fetch_genomes_bv_brc,
     inputs={},
-    parameters={'rql_query': Str},
-    outputs=[('genomes', FeatureData[Sequence]),
-             ('metadata', ImmutableMetadata)],
+    parameters={
+        'rql_query': Str,
+        'genome_ids': List[Str],
+    },
+    outputs=[('genomes', GenomeData[DNASequence])],
     input_descriptions={},
-    parameter_descriptions={'rql_query': 'query'},
+    parameter_descriptions={
+        'rql_query': 'Query in RQL format. Check '
+                     'https://www.bv-brc.org/api/doc/genome_sequence '
+                     'for documentation.',
+        'genome_ids': 'List of genome IDs from BV-BRC. Cannot be used '
+                      'together with rql_query.',
+    },
     output_descriptions={
         'genomes': 'genomes',
-        'metadata': 'metadata'},
+    },
     name='fetch genomes',
     description="fetch genomes",
 )
 
+# Register the action that downloads a metadata table for any of the
+# BV-BRC data types listed in datatypes_metadata.
+plugin.methods.register_function(
+    function=fetch_metadata_bv_brc,
+    inputs={},
+    parameters={'data_type': Str % Choices(datatypes_metadata),
+                'rql_query': Str},
+    outputs=[('metadata', ImmutableMetadata)],
+    input_descriptions={},
+    parameter_descriptions={
+        'data_type': 'BV-BRC data type. Check '
+                     'https://www.bv-brc.org/api/doc/ for documentation.',
+        'rql_query': 'Query in RQL format. Check '
+                     'https://www.bv-brc.org/api/doc/ for documentation.'
+    },
+    output_descriptions={'metadata': 'metadata'},
+    name='Fetch BV-BRC metadata.',
+    description='Fetch BV-BRC metadata for a specific data type with an '
+                'RQL query.',
+)
+
+# Register the action that downloads BV-BRC taxonomy records and builds
+# rank-prefixed taxonomy strings for the requested ranks.
+plugin.methods.register_function(
+    function=fetch_taxonomy_bv_brc,
+    inputs={},
+    parameters={
+        'rql_query': Str,
+        'ranks': List[Str % Choices(_allowed_ranks)],
+        'taxon_ids': List[Str],
+    },
+    outputs=[('taxonomy', FeatureData[Taxonomy])],
+    input_descriptions={},
+    parameter_descriptions={
+        'rql_query': 'Query in RQL format. Check '
+                     'https://www.bv-brc.org/api/doc/taxonomy '
+                     'for documentation.',
+        'ranks': 'List of taxonomic ranks for building a taxonomy from the '
+                 "BV-BRC lineage data. [default: '" +
+                 "', '".join(_default_ranks) + "']",
+        'taxon_ids': 'List of taxon IDs from BV-BRC. Cannot be used '
+                     'together with rql_query.',
+    },
+    output_descriptions={'taxonomy': 'Taxonomy data.'},
+    name='Fetch taxonomy data from BV-BRC.',
+    description='Fetch taxonomy data from BV-BRC.',
+)
+
+# Register the action that downloads the DNA and protein sequences of
+# BV-BRC genome features.
+# NOTE(review): the unit test expects MixedCaseDNAFASTAFormat and
+# ProteinFASTAFormat return values - confirm that they transform into
+# the GenomeData[Genes] / GenomeData[Proteins] outputs declared here.
+plugin.methods.register_function(
+    function=fetch_genome_features_bv_brc,
+    inputs={},
+    parameters={
+        'rql_query': Str,
+        'feature_ids': List[Str],
+    },
+    outputs=[('genes', GenomeData[Genes]),
+             ('proteins', GenomeData[Proteins])],
+    input_descriptions={},
+    parameter_descriptions={
+        'rql_query': 'Query in RQL format. Check '
+                     'https://www.bv-brc.org/api/doc/genome_feature '
+                     'for documentation.',
+        'feature_ids': 'List of feature IDs from BV-BRC. Cannot be used '
+                       'together with rql_query.',
+    },
+    output_descriptions={'genes': 'genes', 'proteins': 'proteins'},
+    name='Fetch genome features from BV-BRC.',
+    description='Fetch DNA and protein sequences of genome features from '
+                'BV-BRC.',
+)
+
 # Registrations
 plugin.register_semantic_types(SILVATaxonomy, SILVATaxidMap)
 plugin.register_semantic_type_to_format(
diff --git a/rescript/testing.py b/rescript/testing.py
new file mode 100644
index 0000000..d4a63be
--- /dev/null
+++ b/rescript/testing.py
@@ -0,0 +1,15 @@
+import requests
+
+
+def count_entries(url):
+    """Return the number of entries in the JSON response of ``url``."""
+    response = requests.get(url, timeout=60)
+    # Raise an error if the request was not successful
+    response.raise_for_status()
+    return len(response.json())
+
+
+if __name__ == "__main__":
+    print(count_entries(
+        "https://www.bv-brc.org/api/genome_sequence/"
+        "?in(genome_id,(224308.43))"))
\ No newline at end of file
diff --git a/rescript/tests/test_bv_brc.py b/rescript/tests/test_bv_brc.py
index bdbf843..9edfd5c 100644
--- a/rescript/tests/test_bv_brc.py
+++ b/rescript/tests/test_bv_brc.py
@@ -5,15 +5,453 @@
 #
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
+import unittest
+from typing import Any
+from unittest.mock import Mock, patch, mock_open, MagicMock
+
+import pandas as pd
+from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat, \
+    TSVTaxonomyDirectoryFormat
+from q2_types.genome_data import GenomeSequencesDirectoryFormat
 from qiime2.plugin.testing import TestPluginBase
 
-from rescript.bv_brc import fetch_genomes_bv_brc
+from rescript.bv_brc import fetch_genomes_bv_brc, fetch_metadata_bv_brc, \
+    fetch_genome_features_bv_brc, fetch_taxonomy_bv_brc, id_list_handling, \
+    error_handling, download_data, json_to_fasta, transform_taxonomy_df, \
+    parse_lineage_names_with_ranks
+
+
+class TestIDListHandling(TestPluginBase):
+    """Checks for the rql_query / ID-list argument validation helper."""
+    package = 'rescript.tests'
+
+    def test_error_both_parameters_given(self):
+        # A query and an ID list at the same time must be rejected.
+        expected_msg = ("Parameters rql_query and ids can't be used "
+                        "simultaneously.")
+        with self.assertRaisesRegex(ValueError, expected_msg):
+            id_list_handling(rql_query="some_query", ids=[1, 2, 3],
+                             parameter_name="ids", data_field="id")
+
+    def test_error_neither_parameter_given(self):
+        # An empty query together with an empty ID list must be rejected.
+        expected_msg = ("At least one of the parameters rql_query and ids "
+                        "has to be given.")
+        with self.assertRaisesRegex(ValueError, expected_msg):
+            id_list_handling(rql_query="", ids=[],
+                             parameter_name="ids", data_field="id")
+
+    def test_correct_rql_query_generation(self):
+        # IDs alone are converted into an RQL "in" clause on the data field.
+        observed = id_list_handling(
+            rql_query="",
+            ids=[1, 2, 3],
+            parameter_name="ids",
+            data_field="id",
+        )
+        self.assertEqual(observed, "in(id,(1,2,3))")
+
+
+class TestErrorHandling(TestPluginBase):
+    """Checks that error_handling turns BV-BRC responses into ValueErrors."""
+    package = 'rescript.tests'
+
+    def setUp(self):
+        super().setUp()
+        self.response = Mock()
+
+    def _assert_value_error(self, response_text, pattern):
+        # Feed the text through error_handling and expect a ValueError whose
+        # message matches the given pattern.
+        self.response.text = response_text
+        with self.assertRaisesRegex(ValueError, pattern):
+            error_handling(self.response, data_type="genome")
+
+    def test_no_data_found(self):
+        # An empty result list is reported as missing data.
+        self._assert_value_error("[]", "No data")
+
+    def test_database_error_occurred_undefined_field_object(self):
+        body = ('A Database Error Occured: {"code": 400, '
+                '"msg": "undefined field object in RQL"}')
+        self._assert_value_error(body, "undefined field object")
+
+    def test_database_error_occurred_undefined_field(self):
+        body = ('A Database Error Occured: {"code": 400, '
+                '"msg": "undefined field"}')
+        self._assert_value_error(body, "undefined field")
+
+    def test_database_error_occurred_general_error(self):
+        body = ('A Database Error Occured: {"code": 500, "msg": '
+                '"Internal Server Error"}')
+        self._assert_value_error(body, "Internal Server Error")
+
+    def test_unhandled_response(self):
+        self._assert_value_error(
+            "Unexpected error message",
+            "Unexpected error",
+        )
+
+
+class TestDownloadData(TestPluginBase):
+    """Checks how download_data reacts to different HTTP status codes."""
+    package = 'rescript.tests'
+
+    @patch('rescript.bv_brc.requests.get')
+    @patch('rescript.bv_brc.error_handling')
+    def test_download_data_success(self, mock_error_handling,
+                                   mock_requests_get):
+        # A 200 answer from requests.get is handed back to the caller
+        # unchanged.
+        fake_response = Mock(status_code=200)
+        mock_requests_get.return_value = fake_response
+        target_url = "http://example.com/data"
+        kind = "some_type"
+
+        returned = download_data(target_url, kind)
+
+        mock_requests_get.assert_called_once_with(target_url)
+        self.assertEqual(returned, fake_response)
+
+    @patch('rescript.bv_brc.requests.get')
+    @patch('rescript.bv_brc.error_handling')
+    def test_download_data_error_400(self, mock_error_handling,
+                                     mock_requests_get):
+        # A 400 answer is passed on to error_handling together with the
+        # requested data type.
+        fake_response = Mock(status_code=400)
+        mock_requests_get.return_value = fake_response
+        target_url = "http://example.com/data"
+        kind = "some_type"
+
+        download_data(target_url, kind)
+
+        mock_requests_get.assert_called_once_with(target_url)
+        mock_error_handling.assert_called_once_with(fake_response, kind)
+
+    @patch('rescript.bv_brc.requests.get')
+    @patch('rescript.bv_brc.error_handling')
+    def test_download_data_other_error(self, mock_error_handling,
+                                       mock_requests_get):
+        # A 500 answer raises a ValueError matching the response text and
+        # does not reach error_handling.
+        fake_response = Mock(status_code=500, text="Server Error")
+        mock_requests_get.return_value = fake_response
+        target_url = "http://example.com/data"
+        kind = "some_type"
+
+        with self.assertRaisesRegex(ValueError, "Server Error"):
+            download_data(target_url, kind)
+
+        mock_requests_get.assert_called_once_with(target_url)
+        mock_error_handling.assert_not_called()
+
+
+class TestJsonToFasta(TestPluginBase):
+    """Checks the FASTA files json_to_fasta writes for BV-BRC records."""
+    package = 'rescript.tests'
+
+    @staticmethod
+    def _entry(number, sequence):
+        # Build one genome sequence record whose identifier fields all
+        # carry the same running number.
+        return {
+            "genome_id": f"genome{number}",
+            "accession": f"acc{number}",
+            "description": f"desc{number}",
+            "genome_name": f"genome_name{number}",
+            "sequence": sequence,
+        }
+
+    def setUp(self):
+        super().setUp()
+
+        # Two single-record inputs that belong to different genomes.
+        self.json_input_1 = [self._entry(1, "ATGC")]
+        self.json_input_2 = [self._entry(2, "CGTA")]
+
+    @patch('rescript.bv_brc.open', new_callable=mock_open)
+    def test_json_to_fasta_single_genome(self, mock_file):
+        json_to_fasta(self.json_input_1, "/fake/dir")
+
+        # A single record yields one file that is named after the genome
+        # and holds exactly that record.
+        fasta = ">accn|acc1   desc1   [genome_name1 | genome1]\nATGC"
+
+        mock_file.assert_called_once_with("/fake/dir/genome1.fasta", 'w')
+        mock_file().write.assert_called_once_with(fasta)
+
+    @patch('rescript.bv_brc.open', new_callable=mock_open)
+    def test_json_to_fasta_multiple_genomes(self, mock_file):
+        records = self.json_input_1 + self.json_input_2
+        json_to_fasta(records, "/fake/dir")
+
+        # The record of every genome has to appear in one of the write
+        # calls.
+        expected_records = (
+            ">accn|acc1   desc1   [genome_name1 | genome1]\nATGC",
+            ">accn|acc2   desc2   [genome_name2 | genome2]\nCGTA",
+        )
+
+        for fasta in expected_records:
+            mock_file().write.assert_any_call(fasta)
+
+    @patch('rescript.bv_brc.open', new_callable=mock_open)
+    def test_json_to_fasta_multiple_sequences_same_genome(self, mock_file):
+        duplicated = self.json_input_1 * 2
+        json_to_fasta(duplicated, "/fake/dir")
+
+        # Records that share a genome ID are joined by a newline and
+        # written to that genome's file in one go.
+        record = ">accn|acc1   desc1   [genome_name1 | genome1]\nATGC"
+        fasta = record + "\n" + record
+
+        mock_file.assert_called_once_with("/fake/dir/genome1.fasta", 'w')
+        mock_file().write.assert_called_once_with(fasta)
 
 
-class TestPipelines(TestPluginBase):
+class TestFetchGenomeFeaturesBVBR(TestPluginBase):
     package = 'rescript.tests'
 
-    def test_fetch_genomes_bv_brc(self):
-        query = "?eq(genome_id,224308.43)"
-        query2 = "?eq(taxon_id,224308)"
-        fetch_genomes_bv_brc(query2)
\ No newline at end of file
+    @patch('rescript.bv_brc.download_data')
+    @patch('rescript.bv_brc.id_list_handling')
+    @patch.object(MixedCaseDNAFASTAFormat, 'open')
+    @patch.object(ProteinFASTAFormat, 'open')
+    def test_fetch_genome_features_bv_brc(
+            self, mock_protein_open, mock_genes_open, mock_id_list_handling,
+            mock_download_data
+    ):
+        # Shared fixtures: the RQL query, the endpoint it is appended to
+        # and the FASTA payload of each sequence type.
+        query = "in(feature_id, (feature1,feature2))"
+        endpoint = "https://www.bv-brc.org/api/genome_feature/?" + query
+        dna_fasta = ">gene1\nATGC\n>gene2\nATGC"
+        protein_fasta = ">protein1\nMVLSPADKTNVK\n>protein2\nMVLSPADKTNVK"
+
+        # The validation helper hands the query straight back.
+        mock_id_list_handling.return_value = query
+
+        # The first download answers with the DNA payload, the second one
+        # with the protein payload.
+        mock_download_data.side_effect = [
+            MagicMock(text=dna_fasta),
+            MagicMock(text=protein_fasta),
+        ]
+
+        # Capture what is written through each format's open() context.
+        genes_handle = MagicMock()
+        protein_handle = MagicMock()
+        mock_genes_open.return_value.__enter__.return_value = genes_handle
+        mock_protein_open.return_value.__enter__.return_value = protein_handle
+
+        genes, proteins = fetch_genome_features_bv_brc(
+            rql_query=query,
+            feature_ids=["feature1", "feature2"],
+        )
+
+        # Query and IDs are forwarded to the validation helper by keyword.
+        mock_id_list_handling.assert_called_once_with(
+            rql_query=query,
+            ids=["feature1", "feature2"],
+            parameter_name="feature_ids",
+            data_field="feature_id",
+        )
+
+        # One download per sequence type, selected through http_accept.
+        for mime in ("application/dna+fasta", "application/protein+fasta"):
+            mock_download_data.assert_any_call(
+                url=f"{endpoint}&http_accept={mime}",
+                data_type="genome_feature",
+            )
+
+        # Each payload ends up in the file of the matching output.
+        genes_handle.write.assert_called_once_with(dna_fasta)
+        protein_handle.write.assert_called_once_with(protein_fasta)
+
+        # Both return values are FASTA format objects.
+        self.assertIsInstance(genes, MixedCaseDNAFASTAFormat)
+        self.assertIsInstance(proteins, ProteinFASTAFormat)
+
+
+class TestFetchGenomesBVBRC(TestPluginBase):
+    """Checks the call chain of fetch_genomes_bv_brc with mocked helpers."""
+    package = 'rescript.tests'
+
+    @patch('rescript.bv_brc.json_to_fasta')
+    @patch('rescript.bv_brc.download_data')
+    @patch('rescript.bv_brc.id_list_handling')
+    def test_fetch_genomes_bv_brc(
+        self, mock_id_list_handling, mock_download_data, mock_json_to_fasta
+    ):
+        query = "genome_id=in(genome1,genome2)"
+        payload = {'genomes': ['genome_data']}
+
+        # The validation helper returns the query; the download returns a
+        # response whose JSON body is the payload.
+        mock_id_list_handling.return_value = query
+        fake_response = MagicMock()
+        fake_response.json.return_value = payload
+        mock_download_data.return_value = fake_response
+
+        genomes = fetch_genomes_bv_brc(
+            rql_query=query,
+            genome_ids=["genome1", "genome2"],
+        )
+
+        # Query and IDs are forwarded to the validation helper by keyword.
+        mock_id_list_handling.assert_called_once_with(
+            rql_query=query,
+            ids=["genome1", "genome2"],
+            parameter_name="genome_ids",
+            data_field="genome_id",
+        )
+
+        # The returned query is appended to the genome_sequence endpoint.
+        mock_download_data.assert_called_once_with(
+            url=f"https://www.bv-brc.org/api/genome_sequence/?{query}",
+            data_type="genome_sequence",
+        )
+
+        # The JSON body is written into the returned directory format.
+        mock_json_to_fasta.assert_called_once_with(payload, str(genomes))
+
+        self.assertIsInstance(genomes, GenomeSequencesDirectoryFormat)
+
+
+class TestFetchMetadataBVBR(TestPluginBase):
+    """Checks the call chain of fetch_metadata_bv_brc with mocked helpers."""
+    package = 'rescript.tests'
+
+    @patch('rescript.bv_brc.qiime2.Metadata')
+    @patch('rescript.bv_brc.pd.read_csv')
+    @patch('rescript.bv_brc.download_data')
+    def test_fetch_metadata_bv_brc(self, mock_download_data, mock_read_csv,
+                                   mock_metadata):
+        tsv_text = "id\tcolumn1\tcolumn2\n1\tdata1\tdata2\n2\tdata3\tdata4"
+
+        # The download returns a response whose text is the TSV table.
+        mock_download_data.return_value = MagicMock(text=tsv_text)
+
+        # pd.read_csv returns a prepared table indexed by "id".
+        table = pd.DataFrame(
+            {'column1': ['data1', 'data3'], 'column2': ['data2', 'data4']},
+            index=pd.Index(['1', '2'], name='id'),
+        )
+        mock_read_csv.return_value = table
+
+        # qiime2.Metadata returns a placeholder instance.
+        metadata_stub = mock_metadata.return_value
+
+        observed = fetch_metadata_bv_brc(data_type="genome",
+                                         rql_query="genome_id=in(1,2)")
+
+        # The query goes to the endpoint of the data type, asking for TSV.
+        mock_download_data.assert_called_once_with(
+            url="https://www.bv-brc.org/api/genome/"
+                "?genome_id=in(1,2)&http_accept=text/tsv",
+            data_type="genome",
+        )
+
+        # The response text is parsed tab-separated from a text buffer.
+        mock_read_csv.assert_called_once()
+        positional, keywords = mock_read_csv.call_args
+        self.assertEqual(keywords['sep'], '\t')
+        self.assertEqual(positional[0].getvalue(), tsv_text)
+
+        # The parsed table is wrapped into the returned Metadata object.
+        mock_metadata.assert_called_once_with(table)
+        self.assertEqual(observed, metadata_stub)
+
+
+class TestFetchTaxonomyBVBR(TestPluginBase):
+    """Checks the BV-BRC taxonomy download and its lineage conversion."""
+    package = 'rescript.tests'
+
+    @patch('pandas.DataFrame.to_csv')
+    @patch('rescript.bv_brc.transform_taxonomy_df')
+    @patch('rescript.bv_brc.download_data')
+    @patch('rescript.bv_brc.pd.read_csv')
+    @patch('rescript.bv_brc.id_list_handling')
+    def test_fetch_taxonomy_bv_brc(
+        self, mock_id_list_handling, mock_read_csv, mock_download_data,
+        mock_transform_taxonomy_df, mock_to_csv
+    ):
+        query = "taxon_id=in(taxon1,taxon2)"
+        tsv_text = "id\trank1\trank2\n1\tdata1\tdata2\n2\tdata3\tdata4"
+
+        # The validation helper returns the query, the download a TSV text.
+        mock_id_list_handling.return_value = query
+        mock_download_data.return_value = MagicMock(text=tsv_text)
+
+        # Every open() call made during the fetch is intercepted.
+        with patch('builtins.open', unittest.mock.mock_open()):
+            directory = fetch_taxonomy_bv_brc(
+                rql_query=query,
+                ranks=['rank1', 'rank2'],
+                taxon_ids=["taxon1", "taxon2"],
+            )
+
+            # Query and IDs are forwarded to the validation helper.
+            mock_id_list_handling.assert_called_once_with(
+                rql_query=query,
+                ids=["taxon1", "taxon2"],
+                parameter_name="taxon_ids",
+                data_field="taxon_id",
+            )
+
+            # The query goes to the taxonomy endpoint, asking for TSV.
+            mock_download_data.assert_called_once_with(
+                url="https://www.bv-brc.org/api/taxonomy/"
+                    f"?{query}&http_accept=text/tsv",
+                data_type="taxonomy",
+            )
+
+            self.assertIsInstance(directory, TSVTaxonomyDirectoryFormat)
+
+    @patch('rescript.bv_brc.parse_lineage_names_with_ranks')
+    def test_transform_taxonomy_df(self, mock_parse_lineage_names_with_ranks):
+        # Replace the lineage parser by a stub with the same parameters.
+        mock_parse_lineage_names_with_ranks.side_effect = (
+            lambda lineage_names, lineage_ranks, ranks: "Mocked Taxon"
+        )
+
+        # Raw table with one lineage per taxon ID.
+        raw = pd.DataFrame({
+            'taxon_id': ['taxon1', 'taxon2'],
+            'lineage_names': ['name1;name2', 'name3;name4'],
+            'lineage_ranks': ['rank1;rank2', 'rank3;rank4'],
+        })
+
+        observed = transform_taxonomy_df(raw, ['rank1', 'rank2', 'rank3'])
+
+        # The result is indexed by 'Feature ID' with a 'Taxon' column.
+        expected = pd.DataFrame({
+            'Feature ID': ['taxon1', 'taxon2'],
+            'Taxon': ['Mocked Taxon', 'Mocked Taxon'],
+        }).set_index('Feature ID')
+        pd.testing.assert_frame_equal(observed, expected)
+
+    def test_parse_with_missing_ranks(self):
+        # Requested ranks absent from the lineage keep an empty prefix.
+        observed = parse_lineage_names_with_ranks(
+            "Bacteria;Proteobacteria;Enterobacteriaceae",
+            "kingdom;phylum;family",
+            ['kingdom', 'phylum', 'class', 'order', 'genus', 'species'],
+        )
+
+        expected = "k__Bacteria; p__Proteobacteria; c__; o__; g__; s__"
+        self.assertEqual(observed, expected)
+
+    def test_parse_with_no_ranks_provided(self):
+        # Passing None as ranks should fall back to _default_ranks.
+        observed = parse_lineage_names_with_ranks(
+            "Bacteria;Proteobacteria;Gammaproteobacteria;Enterobacterales;"
+            "Enterobacteriaceae;Escherichia;coli",
+            "kingdom;phylum;class;order;family;genus;species",
+            None,
+        )
+
+        expected = ("k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; "
+                    "o__Enterobacterales; f__Enterobacteriaceae; "
+                    "g__Escherichia; s__coli")
+        self.assertEqual(observed, expected)