Skip to content

Commit

Permalink
Working: fetching genome sequences and genome metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
VinzentRisch committed Aug 7, 2024
1 parent 7d9ef27 commit a641c6d
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 0 deletions.
60 changes: 60 additions & 0 deletions rescript/bv_brc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2019-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from io import StringIO

import qiime2
import pandas as pd
import requests
from q2_types.feature_data import MixedCaseDNAFASTAFormat


def json_to_fasta(json: list) -> str:
    """Render genome sequence records as FASTA text.

    Parameters
    ----------
    json : list of dict
        Decoded JSON records, one per sequence. Every record must provide
        the keys ``sequence_id``, ``description``, ``genome_name``,
        ``genome_id`` and ``sequence``.

    Returns
    -------
    str
        One header line plus one sequence line per record, records joined
        by newlines with no trailing newline. An empty string if ``json``
        contains no records.

    Raises
    ------
    KeyError
        If a record lacks one of the required keys.
    """
    return "\n".join(
        f">accn|{entry['sequence_id']} {entry['description']} "
        f"[{entry['genome_name']} | {entry['genome_id']}]\n"
        f"{entry['sequence']}"
        for entry in json
    )


def _bv_brc_get(url: str, what: str) -> requests.Response:
    """GET ``url`` and return the response, raising ValueError on non-200."""
    # Without a timeout a stalled connection would block the action forever.
    # The values bound connection setup and the gap between received bytes,
    # not the total download time.
    response = requests.get(url, timeout=(10, 300))
    if response.status_code != 200:
        raise ValueError(
            f"BV-BRC request for {what} failed with HTTP status "
            f"{response.status_code}: {response.text[:500]}"
        )
    return response


def fetch_genomes_bv_brc(
        rql_query: str
) -> (MixedCaseDNAFASTAFormat, qiime2.Metadata):
    """Fetch genome sequences and genome metadata from BV-BRC.

    The genome metadata matching ``rql_query`` is downloaded first; the
    genome IDs found there are then used to download the sequences.

    Parameters
    ----------
    rql_query : str
        RQL query appended verbatim to the ``genome`` endpoint URL. It must
        include the leading ``?``, e.g. ``"?eq(taxon_id,224308)"``.

    Returns
    -------
    MixedCaseDNAFASTAFormat
        FASTA file holding the sequences of all fetched genomes.
    qiime2.Metadata
        Genome metadata indexed by genome ID (index named ``id``).

    Raises
    ------
    ValueError
        If a request does not return HTTP status 200, or if the metadata
        response contains no genomes or no ``genome_id`` column.
    """
    # NOTE(review): neither request sets an explicit ``limit(...)``. If the
    # API caps the number of rows returned by default, large result sets
    # would be silently truncated -- confirm against the BV-BRC API docs.
    base_url = "https://www.bv-brc.org/api"
    no_match_msg = (
        f"The BV-BRC query {rql_query!r} did not return any genomes."
    )

    response_metadata = _bv_brc_get(
        f"{base_url}/genome/{rql_query}&http_accept=text/tsv",
        "genome metadata",
    )

    # Genome IDs look like decimal numbers ("224308.43"). They have to be
    # read as text, otherwise they are parsed as floats and IDs such as
    # "1234.10" silently become "1234.1".
    try:
        metadata = pd.read_csv(
            StringIO(response_metadata.text),
            sep='\t',
            dtype={"genome_id": str},
        )
    except pd.errors.EmptyDataError as err:
        raise ValueError(no_match_msg) from err

    if "genome_id" not in metadata.columns:
        raise ValueError(
            "The BV-BRC metadata response has no 'genome_id' column."
        )
    if metadata.empty:
        raise ValueError(no_match_msg)

    metadata = metadata.set_index("genome_id")
    metadata.index.name = "id"
    genome_ids = metadata.index.tolist()

    response_sequences = _bv_brc_get(
        f"{base_url}/genome_sequence/"
        f"?in(genome_id,({','.join(genome_ids)}))",
        "genome sequences",
    )
    fasta = json_to_fasta(response_sequences.json())

    # Create the output file only once both downloads have succeeded.
    genomes = MixedCaseDNAFASTAFormat()
    with genomes.open() as file:
        file.write(fasta)

    return genomes, qiime2.Metadata(metadata)
17 changes: 17 additions & 0 deletions rescript/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
import importlib

from q2_types.genome_data import GenomeData, Loci, Proteins
from q2_types.metadata import ImmutableMetadata
from qiime2.core.type import TypeMatch
from qiime2.plugin import (Str, Plugin, Choices, List, Citations, Range, Int,
Float, Visualization, Bool, TypeMap, Metadata,
MetadataColumn, Categorical)

from .bv_brc import fetch_genomes_bv_brc
from .subsample import subsample_fasta
from .trim_alignment import trim_alignment
from .merge import merge_taxa
Expand Down Expand Up @@ -1228,6 +1230,21 @@
]
)

# Expose fetch_genomes_bv_brc as a plugin method. It takes no input
# artifacts, only the query string.
plugin.methods.register_function(
    function=fetch_genomes_bv_brc,
    inputs={},
    parameters={'rql_query': Str},
    outputs=[('genomes', FeatureData[Sequence]),
             ('metadata', ImmutableMetadata)],
    input_descriptions={},
    parameter_descriptions={
        'rql_query': 'RQL query selecting genomes from the BV-BRC genome '
                     'endpoint, including the leading "?", e.g. '
                     '"?eq(taxon_id,224308)".'},
    output_descriptions={
        'genomes': 'Sequences of all genomes matching the query.',
        'metadata': 'BV-BRC genome metadata of all genomes matching the '
                    'query, indexed by genome ID.'},
    name='Fetch genome sequences and metadata from BV-BRC',
    description='Query the BV-BRC genome endpoint with an RQL query, then '
                'download the metadata and the sequences of all genomes '
                'matching that query.',
)

# Registrations
plugin.register_semantic_types(SILVATaxonomy, SILVATaxidMap)
plugin.register_semantic_type_to_format(
Expand Down
19 changes: 19 additions & 0 deletions rescript/tests/test_bv_brc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2019-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from qiime2.plugin.testing import TestPluginBase

from rescript.bv_brc import fetch_genomes_bv_brc


class TestPipelines(TestPluginBase):
    """Tests for fetching genomes and genome metadata from BV-BRC."""
    package = 'rescript.tests'

    def test_fetch_genomes_bv_brc(self):
        """Canned HTTP responses are turned into a FASTA file and metadata."""
        # Imported locally so the test stays self-contained.
        from unittest.mock import MagicMock, patch

        # Stand-ins for the two HTTP responses, so the test does not depend
        # on the live BV-BRC service.
        metadata_response = MagicMock(
            status_code=200,
            text="genome_id\tgenome_name\n224308.43\tBacillus subtilis\n",
        )
        sequences_response = MagicMock(status_code=200)
        sequences_response.json.return_value = [{
            "sequence_id": "NC_000964",
            "description": "Bacillus subtilis chromosome",
            "genome_name": "Bacillus subtilis",
            "genome_id": "224308.43",
            "sequence": "ACGTacgt",
        }]

        # The metadata request is issued first, the sequence request second.
        with patch(
            "rescript.bv_brc.requests.get",
            side_effect=[metadata_response, sequences_response],
        ) as mock_get:
            genomes, metadata = fetch_genomes_bv_brc(
                "?eq(genome_id,224308.43)"
            )

        self.assertEqual(mock_get.call_count, 2)
        self.assertEqual(list(metadata.ids), ["224308.43"])

        with open(str(genomes)) as fh:
            fasta_lines = fh.read().splitlines()
        self.assertEqual(
            fasta_lines,
            [">accn|NC_000964 Bacillus subtilis chromosome "
             "[Bacillus subtilis | 224308.43]",
             "ACGTacgt"],
        )

0 comments on commit a641c6d

Please sign in to comment.