Skip to content

Commit

Permalink
Merge branch 'feature/reference_alleles' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
luissian committed Mar 1, 2024
2 parents 54fc96f + 74a528a commit f6ea21b
Show file tree
Hide file tree
Showing 16 changed files with 962 additions and 465 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
name: Python tests
# This workflow is triggered on pushes and PRs to the repository.
# Runs are skipped when a change only touches docs/** or CHANGELOG.md.
on:
  push:
    # NOTE(review): confirm "dev" is the actual development branch name.
    branches:
      - dev
    paths-ignore:
      - "docs/**"
      - "CHANGELOG.md"
  pull_request:
    paths-ignore:
      - "docs/**"
      - "CHANGELOG.md"
  release:
    types: [published]
  workflow_dispatch:
    inputs:
      # NOTE(review): this input is declared but the job below always
      # runs on ubuntu-latest; wire it into runs-on or drop it.
      runners:
        description: "Runners to test on"
        type: choice
        options:
          - "ubuntu-latest"
          - "self-hosted"
        default: "self-hosted"

# Cancel if a newer run with the same workflow name is queued
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:

  test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set up Python 3.10
        uses: actions/setup-python@v2
        with:
          # Must be quoted: an unquoted 3.10 is a YAML float and becomes 3.1.
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      - name: Run tests
        run: |
          pytest
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@ jobs:
run: |
source $CONDA/etc/profile.d/conda.sh
conda activate taranis_env
poetry install
python -m pip install .
taranis analyze-schema -i test/MLST_listeria -o analyze_schema_test --cpus 1 --output-allele-annot --remove-no-cds --remove-duplicated --remove-subset
19 changes: 19 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Read the Docs configuration file for MkDocs projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# NOTE(review): this file selects the MkDocs builder, but docs/requirements.txt
# lists Sphinx packages (Sphinx, sphinx_rtd_theme, myst-parser) - confirm which
# documentation tool is intended and align the two.

# Required
version: 2

# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.12"

# Build the documentation with MkDocs using the mkdocs.yml configuration file
mkdocs:
configuration: mkdocs.yml

# Optionally declare the Python requirements required to build your docs
python:
install:
- requirements: docs/requirements.txt
4 changes: 4 additions & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Sphinx>=3.3.1
sphinxcontrib-napoleon
sphinx_rtd_theme>=0.5.0
myst-parser
28 changes: 20 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,18 +1,30 @@
[tool.poetry]
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "taranis"
version = "3.0.0"
dynamic = ["dependencies"]
requires-python = ">=3.10"

authors = [
{name = "Sara Monzon", email = "[email protected]"},
{name = "Luis Chapado", email = "[email protected]"},
]
maintainers = [
{name = "Luis Chapado", email = "[email protected]"}
]
description = "Tools for gene-by-gene allele calling analysis"
readme = "README.md"
authors = ["Sara Monzon <[email protected]>"]
license = "GNU-3.0"
license = {file = "LICENSE"}

[tool.poetry.dependencies]
python = "^3.10"

[tool.poetry.scripts]
taranis = "taranis.__main__:run_taranis"
[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}

[project.scripts]
taranis = "taranis.__main__:run_taranis"

[tool.setuptools.packages.find]
exclude = ["img", "virtualenv"]

6 changes: 6 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Pytest configuration for the project test suite.
[pytest]
# Ignore PytestRemovedIn8Warning when reported from module _pytest.nodes, line 140.
# NOTE(review): the filter names a pytest-version-specific warning class -
# confirm it still exists in the pytest version installed by CI.
filterwarnings =
ignore::pytest.PytestRemovedIn8Warning:_pytest.nodes:140
# Directory searched for tests when pytest is run without arguments.
testpaths =
tests
# Only files matching this pattern are collected as test modules.
python_files = test_*.py
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
biopython
igraph
rich
click
leidenalg
questionary
bio
scikit-learn
Expand Down
37 changes: 37 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python
"""Setuptools packaging script for taranis.

Reads the long description from README.md and the install requirements from
requirements.txt, then registers the ``taranis`` console script.
"""

from setuptools import setup, find_packages

version = "3.0.0"

# Explicit encoding so the build does not depend on the platform default.
with open("README.md", encoding="utf-8") as f:
    readme = f.read()

# Keep only real requirement specifiers: blank lines and comment lines in
# requirements.txt are not valid install_requires entries.
with open("requirements.txt", encoding="utf-8") as f:
    required = [
        line.strip()
        for line in f
        if line.strip() and not line.lstrip().startswith("#")
    ]

setup(
    name="taranis",
    version=version,
    description="Tools for gene-by-gene allele calling analysis",
    long_description=readme,
    long_description_content_type="text/markdown",
    keywords=[
        "buisciii",
        "bioinformatics",
        "pipeline",
        "sequencing",
        "NGS",
        "next generation sequencing",
    ],
    author="Sara Monzon",
    author_email="[email protected]",
    url="https://github.com/BU-ISCIII/taranis",
    license="GNU GENERAL PUBLIC LICENSE v.3",
    entry_points={"console_scripts": ["taranis=taranis.__main__:run_taranis"]},
    # Matches requires-python declared in pyproject.toml.
    python_requires=">=3.10, <4",
    install_requires=required,
    # Trailing comma makes this a one-element tuple; ("docs") is just the
    # string "docs", which find_packages would iterate character by character.
    packages=find_packages(exclude=("docs",)),
    include_package_data=True,
    zip_safe=False,
)
81 changes: 78 additions & 3 deletions taranis/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,67 @@ def analyze_schema(
type=click.Path(),
help="Output folder to save reference alleles",
)
@click.option(
"--eval-cluster/--no-eval-cluster",
required=False,
default=True,
help="Evaluate if the reference alleles match against blast with a 90% identity",
)
@click.option(
"-k",
"--kmer-size",
required=False,
type=int,
default=21,
help="Mash parameter for K-mer size.",
)
@click.option(
"-S",
"--sketch-size",
required=False,
type=int,
default=2000,
help="Mash parameter for Sketch size",
)
@click.option(
"-r",
"--cluster-resolution",
required=False,
type=float,
default=0.92,
help="Resolution value used for clustering.",
)
@click.option(
"--seed",
required=False,
type=int,
default=None,
help="Seed value for clustering",
)
@click.option(
"--cpus",
required=False,
multiple=False,
type=int,
default=1,
help="Number of cpus used for execution",
)
def reference_alleles(
schema: str,
output: str,
eval_cluster: bool,
kmer_size: int,
sketch_size: int,
cluster_resolution: float,
seed: int,
cpus: int,
):
start = time.perf_counter()
max_cpus = taranis.utils.cpus_available()
if cpus > max_cpus:
stderr.print("[red] Number of CPUs bigger than the CPUs available")
stderr.print("Running code with ", max_cpus)
cpus = max_cpus
schema_files = taranis.utils.get_files_in_folder(schema, "fasta")

# Check if output folder exists
Expand All @@ -280,9 +337,27 @@ def reference_alleles(
stderr.print("[red] ERROR. Unable to create folder " + output)
sys.exit(1)
"""Create the reference alleles from the schema """
for f_file in schema_files:
ref_alleles = taranis.reference_alleles.ReferenceAlleles(f_file, output)
_ = ref_alleles.create_ref_alleles()
results = []
with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor:
futures = [
executor.submit(
taranis.reference_alleles.parallel_execution,
f_file,
output,
eval_cluster,
kmer_size,
sketch_size,
cluster_resolution,
seed,
)
for f_file in schema_files
]
# import pdb; pdb.set_trace()
for future in concurrent.futures.as_completed(futures):
results.append(future.result())
_ = taranis.reference_alleles.collect_statistics(results, eval_cluster, output)
finish = time.perf_counter()
print(f"Reference alleles finish in {round((finish-start)/60, 2)} minutes")


@taranis_cli.command(help_priority=3)
Expand Down
28 changes: 23 additions & 5 deletions taranis/blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,21 @@


class Blast:
def __init__(self, db_type):
def __init__(self, db_type: str):
"""Blast instance creation
Args:
db_type (str): type of blast database (nucleotide or protein)
"""
self.db_type = db_type

def create_blastdb(self, file_name, blast_dir):
def create_blastdb(self, file_name: str, blast_dir: str) -> None:
"""Create blast database and store it at blast dir
Args:
file_name (str): Fasta file used to generate the database
blast_dir (str): directory to store blast database files
"""
self.f_name = Path(file_name).stem
db_dir = os.path.join(blast_dir, self.f_name)
self.out_blast_dir = os.path.join(db_dir, self.f_name)
Expand Down Expand Up @@ -60,9 +71,10 @@ def run_blast(
penalty: int = -2,
gapopen: int = 1,
gapextend: int = 1,
max_target_seqs: int = 1000,
max_target_seqs: int = 2000,
max_hsps: int = 10,
num_threads: int = 1,
query_type: str = "file",
) -> list:
"""blast command is executed, returning a list of each match found
Expand All @@ -77,10 +89,13 @@ def run_blast(
max_target_seqs (int, optional): max target to output. Defaults to 2000.
max_hsps (int, optional): max hsps. Defaults to 10.
num_threads (int, optional): number of threads. Defaults to 1.
query_type (str, optional): how the query is supplied, either "file" or "stdin". Defaults to "file".
Returns:
list: list of strings containing blast results
"""
if query_type == "stdin":
stdin_query = query
query = "-"
blast_parameters = '"6 , qseqid , sseqid , pident , qlen , length , mismatch , gapopen , evalue , bitscore , sstart , send , qstart , qend , sseq , qseq"'
cline = NcbiblastnCommandline(
task="blastn",
Expand All @@ -98,7 +113,10 @@ def run_blast(
query=query,
)
try:
out, _ = cline()
if query_type == "stdin":
out, _ = cline(stdin=stdin_query)
else:
out, _ = cline()
except Exception as e:
log.error("Unable to run blast for %s ", self.out_blast_dir)
log.error(e)
Expand Down
Loading

0 comments on commit f6ea21b

Please sign in to comment.