From 7db5205be34a60b21f6e2474d266c372c272b2c3 Mon Sep 17 00:00:00 2001
From: Manuel Holtgrewe <manuel.holtgrewe@bih-charite.de>
Date: Thu, 4 May 2023 08:59:07 +0200
Subject: [PATCH] style: apply style black, flake8, isort (#108)

Also includes this as a check in CI.
---
 .github/workflows/ci.yml                      |  47 +++-
 .gitignore                                    |   4 +
 docs/utils.py                                 |  16 +-
 fixes/fixer                                   |   3 +-
 misc/rsync-log-summary                        |   1 -
 misc/threading-verification.py                |  10 +-
 sbin/get-sequence-urls                        |   2 +-
 setup.cfg                                     |   3 +-
 setup.py                                      |   1 +
 src/biocommons/__init__.py                    |   1 +
 src/biocommons/seqrepo/__init__.py            |  10 +-
 .../seqrepo/_internal/logging_support.py      |   2 -
 src/biocommons/seqrepo/_internal/translate.py | 131 ++++-----
 src/biocommons/seqrepo/_versionwarning.py     |   7 +-
 src/biocommons/seqrepo/cli.py                 | 254 ++++++++++--------
 src/biocommons/seqrepo/config.py              |   7 +-
 src/biocommons/seqrepo/dataproxy.py           |  11 +-
 src/biocommons/seqrepo/fastadir/__init__.py   |   2 +-
 .../fastadir/_data/migrations/0000-base.py    |   6 +-
 .../fastadir/_data/migrations/0001-initial.py |   7 +-
 src/biocommons/seqrepo/fastadir/bases.py      |   8 +-
 src/biocommons/seqrepo/fastadir/fabgz.py      |  21 +-
 src/biocommons/seqrepo/fastadir/fastadir.py   |  37 ++-
 src/biocommons/seqrepo/fastaiter/__init__.py  |   2 +-
 src/biocommons/seqrepo/fastaiter/fastaiter.py |   5 +-
 src/biocommons/seqrepo/seqaliasdb/__init__.py |   2 +-
 .../seqaliasdb/_data/migrations/0000-base.py  |   6 +-
 .../_data/migrations/0001-initial.py          |  37 ++-
 .../seqrepo/seqaliasdb/seqaliasdb.py          |  62 +++--
 src/biocommons/seqrepo/seqrepo.py             |  98 +++----
 src/biocommons/seqrepo/utils.py               |   1 -
 tests/conftest.py                             |  11 +-
 tests/test_cli.py                             |  31 ++-
 tests/test_fastadir.py                        |   3 +-
 tests/test_fastaiter.py                       |   4 +-
 tests/test_seqaliasdb.py                      |  33 +--
 tests/test_seqrepo.py                         |  62 +++--
 tests/test_utils.py                           |  13 +-
 38 files changed, 534 insertions(+), 427 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index da899de..6315f16 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,7 +17,45 @@ jobs:
         env:
           GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
 
+  linting:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v1
+
+      - name: Install Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.8"
+
+      - name: Install dependencies
+        run: |
+            # Install / update package management tools
+            pip install -U pip setuptools
+            # Test and lint dependencies; specifiers are quoted so the shell does not treat '>' / '<' as redirections
+            pip install -U pytest pytest-cov pytest-vcr black==23.3.0 "flake8>=6.0,<7.0" "isort>=5.0,<6.0"
+            # Dump installed packages and versions
+            pip freeze
+
+      - name: Run linting tools
+        run: |
+          black -l 120 --check --diff .
+          isort --profile=black --check --diff .
+          flake8 src tests docs setup.py
+
+      - name: Comment PR
+        if: github.event_name == 'pull_request' && failure()
+        uses: marocchino/sticky-pull-request-comment@v1.1.0
+        with:
+          message: |
+            - Please format your Python code with [black](https://black.readthedocs.io): `make black`
+            - Please organize your imports with [isort](https://isort.readthedocs.io): `make isort`
+            - Please ensure that your code passes [flake8](https://flake8.pycqa.org/en/latest/): `make flake8`
+
+            You can trigger all lints locally by running `black -l 120 --check . && isort --profile=black --check . && flake8 src tests docs setup.py`
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
   testing:
+    needs: linting
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -36,12 +74,17 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
 
-      - name: Install system dependencies
+      - name: Install dependencies
         run: |
           sudo apt install libhts-dev libhts3 libhtscodecs-dev libhtscodecs2 tabix
+          # Install / update package management tools.
           pip install -U pip setuptools
-          pip install -U pytest pytest-cov pytest-vcr
+          # Test dependencies; specifiers are quoted so the shell does not treat '>' / '<' as redirections
+          pip install -U pytest pytest-cov pytest-vcr black==23.3.0 "flake8>=6.0,<7.0" "isort>=5.0,<6.0"
+          # Install the local package itself in editable mode.
           pip install -e .
+          # Dump installed packages and versions
+          pip freeze
 
       - name: Run tests
         run: |
diff --git a/.gitignore b/.gitignore
index 40dfebe..7ba9721 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+# Editors
+.*.sw?
+*~
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/docs/utils.py b/docs/utils.py
index e81f754..e1d227d 100644
--- a/docs/utils.py
+++ b/docs/utils.py
@@ -1,13 +1,11 @@
+import math
 from base64 import urlsafe_b64decode, urlsafe_b64encode
 from binascii import hexlify, unhexlify
-import datetime
-import math
-import sys
 
 
 def _format_time(timespan, precision=3):
     """Formats the timespan in a human readable form
-    
+
     >>> _format_time(0.35)
     '350 ms'
 
@@ -26,21 +24,21 @@ def _format_time(timespan, precision=3):
     if timespan >= 60.0:
         # we have more than a minute, format that in a human readable form
         # Idea from http://snipplr.com/view/5713/
-        parts = [("d", 60*60*24),("h", 60*60),("min", 60), ("s", 1)]
+        parts = [("d", 60 * 60 * 24), ("h", 60 * 60), ("min", 60), ("s", 1)]
         time = []
         leftover = timespan
         for suffix, length in parts:
             value = int(leftover / length)
             if value > 0:
                 leftover = leftover % length
-                time.append(u'%s%s' % (str(value), suffix))
+                time.append(u"%s%s" % (str(value), suffix))
             if leftover < 1:
                 break
         return " ".join(time)
 
-    units = [u"s", u"ms", u"us", u"ns"]  # the save value   
+    units = [u"s", u"ms", u"us", u"ns"]  # the save value
     scaling = [1, 1e3, 1e6, 1e9]
-        
+
     if timespan > 0.0:
         order = min(-int(math.floor(math.log10(timespan)) // 3), 3)
     else:
@@ -51,6 +49,6 @@ def _format_time(timespan, precision=3):
 def hex_to_base64url(s):
     return urlsafe_b64encode(unhexlify(s)).decode("ascii")
 
+
 def base64url_to_hex(s):
     return hexlify(urlsafe_b64decode(s)).decode("ascii")
-
diff --git a/fixes/fixer b/fixes/fixer
index fcbcc63..fd39fea 100755
--- a/fixes/fixer
+++ b/fixes/fixer
@@ -8,14 +8,13 @@ This script isn't for general users
 
 import argparse
 import logging
-import sqlite3
 import os
+import sqlite3
 import sys
 
 import coloredlogs
 import yaml
 
-
 _logger = logging.getLogger()
 
 fixes_dir = os.path.dirname(sys.argv[0])
diff --git a/misc/rsync-log-summary b/misc/rsync-log-summary
index 3d2cf34..f04397c 100755
--- a/misc/rsync-log-summary
+++ b/misc/rsync-log-summary
@@ -11,7 +11,6 @@ import sys
 import coloredlogs
 from dateutil.parser import parse
 
-
 _logger = logging.getLogger(__name__)
 
 # e.g., 2016/08/31 01:24:34 [32383] rsync on seqrepo/ from ec2-....compute.amazonaws.com (52.34.43.195)
diff --git a/misc/threading-verification.py b/misc/threading-verification.py
index fc4efc9..235d3bd 100644
--- a/misc/threading-verification.py
+++ b/misc/threading-verification.py
@@ -22,9 +22,9 @@
 
 
 import os
-from multiprocessing import Process, Queue
 import sqlite3
 import sys
+from multiprocessing import Process, Queue
 
 from biocommons.seqrepo import SeqRepo
 
@@ -37,7 +37,7 @@ def fetch_in_thread(sr, nsa):
     def fetch_seq(q, nsa):
         pid, ppid = os.getpid(), os.getppid()
         q.put((pid, ppid, sr[nsa]))
-    
+
     q = Queue()
     p = Process(target=fetch_seq, args=(q, nsa))
     p.start()
@@ -46,9 +46,9 @@ def fetch_seq(q, nsa):
 
     assert pid != ppid, "sequence was not fetched from thread"
     return pid, ppid, seq
-    
 
-def make_seqrepo(writeable):    
+
+def make_seqrepo(writeable):
     sr = SeqRepo("/tmp/sr", writeable=True)
     sr.store("SMELLASSWEET", [{"namespace": "en", "alias": "rose"}, {"namespace": "fr", "alias": "rose"}])
 
@@ -70,6 +70,6 @@ def _test(sr):
     print("sys.platform: " + sys.platform)
     print("sys.version: " + sys.version.replace("\n", " "))
     print("sqlite3.sqlite_version: " + sqlite3.sqlite_version)
-    
+
     _test(make_seqrepo(writeable=False))
     _test(make_seqrepo(writeable=True))
diff --git a/sbin/get-sequence-urls b/sbin/get-sequence-urls
index 87dfc0b..8759cfe 100755
--- a/sbin/get-sequence-urls
+++ b/sbin/get-sequence-urls
@@ -15,9 +15,9 @@ import logging
 import re
 import sys
 from urllib.request import urljoin, urlopen
-from requests_html import HTMLSession, HTML
 
 import coloredlogs
+from requests_html import HTML, HTMLSession
 
 _logger = logging.getLogger(__name__)
 
diff --git a/setup.cfg b/setup.cfg
index 5c3bc92..1cd277d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -103,6 +103,5 @@ all_files  = 1
 max-line-length = 120
 exclude = tests/*
 max-complexity = 10
-ignore = E129,E221,E241,E251,E303,W291
-
+ignore = E203, E266, E501, W503
 
diff --git a/setup.py b/setup.py
index 460aabe..d5d43d7 100644
--- a/setup.py
+++ b/setup.py
@@ -1,2 +1,3 @@
 from setuptools import setup
+
 setup(use_scm_version=True)
diff --git a/src/biocommons/__init__.py b/src/biocommons/__init__.py
index 54720a6..96686a2 100644
--- a/src/biocommons/__init__.py
+++ b/src/biocommons/__init__.py
@@ -1,3 +1,4 @@
 # pragma: nocover
 import pkg_resources
+
 pkg_resources.declare_namespace(__name__)
diff --git a/src/biocommons/seqrepo/__init__.py b/src/biocommons/seqrepo/__init__.py
index 670a8da..51341fe 100644
--- a/src/biocommons/seqrepo/__init__.py
+++ b/src/biocommons/seqrepo/__init__.py
@@ -2,24 +2,24 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 
 import logging
-import pkg_resources
 import warnings
 
+import pkg_resources
+
 _logger = logging.getLogger(__name__)
 
-from ._versionwarning import *
+from ._versionwarning import *  # noqa: F401, F403, E402
 
 try:
     __version__ = pkg_resources.get_distribution(__name__).version
-except pkg_resources.DistributionNotFound as e:    # pragma: no cover
+except pkg_resources.DistributionNotFound:  # pragma: no cover
     warnings.warn("can't get __version__ because %s package isn't installed" % __package__, Warning)
     __version__ = None
 
 _logger.info(__name__ + " " + __version__)
 
 
-from .seqrepo import SeqRepo
-
+from .seqrepo import SeqRepo  # noqa: F401, E402
 
 # <LICENSE>
 # Copyright 2016 biocommons.fastadir Contributors (https://github.com/biocommons/biocommons.fastadir/)
diff --git a/src/biocommons/seqrepo/_internal/logging_support.py b/src/biocommons/seqrepo/_internal/logging_support.py
index 85e1c52..41ac562 100644
--- a/src/biocommons/seqrepo/_internal/logging_support.py
+++ b/src/biocommons/seqrepo/_internal/logging_support.py
@@ -1,5 +1,3 @@
-import logging
-
 class DuplicateFilter:
     """
     Filters away duplicate log messages.
diff --git a/src/biocommons/seqrepo/_internal/translate.py b/src/biocommons/seqrepo/_internal/translate.py
index 33bb19b..a3e4090 100644
--- a/src/biocommons/seqrepo/_internal/translate.py
+++ b/src/biocommons/seqrepo/_internal/translate.py
@@ -17,7 +17,6 @@
 import datetime
 
 
-
 def translate_db2api(namespace, alias):
     """
     >>> translate_db2api("VMC", "GS_1234")
@@ -32,10 +31,7 @@ def translate_db2api(namespace, alias):
     if namespace == "LRG":
         return [("lrg", alias)]
     if namespace == "VMC":
-        return [
-            ("sha512t24u", alias[3:] if alias else None),
-            ("ga4gh", "SQ." + alias[3:] if alias else None),
-        ]
+        return [("sha512t24u", alias[3:] if alias else None), ("ga4gh", "SQ." + alias[3:] if alias else None)]
     return []
 
 
@@ -53,21 +49,16 @@ def translate_api2db(namespace, alias):
     if namespace == "lrg":
         return [("LRG", alias)]
     if namespace == "sha512t24u":
-        return [
-            ("VMC", "GS_" + alias if alias else None),
-        ]
+        return [("VMC", "GS_" + alias if alias else None)]
     if namespace == "ga4gh":
-        return [
-            ("VMC", "GS_" + alias[3:]),
-        ]
+        return [("VMC", "GS_" + alias[3:])]
     return []
 
 
-
 def translate_alias_records(aliases_itr):
     """given an iterator of find_aliases results, return a stream with
     translated records"""
-    
+
     for arec in aliases_itr:
         yield arec
 
@@ -78,56 +69,70 @@ def translate_alias_records(aliases_itr):
             yield arec2
 
 
-
-
 if __name__ == "__main__":
     aliases = [
-        {'seqalias_id': 16,
-         'seq_id': '9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6',
-         'alias': 'ncbiac/e',
-         'added': datetime.datetime(2020, 7, 6, 5, 27, 23),
-         'is_current': 1,
-         'namespace': 'Ensembl'},
-        {'seqalias_id': 16,
-         'seq_id': '9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6',
-         'alias': 'ncbiac/e',
-         'added': datetime.datetime(2020, 7, 6, 5, 27, 23),
-         'is_current': 1,
-         'namespace': 'ensembl'},
-        {'seqalias_id': 3,
-         'seq_id': '9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6',
-         'alias': 'be8a4c35767bb783a7b8b6dc04ba3718',
-         'added': datetime.datetime(2020, 7, 6, 5, 10, 57),
-         'is_current': 1,
-         'namespace': 'MD5'},
-        {'seqalias_id': 5,
-         'seq_id': '9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6',
-         'alias': 'ncbiac',
-         'added': datetime.datetime(2020, 7, 6, 5, 10, 57),
-         'is_current': 1,
-         'namespace': 'NCBI'},
-        {'seqalias_id': 5,
-         'seq_id': '9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6',
-         'alias': 'ncbiac',
-         'added': datetime.datetime(2020, 7, 6, 5, 10, 57),
-         'is_current': 1,
-         'namespace': 'refseq'},
-        {'seqalias_id': 4,
-         'seq_id': '9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6',
-         'alias': '5W5mCzikufDcezdNTGKLa9zricw',
-         'added': datetime.datetime(2020, 7, 6, 5, 10, 57),
-         'is_current': 1,
-         'namespace': 'SEGUID'},
-        {'seqalias_id': 2,
-         'seq_id': '9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6',
-         'alias': 'e56e660b38a4b9f0dc7b374d4c628b6bdceb89cc',
-         'added': datetime.datetime(2020, 7, 6, 5, 10, 57),
-         'is_current': 1,
-         'namespace': 'SHA1'},
-        {'seqalias_id': 1,
-         'seq_id': '9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6',
-         'alias': 'GS_9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6',
-         'added': datetime.datetime(2020, 7, 6, 5, 10, 57),
-         'is_current': 1,
-         'namespace': 'VMC'}
+        {
+            "seqalias_id": 16,
+            "seq_id": "9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6",
+            "alias": "ncbiac/e",
+            "added": datetime.datetime(2020, 7, 6, 5, 27, 23),
+            "is_current": 1,
+            "namespace": "Ensembl",
+        },
+        {
+            "seqalias_id": 16,
+            "seq_id": "9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6",
+            "alias": "ncbiac/e",
+            "added": datetime.datetime(2020, 7, 6, 5, 27, 23),
+            "is_current": 1,
+            "namespace": "ensembl",
+        },
+        {
+            "seqalias_id": 3,
+            "seq_id": "9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6",
+            "alias": "be8a4c35767bb783a7b8b6dc04ba3718",
+            "added": datetime.datetime(2020, 7, 6, 5, 10, 57),
+            "is_current": 1,
+            "namespace": "MD5",
+        },
+        {
+            "seqalias_id": 5,
+            "seq_id": "9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6",
+            "alias": "ncbiac",
+            "added": datetime.datetime(2020, 7, 6, 5, 10, 57),
+            "is_current": 1,
+            "namespace": "NCBI",
+        },
+        {
+            "seqalias_id": 5,
+            "seq_id": "9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6",
+            "alias": "ncbiac",
+            "added": datetime.datetime(2020, 7, 6, 5, 10, 57),
+            "is_current": 1,
+            "namespace": "refseq",
+        },
+        {
+            "seqalias_id": 4,
+            "seq_id": "9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6",
+            "alias": "5W5mCzikufDcezdNTGKLa9zricw",
+            "added": datetime.datetime(2020, 7, 6, 5, 10, 57),
+            "is_current": 1,
+            "namespace": "SEGUID",
+        },
+        {
+            "seqalias_id": 2,
+            "seq_id": "9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6",
+            "alias": "e56e660b38a4b9f0dc7b374d4c628b6bdceb89cc",
+            "added": datetime.datetime(2020, 7, 6, 5, 10, 57),
+            "is_current": 1,
+            "namespace": "SHA1",
+        },
+        {
+            "seqalias_id": 1,
+            "seq_id": "9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6",
+            "alias": "GS_9Sn3d56Fzds_c6ovS__sj1fbMd_Xd3J6",
+            "added": datetime.datetime(2020, 7, 6, 5, 10, 57),
+            "is_current": 1,
+            "namespace": "VMC",
+        },
     ]
diff --git a/src/biocommons/seqrepo/_versionwarning.py b/src/biocommons/seqrepo/_versionwarning.py
index 2df7dac..f147926 100644
--- a/src/biocommons/seqrepo/_versionwarning.py
+++ b/src/biocommons/seqrepo/_versionwarning.py
@@ -9,11 +9,12 @@
 
 __all__ = []
 
-version_warning = ("biocommons packages are tested and supported only on Python >= 3.6"
-                   " (https://github.com/biocommons/org/wiki/Migrating-to-Python-3.6)")
+version_warning = (
+    "biocommons packages are tested and supported only on Python >= 3.6"
+    " (https://github.com/biocommons/org/wiki/Migrating-to-Python-3.6)"
+)
 
 _logger = logging.getLogger(__package__)
 
 if sys.version_info < (3, 6):
     _logger.warning(version_warning)
-    
diff --git a/src/biocommons/seqrepo/cli.py b/src/biocommons/seqrepo/cli.py
index 5aa6ed7..87eded6 100644
--- a/src/biocommons/seqrepo/cli.py
+++ b/src/biocommons/seqrepo/cli.py
@@ -20,43 +20,38 @@
 import itertools
 import logging
 import os
-import pprint
 import re
 import shutil
 import stat
-import sys
 import subprocess
+import sys
 import tempfile
 
-
 import bioutils.assemblies
 import bioutils.seqfetcher
-import six
 import tqdm
 
-from . import __version__, SeqRepo
+from . import SeqRepo, __version__
 from .fastaiter import FastaIter
 from .utils import parse_defline, validate_aliases
 
-
 SEQREPO_ROOT_DIR = os.environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo")
 DEFAULT_INSTANCE_NAME_RW = "master"
 DEFAULT_INSTANCE_NAME_RO = "latest"
 
 instance_name_new_re = re.compile(r"^20[12]\d-\d\d-\d\d$")  # smells like a new datestamp, 2017-01-17
-instance_name_old_re = re.compile(r"^20[12]1\d\d\d\d\d$")   # smells like an old datestamp, 20170117
-instance_name_re = re.compile(r"^20[12]\d-?\d\d-?\d\d$")    # smells like a datestamp, 20170117 or 2017-01-17
+instance_name_old_re = re.compile(r"^20[12]\d\d\d\d\d$")  # smells like an old datestamp, 20170117
+instance_name_re = re.compile(r"^20[12]\d-?\d\d-?\d\d$")  # smells like a datestamp, 20170117 or 2017-01-17
 
 _logger = logging.getLogger(__name__)
 
 
 def _get_remote_instances(opts):
     line_re = re.compile(r"d[-rwx]{9}\s+[\d,]+ \d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2} (.+)")
-    rsync_cmd = [opts.rsync_exe, "--no-motd", "--copy-dirlinks",
-                 opts.remote_host + "::seqrepo"]
+    rsync_cmd = [opts.rsync_exe, "--no-motd", "--copy-dirlinks", opts.remote_host + "::seqrepo"]
     _logger.debug("Executing `" + " ".join(rsync_cmd) + "`")
     lines = subprocess.check_output(rsync_cmd).decode().splitlines()[1:]
-    dirs = (m.group(1) for m in (line_re.match(l) for l in lines) if m)
+    dirs = (m.group(1) for m in (line_re.match(line) for line in lines) if m)
     return sorted(list(filter(instance_name_new_re.match, dirs)))
 
 
@@ -78,10 +73,13 @@ def parse_arguments():
     top_p = argparse.ArgumentParser(
         description=__doc__.split("\n\n")[0],
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-        epilog="seqrepo " + __version__ + ". See https://github.com/biocommons/biocommons.seqrepo for more information")
+        epilog="seqrepo %s. See https://github.com/biocommons/biocommons.seqrepo for more information" % __version__,
+    )
     top_p.add_argument("--dry-run", "-n", default=False, action="store_true")
     top_p.add_argument("--remote-host", default="dl.biocommons.org", help="rsync server host")
-    top_p.add_argument("--root-directory", "-r", default=SEQREPO_ROOT_DIR, help="seqrepo root directory (SEQREPO_ROOT_DIR)")
+    top_p.add_argument(
+        "--root-directory", "-r", default=SEQREPO_ROOT_DIR, help="seqrepo root directory (SEQREPO_ROOT_DIR)"
+    )
     top_p.add_argument("--rsync-exe", default="/usr/bin/rsync", help="path to rsync executable")
     top_p.add_argument("--verbose", "-v", action="count", default=0, help="be verbose; multiple accepted")
     top_p.add_argument("--version", action="version", version=__version__)
@@ -95,56 +93,60 @@ def parse_arguments():
 
     # add-assembly-names
     ap = subparsers.add_parser(
-        "add-assembly-names", help="add assembly aliases (from bioutils.assemblies) to existing sequences")
+        "add-assembly-names", help="add assembly aliases (from bioutils.assemblies) to existing sequences"
+    )
     ap.set_defaults(func=add_assembly_names)
     ap.add_argument(
-        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable (i.e., not a snapshot)")
+        "--instance-name",
+        "-i",
+        default=DEFAULT_INSTANCE_NAME_RW,
+        help="instance name; must be writeable (i.e., not a snapshot)",
+    )
     ap.add_argument(
-        "--partial-load", "-p", default=False, action="store_true", help="assign assembly aliases even if some sequences are missing")
+        "--partial-load",
+        "-p",
+        default=False,
+        action="store_true",
+        help="assign assembly aliases even if some sequences are missing",
+    )
     ap.add_argument(
-        "--reload-all", "-r", default=False, action="store_true", help="reload all assemblies, not just missing ones")
+        "--reload-all", "-r", default=False, action="store_true", help="reload all assemblies, not just missing ones"
+    )
 
     # export
     ap = subparsers.add_parser("export", help="export sequences")
     ap.set_defaults(func=export)
-    ap.add_argument("ALIASES",
-                    nargs="*",
-                    help="specific aliases to export")
+    ap.add_argument("ALIASES", nargs="*", help="specific aliases to export")
     ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name")
-    ap.add_argument(
-        "--namespace",
-        "-n",
-        help="namespace name (e.g., refseq, NCBI, Ensembl, LRG)", )
+    ap.add_argument("--namespace", "-n", help="namespace name (e.g., refseq, NCBI, Ensembl, LRG)")
 
     # export aliases
     ap = subparsers.add_parser("export-aliases", help="export aliases")
     ap.set_defaults(func=export_aliases)
     ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name")
-    ap.add_argument(
-        "--namespace",
-        "-n",
-        help="namespace name (e.g., refseq, NCBI, Ensembl, LRG)", )
+    ap.add_argument("--namespace", "-n", help="namespace name (e.g., refseq, NCBI, Ensembl, LRG)")
 
     # fetch-load
     ap = subparsers.add_parser("fetch-load", help="fetch remote sequences by accession and load them (low-throughput!)")
     ap.set_defaults(func=fetch_load)
     ap.add_argument(
-        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable (i.e., not a snapshot)")
-    ap.add_argument(
-        "accessions",
-        nargs="+",
-        help="accessions (NCBI or Ensembl)", )
-    ap.add_argument(
-        "--namespace",
-        "-n",
-        required=True,
-        help="namespace name (e.g., NCBI, Ensembl, LRG)", )
+        "--instance-name",
+        "-i",
+        default=DEFAULT_INSTANCE_NAME_RW,
+        help="instance name; must be writeable (i.e., not a snapshot)",
+    )
+    ap.add_argument("accessions", nargs="+", help="accessions (NCBI or Ensembl)")
+    ap.add_argument("--namespace", "-n", required=True, help="namespace name (e.g., NCBI, Ensembl, LRG)")
 
     # init
     ap = subparsers.add_parser("init", help="initialize seqrepo directory")
     ap.set_defaults(func=init)
     ap.add_argument(
-        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable (i.e., not a snapshot)")
+        "--instance-name",
+        "-i",
+        default=DEFAULT_INSTANCE_NAME_RW,
+        help="instance name; must be writeable (i.e., not a snapshot)",
+    )
 
     # list-local-instances
     ap = subparsers.add_parser("list-local-instances", help="list local seqrepo instances")
@@ -158,22 +160,21 @@ def parse_arguments():
     ap = subparsers.add_parser("load", help="load a single fasta file")
     ap.set_defaults(func=load)
     ap.add_argument(
-        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable (i.e., not a snapshot)")
-    ap.add_argument(
-        "fasta_files",
-        nargs="+",
-        help="fasta files to load (compressed okay)", )
-    ap.add_argument(
-        "--namespace",
-        "-n",
-        required=True,
-        help="namespace name (e.g., NCBI, Ensembl, LRG)", )
+        "--instance-name",
+        "-i",
+        default=DEFAULT_INSTANCE_NAME_RW,
+        help="instance name; must be writeable (i.e., not a snapshot)",
+    )
+    ap.add_argument("fasta_files", nargs="+", help="fasta files to load (compressed okay)")
+    ap.add_argument("--namespace", "-n", required=True, help="namespace name (e.g., NCBI, Ensembl, LRG)")
 
     # pull
     ap = subparsers.add_parser("pull", help="pull incremental update from seqrepo mirror")
     ap.set_defaults(func=pull)
     ap.add_argument("--instance-name", "-i", default=None, help="instance name")
-    ap.add_argument("--update-latest", "-l", default=False, action="store_true", help="set latest symlink to point to this instance")
+    ap.add_argument(
+        "--update-latest", "-l", default=False, action="store_true", help="set latest symlink to point to this instance"
+    )
 
     # show-status
     ap = subparsers.add_parser("show-status", help="show seqrepo status")
@@ -183,11 +184,13 @@ def parse_arguments():
     # snapshot
     ap = subparsers.add_parser("snapshot", help="create a new read-only seqrepo snapshot")
     ap.set_defaults(func=snapshot)
+    ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable")
     ap.add_argument(
-        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable")
-    ap.add_argument("--destination-name", "-d",
-                    default=datetime.datetime.utcnow().strftime("%F"),
-                    help="destination directory name (must not already exist)")
+        "--destination-name",
+        "-d",
+        default=datetime.datetime.utcnow().strftime("%F"),
+        help="destination directory name (must not already exist)",
+    )
 
     # start-shell
     ap = subparsers.add_parser("start-shell", help="start interactive shell with initialized seqrepo")
@@ -197,14 +200,12 @@ def parse_arguments():
     # upgrade
     ap = subparsers.add_parser("upgrade", help="upgrade seqrepo database and directory")
     ap.set_defaults(func=upgrade)
-    ap.add_argument(
-        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable")
+    ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable")
 
     # update digests
     ap = subparsers.add_parser("update-digests", help="update computed digests in place")
     ap.set_defaults(func=update_digests)
-    ap.add_argument(
-        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable")
+    ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable")
 
     # update latest (symlink)
     ap = subparsers.add_parser("update-latest", help="create symlink `latest` to newest seqrepo instance")
@@ -216,6 +217,7 @@ def parse_arguments():
 
 ############################################################################
 
+
 def add_assembly_names(opts):
     """add assembly names as aliases to existing sequences
 
@@ -262,12 +264,22 @@ def add_assembly_names(opts):
         # all assembled-molecules (1..22, X, Y, MT) have ncbi aliases in seqrepo
         not_in_seqrepo = [s["refseq_ac"] for s in eq_sequences if s["refseq_ac"] not in ncbi_alias_map]
         if not_in_seqrepo:
-            _logger.warning("Assembly {an} references {n} accessions not in SeqRepo instance {opts.instance_name} (e.g., {acs})".format(
-                an=assy_name, n=len(not_in_seqrepo), opts=opts, acs=", ".join(not_in_seqrepo[:5]+["..."]), seqrepo_dir=seqrepo_dir))
+            _logger.warning(
+                (
+                    "Assembly {an} references {n} accessions not in SeqRepo instance "
+                    "{opts.instance_name} @ {seqrepo_dir} (e.g., {acs})"
+                ).format(
+                    an=assy_name,
+                    n=len(not_in_seqrepo),
+                    opts=opts,
+                    acs=", ".join(not_in_seqrepo[:5] + ["..."]),
+                    seqrepo_dir=seqrepo_dir,
+                )
+            )
             if not opts.partial_load:
                 _logger.warning("Skipping {an} (-p to enable partial loading)".format(an=assy_name))
                 continue
-            
+
         eq_sequences = [es for es in eq_sequences if es["refseq_ac"] in ncbi_alias_map]
         _logger.info("Loading {n} new accessions for assembly {an}".format(an=assy_name, n=len(eq_sequences)))
 
@@ -276,41 +288,45 @@ def add_assembly_names(opts):
             aliases = [{"namespace": assy_name, "alias": a} for a in [s["name"]] + s["aliases"]]
             for alias in aliases:
                 sr.aliases.store_alias(seq_id=seq_id, **alias)
-                _logger.debug("Added assembly alias {a[namespace]}:{a[alias]} for {seq_id}".format(a=alias, seq_id=seq_id))
+                _logger.debug(
+                    "Added assembly alias {a[namespace]}:{a[alias]} for {seq_id}".format(a=alias, seq_id=seq_id)
+                )
         sr.commit()
 
 
-def export(opts):
+def export(opts):  # noqa: C901
     seqrepo_dir = os.path.join(opts.root_directory, opts.instance_name)
     sr = SeqRepo(seqrepo_dir)
 
     if opts.ALIASES:
+
         def alias_generator():
             for alias in set(opts.ALIASES):
-                yield from sr.aliases.find_aliases(namespace=opts.namespace,  # None okay
-                                                   alias=alias,
-                                                   translate_ncbi_namespace=True)
+                yield from sr.aliases.find_aliases(
+                    namespace=opts.namespace, alias=alias, translate_ncbi_namespace=True  # None okay
+                )
+
         def _rec_iterator():
             """yield (srec, [arec]) tuples to export"""
-            grouped_alias_iterator = itertools.groupby(alias_generator(),
-                                                       key=lambda arec: (arec["seq_id"]))
+            grouped_alias_iterator = itertools.groupby(alias_generator(), key=lambda arec: (arec["seq_id"]))
             for seq_id, arecs in grouped_alias_iterator:
                 srec = sr.sequences.fetch_seqinfo(seq_id)
                 srec["seq"] = sr.sequences.fetch(seq_id)
                 yield srec, arecs
-        
+
     elif opts.namespace:
+
         def _rec_iterator():
             """yield (srec, [arec]) tuples to export"""
-            alias_iterator = sr.aliases.find_aliases(namespace=opts.namespace,
-                                                     translate_ncbi_namespace=True)
-            grouped_alias_iterator = itertools.groupby(alias_iterator,
-                                                       key=lambda arec: (arec["seq_id"]))
+            alias_iterator = sr.aliases.find_aliases(namespace=opts.namespace, translate_ncbi_namespace=True)
+            grouped_alias_iterator = itertools.groupby(alias_iterator, key=lambda arec: (arec["seq_id"]))
             for seq_id, arecs in grouped_alias_iterator:
                 srec = sr.sequences.fetch_seqinfo(seq_id)
                 srec["seq"] = sr.sequences.fetch(seq_id)
                 yield srec, arecs
+
     else:
+
         def _rec_iterator():
             yield from sr
 
@@ -318,17 +334,15 @@ def _rec_iterator():
         nsad = _convert_alias_records_to_ns_dict(arecs)
         aliases = ["{ns}:{a}".format(ns=ns, a=a) for ns, aliases in sorted(nsad.items()) for a in aliases]
         print(">" + " ".join(aliases))
-        for l in _wrap_lines(srec["seq"], 100):
-            print(l)
-
-
+        for line in _wrap_lines(srec["seq"], 100):
+            print(line)
 
 
 def export_aliases(opts):
     seqrepo_dir = os.path.join(opts.root_directory, opts.instance_name)
     sr = SeqRepo(seqrepo_dir)
     alias_iterator = sr.aliases.find_aliases(translate_ncbi_namespace=True)
-    grouped_alias_iterator = itertools.groupby(alias_iterator, key=lambda  arec: (arec["seq_id"]))
+    grouped_alias_iterator = itertools.groupby(alias_iterator, key=lambda arec: (arec["seq_id"]))
     for _, arecs in grouped_alias_iterator:
         if opts.namespace:
             if not any(arec for arec in arecs if arec["namespace"] == opts.namespace):
@@ -339,7 +353,7 @@ def export_aliases(opts):
         nsaliases.sort(key=lambda a: (not a.startswith("VMC:"), a))  # VMC first
         nsaliases[0] = nsaliases[0].replace("VMC:GS_", "GA4GH:SQ.")
         print("\t".join(nsaliases))
-        
+
 
 def fetch_load(opts):
     disable_bar = _logger.getEffectiveLevel() < logging.WARNING
@@ -365,7 +379,7 @@ def init(opts):
     seqrepo_dir = os.path.join(opts.root_directory, opts.instance_name)
     if os.path.exists(seqrepo_dir) and len(os.listdir(seqrepo_dir)) > 0:
         raise IOError("{seqrepo_dir} exists and is not empty".format(seqrepo_dir=seqrepo_dir))
-    sr = SeqRepo(seqrepo_dir, writeable=True)    # flake8: noqa
+    sr = SeqRepo(seqrepo_dir, writeable=True)  # noqa: F841
 
 
 def list_local_instances(opts):
@@ -381,6 +395,7 @@ def list_remote_instances(opts):
     for i in instances:
         print("  " + i)
 
+
 def load(opts):
     # TODO: drop this test
     if opts.namespace == "-":
@@ -407,8 +422,11 @@ def load(opts):
         seq_bar = tqdm.tqdm(FastaIter(fh), unit=" seqs", disable=disable_bar, leave=False)
         for defline, seq in seq_bar:
             n_seqs_seen += 1
-            seq_bar.set_description("sequences: {nsa}/{nss} added/seen; aliases: {naa} added".format(
-                nss=n_seqs_seen, nsa=n_seqs_added, naa=n_aliases_added))
+            seq_bar.set_description(
+                "sequences: {nsa}/{nss} added/seen; aliases: {naa} added".format(
+                    nss=n_seqs_seen, nsa=n_seqs_added, naa=n_aliases_added
+                )
+            )
             aliases = parse_defline(defline, opts.namespace)
             validate_aliases(aliases)
             n_sa, n_aa = sr.store(seq, aliases)
@@ -433,12 +451,12 @@ def pull(opts):
         return
 
     tmp_dir = tempfile.mkdtemp(dir=opts.root_directory, prefix=instance_name + ".")
-    os.rmdir(tmp_dir)    # let rsync create it the directory
+    os.rmdir(tmp_dir)  # let rsync create the directory
 
     cmd = [opts.rsync_exe, "-aHP", "--no-motd"]
     if local_instances:
         latest_local_instance = local_instances[-1]
-        cmd += ["--link-dest=" + os.path.join(opts.root_directory, latest_local_instance) + "/"]
+        cmd += ["--link-dest=%s/" % os.path.join(opts.root_directory, latest_local_instance)]
     cmd += ["{h}::seqrepo/{i}/".format(h=opts.remote_host, i=instance_name), tmp_dir]
 
     _logger.debug("Executing: " + " ".join(cmd))
@@ -455,18 +473,29 @@ def show_status(opts):
     seqrepo_dir = os.path.join(opts.root_directory, opts.instance_name)
     tot_size = sum(
         os.path.getsize(os.path.join(dirpath, filename))
-        for dirpath, dirnames, filenames in os.walk(seqrepo_dir) for filename in filenames)
+        for dirpath, dirnames, filenames in os.walk(seqrepo_dir)
+        for filename in filenames
+    )
 
     sr = SeqRepo(seqrepo_dir)
     print("seqrepo {version}".format(version=__version__))
     print("instance directory: {sr._root_dir}, {ts:.1f} GB".format(sr=sr, ts=tot_size / 1e9))
-    print("backends: fastadir (schema {fd_v}), seqaliasdb (schema {sa_v}) ".format(
-        fd_v=sr.sequences.schema_version(), sa_v=sr.aliases.schema_version()))
-    print("sequences: {ss[n_sequences]} sequences, {ss[tot_length]} residues, {ss[n_files]} files".format(
-        ss=sr.sequences.stats()))
     print(
-        "aliases: {sa[n_aliases]} aliases, {sa[n_current]} current, {sa[n_namespaces]} namespaces, {sa[n_sequences]} sequences".
-        format(sa=sr.aliases.stats()))
+        "backends: fastadir (schema {fd_v}), seqaliasdb (schema {sa_v}) ".format(
+            fd_v=sr.sequences.schema_version(), sa_v=sr.aliases.schema_version()
+        )
+    )
+    print(
+        "sequences: {ss[n_sequences]} sequences, {ss[tot_length]} residues, {ss[n_files]} files".format(
+            ss=sr.sequences.stats()
+        )
+    )
+    print(
+        (
+            "aliases: {sa[n_aliases]} aliases, {sa[n_current]} current, {sa[n_namespaces]} "
+            "namespaces, {sa[n_sequences]} sequences"
+        ).format(sa=sr.aliases.stats())
+    )
     return sr
 
 
@@ -508,8 +537,12 @@ def snapshot(opts):
         os.mkdir(dp)
 
     # hard link sequence files
-    for rp in (os.path.join(dirpath, filename) for dirpath, _, filenames in os.walk(".") for filename in filenames
-               if ".bgz" in filename):
+    for rp in (
+        os.path.join(dirpath, filename)
+        for dirpath, _, filenames in os.walk(".")
+        for filename in filenames
+        if ".bgz" in filename
+    ):
         dp = os.path.join(tmp_dir, rp)
         os.link(rp, dp)
 
@@ -526,8 +559,11 @@ def _drop_write(p):
         new_mode = mode & ~mode_aw
         os.chmod(p, new_mode)
 
-    for dp in (os.path.join(dirpath, dirent)
-               for dirpath, dirnames, filenames in os.walk(tmp_dir) for dirent in dirnames + filenames):
+    for dp in (
+        os.path.join(dirpath, dirent)
+        for dirpath, dirnames, filenames in os.walk(tmp_dir)
+        for dirent in dirnames + filenames
+    ):
         _drop_write(dp)
     _drop_write(tmp_dir)
     os.rename(tmp_dir, dst_dir)
@@ -538,12 +574,18 @@ def _drop_write(p):
 
 def start_shell(opts):
     seqrepo_dir = os.path.join(opts.root_directory, opts.instance_name)
-    sr = SeqRepo(seqrepo_dir)
+    sr = SeqRepo(seqrepo_dir)  # noqa: F841
     import IPython
-    IPython.embed(header="\n".join([
-        "seqrepo (https://github.com/biocommons/biocommons.seqrepo/)", "version: " + __version__,
-        "instance path: " + seqrepo_dir
-    ]))
+
+    IPython.embed(
+        header="\n".join(
+            [
+                "seqrepo (https://github.com/biocommons/biocommons.seqrepo/)",
+                "version: " + __version__,
+                "instance path: " + seqrepo_dir,
+            ]
+        )
+    )
 
 
 def upgrade(opts):
@@ -578,18 +620,15 @@ def update_latest(opts, mri=None):
 def main():
     opts = parse_arguments()
 
-    verbose_log_level = (logging.WARN if opts.verbose == 0 else
-                         logging.INFO if opts.verbose == 1 else
-                         logging.DEBUG)
+    verbose_log_level = logging.WARN if opts.verbose == 0 else logging.INFO if opts.verbose == 1 else logging.DEBUG
     logging.basicConfig(level=verbose_log_level)
     opts.func(opts)
 
 
-
-
 ############################################################################
 # INTERNAL
 
+
 def _convert_alias_records_to_ns_dict(records):
     """converts a set of alias db records to a dict like {ns: [aliases], ...}
     aliases are lexicographicaly sorted
@@ -597,11 +636,10 @@ def _convert_alias_records_to_ns_dict(records):
     records = sorted(records, key=lambda r: (r["namespace"], r["alias"]))
     return {g: [r["alias"] for r in gi] for g, gi in itertools.groupby(records, key=lambda r: r["namespace"])}
 
+
 def _wrap_lines(seq, line_width):
     for i in range(0, len(seq), line_width):
-        yield seq[i:i + line_width]
-
-
+        yield seq[i : i + line_width]
 
 
 if __name__ == "__main__":
diff --git a/src/biocommons/seqrepo/config.py b/src/biocommons/seqrepo/config.py
index 4209c35..abd6c36 100644
--- a/src/biocommons/seqrepo/config.py
+++ b/src/biocommons/seqrepo/config.py
@@ -4,8 +4,9 @@
     seqrepo_env_var = os.environ.get("SEQREPO_LRU_CACHE_MAXSIZE", "1000000")
     SEQREPO_LRU_CACHE_MAXSIZE = int(seqrepo_env_var)
 except ValueError:
-    if seqrepo_env_var.lower() == 'none':
+    if seqrepo_env_var.lower() == "none":
         SEQREPO_LRU_CACHE_MAXSIZE = None
     else:
-        raise ValueError('SEQREPO_LRU_CACHE_MAXSIZE must be a valid int, none, or not set, '
-                         'currently it is ' + seqrepo_env_var)
+        raise ValueError(
+            "SEQREPO_LRU_CACHE_MAXSIZE must be a valid int, none, or not set, " "currently it is " + seqrepo_env_var
+        )
diff --git a/src/biocommons/seqrepo/dataproxy.py b/src/biocommons/seqrepo/dataproxy.py
index e5de0e3..19a8121 100644
--- a/src/biocommons/seqrepo/dataproxy.py
+++ b/src/biocommons/seqrepo/dataproxy.py
@@ -5,16 +5,15 @@
 
 """
 
-from abc import ABC, abstractmethod
-from collections.abc import Sequence
 import datetime
 import functools
 import logging
 import os
+from abc import ABC, abstractmethod
 from urllib.parse import urlparse
 
-from bioutils.accessions import coerce_namespace
 import requests
+from bioutils.accessions import coerce_namespace
 
 _logger = logging.getLogger(__name__)
 
@@ -205,8 +204,7 @@ def create_dataproxy(uri: str = None) -> _DataProxy:
 
     """
 
-    uri = (uri
-           or os.environ.get("SEQREPO_DATAPROXY_URI", None))
+    uri = uri or os.environ.get("SEQREPO_DATAPROXY_URI", None)
 
     if uri is None:
         raise ValueError("No data proxy URI provided or found in SEQREPO_DATAPROXY_URI")
@@ -223,10 +221,11 @@ def create_dataproxy(uri: str = None) -> _DataProxy:
         if proto in ("", "file"):
             # pylint: disable=import-error, import-outside-toplevel
             from biocommons.seqrepo import SeqRepo
+
             sr = SeqRepo(root_dir=parsed_uri.path)
             dp = SeqRepoDataProxy(sr)
         elif proto in ("http", "https"):
-            dp = SeqRepoRESTDataProxy(uri[len(provider) + 1:])
+            dp = SeqRepoRESTDataProxy(uri[len(provider) + 1 :])
         else:
             raise ValueError(f"SeqRepo URI scheme {parsed_uri.scheme} not implemented")
 
diff --git a/src/biocommons/seqrepo/fastadir/__init__.py b/src/biocommons/seqrepo/fastadir/__init__.py
index d0c58aa..0d2db26 100644
--- a/src/biocommons/seqrepo/fastadir/__init__.py
+++ b/src/biocommons/seqrepo/fastadir/__init__.py
@@ -1 +1 @@
-from .fastadir import FastaDir
+from .fastadir import FastaDir  # noqa: F401
diff --git a/src/biocommons/seqrepo/fastadir/_data/migrations/0000-base.py b/src/biocommons/seqrepo/fastadir/_data/migrations/0000-base.py
index 5a828d2..a04b420 100644
--- a/src/biocommons/seqrepo/fastadir/_data/migrations/0000-base.py
+++ b/src/biocommons/seqrepo/fastadir/_data/migrations/0000-base.py
@@ -4,8 +4,10 @@
 
 step("""create unique index meta_key_idx on meta(key)""", """drop index meta_key_idx""")
 
-step("""create table log (ts timestamp not null default current_timestamp, v text not null, msg text not null);""",
-     """drop table log""")
+step(
+    """create table log (ts timestamp not null default current_timestamp, v text not null, msg text not null);""",
+    """drop table log""",
+)
 
 step("""insert into log (v,msg) values ('0', 'database created')""")
 
diff --git a/src/biocommons/seqrepo/fastadir/_data/migrations/0001-initial.py b/src/biocommons/seqrepo/fastadir/_data/migrations/0001-initial.py
index f0bedb0..e9791ff 100644
--- a/src/biocommons/seqrepo/fastadir/_data/migrations/0001-initial.py
+++ b/src/biocommons/seqrepo/fastadir/_data/migrations/0001-initial.py
@@ -1,13 +1,16 @@
 from yoyo import step
 
-step("""
+step(
+    """
 create table seqinfo (
     seq_id text primary key,
     len integer not null,
     alpha text not null,
     added timestamp not null default current_timestamp,
     relpath text not null
-)""", """drop table seqinfo""")
+)""",
+    """drop table seqinfo""",
+)
 
 step("""create unique index seqinfo_seq_id_idx on seqinfo(seq_id)""")
 
diff --git a/src/biocommons/seqrepo/fastadir/bases.py b/src/biocommons/seqrepo/fastadir/bases.py
index 16446a8..4425bcc 100644
--- a/src/biocommons/seqrepo/fastadir/bases.py
+++ b/src/biocommons/seqrepo/fastadir/bases.py
@@ -4,17 +4,17 @@
 
 
 @six.add_metaclass(abc.ABCMeta)
-class BaseReader():
+class BaseReader:
     @abc.abstractmethod
     def fetch(self, seq_id, start, end):
-        pass    # pragma: no cover
+        pass  # pragma: no cover
 
     def __getitem__(self, ac):
         return self.fetch(ac)
 
 
 @six.add_metaclass(abc.ABCMeta)
-class BaseWriter():
+class BaseWriter:
     @abc.abstractmethod
     def store(self, seq_id, seq):
-        pass    # pragma: no cover
+        pass  # pragma: no cover
diff --git a/src/biocommons/seqrepo/fastadir/fabgz.py b/src/biocommons/seqrepo/fastadir/fabgz.py
index 68120ac..cc70418 100644
--- a/src/biocommons/seqrepo/fastadir/fabgz.py
+++ b/src/biocommons/seqrepo/fastadir/fabgz.py
@@ -17,10 +17,8 @@
 import subprocess
 
 import six
-
 from pysam import FastaFile
 
-
 _logger = logging.getLogger(__name__)
 
 line_width = 100
@@ -48,13 +46,20 @@ def _find_bgzip():
     except AttributeError:
         raise RuntimeError("Didn't find version string in bgzip executable ({exe})".format(exe=exe))
     except missing_file_exception:
-        raise RuntimeError("{exe} doesn't exist; you need to install htslib and tabix (See https://github.com/biocommons/biocommons.seqrepo#requirements)".format(exe=exe))
+        raise RuntimeError(
+            "{exe} doesn't exist; you need to install htslib and tabix (See https://github.com/biocommons/biocommons.seqrepo#requirements)".format(
+                exe=exe
+            )
+        )
     except Exception:
         raise RuntimeError("Unknown error while executing {exe}".format(exe=exe))
     bgzip_version_info = tuple(map(int, bgzip_version.split(".")))
     if bgzip_version_info < min_bgzip_version_info:
-        raise RuntimeError("bgzip ({exe}) {ev} is too old; >= {rv} is required; please upgrade".format(
-            exe=exe, ev=bgzip_version, rv=min_bgzip_version))
+        raise RuntimeError(
+            "bgzip ({exe}) {ev} is too old; >= {rv} is required; please upgrade".format(
+                exe=exe, ev=bgzip_version, rv=min_bgzip_version
+            )
+        )
     _logger.info("Using bgzip {ev} ({exe})".format(ev=bgzip_version, exe=exe))
     return exe
 
@@ -104,12 +109,12 @@ def __init__(self, filename):
     def store(self, seq_id, seq):
         def wrap_lines(seq, line_width):
             for i in range(0, len(seq), line_width):
-                yield seq[i:i + line_width]
+                yield seq[i : i + line_width]
 
         if seq_id not in self._added:
             self._fh.write(">" + seq_id + "\n")
-            for l in wrap_lines(seq, line_width):
-                self._fh.write(l + "\n")
+            for line in wrap_lines(seq, line_width):
+                self._fh.write(line + "\n")
             self._added.add(seq_id)
             _logger.debug("added seq_id {i}; length {l}".format(i=seq_id, l=len(seq)))
         return seq_id
diff --git a/src/biocommons/seqrepo/fastadir/fastadir.py b/src/biocommons/seqrepo/fastadir/fastadir.py
index 361ffc9..5637d24 100644
--- a/src/biocommons/seqrepo/fastadir/fastadir.py
+++ b/src/biocommons/seqrepo/fastadir/fastadir.py
@@ -8,8 +8,6 @@
 import pkg_resources
 import yoyo
 
-
-
 from ..config import SEQREPO_LRU_CACHE_MAXSIZE
 from .bases import BaseReader, BaseWriter
 from .fabgz import FabgzReader, FabgzWriter
@@ -65,23 +63,26 @@ def __init__(self, root_dir, writeable=False, check_same_thread=True):
             os.makedirs(self._root_dir, exist_ok=True)
             self._upgrade_db()
 
-        self._db = sqlite3.connect(self._db_path,
-                                   check_same_thread=check_same_thread,
-                                   detect_types=sqlite3.PARSE_DECLTYPES)
+        self._db = sqlite3.connect(
+            self._db_path, check_same_thread=check_same_thread, detect_types=sqlite3.PARSE_DECLTYPES
+        )
         schema_version = self.schema_version()
         self._db.row_factory = sqlite3.Row
 
         # if we're not at the expected schema version for this code, bail
         if schema_version != expected_schema_version:
-            raise RuntimeError("""Upgrade required: Database schema
-            version is {} and code expects {}""".format(schema_version, expected_schema_version))
+            raise RuntimeError(
+                """Upgrade required: Database schema
+            version is {} and code expects {}""".format(
+                    schema_version, expected_schema_version
+                )
+            )
 
     # ############################################################################
     # Special methods
 
     def __contains__(self, seq_id):
-        c = self._fetch_one("select exists(select 1 from seqinfo where seq_id = ? limit 1) as ex",
-                             (seq_id, ))
+        c = self._fetch_one("select exists(select 1 from seqinfo where seq_id = ? limit 1) as ex", (seq_id,))
 
         return True if c["ex"] else False
 
@@ -113,8 +114,12 @@ def fetch(self, seq_id, start=None, end=None):
         rec = self.fetch_seqinfo(seq_id)
 
         if self._writing and self._writing["relpath"] == rec["relpath"]:
-            _logger.warning("""Fetching from file opened for writing;
-            closing first ({})""".format(rec["relpath"]))
+            _logger.warning(
+                """Fetching from file opened for writing;
+            closing first ({})""".format(
+                    rec["relpath"]
+                )
+            )
             self.commit()
 
         path = os.path.join(self._root_dir, rec["relpath"])
@@ -175,8 +180,11 @@ def store(self, seq_id, seq):
         self._writing["fabgz"].store(seq_id, seq)
         alpha = "".join(sorted(set(seq)))
         cursor = self._db.cursor()
-        cursor.execute("""insert into seqinfo (seq_id, len, alpha, relpath)
-                         values (?, ?, ?,?)""", (seq_id, len(seq), alpha, self._writing["relpath"]))
+        cursor.execute(
+            """insert into seqinfo (seq_id, len, alpha, relpath)
+                         values (?, ?, ?,?)""",
+            (seq_id, len(seq), alpha, self._writing["relpath"]),
+        )
         return seq_id
 
     # ############################################################################
@@ -190,7 +198,7 @@ def _fetch_one(self, sql, params=()):
     def _upgrade_db(self):
         """upgrade db using scripts for specified (current) schema version"""
         migration_path = "_data/migrations"
-        sqlite3.connect(self._db_path).close()    # ensure that it exists
+        sqlite3.connect(self._db_path).close()  # ensure that it exists
         db_url = "sqlite:///" + self._db_path
         backend = yoyo.get_backend(db_url)
         migration_dir = pkg_resources.resource_filename(__package__, migration_path)
@@ -205,6 +213,7 @@ def _open_for_reading(self, path):
 
     def _dump_aliases(self):
         import prettytable
+
         fields = "seq_id len alpha added relpath".split()
         pt = prettytable.PrettyTable(field_names=fields)
         cursor = self._db.cursor()
diff --git a/src/biocommons/seqrepo/fastaiter/__init__.py b/src/biocommons/seqrepo/fastaiter/__init__.py
index 7d7b810..b838a5e 100644
--- a/src/biocommons/seqrepo/fastaiter/__init__.py
+++ b/src/biocommons/seqrepo/fastaiter/__init__.py
@@ -1 +1 @@
-from .fastaiter import FastaIter
+from .fastaiter import FastaIter  # noqa: F401
diff --git a/src/biocommons/seqrepo/fastaiter/fastaiter.py b/src/biocommons/seqrepo/fastaiter/fastaiter.py
index afa6cc4..f9aa6fd 100644
--- a/src/biocommons/seqrepo/fastaiter/fastaiter.py
+++ b/src/biocommons/seqrepo/fastaiter/fastaiter.py
@@ -4,12 +4,13 @@ def FastaIter(handle):
     Lines before the start of the first record are ignored.
     """
 
+    seq_lines = []
     header = None
     for line in handle:
         if line.startswith(">"):
             if header is not None:  # not the first record
                 yield header, "".join(seq_lines)
-            seq_lines = list()
+            seq_lines = []
             header = line[1:].rstrip()
         else:
             if header is not None:  # not the first record
@@ -17,5 +18,5 @@ def FastaIter(handle):
 
     if header is not None:
         yield header, "".join(seq_lines)
-    else:   # no FASTA records in file
+    else:  # no FASTA records in file
         return
diff --git a/src/biocommons/seqrepo/seqaliasdb/__init__.py b/src/biocommons/seqrepo/seqaliasdb/__init__.py
index 008c939..06b1794 100644
--- a/src/biocommons/seqrepo/seqaliasdb/__init__.py
+++ b/src/biocommons/seqrepo/seqaliasdb/__init__.py
@@ -1 +1 @@
-from .seqaliasdb import SeqAliasDB
+from .seqaliasdb import SeqAliasDB  # noqa: F401
diff --git a/src/biocommons/seqrepo/seqaliasdb/_data/migrations/0000-base.py b/src/biocommons/seqrepo/seqaliasdb/_data/migrations/0000-base.py
index 4644063..d76fcb1 100644
--- a/src/biocommons/seqrepo/seqaliasdb/_data/migrations/0000-base.py
+++ b/src/biocommons/seqrepo/seqaliasdb/_data/migrations/0000-base.py
@@ -4,8 +4,10 @@
 
 step("""create unique index meta_key_idx on meta(key)""", """drop index meta_key_idx""")
 
-step("""create table log (ts timestamp not null default current_timestamp, v text not null, msg text not null);""",
-     """drop table log""")
+step(
+    """create table log (ts timestamp not null default current_timestamp, v text not null, msg text not null);""",
+    """drop table log""",
+)
 
 step("""insert into log (v,msg) values ('', 'database created')""")
 
diff --git a/src/biocommons/seqrepo/seqaliasdb/_data/migrations/0001-initial.py b/src/biocommons/seqrepo/seqaliasdb/_data/migrations/0001-initial.py
index 2ffd6a9..77266b6 100644
--- a/src/biocommons/seqrepo/seqaliasdb/_data/migrations/0001-initial.py
+++ b/src/biocommons/seqrepo/seqaliasdb/_data/migrations/0001-initial.py
@@ -1,6 +1,7 @@
 from yoyo import step
 
-step("""
+step(
+    """
 create table seqalias (
     seqalias_id integer primary key,
     seq_id text not null,
@@ -8,25 +9,37 @@
     alias text not null,
     added timestamp not null default current_timestamp,
     is_current int not null default 1
-)""", """drop table seqalias""")
+)""",
+    """drop table seqalias""",
+)
 
 # current alias must be unique with a namespace
-step("""
+step(
+    """
 create unique index seqalias_unique_ns_alias_idx on seqalias(namespace, alias) where is_current = 1
-""")
+"""
+)
 
-step("""
+step(
+    """
 create index seqalias_seq_id_idx on seqalias(seq_id)
-""")
+"""
+)
 
-step("""
+step(
+    """
 create index seqalias_namespace_idx on seqalias(namespace)
-""")
+"""
+)
 
-step("""
+step(
+    """
 create index seqalias_alias_idx on seqalias(alias)
-""")
+"""
+)
 
-step("""
+step(
+    """
 update meta set value = '1' where key = 'schema version'
-""")
+"""
+)
diff --git a/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py b/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py
index bf1410c..55c454b 100644
--- a/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py
+++ b/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py
@@ -1,4 +1,3 @@
-import itertools
 import logging
 import sqlite3
 
@@ -6,19 +5,19 @@
 import yoyo
 
 from .._internal.translate import translate_alias_records, translate_api2db
-from .._internal.logging_support import DuplicateFilter
 
 _logger = logging.getLogger(__name__)
-#_logger.addFilter(DuplicateFilter())
+# _logger.addFilter(DuplicateFilter())
 
 
 expected_schema_version = 1
 
 min_sqlite_version_info = (3, 8, 0)
-if sqlite3.sqlite_version_info < min_sqlite_version_info:    # pragma: no cover
+if sqlite3.sqlite_version_info < min_sqlite_version_info:  # pragma: no cover
     min_sqlite_version = ".".join(map(str, min_sqlite_version_info))
-    msg = "{} requires sqlite3 >= {} but {} is installed".format(__package__, min_sqlite_version,
-                                                                 sqlite3.sqlite_version)
+    msg = "{} requires sqlite3 >= {} but {} is installed".format(
+        __package__, min_sqlite_version, sqlite3.sqlite_version
+    )
     raise ImportError(msg)
 
 
@@ -33,27 +32,30 @@ def __init__(self, db_path, writeable=False, translate_ncbi_namespace=None, chec
         self._writeable = writeable
 
         if translate_ncbi_namespace is not None:
-            _logger.warning("translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed")
+            _logger.warning(
+                "translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
+            )
 
         if self._writeable:
             self._upgrade_db()
-        self._db = sqlite3.connect(self._db_path,
-                                   check_same_thread=check_same_thread,
-                                   detect_types=sqlite3.PARSE_DECLTYPES)
+        self._db = sqlite3.connect(
+            self._db_path, check_same_thread=check_same_thread, detect_types=sqlite3.PARSE_DECLTYPES
+        )
         self._db.row_factory = sqlite3.Row
         schema_version = self.schema_version()
         # if we're not at the expected schema version for this code, bail
-        if schema_version != expected_schema_version:    # pragma: no cover
-            raise RuntimeError("Upgrade required: Database schema"
-                               "version is {} and code expects {}".format(schema_version, expected_schema_version))
+        if schema_version != expected_schema_version:  # pragma: no cover
+            raise RuntimeError(
+                "Upgrade required: Database schema"
+                "version is {} and code expects {}".format(schema_version, expected_schema_version)
+            )
 
     # ############################################################################
     # Special methods
 
     def __contains__(self, seq_id):
         cursor = self._db.cursor()
-        cursor.execute("select exists(select 1 from seqalias where seq_id = ? limit 1) as ex",
-                             (seq_id, ))
+        cursor.execute("select exists(select 1 from seqalias where seq_id = ? limit 1) as ex", (seq_id,))
         c = cursor.fetchone()
         return True if c["ex"] else False
 
@@ -68,13 +70,14 @@ def fetch_aliases(self, seq_id, current_only=True, translate_ncbi_namespace=None
         """return list of alias annotation records (dicts) for a given seq_id"""
         _logger.warning("SeqAliasDB::fetch_aliases() is deprecated; use find_aliases(seq_id=...) instead")
         if translate_ncbi_namespace is not None:
-            _logger.warning("translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed")
-        return [dict(r) for r in self.find_aliases(seq_id=seq_id,
-                                                   current_only=current_only)]
+            _logger.warning(
+                "translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
+            )
+        return [dict(r) for r in self.find_aliases(seq_id=seq_id, current_only=current_only)]
 
     def find_aliases(self, seq_id=None, namespace=None, alias=None, current_only=True, translate_ncbi_namespace=None):
         """returns iterator over alias annotation dicts that match criteria
-        
+
         The arguments, all optional, restrict the records that are
         returned.  Without arguments, all aliases are returned.
 
@@ -92,7 +95,9 @@ def eq_or_like(s):
             return "like" if "%" in s else "="
 
         if translate_ncbi_namespace is not None:
-            _logger.warning("translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed")
+            _logger.warning(
+                "translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
+            )
 
         if namespace is not None:
             ns_api2db = translate_api2db(namespace, alias)
@@ -154,16 +159,17 @@ def store_alias(self, seq_id, namespace, alias):
         log_pfx = "store({q},{n},{a})".format(n=namespace, a=alias, q=seq_id)
         cursor = self._db.cursor()
         try:
-            cursor.execute("insert into seqalias (seq_id, namespace, alias) values (?, ?, ?)", (seq_id, namespace,
-                                                                                                      alias))
+            cursor.execute(
+                "insert into seqalias (seq_id, namespace, alias) values (?, ?, ?)", (seq_id, namespace, alias)
+            )
             # success => new record
             return cursor.lastrowid
         except Exception as ex:
             # Every driver has own class for IntegrityError so we have to
             # investigate if the exception class name contains 'IntegrityError'
             # which we can ignore
-            if not type(ex).__name__.endswith('IntegrityError'):
-                raise(ex)
+            if not type(ex).__name__.endswith("IntegrityError"):
+                raise (ex)
         # IntegrityError fall-through
 
         # existing record is guaranteed to exist uniquely; fetchone() should always succeed
@@ -180,14 +186,12 @@ def store_alias(self, seq_id, namespace, alias):
         cursor.execute("update seqalias set is_current = 0 where seqalias_id = ?", [current_rec["seqalias_id"]])
         return self.store_alias(seq_id, namespace, alias)
 
-
-
-
     # ############################################################################
     # Internal methods
 
-    def _dump_aliases(self):    # pragma: no cover
+    def _dump_aliases(self):  # pragma: no cover
         import prettytable
+
         cursor = self._db.cursor()
         fields = "seqalias_id seq_id namespace alias added is_current".split()
         pt = prettytable.PrettyTable(field_names=fields)
@@ -199,7 +203,7 @@ def _dump_aliases(self):    # pragma: no cover
     def _upgrade_db(self):
         """upgrade db using scripts for specified (current) schema version"""
         migration_path = "_data/migrations"
-        sqlite3.connect(self._db_path).close()    # ensure that it exists
+        sqlite3.connect(self._db_path).close()  # ensure that it exists
         db_url = "sqlite:///" + self._db_path
         backend = yoyo.get_backend(db_url)
         migration_dir = pkg_resources.resource_filename(__package__, migration_path)
diff --git a/src/biocommons/seqrepo/seqrepo.py b/src/biocommons/seqrepo/seqrepo.py
index d22ae80..fa353c1 100644
--- a/src/biocommons/seqrepo/seqrepo.py
+++ b/src/biocommons/seqrepo/seqrepo.py
@@ -1,15 +1,15 @@
-from collections.abc import Sequence
-from functools import lru_cache
 import logging
 import os
 import re
+from collections.abc import Sequence
+from functools import lru_cache
 
 import bioutils.digests
 from bioutils.digests import seq_seqhash as sha512t24u
 
 from .config import SEQREPO_LRU_CACHE_MAXSIZE
-from .seqaliasdb import SeqAliasDB
 from .fastadir import FastaDir
+from .seqaliasdb import SeqAliasDB
 
 _logger = logging.getLogger(__name__)
 
@@ -19,12 +19,11 @@
 ct_n_residues = 1e9
 
 # namespace-alias separator
-nsa_sep = u":"
+nsa_sep = ":"
 
 uri_re = re.compile(r"([^:]+):(.+)")
 
 
-
 class SequenceProxy(Sequence):
     """Provides efficient and transparent string-like access, including
     random access slicing and reversing, to a biological sequence that
@@ -49,7 +48,7 @@ def __eq__(self, s: str):
 
     def __getitem__(self, key):
         if isinstance(key, int):
-            key = slice(key, key+1)
+            key = slice(key, key + 1)
         if key.step is not None:
             raise ValueError("Only contiguous sequence slices are supported")
         return self._fetch(key.start, key.stop)
@@ -93,7 +92,15 @@ class SeqRepo(object):
 
     """
 
-    def __init__(self, root_dir, writeable=False, upcase=True, translate_ncbi_namespace=None, check_same_thread=False, use_sequenceproxy=True):
+    def __init__(
+        self,
+        root_dir,
+        writeable=False,
+        upcase=True,
+        translate_ncbi_namespace=None,
+        check_same_thread=False,
+        use_sequenceproxy=True,
+    ):
         self._root_dir = root_dir
         self._upcase = upcase
         self._db_path = os.path.join(self._root_dir, "aliases.sqlite3")
@@ -112,12 +119,12 @@ def __init__(self, root_dir, writeable=False, upcase=True, translate_ncbi_namesp
             raise OSError("Unable to open SeqRepo directory {}".format(self._root_dir))
 
         self.sequences = FastaDir(self._seq_path, writeable=self._writeable, check_same_thread=self._check_same_thread)
-        self.aliases = SeqAliasDB(self._db_path,
-                                  writeable=self._writeable,
-                                  check_same_thread=self._check_same_thread)
+        self.aliases = SeqAliasDB(self._db_path, writeable=self._writeable, check_same_thread=self._check_same_thread)
 
         if translate_ncbi_namespace is not None:
-            _logger.warn("translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed")
+            _logger.warn(
+                "translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
+            )
 
     def __contains__(self, nsa):
         ns, a = nsa.split(nsa_sep) if nsa_sep in nsa else (None, nsa)
@@ -153,18 +160,19 @@ def commit(self):
         self.sequences.commit()
         self.aliases.commit()
         if self._pending_sequences + self._pending_aliases > 0:
-            _logger.info("Committed {} sequences ({} residues) and {} aliases".format(
-                self._pending_sequences, self._pending_sequences_len, self._pending_aliases))
+            _logger.info(
+                "Committed {} sequences ({} residues) and {} aliases".format(
+                    self._pending_sequences, self._pending_sequences_len, self._pending_aliases
+                )
+            )
         self._pending_sequences = 0
         self._pending_sequences_len = 0
         self._pending_aliases = 0
 
-
     def fetch(self, alias, start=None, end=None, namespace=None):
         seq_id = self._get_unique_seqid(alias=alias, namespace=namespace)
         return self.sequences.fetch(seq_id, start, end)
 
-
     def fetch_uri(self, uri, start=None, end=None):
         """fetch sequence for URI/CURIE of the form namespace:alias, such as
         NCBI:NM_000059.3.
@@ -174,7 +182,6 @@ def fetch_uri(self, uri, start=None, end=None):
         namespace, alias = uri_re.match(uri).groups()
         return self.fetch(alias=alias, namespace=namespace, start=start, end=end)
 
-
     def store(self, seq, nsaliases):
         """nsaliases is a list of dicts, like:
 
@@ -191,8 +198,9 @@ def store(self, seq, nsaliases):
 
         try:
             seqhash = sha512t24u(seq)
-        except Exception as e:
+        except Exception:
             import pprint
+
             _logger.critical("Exception raised for " + pprint.pformat(nsaliases))
             raise
         seq_id = seqhash
@@ -204,10 +212,11 @@ def store(self, seq, nsaliases):
             l=len(seq),
             na=len(nsaliases),
             nsa_sep=nsa_sep,
-            aliases=", ".join("{nsa[namespace]}:{nsa[alias]}".format(nsa=nsa) for nsa in nsaliases))
+            aliases=", ".join("{nsa[namespace]}:{nsa[alias]}".format(nsa=nsa) for nsa in nsaliases),
+        )
         if seq_id not in self.sequences:
             _logger.info("Storing " + msg)
-            if len(seq) > ct_n_residues:    # pragma: no cover
+            if len(seq) > ct_n_residues:  # pragma: no cover
                 _logger.debug("Precommit for large sequence")
                 self.commit()
             self.sequences.store(seq_id, seq)
@@ -230,14 +239,18 @@ def store(self, seq, nsaliases):
                 self.aliases.store_alias(seq_id=seq_id, namespace=namespace, alias=alias)
             self._pending_aliases += len(upd_tuples)
             n_aliases_added += len(upd_tuples)
-        if (self._pending_sequences > ct_n_seqs or self._pending_aliases > ct_n_aliases
-                or self._pending_sequences_len > ct_n_residues):    # pragma: no cover
-            _logger.info("Hit commit thresholds ({self._pending_sequences} sequences, "
-                        "{self._pending_aliases} aliases, {self._pending_sequences_len} residues)".format(self=self))
+        if (
+            self._pending_sequences > ct_n_seqs
+            or self._pending_aliases > ct_n_aliases
+            or self._pending_sequences_len > ct_n_residues
+        ):  # pragma: no cover
+            _logger.info(
+                "Hit commit thresholds ({self._pending_sequences} sequences, "
+                "{self._pending_aliases} aliases, {self._pending_sequences_len} residues)".format(self=self)
+            )
             self.commit()
         return n_seqs_added, n_aliases_added
 
-
     def translate_alias(self, alias, namespace=None, target_namespaces=None, translate_ncbi_namespace=None):
         """given an alias and optional namespace, return a list of all other
         aliases for same sequence
@@ -245,7 +258,9 @@ def translate_alias(self, alias, namespace=None, target_namespaces=None, transla
         """
 
         if translate_ncbi_namespace is not None:
-            _logger.warn("translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed")
+            _logger.warn(
+                "translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
+            )
         seq_id = self._get_unique_seqid(alias=alias, namespace=namespace)
         aliases = self.aliases.find_aliases(seq_id=seq_id)
         if target_namespaces:
@@ -253,23 +268,21 @@ def translate_alias(self, alias, namespace=None, target_namespaces=None, transla
         aliases = [nsa_sep.join([a["namespace"], a["alias"]]) for a in aliases]
         return aliases
 
-
     def translate_identifier(self, identifier, target_namespaces=None, translate_ncbi_namespace=None):
         """Given a string identifier, return a list of aliases (as
         identifiers) that refer to the same sequence.
 
         """
         if translate_ncbi_namespace is not None:
-            _logger.warn("translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed")
+            _logger.warn(
+                "translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed"
+            )
 
         namespace, alias = identifier.split(nsa_sep) if nsa_sep in identifier else (None, identifier)
-        return self.translate_alias(alias=alias,
-                                       namespace=namespace,
-                                       target_namespaces=target_namespaces)
-
+        return self.translate_alias(alias=alias, namespace=namespace, target_namespaces=target_namespaces)
 
     ############################################################################
-    # Internal Methods 
+    # Internal Methods
 
     @lru_cache(maxsize=SEQREPO_LRU_CACHE_MAXSIZE)
     def _get_unique_seqid(self, alias, namespace):
@@ -288,7 +301,6 @@ def _get_unique_seqid(self, alias, namespace):
             raise KeyError("Alias {} (namespace: {}): not unique".format(alias, namespace))
         return seq_ids.pop()
 
-
     def _update_digest_aliases(self, seq_id, seq):
 
         """compute digest aliases for seq and update; returns number of digest
@@ -301,22 +313,10 @@ def _update_digest_aliases(self, seq_id, seq):
 
         ir = bioutils.digests.seq_vmc_identifier(seq)
         seq_aliases = [
-            {
-                "namespace": ir["namespace"],
-                "alias": ir["accession"],
-            },
-            {
-                "namespace": "SHA1",
-                "alias": bioutils.digests.seq_sha1(seq)
-            },
-            {
-                "namespace": "MD5",
-                "alias": bioutils.digests.seq_md5(seq)
-            },
-            {
-                "namespace": "SEGUID",
-                "alias": bioutils.digests.seq_seguid(seq)
-            },
+            {"namespace": ir["namespace"], "alias": ir["accession"]},
+            {"namespace": "SHA1", "alias": bioutils.digests.seq_sha1(seq)},
+            {"namespace": "MD5", "alias": bioutils.digests.seq_md5(seq)},
+            {"namespace": "SEGUID", "alias": bioutils.digests.seq_seguid(seq)},
         ]
         for sa in seq_aliases:
             self.aliases.store_alias(seq_id=seq_id, **sa)
diff --git a/src/biocommons/seqrepo/utils.py b/src/biocommons/seqrepo/utils.py
index e1d281a..418ec4b 100644
--- a/src/biocommons/seqrepo/utils.py
+++ b/src/biocommons/seqrepo/utils.py
@@ -1,6 +1,5 @@
 import re
 
-
 ncbi_defline_re = re.compile(r"(?P<namespace>ref)\|(?P<alias>[^|]+)")
 invalid_alias_chars_re = re.compile(r"[^-+./_\w]")
 
diff --git a/tests/conftest.py b/tests/conftest.py
index a22f473..293f0bb 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,5 @@
 import os
+
 import pytest
 
 from biocommons.seqrepo import SeqRepo
@@ -18,23 +19,23 @@ def rest_dataproxy():
 
 @pytest.fixture(scope="session")
 def seqrepo(tmpdir_factory):
-    dir = str(tmpdir_factory.mktemp('seqrepo'))
+    dir = str(tmpdir_factory.mktemp("seqrepo"))
     return SeqRepo(dir, writeable=True)
 
 
 @pytest.fixture(scope="session")
 def seqrepo_ro(tmpdir_factory):
-    dir = str(tmpdir_factory.mktemp('seqrepo'))
+    dir = str(tmpdir_factory.mktemp("seqrepo"))
     sr = SeqRepo(dir, writeable=True)
-    del sr    # close it
+    del sr  # close it
     return SeqRepo(dir)
 
 
 @pytest.fixture(scope="session")
 def seqrepo_keepcase(tmpdir_factory):
-    dir = str(tmpdir_factory.mktemp('seqrepo'))
+    dir = str(tmpdir_factory.mktemp("seqrepo"))
     return SeqRepo(dir, upcase=False, writeable=True)
 
 
 def test_create(seqrepo):
-    assert str(seqrepo).startswith('SeqRepo(root_dir=/')
+    assert str(seqrepo).startswith("SeqRepo(root_dir=/")
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 70e7d4d..1700395 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -7,7 +7,7 @@
 
 import pytest
 
-from biocommons.seqrepo.cli import (init, load)
+from biocommons.seqrepo.cli import init, load
 from biocommons.seqrepo.fastaiter import FastaIter
 from biocommons.seqrepo.utils import parse_defline
 
@@ -18,7 +18,7 @@ class MockOpts(object):
         pass
 
     test_dir = os.path.dirname(__file__)
-    test_data_dir = os.path.join(test_dir, 'data')
+    test_data_dir = os.path.join(test_dir, "data")
 
     opts = MockOpts()
     opts.root_directory = os.path.join(tempfile.mkdtemp(prefix="seqrepo_pytest_"), "seqrepo")
@@ -48,33 +48,36 @@ def test_20_load(opts):
 def test_refseq_fasta(opts):
     def _get_refseq_alias(aliases):
         for al in aliases:
-            if al['namespace'] == 'refseq':
-                return al['alias']
+            if al["namespace"] == "refseq":
+                return al["alias"]
         return None
 
-
     init(opts)
-    opts.namespace = 'refseq'
-    old_fasta = '>gi|295424141|ref|NM_000439.4| Homo sapiens proprotein convertase subtilisin/kexin type 1 ' + \
-                         '(PCSK1), transcript variant 1, mRNA\nTTT'
-    new_fasta = '>NM_000439.4 Homo sapiens proprotein convertase subtilisin/kexin type 1 (PCSK1), ' + \
-                         'transcript variant 1, mRNA\nTTT'
+    opts.namespace = "refseq"
+    old_fasta = (
+        ">gi|295424141|ref|NM_000439.4| Homo sapiens proprotein convertase subtilisin/kexin type 1 "
+        + "(PCSK1), transcript variant 1, mRNA\nTTT"
+    )
+    new_fasta = (
+        ">NM_000439.4 Homo sapiens proprotein convertase subtilisin/kexin type 1 (PCSK1), "
+        + "transcript variant 1, mRNA\nTTT"
+    )
 
     aliases = parse_defline(old_fasta, opts.namespace)
     nm = _get_refseq_alias(aliases)
-    assert nm == 'NM_000439.4'
+    assert nm == "NM_000439.4"
 
     aliases2 = parse_defline(new_fasta, opts.namespace)
     nm2 = _get_refseq_alias(aliases2)
-    assert nm2 == 'NM_000439.4'
+    assert nm2 == "NM_000439.4"
 
     data = io.StringIO(new_fasta)
 
     iterator = FastaIter(data)
     header, seq = next(iterator)
-    assert header.startswith('NM_000439.4 Homo sapiens proprotein convertase subtilisin/kexin type 1 (PCSK1)')
+    assert header.startswith("NM_000439.4 Homo sapiens proprotein convertase subtilisin/kexin type 1 (PCSK1)")
     assert seq == "TTT"
 
     aliases3 = parse_defline(header, opts.namespace)
     nm3 = _get_refseq_alias(aliases3)
-    assert nm3 == 'NM_000439.4'
+    assert nm3 == "NM_000439.4"
diff --git a/tests/test_fastadir.py b/tests/test_fastadir.py
index 535101c..dd868ea 100644
--- a/tests/test_fastadir.py
+++ b/tests/test_fastadir.py
@@ -35,6 +35,7 @@ def test_write_reread():
 
 if __name__ == "__main__":
     import logging
+
     logging.basicConfig(level=logging.DEBUG)
     test_write_reread()
 
@@ -58,4 +59,4 @@ def test_writeability():
         fd._writeable = False
         fd.store("NC_000001.11", "TGGTGGCACGCGCTTGTAGT")
 
-    fd._writeable = True
\ No newline at end of file
+    fd._writeable = True
diff --git a/tests/test_fastaiter.py b/tests/test_fastaiter.py
index 1fa52e2..0d31310 100644
--- a/tests/test_fastaiter.py
+++ b/tests/test_fastaiter.py
@@ -1,8 +1,7 @@
+import pytest
 import six
 from six.moves import StringIO
 
-import pytest
-
 from biocommons.seqrepo.fastaiter import FastaIter
 
 
@@ -74,4 +73,3 @@ def test_multiline():
     # should be empty now
     with pytest.raises(StopIteration):
         six.next(iterator)
-
diff --git a/tests/test_seqaliasdb.py b/tests/test_seqaliasdb.py
index b301c48..d9fc9af 100644
--- a/tests/test_seqaliasdb.py
+++ b/tests/test_seqaliasdb.py
@@ -39,31 +39,12 @@ def test_seqinfo():
     aliases = [{k: r[k] for k in alias_keys} for r in db.find_aliases(current_only=False)]
     aliases.sort(key=lambda r: (r["seqalias_id"], r["seq_id"], r["namespace"], r["alias"], r["is_current"]))
 
-    assert aliases == [{
-        'seqalias_id': 1,
-        'seq_id': 'q1',
-        'namespace': 'A',
-        'alias': '1',
-        'is_current': 0
-    }, {
-        'seqalias_id': 2,
-        'seq_id': 'q1',
-        'namespace': 'A',
-        'alias': '2',
-        'is_current': 1
-    }, {
-        'seqalias_id': 3,
-        'seq_id': 'q1',
-        'namespace': 'B',
-        'alias': '1',
-        'is_current': 1
-    }, {
-        'seqalias_id': 4,
-        'seq_id': 'q2',
-        'namespace': 'A',
-        'alias': '1',
-        'is_current': 1
-    }]
+    assert aliases == [
+        {"seqalias_id": 1, "seq_id": "q1", "namespace": "A", "alias": "1", "is_current": 0},
+        {"seqalias_id": 2, "seq_id": "q1", "namespace": "A", "alias": "2", "is_current": 1},
+        {"seqalias_id": 3, "seq_id": "q1", "namespace": "B", "alias": "1", "is_current": 1},
+        {"seqalias_id": 4, "seq_id": "q2", "namespace": "A", "alias": "1", "is_current": 1},
+    ]
 
     # __contains__
     assert "q1" in db
@@ -71,7 +52,7 @@ def test_seqinfo():
 
     assert db.stats()["n_sequences"] == 2
 
-    del db    # close
+    del db  # close
     db = SeqAliasDB(db_path)
 
     with pytest.raises(RuntimeError):
diff --git a/tests/test_seqrepo.py b/tests/test_seqrepo.py
index c97aeeb..939bf87 100644
--- a/tests/test_seqrepo.py
+++ b/tests/test_seqrepo.py
@@ -5,12 +5,12 @@
 
 
 def test_create(seqrepo):
-    assert str(seqrepo).startswith('SeqRepo(root_dir=/')
+    assert str(seqrepo).startswith("SeqRepo(root_dir=/")
 
 
 def test_seqrepo_dir_not_exist(tmpdir_factory):
     """Ensure that exception is raised for non-existent seqrepo directory"""
-    dir = str(tmpdir_factory.mktemp('seqrepo')) + "-IDONTEXIST"
+    dir = str(tmpdir_factory.mktemp("seqrepo")) + "-IDONTEXIST"
     with pytest.raises(OSError) as ex:
         SeqRepo(dir, writeable=False)
 
@@ -19,9 +19,9 @@ def test_seqrepo_dir_not_exist(tmpdir_factory):
 
 def test_store(seqrepo):
     seqrepo.store("SMELLASSWEET", [{"namespace": "en", "alias": "rose"}, {"namespace": "fr", "alias": "rose"}])
-    seqrepo.store("smellassweet", [{"namespace": "es", "alias": "rosa"}])    # same sequence, new alias
+    seqrepo.store("smellassweet", [{"namespace": "es", "alias": "rosa"}])  # same sequence, new alias
 
-    seqrepo.store("ASINCHANGE", [{"namespace": "en", "alias": "coin"}])    # same alias, diff seqs in diff namespaces
+    seqrepo.store("ASINCHANGE", [{"namespace": "en", "alias": "coin"}])  # same alias, diff seqs in diff namespaces
     seqrepo.store("ASINACORNER", [{"namespace": "fr", "alias": "coin"}])
     seqrepo.commit()
 
@@ -37,10 +37,10 @@ def test_fetch(seqrepo):
     assert seqrepo.fetch("rosa", start=5, end=7) == "AS"
 
     with pytest.raises(KeyError):
-        assert seqrepo.fetch("bogus")    # non-existent alias
+        assert seqrepo.fetch("bogus")  # non-existent alias
 
     with pytest.raises(KeyError):
-        assert seqrepo.fetch("coin")    # ambiguous alias
+        assert seqrepo.fetch("coin")  # ambiguous alias
 
     assert seqrepo.fetch("coin", namespace="en") == "ASINCHANGE"
     assert seqrepo.fetch("coin", namespace="fr") == "ASINACORNER"
@@ -59,7 +59,7 @@ def test_digests(seqrepo):
     assert seqrepo.fetch_uri("SEGUID:aMQF/cdHkAayLkVYs8XV2u+Hy34") == "ASINACORNER"
     assert seqrepo.fetch_uri("SHA1:68c405fdc7479006b22e4558b3c5d5daef87cb7e") == "ASINACORNER"
     assert seqrepo.fetch_uri("VMC:GS_LDz34B6fA_fLxFoc2agLrXQRYuupOGGM") == "ASINACORNER"
-    
+
 
 def test_errors(seqrepo_ro):
     with pytest.raises(RuntimeError):
@@ -77,46 +77,53 @@ def test_refseq_lookup(seqrepo):
     assert seqrepo["ncbiac"] == "NCBISEQUENCE"
     assert seqrepo["NCBI:ncbiac"] == "NCBISEQUENCE"
     assert seqrepo["refseq:ncbiac"] == "NCBISEQUENCE"
-    
+
 
 def test_namespace_translation(tmpdir_factory):
-    dir = str(tmpdir_factory.mktemp('seqrepo'))
+    dir = str(tmpdir_factory.mktemp("seqrepo"))
     seqrepo = SeqRepo(dir, writeable=True)
 
     # store sequences
-    seqrepo.store("NCBISEQUENCE",    [{"namespace": "NCBI",    "alias": "ncbiac"   }])
+    seqrepo.store("NCBISEQUENCE", [{"namespace": "NCBI", "alias": "ncbiac"}])
     seqrepo.store("ENSEMBLSEQUENCE", [{"namespace": "Ensembl", "alias": "ensemblac"}])
-    seqrepo.store("LRGSEQUENCE",     [{"namespace": "LRG",     "alias": "lrgac"   }])
-    seqrepo.store("REFSEQSEQUENCE",  [{"namespace": "refseq",  "alias": "refseqac" }])  # should be stored as NCBI:refseqac
+    seqrepo.store("LRGSEQUENCE", [{"namespace": "LRG", "alias": "lrgac"}])
+    seqrepo.store("REFSEQSEQUENCE", [{"namespace": "refseq", "alias": "refseqac"}])  # should be stored as NCBI:refseqac
     seqrepo.commit()
 
     # lookups, no query translation
-    assert seqrepo["NCBI:ncbiac"]       == "NCBISEQUENCE"
+    assert seqrepo["NCBI:ncbiac"] == "NCBISEQUENCE"
     assert seqrepo["Ensembl:ensemblac"] == "ENSEMBLSEQUENCE"
-    assert seqrepo["LRG:lrgac"]         == "LRGSEQUENCE"
-    assert seqrepo["NCBI:refseqac"]     == "REFSEQSEQUENCE"  # tests ns translation on store
+    assert seqrepo["LRG:lrgac"] == "LRGSEQUENCE"
+    assert seqrepo["NCBI:refseqac"] == "REFSEQSEQUENCE"  # tests ns translation on store
 
     # lookups, w/ query translation
-    assert seqrepo["refseq:ncbiac"]     == "NCBISEQUENCE"
-    assert seqrepo["RefSeq:ncbiac"]     == "NCBISEQUENCE"  # case-squashed
+    assert seqrepo["refseq:ncbiac"] == "NCBISEQUENCE"
+    assert seqrepo["RefSeq:ncbiac"] == "NCBISEQUENCE"  # case-squashed
     assert seqrepo["Ensembl:ensemblac"] == "ENSEMBLSEQUENCE"
-    assert seqrepo["LRG:lrgac"]         == "LRGSEQUENCE"
+    assert seqrepo["LRG:lrgac"] == "LRGSEQUENCE"
 
     seq_id = seqrepo._get_unique_seqid(alias="ncbiac", namespace="NCBI")
     aliases = list(seqrepo.aliases.find_aliases(seq_id=seq_id))
     assert any(a for a in aliases if a["namespace"] == "refseq")
     assert any(a for a in aliases if a["namespace"] == "ga4gh")
 
-    assert seqrepo["ga4gh:SQ."+seq_id]   == "NCBISEQUENCE"
-    assert seqrepo["sha512t24u:"+seq_id] == "NCBISEQUENCE"
-    
+    assert seqrepo["ga4gh:SQ." + seq_id] == "NCBISEQUENCE"
+    assert seqrepo["sha512t24u:" + seq_id] == "NCBISEQUENCE"
 
 
 def test_translation(seqrepo):
-    assert "MD5:8b2698fb0b0c93558a6adbb11edb1e4b" in seqrepo.translate_identifier("en:rose"), "failed fully-qualified identifier lookup"
-    assert "MD5:8b2698fb0b0c93558a6adbb11edb1e4b" in seqrepo.translate_identifier("rose"), "failed unqualified identifier lookup"
-    assert "VMC:GS_bsoUMlD3TrEtlh9Dt1iT29mzfkwwFUDr" in seqrepo.translate_identifier("en:rose"), "failed to find expected identifier in returned identifiers"
-    assert ["VMC:GS_bsoUMlD3TrEtlh9Dt1iT29mzfkwwFUDr"] == seqrepo.translate_identifier("en:rose", target_namespaces=["VMC"]), "failed to rerieve exactly the expected identifier"
+    assert "MD5:8b2698fb0b0c93558a6adbb11edb1e4b" in seqrepo.translate_identifier(
+        "en:rose"
+    ), "failed fully-qualified identifier lookup"
+    assert "MD5:8b2698fb0b0c93558a6adbb11edb1e4b" in seqrepo.translate_identifier(
+        "rose"
+    ), "failed unqualified identifier lookup"
+    assert "VMC:GS_bsoUMlD3TrEtlh9Dt1iT29mzfkwwFUDr" in seqrepo.translate_identifier(
+        "en:rose"
+    ), "failed to find expected identifier in returned identifiers"
+    assert ["VMC:GS_bsoUMlD3TrEtlh9Dt1iT29mzfkwwFUDr"] == seqrepo.translate_identifier(
+        "en:rose", target_namespaces=["VMC"]
+    ), "failed to rerieve exactly the expected identifier"
 
 
 def test_sequenceproxy(seqrepo):
@@ -124,6 +131,5 @@ def test_sequenceproxy(seqrepo):
     # instantiated with use_sequenceproxy=True
 
     sp = SequenceProxy(seqrepo, namespace=None, alias="rosa")
-    assert sp                   # __bool__ dunder method
-    assert sp[5:7] == "AS"      # __eq__ and __getitem__
-
+    assert sp  # __bool__ dunder method
+    assert sp[5:7] == "AS"  # __eq__ and __getitem__
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 618ff56..f9ec473 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -23,17 +23,11 @@ def test_parse_defline():
     assert parse_defline(defline, "refseq") == [{"namespace": "refseq", "alias": "NG_007107.2"}]
 
     defline = ">gi|568815364|ref|NT_077402.3| Homo sapiens chromosome 1 genomic scaffold, GRCh38.p7 Primary Assembly HSCHR1_CTG1"
-    assert parse_defline(defline, "refseq") == [
-        {"namespace": "refseq", "alias": "NT_077402.3"}
-        ]
-    
+    assert parse_defline(defline, "refseq") == [{"namespace": "refseq", "alias": "NT_077402.3"}]
 
 
 def test_validate_aliases():
-    aliases = [
-        {"namespace": "refseq", "alias": "NM_012345.6"},
-        {"namespace": "Ensembl", "alias": "ENST012345.6"}
-        ]
+    aliases = [{"namespace": "refseq", "alias": "NM_012345.6"}, {"namespace": "Ensembl", "alias": "ENST012345.6"}]
 
     assert validate_aliases(aliases)  # okay
 
@@ -42,7 +36,6 @@ def test_validate_aliases():
 
     with pytest.raises(RuntimeError):
         validate_aliases([{"namespace": "refseq", "alias": "NM_012345"}])
-        
+
     with pytest.raises(RuntimeError):
         validate_aliases([{"namespace": "Ensembl", "alias": "ENST012345"}])
-