Skip to content

Commit

Permalink
Merge pull request #2226 from merenlab/v22-to-v23-migration
Browse files Browse the repository at this point in the history
V22 to v23 contigs-db migration
  • Loading branch information
mschecht authored Feb 19, 2024
2 parents 2ab8c19 + 642e7da commit 01a086d
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 6 deletions.
2 changes: 1 addition & 1 deletion anvio/dbops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4276,7 +4276,7 @@ def list_available_hmm_sources(self):


def remove_data_from_db(self, tables_dict):
"""This is quite an experimental function to clean up tables in contgis databases. Use with caution.
"""This is quite an experimental function to clean up tables in contigs databases. Use with caution.
The expected tables dict should follow this structure:
Expand Down
80 changes: 80 additions & 0 deletions anvio/migrations/contigs/v22_to_v23.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- coding: utf-8

import sys
import argparse

import anvio.db as db
import anvio.utils as utils
import anvio.terminal as terminal

from anvio.errors import ConfigError

# Both version strings are parsed from this migration's own name, which must follow
# the 'vX_to_vY' convention (e.g. 'v22_to_v23' -> '22' and '23'). Any dotted package
# prefix in `__name__` is dropped first so that it cannot leak into `current_version`.
_migration_name = __name__.rsplit('.', 1)[-1]
if '_to_' not in _migration_name:
    # When the file is executed directly `__name__` is '__main__', which carries no
    # version information (and used to make the unpacking below fail with a
    # ValueError), so fall back on the name of the file itself.
    _migration_name = __file__.replace('\\', '/').rsplit('/', 1)[-1].rsplit('.', 1)[0]

current_version, next_version = [x[1:] for x in _migration_name.split('_to_')]

# terminal helpers shared by `migrate` for user-facing messages and progress reporting
run = terminal.Run()
progress = terminal.Progress()

def migrate(db_path):
    """Upgrade a contigs database from `current_version` to `next_version`.

    Unless the database reports that SCG taxonomy was run with the SCG taxonomy
    database version "GTDB: v214.1; Anvi'o: v1", every row of the `scg_taxonomy`
    table is deleted and the related meta values are reset. In both cases the
    version stored in the database is bumped to `next_version`, and a message
    describing what happened is printed.

    Parameters
    ==========
    db_path : str
        Path to the contigs database to migrate.

    Raises
    ======
    ConfigError
        If `db_path` is None, or if the version stored in the database is not
        `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # presumably raises if `db_path` is not a contigs database -- its return value is not used here
    utils.is_contigs_db(db_path)

    contigs_db = db.DB(db_path, None, ignore_version = True)
    db_version = str(contigs_db.get_version())
    if db_version != current_version:
        # nothing has been modified at this point, so release the connection
        # instead of leaving it open behind the exception
        contigs_db.disconnect()
        raise ConfigError(f"The version of the provided contigs database is {db_version}, "
                          f"not the required version, {current_version}, so this script cannot upgrade the database.")

    db_altered = False
    progress.new("Migrating")

    # usually we never need to do this, but in this case it is best practice to test if
    # we need an actual removal of the SCGs from a contigs-db since we released new SCGs
    # in the master repository, and some people likely already updated their contigs-dbs.
    # if they have already gone through that, we can save them the trouble.
    scg_taxonomy_was_run = contigs_db.get_meta_value('scg_taxonomy_was_run')
    scg_taxonomy_db_version = contigs_db.get_meta_value('scg_taxonomy_database_version')
    scg_data_is_current = scg_taxonomy_was_run and scg_taxonomy_db_version == "GTDB: v214.1; Anvi'o: v1"

    if not scg_data_is_current:
        # needs an update: wipe the SCG taxonomy results and mark them as never run
        contigs_db._exec('''DELETE FROM scg_taxonomy''')
        contigs_db.set_meta_value('scg_taxonomy_was_run', 0)
        contigs_db.set_meta_value('scg_taxonomy_database_version', None)

        db_altered = True

    progress.update("Updating version")
    contigs_db.remove_meta_key_value_pair('version')
    contigs_db.set_version(next_version)

    progress.update("Committing changes")
    contigs_db.disconnect()

    progress.end()

    if db_altered:
        message = (f"Your contigs database is now version {next_version}. Sadly this update removed all SCG taxonomy "
                   f"data in this contigs-db due to a change in the set of SCGs anvi'o now uses for taxonomy estimation. "
                   f"As a result, you will need to re-run anvi-run-scg-taxonomy command on this contigs-db :/ If you "
                   f"would like to learn why this was necessary, please visit https://github.com/merenlab/anvio/issues/2211. "
                   f"We thank you for your patience!")
    else:
        message = ("Since you have already updated your contigs-db with new SCGs, anvi'o simply bumped the version of your "
                   "database rather than removing or editing any data :) Moving on.")

    run.info_single(message, nl_after=1, nl_before=1, mc='green')


if __name__ == '__main__':
    # Stand-alone entry point: takes the path of a single contigs database on the
    # command line and migrates it in place.
    description = f"A simple script to upgrade an anvi'o contigs database from version {current_version} to version {next_version}"
    arg_parser = argparse.ArgumentParser(description=description)
    arg_parser.add_argument("contigs_db", metavar="CONTIGS_DB", help=f"An anvi'o contigs database of version {current_version}")

    # `parse_known_args` is used so that unrecognized arguments are tolerated; they are simply ignored
    cli_args, _ = arg_parser.parse_known_args()

    try:
        migrate(cli_args.contigs_db)
    except ConfigError as error:
        # report the problem and signal the failure through the exit status
        print(error)
        sys.exit(-1)
2 changes: 1 addition & 1 deletion anvio/tables/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
__email__ = "[email protected]"


contigs_db_version = "22"
contigs_db_version = "23"
profile_db_version = "40"
genes_db_version = "6"
pan_db_version = "17"
Expand Down
8 changes: 4 additions & 4 deletions bin/anvi-migrate
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ from anvio.migrations import migration_scripts


__description__ = "Migrates any anvi'o artifact, whether it is a database or a config file, to a newer version. Pure magic."
__authors__ = ['meren', 'ozcan', 'ekiefl', 'ivagljiva', 'semiller10']
__authors__ = ['meren', 'ozcan', 'ekiefl', 'ivagljiva', 'semiller10', 'mschecht']
__requires__ = ["contigs-db", "profile-db", "pan-db", "genes-db", "genomes-storage-db", "structure-db", "modules-db", "workflow-config"]
__provides__ = []

Expand Down Expand Up @@ -270,7 +270,7 @@ class Migrater(object):
shutil.move(self.artifact_path, self.artifact_path + '.broken')

raise ConfigError("Anvi'o has very bad news for you :( Your migration failed, and anvi'o has no backups to restore your "
"original file. The current artifact is likely in a broken state, and you will unlkely going to be "
"original file. The current artifact is likely in a broken state, and you will unlikely going to be "
"able to use it. So anvi'o renamed it by adding a prefix '.broken' to its file name. We are very sorry "
"for this error (and anvi'o will certainly not put salt on the wound by reminding you that you could "
"have avoided it by using the `--migrate-safely` flag): \"%s\"." % e)
Expand Down Expand Up @@ -301,8 +301,8 @@ if __name__ == '__main__':
"all of the files that match to your filters.")

groupB = parser.add_argument_group('SAFETY', "It is up to you. Safe things take much longer and boring. Unsafe things "
"are fast, fun, and .. well, don't come to use if your computer loses power "
"or somiething.")
"are fast, fun, and .. well, don't come to us if your computer loses power "
"or something.")
groupB.add_argument(*anvio.A('migrate-safely'), **anvio.K('migrate-safely'))
groupB.add_argument(*anvio.A('migrate-quickly'), **anvio.K('migrate-quickly'))

Expand Down

0 comments on commit 01a086d

Please sign in to comment.