From 0b6f33c1e723837551ff3fc588d95d874570868c Mon Sep 17 00:00:00 2001 From: "Daniel G. A. Smith" Date: Wed, 6 Nov 2019 12:51:19 -0500 Subject: [PATCH 1/4] Migration: Updates contrib values migration --- .../159ba85908fd_add_contributed_values_table.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/qcfractal/alembic/versions/159ba85908fd_add_contributed_values_table.py b/qcfractal/alembic/versions/159ba85908fd_add_contributed_values_table.py index d88cf75cf..841e4295a 100644 --- a/qcfractal/alembic/versions/159ba85908fd_add_contributed_values_table.py +++ b/qcfractal/alembic/versions/159ba85908fd_add_contributed_values_table.py @@ -37,6 +37,8 @@ def migrate_contributed_values_data(): for ds in ds_ids_data: (ds_id, ds_contrib) = ds + if ds_contrib is None: + continue for key, dict_values in ds_contrib.items(): @@ -67,18 +69,21 @@ def upgrade(): sa.Column('name', sa.String(), nullable=False), sa.Column('collection_id', sa.Integer(), nullable=False), sa.Column('citations', sa.JSON(), nullable=True), - sa.Column('theory_level', sa.JSON(), nullable=True), + sa.Column('theory_level', sa.JSON(), nullable=False), sa.Column('theory_level_details', sa.JSON(), nullable=True), sa.Column('comments', sa.String(), nullable=True), - sa.Column('values', MsgpackExt(), nullable=True), - sa.Column('index', MsgpackExt(), nullable=True), + sa.Column('values', MsgpackExt(), nullable=False), + sa.Column('index', MsgpackExt(), nullable=False), sa.Column('external_url', sa.String(), nullable=True), sa.Column('doi', sa.String(), nullable=True), - sa.Column('units', sa.String(), nullable=True), + sa.Column('units', sa.String(), nullable=False), + sa.Column('values_structure', sa.JSON(), nullable=True, default=lambda: {}), sa.ForeignKeyConstraint(['collection_id'], ['collection.id'], ondelete='cascade'), sa.PrimaryKeyConstraint('name', 'collection_id') ) + op.alter_column('contributed_values', 'values_structure', server_default=None, nullable=False) + migrate_contributed_values_data() op.drop_column('dataset', 'contributed_values_data') From 71aca6be1a3df2199634cf93e104285e7d71ab35 Mon Sep 17 00:00:00 2001 From: "Daniel G. A. Smith" Date: Wed, 6 Nov 2019 13:47:02 -0500 Subject: [PATCH 2/4] Storage: Makes manager a weak table --- devtools/README.md | 16 ++++++++++++ ...5_updates_collections_table_with_owner_.py | 4 --- .../models/collections_models.py | 26 +++++++++---------- .../storage_sockets/models/sql_models.py | 2 +- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/devtools/README.md b/devtools/README.md index 1efe818d1..9925ff5b8 100644 --- a/devtools/README.md +++ b/devtools/README.md @@ -71,3 +71,19 @@ is installed by looking at the `git` tags and how many commits ahead this versio If the version of this commit is the same as a `git` tag, the installed version is the same as the tag, e.g. `qcfractal-0.1.2`, otherwise it will be appended with `+X` where `X` is the number of commits ahead from the last tag, and then `-YYYYYY` where the `Y`'s are replaced with the `git` commit hash. + + +# Alembic Migration +Alembic migrations form the core of changing the databaes schema. A general guideline is below, but keep in +mind that there often may be required changes to this procedure. When in doubt, ask QCArchive Slack. + +Initialize a server from the previous version: + - `git checkout 'prevsion version'` + - `qcfractal-server init --base-folder=tmp --db-port=9000` + +Play upgrades ontop of this server: + - `git checkout `master or feature branch'` + - `qcfractal-server upgrade --base-folder=tmp` + +Pull alembic command line string and create migration: + - `qcfractal-server info --base-folder=tmp alembic | tail -1` revision --autogenerate diff --git a/qcfractal/alembic/versions/a2f76bb7be65_updates_collections_table_with_owner_.py b/qcfractal/alembic/versions/a2f76bb7be65_updates_collections_table_with_owner_.py index 6cce5e0b7..37b1ec38b 100644 --- a/qcfractal/alembic/versions/a2f76bb7be65_updates_collections_table_with_owner_.py +++ b/qcfractal/alembic/versions/a2f76bb7be65_updates_collections_table_with_owner_.py @@ -22,10 +22,6 @@ def upgrade(): op.add_column('collection', sa.Column('group', sa.String(), nullable=False, server_default='default')) op.alter_column('collection', 'group', server_default=None) - op.add_column('collection', sa.Column('metadata', sa.JSON(), nullable=True)) - op.execute("UPDATE collection SET metadata = '{}'::json") - op.alter_column('collection', 'metadata', nullable=False) - op.add_column('collection', sa.Column('view_url_hdf5', sa.String(), nullable=True)) op.add_column('collection', sa.Column('view_url_plaintext', sa.String(), nullable=True)) op.add_column('collection', sa.Column('view_metadata', sa.JSON(), nullable=True)) diff --git a/qcfractal/storage_sockets/models/collections_models.py b/qcfractal/storage_sockets/models/collections_models.py index ac94aaf45..f7012dc10 100644 --- a/qcfractal/storage_sockets/models/collections_models.py +++ b/qcfractal/storage_sockets/models/collections_models.py @@ -35,14 +35,14 @@ class CollectionORM(Base): tags = Column(JSON) tagline = Column(String) - description = Column(String, nullable=True) + description = Column(String) - group = Column(String(100), nullable=True) + group = Column(String(100), nullable=False) visibility = Column(Boolean, nullable=False) - view_url_hdf5 = Column(String, nullable=True) - view_url_plaintext = Column(String, nullable=True) - view_metadata = Column(JSON, nullable=True) + view_url_hdf5 = Column(String) + view_url_plaintext = Column(String) + view_metadata = Column(JSON) view_available = Column(Boolean, nullable=False) provenance = Column(JSON) @@ -68,13 +68,13 @@ class DatasetMixin: Mixin class for common Dataset attributes. """ - default_benchmark = Column(String, nullable=True) - default_keywords = Column(JSON, nullable=True) + default_benchmark = Column(String) + default_keywords = Column(JSON) - default_driver = Column(String, nullable=True) - default_units = Column(String, nullable=True) - alias_keywords = Column(JSON, nullable=True) - default_program = Column(String, nullable=True) + default_driver = Column(String) + default_units = Column(String) + alias_keywords = Column(JSON) + default_program = Column(String) history_keys = Column(JSON) history = Column(JSON) @@ -91,7 +91,7 @@ class ContributedValuesORM(Base): name = Column(String, nullable=False, primary_key=True) values = Column(MsgpackExt, nullable=False) index = Column(MsgpackExt, nullable=False) - values_structure = Column(JSON) + values_structure = Column(JSON, nullable=False) theory_level = Column(JSON, nullable=False) units = Column(String, nullable=False) @@ -242,7 +242,7 @@ class ReactionDatasetORM(CollectionORM, DatasetMixin): id = Column(Integer, ForeignKey('collection.id', ondelete="CASCADE"), primary_key=True) - ds_type = Column(String, nullable=True) + ds_type = Column(String) records_obj = relationship(ReactionDatasetEntryORM, lazy='selectin', diff --git a/qcfractal/storage_sockets/models/sql_models.py b/qcfractal/storage_sockets/models/sql_models.py index ce39932d2..55e45db13 100644 --- a/qcfractal/storage_sockets/models/sql_models.py +++ b/qcfractal/storage_sockets/models/sql_models.py @@ -210,7 +210,7 @@ class TaskQueueORM(Base): procedure = Column(String) status = Column(Enum(TaskStatusEnum), default=TaskStatusEnum.waiting) priority = Column(Integer, default=int(PriorityEnum.NORMAL)) - manager = Column(String, ForeignKey('queue_manager.name'), default=None) + manager = Column(String, ForeignKey('queue_manager.name', ondelete="SET NULL"), default=None) error = Column(String) # TODO: tobe removed - should be in results created_on = Column(DateTime, default=datetime.datetime.utcnow) From f593241222bd97bf5ec2ab679fd7f8c56df9b544 Mon Sep 17 00:00:00 2001 From: "Daniel G. A. Smith" Date: Wed, 6 Nov 2019 13:56:37 -0500 Subject: [PATCH 3/4] Migration: Final v0.11 -> v0.12 migration generator --- devtools/README.md | 2 + .../versions/4b27843a188a_v12_final.py | 56 +++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 qcfractal/alembic/versions/4b27843a188a_v12_final.py diff --git a/devtools/README.md b/devtools/README.md index 9925ff5b8..27214c3dd 100644 --- a/devtools/README.md +++ b/devtools/README.md @@ -87,3 +87,5 @@ Play upgrades ontop of this server: Pull alembic command line string and create migration: - `qcfractal-server info --base-folder=tmp alembic | tail -1` revision --autogenerate + +This will create a new file in `qcfractal/alembic/versions` which should be reviewed and edited as needed. diff --git a/qcfractal/alembic/versions/4b27843a188a_v12_final.py b/qcfractal/alembic/versions/4b27843a188a_v12_final.py new file mode 100644 index 000000000..759a934d4 --- /dev/null +++ b/qcfractal/alembic/versions/4b27843a188a_v12_final.py @@ -0,0 +1,56 @@ +"""Final migration synchronization from v0.11 to v0.12 + +Revision ID: 4b27843a188a +Revises: 159ba85908fd +Create Date: 2019-11-06 13:48:39.716633 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '4b27843a188a' +down_revision = '159ba85908fd' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column('collection', 'group', + existing_type=sa.VARCHAR(), + type_=sa.String(length=100), + existing_nullable=False) + op.alter_column('molecule', 'geometry', + existing_type=postgresql.BYTEA(), + nullable=False) + op.alter_column('molecule', 'symbols', + existing_type=postgresql.BYTEA(), + nullable=False) + op.alter_column('task_queue', 'spec', + existing_type=postgresql.BYTEA(), + nullable=False) + op.drop_constraint('task_queue_manager_fkey', 'task_queue', type_='foreignkey') + op.create_foreign_key('task_queue_manager_fkey', 'task_queue', 'queue_manager', ['manager'], ['name'], ondelete='SET NULL') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_constraint('task_queue_manager_fkey', 'task_queue', type_='foreignkey') + op.create_foreign_key('task_queue_manager_fkey', 'task_queue', 'queue_manager', ['manager'], ['name']) + op.alter_column('task_queue', 'spec', + existing_type=postgresql.BYTEA(), + nullable=True) + op.alter_column('molecule', 'symbols', + existing_type=postgresql.BYTEA(), + nullable=True) + op.alter_column('molecule', 'geometry', + existing_type=postgresql.BYTEA(), + nullable=True) + op.alter_column('collection', 'group', + existing_type=sa.String(length=100), + type_=sa.VARCHAR(), + existing_nullable=False) + # ### end Alembic commands ### From 332192bf4e68444607ae46f36c0870070fa9c95a Mon Sep 17 00:00:00 2001 From: "Daniel G. A. Smith" Date: Wed, 6 Nov 2019 14:23:46 -0500 Subject: [PATCH 4/4] Collection: Testing fixes due to stricted postgres requirements --- qcfractal/tests/test_client.py | 7 +++-- qcfractal/tests/test_server.py | 2 +- qcfractal/tests/test_storage.py | 54 ++++++++++++++++++++++++--------- 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/qcfractal/tests/test_client.py b/qcfractal/tests/test_client.py index 3bff8fcfc..2214011ba 100644 --- a/qcfractal/tests/test_client.py +++ b/qcfractal/tests/test_client.py @@ -101,14 +101,16 @@ def test_collection_portal(test_server, encoding): "something": "else", "array": ["12345"], "visibility": True, - "view_available": False + "view_available": False, + "group": "default" } client = ptl.FractalClient(test_server) client._set_encoding(encoding) # Test add - _ = client.add_collection(db) + ret = client.add_collection(db, full_return=True) + print(ret) # Test get get_db = client.get_collection(db["collection"], db["name"], full_return=True) @@ -121,7 +123,6 @@ def test_collection_portal(test_server, encoding): get_db.data[0].pop("view_url_hdf5", None) get_db.data[0].pop("view_url_plaintext", None) get_db.data[0].pop("view_metadata", None) - get_db.data[0].pop("group", None) get_db.data[0].pop("description", None) assert db == get_db.data[0] diff --git a/qcfractal/tests/test_server.py b/qcfractal/tests/test_server.py index 711c537c4..c774e9847 100644 --- a/qcfractal/tests/test_server.py +++ b/qcfractal/tests/test_server.py @@ -36,6 +36,7 @@ def test_storage_socket(test_server): "array": ["54321"], "visibility": True, "view_available": False, + "group": "default", } # Cast collection type to lower since the server-side does it anyways storage['collection'] = storage['collection'].lower() @@ -67,7 +68,6 @@ def test_storage_socket(test_server): pdata["data"][0].pop("view_url_hdf5", None) pdata["data"][0].pop("view_url_plaintext", None) pdata["data"][0].pop("view_metadata", None) - pdata["data"][0].pop("group", None) pdata["data"][0].pop("description", None) assert pdata["data"][0] == storage diff --git a/qcfractal/tests/test_storage.py b/qcfractal/tests/test_storage.py index d6606b6c2..07589d195 100644 --- a/qcfractal/tests/test_storage.py +++ b/qcfractal/tests/test_storage.py @@ -219,7 +219,8 @@ def test_collections_add(storage_socket): "something": "else", "array": ["54321"], "visibility": True, - "view_available": False + "view_available": False, + "group": "default", } ret = storage_socket.add_collection(db) @@ -239,6 +240,7 @@ def test_collections_add(storage_socket): # assert len(ret["meta"]["missing"]) == 1 assert ret["meta"]["n_found"] == 0 + def test_collections_overwrite(storage_socket): collection = 'TorsionDriveRecord' @@ -249,7 +251,8 @@ def test_collections_overwrite(storage_socket): "something": "else", "array": ["54321"], "visibility": True, - "view_available": False + "view_available": False, + "group": "default", } ret = storage_socket.add_collection(db) @@ -264,6 +267,7 @@ def test_collections_overwrite(storage_socket): # "id": ret["data"][0]["id"], "collection": "TorsionDriveRecord", # no need to include "name": "Torsion123", # no need to include + "group": "default", "something": "New", "something2": "else", "view_available": True, @@ -290,6 +294,7 @@ def test_collections_overwrite(storage_socket): ret = storage_socket.del_collection(collection, name) assert ret == 1 + def test_dataset_add_delete_cascade(storage_socket): collection = 'dataset' @@ -311,6 +316,8 @@ def test_dataset_add_delete_cascade(storage_socket): True, "view_available": False, + "group": + "default", "records": [{ "name": "He1", "molecule_id": mol_insert["data"][0], @@ -328,13 +335,15 @@ def test_dataset_add_delete_cascade(storage_socket): "theory_level": 'PBE0', "units": 'kcal/mol', "values": [5, 10], - "index": ["He2", "He1"] + "index": ["He2", "He1"], + "values_structure": {}, } } } ret = storage_socket.add_collection(db.copy()) - assert ret["meta"]["n_inserted"] == 1 + print(ret["meta"]["error_description"]) + assert ret["meta"]["n_inserted"] == 1, ret["meta"]["error_description"] ret = storage_socket.get_collections(collection=collection, name=name) assert ret["meta"]["success"] is True @@ -349,14 +358,16 @@ def test_dataset_add_delete_cascade(storage_socket): "theory_level": 'PBE0 FHI-AIMS', "units": 'kcal/mol', "values": np.array([5, 10], dtype=np.int16), - "index": ["He2", "He1"] + "index": ["He2", "He1"], + "values_structure": {}, }, 'contrib2': { "name": 'contrib2', "theory_level": 'PBE0 FHI-AIMS tight', "units": 'kcal/mol', "values": [np.random.rand(2, 3), np.random.rand(2, 3)], - "index": ["He2", "He1"] + "index": ["He2", "He1"], + "values_structure": {}, } } @@ -381,7 +392,6 @@ def test_dataset_add_delete_cascade(storage_socket): assert len(ret['data'][0]['contributed_values'].keys()) == 2 assert len(ret['data'][0]['records']) == 0 - # cleanup # Can't delete molecule when datasets refernece it (no cascade) with pytest.raises(sqlalchemy.exc.IntegrityError): @@ -1301,12 +1311,27 @@ def test_collections_include_exclude(storage_socket): mol_insert = storage_socket.add_molecules([water, water2]) db = { - "collection": collection, - "name": name, - "visibility": True, - "view_available": False, - "records": [{"name": "He1", "molecule_id": mol_insert["data"][0], "comment": None, "local_results": {}}, - {"name": "He2", "molecule_id": mol_insert["data"][1], "comment": None, "local_results": {}}] + "collection": + collection, + "name": + name, + "visibility": + True, + "view_available": + False, + "group": + "default", + "records": [{ + "name": "He1", + "molecule_id": mol_insert["data"][0], + "comment": None, + "local_results": {} + }, { + "name": "He2", + "molecule_id": mol_insert["data"][1], + "comment": None, + "local_results": {} + }] } db2 = { @@ -1314,7 +1339,8 @@ def test_collections_include_exclude(storage_socket): "name": name2, "visibility": True, "view_available": False, - "records": [] + "records": [], + "group": "default" } ret = storage_socket.add_collection(db)