diff --git a/qcarchivetesting/conda-envs/fulltest_server.yaml b/qcarchivetesting/conda-envs/fulltest_server.yaml
index d36d97ec3..addaca9fb 100644
--- a/qcarchivetesting/conda-envs/fulltest_server.yaml
+++ b/qcarchivetesting/conda-envs/fulltest_server.yaml
@@ -38,6 +38,7 @@ dependencies:
 
   # QCFractal Services
   - torsiondrive
+  - qcmanybody
 
   - pip:
     - "geometric @ git+https://github.com/leeping/geomeTRIC"
diff --git a/qcarchivetesting/conda-envs/fulltest_snowflake.yaml b/qcarchivetesting/conda-envs/fulltest_snowflake.yaml
index 05f123325..4b47c274e 100644
--- a/qcarchivetesting/conda-envs/fulltest_snowflake.yaml
+++ b/qcarchivetesting/conda-envs/fulltest_snowflake.yaml
@@ -41,6 +41,7 @@ dependencies:
 
   # QCFractal Services
   - torsiondrive
+  - qcmanybody
 
   # Worker codes below
   - qcengine<0.70a0
diff --git a/qcarchivetesting/qcarchivetesting/hash_data/manybody_specification_tests.json.xz b/qcarchivetesting/qcarchivetesting/hash_data/manybody_specification_tests.json.xz
new file mode 100644
index 000000000..ab1513bd7
Binary files /dev/null and b/qcarchivetesting/qcarchivetesting/hash_data/manybody_specification_tests.json.xz differ
diff --git a/qcarchivetesting/qcarchivetesting/procedure_data/generate_manybody.py b/qcarchivetesting/qcarchivetesting/procedure_data/generate_manybody.py
index b8629b292..5ed55a929 100644
--- a/qcarchivetesting/qcarchivetesting/procedure_data/generate_manybody.py
+++ b/qcarchivetesting/qcarchivetesting/procedure_data/generate_manybody.py
@@ -35,7 +35,8 @@
 _, ids = client.add_manybodys(
     [molecule],
     program=test_data["specification"]["program"],
-    singlepoint_specification=test_data["specification"]["singlepoint_specification"],
+    bsse_correction=test_data["specification"]["bsse_correction"],
+    levels=test_data["specification"]["levels"],
     keywords=test_data["specification"]["keywords"],
 )
diff --git a/qcarchivetesting/qcarchivetesting/procedure_data/mb_all_he4_psi4_multi.json.xz b/qcarchivetesting/qcarchivetesting/procedure_data/mb_all_he4_psi4_multi.json.xz
new file mode 100644
index 000000000..ee336ea1f
Binary files /dev/null and b/qcarchivetesting/qcarchivetesting/procedure_data/mb_all_he4_psi4_multi.json.xz differ
diff --git a/qcarchivetesting/qcarchivetesting/procedure_data/mb_all_he4_psi4_multiss.json.xz b/qcarchivetesting/qcarchivetesting/procedure_data/mb_all_he4_psi4_multiss.json.xz
new file mode 100644
index 000000000..fc5e63196
Binary files /dev/null and b/qcarchivetesting/qcarchivetesting/procedure_data/mb_all_he4_psi4_multiss.json.xz differ
diff --git a/qcarchivetesting/qcarchivetesting/procedure_data/mb_cp_he4_psi4_mp2.json.xz b/qcarchivetesting/qcarchivetesting/procedure_data/mb_cp_he4_psi4_mp2.json.xz
index 25b18404d..e5ca1c71b 100644
Binary files a/qcarchivetesting/qcarchivetesting/procedure_data/mb_cp_he4_psi4_mp2.json.xz and b/qcarchivetesting/qcarchivetesting/procedure_data/mb_cp_he4_psi4_mp2.json.xz differ
diff --git a/qcarchivetesting/qcarchivetesting/procedure_data/mb_none_he4_psi4_mp2.json.xz b/qcarchivetesting/qcarchivetesting/procedure_data/mb_none_he4_psi4_mp2.json.xz
deleted file mode 100644
index ae3da196f..000000000
Binary files a/qcarchivetesting/qcarchivetesting/procedure_data/mb_none_he4_psi4_mp2.json.xz and /dev/null differ
diff --git a/qcarchivetesting/qcarchivetesting/test_full_manybody.py b/qcarchivetesting/qcarchivetesting/test_full_manybody.py
index e480d8410..4d560ad4e 100644
--- a/qcarchivetesting/qcarchivetesting/test_full_manybody.py
+++ b/qcarchivetesting/qcarchivetesting/test_full_manybody.py
@@ -25,10 +25,14 @@ def test_manybody_full_1(fulltest_client: PortalClient):
         "keywords": {"e_convergence": 1e-10, "d_convergence": 1e-10},
     }
 
-    mb_keywords = {"max_nbody": None, "bsse_correction": "none"}
+    levels = {1: sp_spec, 2: sp_spec, 3: sp_spec, 4: sp_spec}
 
     meta, ids = fulltest_client.add_manybodys(
-        initial_molecules=[molecule], program="manybody", singlepoint_specification=sp_spec, keywords=mb_keywords
+        initial_molecules=[molecule],
+        program="qcmanybody",
+        bsse_correction=["nocp"],
+        levels=levels,
+        keywords={"return_total_data": True},
     )
 
     for i in range(240):
@@ -49,18 +53,42 @@ def test_manybody_full_2(fulltest_client: PortalClient):
         fragments=[[0], [1], [2], [3]],
     )
 
-    sp_spec = {
+    sp_spec_1 = {
         "program": "psi4",
         "driver": "energy",
         "method": "mp2",
-        "basis": "aug-cc-pvdz",
-        "keywords": {"e_convergence": 1e-10, "d_convergence": 1e-10},
+        "basis": "sto-3g",
+        "keywords": {"cc_type": "df", "df_basis_mp2": "def2-qzvpp-ri"},
+    }
+
+    sp_spec_2 = {
+        "program": "psi4",
+        "driver": "energy",
+        "method": "b3lyp",
+        "basis": "sto-3g",
+        "keywords": {"cc_type": "df", "df_basis_mp2": "def2-qzvpp-ri"},
+    }
+
+    sp_spec_3 = {
+        "program": "psi4",
+        "driver": "energy",
+        "method": "hf",
+        "basis": "sto-3g",
+        "keywords": {"cc_type": "df", "df_basis_mp2": "def2-qzvpp-ri"},
     }
 
-    mb_keywords = {"max_nbody": 2, "bsse_correction": "cp"}
+    levels = {
+        1: sp_spec_1,
+        2: sp_spec_2,
+        "supersystem": sp_spec_3,
+    }
 
     meta, ids = fulltest_client.add_manybodys(
-        initial_molecules=[molecule], program="manybody", singlepoint_specification=sp_spec, keywords=mb_keywords
+        initial_molecules=[molecule],
+        program="qcmanybody",
+        bsse_correction=["nocp", "cp", "vmfc"],
+        levels=levels,
+        keywords={"return_total_data": True},
     )
 
     for i in range(240):
diff --git a/qcfractal/pyproject.toml b/qcfractal/pyproject.toml
index 92f999758..4f6a58ef6 100644
--- a/qcfractal/pyproject.toml
+++ b/qcfractal/pyproject.toml
@@ -35,6 +35,7 @@ dependencies = [
 [project.optional-dependencies]
 services = [
     "torsiondrive",
+    "qcmanybody",
 ]
 geoip = [
     "geoip2"
 ]
diff --git a/qcfractal/qcfractal/alembic/versions/2024-06-11-a5a701dc344d_delete_old_manybody.py b/qcfractal/qcfractal/alembic/versions/2024-06-11-a5a701dc344d_delete_old_manybody.py
new file mode 100644
index 000000000..01d186d76
--- /dev/null
+++ b/qcfractal/qcfractal/alembic/versions/2024-06-11-a5a701dc344d_delete_old_manybody.py
@@ -0,0 +1,49 @@
+"""delete old manybody
+
+Revision ID: a5a701dc344d
+Revises: 73b4838a6839
+Create Date: 2024-06-11 15:51:11.380308
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "a5a701dc344d"
+down_revision = "73b4838a6839"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+
+    conn = op.get_bind()
+
+    res = conn.execute(sa.text("SELECT count(*) FROM base_record WHERE record_type = 'manybody'"))
+    count = res.fetchone()[0]
+    if count != 0:
+        raise ValueError("Will not delete old manybody tables with existing data")
+
+    res = conn.execute(sa.text("SELECT count(*) FROM manybody_record"))
+    count = res.fetchone()[0]
+    if count != 0:
+        raise ValueError("Will not delete old manybody tables with existing data")
+
+    res = conn.execute(sa.text("SELECT count(*) FROM manybody_dataset"))
+    count = res.fetchone()[0]
+    if count != 0:
+        raise ValueError("Will not delete old manybody tables with existing data")
+
+    op.execute(sa.text("DROP TABLE manybody_cluster CASCADE"))
+    op.execute(sa.text("DROP TABLE manybody_dataset CASCADE"))
+    op.execute(sa.text("DROP TABLE manybody_dataset_entry CASCADE"))
+    op.execute(sa.text("DROP TABLE manybody_dataset_record CASCADE"))
+    op.execute(sa.text("DROP TABLE manybody_dataset_specification CASCADE"))
+    op.execute(sa.text("DROP TABLE manybody_record CASCADE"))
+    op.execute(sa.text("DROP TABLE manybody_specification CASCADE"))
+
+
+def downgrade():
+    raise NotImplementedError("Downgrade not supported")
diff --git a/qcfractal/qcfractal/alembic/versions/2024-06-11-fd95035b773b_new_manybody_code.py b/qcfractal/qcfractal/alembic/versions/2024-06-11-fd95035b773b_new_manybody_code.py
new file mode 100644
index 000000000..13c7af815
--- /dev/null
+++ b/qcfractal/qcfractal/alembic/versions/2024-06-11-fd95035b773b_new_manybody_code.py
@@ -0,0 +1,215 @@
+"""new manybody code
+
+Revision ID: fd95035b773b
+Revises: a5a701dc344d
+Create Date: 2024-06-11 16:29:38.468745
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "fd95035b773b"
+down_revision = "a5a701dc344d"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
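+    # A manybody specification no longer embeds a single QC specification;
+    # the per-level QC specifications live in manybody_specification_levels below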
+    op.create_table(
+        "manybody_specification",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("program", sa.String(), nullable=False),
+        sa.Column("bsse_correction", postgresql.ARRAY(sa.String()), nullable=False),
+        sa.Column("keywords", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
+        sa.Column("protocols", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
+        sa.Column("specification_hash", sa.String(), nullable=False),
+        sa.CheckConstraint("program = LOWER(program)", name="ck_manybody_specification_program_lower"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index("ix_manybody_specification_program", "manybody_specification", ["program"], unique=False)
+    op.create_table(
+        "manybody_specification_levels",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("manybody_specification_id", sa.Integer(), nullable=False),
+        sa.Column("level", sa.Integer(), nullable=False),
+        sa.Column("singlepoint_specification_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["manybody_specification_id"],
+            ["manybody_specification.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["singlepoint_specification_id"],
+            ["qc_specification.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("manybody_specification_id", "level", name="ux_manybody_specification_levels_unique"),
+    )
+    op.create_index(
+        "ix_manybody_specifications_levels_manybody_specification_id",
+        "manybody_specification_levels",
+        ["manybody_specification_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_manybody_specifications_levels_singlepoint_specification_id",
+        "manybody_specification_levels",
+        ["singlepoint_specification_id"],
+        unique=False,
+    )
+    op.create_table(
+        "manybody_dataset",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(["id"], ["base_dataset.id"], ondelete="cascade"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_table(
+        "manybody_record",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("initial_molecule_id", sa.Integer(), nullable=False),
+        sa.Column("specification_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(["id"], ["base_record.id"], ondelete="cascade"),
+        sa.ForeignKeyConstraint(
+            ["initial_molecule_id"],
+            ["molecule.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["specification_id"],
+            ["manybody_specification.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_table(
+        "manybody_cluster",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("manybody_id", sa.Integer(), nullable=True),
+        sa.Column("molecule_id", sa.Integer(), nullable=False),
+        sa.Column("mc_level", sa.String(), nullable=False),
+        sa.Column("fragments", postgresql.ARRAY(sa.Integer()), nullable=False),
+        sa.Column("basis", postgresql.ARRAY(sa.Integer()), nullable=False),
+        sa.Column("singlepoint_id", sa.Integer(), nullable=True),
+        sa.CheckConstraint("array_length(basis, 1) > 0", name="ck_manybody_cluster_basis"),
+        sa.CheckConstraint("array_length(fragments, 1) > 0", name="ck_manybody_cluster_fragments"),
+        sa.ForeignKeyConstraint(["manybody_id"], ["manybody_record.id"], ondelete="cascade"),
+        sa.ForeignKeyConstraint(
+            ["molecule_id"],
+            ["molecule.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["singlepoint_id"],
+            ["singlepoint_record.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("manybody_id", "mc_level", "fragments", "basis", name="ux_manybody_cluster_unique"),
+    )
+    op.create_index("ix_manybody_cluster_molecule_id", "manybody_cluster", ["molecule_id"], unique=False)
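+    # Clusters are also looked up through their singlepoint records, so index that column as well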
op.create_index("ix_manybody_cluster_singlepoint_id", "manybody_cluster", ["singlepoint_id"], unique=False) + op.create_table( + "manybody_dataset_entry", + sa.Column("dataset_id", sa.Integer(), nullable=False), + sa.Column("name", sa.String(), nullable=False), + sa.Column("comment", sa.String(), nullable=True), + sa.Column("initial_molecule_id", sa.Integer(), nullable=False), + sa.Column("additional_singlepoint_keywords", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("attributes", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.ForeignKeyConstraint(["dataset_id"], ["manybody_dataset.id"], ondelete="cascade"), + sa.ForeignKeyConstraint( + ["initial_molecule_id"], + ["molecule.id"], + ), + sa.PrimaryKeyConstraint("dataset_id", "name"), + ) + op.create_index("ix_manybody_dataset_entry_dataset_id", "manybody_dataset_entry", ["dataset_id"], unique=False) + op.create_index( + "ix_manybody_dataset_entry_initial_molecule_id", "manybody_dataset_entry", ["initial_molecule_id"], unique=False + ) + op.create_index("ix_manybody_dataset_entry_name", "manybody_dataset_entry", ["name"], unique=False) + op.create_table( + "manybody_dataset_specification", + sa.Column("dataset_id", sa.Integer(), nullable=False), + sa.Column("name", sa.String(), nullable=False), + sa.Column("description", sa.String(), nullable=True), + sa.Column("specification_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(["dataset_id"], ["manybody_dataset.id"], ondelete="cascade"), + sa.ForeignKeyConstraint( + ["specification_id"], + ["manybody_specification.id"], + ), + sa.PrimaryKeyConstraint("dataset_id", "name"), + ) + op.create_index( + "ix_manybody_dataset_specification_dataset_id", "manybody_dataset_specification", ["dataset_id"], unique=False + ) + op.create_index("ix_manybody_dataset_specification_name", "manybody_dataset_specification", ["name"], unique=False) + op.create_index( + "ix_manybody_dataset_specification_specification_id", + "manybody_dataset_specification", + ["specification_id"], + unique=False, + ) + op.create_table( + "manybody_dataset_record", + sa.Column("dataset_id", sa.Integer(), nullable=False), + sa.Column("entry_name", sa.String(), nullable=False), + sa.Column("specification_name", sa.String(), nullable=False), + sa.Column("record_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["dataset_id", "entry_name"], + ["manybody_dataset_entry.dataset_id", "manybody_dataset_entry.name"], + onupdate="cascade", + ondelete="cascade", + ), + sa.ForeignKeyConstraint( + ["dataset_id", "specification_name"], + ["manybody_dataset_specification.dataset_id", "manybody_dataset_specification.name"], + onupdate="cascade", + ondelete="cascade", + ), + sa.ForeignKeyConstraint(["dataset_id"], ["manybody_dataset.id"], ondelete="cascade"), + sa.ForeignKeyConstraint( + ["record_id"], + ["manybody_record.id"], + ), + sa.PrimaryKeyConstraint("dataset_id", "entry_name", "specification_name"), + ) + op.create_index("ix_manybody_dataset_record_record_id", "manybody_dataset_record", ["record_id"], unique=False) + + op.execute( + """CREATE TRIGGER qca_manybody_record_delete_base_tr + AFTER DELETE ON public.manybody_record + FOR EACH ROW EXECUTE FUNCTION qca_base_record_delete();""" + ) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
+    op.drop_index("ix_manybody_dataset_record_record_id", table_name="manybody_dataset_record")
+    op.drop_table("manybody_dataset_record")
+    op.drop_index("ix_manybody_dataset_specification_specification_id", table_name="manybody_dataset_specification")
+    op.drop_index("ix_manybody_dataset_specification_name", table_name="manybody_dataset_specification")
+    op.drop_index("ix_manybody_dataset_specification_dataset_id", table_name="manybody_dataset_specification")
+    op.drop_table("manybody_dataset_specification")
+    op.drop_index("ix_manybody_dataset_entry_name", table_name="manybody_dataset_entry")
+    op.drop_index("ix_manybody_dataset_entry_initial_molecule_id", table_name="manybody_dataset_entry")
+    op.drop_index("ix_manybody_dataset_entry_dataset_id", table_name="manybody_dataset_entry")
+    op.drop_table("manybody_dataset_entry")
+    op.drop_index("ix_manybody_cluster_singlepoint_id", table_name="manybody_cluster")
+    op.drop_index("ix_manybody_cluster_molecule_id", table_name="manybody_cluster")
+    op.drop_table("manybody_cluster")
+    op.drop_table("manybody_record")
+    op.drop_table("manybody_dataset")
+    op.drop_index(
+        "ix_manybody_specifications_levels_singlepoint_specification_id", table_name="manybody_specification_levels"
+    )
+    op.drop_index(
+        "ix_manybody_specifications_levels_manybody_specification_id", table_name="manybody_specification_levels"
+    )
+    op.drop_table("manybody_specification_levels")
+    op.drop_index("ix_manybody_specification_program", table_name="manybody_specification")
+    op.drop_table("manybody_specification")
+    # ### end Alembic commands ###
diff --git a/qcfractal/qcfractal/alembic/versions/2025-01-14-d5988aa750ae_merge_manybody_branch_with_changes.py b/qcfractal/qcfractal/alembic/versions/2025-01-14-d5988aa750ae_merge_manybody_branch_with_changes.py
new file mode 100644
index 000000000..9c5066bdd
--- /dev/null
+++ b/qcfractal/qcfractal/alembic/versions/2025-01-14-d5988aa750ae_merge_manybody_branch_with_changes.py
@@ -0,0 +1,21 @@
+"""merge manybody branch with changes
+
+Revision ID: d5988aa750ae
+Revises: 3690c677f8d1, fd95035b773b
+Create Date: 2025-01-14 10:49:27.547435
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = "d5988aa750ae"
+down_revision = ("3690c677f8d1", "fd95035b773b")
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    pass
+
+
+def downgrade():
+    pass
diff --git a/qcfractal/qcfractal/components/manybody/dataset_db_models.py b/qcfractal/qcfractal/components/manybody/dataset_db_models.py
index 7dac53c1e..03f954c46 100644
--- a/qcfractal/qcfractal/components/manybody/dataset_db_models.py
+++ b/qcfractal/qcfractal/components/manybody/dataset_db_models.py
@@ -19,7 +19,7 @@ class ManybodyDatasetEntryORM(BaseORM):
     comment = Column(String)
 
     initial_molecule_id = Column(Integer, ForeignKey(MoleculeORM.id), nullable=False)
-    additional_keywords = Column(JSONB, nullable=False)
+    additional_singlepoint_keywords = Column(JSONB, nullable=False)
     attributes = Column(JSONB, nullable=False)
 
     initial_molecule = relationship(MoleculeORM, lazy="joined")
diff --git a/qcfractal/qcfractal/components/manybody/dataset_socket.py b/qcfractal/qcfractal/components/manybody/dataset_socket.py
index 1ef739ca7..618609809 100644
--- a/qcfractal/qcfractal/components/manybody/dataset_socket.py
+++ b/qcfractal/qcfractal/components/manybody/dataset_socket.py
@@ -54,7 +54,7 @@ def _create_entries(self, session: Session, dataset_id: int, new_entries: Sequen
                 name=entry.name,
                 comment=entry.comment,
                 initial_molecule_id=molecule_id,
-                additional_keywords=entry.additional_keywords,
+                additional_singlepoint_keywords=entry.additional_singlepoint_keywords,
                 attributes=entry.attributes,
             )
 
@@ -80,8 +80,8 @@ def _submit(
         n_existing = 0
 
         # Weed out any with additional keywords
-        special_entries = [x for x in entry_orm if x.additional_keywords]
-        normal_entries = [x for x in entry_orm if not x.additional_keywords]
+        special_entries = [x for x in entry_orm if x.additional_singlepoint_keywords]
+        normal_entries = [x for x in entry_orm if not x.additional_singlepoint_keywords]
 
         # Normal entries - just let it rip
         for spec in spec_orm:
@@ -118,7 +118,8 @@
                     continue
 
                 new_spec = copy.deepcopy(spec_input_dict)
-                new_spec["keywords"].update(entry.additional_keywords)
+                for v in new_spec["levels"].values():
+                    v["keywords"].update(entry.additional_singlepoint_keywords)
 
                 meta, mb_ids = self.root_socket.records.manybody.add(
                     initial_molecules=[entry.initial_molecule_id],
diff --git a/qcfractal/qcfractal/components/manybody/record_db_models.py b/qcfractal/qcfractal/components/manybody/record_db_models.py
index 3549fc5cf..b5cb76e54 100644
--- a/qcfractal/qcfractal/components/manybody/record_db_models.py
+++ b/qcfractal/qcfractal/components/manybody/record_db_models.py
@@ -1,14 +1,30 @@
 from __future__ import annotations
 
-from sqlalchemy import Column, String, Integer, ForeignKey, UniqueConstraint, Index, CheckConstraint, event, DDL
+from typing import TYPE_CHECKING
+
+from sqlalchemy import (
+    Column,
+    String,
+    Integer,
+    ForeignKey,
+    UniqueConstraint,
+    Index,
+    CheckConstraint,
+    event,
+    DDL,
+)
 from sqlalchemy.dialects.postgresql import JSONB, ARRAY
 from sqlalchemy.orm import relationship
+from sqlalchemy.orm.collections import attribute_keyed_dict
 
 from qcfractal.components.molecules.db_models import MoleculeORM
 from qcfractal.components.record_db_models import BaseRecordORM
 from qcfractal.components.singlepoint.record_db_models import SinglepointRecordORM, QCSpecificationORM
 from qcfractal.db_socket import BaseORM
 
+if TYPE_CHECKING:
+    from typing import Dict, Any, Optional, Iterable
+
 
 class ManybodyClusterORM(BaseORM):
     """
@@ -17,11 +33,12 @@ class ManybodyClusterORM(BaseORM):
 
     __tablename__ = "manybody_cluster"
 
-    manybody_id = Column(Integer, ForeignKey("manybody_record.id", ondelete="cascade"), primary_key=True)
-    molecule_id = Column(Integer, ForeignKey("molecule.id"), primary_key=True)
+    id = Column(Integer, primary_key=True)
+    manybody_id = Column(Integer, ForeignKey("manybody_record.id", ondelete="cascade"))
+    molecule_id = Column(Integer, ForeignKey("molecule.id"), nullable=False)
+    mc_level = Column(String, nullable=False)
     fragments = Column(ARRAY(Integer), nullable=False)
     basis = Column(ARRAY(Integer), nullable=False)
-    degeneracy = Column(Integer, nullable=False)
 
     singlepoint_id = Column(Integer, ForeignKey(SinglepointRecordORM.id), nullable=True)
 
@@ -29,14 +46,14 @@ class ManybodyClusterORM(BaseORM):
     singlepoint_record = relationship(SinglepointRecordORM)
 
     __table_args__ = (
-        CheckConstraint("degeneracy > 0", name="ck_manybody_cluster_degeneracy"),
         CheckConstraint("array_length(fragments, 1) > 0", name="ck_manybody_cluster_fragments"),
         CheckConstraint("array_length(basis, 1) > 0", name="ck_manybody_cluster_basis"),
+        UniqueConstraint("manybody_id", "mc_level", "fragments", "basis", name="ux_manybody_cluster_unique"),
        Index("ix_manybody_cluster_molecule_id", "molecule_id"),
         Index("ix_manybody_cluster_singlepoint_id", "singlepoint_id"),
     )
 
-    _qcportal_model_excludes = ["manybody_id"]
+    _qcportal_model_excludes = ["manybody_id", "id"]
 
 
 class ManybodySpecificationORM(BaseORM):
@@ -47,31 +64,65 @@ class ManybodySpecificationORM(BaseORM):
     __tablename__ = "manybody_specification"
 
     id = Column(Integer, primary_key=True)
+    specification_hash = Column(String, nullable=False)
 
     program = Column(String, nullable=False)
-    singlepoint_specification_id = Column(Integer, ForeignKey(QCSpecificationORM.id), nullable=False)
+    bsse_correction = Column(ARRAY(String), nullable=False)
+
     keywords = Column(JSONB, nullable=False)
-    keywords_hash = Column(String, nullable=False)
+    protocols = Column(JSONB, nullable=False)
 
-    singlepoint_specification = relationship(QCSpecificationORM, lazy="joined")
+    levels = relationship(
+        "ManybodySpecificationLevelsORM", lazy="selectin", collection_class=attribute_keyed_dict("level")
+    )
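+    # attribute_keyed_dict keys the 'levels' collection by each row's integer 'level' attribute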
+    # Note - specification_hash will not be unique because of the different levels!
+    # The levels are stored in another table with FK to this table, so seemingly
+    # duplicate rows in this table could have different rows in the levels table
 
     __table_args__ = (
-        UniqueConstraint(
-            "program",
-            "singlepoint_specification_id",
-            "keywords_hash",
-            name="ux_manybody_specification_keys",
-        ),
         Index("ix_manybody_specification_program", "program"),
-        Index("ix_manybody_specification_singlepoint_specification_id", "singlepoint_specification_id"),
         CheckConstraint("program = LOWER(program)", name="ck_manybody_specification_program_lower"),
     )
 
-    _qcportal_model_excludes = ["id", "keywords_hash", "singlepoint_specification_id"]
+    _qcportal_model_excludes = ["id", "specification_hash"]
+
+    def model_dict(self, exclude: Optional[Iterable[str]] = None) -> Dict[str, Any]:
+        d = BaseORM.model_dict(self, exclude)
+
+        # Levels should just be key -> specification
+        # map -1 for levels to 'supersystem'
+        d["levels"] = {k if k != -1 else "supersystem": v["singlepoint_specification"] for k, v in d["levels"].items()}
+
+        return d
 
     @property
     def short_description(self) -> str:
-        return f"{self.program}~{self.singlepoint_specification.short_description}"
+        return f"{self.program}~{sorted(self.levels.keys())}"
+
+
+class ManybodySpecificationLevelsORM(BaseORM):
+    """
+    Association table for storing singlepoint specifications that are part of a manybody specification
+    """
+
+    __tablename__ = "manybody_specification_levels"
+
+    id = Column(Integer, primary_key=True)
+
+    manybody_specification_id = Column(Integer, ForeignKey(ManybodySpecificationORM.id), nullable=False)
+
+    level = Column(Integer, nullable=False)
+    singlepoint_specification_id = Column(Integer, ForeignKey(QCSpecificationORM.id), nullable=False)
+
+    singlepoint_specification = relationship(QCSpecificationORM, lazy="joined")
+
+    __table_args__ = (
+        UniqueConstraint("manybody_specification_id", "level", name="ux_manybody_specification_levels_unique"),
+        Index("ix_manybody_specifications_levels_manybody_specification_id", "manybody_specification_id"),
+        Index("ix_manybody_specifications_levels_singlepoint_specification_id", "singlepoint_specification_id"),
+    )
+
+    _qcportal_model_excludes = ["id"]
 
 
 class ManybodyRecordORM(BaseRecordORM):
@@ -85,7 +136,6 @@ class ManybodyRecordORM(BaseRecordORM):
 
     initial_molecule_id = Column(Integer, ForeignKey(MoleculeORM.id), nullable=False)
     specification_id = Column(Integer, ForeignKey(ManybodySpecificationORM.id), nullable=False)
-    results = Column(JSONB)
 
     specification = relationship(ManybodySpecificationORM, lazy="selectin")
     initial_molecule = relationship(MoleculeORM)
diff --git a/qcfractal/qcfractal/components/manybody/record_socket.py b/qcfractal/qcfractal/components/manybody/record_socket.py
index dca55f5a5..30cccb2af 100644
--- a/qcfractal/qcfractal/components/manybody/record_socket.py
+++ b/qcfractal/qcfractal/components/manybody/record_socket.py
@@ -1,33 +1,44 @@
 from __future__ import annotations
 
-import itertools
+import contextlib
+import importlib
+import io
 import logging
-import math
-from typing import List, Dict, Tuple, Optional, Sequence, Any, Union, Set, TYPE_CHECKING
+import textwrap
+from typing import List, Dict, Tuple, Optional, Sequence, Any, Union, TYPE_CHECKING
 
+import qcmanybody
 import tabulate
-from sqlalchemy import select
-from sqlalchemy.dialects.postgresql import insert
+from sqlalchemy import select, func
+from sqlalchemy.dialects.postgresql import array_agg, aggregate_order_by
 from sqlalchemy.orm import defer, undefer, lazyload, joinedload, selectinload
 
-from qcfractal import __version__ as qcfractal_version
 from qcfractal.components.services.db_models import ServiceQueueORM, ServiceDependencyORM
 from qcfractal.components.singlepoint.record_db_models import QCSpecificationORM
 from qcfractal.db_socket.helpers import insert_general
 from qcportal.exceptions import MissingDataError
 from qcportal.manybody import (
-    BSSECorrectionEnum,
-    ManybodyKeywords,
     ManybodySpecification,
     ManybodyQueryFilters,
 )
 from qcportal.metadata_models import InsertMetadata
 from qcportal.molecules import Molecule
 from qcportal.record_models import PriorityEnum, RecordStatusEnum, OutputTypeEnum
-from qcportal.utils import hash_dict
-from .record_db_models import ManybodyClusterORM, ManybodyRecordORM, ManybodySpecificationORM
+from qcportal.utils import chunk_iterable, hash_dict
+from .record_db_models import (
+    ManybodyClusterORM,
+    ManybodyRecordORM,
+    ManybodySpecificationORM,
+    ManybodySpecificationLevelsORM,
+)
 from ..record_socket import BaseRecordSocket
 
+_qcm_spec = importlib.util.find_spec("qcmanybody")
+
+if _qcm_spec is not None:
+    qcmanybody = importlib.util.module_from_spec(_qcm_spec)
+    _qcm_spec.loader.exec_module(qcmanybody)
+
 if TYPE_CHECKING:
     from sqlalchemy.orm.session import Session
     from qcfractal.db_socket.socket import SQLAlchemySocket
@@ -35,152 +46,50 @@
 
 # Meaningless, but unique to manybody
 manybody_insert_lock_id = 14500
+manybody_insert_spec_lock_id = 14501
 
 
-def nCr(n: int, r: int) -> int:
-    """
-    Compute the binomial coefficient n! / (k! * (n-k)!)
-    """
+def _get_qcmanybody_core(
+    mb_orm: ManybodyRecordORM,
+) -> Tuple[qcmanybody.ManyBodyCore, Dict[str, ManybodySpecificationLevelsORM]]:
+    init_mol: Molecule = mb_orm.initial_molecule.to_model(Molecule)
+
+    qcm_levels = {}
+    level_spec_map = {}
+    sp_id_map = {}
 
-    # TODO: available in python 3.8 as math.comb
-    return math.factorial(n) // (math.factorial(r) * math.factorial(n - r))
-
-
-def analyze_results(mb_orm: ManybodyRecordORM):
-    keywords = ManybodyKeywords(**mb_orm.specification.keywords)
-
-    # Total number of fragments present on the molecule
-    total_frag = len(mb_orm.initial_molecule.fragments)
-
-    # Group clusters by nbody
-    # For CP, this only includes the calculations done in the full basis
-    clusters = {}
-    for c in mb_orm.clusters:
-        if keywords.bsse_correction == BSSECorrectionEnum.none:
-            nbody = len(c.fragments)
-            clusters.setdefault(nbody, [])
-            clusters[nbody].append(c)
-        elif keywords.bsse_correction == BSSECorrectionEnum.cp and len(c.basis) > 1:
-            nbody = len(c.fragments)
-            clusters.setdefault(nbody, [])
-            clusters[nbody].append(c)
-
-    # Total energy for each nbody cluster. This is the energy calculated
-    # by the singlepoint multiplied by its degeneracy
-    cluster_energy: Dict[int, float] = {}
-
-    for nbody, v in clusters.items():
-        cluster_energy[nbody] = sum(c.degeneracy * c.singlepoint_record.properties["return_energy"] for c in v)
-
-    # Calculate CP correction
-    bsse = 0.0
-    if keywords.bsse_correction == BSSECorrectionEnum.cp:
-        monomer_clusters = [c for c in mb_orm.clusters if len(c.fragments) == 1 and len(c.basis) == 1]
-        monomer_energy = sum(c.degeneracy * c.singlepoint_record.properties["return_energy"] for c in monomer_clusters)
-        bsse = cluster_energy[1] - monomer_energy
-
-    # Total energies
-    total_energy_through = {}
-
-    for n in cluster_energy.keys():
-        # If entire molecule was calculated, then add that
-        if n == total_frag:
-            total_energy_through[n] = cluster_energy[n]
-        elif n == 1:
-            total_energy_through[n] = cluster_energy[n]
+    for nb, lvl in sorted(mb_orm.specification.levels.items()):
+        if nb == -1:
+            nb = "supersystem"
+
+        sp_spec = lvl.singlepoint_specification
+        if sp_spec.id in sp_id_map:
+            sp_name = sp_id_map[sp_spec.id]
         else:
-            total_energy_through[n] = 0.0
-            for nbody in range(1, n + 1):
-                sign = (-1) ** (n - nbody)
-                take_nk = nCr(total_frag - nbody - 1, n - nbody)
-                total_energy_through[n] += take_nk * sign * cluster_energy[nbody]
-
-    # Apply CP correction
-    if keywords.bsse_correction == BSSECorrectionEnum.cp:
-        total_energy_through = {k: v - bsse for k, v in total_energy_through.items()}
-
-    # Contributions to interaction energy
-    energy_contrib = {}
-    energy_contrib[1] = 0.0
-    for n in total_energy_through:
-        if n != 1:
-            energy_contrib[n] = total_energy_through[n] - total_energy_through[n - 1]
-
-    # Interaction energy
-    interaction_energy = {}
-    for n in total_energy_through:
-        interaction_energy[n] = total_energy_through[n] - total_energy_through[1]
-
-    results = {
-        "cluster_energy": cluster_energy,
-        "total_energy_through": total_energy_through,
-        "interaction_energy": interaction_energy,
-        "energy_contrib": energy_contrib,
-    }
-
-    mb_orm.results = results
-
-
-def build_mbe_clusters(mol: Molecule, keywords: ManybodyKeywords) -> List[Tuple[Set[int], Set[int], Molecule]]:
-    """
-    Fragments a larger molecule into clusters
-
-    Parameters
-    ----------
-    mol
-        Molecule to fragment
-    keywords
-        Keywords that control the fragmenting
-
-    Returns
-    -------
-    :
-        A list of tuples with three elements
-
-        (1) Set of fragment indices (2) Set of basis indices (3) Fragment molecule
-    """
+            test_name = f"{sp_spec.program}/{sp_spec.method}/{sp_spec.basis}"
+            sp_name = test_name
 
-    # List: (fragments, basis, Molecule)
-    # fragments and basis are sequences
-    ret: List[Tuple[Set[int], Set[int], Molecule]] = []
-
-    if len(mol.fragments) < 2:
-        raise RuntimeError("manybody service: Molecule must have at least two fragments")
-
-    max_nbody = keywords.max_nbody
-
-    if max_nbody is None:
-        max_nbody = len(mol.fragments)
-    else:
-        max_nbody = min(max_nbody, len(mol.fragments))
-
-    # Build some info
-    allfrag = set(range(max_nbody))
-
-    # Loop over the nbody (the number of bodies to include. 1 = monomers, 2 = dimers)
-    for nbody in range(1, max_nbody):
-        for frag_idx in itertools.combinations(allfrag, nbody):
-            frag_idx = set(frag_idx)
-            if keywords.bsse_correction == BSSECorrectionEnum.none:
-                frag_mol = mol.get_fragment(frag_idx, orient=True, group_fragments=True)
-                ret.append((frag_idx, frag_idx, frag_mol))
-            elif keywords.bsse_correction == BSSECorrectionEnum.cp:
-                ghost = list(set(allfrag) - set(frag_idx))
-                frag_mol = mol.get_fragment(frag_idx, ghost, orient=True, group_fragments=True)
-                ret.append((frag_idx, allfrag, frag_mol))
-            else:
-                raise RuntimeError(f"Unknown BSSE correction method: {keywords.bsse_correction}")
+            # Disambiguate duplicate model chemistry names
+            i = 0
+            while sp_name in qcm_levels:
+                i += 1
+                sp_name = f"{test_name}_{i}"
+
+            sp_id_map[sp_spec.id] = sp_name
 
-    # Include full molecule as well
-    if max_nbody >= len(mol.fragments):
-        ret.append((allfrag, allfrag, mol))
+        qcm_levels[nb] = sp_name
+        level_spec_map[sp_name] = lvl
 
-    # Always include monomer in monomer basis for CP
-    if keywords.bsse_correction == BSSECorrectionEnum.cp:
-        for frag_idx in allfrag:
-            frag_mol = mol.get_fragment([frag_idx], orient=True, group_fragments=True)
-            ret.append(({frag_idx}, {frag_idx}, frag_mol))
+    qcm = qcmanybody.ManyBodyCore(
+        molecule=init_mol,
+        levels=qcm_levels,
+        bsse_type=[qcmanybody.BsseEnum[x] for x in mb_orm.specification.bsse_correction],
+        return_total_data=mb_orm.specification.keywords.get("return_total_data", False),
+        supersystem_ie_only=mb_orm.specification.keywords.get("supersystem_ie_only", False),
+        embedding_charges=None,
+    )
 
-    return ret
+    return qcm, level_spec_map
 
 
 class ManybodyRecordSocket(BaseRecordSocket):
@@ -195,6 +104,9 @@ def __init__(self, root_socket: SQLAlchemySocket):
         BaseRecordSocket.__init__(self, root_socket)
         self._logger = logging.getLogger(__name__)
 
+    def available(self) -> bool:
+        return _qcm_spec is not None
+
     @staticmethod
     def get_children_select() -> List[Any]:
         stmt = select(
@@ -207,70 +119,75 @@ def initialize_service(self, session: Session, service_orm: ServiceQueueORM) ->
         mb_orm: ManybodyRecordORM = service_orm.record
 
         output = "\n\nCreated manybody calculation\n"
+        output += "qcmanybody version: " + qcmanybody.__version__ + "\n\n"
+
+        output += "-" * 80 + "\nSpecification:\n\n"
+
+        table_rows = []
+
+        for k, v in mb_orm.specification.keywords.items():
+            table_rows.append((k, v))
 
-        output += "-" * 80 + "\nManybody Keywords:\n\n"
-        spec: ManybodySpecification = mb_orm.specification.to_model(ManybodySpecification)
-        table_rows = sorted(spec.keywords.dict().items())
-        output += tabulate.tabulate(table_rows, headers=["keyword", "value"])
-        output += "\n\n" + "-" * 80 + "\nQC Specification:\n\n"
-        table_rows = sorted(spec.singlepoint_specification.dict().items())
-        output += tabulate.tabulate(table_rows, headers=["keyword", "value"])
+        if mb_orm.specification.program != "qcmanybody":
+            raise RuntimeError(f"Unknown program: {mb_orm.specification.program}")
+
+        table_rows.append(("bsse_correction", str(mb_orm.specification.bsse_correction)))
+        output += tabulate.tabulate(table_rows, tablefmt="plain")
         output += "\n\n"
 
         init_mol: Molecule = mb_orm.initial_molecule.to_model(Molecule)
-
         output += f"Initial molecule: formula={init_mol.get_molecular_formula()} id={mb_orm.initial_molecule_id}\n"
         output += f"Initial molecule has {len(init_mol.fragments)} fragments\n"
 
-        # Fragment the initial molecule into clusters
-        keywords = ManybodyKeywords(**mb_orm.specification.keywords)
-        mol_clusters = build_mbe_clusters(init_mol, keywords)
-
-        output += f"Molecule is split into {len(mol_clusters)} separate clusters:\n\n"
+        # Create a ManyBodyCore instance to determine which calculations we need
+        qcm, spec_map = _get_qcmanybody_core(mb_orm)
 
-        # Group by nbody and count for output
-        mol_clusters_nbody = {}
-        for mc in mol_clusters:
-            nbody = len(mc[0])
-            mol_clusters_nbody.setdefault(nbody, 0)
-            mol_clusters_nbody[nbody] += 1
+        output += "\n\n" + "-" * 80 + "\nModel Chemistries/Specifications:\n\n"
+        for name, lvl_spec in spec_map.items():
+            output += f"{name}:\n"
+            output += textwrap.indent(
+                tabulate.tabulate(lvl_spec.singlepoint_specification.model_dict().items(), tablefmt="plain"), "    "
+            )
+            output += "\n"
 
-        table_rows = [(k, v) for k, v in sorted(mol_clusters_nbody.items())]
-        output += tabulate.tabulate(table_rows, headers=["n-body", "count"])
+        output += "\n\n" + "-" * 80 + "\nLevels:\n\n"
+        for level, mc_name in qcm.levels.items():
+            output += f"    {level:>13}: {mc_name}\n"
         output += "\n\n"
 
-        # Add the manybody molecules/clusters to the db
-        nbody_mols = [x[2] for x in mol_clusters]
-        meta, mol_ids = self.root_socket.molecules.add(nbody_mols)
-
-        if not meta.success:
-            raise RuntimeError("Unable to add molecules to the database: " + meta.error_string)
-
-        # We do unique ids only
-        # Some manybody calculations will have identical molecules
-        # Think of single-atom dimers or something. There will only be one monomer
-        done_ids = set()
-
+        output += "\n\n" + "-" * 80 + "\nComputation count:\n\n"
         table_rows = []
-        for (frag_idx, basis_idx, frag_mol), mol_id in zip(mol_clusters, mol_ids):
-            if mol_id in done_ids:
-                continue
+        for mc, compute_dict in qcm.compute_map.items():
+            for nb, frags in compute_dict["all"].items():
+                table_rows.append((f"{mc} {nb}-mer", len(frags)))
+        output += tabulate.tabulate(table_rows, headers=["n-body", "count"])
 
-            done_ids.add(mol_id)
-            degen = mol_ids.count(mol_id)
-            frag_idx = sorted(frag_idx)
-            basis_idx = sorted(basis_idx)
+        # Add what we need to compute to the database
+        table_rows = []
 
-            new_mb_orm = ManybodyClusterORM(fragments=frag_idx, basis=basis_idx, molecule_id=mol_id, degeneracy=degen)
+        for mol_batch in chunk_iterable(qcm.iterate_molecules(), 400):
+            to_add = [x[2] for x in mol_batch]
+            meta, mol_ids = self.root_socket.molecules.add(to_add, session=session)
+            if not meta.success:
+                raise RuntimeError("Unable to add molecules to the database: " + meta.error_string)
 
-            mb_orm.clusters.append(new_mb_orm)
+            for (mc_level, label, molecule), mol_id in zip(mol_batch, mol_ids):
+                # Decode the label given by qcmanybody
+                _, frag, bas = qcmanybody.delabeler(label)
 
-            table_rows.append((degen, frag_mol.get_molecular_formula(), mol_id, frag_idx, basis_idx))
+                mb_cluster_orm = ManybodyClusterORM(
+                    mc_level=mc_level,
+                    fragments=frag,
+                    basis=bas,
+                    molecule_id=mol_id,
+                )
+                table_rows.append((mc_level, frag, bas, mol_id, molecule.get_molecular_formula(), molecule.get_hash()))
+                mb_orm.clusters.append(mb_cluster_orm)
 
-        # Sort rows by nbody (# of fragments), the degeneracy descending, then molecule id
-        table_rows = sorted(table_rows, key=lambda x: (len(x[3]), -x[0], x[2]))
-        output += tabulate.tabulate(table_rows, headers=["degeneracy", "molecule", "molecule id", "fragments", "basis"])
-        output += "\n\n"
+        output += "\n\nMolecules to compute\n\n"
+        output += tabulate.tabulate(
+            table_rows, headers=["model chemistry", "fragments", "basis", "molecule_id", "formula", "hash"]
+        )
 
         self.root_socket.records.append_output(session, mb_orm, OutputTypeEnum.stdout, output)
 
@@ -283,25 +200,32 @@ def iterate_service(
 
         # Always update with the current provenance
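+        # The manybody analysis itself is performed by qcmanybody, so record it as the creator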
         mb_orm.compute_history[-1].provenance = {
-            "creator": "qcfractal",
-            "version": qcfractal_version,
-            "routine": "qcfractal.services.manybody",
+            "creator": "qcmanybody",
+            "version": qcmanybody.__version__,
+            "routine": "qcmanybody",
         }
 
-        # Grab all the clusters for the computation and them map them to molecule ID
-        clusters = mb_orm.clusters
-        clusters_by_mol = {c.molecule_id: c for c in clusters}
-
         service_orm.dependencies = []
 
-        # What molecules/clusters we still have to do
-        mols_to_compute = [c.molecule_id for c in clusters if c.singlepoint_id is None]
+        submitted = []
 
-        if mols_to_compute:
-            sp_spec_id = mb_orm.specification.singlepoint_specification_id
+        qcm, spec_map = _get_qcmanybody_core(mb_orm)
+        done_sp_ids = set(c.singlepoint_id for c in mb_orm.clusters if c.singlepoint_id is not None)
+
+        # What we still need to submit, grouped by model chemistry level
+
+        clusters_to_submit = {}
+        for c in mb_orm.clusters:
+            if c.singlepoint_id is not None:
+                continue
+            clusters_to_submit.setdefault(c.mc_level, [])
+            clusters_to_submit[c.mc_level].append(c)
+
+        for mc_level, clusters in clusters_to_submit.items():
+            mol_ids = [c.molecule_id for c in clusters]
             meta, sp_ids = self.root_socket.records.singlepoint.add_internal(
-                mols_to_compute,
-                sp_spec_id,
+                mol_ids,
+                spec_map[mc_level].singlepoint_specification_id,
                 service_orm.tag,
                 service_orm.priority,
                 mb_orm.owner_user_id,
@@ -310,75 +234,106 @@
                 session=session,
             )
 
-            output = f"\nSubmitted {len(sp_ids)} singlepoint calculations "
-            output += f"({meta.n_inserted} new, {meta.n_existing} existing):\n\n"
+            for cluster, sp_id in zip(clusters, sp_ids):
+                cluster.singlepoint_id = sp_id
+                submitted.append((cluster, sp_id))
 
-            for mol_id, sp_id in zip(mols_to_compute, sp_ids):
-                svc_dep = ServiceDependencyORM(record_id=sp_id, extras={})
+                # Add as a dependency to the service, but only if it's not done yet
+                if sp_id not in done_sp_ids:
+                    svc_dep = ServiceDependencyORM(record_id=sp_id, extras={})
+                    service_orm.dependencies.append(svc_dep)
+                    done_sp_ids.add(sp_id)
 
-                cluster_orm = clusters_by_mol[mol_id]
+        if len(submitted) != 0:
+            output = f"\nSubmitted {len(submitted)} singlepoint calculations\n"
+            self.root_socket.records.append_output(session, mb_orm, OutputTypeEnum.stdout, output)
+            return False
 
-                # Assign the singlepoint id to the cluster
-                assert cluster_orm.singlepoint_id is None
-                cluster_orm.singlepoint_id = sp_id
+        output = "\n\n" + "*" * 80 + "\n"
+        output += "All manybody singlepoint computations are complete!\n\n"
 
-                service_orm.dependencies.append(svc_dep)
+        output += "=" * 20 + "\nSinglepoint results\n" + "=" * 20 + "\n\n"
 
-            table_rows = sorted(zip(mols_to_compute, sp_ids))
-            output += tabulate.tabulate(table_rows, headers=["molecule id", "singlepoint id"])
+        # Make a nice output table
+        table_rows = []
+        for cluster in mb_orm.clusters:
+            mol_id = cluster.molecule_id
 
-        else:
-            output = "\n\n" + "*" * 80 + "\n"
-            output += "All manybody singlepoint computations are complete!\n\n"
+            energy = cluster.singlepoint_record.properties["return_energy"]
+            table_row = [cluster.mc_level, cluster.fragments, cluster.basis, energy, mol_id, cluster.singlepoint_id]
+            table_rows.append(table_row)
 
-            output += "Singlepoint results:\n"
+        output += tabulate.tabulate(
+            table_rows,
+            headers=["model chemistry", "fragments", "basis", "energy (hartree)", "molecule id", "singlepoint id"],
+            floatfmt=".10f",
+        )
 
-            # Map molecule_id -> singlepoint record
-            result_map = {c.molecule_id: c.singlepoint_record for c in clusters}
 
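+        # qcmanybody identifies each cluster computation by a label built from
+        # (model chemistry, fragments, basis); analyze() consumes a dict mapping
+        # those labels to per-property values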
+        # Analyze the actual results
+        component_results = {}
+        for cluster in mb_orm.clusters:
+            mc_level = cluster.mc_level
+            label = qcmanybody.labeler(mc_level, cluster.fragments, cluster.basis)
+            energy = cluster.singlepoint_record.properties["return_energy"]
 
-            # Make a nice output table
-            table_rows = []
-            for component in mb_orm.clusters:
-                mol_id = component.molecule_id
-                mol_form = component.molecule.identifiers["molecular_formula"]
+            component_results.setdefault(label, {})
+            component_results[label]["energy"] = energy
 
-                energy = component.singlepoint_record.properties["return_energy"]
-                table_row = [mol_id, component.singlepoint_id, mol_form, energy]
-                table_rows.append(table_row)
+        # Swallow any output
+        qcmb_stdout = io.StringIO()
 
-                result_map[mol_id] = component.singlepoint_record
+        with contextlib.redirect_stdout(qcmb_stdout):
+            mb_orm.properties = qcm.analyze(component_results)
 
-            output += tabulate.tabulate(
-                table_rows, headers=["molecule id", "singlepoint id", "molecule", "energy (hartree)"], floatfmt=".10f"
-            )
+        output += "\n\n" + "=" * 40 + "\nManybody expansion results\n" + "=" * 40 + "\n"
+        output += mb_orm.properties.pop("stdout")
+        self.root_socket.records.append_output(session, mb_orm, OutputTypeEnum.stdout, output)
 
-            # Create the results of the manybody calculation
-            analyze_results(mb_orm)
-
-            # Make a results table
-            r = mb_orm.results
-            nb_keys = sorted(r["total_energy_through"].keys())
-            table_rows = [
-                (
-                    nbody,
-                    r["total_energy_through"][nbody],
-                    r["interaction_energy"][nbody],
-                    r["energy_contrib"][nbody],
-                )
-                for nbody in nb_keys
-            ]
-
-            output += "\n\n\n\n" + "=" * 80 + "\n"
-            output += "Final energy results (in hartrees)\n" + "=" * 80 + "\n\n"
-            output += tabulate.tabulate(
-                table_rows,
-                headers=["\nnbody", "Total Energy \nthrough n-body", "\nInteraction Energy", "\nContrib to IE"],
-                floatfmt="6.10f",
-            )
+        # We are done!
+        return True
 
-            self.root_socket.records.append_output(session, mb_orm, OutputTypeEnum.stdout, output)
+    def add_specifications(
+        self, mb_specs: Sequence[ManybodySpecification], *, session: Optional[Session] = None
+    ) -> Tuple[InsertMetadata, List[int]]:
+        """
-        return len(mols_to_compute) == 0
+        Adds specifications for manybody services to the database, returning their ids.
+
+        If an identical specification exists, then no insertion takes place and the id of the existing
+        specification is returned.
+
+        Parameters
+        ----------
+        mb_specs
+            Sequence of specifications to add to the database
+        session
+            An existing SQLAlchemy session to use. If None, one will be created. If an existing session
+            is used, it will be flushed (but not committed) before returning from this function.
+
+        Returns
+        -------
+        :
+            Metadata about the insertion, and the IDs of the specifications.
+        """
+
+        # Because of how we handle levels, we do this the opposite of other record types - we add one at a time
+
+        all_metadata = []
+        all_ids = []
+
+        with self.root_socket.optional_session(session) as session:
+            for mb_spec in mb_specs:
+                meta, spec_id = self.add_specification(mb_spec, session=session)
+
+                if not meta.success:
+                    return (
+                        InsertMetadata(error_description="Unable to add manybody specification: " + meta.error_string),
+                        [],
+                    )
+
+                all_metadata.append(meta)
+                all_ids.append(spec_id)
+
+            return InsertMetadata.merge(all_metadata), all_ids
 
     def add_specification(
         self, mb_spec: ManybodySpecification, *, session: Optional[Session] = None
@@ -403,47 +358,98 @@
             Metadata about the insertion, and the id of the specification.
""" - kw_dict = mb_spec.keywords.dict() - kw_hash = hash_dict(kw_dict) + mb_kw_dict = mb_spec.keywords.dict() - with self.root_socket.optional_session(session) as session: - meta, sp_spec_id = self.root_socket.records.singlepoint.add_specification( - qc_spec=mb_spec.singlepoint_specification, session=session - ) + mb_spec_dict = { + "program": mb_spec.program, + "bsse_correction": sorted(mb_spec.bsse_correction), + "keywords": mb_kw_dict, + "protocols": {}, + } + mb_spec_hash = hash_dict(mb_spec_dict) - if not meta.success: - return ( - InsertMetadata( - error_description="Unable to add singlepoint specification: " + meta.error_string, - ), - None, - ) + # Map 'supersystem' to -1 + # The reverse (mapping -1 to 'supersystem') happens in the specification orm model_dict function + levels = mb_spec.levels.copy() + if "supersystem" in levels: + levels[-1] = levels.pop("supersystem") - stmt = ( - insert(ManybodySpecificationORM) - .values( - program=mb_spec.program, - singlepoint_specification_id=sp_spec_id, - keywords=kw_dict, - keywords_hash=kw_hash, + with self.root_socket.optional_session(session) as session: + # add all singlepoint specifications + + # Level to singlepoint spec id + level_spec_id_map: Dict[int, int] = {} + for k, v in levels.items(): + meta, sp_spec_id = self.root_socket.records.singlepoint.add_specification(qc_spec=v, session=session) + + if not meta.success: + return ( + InsertMetadata( + error_description="Unable to add singlepoint specification: " + meta.error_string, + ), + None, + ) + + level_spec_id_map[k] = sp_spec_id + + # Now the full manybody specification. Lock due to query + insert + session.execute(select(func.pg_advisory_xact_lock(manybody_insert_spec_lock_id))).scalar() + + # Create a cte with the specification + levels + mb_spec_cte = ( + select( + ManybodySpecificationORM.id, + ManybodySpecificationORM.specification_hash, + array_agg( + aggregate_order_by( + ManybodySpecificationLevelsORM.singlepoint_specification_id, + ManybodySpecificationLevelsORM.singlepoint_specification_id.asc(), + ) + ).label("singlepoint_ids"), + array_agg( + aggregate_order_by( + ManybodySpecificationLevelsORM.level, + ManybodySpecificationLevelsORM.level.asc(), + ) + ).label("levels"), + ) + .join( + ManybodySpecificationLevelsORM, + ManybodySpecificationLevelsORM.manybody_specification_id == ManybodySpecificationORM.id, ) - .on_conflict_do_nothing() - .returning(ManybodySpecificationORM.id) + .group_by(ManybodySpecificationORM.id) + .cte() ) - r = session.execute(stmt).scalar_one_or_none() - if r is not None: - return InsertMetadata(inserted_idx=[0]), r - else: - # Specification was already existing - stmt = select(ManybodySpecificationORM.id).filter_by( - program=mb_spec.program, - singlepoint_specification_id=sp_spec_id, - keywords_hash=kw_hash, + stmt = select(mb_spec_cte.c.id) + stmt = stmt.where(mb_spec_cte.c.specification_hash == mb_spec_hash) + stmt = stmt.where(mb_spec_cte.c.levels == sorted(level_spec_id_map.keys())) + stmt = stmt.where(mb_spec_cte.c.singlepoint_ids == sorted(level_spec_id_map.values())) + + existing_id = session.execute(stmt).scalar_one_or_none() + + if existing_id is not None: + return InsertMetadata(existing_idx=[0]), existing_id + + # Does not exist. 
+            mb_levels_orms = {}
+            for level, sp_spec_id in level_spec_id_map.items():
+                mb_levels_orms[level] = ManybodySpecificationLevelsORM(
+                    level=level, singlepoint_specification_id=sp_spec_id
                 )
 
-                r = session.execute(stmt).scalar_one()
-                return InsertMetadata(existing_idx=[0]), r
+            new_orm = ManybodySpecificationORM(
+                program=mb_spec.program,
+                bsse_correction=mb_spec.bsse_correction,
+                keywords=mb_kw_dict,
+                specification_hash=mb_spec_hash,
+                levels=mb_levels_orms,
+                protocols={},
+            )
+
+            session.add(new_orm)
+            session.flush()
+            return InsertMetadata(inserted_idx=[0]), new_orm.id
 
     def get(
         self,
@@ -503,7 +509,9 @@ def query(
         stmt = stmt.join(ManybodyRecordORM.specification)
 
         if need_qcspec_join:
-            stmt = stmt.join(ManybodySpecificationORM.singlepoint_specification)
+            stmt = stmt.join(ManybodySpecificationORM.levels).join(
+                ManybodySpecificationLevelsORM.singlepoint_specification
+            )
 
         stmt = stmt.where(*and_query)
diff --git a/qcfractal/qcfractal/components/manybody/test_record_client.py b/qcfractal/qcfractal/components/manybody/test_record_client.py
index db3eb6600..f0eb1d341 100644
--- a/qcfractal/qcfractal/components/manybody/test_record_client.py
+++ b/qcfractal/qcfractal/components/manybody/test_record_client.py
@@ -7,7 +7,7 @@
 from qcarchivetesting import load_molecule_data
 from qcfractal.components.manybody.record_db_models import ManybodyRecordORM
-from qcportal.manybody import ManybodySpecification, ManybodyKeywords
+from qcportal.manybody import ManybodySpecification
 from qcportal.record_models import RecordStatusEnum, PriorityEnum
 from qcportal.singlepoint import QCSpecification
 from qcportal.utils import now_at_utc
@@ -30,9 +30,15 @@ def test_manybody_client_tag_priority(snowflake_client: PortalClient):
         keywords={"tag_priority": [tag, priority]},
     )
 
-    kw = ManybodyKeywords(max_nbody=1, bsse_correction="none")
-
-    meta1, id1 = snowflake_client.add_manybodys([water], "manybody", sp_spec, kw, tag=tag, priority=priority)
+    meta1, id1 = snowflake_client.add_manybodys(
+        [water],
+        "qcmanybody",
+        bsse_correction=["nocp"],
+        levels={1: sp_spec},
+        keywords={"return_total_data": True},
+        tag=tag,
+        priority=priority,
+    )
     assert meta1.n_inserted == 1
 
     rec = snowflake_client.get_records(id1, include=["service"])
@@ -52,8 +58,9 @@ def test_manybody_client_add_get(
     meta1, id1 = submitter_client.add_manybodys(
         [water2, water4],
         spec.program,
-        spec.singlepoint_specification,
-        spec.keywords,
+        spec.levels,
+        spec.bsse_correction,
+        keywords=spec.keywords,
         tag="tag1",
         priority=PriorityEnum.low,
         owner_group=owner_group,
@@ -97,8 +104,9 @@ def test_manybody_client_add_duplicate(
     meta, id = submitter_client.add_manybodys(
         all_mols,
         spec.program,
-        spec.singlepoint_specification,
-        spec.keywords,
+        spec.levels,
+        spec.bsse_correction,
+        keywords=spec.keywords,
         tag="tag1",
         priority=PriorityEnum.low,
         owner_group=None,
@@ -110,8 +118,9 @@
     meta, id2 = submitter_client.add_manybodys(
         all_mols,
         spec.program,
-        spec.singlepoint_specification,
-        spec.keywords,
+        spec.levels,
+        spec.bsse_correction,
+        keywords=spec.keywords,
         tag="tag1",
         priority=PriorityEnum.low,
         owner_group=None,
@@ -141,8 +150,9 @@ def test_manybody_client_add_existing_molecule(snowflake_client: PortalClient):
     meta1, id1 = snowflake_client.add_manybodys(
         [mol1, mol2, mol1],
         spec.program,
-        spec.singlepoint_specification,
-        spec.keywords,
+        spec.levels,
+        spec.bsse_correction,
+        keywords=spec.keywords,
         tag="tag1",
        priority=PriorityEnum.low,
     )
@@ -167,7 +177,7 @@ def test_manybody_client_delete(snowflake: QCATestingSnowflake):
     activated_manager_name, _ = snowflake.activate_manager()
     snowflake_client = snowflake.client()
 
-    mb_id = run_test_data(storage_socket, activated_manager_name, "mb_none_he4_psi4_mp2")
+    mb_id = run_test_data(storage_socket, activated_manager_name, "mb_cp_he4_psi4_mp2")
 
     with storage_socket.session_scope() as session:
         rec = session.get(ManybodyRecordORM, mb_id)
@@ -188,7 +198,7 @@ def test_manybody_client_delete(snowflake: QCATestingSnowflake):
     meta = snowflake_client.delete_records(mb_id, soft_delete=True, delete_children=True)
     assert meta.success
     assert meta.deleted_idx == [0]
-    assert meta.n_children_deleted == len(child_ids)
+    assert meta.n_children_deleted == len(set(child_ids))
 
     child_recs = snowflake_client.get_records(child_ids, missing_ok=True)
     assert all(x.status == RecordStatusEnum.deleted for x in child_recs)
@@ -198,7 +208,7 @@ def test_manybody_client_delete(snowflake: QCATestingSnowflake):
     meta = snowflake_client.delete_records(mb_id, soft_delete=False, delete_children=True)
     assert meta.success
     assert meta.deleted_idx == [0]
-    assert meta.n_children_deleted == len(child_ids)
+    assert meta.n_children_deleted == len(set(child_ids))
 
     recs = snowflake_client.get_manybodys(mb_id, missing_ok=True)
     assert recs is None
@@ -217,7 +227,7 @@ def test_manybody_client_harddelete_nochildren(snowflake: QCATestingSnowflake):
     activated_manager_name, _ = snowflake.activate_manager()
     snowflake_client = snowflake.client()
 
-    mb_id = run_test_data(storage_socket, activated_manager_name, "mb_none_he4_psi4_mp2")
+    mb_id = run_test_data(storage_socket, activated_manager_name, "mb_cp_he4_psi4_mp2")
 
     with storage_socket.session_scope() as session:
         rec = session.get(ManybodyRecordORM, mb_id)
@@ -240,7 +250,7 @@ def test_manybody_client_delete_opt_inuse(snowflake: QCATestingSnowflake):
     activated_manager_name, _ = snowflake.activate_manager()
     snowflake_client = snowflake.client()
 
-    mb_id = run_test_data(storage_socket, activated_manager_name, "mb_none_he4_psi4_mp2")
+    mb_id = run_test_data(storage_socket, activated_manager_name, "mb_cp_he4_psi4_mp2")
 
     with storage_socket.session_scope() as session:
         rec = session.get(ManybodyRecordORM, mb_id)
@@ -258,13 +268,13 @@ def test_manybody_client_query(snowflake: QCATestingSnowflake):
     storage_socket = snowflake.get_storage_socket()
     snowflake_client = snowflake.client()
 
-    id_1, _ = submit_test_data(storage_socket, "mb_none_he4_psi4_mp2")
-    id_2, _ = submit_test_data(storage_socket, "mb_cp_he4_psi4_mp2")
+    id_1, _ = submit_test_data(storage_socket, "mb_cp_he4_psi4_mp2")
+    id_2, _ = submit_test_data(storage_socket, "mb_all_he4_psi4_multiss")
 
     all_mbs = snowflake_client.get_manybodys([id_1, id_2])
     mol_ids = [x.initial_molecule_id for x in all_mbs]
 
-    query_res = snowflake_client.query_manybodys(program=["manybody"])
+    query_res = snowflake_client.query_manybodys(program=["qcmanybody"])
     query_res_l = list(query_res)
     assert len(query_res_l) == 2
@@ -304,7 +314,7 @@ def test_manybody_client_query(snowflake: QCATestingSnowflake):
     # query for method
     query_res = snowflake_client.query_manybodys(qc_method=["hf"])
     query_res_l = list(query_res)
-    assert len(query_res_l) == 0
+    assert len(query_res_l) == 1
 
     query_res = snowflake_client.query_manybodys(qc_method=["mp2"])
     query_res_l = list(query_res)
diff --git a/qcfractal/qcfractal/components/manybody/test_record_socket.py b/qcfractal/qcfractal/components/manybody/test_record_socket.py
index d2d42f12f..0e34a8191 100644
--- a/qcfractal/qcfractal/components/manybody/test_record_socket.py
+++ b/qcfractal/qcfractal/components/manybody/test_record_socket.py
@@ -9,7 +9,7 @@
 from qcfractal.db_socket import SQLAlchemySocket
 from qcfractal.testing_helpers import run_service
 from qcportal.auth import UserInfo, GroupInfo
-from qcportal.manybody import ManybodySpecification, ManybodyKeywords
+from qcportal.manybody import ManybodySpecification
 from qcportal.record_models import RecordStatusEnum, PriorityEnum
 from qcportal.singlepoint import SinglepointProtocols, QCSpecification
 from qcportal.utils import now_at_utc
@@ -21,7 +21,7 @@
     from sqlalchemy.orm.session import Session
 
 
-@pytest.mark.parametrize("spec", test_specs[:1])
+@pytest.mark.parametrize("spec", test_specs)
 def test_manybody_socket_add_get(storage_socket: SQLAlchemySocket, session: Session, spec: ManybodySpecification):
     water2 = load_molecule_data("water_dimer_minima")
     water4 = load_molecule_data("water_stacked")
@@ -53,16 +53,27 @@ def test_manybody_socket_add_get(storage_socket: SQLAlchemySocket, session: Sess
 
 def test_manybody_socket_add_same_1(storage_socket: SQLAlchemySocket):
     spec = ManybodySpecification(
-        program="manybody",
-        keywords=ManybodyKeywords(max_nbody=None, bsse_correction="none"),
-        singlepoint_specification=QCSpecification(
-            program="prog1",
-            driver="energy",
-            method="b3lyp",
-            basis="6-31G*",
-            keywords={"k": "value"},
-            protocols=SinglepointProtocols(wavefunction="all"),
-        ),
+        program="qcmanybody",
+        levels={
+            2: QCSpecification(
+                program="prog1",
+                driver="energy",
+                method="b3lyp",
+                basis="6-31G*",
+                keywords={"k": "value"},
+                protocols=SinglepointProtocols(wavefunction="all"),
+            ),
+            1: QCSpecification(
+                program="prog1",
+                driver="energy",
+                method="b3lyp",
+                basis="6-31G*",
+                keywords={"k": "value"},
+                protocols=SinglepointProtocols(wavefunction="all"),
+            ),
+        },
+        bsse_correction=["nocp"],
+        keywords={"return_total_data": True},
     )
 
     water2 = load_molecule_data("water_dimer_minima")
@@ -83,8 +94,9 @@
 @pytest.mark.parametrize(
     "test_data_name",
     [
-        "mb_none_he4_psi4_mp2",
         "mb_cp_he4_psi4_mp2",
+        "mb_all_he4_psi4_multi",
+        "mb_all_he4_psi4_multiss",
     ],
 )
 def test_manybody_socket_run(
@@ -124,13 +136,12 @@
     assert desc_info["record_type"] == rec.record_type
     assert desc_info["created_on"] == rec.created_on
     assert rec.specification.program in short_desc
-    assert rec.specification.singlepoint_specification.program in short_desc
-    assert rec.specification.singlepoint_specification.method in short_desc
 
     out = rec.compute_history[-1].outputs["stdout"].get_output()
     assert "All manybody singlepoint computations are complete" in out
 
-    assert len(rec.clusters) == n_singlepoints
+    unique_sp = set(x.singlepoint_id for x in rec.clusters)
+    assert len(unique_sp) == n_singlepoints
 
 
 def test_manybody_socket_run_duplicate(
diff --git a/qcfractal/qcfractal/components/manybody/test_record_socket_specs.py b/qcfractal/qcfractal/components/manybody/test_record_socket_specs.py
index 753542727..232c7b982 100644
--- a/qcfractal/qcfractal/components/manybody/test_record_socket_specs.py
+++ b/qcfractal/qcfractal/components/manybody/test_record_socket_specs.py
@@ -1,171 +1,526 @@
+from qcarchivetesting import load_hash_test_data
+from qcfractal.components.manybody.record_db_models import ManybodySpecificationORM
+from qcfractal.components.testing_fixtures import spec_test_runner
 from qcfractal.db_socket import SQLAlchemySocket
-from qcportal.manybody import ManybodySpecification, ManybodyKeywords
ManybodySpecification, ManybodyKeywords +from qcportal.manybody import ManybodySpecification from qcportal.singlepoint import SinglepointProtocols, QCSpecification -def test_manybody_socket_add_specification_same_0(storage_socket: SQLAlchemySocket): - spec1 = ManybodySpecification( - program="manybody", - keywords=ManybodyKeywords(max_nbody=None, bsse_correction="none"), - singlepoint_specification=QCSpecification( - program="prog1", - driver="energy", - method="b3lyp", - basis="6-31G*", - keywords={"k": "value"}, - protocols=SinglepointProtocols(wavefunction="all"), - ), - ) - - meta, id = storage_socket.records.manybody.add_specification(spec1) - assert meta.success - assert meta.inserted_idx == [0] - assert meta.existing_idx == [] - assert id is not None +def test_manybody_hash_canaries(storage_socket: SQLAlchemySocket): + # Test data is hash : spec dict + test_data = load_hash_test_data("manybody_specification_tests") - # Try inserting again - meta, id2 = storage_socket.records.manybody.add_specification(spec1) + spec_map = [(k, ManybodySpecification(**v)) for k, v in test_data.items()] + + specs = [x[1] for x in spec_map] + meta, ids = storage_socket.records.manybody.add_specifications(specs) assert meta.success - assert meta.inserted_idx == [] - assert meta.existing_idx == [0] - assert id == id2 + assert len(ids) == len(specs) + assert meta.n_existing == 0 + + with storage_socket.session_scope() as session: + for spec_id, (spec_hash, _) in zip(ids, spec_map): + spec_orm = session.get(ManybodySpecificationORM, spec_id) + assert spec_orm.specification_hash == spec_hash + + +def test_manybody_socket_add_specification_same_1(spec_test_runner): + spec1 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["nocp"], + keywords={"return_total_data": True}, + ) + spec_test_runner("manybody", spec1, spec1, True) -def test_manybody_socket_add_specification_same_1(storage_socket: SQLAlchemySocket): + +def test_manybody_socket_add_specification_same_2(spec_test_runner): # Test case sensitivity spec1 = ManybodySpecification( - program="manybody", - keywords=ManybodyKeywords(max_nbody=None, bsse_correction="none"), - singlepoint_specification=QCSpecification( - program="prog1", - driver="energy", - method="b3lyp", - basis="6-31G*", - keywords={"k": "value"}, - protocols=SinglepointProtocols(wavefunction="all"), - ), + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["nocp"], + keywords={"return_total_data": True}, + ) + + spec2 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prOg1", + driver="energy", + method="b3LYP", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["nocp"], + keywords={"return_total_data": True}, + ) + + spec_test_runner("manybody", spec1, spec2, True) + + +def test_manybody_socket_add_specification_same_3(spec_test_runner): + # Test supersystem + spec1 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + 
protocols=SinglepointProtocols(wavefunction="all"), + ), + "supersystem": QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + bsse_correction=["nocp"], + keywords={"return_total_data": True}, + ) + + spec_test_runner("manybody", spec1, spec1, True) + + +def test_manybody_socket_add_specification_same_4(spec_test_runner): + # Test ordering + spec1 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="hf", + basis="sto-3g", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + 2: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + "supersystem": QCSpecification( + program="prog1", + driver="energy", + method="ccsd", + basis="aug-cc-pvtz", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + bsse_correction=["nocp"], + keywords={}, ) + # Test ordering spec2 = ManybodySpecification( - program="manybody", - keywords=ManybodyKeywords(max_nbody=None, bsse_correction="none"), - singlepoint_specification=QCSpecification( - program="prOg1", - driver="energy", - method="b3LYP", - basis="6-31g*", - keywords={"k": "value"}, - protocols=SinglepointProtocols(wavefunction="all"), - ), + program="qcmanybody", + levels={ + 2: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + "supersystem": QCSpecification( + program="prog1", + driver="energy", + method="ccsd", + basis="aug-cc-pvtz", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + 1: QCSpecification( + program="prog1", + driver="energy", + method="hf", + basis="sto-3g", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + bsse_correction=["nocp"], + keywords={}, ) - meta, id = storage_socket.records.manybody.add_specification(spec1) - assert meta.inserted_idx == [0] + spec_test_runner("manybody", spec1, spec2, True) + + +def test_manybody_socket_add_specification_diff_1(spec_test_runner): + # Test different parameters + spec1 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["nocp"], + keywords={"return_total_data": True}, + ) + + spec2 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["cp"], + keywords={"return_total_data": True}, + ) - meta, id = storage_socket.records.manybody.add_specification(spec2) - assert meta.existing_idx == [0] + spec_test_runner("manybody", spec1, spec2, False) -def test_manybody_socket_add_diff_1(storage_socket: SQLAlchemySocket): +def test_manybody_socket_add_specification_diff_2(spec_test_runner): # Test different parameters spec1 = ManybodySpecification( - program="manybody", - keywords=ManybodyKeywords(max_nbody=4, bsse_correction="none"), - singlepoint_specification=QCSpecification( - 
program="prog1", - driver="energy", - method="b3lyp", - basis="6-31G*", - keywords={"k": "value"}, - protocols=SinglepointProtocols(wavefunction="all"), - ), + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["nocp"], + keywords={"return_total_data": True}, ) spec2 = ManybodySpecification( - program="manybody", - keywords=ManybodyKeywords(max_nbody=None, bsse_correction="none"), - singlepoint_specification=QCSpecification( - program="prOg1", - driver="energy", - method="b3LYP", - basis="6-31g*", - keywords={"k": "value"}, - protocols=SinglepointProtocols(wavefunction="all"), - ), + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["vmfc"], + keywords={"return_total_data": True}, ) - meta, id = storage_socket.records.manybody.add_specification(spec1) - assert meta.inserted_idx == [0] + spec_test_runner("manybody", spec1, spec2, False) - meta, id = storage_socket.records.manybody.add_specification(spec2) - assert meta.inserted_idx == [0] +def test_manybody_socket_add_specification_diff_3(spec_test_runner): + # Test different parameters + spec1 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["cp"], + keywords={"return_total_data": True}, + ) + + spec2 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["cp"], + keywords={"return_total_data": False}, + ) + spec_test_runner("manybody", spec1, spec2, False) -def test_manybody_socket_add_diff_2(storage_socket: SQLAlchemySocket): + +def test_manybody_socket_add_specification_diff_4(spec_test_runner): # Test different parameters spec1 = ManybodySpecification( - program="manybody", - keywords=ManybodyKeywords(max_nbody=None, bsse_correction="cp"), - singlepoint_specification=QCSpecification( - program="prog1", - driver="energy", - method="b3lyp", - basis="6-31G*", - keywords={"k": "value"}, - protocols=SinglepointProtocols(wavefunction="all"), - ), + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["cp"], + keywords={"return_total_data": True}, + ) + + spec2 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ) + }, + bsse_correction=["cp"], + ) + spec_test_runner("manybody", spec1, spec2, False) + + +def test_manybody_socket_add_specification_diff_5(spec_test_runner): + # Test different levels + spec1 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + 
protocols=SinglepointProtocols(wavefunction="all"), + ), + 2: QCSpecification( + program="prog1", + driver="energy", + method="hf", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + bsse_correction=["cp"], + keywords={}, + ) + + spec2 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + bsse_correction=["cp"], + keywords={}, + ) + + spec_test_runner("manybody", spec1, spec2, False) + + +def test_manybody_socket_add_specification_diff_6(spec_test_runner): + # Test different levels + spec1 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="ccsd", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + 2: QCSpecification( + program="prog1", + driver="energy", + method="hf", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + bsse_correction=["cp"], + keywords={"return_total_data": True}, ) spec2 = ManybodySpecification( - program="manybody", - keywords=ManybodyKeywords(max_nbody=None, bsse_correction="none"), - singlepoint_specification=QCSpecification( - program="prOg1", - driver="energy", - method="b3LYP", - basis="6-31g*", - keywords={"k": "value"}, - protocols=SinglepointProtocols(wavefunction="all"), - ), + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + 2: QCSpecification( + program="prog1", + driver="energy", + method="hf", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + bsse_correction=["cp"], + keywords={"return_total_data": True}, ) - meta, id = storage_socket.records.manybody.add_specification(spec1) - assert meta.inserted_idx == [0] + spec_test_runner("manybody", spec1, spec2, False) - meta, id = storage_socket.records.manybody.add_specification(spec2) - assert meta.inserted_idx == [0] +def test_manybody_socket_add_specification_diff_7(spec_test_runner): + # Test different levels + spec1 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + 2: QCSpecification( + program="prog1", + driver="energy", + method="hf", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + bsse_correction=["cp"], + keywords={"return_total_data": True}, + ) + + spec2 = ManybodySpecification( + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + 2: QCSpecification( + program="prog1", + driver="energy", + method="hf", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="none"), + ), + }, + bsse_correction=["cp"], + keywords={"return_total_data": True}, + ) + spec_test_runner("manybody", spec1, spec2, False) -def test_manybody_socket_add_diff_3(storage_socket: SQLAlchemySocket): - # Test different qc 
spec + +def test_manybody_socket_add_specification_diff_8(spec_test_runner): + # Test different levels spec1 = ManybodySpecification( - program="manybody", - keywords=ManybodyKeywords(max_nbody=None, bsse_correction="none"), - singlepoint_specification=QCSpecification( - program="prog1", - driver="energy", - method="b3lyp", - basis="6-31G*", - keywords={"k": "value"}, - protocols=SinglepointProtocols(wavefunction="all"), - ), + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="ccsd", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + 2: QCSpecification( + program="prog1", + driver="energy", + method="hf", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + bsse_correction=["cp"], ) spec2 = ManybodySpecification( - program="manybody", - keywords=ManybodyKeywords(max_nbody=None, bsse_correction="none"), - singlepoint_specification=QCSpecification( - program="prOg1", - driver="energy", - method="bhlyp", - basis="6-31g*", - keywords={"k": "value"}, - protocols=SinglepointProtocols(wavefunction="all"), - ), - ) - - meta, id = storage_socket.records.manybody.add_specification(spec1) - assert meta.inserted_idx == [0] - - meta, id = storage_socket.records.manybody.add_specification(spec2) - assert meta.inserted_idx == [0] + program="qcmanybody", + levels={ + 1: QCSpecification( + program="prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + "supersystem": QCSpecification( + program="prog1", + driver="energy", + method="hf", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + bsse_correction=["cp"], + ) + + spec_test_runner("manybody", spec1, spec2, False) diff --git a/qcfractal/qcfractal/components/manybody/testing_helpers.py b/qcfractal/qcfractal/components/manybody/testing_helpers.py index e44628314..88cb95cb1 100644 --- a/qcfractal/qcfractal/components/manybody/testing_helpers.py +++ b/qcfractal/qcfractal/components/manybody/testing_helpers.py @@ -11,9 +11,9 @@ from qcarchivetesting.helpers import read_record_data from qcfractal.components.manybody.record_db_models import ManybodyRecordORM from qcfractal.testing_helpers import run_service -from qcportal.manybody import ManybodySpecification, ManybodyKeywords +from qcportal.manybody import ManybodySpecification +from qcportal.singlepoint import QCSpecification, SinglepointProtocols from qcportal.record_models import PriorityEnum, RecordStatusEnum, RecordTask -from qcportal.singlepoint import SinglepointProtocols, QCSpecification if TYPE_CHECKING: from qcfractal.db_socket import SQLAlchemySocket @@ -21,38 +21,55 @@ test_specs = [ ManybodySpecification( - program="manybody", - keywords=ManybodyKeywords(max_nbody=None, bsse_correction="none"), - singlepoint_specification=QCSpecification( - program="prog1", - driver="energy", - method="b3lyp", - basis="6-31G*", - keywords={"k": "value"}, - protocols=SinglepointProtocols(wavefunction="all"), - ), + program="qcmanybody", + bsse_correction=["nocp"], + levels={ + 1: QCSpecification( + program="Prog1", + driver="energy", + method="b3lyp", + basis="6-31G*", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + keywords={"return_total_data": True}, ), ManybodySpecification( - keywords=ManybodyKeywords(max_nbody=1, bsse_correction="none"), - 
program="manybody", - singlepoint_specification=QCSpecification( - program="Prog2", - driver="energy", - method="Hf", - basis="def2-tzVP", - keywords={"k": "value"}, - ), + program="qcmanybody", + bsse_correction=["cp"], + levels={ + 1: QCSpecification( + program="Prog2", + driver="energy", + method="Hf", + basis="def2-tzVP", + keywords={"k": "value"}, + protocols=SinglepointProtocols(wavefunction="all"), + ), + }, + keywords={"return_total_data": True}, ), ManybodySpecification( - keywords=ManybodyKeywords(max_nbody=1, bsse_correction="none"), - program="manybody", - singlepoint_specification=QCSpecification( - program="Prog3", - driver="properties", - method="Hf", - basis="sto-3g", - keywords={"k": "v"}, - ), + program="qcmanybody", + bsse_correction=["cp", "vmfc"], + levels={ + 1: QCSpecification( + program="Prog3", + driver="energy", + method="mp2", + basis="sto-3g", + keywords={"k": "v"}, + ), + "supersystem": QCSpecification( + program="Prog3", + driver="energy", + method="Hf", + basis="sto-3g", + keywords={"k": "v"}, + ), + }, + keywords={"return_total_data": False}, ), ] diff --git a/qcportal/qcportal/client.py b/qcportal/qcportal/client.py index f41475aca..b3fbfb0bc 100644 --- a/qcportal/qcportal/client.py +++ b/qcportal/qcportal/client.py @@ -4,7 +4,7 @@ import math import os from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple, Union, Sequence, Iterable, TypeVar, Type +from typing import Any, Dict, List, Optional, Tuple, Union, Sequence, Iterable, TypeVar, Type, Literal from tabulate import tabulate @@ -17,9 +17,10 @@ GridoptimizationQueryFilters, ) from qcportal.manybody import ( - ManybodyKeywords, + BSSECorrectionEnum, ManybodyRecord, ManybodyAddBody, + ManybodyKeywords, ManybodyQueryFilters, ) from qcportal.neb import ( @@ -2088,8 +2089,9 @@ def add_manybodys( self, initial_molecules: Sequence[Union[int, Molecule]], program: str, - singlepoint_specification: QCSpecification, - keywords: ManybodyKeywords, + levels: Dict[Union[int, Literal["supersystem"]], QCSpecification], + bsse_correction: Union[BSSECorrectionEnum, Sequence[BSSECorrectionEnum]], + keywords: Union[ManybodyKeywords, Dict[str, Any]], tag: str = "*", priority: PriorityEnum = PriorityEnum.normal, owner_group: Optional[str] = None, @@ -2142,7 +2144,8 @@ def add_manybodys( "initial_molecules": initial_molecules, "specification": { "program": program, - "singlepoint_specification": singlepoint_specification, + "levels": levels, + "bsse_correction": make_list(bsse_correction), "keywords": keywords, }, "tag": tag, diff --git a/qcportal/qcportal/dataset_testing_helpers.py b/qcportal/qcportal/dataset_testing_helpers.py index 3747e9fc4..49f237284 100644 --- a/qcportal/qcportal/dataset_testing_helpers.py +++ b/qcportal/qcportal/dataset_testing_helpers.py @@ -436,9 +436,6 @@ def run_dataset_model_submit(ds, test_entries, test_spec, record_compare, backgr assert ds.record_count == 0 assert ds._client.list_datasets()[0]["record_count"] == 0 - # test_entries[2] should have additional keywords - assert test_entries[2].additional_keywords - ds.add_specification("spec_1", test_spec) ds.add_entries(test_entries[0]) diff --git a/qcportal/qcportal/external/__init__.py b/qcportal/qcportal/external/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/qcportal/qcportal/external/crystalatte.py b/qcportal/qcportal/external/crystalatte.py new file mode 100644 index 000000000..b22a158e4 --- /dev/null +++ b/qcportal/qcportal/external/crystalatte.py @@ -0,0 +1,263 @@ +from __future__ import 
annotations + +import os.path +import tempfile +from typing import List, Optional, Dict, Any, TYPE_CHECKING + +import numpy as np +import qcelemental as qcel +import tabulate + +from qcportal.dataset_models import load_dataset_view +from qcportal.manybody import ManybodyDatasetEntry, ManybodySpecification, ManybodyDataset, BSSECorrectionEnum +from qcportal.record_models import RecordStatusEnum + +try: + from crystalatte import build_nmer, cif_main, supercell2monomers +except ImportError: + raise ImportError("Please install crystalatte to use this module") + +if TYPE_CHECKING: + from qcportal import PortalClient + from qcportal.singlepoint import QCSpecification + +nmer_names = ["monomers", "dimers", "trimers", "tetramers", "pentamers"] +nmer_name_map = {nmer_names[n - 1]: n for n in range(2, len(nmer_names) + 1)} + + +def _check_ds_complete(ds: ManybodyDataset, specification_name: str): + if specification_name not in ds.specification_names: + raise RuntimeError(f"Specification {specification_name} not found in dataset") + + stat = ds.status() + if specification_name not in stat: + raise RuntimeError(f"Specification {specification_name} found in dataset, but not submitted?") + + if set(stat[specification_name].keys()) != {RecordStatusEnum.complete}: + raise RuntimeError(f"Specification {specification_name} not entirely complete for dataset {ds.name}") + + if stat[specification_name][RecordStatusEnum.complete] != len(ds.entry_names): + raise RuntimeError(f"Not all entries submitted/completed") + + +def create_datasets( + client: PortalClient, + dataset_basename: str, + cif_input_path: str, + r_cut: List[float], + qc_specification: QCSpecification, + bsse_correction: List[BSSECorrectionEnum], + manybody_keywords: Dict[str, Any], + *, + uniq_filter="ChSEV", + r_cut_com=1000, + bfs_thresh=1.2, + cif_a=0, + cif_b=0, + cif_c=0, + verbose: int = 1, + tmpdir: Optional[str] = None, +): + nmers_up_to = len(r_cut) + nmer_cutoff = max(r_cut[1:]) + + with tempfile.TemporaryDirectory(dir=tmpdir) as outdir: + cif_name = os.path.split(cif_input_path)[1] + cif_base = os.path.splitext(cif_name)[0] + cif_output = os.path.join(outdir, f"{cif_base}.xyz") + + r_cut_monomer = cif_main(cif_input_path, cif_output, cif_a, cif_b, cif_c, r_cut[0], nmer_cutoff, True) + nmers = supercell2monomers(cif_output, r_cut_monomer, bfs_thresh, verbose) + + total_monomers = len(nmers) + + # These are indexed by nmer count + datasets: Dict[int, ManybodyDataset] = {} + entries = {n: [] for n in range(2, nmers_up_to + 1)} + + ######################################### + # Create specifications for the datasets + ######################################### + for n in range(2, nmers_up_to + 1): + build_nmer(nmers, total_monomers, nmer_names[n - 1], r_cut[n - 1], r_cut_com, uniq_filter, verbose) + datasets[n] = client.add_dataset("manybody", f"{dataset_basename}: {nmer_names[n - 1]}") + + mb_spec = ManybodySpecification( + program="qcmanybody", + levels={k: qc_specification for k in range(1, n + 1)}, + bsse_correction=bsse_correction, + keywords=manybody_keywords, + ) + + datasets[n].add_specification("default", mb_spec) + + ######################################### + # Create entries for the datasets + ######################################### + for keynmer, nmer in nmers.items(): + + # Energies are not calculated for monomers. Rigid body approximation. 
+ if len(nmer["monomers"]) == 1: + continue + + nat = nmer["coords"].shape[0] + fidx = np.split(np.arange(nat), nmer["delimiters"]) + fragments = [fr.tolist() for fr in fidx if len(fr)] + + qcskmol = qcel.models.Molecule( + symbols=nmer["elem"], + geometry=nmer["coords"], + fragments=fragments, + fix_com=True, + fix_orientation=True, + ) + + drop_attributes = ["elem", "coords"] + attributes = {k: v for k, v in nmer.items() if k not in drop_attributes} + + ent = ManybodyDatasetEntry(name=keynmer, initial_molecule=qcskmol, attributes=attributes) + + n = len(nmer["monomers"]) + entries[n].append(ent) + + for n in range(2, nmers_up_to + 1): + meta = datasets[n].add_entries(entries[n]) + if not meta.success: + raise RuntimeError(f"Failed to add entries to dataset {datasets[n].name}. Error:\n {meta.error_string}") + + print("\n" + "-" * 80) + for n, ds in datasets.items(): + print(f"Dataset {ds.name} [id {ds.id}] added with {len(entries[n])} entries") + + +def _analyze_datasets(specification_name, *datasets: ManybodyDataset): + + assert len(datasets) > 0 + + ds_split_names = [ds.name.rsplit(":", maxsplit=1) for ds in datasets] + + # All have the same base + dataset_basename = ds_split_names[0][0] + assert all(x[0] == dataset_basename for x in ds_split_names) + + # for example, ds_map[2] = ds + ds_map: Dict[int, ManybodyDataset] = {nmer_name_map[n.strip()]: ds for (_, n), ds in zip(ds_split_names, datasets)} + + nmer_results = [] + + for n, ds in ds_map.items(): + for e, s, r in ds.iterate_records(specification_names=specification_name, status=RecordStatusEnum.complete): + + entry = ds.get_entry(e) + assert len(entry.attributes["monomers"]) == n + n_body_energy = r.properties["results"][f"cp_corrected_interaction_energy_through_{n}_body"] + + if n > 2: + n_minus_1_body_energy = r.properties["results"][f"cp_corrected_interaction_energy_through_{n-1}_body"] + nambe = n_body_energy - n_minus_1_body_energy + else: + nambe = n_body_energy + + contrib = nambe * entry.attributes["replicas"] / n + + rminseps = "" + for r in sorted(entry.attributes["min_monomer_separations"]): + rminseps += "{:6.3f} ".format(r * qcel.constants.bohr2angstroms) + + res = { + "name": e, + "nambe": nambe * qcel.constants.hartree2kcalmol * qcel.constants.cal2J, + "replicas": entry.attributes["replicas"], + "contrib": contrib * qcel.constants.hartree2kcalmol * qcel.constants.cal2J, + "priority_cutoff": entry.attributes["priority_cutoff"], + "rminseps": rminseps, + } + + nmer_results.append(res) + + return nmer_results + + +def analyze_datasets(client: PortalClient, dataset_basename: str, specification_name: str): + datasets: Dict[int, ManybodyDataset] = {} + + client_datasets = [x["dataset_name"] for x in client.list_datasets()] + for n in range(2, len(nmer_names) + 1): + dname = f"{dataset_basename}: {nmer_names[n - 1]}" + if dname in client_datasets: + datasets[n] = client.get_dataset("manybody", dname) + + if not datasets: + raise RuntimeError(f"Could not find any datasets with prefix {dataset_basename}") + + print("Found datasets:") + for n, ds in datasets.items(): + print(f" [{nmer_names[n - 1]:<9}] {ds.name}") + + for ds in datasets.values(): + _check_ds_complete(ds, specification_name) + + return _analyze_datasets(specification_name, *datasets.values()) + + +def analyze_dataset_views(specification_name: str, *view_paths: str): + datasets = [load_dataset_view(x) for x in view_paths] + return _analyze_datasets(specification_name, *datasets) + + +def results_summary_str(nmer_results): + + table_header = [ + "N-mer Name", + 
"Non-Additive\nMB Energy\n(kJ/mol)", + "Num.\nRep.", + "N-mer\nContribution\n(kJ/mol)", + "Partial\nCrystal\nLattice Energy\n(kJ/mol)", + "Calculation\nPriority\n(Arb. Units)", + "Minimum\nMonomer\nSeparations\n(A)", + ] + + # Sort by priority cutoff (inverse) + nmer_results = sorted(nmer_results, key=lambda x: -x["priority_cutoff"]) + + crystal_lattice_energy = 0.0 + table_rows = [] + + for nr in nmer_results: + crystal_lattice_energy += nr["contrib"] + table_rows.append( + ( + nr["name"], + nr["nambe"], + nr["replicas"], + nr["contrib"], + crystal_lattice_energy, + nr["priority_cutoff"], + nr["rminseps"], + ) + ) + + # print( + # "{:26} | {:>12.8f} | {:>4} | {:>12.8f} | {:>13.8f} | {:12.6e} | {}".format( + # nr["name"], + # nr["nambe"], + # nr["replicas"], + # nr["contrib"], + # crystal_lattice_energy, + # nr["priority_cutoff"], + # nr["rminseps"], + # ) + # ) + + output = tabulate.tabulate( + table_rows, headers=table_header, tablefmt="simple", floatfmt=(".8f", ".8f", ".8f", ".4e") + ) + + output += "\n\n" + output += f"Crystal Lattice Energy (kJ/mol) = {crystal_lattice_energy:9.8f}\n" + output += f"Crystal Lattice Energy (kcal/mol) = {crystal_lattice_energy / qcel.constants.cal2J:9.8f}\n" + return output + + +def print_results_summary(nmer_results): + print(results_summary_str(nmer_results)) diff --git a/qcportal/qcportal/manybody/__init__.py b/qcportal/qcportal/manybody/__init__.py index 0c893e091..ae247daf0 100644 --- a/qcportal/qcportal/manybody/__init__.py +++ b/qcportal/qcportal/manybody/__init__.py @@ -6,8 +6,8 @@ ) from .record_models import ( BSSECorrectionEnum, - ManybodyKeywords, ManybodySpecification, + ManybodyKeywords, ManybodyClusterMeta, ManybodyAddBody, ManybodyRecord, diff --git a/qcportal/qcportal/manybody/dataset_models.py b/qcportal/qcportal/manybody/dataset_models.py index 781ecdc08..a49aded73 100644 --- a/qcportal/qcportal/manybody/dataset_models.py +++ b/qcportal/qcportal/manybody/dataset_models.py @@ -18,7 +18,7 @@ class Config: name: str initial_molecule: Union[Molecule, int] - additional_keywords: Dict[str, Any] = {} + additional_singlepoint_keywords: Dict[str, Any] = {} attributes: Dict[str, Any] = {} comment: Optional[str] = None @@ -66,19 +66,19 @@ def add_entry( self, name: str, initial_molecule: Union[int, Molecule], - additional_keywords: Optional[Dict[str, Any]] = None, + additional_singlepoint_keywords: Optional[Dict[str, Any]] = None, attributes: Optional[Dict[str, Any]] = None, comment: Optional[str] = None, ): - if additional_keywords is None: - additional_keywords = {} + if additional_singlepoint_keywords is None: + additional_singlepoint_keywords = {} if attributes is None: attributes = {} ent = ManybodyDatasetNewEntry( name=name, initial_molecule=initial_molecule, - additional_keywords=additional_keywords, + additional_singlepoint_keywords=additional_singlepoint_keywords, attributes=attributes, comment=comment, ) diff --git a/qcportal/qcportal/manybody/record_models.py b/qcportal/qcportal/manybody/record_models.py index b1167821b..e9ab97713 100644 --- a/qcportal/qcportal/manybody/record_models.py +++ b/qcportal/qcportal/manybody/record_models.py @@ -9,8 +9,8 @@ from pydantic import BaseModel, Extra, validator, constr, PrivateAttr, Field from typing_extensions import Literal -from qcportal.molecules import Molecule from qcportal.cache import get_records_with_cache +from qcportal.molecules import Molecule from qcportal.record_models import BaseRecord, RecordAddBodyBase, RecordQueryFilters from qcportal.singlepoint.record_models import ( 
QCSpecification, @@ -19,31 +19,27 @@ class BSSECorrectionEnum(str, Enum): - none = "none" + nocp = "nocp" cp = "cp" + vmfc = "vmfc" class ManybodyKeywords(BaseModel): class Config: extra = Extra.forbid - max_nbody: Optional[int] = None - bsse_correction: BSSECorrectionEnum - - @validator("max_nbody") - def check_max_nbody(cls, v): - if v is not None and v <= 0: - raise ValueError("max_nbody must be None or > 0") - return v + return_total_data: bool = False class ManybodySpecification(BaseModel): class Config: extra = Extra.forbid - program: constr(to_lower=True) = "manybody" - singlepoint_specification: QCSpecification - keywords: ManybodyKeywords + program: constr(to_lower=True) = "qcmanybody" + levels: Dict[Union[int, Literal["supersystem"]], QCSpecification] + bsse_correction: List[BSSECorrectionEnum] + keywords: ManybodyKeywords = Field(ManybodyKeywords()) + protocols: Dict[str, Any] = Field(default_factory=dict) class ManybodyAddBody(RecordAddBodyBase): @@ -64,11 +60,10 @@ class Config: extra = Extra.forbid molecule_id: int + mc_level: str fragments: List[int] basis: List[int] - degeneracy: int singlepoint_id: Optional[int] - molecule: Optional[Molecule] = None @@ -79,7 +74,6 @@ class ManybodyCluster(ManybodyClusterMeta): class ManybodyRecord(BaseRecord): record_type: Literal["manybody"] = "manybody" specification: ManybodySpecification - results: Optional[Dict[str, Any]] initial_molecule_id: int diff --git a/qcportal/qcportal/manybody/test_dataset_models.py b/qcportal/qcportal/manybody/test_dataset_models.py index beeead8f4..363dbfe34 100644 --- a/qcportal/qcportal/manybody/test_dataset_models.py +++ b/qcportal/qcportal/manybody/test_dataset_models.py @@ -7,7 +7,7 @@ import qcportal.dataset_testing_helpers as ds_helpers from qcarchivetesting import load_molecule_data from qcportal.dataset_testing_helpers import dataset_submit_test_client -from qcportal.manybody import ManybodyDatasetNewEntry, ManybodySpecification, ManybodyKeywords, BSSECorrectionEnum +from qcportal.manybody import ManybodyDatasetNewEntry, ManybodySpecification, BSSECorrectionEnum from qcportal.record_models import PriorityEnum from qcportal.singlepoint.record_models import QCSpecification @@ -31,42 +31,59 @@ ManybodyDatasetNewEntry( name="test_mb_3", initial_molecule=water4, - additional_keywords={"max_nbody": 1234}, + additional_singlepoint_keywords={"maxiter": 1234}, ), ] test_specs = [ ManybodySpecification( - singlepoint_specification=QCSpecification( - program="prog1", driver="energy", method="b3lyp", basis="6-31g*", keywords={"maxiter": 20} - ), - keywords=ManybodyKeywords(bsse_correction=BSSECorrectionEnum.none, max_nbody=4), + program="qcmanybody", + bsse_correction=[BSSECorrectionEnum.nocp, BSSECorrectionEnum.cp], + levels={ + 1: QCSpecification( + program="prog1", driver="energy", method="b3lyp", basis="6-31g*", keywords={"maxiter": 20} + ), + 2: QCSpecification(program="prog1", driver="energy", method="hf", basis="6-31g*", keywords={"maxiter": 20}), + }, + keywords={"return_total_data": True}, ), ManybodySpecification( - singlepoint_specification=QCSpecification( - program="prog2", driver="energy", method="hf", basis="sto-3g", keywords={"maxiter": 40} - ), - keywords=ManybodyKeywords(bsse_correction=BSSECorrectionEnum.none), + program="qcmanybody", + bsse_correction=[BSSECorrectionEnum.vmfc], + levels={ + 1: QCSpecification( + program="prog2", driver="energy", method="b3lyp", basis="6-31g*", keywords={"maxiter": 20} + ), + 2: QCSpecification(program="prog2", driver="energy", method="hf", basis="6-31g*", 
keywords={"maxiter": 20}), + }, + keywords={"return_total_data": True}, ), ManybodySpecification( - singlepoint_specification=QCSpecification( - program="prog3", driver="energy", method="hf", basis="sto-3g", keywords={"maxiter": 40} - ), - keywords=ManybodyKeywords(bsse_correction=BSSECorrectionEnum.cp), + program="qcmanybody", + bsse_correction=[BSSECorrectionEnum.vmfc], + levels={ + 1: QCSpecification( + program="prog2", driver="energy", method="b3lyp", basis="sto-3g", keywords={"maxiter": 20} + ), + 2: QCSpecification(program="prog2", driver="energy", method="hf", basis="sto-3g", keywords={"maxiter": 20}), + }, + keywords={"return_total_data": True}, ), ] def entry_extra_compare(ent1, ent2): assert ent1.initial_molecule == ent2.initial_molecule - assert ent1.additional_keywords == ent2.additional_keywords + assert ent1.additional_singlepoint_keywords == ent2.additional_singlepoint_keywords def record_compare(rec, ent, spec): assert rec.initial_molecule == ent.initial_molecule merged_spec = spec.dict() - merged_spec["keywords"].update(ent.additional_keywords) + for v in merged_spec["levels"].values(): + v["keywords"] = v["keywords"] or {} + v["keywords"].update(ent.additional_singlepoint_keywords) assert rec.specification == ManybodySpecification(**merged_spec) diff --git a/qcportal/qcportal/manybody/test_record_models.py b/qcportal/qcportal/manybody/test_record_models.py index 2006021ed..2e1d4a2af 100644 --- a/qcportal/qcportal/manybody/test_record_models.py +++ b/qcportal/qcportal/manybody/test_record_models.py @@ -51,8 +51,8 @@ def test_manybody_record_model(snowflake: QCATestingSnowflake, includes: Optiona assert molecule == record.initial_molecule - assert isinstance(record.results, dict) - assert len(record.results) > 0 + assert isinstance(record.properties, dict) + assert len(record.properties) > 0 cl = record.clusters assert isinstance(cl, list) diff --git a/qcportal/qcportal/serialization.py b/qcportal/qcportal/serialization.py index f6d11ef70..9c9a32b72 100644 --- a/qcportal/qcportal/serialization.py +++ b/qcportal/qcportal/serialization.py @@ -115,7 +115,7 @@ def deserialize(data: Union[bytes, str], content_type: str): content_type = content_type[12:] if content_type == "msgpack": - return msgpack.loads(data, object_hook=_msgpack_decode, raw=False) + return msgpack.loads(data, object_hook=_msgpack_decode, raw=False, strict_map_key=False) elif content_type == "json": # JSON stored as bytes? Decode into a string for json to load if isinstance(data, bytes):