From f0ad914965547c3e4815062517d0a07afddf3d63 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Mon, 14 Nov 2022 16:08:39 -0600 Subject: [PATCH] Lint --- openff/qcsubmit/datasets/datasets.py | 53 ++++---- openff/qcsubmit/procedures.py | 2 +- openff/qcsubmit/results/caching.py | 22 ++- openff/qcsubmit/results/filters.py | 24 +++- openff/qcsubmit/results/results.py | 126 ++++++++++++------ openff/qcsubmit/tests/results/__init__.py | 10 +- openff/qcsubmit/tests/results/test_caching.py | 2 +- openff/qcsubmit/tests/results/test_filters.py | 2 +- openff/qcsubmit/tests/results/test_results.py | 13 +- 9 files changed, 157 insertions(+), 97 deletions(-) diff --git a/openff/qcsubmit/datasets/datasets.py b/openff/qcsubmit/datasets/datasets.py index 3989b457..af9af83b 100644 --- a/openff/qcsubmit/datasets/datasets.py +++ b/openff/qcsubmit/datasets/datasets.py @@ -25,15 +25,11 @@ from qcportal.datasets.optimization import OptimizationDatasetNewEntry from qcportal.datasets.singlepoint import SinglepointDatasetNewEntry from qcportal.datasets.torsiondrive import TorsiondriveDatasetNewEntry -from qcportal.records.singlepoint import SinglepointDriver, QCSpecification from qcportal.records.optimization import OptimizationSpecification +from qcportal.records.singlepoint import QCSpecification, SinglepointDriver from typing_extensions import Literal -from openff.qcsubmit.common_structures import ( - CommonBase, - Metadata, - MoleculeAttributes, -) +from openff.qcsubmit.common_structures import CommonBase, Metadata, MoleculeAttributes from openff.qcsubmit.constraints import Constraints from openff.qcsubmit.datasets.entries import ( DatasetEntry, @@ -150,7 +146,6 @@ def _get_specifications(self) -> "OptimizationSpecification": """ raise NotImplementedError() - @abc.abstractmethod def _get_entries(self) -> List[Any]: """Add entries to the Dataset's corresponding Collection. @@ -167,7 +162,6 @@ def _get_entries(self) -> List[Any]: """ pass - @abc.abstractmethod def to_tasks(self) -> Dict[str, List[Union[AtomicInput, OptimizationInput]]]: """ @@ -199,7 +193,6 @@ def submit( """ - # pre submission checks # make sure we have some QCSpec to submit self._check_qc_specs() @@ -738,7 +731,6 @@ def _molecules_to_inchikey(self) -> List[str]: return inchikey - # TODO: SinglepointDataset class BasicDataset(_BaseDataset): """ @@ -799,7 +791,9 @@ def __add__(self, other: "BasicDataset") -> "BasicDataset": return new_dataset - def _generate_collection(self, client: "PortalClient") -> ptl.datasets.SinglepointDataset: + def _generate_collection( + self, client: "PortalClient" + ) -> ptl.datasets.SinglepointDataset: return client.add_dataset( dataset_type="singlepoint", @@ -817,7 +811,7 @@ def _get_specifications(self) -> Dict[str, QCSpecification]: """Needed for `submit` usage.""" ret = {} - for spec_name,spec in self.qc_specifications.items(): + for spec_name, spec in self.qc_specifications.items(): ret[spec_name] = QCSpecification( driver=self.driver, method=spec.method, @@ -829,7 +823,6 @@ def _get_specifications(self) -> Dict[str, QCSpecification]: return ret - def _get_entries(self) -> List[SinglepointDatasetNewEntry]: entries: List[SinglepointDatasetNewEntry] = [] @@ -844,13 +837,16 @@ def _get_entries(self) -> List[SinglepointDatasetNewEntry]: for j, molecule in enumerate(entry.initial_molecules): name = index + f"-{tag + j}" - entries.append(SinglepointDatasetNewEntry(name=name, molecule=molecule)) + entries.append( + SinglepointDatasetNewEntry(name=name, molecule=molecule) + ) else: entries.append( SinglepointDatasetNewEntry( name=entry_name, molecule=entry.initial_molecules[0], - )) + ) + ) return entries @@ -1008,7 +1004,7 @@ def _get_specifications(self) -> Dict[str, OptimizationSpecification]: ret = {} - for spec_name,spec in self.qc_specifications.items(): + for spec_name, spec in self.qc_specifications.items(): qc_spec = QCSpecification( driver=self.driver, method=spec.method, @@ -1021,7 +1017,7 @@ def _get_specifications(self) -> Dict[str, OptimizationSpecification]: ret[spec_name] = OptimizationSpecification( program=self.optimization_procedure.program, qc_specification=qc_spec, - keywords=opt_kw + keywords=opt_kw, ) return ret @@ -1040,17 +1036,21 @@ def _get_entries(self) -> List[OptimizationDatasetNewEntry]: for j, molecule in enumerate(entry.initial_molecules): name = index + f"-{tag + j}" - entries.append(OptimizationDatasetNewEntry(name=name, initial_molecule=molecule)) + entries.append( + OptimizationDatasetNewEntry( + name=name, initial_molecule=molecule + ) + ) else: entries.append( OptimizationDatasetNewEntry( name=entry_name, initial_molecule=entry.initial_molecules[0], - )) + ) + ) return entries - def to_tasks(self) -> Dict[str, List[OptimizationInput]]: """ Build a list of QCEngine optimisation inputs organised by the optimisation engine which should be used to run the task. @@ -1222,15 +1222,16 @@ def _get_entries(self) -> List[TorsiondriveDatasetNewEntry]: td_keywords.update(entry.keywords.dict(exclude_defaults=True)) - entries.append(TorsiondriveDatasetNewEntry( - name=entry_name, - initial_molecules=entry.initial_molecules, - torsiondrive_keywords=td_keywords - )) + entries.append( + TorsiondriveDatasetNewEntry( + name=entry_name, + initial_molecules=entry.initial_molecules, + torsiondrive_keywords=td_keywords, + ) + ) return entries - def to_tasks(self) -> Dict[str, List[OptimizationInput]]: """Build a list of QCEngine procedure tasks which correspond to this dataset.""" diff --git a/openff/qcsubmit/procedures.py b/openff/qcsubmit/procedures.py index 37d67931..49beccf5 100644 --- a/openff/qcsubmit/procedures.py +++ b/openff/qcsubmit/procedures.py @@ -2,7 +2,7 @@ The procedure settings controllers """ -from typing import Dict, Any +from typing import Any, Dict from pydantic import BaseModel, Field, validator from qcportal.records.optimization import OptimizationSpecification diff --git a/openff/qcsubmit/results/caching.py b/openff/qcsubmit/results/caching.py index 28b34d83..c5cdad2e 100644 --- a/openff/qcsubmit/results/caching.py +++ b/openff/qcsubmit/results/caching.py @@ -9,8 +9,12 @@ from openff.toolkit.topology import Molecule from qcportal import PortalClient from qcportal.molecules import Molecule as QCMolecule -from qcportal.records import TorsiondriveRecord -from qcportal.records import OptimizationRecord, BaseRecord, SinglepointRecord +from qcportal.records import ( + BaseRecord, + OptimizationRecord, + SinglepointRecord, + TorsiondriveRecord, +) try: from openmm import unit @@ -159,7 +163,7 @@ def cached_query_procedures(client_address: str, record_ids: List[str]) -> List[ client_address = client_address.rstrip("/") client = cached_fractal_client(client_address) - query_limit = client.api_limits['get_records'] + query_limit = client.api_limits["get_records"] return _cached_client_query( client_address, @@ -186,7 +190,7 @@ def cached_query_molecules( client_address = client_address.rstrip("/") client = cached_fractal_client(client_address) - query_limit = client.api_limits['get_molecules'] + query_limit = client.api_limits["get_molecules"] return _cached_client_query( client_address, @@ -330,10 +334,14 @@ def cached_query_torsion_drive_results( qc_record = qc_records[result.record_id] - qc_grid_molecules = [(grid_point, opt.final_molecule) - for grid_point, opt in qc_record.minimum_optimizations.items()] + qc_grid_molecules = [ + (grid_point, opt.final_molecule) + for grid_point, opt in qc_record.minimum_optimizations.items() + ] # order the ids so the conformers follow the torsiondrive scan range - qc_grid_molecules.sort(key=lambda s: tuple(float(x) for x in s.strip("[]").split(", "))) + qc_grid_molecules.sort( + key=lambda s: tuple(float(x) for x in s.strip("[]").split(", ")) + ) molecule: Molecule = Molecule.from_mapped_smiles( result.cmiles, allow_undefined_stereo=True diff --git a/openff/qcsubmit/results/filters.py b/openff/qcsubmit/results/filters.py index 052b9f58..49ef0cf8 100644 --- a/openff/qcsubmit/results/filters.py +++ b/openff/qcsubmit/results/filters.py @@ -16,8 +16,8 @@ from pydantic import BaseModel, Field, PrivateAttr, root_validator, validator from qcelemental.molutil import guess_connectivity from qcportal.records import ( - OptimizationRecord, BaseRecord, + OptimizationRecord, RecordStatusEnum, SinglepointRecord, ) @@ -26,6 +26,7 @@ from openmm import unit except ImportError: from simtk import unit + from typing_extensions import Literal from openff.qcsubmit.results.results import ( @@ -234,7 +235,12 @@ class LowestEnergyFilter(SinglepointRecordGroupFilter): def _filter_function( self, entries: List[ - Tuple["_BaseResult", Union[SinglepointRecord, OptimizationRecord], Molecule, str] + Tuple[ + "_BaseResult", + Union[SinglepointRecord, OptimizationRecord], + Molecule, + str, + ] ], ) -> List[Tuple["_BaseResult", str]]: """Only return the lowest energy entry or final molecule.""" @@ -366,7 +372,12 @@ def _compute_rmsd_matrix(self, molecule: Molecule) -> numpy.ndarray: def _filter_function( self, entries: List[ - Tuple["_BaseResult", Union[SinglepointRecord, OptimizationRecord], Molecule, str] + Tuple[ + "_BaseResult", + Union[SinglepointRecord, OptimizationRecord], + Molecule, + str, + ] ], ) -> List[Tuple["_BaseResult", str]]: @@ -440,7 +451,12 @@ class MinimumConformersFilter(SinglepointRecordGroupFilter): def _filter_function( self, entries: List[ - Tuple["_BaseResult", Union[SinglepointRecord, OptimizationRecord], Molecule, str] + Tuple[ + "_BaseResult", + Union[SinglepointRecord, OptimizationRecord], + Molecule, + str, + ] ], ) -> List[Tuple["_BaseResult", str]]: diff --git a/openff/qcsubmit/results/results.py b/openff/qcsubmit/results/results.py index 04f775bd..6deb7ece 100644 --- a/openff/qcsubmit/results/results.py +++ b/openff/qcsubmit/results/results.py @@ -21,17 +21,23 @@ from openmm import unit except ImportError: from simtk import unit + import numpy import qcportal from openff.toolkit.topology import Molecule from openff.toolkit.typing.engines.smirnoff import ForceField from pydantic import BaseModel, Field, validator -from qcportal.datasets import OptimizationDataset, TorsiondriveDataset from qcportal.datasets import BaseDataset as QCDataset +from qcportal.datasets import OptimizationDataset, TorsiondriveDataset from qcportal.datasets.singlepoint import SinglepointDataset, SinglepointDatasetNewEntry -from qcportal.records import OptimizationRecord, SinglepointRecord, TorsiondriveRecord, RecordStatusEnum +from qcportal.records import ( + BaseRecord, + OptimizationRecord, + RecordStatusEnum, + SinglepointRecord, + TorsiondriveRecord, +) from qcportal.records.singlepoint import SinglepointDriver -from qcportal.records import BaseRecord from typing_extensions import Literal from openff.qcsubmit.common_structures import Metadata, MoleculeAttributes, QCSpec @@ -286,7 +292,9 @@ def from_datasets( datasets = [datasets] if not all(isinstance(dataset, SinglepointDataset) for dataset in datasets): - raise TypeError("A ``BasicResultCollection`` can only be created from ``SinglepointDataset`` objects.") + raise TypeError( + "A ``BasicResultCollection`` can only be created from ``SinglepointDataset`` objects." + ) result_records = defaultdict(dict) @@ -295,27 +303,36 @@ def from_datasets( client = dataset.client # Fetch all entries for use later - dataset.fetch_entries(include=['molecule']) + dataset.fetch_entries(include=["molecule"]) if spec_name not in dataset.specifications: - raise KeyError(f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification") + raise KeyError( + f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification" + ) - for entry_name, spec_name, record in dataset.iterate_records(specification_names=spec_name, - status=RecordStatusEnum.complete): + for entry_name, spec_name, record in dataset.iterate_records( + specification_names=spec_name, status=RecordStatusEnum.complete + ): entry = dataset.get_entry(entry_name) molecule = entry.molecule - cmiles = molecule.extras["canonical_isomeric_explicit_hydrogen_mapped_smiles"] + cmiles = molecule.extras[ + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ] inchi_key = molecule.attributes.get("fixed_hydrogen_inchi_key") # Undefined stereochemistry is not expected however there # may be some TK specific edge cases we don't want # exceptions for such as OE and nitrogen stereochemistry. if inchi_key is None: - tmp_mol=Molecule.from_mapped_smiles(cmiles, allow_undefined_stereo=True) - inchi_key=tmp_mol.to_inchikey(fixed_hydrogens=True) + tmp_mol = Molecule.from_mapped_smiles( + cmiles, allow_undefined_stereo=True + ) + inchi_key = tmp_mol.to_inchikey(fixed_hydrogens=True) - br = BasicResult(record_id=record.id, cmiles=cmiles, inchi_key=inchi_key) + br = BasicResult( + record_id=record.id, cmiles=cmiles, inchi_key=inchi_key + ) result_records[client.address][record.id] = br return cls( @@ -358,7 +375,7 @@ def to_records(self) -> List[Tuple[SinglepointRecord, Molecule]]: client = cached_fractal_client(address=client_address) for record in records: - rec = client.get_singlepoints(record.record_id, include=['molecule']) + rec = client.get_singlepoints(record.record_id, include=["molecule"]) # OpenFF molecule molecule: Molecule = Molecule.from_mapped_smiles( @@ -417,26 +434,34 @@ def from_datasets( client = dataset.client # Fetch all entries for use later - dataset.fetch_entries(include=['initial_molecule']) + dataset.fetch_entries(include=["initial_molecule"]) if spec_name not in dataset.specifications: - raise KeyError(f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification") - + raise KeyError( + f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification" + ) - for entry_name, spec_name, record in dataset.iterate_records(specification_names=spec_name, - status=RecordStatusEnum.complete): + for entry_name, spec_name, record in dataset.iterate_records( + specification_names=spec_name, status=RecordStatusEnum.complete + ): entry = dataset.get_entry(entry_name) molecule = entry.initial_molecule - cmiles = entry.attributes["canonical_isomeric_explicit_hydrogen_mapped_smiles"] + cmiles = entry.attributes[ + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ] inchi_key = molecule.extras.get("fixed_hydrogen_inchi_key") if inchi_key is None: - tmp_mol = Molecule.from_mapped_smiles(cmiles, allow_undefined_stereo=True) - inchi_key=tmp_mol.to_inchikey(fixed_hydrogens=True) + tmp_mol = Molecule.from_mapped_smiles( + cmiles, allow_undefined_stereo=True + ) + inchi_key = tmp_mol.to_inchikey(fixed_hydrogens=True) - opt_rec = OptimizationResult(record_id=record.id, cmiles=cmiles, inchi_key=inchi_key) + opt_rec = OptimizationResult( + record_id=record.id, cmiles=cmiles, inchi_key=inchi_key + ) result_records[client.address][record.id] = opt_rec return cls( @@ -480,7 +505,9 @@ def to_records(self) -> List[Tuple[OptimizationRecord, Molecule]]: client = cached_fractal_client(address=client_address) for record in records: - rec = client.get_optimizations(record.record_id, include=['initial_molecule']) + rec = client.get_optimizations( + record.record_id, include=["initial_molecule"] + ) # OpenFF molecule molecule: Molecule = Molecule.from_mapped_smiles( @@ -488,14 +515,14 @@ def to_records(self) -> List[Tuple[OptimizationRecord, Molecule]]: ) molecule.add_conformer( - numpy.array(rec.initial_molecule.geometry, float).reshape(-1, 3) * unit.bohr + numpy.array(rec.initial_molecule.geometry, float).reshape(-1, 3) + * unit.bohr ) records_and_molecules.append((rec, molecule)) return records_and_molecules - # NOTE: no longer using `driver` here def to_basic_result_collection(self) -> BasicResultCollection: """Returns a basic results collection which references results records which @@ -517,7 +544,9 @@ def to_basic_result_collection(self) -> BasicResultCollection: # will be inefficient at the moment for record, molecule in records_and_molecules: - result_records[record.client.address].append((record.trajectory[-1], molecule)) + result_records[record.client.address].append( + (record.trajectory[-1], molecule) + ) result_entries = defaultdict(list) @@ -525,14 +554,14 @@ def to_basic_result_collection(self) -> BasicResultCollection: for record, molecule in result_records[client_address]: result_entries[client_address].append( - BasicResult( - record_id=record.id, - cmiles=molecule.to_smiles( - isomeric=True, explicit_hydrogens=True, mapped=True - ), - inchi_key=molecule.to_inchikey(fixed_hydrogens=True), - ) + BasicResult( + record_id=record.id, + cmiles=molecule.to_smiles( + isomeric=True, explicit_hydrogens=True, mapped=True + ), + inchi_key=molecule.to_inchikey(fixed_hydrogens=True), ) + ) return BasicResultCollection(entries=result_entries) @@ -579,7 +608,10 @@ def create_basic_dataset( metadata={} if metadata is None else metadata, qc_specifications={"default": QCSpec()} if qc_specifications is None - else {qc_specification.spec_name: qc_specification for qc_specification in qc_specifications}, + else { + qc_specification.spec_name: qc_specification + for qc_specification in qc_specifications + }, ) for records in records_by_cmiles.values(): @@ -645,21 +677,30 @@ def from_datasets( dataset.fetch_entries() if spec_name not in dataset.specifications: - raise KeyError(f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification") + raise KeyError( + f"The {dataset.name} dataset does not contain a '{spec_name}' compute specification" + ) - for entry_name, spec_name, record in dataset.iterate_records(specification_names=spec_name, - status=RecordStatusEnum.complete): + for entry_name, spec_name, record in dataset.iterate_records( + specification_names=spec_name, status=RecordStatusEnum.complete + ): entry = dataset.get_entry(entry_name) - cmiles = entry.attributes["canonical_isomeric_explicit_hydrogen_mapped_smiles"] + cmiles = entry.attributes[ + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ] inchi_key = entry.attributes.get("fixed_hydrogen_inchi_key") if inchi_key is None: - tmp_mol = Molecule.from_mapped_smiles(cmiles, allow_undefined_stereo=True) + tmp_mol = Molecule.from_mapped_smiles( + cmiles, allow_undefined_stereo=True + ) inchi_key = tmp_mol.to_inchikey(fixed_hydrogens=True) - td_rec = TorsionDriveResult(record_id=record.id, cmiles=cmiles, inchi_key=inchi_key) + td_rec = TorsionDriveResult( + record_id=record.id, cmiles=cmiles, inchi_key=inchi_key + ) result_records[client.address][record.id] = td_rec return cls( @@ -712,7 +753,9 @@ def to_records(self) -> List[Tuple[TorsiondriveRecord, Molecule]]: ) # Map of torsion drive keys to minimum optimization - qc_grid_molecules = [(k, v.final_molecule) for k,v in rec.minimum_optimizations.items()] + qc_grid_molecules = [ + (k, v.final_molecule) for k, v in rec.minimum_optimizations.items() + ] # order the ids so the conformers follow the torsiondrive scan range # x[0] is the torsiondrive key, ie "[90]" @@ -730,7 +773,6 @@ def to_records(self) -> List[Tuple[TorsiondriveRecord, Molecule]]: return records_and_molecules - def create_optimization_dataset( self, dataset_name: str, diff --git a/openff/qcsubmit/tests/results/__init__.py b/openff/qcsubmit/tests/results/__init__.py index 927b5f32..89aedc73 100644 --- a/openff/qcsubmit/tests/results/__init__.py +++ b/openff/qcsubmit/tests/results/__init__.py @@ -6,16 +6,10 @@ from pydantic import BaseModel from qcelemental.models import DriverEnum from qcelemental.models.procedures import TDKeywords - -from qcportal.records import ( - SinglepointRecord, - OptimizationRecord, - TorsiondriveRecord, -) - +from qcportal.records import OptimizationRecord, SinglepointRecord, TorsiondriveRecord +from qcportal.records.models import RecordStatusEnum from qcportal.records.optimization.models import OptimizationSpecification from qcportal.records.singlepoint.models import QCSpecification -from qcportal.records.models import RecordStatusEnum try: from openmm import unit diff --git a/openff/qcsubmit/tests/results/test_caching.py b/openff/qcsubmit/tests/results/test_caching.py index 512e13d4..aadca373 100644 --- a/openff/qcsubmit/tests/results/test_caching.py +++ b/openff/qcsubmit/tests/results/test_caching.py @@ -4,7 +4,7 @@ import pytest import requests_mock from openff.toolkit.topology import Molecule -from qcportal.records import OptimizationRecord, SinglepointRecord +from qcportal.records import OptimizationRecord, SinglepointRecord try: from openmm import unit diff --git a/openff/qcsubmit/tests/results/test_filters.py b/openff/qcsubmit/tests/results/test_filters.py index b2c270ea..7de62cea 100644 --- a/openff/qcsubmit/tests/results/test_filters.py +++ b/openff/qcsubmit/tests/results/test_filters.py @@ -5,7 +5,7 @@ from openff.toolkit.topology import Molecule from pydantic import ValidationError from qcelemental.models import DriverEnum -from qcportal.records import SinglepointRecord, RecordStatusEnum +from qcportal.records import RecordStatusEnum, SinglepointRecord try: from openmm import unit diff --git a/openff/qcsubmit/tests/results/test_results.py b/openff/qcsubmit/tests/results/test_results.py index a71bf082..c806b742 100644 --- a/openff/qcsubmit/tests/results/test_results.py +++ b/openff/qcsubmit/tests/results/test_results.py @@ -9,22 +9,21 @@ from openff.toolkit.topology import Molecule from openff.toolkit.typing.engines.smirnoff import ForceField from pydantic import ValidationError +from qcelemental.models import DriverEnum from qcportal import PortalClient from qcportal.molecules import Molecule as QCMolecule from qcportal.records import ( - SinglepointRecord, OptimizationRecord, - TorsiondriveRecord, RecordStatusEnum, + SinglepointRecord, + TorsiondriveRecord, ) -from qcelemental.models import DriverEnum - +from qcportal.records.optimization import OptimizationSpecification +from qcportal.records.singlepoint import QCSpecification from qcportal.records.torsiondrive import ( - TorsiondriveSpecification, TorsiondriveKeywords, + TorsiondriveSpecification, ) -from qcportal.records.optimization import OptimizationSpecification -from qcportal.records.singlepoint import QCSpecification try: from openmm import unit