diff --git a/.github/workflows/Sandpit_exs.yml b/.github/workflows/Sandpit_exs.yml index 4f7f9fad2..76b9a3242 100644 --- a/.github/workflows/Sandpit_exs.yml +++ b/.github/workflows/Sandpit_exs.yml @@ -35,7 +35,7 @@ jobs: - name: Install dependency run: | - mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2023.3" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm + mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2023.4" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm python -m pip install git+https://github.com/Exscientia/MDRestraintsGenerator.git # For the testing of BSS.FreeEnergy.AlchemicalFreeEnergy.analysis python -m pip install https://github.com/alchemistry/alchemtest/archive/master.zip diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index 77dde7a7b..5420e438a 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -9,6 +9,12 @@ company supporting open-source development of fostering academic/industrial coll within the biomolecular simulation community. Our software is hosted via the `OpenBioSim` `GitHub `__ organisation. +`2023.5.0 `_ - Dec 16 2023 +------------------------------------------------------------------------------------------------- + +* Add support for detecting nucleic acid backbones (`@fjclark `_) (`#189 `__). +* Added SOMD and GROMACS support for multiple distance restraints for ABFE calculations (`#178 `__). + `2023.4.1 `_ - Dec 14 2023 ------------------------------------------------------------------------------------------------- @@ -17,7 +23,7 @@ within the biomolecular simulation community. Our software is hosted via the `Op * Make sure velocities are double counted when searching for velocity properties when combining molecules (`#197 `__). * Remove redundant ``BioSimSpace.Types.Type.__ne__`` operator (`#201 `__). * Minor internal updates due to Sire API fixes (`#203 `__). -* Fixed bug in the BSS Boresch restraint search code (`@fjclark `_) (`#204 `__). +* Fixed bug in the Boresch restraint search code (`@fjclark `_) (`#204 `__). * Fixed ``renumber`` option in :meth:`extract ` method (`#210 `__). * Add workaround for fixing reconstruction of intrascale matrix in :func:`readPerturbableSystem ` function (`#210 `__). * Remove incorrect ``try_import`` statement in metadynamics driver script and make sure that global parameters in OpenMM script are unique (`#217 `__). diff --git a/python/BioSimSpace/Parameters/__init__.py b/python/BioSimSpace/Parameters/__init__.py index 9c0610b06..de89a1d5a 100644 --- a/python/BioSimSpace/Parameters/__init__.py +++ b/python/BioSimSpace/Parameters/__init__.py @@ -141,7 +141,7 @@ # ready to be returned. molecule = BSS.Parameters.ff14SB(molecule, water_model="tip3p").getMolecule() -Additional parameters can be loaded by passing the ``leap_commands`` option to +Additional parameters can be loaded by passing the ``pre_mol_commands`` option to any compatible AMBER force field function, e.g.: .. code-block:: python @@ -157,7 +157,11 @@ # Initialise the parameterisation process and block until the molecule is # ready to be returned. - molecule = BSS.Parameters.ff14SB(molecule, leap_commands=leap_commands).getMolecule() + molecule = BSS.Parameters.ff14SB(molecule, pre_mol_commands=leap_commands).getMolecule() + +If you need to apply any additional operations on the loaded molecule, then this +can be done using the ``post_mol_commands`` option, using ``mol`` for the name of +the molecule. """ from ._parameters import * diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py index 0be2de619..95692bdc5 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py @@ -258,12 +258,23 @@ def __init__( "Use the 'BioSimSpace.Align' package to map and merge molecules." ) - if self._protocol.getPerturbationType() != "full": + pert_type = self._protocol.getPerturbationType() + if pert_type not in ["full", "release_restraint"]: raise NotImplementedError( "GROMACS currently only supports the 'full' perturbation " "type. Please use engine='SOMD' when running multistep " "perturbation types." ) + if pert_type == "release_restraint": + restraint_err = ValueError( + "The 'release_restraint' perturbation type requires a multiple " + "distance restraint restraint type." + ) + if not restraint: + raise restraint_err + if restraint._restraint_type != "multiple_distance": + raise restraint_err + self._exe = _gmx_exe elif engine == "AMBER": # Find a molecular dynamics engine and executable. @@ -354,6 +365,15 @@ def __init__( # Ensure that the system is compatible with the restraint restraint.system = self._system + # Warn the user about instabilities with multiple distance restraints in SOMD. + if restraint._restraint_type == "multiple_distance" and engine == "SOMD": + _warnings.warn( + "SOMD simulations with some non-interacting ligands can be unstable. This " + "affects some systems with multiple distance restraints during the release " + "restraint state. If you experience problems, consider using multiple distance " + "restraints with GROMACS instead." + ) + self._restraint = restraint # Create fake instance methods for 'analyse' and 'difference'. These diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py index 53d71a177..499846255 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py @@ -48,8 +48,13 @@ class Restraint: - """The Restraint class which holds the restraint information for the ABFE - calculations. Currently only Boresch type restraint is supported. + """ + The Restraint class which holds the restraint information for the ABFE + calculations. Currently Boresch restaraints and multiple distance restraints + (between pairs of atoms) are supported. For the multiple distance restraints, + it is assumed that all restraints but one will be released after decoupling, + and an analytical correction will be applied to account for releasing the last + restraint. Boresch restraint is a set of harmonic restraints containing one bond, two angle and three dihedrals, which comes from three atoms in the ligand @@ -62,7 +67,7 @@ class Restraint: Angles: r2-r1-l1 (thetaA0, kthetaA), r1-l1-l2 (thetaB0, kthetaB) Dihedrals: r3-r2-r1-l1 (phiA0, kphiA), r2-r1-l1-l2 (phiB0, kphiB), r1-l1-l2-l3 (phiC0, kphiC) - The restraint_dict has the following compact format. + The Boresch restraint_dict has the following compact format. restraint_dict = { "anchor_points":{"r1": BioSimSpace._SireWrappers.Atom, @@ -83,8 +88,34 @@ class Restraint: "kphiA": BioSimSpace.Types.Energy / (BioSimSpace.Types.Area * BioSimSpace.Types.Area), "kphiB": BioSimSpace.Types.Energy / (BioSimSpace.Types.Area * BioSimSpace.Types.Area), "kphiC": BioSimSpace.Types.Energy / (BioSimSpace.Types.Area * BioSimSpace.Types.Area)}} + + The multiple distance restraints are flat-bottom restraints, which are represented as a dictionaries + of the form: + + distance_restraint_dict = {r1: BioSimSpace._SireWrappers.Atom, + l1: BioSimSpace._SireWrappers.Atom, + r0: BioSimSpace.Types.Length, + kr: BioSimSpace.Types.Energy / BioSimSpace.Types.Area, + r_fb: BioSimSpace.Types.Length} + + where r1 and l1 are the atoms involved in the restraint in the receptor and the ligand respectively, + r0 and kr are the equilibrium distance and the force constant, and r_fb is the flat bottom radius. + + The overall restraint_dict is a dictionary of the form: + + restraint_dict = {"distance_restraints": [distance_restraint_dict, ...], + "permanent_distance_restraint": distance_restraint_dict} + + where distance_restraints is a list of distance restraints which are released after decoupling, + and permanent_distance_restraint is the distance restraint which is not released after decoupling. """ + # Create a dict of supported restraints and compatible engines. + supported_restraints = { + "boresch": ["gromacs", "somd"], + "multiple_distance": ["gromacs", "somd"], + } + def __init__(self, system, restraint_dict, temperature, restraint_type="Boresch"): """ Constructor. @@ -102,7 +133,7 @@ def __init__(self, system, restraint_dict, temperature, restraint_type="Boresch" The temperature of the system restraint_type : str - The type of the restraint. (`Boresch`, ) + The type of the restraint. (`Boresch`, `multiple_distance`) """ if not isinstance(temperature, _Temperature): raise ValueError( @@ -208,10 +239,75 @@ def __init__(self, system, restraint_dict, temperature, restraint_type="Boresch" " values further from 0 or pi radians." ) + elif restraint_type.lower() == "multiple_distance": + self._restraint_type = "multiple_distance" + + if not set(restraint_dict.keys()) == { + "distance_restraints", + "permanent_distance_restraint", + }: + raise ValueError( + "restraint_dict must have keys 'distance_restraints' and 'permanent_distance_restraint'" + ) + + # Warn the user if there are no distance restraints (although they may be deliberately supplying + # only the permanent distance restraint) + if len(restraint_dict["distance_restraints"]) == 0: + _warnings.warn( + "No distance restraints have been specified other than the permanent distance restraint." + ) + + # Check each distance restraint is of the correct format + all_restraints = restraint_dict["distance_restraints"] + [ + restraint_dict["permanent_distance_restraint"] + ] + for single_restraint_dict in all_restraints: + if not set(single_restraint_dict.keys()) == { + "r1", + "l1", + "r0", + "r_fb", + "kr", + }: + raise ValueError( + "distance_restraint_dict must have keys 'r1', 'l1', 'r0', 'r_fb', 'kr' " + f"but has keys {list(single_restraint_dict.keys())}" + ) + + # Check that the atoms are of type BioSimSpace._SireWrappers.Atom + for key in ["r1", "l1"]: + if not isinstance(single_restraint_dict[key], _Atom): + raise ValueError( + f"distance_restraint_dict['{key}'] " + "must be of type 'BioSimSpace._SireWrappers.Atom'" + ) + + # Test that all quantities have the correct units + for key in ["r0", "r_fb"]: + if not isinstance(single_restraint_dict[key], _Length): + raise ValueError( + f"distance_restraint_dict['{key}'] must be of type " + "'BioSimSpace.Types.Length'" + ) + if not single_restraint_dict["kr"].dimensions() == ( + 0, + 0, + 0, + 1, + -1, + 0, + -2, + ): + raise ValueError( + "distance_restraint_dict['kr'] must be of type " + "'BioSimSpace.Types.Energy'/'BioSimSpace.Types.Length^2'" + ) + else: raise NotImplementedError( f"Restraint type {type} not implemented " - f"yet. Only Boresch restraint is supported." + f"yet. Only {Restraint.supported_restraints.keys()} " + "are currently supported." ) self._restraint_dict = restraint_dict @@ -236,33 +332,55 @@ def system(self, system): raise TypeError( "'system' must be of type 'BioSimSpace._SireWrappers.System'" ) - else: - if self._restraint_type == "boresch": - # Check if the ligand atoms are decoupled. - # Find the decoupled molecule, assume that only one can be - # decoupled. - (decoupled_mol,) = system.getDecoupledMolecules() - for key in ["l1", "l2", "l3"]: - atom = self._restraint_dict["anchor_points"][key] - # Discussed in https://github.com/michellab/BioSimSpace/pull/337 - if ( - atom._sire_object.molecule().number() - != decoupled_mol._sire_object.number() - ): - raise ValueError( - f"The ligand atom {key} is not from decoupled moleucle." - ) - for key in ["r1", "r2", "r3"]: - atom = self._restraint_dict["anchor_points"][key] - if not atom in system: - raise ValueError( - f"The protein atom {key} is not in the system." - ) - # Store a copy of solvated system. - self._system = system.copy() + if self._restraint_type == "boresch": + # Check if the ligand atoms are decoupled. + # Find the decoupled molecule, assume that only one can be + # decoupled. + (decoupled_mol,) = system.getDecoupledMolecules() + for key in ["l1", "l2", "l3"]: + atom = self._restraint_dict["anchor_points"][key] + # Discussed in https://github.com/michellab/BioSimSpace/pull/337 + if ( + atom._sire_object.molecule().number() + != decoupled_mol._sire_object.number() + ): + raise ValueError( + f"The ligand atom {key} is not from decoupled molecule." + ) + for key in ["r1", "r2", "r3"]: + atom = self._restraint_dict["anchor_points"][key] + if not atom in system: + raise ValueError(f"The receptor atom {key} is not in the system.") + + if self._restraint_type == "multiple_distance": + # Check if the ligand atoms are decoupled. + # Find the decoupled molecule, assume that only one can be + # decoupled. + (decoupled_mol,) = system.getDecoupledMolecules() + + all_restraints = self._restraint_dict["distance_restraints"] + [ + self._restraint_dict["permanent_distance_restraint"] + ] + for single_restraint_dict in all_restraints: + ligand_atom = single_restraint_dict["l1"] + if ( + ligand_atom._sire_object.molecule().number() + != decoupled_mol._sire_object.number() + ): + raise ValueError( + f"The ligand atom {ligand_atom} is not from decoupled molecule." + ) + receptor_atom = single_restraint_dict["r1"] + if not receptor_atom in system: + raise ValueError( + f"The protein atom {receptor_atom} is not in the system." + ) + + # Store a copy of solvated system. + self._system = system.copy() - def _gromacs_boresch(self): + def _gromacs_boresch(self, perturbation_type=None): """Format the Gromacs string for boresch restraint.""" # Format the atoms into index list @@ -368,7 +486,130 @@ def write_dihedral(key_list, equilibrium_values, force_constants): return "\n".join(output) - def _somd_boresch(self): + def _gromacs_multiple_distance(self, perturbation_type=None): + """ + Format the Gromacs string for multiple distance restraints. + + Parameters + ---------- + perturbation_type : str, optional, default=None + The type of perturbation to applied during the current stage of the free energy + calculation. If the perturbation type is "release_restraint", the permanent distance + restraint will be written as a distance restraint (topology file directive [ distance + restraints ], not affected by any lambda values), while all other restraints will be + written as restraint potentials (topology file directive [ bonds ], affected by bonded-lambda. + This allows the bond restraints to be released while retaining the permanent distance restraint. + For all other perturbation types, all restraints will be written as restraint potential bonds. + + Returns + ------- + str + The Gromacs string for the multiple distance restraints. + """ + + def _get_distance_restraint_restraint_str(r1, l1, r0, r_fb, kr): + """ + Get the text line specifying a distance restraint restraint + (unaffected by any lambdas). + """ + # Calculate parameters. + ai = self._system.getIndex(r1) + 1 + aj = self._system.getIndex(l1) + 1 + low = r0 - r_fb + up1 = r0 + r_fb + up2 = 100 # Set this unresonably high so that we never get a linear potential. + + # Format strings, remembering to convert units. + restr_type_str = "2" # No averaging + restr_index_str = "0" # Only one restraint + low_str = "{:.3f}".format(low / _nanometer) + up1_str = "{:.3f}".format(up1 / _nanometer) + up2_str = "{:.3f}".format(up2) + fac_str = ( + "1.0" # Scale the force constant (specified in the mdp file) by 1.0. + ) + + # Format entire string. + # ai, aj, type, index, type', low, up1, up2, fac + distance_restraint_restraint_parameter_string = f" {ai:<10} {aj:<10} {restr_type_str:<10} {restr_index_str:<10} {restr_type_str:<10} {low_str:<10} {up1_str:<10} {up2_str:<10} {fac_str:<10}" + + return distance_restraint_restraint_parameter_string + + def _get_restraint_potential_bond_str(r1, l1, r0, r_fb, kr): + """ + Get the text line specifying a restraint potential bond + (affected by bonded-lambda). + """ + # Calculate parameters. + ai = self._system.getIndex(r1) + 1 + aj = self._system.getIndex(l1) + 1 + low = r0 - r_fb + up1 = r0 + r_fb + up2 = 100 # Set this unresonably high so that we never get a linear potential. + + # Format strings, remembering to convert units. + restr_type_str = "10" # Restraint potential bond + low_str = "{:.3f}".format(low / _nanometer) + up1_str = "{:.3f}".format(up1 / _nanometer) + up2_str = "{:.3f}".format(up2) + kdr_0_str = "{:.2f}".format( + 0 + ) # Force constant is 0 when bonded-lambda = 0. + kdr_str = "{:.2f}".format(kr / (_kj_per_mol / _nanometer**2)) + + # Format entire string. + # ai, aj, type, lowA, up1A, up2A, kdrA, lowB, up1B, up2B, kdrB + restraint_potential_bond_parameter_string = f" {ai:<10} {aj:<10} {restr_type_str:<10} {low_str:<10} {up1_str:<10} {up2_str:<10} {kdr_0_str:<10} {low_str:<10} {up1_str:<10} {up2_str:<10} {kdr_str:<10}" + + return restraint_potential_bond_parameter_string + + # Write the output string. + output = [ + "[ intermolecular_interactions ]", + ] + + output.append("[ bonds ]") + output.append( + "; ai aj type lowA up1A up2A kdrA lowB up1B up2B kdrB" + ) + for restraint in self._restraint_dict["distance_restraints"]: + output.append( + _get_restraint_potential_bond_str( + restraint["r1"], + restraint["l1"], + restraint["r0"], + restraint["r_fb"], + restraint["kr"], + ) + ) + if perturbation_type != "release_restraint": + output.append( + _get_restraint_potential_bond_str( + self._restraint_dict["permanent_distance_restraint"]["r1"], + self._restraint_dict["permanent_distance_restraint"]["l1"], + self._restraint_dict["permanent_distance_restraint"]["r0"], + self._restraint_dict["permanent_distance_restraint"]["r_fb"], + self._restraint_dict["permanent_distance_restraint"]["kr"], + ) + ) + else: # Perturbation type is release_restraint - write the permanent restraint as a distance restraint + output.append("[ distance_restraints ]") + output.append( + "; ai aj type index type' low up1 up2 fac" + ) + output.append( + _get_distance_restraint_restraint_str( + self._restraint_dict["permanent_distance_restraint"]["r1"], + self._restraint_dict["permanent_distance_restraint"]["l1"], + self._restraint_dict["permanent_distance_restraint"]["r0"], + self._restraint_dict["permanent_distance_restraint"]["r_fb"], + self._restraint_dict["permanent_distance_restraint"]["kr"], + ) + ) + + return "\n".join(output) + + def _somd_boresch(self, perturbation_type=None): """Format the SOMD string for the Boresch restraints.""" # Indices @@ -420,7 +661,48 @@ def _somd_boresch(self): return restr_string - def toString(self, engine=None): + def _somd_multiple_distance(self, perturbation_type=None): + """Format the SOMD string for the multiple distance restraints.""" + + def _add_restr_to_str(restr, restr_string): + """Apend the information for a single restraint to the string.""" + # Indices + r1 = self._system.getIndex(restr["r1"]) + l1 = self._system.getIndex(restr["l1"]) + # Equilibrium value + r0 = restr["r0"] / _angstrom + # Force constant. Halve these as SOMD defines force constants as E = kx**2 + kr = (restr["kr"] / (_kcal_per_mol / (_angstrom * _angstrom))) / 2 + # Flat bottomed radius + r_fb = restr["r_fb"] / _angstrom + + restr_string += f"({r1}, {l1}): ({r0}, {kr}, {r_fb}), " + return restr_string + + standard_restr_string = "distance restraints dictionary = {" + + for single_restraint in self._restraint_dict["distance_restraints"]: + standard_restr_string = _add_restr_to_str( + single_restraint, standard_restr_string + ) + + if perturbation_type == "restraint": + # In this case, we want all restraints to be switched on, even "permanent" ones + standard_restr_string = _add_restr_to_str( + self._restraint_dict["permanent_distance_restraint"], + standard_restr_string, + ) + return standard_restr_string[:-2] + "}" + else: # Other perturbation types, we want the permanent restraints to be constantly on + standard_restr_string = standard_restr_string[:-2] + "}\n" + permanent_restr_string = "permanent distance restraints dictionary = {" + permanent_restr_string = _add_restr_to_str( + self._restraint_dict["permanent_distance_restraint"], + permanent_restr_string, + ) + return standard_restr_string + permanent_restr_string[:-2] + "}" + + def toString(self, engine, perturbation_type=None): """ The method for convert the restraint to a format that could be used by MD Engines. @@ -429,43 +711,50 @@ def toString(self, engine=None): ---------- engine : str - The molecular dynamics engine used to generate the restraint. - Available options currently is "GROMACS" and "SOMD". If this argument - is omitted then BioSimSpace will choose an appropriate engine - for you. + The molecular dynamics engine for which to generate the restraint. + Available options are currently "GROMACS" or "SOMD" for Boresch restraints, + or "SOMD" only for multiple distance restraints. + perturbation_type : str, optional, default=None + The type of perturbation to applied during the current stage of the free energy + calculation. This is only used for multiple distance restraints, for which all + restraints are converted to standard distance restraints to allow them to be + turned on when the perturbation type is "restraint", but for which the permanent + distance restraint is always active if the perturbation type is "release_restraint" + (or any other perturbation type). """ - if engine.strip().lower() == "gromacs": - if self._restraint_type == "boresch": - return self._gromacs_boresch() - elif engine.lower() == "somd": - if self._restraint_type == "boresch": - return self._somd_boresch() - else: - raise NotImplementedError( - f"Restraint type {self._restraint_type} not implemented " - f"yet. Only Boresch restraints are supported." - ) - else: + to_str_functions = { + "boresch": {"gromacs": self._gromacs_boresch, "somd": self._somd_boresch}, + "multiple_distance": { + "gromacs": self._gromacs_multiple_distance, + "somd": self._somd_multiple_distance, + }, + } + + engine = engine.strip().lower() + try: + str_fn = to_str_functions[self._restraint_type][engine] + except KeyError: raise NotImplementedError( - f"MD Engine {engine} not implemented " - f"yet. Only Gromacs and SOMD are supported." + f"Restraint type {self._restraint_type} not implemented " + f"yet for {engine}." ) + return str_fn(perturbation_type) + def getCorrection(self, method="analytical"): """ Calculate the free energy of releasing the restraint - to the standard state volume.''' + to the standard state volume. Parameters ---------- - method : str The integration method to use for calculating the correction for releasing the restraint to the standard state concentration. - "numerical" or "analytical". Note that the analytical correction - can introduce errors when the restraints are weak, restrained - angles are close to 0 or pi radians, or the restrained distance - is close to 0. + "numerical" or "analytical". Note that the Boresch analytical + correction can introduce errors when the restraints are weak, + restrained angles are close to 0 or pi radians, or the restrained + distance is close to 0. Returns ---------- @@ -473,6 +762,16 @@ def getCorrection(self, method="analytical"): Free energy of releasing the restraint to the standard state volume, in kcal / mol. """ + # Constants. Take .value() to avoid issues with ** and log of GeneralUnit + v0 = ( + ((_Sire_meter3 / 1000) / _Sire_mole) / _Sire_angstrom3 + ).value() # standard state volume in A^3 + R = ( + _k_boltz.value() * _kcal_per_mol / _kelvin + ).value() # molar gas constant in kcal mol-1 K-1 + + # Parameters + T = self.T / _kelvin # Temperature in Kelvin if self._restraint_type == "boresch": # Constants. Take .value() to avoid issues with ** and log of GeneralUnit @@ -484,7 +783,6 @@ def getCorrection(self, method="analytical"): ).value() # molar gas constant in kcal mol-1 K-1 # Parameters - T = self.T / _kelvin # Temperature in Kelvin prefactor = ( 8 * (_np.pi**2) * v0 ) # In A^3. Divide this to account for force constants of 0 in the @@ -664,7 +962,101 @@ def numerical_dihedral_integrand(phi, phi0, kphi): f"Correction method {method} is not supported. Please choose from 'numerical' or 'analytical'." ) + if self._restraint_type == "multiple_distance": + if method == "analytical": + raise NotImplementedError( + "The analytical correction is not supported for multiple distance restraints." + ) + + else: + + def _numerical_distance_integrand(r, r0, r_fb, kr): + """ + Integrand for harmonic distance restraint. + + Parameters + ---------- + r : float + Distance to be integrated, in Angstrom + r0 : float + Equilibrium distance, in Angstrom + r_fb : float + Flat-bottomed radius, in Angstrom + kr : float + Force constant, in kcal mol-1 A-2 + + Returns + ------- + float + Value of integrand + + Notes + ----- + The domain of the integrand is [0, infinity], but this will + be truncated to [0, 8 RT] for practicality. + """ + r_eff = abs(r - r0) - r_fb + if r_eff < 0: + r_eff = 0 + return (r**2) * _np.exp(-(kr * r_eff**2) / (2 * R * T)) + + def _get_correction(r0, r_fb, kr): + """ + Get the free energy of releasing the harmonic distance restraint. + + Parameters + ---------- + r0 : float + Equilibrium distance, in Angstrom + r_fb : float + Flat-bottomed radius, in Angstrom + kr : float + Force constant, in kcal mol-1 A-2 + + Returns + ------- + float + Free energy of releasing the restraint + + Notes + ----- + The domain of the integrand is [0, infinity], but this will + be truncated to [0, 8 RT] for practicality. + """ + dist_at_8RT = ( + 4 * _np.sqrt((R * T) / kr) + r_fb + ) # Dist. which gives restraint energy = 8 RT + r_min = max(0, r0 - dist_at_8RT) + r_max = r0 + dist_at_8RT + integrand = lambda r: _numerical_distance_integrand(r, r0, r_fb, kr) + z_r = _integrate.quad(integrand, r_min, r_max)[0] + dg = -R * T * _np.log(v0 / (4 * _np.pi * z_r)) + + # Attatch unit of kcal/mol + dg *= _kcal_per_mol + + return dg + + # Get the parameters from the permanent distance restraint, which is not released + r0 = ( + self._restraint_dict["permanent_distance_restraint"]["r0"] + / _angstrom + ) + r_fb = ( + self._restraint_dict["permanent_distance_restraint"]["r_fb"] + / _angstrom + ) + kr = self._restraint_dict["permanent_distance_restraint"]["kr"] / ( + _kcal_per_mol / _angstrom2 + ) + _warnings.warn( + "The multiple distance restraint correction is assumes that only " + "the 'permanent_distance_restraint' is active." + ) + return _get_correction(r0, r_fb, kr) + @property def correction(self): """Give the free energy of removing the restraint.""" - return self.getCorrection() + method = "analytical" if self._restraint_type == "boresch" else "numerical" + return self.getCorrection(method=method) diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py index b4da8e8d3..30c4b0af4 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py @@ -33,6 +33,7 @@ import numpy as _np import os as _os from scipy.stats import circmean as _circmean +from sklearn.cluster import KMeans as _KMeans import warnings as _warnings from sire.legacy.Base import getBinDir as _getBinDir @@ -108,6 +109,9 @@ class RestraintSearch: # Create a list of supported molecular dynamics engines. _engines = ["GROMACS", "SOMD"] + # Create a list of supported restraint types. + _restraint_types = ["boresch", "multiple_distance"] + def __init__( self, system, @@ -334,7 +338,7 @@ def _analyse( ---------- restraint_type: str - The type of restraints to select (currently only Boresch is available). + The type of restraints to select, from "Boresch" or "multiple_distance". Default is 'Boresch'. method: str @@ -513,8 +517,8 @@ def analyse( The temperature of the system restraint_type : str - The type of restraints to select (currently only Boresch is available). - Default is ``Boresch``. + The type of restraints to select, from 'Boresch' or 'multiple_distance'. + Default is "Boresch". method : str The method to use to derive the restraints. 'BSS' or 'MDRestraintsGenerator'. @@ -558,6 +562,11 @@ def analyse( The restraints of `restraint_type` which best mimic the strongest receptor-ligand interactions. """ + _supported_methods = { + "boresch": ["BSS", "MDRestraintsGenerator"], + "multiple_distance": ["BSS"], + } + # Check all inputs if not isinstance(work_dir, str): @@ -587,9 +596,11 @@ def analyse( raise TypeError( f"restraint_type {type(restraint_type)} must be of type 'str'." ) - if not restraint_type.lower() == "boresch": + + if not restraint_type.lower() in RestraintSearch._restraint_types: raise NotImplementedError( - "Only Boresch restraints are currently implemented" + f"Restraint type {restraint_type} is not implemented. " + f"Please choose from {RestraintSearch._restraint_types}" ) if not isinstance(method, str): @@ -599,6 +610,12 @@ def analyse( "Deriving restraints using 'MDRestraintsGenerator'" "or 'BSS' are the only options implemented." ) + # Check that the selected method is supported for the selected restraint type. + if not method in _supported_methods[restraint_type.lower()]: + raise NotImplementedError( + f"Method {method} is not supported for restraint type {restraint_type}. " + f"Please choose from {_supported_methods[restraint_type.lower()]}" + ) if method.lower() == "mdrestraintsgenerator": if not is_MDRestraintsGenerator: @@ -666,6 +683,18 @@ def analyse( restraint_idx=restraint_idx, ) + if restraint_type.lower() == "multiple_distance": + return RestraintSearch._multiple_distance_restraint( + u, + system, + temperature, + ligand_selection_str, + receptor_selection_str, + method, + work_dir, + cutoff, + ) + @staticmethod def _boresch_restraint( u, @@ -943,6 +972,116 @@ def _boresch_restraint_MDRestraintsGenerator( ) return restraint + @staticmethod + def _findOrderedPairs(u, ligand_selection_str, receptor_selection_str, cutoff): + """ + Return a list of receptor-ligand anchor atoms pairs in the form + (lig atom index, receptor atom index), where the pairs are ordered + from low to high variance of distance over the trajectory. + + Parameters + ---------- + + u : MDAnalysis.Universe + The trajectory for the ABFE restraint calculation as a + MDAnalysis.Universe object. + + ligand_selection_str: str + The selection string for the atoms in the ligand to consider + as potential anchor points. + + receptor_selection_str: str + The selection string for the protein in the ligand to consider + as potential anchor points. + + cutoff: BioSimSpace.Types.Length + The greatest distance between ligand and receptor anchor atoms. + Only affects behaviour when method == "BSS" Receptor anchors + further than cutoff Angstroms from the closest ligand anchors will not + be included in the search for potential anchor points. + + Returns + ------- + + pairs_ordered_sd : list of tuples + List of receptor-ligand atom pairs ordered by increasing variance of distance over + the trajectory. + """ + + lig_selection = u.select_atoms(ligand_selection_str) + pair_variance_dict = {} + + # Get all receptor atoms within specified distance of cutoff + for lig_atom in lig_selection: + for prot_atom in u.select_atoms( + f"{receptor_selection_str} and (around {cutoff / _angstrom} index {lig_atom.index})" + ): + pair_variance_dict[(lig_atom.index, prot_atom.index)] = {} + pair_variance_dict[(lig_atom.index, prot_atom.index)]["dists"] = [] + + # Compute Average Distance and SD + for frame in _tqdm( + u.trajectory, desc="Searching for low variance pairs. Frame no: " + ): + for lig_atom_index, prot_atom_index in pair_variance_dict.keys(): + distance = _dist( + _mda.AtomGroup([u.atoms[lig_atom_index]]), + _mda.AtomGroup([u.atoms[prot_atom_index]]), + box=frame.dimensions, + )[2][0] + pair_variance_dict[(lig_atom_index, prot_atom_index)]["dists"].append( + distance + ) + + # change lists to numpy arrays + for pair in pair_variance_dict.keys(): + pair_variance_dict[pair]["dists"] = _np.array( + pair_variance_dict[pair]["dists"] + ) + + # calculate SD + for pair in pair_variance_dict.keys(): + pair_variance_dict[pair]["sd"] = pair_variance_dict[pair]["dists"].std() + + # get n pairs with lowest SD + pairs_ordered_sd = [] + for item in sorted(pair_variance_dict.items(), key=lambda item: item[1]["sd"]): + pairs_ordered_sd.append(item[0]) + + if len(pairs_ordered_sd) == 0: + raise _AnalysisError( + "No receptor-ligand atom pairs found within specified cutoff." + ) + + return pairs_ordered_sd + + @staticmethod + def _getDistance(idx1, idx2, u): + """ + Get the distance between two atoms in a universe. + + Parameters + ---------- + idx1 : int + Index of the first atom + idx2 : int + Index of the second atom + u : MDAnalysis.Universe + The MDA universe containing the atoms and + trajectory. + + Returns + ------- + distance : float + The distance between the two atoms in Angstroms. + """ + distance = _dist( + _mda.AtomGroup([u.atoms[idx1]]), + _mda.AtomGroup([u.atoms[idx2]]), + box=u.dimensions, + )[2][0] + return distance + @staticmethod def _boresch_restraint_BSS( u, @@ -1021,85 +1160,6 @@ def _boresch_restraint_BSS( interactions. """ - def _findOrderedPairs(u, ligand_selection_str, receptor_selection_str, cutoff): - """ - Return a list of receptor-ligand anchor atoms pairs in the form - (lig atom index, receptor atom index), where the pairs are ordered - from low to high variance of distance over the trajectory. - - Parameters - ---------- - - u : MDAnalysis.Universe - The trajectory for the ABFE restraint calculation as a - MDAnalysis.Universe object. - - ligand_selection_str: str - The selection string for the atoms in the ligand to consider - as potential anchor points. - - receptor_selection_str: str - The selection string for the protein in the ligand to consider - as potential anchor points. - - cutoff: BioSimSpace.Types.Length - The greatest distance between ligand and receptor anchor atoms. - Only affects behaviour when method == "BSS" Receptor anchors - further than cutoff Angstroms from the closest ligand anchors will not - be included in the search for potential anchor points. - - Returns - ------- - - pairs_ordered_sd : list of tuples - List of receptor-ligand atom pairs ordered by increasing variance of distance over - the trajectory. - """ - - lig_selection = u.select_atoms(ligand_selection_str) - pair_variance_dict = {} - - # Get all receptor atoms within specified distance of cutoff - for lig_atom in lig_selection: - for prot_atom in u.select_atoms( - f"{receptor_selection_str} and (around {cutoff / _angstrom} index {lig_atom.index})" - ): - pair_variance_dict[(lig_atom.index, prot_atom.index)] = {} - pair_variance_dict[(lig_atom.index, prot_atom.index)]["dists"] = [] - - # Compute Average Distance and SD - for frame in _tqdm( - u.trajectory, desc="Searching for low variance pairs. Frame no: " - ): - for lig_atom_index, prot_atom_index in pair_variance_dict.keys(): - distance = _dist( - _mda.AtomGroup([u.atoms[lig_atom_index]]), - _mda.AtomGroup([u.atoms[prot_atom_index]]), - box=frame.dimensions, - )[2][0] - pair_variance_dict[(lig_atom_index, prot_atom_index)][ - "dists" - ].append(distance) - - # change lists to numpy arrays - for pair in pair_variance_dict.keys(): - pair_variance_dict[pair]["dists"] = _np.array( - pair_variance_dict[pair]["dists"] - ) - - # calculate SD - for pair in pair_variance_dict.keys(): - pair_variance_dict[pair]["sd"] = pair_variance_dict[pair]["dists"].std() - - # get n pairs with lowest SD - pairs_ordered_sd = [] - for item in sorted( - pair_variance_dict.items(), key=lambda item: item[1]["sd"] - ): - pairs_ordered_sd.append(item[0]) - - return pairs_ordered_sd - def _getAnchorAts(a1_idx, selection_str, u): """ Takes in index of anchor atom 1 (in either the receptor or ligand) @@ -1158,32 +1218,6 @@ def _getAnchorAts(a1_idx, selection_str, u): return a1_idx, a2_idx, a3_idx - def _getDistance(idx1, idx2, u): - """ - Get the distance between two atoms in a universe. - - Parameters - ---------- - idx1 : int - Index of the first atom - idx2 : int - Index of the second atom - u : MDAnalysis.Universe - The MDA universe containing the atoms and - trajectory. - - Returns - ------- - distance : float - The distance between the two atoms in Angstroms. - """ - distance = _dist( - _mda.AtomGroup([u.atoms[idx1]]), - _mda.AtomGroup([u.atoms[idx2]]), - box=u.dimensions, - )[2][0] - return distance - def _getAngle(idx1, idx2, idx3, u): """ Get the angle between three atoms in a universe. @@ -1243,7 +1277,7 @@ def _getDihedral(idx1, idx2, idx3, idx4, u): def _getBoreschDOF(l1, l2, l3, r1, r2, r3, u): """Calculate Boresch degrees of freedom from indices of anchor atoms""" # Ordering of connection of anchors is r3,r2,r1,l1,l2,l3 - r = _getDistance(r1, l1, u) + r = RestraintSearch._getDistance(r1, l1, u) thetaA = _getAngle(r2, r1, l1, u) thetaB = _getAngle(r1, l1, l2, u) phiA = _getDihedral(r3, r2, r1, l1, u) @@ -1697,7 +1731,7 @@ def _getBoreschRestraint(pair, boresch_dof_data): return restraint # Find pairs with lowest SD - pairs_ordered_sd = _findOrderedPairs( + pairs_ordered_sd = RestraintSearch._findOrderedPairs( u, ligand_selection_str, receptor_selection_str, cutoff ) @@ -1720,3 +1754,458 @@ def _getBoreschRestraint(pair, boresch_dof_data): ) return restraint + + @staticmethod + def _multiple_distance_restraint( + u, + system, + temperature, + ligand_selection_str, + receptor_selection_str, + method, + work_dir, + cutoff, + ): + """ + Generate the multiple distance restraints Restraint using the native + BioSimSpace implementation. This involves: + + 1. Finding the pairs of receptor-ligand atoms that have the lowest standard + deviation of distances + 2. Selecting pairs based on lowest standard deviation of distances and diversity + of direction. One pair is selected for each selected ligand atom. + 3. Deriving restraint parameters from the selected pairs so that the flat-bottomed + region encompasses 95 % of the sampled distances. The pair with the lowest + variance of the distance is selected to be the "permanent" distance restraint. + + Parameters + ---------- + + u : MDAnalysis.Universe + The trajectory for the ABFE restraint calculation as a + MDAnalysis.Universe object. + + system : :class:`System ` + The molecular system for the ABFE restraint calculation. This + must contain a single decoupled molecule and is assumed to have + already been equilibrated. + + temperature : :class:`System ` + The temperature of the system + + ligand_selection_str : str + The selection string for the atoms in the ligand to consider + as potential anchor points. + + receptor_selection_str : str + The selection string for the atoms in the receptor to consider + as potential anchor points. Uses the mdanalysis atom selection + language. + + method : str + The method to use to derive the restraints. Currently only 'BSS' + is implemented, which uses the native BioSimSpace derivation. + + work_dir : str + The working directory for the simulation. + + cutoff: BioSimSpace.Types.Length + The greatest distance between ligand and receptor anchor atoms. + Only affects behaviour when method == "BSS" Receptor anchors + further than cutoff Angstroms from the closest ligand anchors will not + be included in the search for potential anchor points. + + Returns + ------- + + restraint : :class:`Restraint ` + The restraints of `restraint_type` which best mimic the strongest receptor-ligand + interactions. + """ + if method == "MDRestraintsGenerator": + raise NotImplementedError( + "MDRestraintsGenerator is not implemented for multiple distance restraints." + ) + + elif method == "BSS": + return RestraintSearch._multiple_distance_restraint_BSS( + u, + system, + temperature, + ligand_selection_str, + receptor_selection_str, + work_dir, + cutoff, + ) + + else: + raise ValueError("method must be 'MDRestraintsGenerator' or 'BSS'.") + + @staticmethod + def _multiple_distance_restraint_BSS( + u, + system, + temperature, + ligand_selection_str, + receptor_selection_str, + work_dir, + cutoff, + ): + """ + Generate the multiple distance restraints Restraint using the native + BioSimSpace implementation. This involves: + + 1. Finding the pairs of receptor-ligand atoms that have the lowest standard + deviation of distances + 2. Selecting pairs based on lowest standard deviation of distances and diversity + of direction. One pair is selected for each selected ligand atom. + 3. Deriving restraint parameters from the selected pairs so that the flat-bottomed + region encompasses 95 % of the sampled distances. The pair with the lowest + variance of the distance is selected to be the "permanent" distance restraint. + + Parameters + ---------- + + u : MDAnalysis.Universe + The trajectory for the ABFE restraint calculation as a + MDAnalysis.Universe object. + + system : :class:`System ` + The molecular system for the ABFE restraint calculation. This + must contain a single decoupled molecule and is assumed to have + already been equilibrated. + + temperature : :class:`System ` + The temperature of the system + + ligand_selection_str: str + The selection string for the atoms in the ligand to consider + as potential anchor points. + + receptor_selection_str: str + The selection string for the protein in the ligand to consider + as potential anchor points. + + work_dir : str + The working directory for the simulation. + + cutoff: BioSimSpace.Types.Length + The greatest distance between ligand and receptor anchor atoms. + Only affects behaviour when method == "BSS" Receptor anchors + further than cutoff Angstroms from the closest ligand anchors will not + be included in the search for potential anchor points. + + Returns + ------- + + restraint : :class:`Restraint ` + The restraints of `restraint_type` which best mimic the strongest receptor-ligand + interactions. + """ + + def _get_norm_vector(frame, pair): + """ + Get the normalised interatomic vector between two atoms. + + Parameters + ---------- + frame : MDAnalysis.Timestep + The frame of the trajectory to use. + pair : tuple + The pair of atoms to consider. + + Returns + ------- + vector : np.ndarray + The normalised interatomic vector between the two atoms. + """ + inter_vec = frame.positions[pair[1]] - frame.positions[pair[0]] + # Check the length of this is not excessive (e.g. due to PBCs) + norm = _np.linalg.norm(inter_vec) + if norm > 15: + raise ValueError( + f"The interatomic vector between atoms {pair[0]} and {pair[1]} is " + f"excessive ({norm:.2f} A). This is likely due to PBCs. " + "Please check your input trajectory." + ) + else: + return inter_vec / norm + + def _clusterPairsByDirection(u, pairs_ordered_sd, n_clusters): + """ + Cluster the pairs of atoms by direction (based on the final structure). + + Parameters + ---------- + u : MDAnalysis.Universe + The trajectory for the ABFE restraint calculation as a + MDAnalysis.Universe object. + pairs_ordered_sd : list + The pairs of atoms ordered by SD. + n_clusters : int + The number of clusters to use. + + Returns + ------- + pairs_ordered_clustered : list[tuple] + The pairs of atoms ordered by SD and clustered by direction. + The third element of each tuple is the cluster number. + """ + # Get the normalised interatomic vectors for each pair of atoms, + # using the final frame of the trajectory. + vectors_dict = { + pair: _get_norm_vector(u.trajectory[-1], pair) + for pair in pairs_ordered_sd + } + # Cluster the vectors by direction, using k-means clustering. Predict the cluster + # indices at the same time. + cluster_indices = _KMeans(n_clusters=n_clusters, n_init=10).fit_predict( + list(vectors_dict.values()) + ) + + # Create a new list of pairs of atoms, ordered by SD and including the cluster number. + pairs_ordered_clustered = [ + (pair[0], pair[1], cluster_indices[i]) + for i, pair in enumerate(pairs_ordered_sd) + ] + + return pairs_ordered_clustered + + def _filterPairsByCluster(pairs_ordered_clustered, selected_ligand_atoms): + """ + Filter the pairs of atoms by cluster. This is done by checking each pair + in order of decreasing variance. If the ligand heavy atom has not already + been used in a restraint and no other restraints from the given directional + cluster have been selected, the restraint is accepted. + + Parameters + ---------- + pairs_ordered_clustered : list[tuple] + The pairs of atoms ordered by SD and clustered by direction. + The first and second elements of each tuple are the atom indices. + The third element of each tuple is the cluster number. + + selected_ligand_atoms : list[int] + The indices of the selected ligand atoms. + + Returns + ------- + pairs_ordered_clustered_filtered : list[tuple] + The pairs of atoms ordered by SD and clustered by direction, filtered + by cluster. The tuples contain the atom indices only. + """ + filtered_pairs = [] + selected_clusters = [] + available_ligand_atoms = [at.index for at in selected_ligand_atoms] + + # Loop over the pairs of atoms in order of decreasing variance. + for pair in pairs_ordered_clustered: + # If the ligand atom has not already been used in a restraint and no + # other restraints from the given directional cluster have been selected, + # accept the restraint. + if ( + pair[0] in available_ligand_atoms + and pair[2] not in selected_clusters + ): + filtered_pairs.append((pair[0], pair[1])) + selected_clusters.append(pair[2]) + available_ligand_atoms.remove(pair[0]) + + if len(available_ligand_atoms) == 0: + return filtered_pairs + + # If not all ligand atoms have been used in a restraint, return nothing. + return None + + def _plotDistanceRestraints(distance_dict): + """ + Plot the variation of the the distances for each pair + over the trajectory. + + Parameters + ---------- + distance_dict : dict + A dictionary containing the distances for each pair, where + the keys are the pair indices and the values are lists of + distances in Angstroms. + """ + n_pairs = len(distance_dict) + n_columns = 6 + n_rows = int(_np.ceil(n_pairs / n_columns)) + + # Plot histograms + fig, axs = _plt.subplots( + n_rows, n_columns, figsize=(2.7 * n_columns, 4 * n_rows), dpi=500 + ) + axs = axs.flatten() + for i, pair in enumerate(distance_dict): + axs[i].hist(distance_dict[pair], bins=10, density=True) + axs[i].axvline( + x=_np.mean(distance_dict[pair]), + color="r", + linestyle="dashed", + linewidth=2, + label="mean", + ) + axs[i].set_xlabel(f"({pair[0]}, {pair[1]}) Distance / $\mathrm{{\AA}}$") + axs[i].set_ylabel("Probability") + if i == n_pairs - 1: # Only add legend to last plot + axs[i].legend() + # Hide unused axes + if i >= n_pairs: + axs[i].axis("off") + + fig.tight_layout() + fig.savefig( + f"{work_dir}/distance_restraints_hist.png", + facecolor="white", + ) + _plt.close(fig) + + # Plot variation with time to see if there are slow DOF + fig, axs = _plt.subplots( + n_rows, n_columns, figsize=(2.7 * n_columns, 4 * n_rows), dpi=500 + ) + axs = axs.flatten() + for i, pair in enumerate(distance_dict): + axs[i].plot( + list(range(len(distance_dict[pair]))), + distance_dict[pair], + ) + axs[i].axhline( + y=_np.mean(distance_dict[pair]), + color="r", + linestyle="dashed", + linewidth=2, + label="mean", + ) # No need to add legend as has been + # added to histograms + axs[i].set_ylabel(f"({pair[0]}, {pair[1]}) Distance / $\mathrm{{\AA}}$") + axs[i].set_xlabel("Frame Index") + fig.tight_layout() + fig.savefig( + f"{work_dir}/distance_restraints_time.png", + facecolor="white", + ) + _plt.close(fig) + + def _getRestraintDict(u, pairs_ordered): + """ + Generate the multiple distance restraints dictionary given + a list of pairs of atoms to restrain, ordered by decreasing + variance of intramolecular distance. The first pair will be + selected to be the "permanent" distance restraint. The restraints + are selected so that the flat-bottomed region covers 95 % of + the distances sampled during the trajectory. + + Parameters + ---------- + u : MDAnalysis.Universe + The trajectory for the ABFE restraint calculation as a + MDAnalysis.Universe object. + + pairs_ordered : list[tuple] + The pairs of atoms ordered by SD. The first and second + elements of each tuple are the atom indices. + + Returns + ------- + restraint_dict : dict + The multiple distance restraints dictionary. + """ + # For each pair, get a list of the distances over the trajectory. + distances = {pair: [] for pair in pairs_ordered} + for _ in u.trajectory: + for pair in pairs_ordered: + distances[pair].append( + RestraintSearch._getDistance(pair[0], pair[1], u) + ) + + # Plot the distances over the trajectory. + _plotDistanceRestraints(distances) + + # For each pair, get the restraint parameters and add them to the + # restraint dictionary. + restraint_dict = {"distance_restraints": []} + for i, pair in enumerate(pairs_ordered): + r1 = system.getAtom(int(pair[1])) + l1 = system.getAtom(int(pair[0])) + r0 = _np.mean(distances[pair]) * _angstrom # Equilibrium distance + kr = 40 * _kcal_per_mol / (_angstrom**2) # Default of 40 kcal/mol/A^2 + r_fb = ( + _np.percentile(distances[pair], 97.5) + - _np.percentile(distances[pair], 2.5) + ) * _angstrom # Flat-bottomed region + individual_restraint_dict = { + "r1": r1, + "l1": l1, + "r0": r0, + "kr": kr, + "r_fb": r_fb, + } + # If this is the first pair, add it as the permanent distance restraint. + if i == 0: + restraint_dict[ + "permanent_distance_restraint" + ] = individual_restraint_dict + else: + restraint_dict["distance_restraints"].append( + individual_restraint_dict + ) + + return restraint_dict + + # Find pairs with lowest SD, ordered by SD. + pairs_ordered_sd = RestraintSearch._findOrderedPairs( + u, ligand_selection_str, receptor_selection_str, cutoff + ) + + # Cluster pairs according to direction (based on initial structure). Choose + # the number of clusters to be equal to the number of selected heavy atoms in the ligand. + selected_ligand_atoms = u.select_atoms(ligand_selection_str).select_atoms( + "not name H*" + ) + # Choose initial number of clusters to to the number of selected heavy atoms in the ligand. + n_clusters_initial = len(selected_ligand_atoms) + n_clusters_max = 3 * n_clusters_initial + n_clusters_to_try = [ + round(n) for n in _np.linspace(n_clusters_initial, n_clusters_max, 5) + ] + + for n_clusters in n_clusters_to_try: + pairs_ordered_clustered = _clusterPairsByDirection( + u, pairs_ordered_sd, n_clusters=n_clusters + ) + + # Check each pair in order of reverse variance. If the ligand heavy atom has not + # already been used in a restraint and no other restraints from the given cluster + # have been selected, accept the restraint. + pairs_ordered_clustered_filtered = _filterPairsByCluster( + pairs_ordered_clustered, selected_ligand_atoms + ) + + if pairs_ordered_clustered_filtered is not None: + break + + if ( + n_clusters_to_try[-1] == n_clusters + and pairs_ordered_clustered_filtered is None + ): + raise _AnalysisError( + "Could not find suitable multiple distance restraints. " + "Please try increasing the number of candidate pairs." + ) + + # Get restraint parameters for each pair by selecting the flat-bottomed region + # to encompass 95 % of sampled configurations. This also plots the data. + restraint_dict = _getRestraintDict(u, pairs_ordered_clustered_filtered) + + # Create the restraint object and return it. + restraint = _Restraint( + system, + restraint_dict, + temperature=temperature, + restraint_type="multiple_distance", + ) + + return restraint diff --git a/python/BioSimSpace/Sandpit/Exscientia/Parameters/__init__.py b/python/BioSimSpace/Sandpit/Exscientia/Parameters/__init__.py index 9c0610b06..de89a1d5a 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Parameters/__init__.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Parameters/__init__.py @@ -141,7 +141,7 @@ # ready to be returned. molecule = BSS.Parameters.ff14SB(molecule, water_model="tip3p").getMolecule() -Additional parameters can be loaded by passing the ``leap_commands`` option to +Additional parameters can be loaded by passing the ``pre_mol_commands`` option to any compatible AMBER force field function, e.g.: .. code-block:: python @@ -157,7 +157,11 @@ # Initialise the parameterisation process and block until the molecule is # ready to be returned. - molecule = BSS.Parameters.ff14SB(molecule, leap_commands=leap_commands).getMolecule() + molecule = BSS.Parameters.ff14SB(molecule, pre_mol_commands=leap_commands).getMolecule() + +If you need to apply any additional operations on the loaded molecule, then this +can be done using the ``post_mol_commands`` option, using ``mol`` for the name of +the molecule. """ from ._parameters import * diff --git a/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py b/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py index 4c83b1c0f..93c86f6d4 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py @@ -198,6 +198,12 @@ def __init__( raise ValueError("'show_errors' must be of type 'bool'.") self._show_errors = show_errors + if restraint and not isinstance(protocol, _Protocol._FreeEnergyMixin): + raise ValueError( + "'BioSimSpace.Process.Gromacs' requires a " + "FreeEnergy protocol for running with a restraint!" + ) + # Initialise the energy dictionary and title header. self._energy_dict = ( dict() @@ -312,14 +318,30 @@ def _write_system(self, system, coord_file=None, topol_file=None, ref_file=None) ) # Check that the perturbation type is supported.. - if self._protocol.getPerturbationType() != "full": + if self._protocol.getPerturbationType() not in [ + "full", + "release_restraint", + ]: msg = ( "'BioSimSpace.Process.Gromacs' currently only supports the 'full' " - "perturbation type. Please use 'BioSimSpace.Process.Somd' " + " and 'release_restraint' perturbation type. Please use 'BioSimSpace.Process.Somd' " "for multistep perturbation types." ) raise NotImplementedError(msg) + # Check that we have multiple distance restraints if the perturbation type is 'release_restraint'. + if self._protocol.getPerturbationType() == "release_restraint": + if not self._restraint: + raise ValueError( + "'BioSimSpace.Process.Gromacs' requires a " + "restraint for the 'release_restraint' perturbation type!" + ) + if self._restraint._restraint_type != "multiple_distance": + raise ValueError( + "'BioSimSpace.Process.Gromacs' requires a " + "multiple distance restraint for the 'release_restraint' perturbation type!" + ) + else: # Check for perturbable molecules and convert to the chosen end state. system = self._checkPerturbable(system) @@ -381,7 +403,12 @@ def _write_system(self, system, coord_file=None, topol_file=None, ref_file=None) if self._restraint: with open(topol_file, "a") as f: f.write("\n") - f.write(self._restraint.toString(engine="GROMACS")) + f.write( + self._restraint.toString( + engine="GROMACS", + perturbation_type=self._protocol.getPerturbationType(), + ) + ) def _generate_config(self): """Generate GROMACS configuration file strings.""" @@ -453,10 +480,17 @@ def _generate_config(self): # Set the configuration. if not isinstance(self._protocol, _Protocol.Dummy): config = _Protocol.ConfigFactory(self._system, self._protocol) + pert_type = ( + self._protocol._perturbation_type + if isinstance(self._protocol, _Protocol._FreeEnergyMixin) + else None + ) self.addToConfig( config.generateGromacsConfig( extra_options={**config_options, **self._extra_options}, extra_lines=self._extra_lines, + restraint=self._restraint, + perturbation_type=pert_type, ) ) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py b/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py index 532ccbe41..8db9fc8be 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py @@ -958,6 +958,11 @@ def _to_pert_file( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Returns ------- @@ -1007,6 +1012,7 @@ def _to_pert_file( "grow_soft", "charge_soft", "restraint", + "release_restraint", ] if perturbation_type not in allowed_perturbation_types: @@ -1327,6 +1333,77 @@ def atom_sorting_criteria(atom): # End atom record. file.write(" endatom\n") + elif perturbation_type == "release_restraint": + if print_all_atoms: + for atom in sorted( + mol.atoms(), key=lambda atom: atom_sorting_criteria(atom) + ): + # Start atom record. + file.write(" atom\n") + + # Only require the final Lennard-Jones and charge properties. + LJ1 = atom.property("LJ1") + charge1_value = atom.property("charge1").value() + + # Atom data. Create dummy pert file with identical initial and final properties. + file.write(" name %s\n" % atom.name().value()) + file.write( + " initial_type %s\n" % atom.property("ambertype1") + ) + file.write( + " final_type %s\n" % atom.property("ambertype1") + ) + file.write( + " initial_LJ %.5f %.5f\n" + % (LJ1.sigma().value(), LJ1.epsilon().value()) + ) + file.write( + " final_LJ %.5f %.5f\n" + % (LJ1.sigma().value(), LJ1.epsilon().value()) + ) + file.write(" initial_charge %.5f\n" % charge1_value) + file.write(" final_charge %.5f\n" % charge1_value) + + # End atom record. + file.write(" endatom\n") + + # Only print records for the atoms that are perturbed. + else: + for idx in sorted( + pert_idxs, key=lambda idx: atom_sorting_criteria(mol.atom(idx)) + ): + # Get the perturbed atom. + atom = mol.atom(idx) + + # Start atom record. + file.write(" atom\n") + + # Only require the final Lennard-Jones and charge properties. + LJ1 = atom.property("LJ1") + charge1_value = atom.property("charge1").value() + + # Atom data. Create dummy pert file with identical initial and final properties. + file.write(" name %s\n" % atom.name().value()) + file.write( + " initial_type %s\n" % atom.property("ambertype1") + ) + file.write( + " final_type %s\n" % atom.property("ambertype1") + ) + file.write( + " initial_LJ %.5f %.5f\n" + % (LJ1.sigma().value(), LJ1.epsilon().value()) + ) + file.write( + " final_LJ %.5f %.5f\n" + % (LJ1.sigma().value(), LJ1.epsilon().value()) + ) + file.write(" initial_charge %.5f\n" % charge1_value) + file.write(" final_charge %.5f\n" % charge1_value) + + # End atom record. + file.write(" endatom\n") + else: # Given multistep protocol: if print_all_atoms: diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py index 20ac4e3fe..4496107d7 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py @@ -8,6 +8,9 @@ from .. import _gmx_version from .._Exceptions import IncompatibleError as _IncompatibleError from ..Align._squash import _amber_mask_from_indices, _squashed_atom_mapping +from ..FreeEnergy._restraint import Restraint as _Restraint +from ..Units.Energy import kj_per_mol as _kj_per_mol +from ..Units.Length import nanometer as _nanometer class ConfigFactory: @@ -276,7 +279,19 @@ def generateAmberConfig(self, extra_options=None, extra_lines=None): # "heavy" restraints using a non-interoperable name mask. if type(restraint) is str: if restraint == "backbone": - restraint_mask = "@CA,C,O,N" + # Determine wether the system contains protein, nucleic acid, or both. + restraint_atom_names = [] + if self.system.nAminoAcids() > 0: + restraint_atom_names += ["N", "CA", "C", "O"] + if self.system.nNucleotides() > 0: + restraint_atom_names += [ + "P", + "C5'", + "C3'", + "O3'", + "O5'", + ] + restraint_mask = "@" + ",".join(restraint_atom_names) elif restraint == "heavy": restraint_mask = "!:WAT & !@H=" elif restraint == "all": @@ -378,7 +393,13 @@ def generateAmberConfig(self, extra_options=None, extra_lines=None): return total_lines - def generateGromacsConfig(self, extra_options=None, extra_lines=None): + def generateGromacsConfig( + self, + extra_options=None, + extra_lines=None, + restraint=None, + perturbation_type=None, + ): """ Outputs the current protocol in a format compatible with GROMACS. @@ -391,12 +412,29 @@ def generateGromacsConfig(self, extra_options=None, extra_lines=None): extra_lines : list A list of extra lines to be put at the end of the script. + restraint : :class:`Restraint ` + Restraint object that contains information for ABFE calculations. + + perturbation_type : str + The type of perturbation to perform. Options are: + "full" : A full perturbation of all terms (default option). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). The non-permanent restraints + can be scaled with bonded-lambda. + Returns ------- config : list The generated config list in a GROMACS format. """ + if perturbation_type == "release_restraint" and restraint is None: + raise ValueError( + "Cannot use perturbation_type='release_restraint' without a Restraint object." + ) extra_options = extra_options if extra_options is not None else {} extra_lines = extra_lines if extra_lines is not None else [] @@ -586,6 +624,20 @@ def tranform(charge, LJ): "nstdhdl" ] = self._report_interval # Write gradients every report_interval steps. + # Handle the combination of multiple distance restraints and perturbation type + # of "release_restraint". In this case, the force constant of the "permanent" + # restraint needs to be written to the mdp file. + if perturbation_type == "release_restraint": + if not isinstance(restraint, _Restraint): + raise ValueError( + "Cannot use perturbation_type='release_restraint' without a Restraint object." + ) + # Get force constant in correct units. + force_constant = restraint._restraint_dict[ + "permanent_distance_restraint" + ]["kr"] / (_kj_per_mol / _nanometer**2) + protocol_dict["disre-fc"] = force_constant + # Put everything together in a line-by-line format. total_dict = {**protocol_dict, **extra_options} total_lines = [ @@ -626,6 +678,11 @@ def generateSomdConfig( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Returns ------- @@ -812,10 +869,33 @@ def generateSomdConfig( # Restraint if restraint: - total_lines.append("use boresch restraints = True") - total_lines.append(restraint.toString(engine="SOMD")) - # If we are turning on the restraint, need to specify this in the config file - if perturbation_type == "restraint": + # Handle Boresch restraints. + if restraint._restraint_type == "boresch": + if perturbation_type == "release_restraint": + raise _IncompatibleError( + "SOMD does not support releasing Boresch restraints, only " + "multiple distance restraints." + ) + total_lines.append("use boresch restraints = True") + total_lines.append(restraint.toString(engine="SOMD")) + + # Handle multiple distance restraints. + elif restraint._restraint_type == "multiple_distance": + total_lines.append("use distance restraints = True") + if perturbation_type != "restraint": + # In this case, we want to ensure that the "permanent" distance restraint is active + total_lines.append("use permanent distance restraints = True") + total_lines.append( + restraint.toString( + engine="SOMD", perturbation_type=perturbation_type + ) + ) + + # If we are turning on the restraint, need to specify this in the config file. + if ( + perturbation_type == "restraint" + or perturbation_type == "release_restraint" + ): total_lines.append("turn on receptor-ligand restraints mode = True") return total_lines diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy.py index 7f30e93cf..2ed1caa74 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy.py @@ -112,6 +112,11 @@ def __init__( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Currently perturubation_type != "full" is only supported by BioSimSpace.Process.Somd. diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_equilibration.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_equilibration.py index ee65d833e..c562b1493 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_equilibration.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_equilibration.py @@ -115,6 +115,11 @@ def __init__( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Currently perturubation_type != "full" is only supported by BioSimSpace.Process.Somd. diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_minimisation.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_minimisation.py index 6b13bac93..a51655838 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_minimisation.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_minimisation.py @@ -53,6 +53,11 @@ def __init__( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Currently perturubation_type != "full" is only supported by BioSimSpace.Process.Somd. diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py index 21f4db472..633c205de 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py @@ -50,6 +50,11 @@ def __init__( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Currently perturubation_type != "full" is only supported by BioSimSpace.Process.Somd. @@ -126,6 +131,12 @@ def setPerturbationType(self, perturbation_type): "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). + """ if type(perturbation_type) is not str: raise TypeError("'perturbation_type' must be of type 'str'") @@ -141,6 +152,7 @@ def setPerturbationType(self, perturbation_type): "grow_soft", "charge_soft", "restraint", + "release_restraint", ] if perturbation_type not in allowed_perturbation_types: @@ -197,7 +209,7 @@ def getLambda(self, type="float"): """ if type.lower() == "float": if len(self._lambda) == 1: - return float(self._lambda) + return float(self._lambda.iloc[0]) else: warnings.warn( f"The {self._lambda} has more than one value, return as pd.Series." diff --git a/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py index 67e0c3d92..853038840 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py +++ b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_system.py @@ -46,6 +46,7 @@ from .. import Units as _Units from ._sire_wrapper import SireWrapper as _SireWrapper +from ._utils import _prot_res, _nucl_res, _ions from sire.mol import Select as _Select @@ -1853,186 +1854,6 @@ def getRestraintAtoms( # Initialise the list of indices. indices = [] - # A set of protein residues. Taken from MDAnalysis. - prot_res = { - # CHARMM top_all27_prot_lipid.rtf - "ALA", - "ARG", - "ASN", - "ASP", - "CYS", - "GLN", - "GLU", - "GLY", - "HSD", - "HSE", - "HSP", - "ILE", - "LEU", - "LYS", - "MET", - "PHE", - "PRO", - "SER", - "THR", - "TRP", - "TYR", - "VAL", - "ALAD", - ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin - # PDB - "HIS", - "MSE", - # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp - "ARGN", - "ASPH", - "CYS2", - "CYSH", - "QLN", - "PGLU", - "GLUH", - "HIS1", - "HISD", - "HISE", - "HISH", - "LYSH", - # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp - "ASN1", - "CYS1", - "HISA", - "HISB", - "HIS2", - # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp - "HID", - "HIE", - "HIP", - "ORN", - "DAB", - "LYN", - "HYP", - "CYM", - "CYX", - "ASH", - "GLH", - "ACE", - "NME", - # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp - "NALA", - "NGLY", - "NSER", - "NTHR", - "NLEU", - "NILE", - "NVAL", - "NASN", - "NGLN", - "NARG", - "NHID", - "NHIE", - "NHIP", - "NTRP", - "NPHE", - "NTYR", - "NGLU", - "NASP", - "NLYS", - "NPRO", - "NCYS", - "NCYX", - "NMET", - "CALA", - "CGLY", - "CSER", - "CTHR", - "CLEU", - "CILE", - "CVAL", - "CASF", - "CASN", - "CGLN", - "CARG", - "CHID", - "CHIE", - "CHIP", - "CTRP", - "CPHE", - "CTYR", - "CGLU", - "CASP", - "CLYS", - "CPRO", - "CCYS", - "CCYX", - "CMET", - "CME", - "ASF", - } - - # A list of ion elements. - ions = [ - "F", - "Cl", - "Br", - "I", - "Li", - "Na", - "K", - "Rb", - "Cs", - "Mg", - "Tl", - "Cu", - "Ag", - "Be", - "Cu", - "Ni", - "Pt", - "Zn", - "Co", - "Pd", - "Ag", - "Cr", - "Fe", - "Mg", - "V", - "Mn", - "Hg", - "Cd", - "Yb", - "Ca", - "Sn", - "Pb", - "Eu", - "Sr", - "Sm", - "Ba", - "Ra", - "Al", - "Fe", - "Cr", - "In", - "Tl", - "Y", - "La", - "Ce", - "Pr", - "Nd", - "Sm", - "Eu", - "Gd", - "Tb", - "Dy", - "Er", - "Tm", - "Lu", - "Hf", - "Zr", - "Ce", - "U", - "Pu", - "Th", - ] - # Whether we've searched a perturbable system. If so, then we need to process # the search results differently. (There will be one for each molecule.) is_perturbable_system = False @@ -2043,11 +1864,12 @@ def getRestraintAtoms( if self.nPerturbableMolecules() == 0: # Backbone restraints. if restraint == "backbone": - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( "(not water) and (resname " - + ",".join(prot_res) - + ") and (atomname N,CA,C,O)" + + ",".join(_prot_res) + + ",".join(_nucl_res) + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = self.search(string, property_map) @@ -2056,7 +1878,7 @@ def getRestraintAtoms( elif restraint == "heavy": # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"(not water) and (not element {ion_string})" try: @@ -2066,7 +1888,7 @@ def getRestraintAtoms( elif restraint == "all": # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"(not water) and (not element {ion_string})" try: @@ -2097,11 +1919,12 @@ def getRestraintAtoms( if restraint == "backbone": if not mol.isWater(): - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( - "(resname " - + ",".join(prot_res) - + ") and (atomname N,CA,C,O)" + "(not water) and (resname " + + ",".join(_prot_res) + + ",".join(_nucl_res) + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = mol.search(string, _property_map) @@ -2111,7 +1934,7 @@ def getRestraintAtoms( elif restraint == "heavy": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"not element {ion_string}" try: @@ -2122,7 +1945,7 @@ def getRestraintAtoms( elif restraint == "all": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"not element {ion_string}" try: @@ -2153,9 +1976,12 @@ def getRestraintAtoms( if restraint == "backbone": if not mol.isWater(): - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( - "(resname " + ",".join(prot_res) + ") and (atomname N,CA,C,O)" + "(not water) and (resname " + + ",".join(_prot_res) + + ",".join(_nucl_res) + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = mol.search(string, _property_map) @@ -2165,7 +1991,7 @@ def getRestraintAtoms( elif restraint == "heavy": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"not element {ion_string}" try: @@ -2176,7 +2002,7 @@ def getRestraintAtoms( elif restraint == "all": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"not element {ion_string}" try: @@ -2227,6 +2053,80 @@ def getRestraintAtoms( return indices + def getAminoAcids(self, property_map={}): + """ + Return a list containing all of the amino acid residues in the system. + + Parameters + ---------- + + property_map : dict + A dictionary that maps system "properties" to their user defined + values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } + + Returns + ------- + + residues : [:class:`Residue `] + The list of all amino acid residues in the system. + """ + search_string = "(resname " + ",".join(_prot_res) + ")" + try: + residues = list(self.search(search_string, property_map).residues()) + except: + residues = [] + return residues + + def nAminoAcids(self): + """ + Return the number of amino acid residues in the system. + + Returns + ------- + + num_aa : int + The number of amino acid residues in the system. + """ + return len(self.getAminoAcids()) + + def getNucleotides(self, property_map={}): + """ + Return a list containing all of the nucleotide residues in the system. + + Parameters + ---------- + + property_map : dict + A dictionary that maps system "properties" to their user defined + values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } + + Returns + ------- + + residues : [:class:`Residue `] + The list of all nucleotide residues in the system. + """ + search_string = "(resname " + ",".join(_nucl_res) + ")" + try: + residues = list(self.search(search_string, property_map).residues()) + except: + residues = [] + return residues + + def nNucleotides(self): + """ + Return the number of nucleotide residues in the system. + + Returns + ------- + + num_nuc : int + The number of nucleotide residues in the system. + """ + return len(self.getNucleotides()) + def _isParameterised(self, property_map={}): """ Whether the system is parameterised, i.e. can we run a simulation diff --git a/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_utils.py b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_utils.py new file mode 100644 index 000000000..faab3e069 --- /dev/null +++ b/python/BioSimSpace/Sandpit/Exscientia/_SireWrappers/_utils.py @@ -0,0 +1,241 @@ +###################################################################### +# BioSimSpace: Making biomolecular simulation a breeze! +# +# Copyright: 2017-2023 +# +# Authors: Lester Hedges +# +# BioSimSpace is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# BioSimSpace is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BioSimSpace. If not, see . +##################################################################### +""" +Utilities. +""" + +# A set of protein residues. Taken from MDAnalysis. +_prot_res = { + # CHARMM top_all27_prot_lipid.rtf + "ALA", + "ARG", + "ASN", + "ASP", + "CYS", + "GLN", + "GLU", + "GLY", + "HSD", + "HSE", + "HSP", + "ILE", + "LEU", + "LYS", + "MET", + "PHE", + "PRO", + "SER", + "THR", + "TRP", + "TYR", + "VAL", + "ALAD", + ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin + # PDB + "HIS", + "MSE", + # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp + "ARGN", + "ASPH", + "CYS2", + "CYSH", + "QLN", + "PGLU", + "GLUH", + "HIS1", + "HISD", + "HISE", + "HISH", + "LYSH", + # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp + "ASN1", + "CYS1", + "HISA", + "HISB", + "HIS2", + # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp + "HID", + "HIE", + "HIP", + "ORN", + "DAB", + "LYN", + "HYP", + "CYM", + "CYX", + "ASH", + "GLH", + "ACE", + "NME", + # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp + "NALA", + "NGLY", + "NSER", + "NTHR", + "NLEU", + "NILE", + "NVAL", + "NASN", + "NGLN", + "NARG", + "NHID", + "NHIE", + "NHIP", + "NTRP", + "NPHE", + "NTYR", + "NGLU", + "NASP", + "NLYS", + "NPRO", + "NCYS", + "NCYX", + "NMET", + "CALA", + "CGLY", + "CSER", + "CTHR", + "CLEU", + "CILE", + "CVAL", + "CASF", + "CASN", + "CGLN", + "CARG", + "CHID", + "CHIE", + "CHIP", + "CTRP", + "CPHE", + "CTYR", + "CGLU", + "CASP", + "CLYS", + "CPRO", + "CCYS", + "CCYX", + "CMET", + "CME", + "ASF", +} + +# A set of nucleic acid residues. Taken from MDAnalysis. +_nucl_res = { + "ADE", + "URA", + "CYT", + "GUA", + "THY", + "DA", + "DC", + "DG", + "DT", + "RA", + "RU", + "RG", + "RC", + "A", + "T", + "U", + "C", + "G", + "DA5", + "DC5", + "DG5", + "DT5", + "DA3", + "DC3", + "DG3", + "DT3", + "RA5", + "RU5", + "RG5", + "RC5", + "RA3", + "RU3", + "RG3", + "RC3", +} + +# A list of ion elements. +_ions = [ + "F", + "Cl", + "Br", + "I", + "Li", + "Na", + "K", + "Rb", + "Cs", + "Mg", + "Tl", + "Cu", + "Ag", + "Be", + "Cu", + "Ni", + "Pt", + "Zn", + "Co", + "Pd", + "Ag", + "Cr", + "Fe", + "Mg", + "V", + "Mn", + "Hg", + "Cd", + "Yb", + "Ca", + "Sn", + "Pb", + "Eu", + "Sr", + "Sm", + "Ba", + "Ra", + "Al", + "Fe", + "Cr", + "In", + "Tl", + "Y", + "La", + "Ce", + "Pr", + "Nd", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Er", + "Tm", + "Lu", + "Hf", + "Zr", + "Ce", + "U", + "Pu", + "Th", +] diff --git a/python/BioSimSpace/_Config/_amber.py b/python/BioSimSpace/_Config/_amber.py index 5941bc51a..ddd51ed41 100644 --- a/python/BioSimSpace/_Config/_amber.py +++ b/python/BioSimSpace/_Config/_amber.py @@ -210,7 +210,19 @@ def createConfig( # "heavy" restraints using a non-interoperable name mask. if type(restraint) is str: if restraint == "backbone": - restraint_mask = "@CA,C,O,N" + # Determine wether the system contains protein, nucleic acid, or both. + restraint_atom_names = [] + if self._system.nAminoAcids() > 0: + restraint_atom_names += ["N", "CA", "C", "O"] + if self._system.nNucleotides() > 0: + restraint_atom_names += [ + "P", + "C5'", + "C3'", + "O3'", + "O5'", + ] + restraint_mask = "@" + ",".join(restraint_atom_names) elif restraint == "heavy": restraint_mask = "!:WAT & !@H=" elif restraint == "all": diff --git a/python/BioSimSpace/_SireWrappers/_system.py b/python/BioSimSpace/_SireWrappers/_system.py index cc41b3020..3ab8258f9 100644 --- a/python/BioSimSpace/_SireWrappers/_system.py +++ b/python/BioSimSpace/_SireWrappers/_system.py @@ -46,6 +46,7 @@ from .. import Units as _Units from ._sire_wrapper import SireWrapper as _SireWrapper +from ._utils import _prot_res, _nucl_res, _ions from sire.mol import Select as _Select @@ -1774,186 +1775,6 @@ def getRestraintAtoms( # Initialise the list of indices. indices = [] - # A set of protein residues. Taken from MDAnalysis. - prot_res = { - # CHARMM top_all27_prot_lipid.rtf - "ALA", - "ARG", - "ASN", - "ASP", - "CYS", - "GLN", - "GLU", - "GLY", - "HSD", - "HSE", - "HSP", - "ILE", - "LEU", - "LYS", - "MET", - "PHE", - "PRO", - "SER", - "THR", - "TRP", - "TYR", - "VAL", - "ALAD", - ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin - # PDB - "HIS", - "MSE", - # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp - "ARGN", - "ASPH", - "CYS2", - "CYSH", - "QLN", - "PGLU", - "GLUH", - "HIS1", - "HISD", - "HISE", - "HISH", - "LYSH", - # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp - "ASN1", - "CYS1", - "HISA", - "HISB", - "HIS2", - # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp - "HID", - "HIE", - "HIP", - "ORN", - "DAB", - "LYN", - "HYP", - "CYM", - "CYX", - "ASH", - "GLH", - "ACE", - "NME", - # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp - "NALA", - "NGLY", - "NSER", - "NTHR", - "NLEU", - "NILE", - "NVAL", - "NASN", - "NGLN", - "NARG", - "NHID", - "NHIE", - "NHIP", - "NTRP", - "NPHE", - "NTYR", - "NGLU", - "NASP", - "NLYS", - "NPRO", - "NCYS", - "NCYX", - "NMET", - "CALA", - "CGLY", - "CSER", - "CTHR", - "CLEU", - "CILE", - "CVAL", - "CASF", - "CASN", - "CGLN", - "CARG", - "CHID", - "CHIE", - "CHIP", - "CTRP", - "CPHE", - "CTYR", - "CGLU", - "CASP", - "CLYS", - "CPRO", - "CCYS", - "CCYX", - "CMET", - "CME", - "ASF", - } - - # A list of ion elements. - ions = [ - "F", - "Cl", - "Br", - "I", - "Li", - "Na", - "K", - "Rb", - "Cs", - "Mg", - "Tl", - "Cu", - "Ag", - "Be", - "Cu", - "Ni", - "Pt", - "Zn", - "Co", - "Pd", - "Ag", - "Cr", - "Fe", - "Mg", - "V", - "Mn", - "Hg", - "Cd", - "Yb", - "Ca", - "Sn", - "Pb", - "Eu", - "Sr", - "Sm", - "Ba", - "Ra", - "Al", - "Fe", - "Cr", - "In", - "Tl", - "Y", - "La", - "Ce", - "Pr", - "Nd", - "Sm", - "Eu", - "Gd", - "Tb", - "Dy", - "Er", - "Tm", - "Lu", - "Hf", - "Zr", - "Ce", - "U", - "Pu", - "Th", - ] - # Whether we've searched a perturbable system. If so, then we need to process # the search results differently. (There will be one for each molecule.) is_perturbable_system = False @@ -1964,11 +1785,12 @@ def getRestraintAtoms( if self.nPerturbableMolecules() == 0: # Backbone restraints. if restraint == "backbone": - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( "(not water) and (resname " - + ",".join(prot_res) - + ") and (atomname N,CA,C,O)" + + ",".join(_prot_res) + + ",".join(_nucl_res) + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = self.search(string, property_map) @@ -1977,7 +1799,7 @@ def getRestraintAtoms( elif restraint == "heavy": # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"(not water) and (not element {ion_string})" try: @@ -1987,7 +1809,7 @@ def getRestraintAtoms( elif restraint == "all": # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"(not water) and (not element {ion_string})" try: @@ -2018,11 +1840,12 @@ def getRestraintAtoms( if restraint == "backbone": if not mol.isWater(): - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( - "(resname " - + ",".join(prot_res) - + ") and (atomname N,CA,C,O)" + "(not water) and (resname " + + ",".join(_prot_res) + + ",".join(_nucl_res) + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = mol.search(string, _property_map) @@ -2032,7 +1855,7 @@ def getRestraintAtoms( elif restraint == "heavy": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"not element {ion_string}" try: @@ -2043,7 +1866,7 @@ def getRestraintAtoms( elif restraint == "all": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"not element {ion_string}" try: @@ -2074,9 +1897,12 @@ def getRestraintAtoms( if restraint == "backbone": if not mol.isWater(): - # Find all N, CA, C, and O atoms in protein residues. + # Find all backbone atoms in protein residues or nucleotides. string = ( - "(resname " + ",".join(prot_res) + ") and (atomname N,CA,C,O)" + "(resname " + + ",".join(_prot_res) + + ",".join(_nucl_res) + + ") and (atomname N,CA,C,O,P,/C5'/,/C3'/,/O3'/,/O5'/)" ) try: search = mol.search(string, _property_map) @@ -2086,7 +1912,7 @@ def getRestraintAtoms( elif restraint == "heavy": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions + ["H,Xx"]) + ion_string = ",".join(_ions + ["H,Xx"]) # Find all non-water, non-hydrogen, non-ion elements. string = f"not element {ion_string}" try: @@ -2097,7 +1923,7 @@ def getRestraintAtoms( elif restraint == "all": if not mol.isWater(): # Convert to a formatted string for the search. - ion_string = ",".join(ions) + ion_string = ",".join(_ions) # Find all non-water, non-ion elements. string = f"not element {ion_string}" try: @@ -2114,7 +1940,7 @@ def getRestraintAtoms( if num_results == 0: msg = "No atoms matched the restraint!" if restraint == "backbone": - msg += " Backbone restraints only apply to atoms in protein residues." + msg += " Backbone restraints only apply to atoms in protein residues or nucleotides." raise _IncompatibleError(msg) # Loop over the searches for each molecule. @@ -2131,9 +1957,7 @@ def getRestraintAtoms( if len(search) == 0 and not allow_zero_matches: msg = "No atoms matched the restraint!" if restraint == "backbone": - msg += ( - " Backbone restraints only apply to atoms in protein residues." - ) + msg += " Backbone restraints only apply to atoms in protein residues or nucleotides." raise _IncompatibleError(msg) # Now loop over all matching atoms and get their indices. @@ -2148,6 +1972,80 @@ def getRestraintAtoms( return indices + def getAminoAcids(self, property_map={}): + """ + Return a list containing all of the amino acid residues in the system. + + Parameters + ---------- + + property_map : dict + A dictionary that maps system "properties" to their user defined + values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } + + Returns + ------- + + residues : [:class:`Residue `] + The list of all amino acid residues in the system. + """ + search_string = "(resname " + ",".join(_prot_res) + ")" + try: + residues = list(self.search(search_string, property_map).residues()) + except: + residues = [] + return residues + + def nAminoAcids(self): + """ + Return the number of amino acid residues in the system. + + Returns + ------- + + num_aa : int + The number of amino acid residues in the system. + """ + return len(self.getAminoAcids()) + + def getNucleotides(self, property_map={}): + """ + Return a list containing all of the nucleotide residues in the system. + + Parameters + ---------- + + property_map : dict + A dictionary that maps system "properties" to their user defined + values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } + + Returns + ------- + + residues : [:class:`Residue `] + The list of all nucleotide residues in the system. + """ + search_string = "(resname " + ",".join(_nucl_res) + ")" + try: + residues = list(self.search(search_string, property_map).residues()) + except: + residues = [] + return residues + + def nNucleotides(self): + """ + Return the number of nucleotide residues in the system. + + Returns + ------- + + num_nuc : int + The number of nucleotide residues in the system. + """ + return len(self.getNucleotides()) + def _isParameterised(self, property_map={}): """ Whether the system is parameterised, i.e. can we run a simulation @@ -2161,6 +2059,7 @@ def _isParameterised(self, property_map={}): property_map : dict A dictionary that maps system "properties" to their user defined values. This allows the user to refer to properties with their + own naming scheme, e.g. { "charge" : "my-charge" } Returns ------- diff --git a/python/BioSimSpace/_SireWrappers/_utils.py b/python/BioSimSpace/_SireWrappers/_utils.py new file mode 100644 index 000000000..faab3e069 --- /dev/null +++ b/python/BioSimSpace/_SireWrappers/_utils.py @@ -0,0 +1,241 @@ +###################################################################### +# BioSimSpace: Making biomolecular simulation a breeze! +# +# Copyright: 2017-2023 +# +# Authors: Lester Hedges +# +# BioSimSpace is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# BioSimSpace is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with BioSimSpace. If not, see . +##################################################################### +""" +Utilities. +""" + +# A set of protein residues. Taken from MDAnalysis. +_prot_res = { + # CHARMM top_all27_prot_lipid.rtf + "ALA", + "ARG", + "ASN", + "ASP", + "CYS", + "GLN", + "GLU", + "GLY", + "HSD", + "HSE", + "HSP", + "ILE", + "LEU", + "LYS", + "MET", + "PHE", + "PRO", + "SER", + "THR", + "TRP", + "TYR", + "VAL", + "ALAD", + ## 'CHO','EAM', # -- special formyl and ethanolamine termini of gramicidin + # PDB + "HIS", + "MSE", + # from Gromacs 4.5.3 oplsaa.ff/aminoacids.rtp + "ARGN", + "ASPH", + "CYS2", + "CYSH", + "QLN", + "PGLU", + "GLUH", + "HIS1", + "HISD", + "HISE", + "HISH", + "LYSH", + # from Gromacs 4.5.3 gromos53a6.ff/aminoacids.rtp + "ASN1", + "CYS1", + "HISA", + "HISB", + "HIS2", + # from Gromacs 4.5.3 amber03.ff/aminoacids.rtp + "HID", + "HIE", + "HIP", + "ORN", + "DAB", + "LYN", + "HYP", + "CYM", + "CYX", + "ASH", + "GLH", + "ACE", + "NME", + # from Gromacs 2016.3 amber99sb-star-ildn.ff/aminoacids.rtp + "NALA", + "NGLY", + "NSER", + "NTHR", + "NLEU", + "NILE", + "NVAL", + "NASN", + "NGLN", + "NARG", + "NHID", + "NHIE", + "NHIP", + "NTRP", + "NPHE", + "NTYR", + "NGLU", + "NASP", + "NLYS", + "NPRO", + "NCYS", + "NCYX", + "NMET", + "CALA", + "CGLY", + "CSER", + "CTHR", + "CLEU", + "CILE", + "CVAL", + "CASF", + "CASN", + "CGLN", + "CARG", + "CHID", + "CHIE", + "CHIP", + "CTRP", + "CPHE", + "CTYR", + "CGLU", + "CASP", + "CLYS", + "CPRO", + "CCYS", + "CCYX", + "CMET", + "CME", + "ASF", +} + +# A set of nucleic acid residues. Taken from MDAnalysis. +_nucl_res = { + "ADE", + "URA", + "CYT", + "GUA", + "THY", + "DA", + "DC", + "DG", + "DT", + "RA", + "RU", + "RG", + "RC", + "A", + "T", + "U", + "C", + "G", + "DA5", + "DC5", + "DG5", + "DT5", + "DA3", + "DC3", + "DG3", + "DT3", + "RA5", + "RU5", + "RG5", + "RC5", + "RA3", + "RU3", + "RG3", + "RC3", +} + +# A list of ion elements. +_ions = [ + "F", + "Cl", + "Br", + "I", + "Li", + "Na", + "K", + "Rb", + "Cs", + "Mg", + "Tl", + "Cu", + "Ag", + "Be", + "Cu", + "Ni", + "Pt", + "Zn", + "Co", + "Pd", + "Ag", + "Cr", + "Fe", + "Mg", + "V", + "Mn", + "Hg", + "Cd", + "Yb", + "Ca", + "Sn", + "Pb", + "Eu", + "Sr", + "Sm", + "Ba", + "Ra", + "Al", + "Fe", + "Cr", + "In", + "Tl", + "Y", + "La", + "Ce", + "Pr", + "Nd", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Er", + "Tm", + "Lu", + "Hf", + "Zr", + "Ce", + "U", + "Pu", + "Th", +] diff --git a/requirements.txt b/requirements.txt index b501e7840..f7bb52604 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # BioSimSpace runtime requirements. # main -sire~=2023.4.2 +sire~=2023.5.0 # devel #sire==2023.5.0.dev diff --git a/tests/IO/test_file_cache.py b/tests/IO/test_file_cache.py index 25d831b2f..9d3866c99 100644 --- a/tests/IO/test_file_cache.py +++ b/tests/IO/test_file_cache.py @@ -87,7 +87,7 @@ def test_file_cache(): has_amber is False or has_openff is False, reason="Requires AMBER and OpenFF to be installed", ) -def test_file_cache_mol_nuums(): +def test_file_cache_mol_nums(): """ Make sure that systems can be cached if they have the same UID, but contain different MolNUms. diff --git a/tests/Process/test_amber.py b/tests/Process/test_amber.py index 7267e4506..89355c80b 100644 --- a/tests/Process/test_amber.py +++ b/tests/Process/test_amber.py @@ -15,6 +15,22 @@ def system(): return BSS.IO.readMolecules(["tests/input/ala.top", "tests/input/ala.crd"]) +@pytest.fixture(scope="session") +def rna_system(): + """An RNA system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["rna_6e1s.rst7", "rna_6e1s.prm7"]) + ) + + +@pytest.fixture(scope="session") +def large_protein_system(): + """A large protein system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["complex_vac0.prm7", "complex_vac0.rst7"]) + ) + + @pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") @pytest.mark.parametrize("restraint", restraints) def test_minimise(system, restraint): @@ -193,6 +209,42 @@ def test_args(system): assert arg_string == "-x X -a A -b B -y -e -f 6 -g -h H -k K -z Z" +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") +def test_backbone_restraint_mask_protein(large_protein_system): + """ + Test that the amber backbone restraint mask is correct for a protein system. + We need a large protein system otherwise the logic we want to test will be + skipped, and individual atoms will be specified in the config. + """ + + # Create an equilibration protocol with backbone restraints. + protocol = BSS.Protocol.Equilibration(restraint="backbone") + + # Create the process object. + process = BSS.Process.Amber(large_protein_system, protocol, name="test") + + # Check that the correct restraint mask is in the config. + config = process.getConfig() + assert ' restraintmask="@N,CA,C,O",' in config + + +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") +def test_backbone_restraint_mask_rna(rna_system): + """ + Test that the amber backbone restraint mask is correct for an RNA system. + """ + + # Create an equilibration protocol with backbone restraints. + protocol = BSS.Protocol.Equilibration(restraint="backbone") + + # Create the process object. + process = BSS.Process.Amber(rna_system, protocol, name="test") + + # Check that the correct restraint mask is in the config. + config = process.getConfig() + assert " restraintmask=\"@P,C5',C3',O3',O5'\"," in config + + def run_process(system, protocol, check_data=False): """Helper function to run various simulation protocols.""" diff --git a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py index b72be55fe..e5d7ce24d 100644 --- a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py +++ b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py @@ -2,6 +2,11 @@ import numpy as np +from sire.legacy.Units import angstrom3 as _Sire_angstrom3 +from sire.legacy.Units import k_boltz as _k_boltz +from sire.legacy.Units import meter3 as _Sire_meter3 +from sire.legacy.Units import mole as _Sire_mole + import BioSimSpace.Sandpit.Exscientia as BSS from BioSimSpace.Sandpit.Exscientia.Align import decouple from BioSimSpace.Sandpit.Exscientia.FreeEnergy import Restraint @@ -13,9 +18,11 @@ # Store the tutorial URL. url = BSS.tutorialUrl() +################### Test Borech Restraint ################ + @pytest.fixture(scope="session") -def restraint_components(): +def boresch_restraint_component(): """Generate a the components required to create a restraint.""" ligand = BSS.IO.readMolecules( [f"{url}/ligand01.prm7.bz2", f"{url}/ligand01.rst7.bz2"] @@ -69,9 +76,9 @@ def restraint_components(): @pytest.fixture(scope="session") -def restraint(restraint_components): +def boresch_restraint(boresch_restraint_component): """Generate the Boresch restraint object.""" - system, restraint_dict = restraint_components + system, restraint_dict = boresch_restraint_component restraint = Restraint( system, restraint_dict, 300 * kelvin, restraint_type="Boresch" @@ -79,23 +86,23 @@ def restraint(restraint_components): return restraint -def test_sanity(restraint): +def test_sanity_boresch(boresch_restraint): """Sanity check.""" - assert isinstance(restraint, Restraint) + assert isinstance(boresch_restraint, Restraint) -def test_numerical_correction(restraint): - dG = restraint.getCorrection(method="numerical") / kcal_per_mol +def test_numerical_correction_boresch(boresch_restraint): + dG = boresch_restraint.getCorrection(method="numerical") / kcal_per_mol assert np.isclose(-7.2, dG, atol=0.1) -def test_analytical_correction(restraint): - dG = restraint.getCorrection(method="analytical") / kcal_per_mol +def test_analytical_correction_boresch(boresch_restraint): + dG = boresch_restraint.getCorrection(method="analytical") / kcal_per_mol assert np.isclose(-7.2, dG, atol=0.1) - assert isinstance(restraint, Restraint) + assert isinstance(boresch_restraint, Restraint) -test_force_constants = [ +test_force_constants_boresch = [ ({"kr": 0}, ValueError), ({"kthetaA": 0}, ValueError), ({"kthetaB": 0}, ValueError), @@ -110,10 +117,12 @@ def test_analytical_correction(restraint): ] -@pytest.mark.parametrize("force_constants, expected", test_force_constants) -def test_input_force_constants(restraint_components, force_constants, expected): +@pytest.mark.parametrize("force_constants, expected", test_force_constants_boresch) +def test_input_force_constants_boresch( + boresch_restraint_component, force_constants, expected +): print(force_constants) - system, restraint_dict = restraint_components + system, restraint_dict = boresch_restraint_component dict_copy = restraint_dict.copy() force_constants_copy = restraint_dict["force_constants"].copy() force_constants_copy.update(force_constants) @@ -125,11 +134,11 @@ def test_input_force_constants(restraint_components, force_constants, expected): Restraint(system, dict_copy, 300 * kelvin, restraint_type="Boresch") -class TestGromacsOutput: +class TestGromacsOutputBoresch: @staticmethod @pytest.fixture(scope="class") - def Topology(restraint): - return restraint.toString(engine="Gromacs").split("\n") + def Topology(boresch_restraint): + return boresch_restraint.toString(engine="Gromacs").split("\n") def test_sanity(self, Topology): """Sanity check.""" @@ -177,11 +186,11 @@ def test_dihedral(self, Topology): assert al == "1498" -class TestSomdOutput: +class TestSomdOutputBoresch: @staticmethod @pytest.fixture(scope="class") - def getRestraintSomd(restraint): - boresch_str = restraint.toString(engine="SOMD").split("=")[1].strip() + def getRestraintSomd(boresch_restraint): + boresch_str = boresch_restraint.toString(engine="SOMD").split("=")[1].strip() boresch_dict = eval(boresch_str) return boresch_dict @@ -207,3 +216,341 @@ def test_equil_vals(self, getRestraintSomd): assert equil_vals["phiA0"] == 2.59 assert equil_vals["phiB0"] == -1.20 assert equil_vals["phiC0"] == 2.63 + + +################### Test Multiple Distance Restraint ################### + + +@pytest.fixture(scope="session") +def mdr_restraint_component(): + """ + Generate a the components required to create a + multiple distance restraints restraint object. + """ + ligand = BSS.IO.readMolecules( + [f"{url}/ligand01.prm7.bz2", f"{url}/ligand01.rst7.bz2"] + ).getMolecule(0) + decoupled_ligand = decouple(ligand) + + protein = BSS.IO.readMolecules( + [f"{url}/1jr5.crd.bz2", f"{url}/1jr5.top.bz2"] + ).getMolecule(0) + + system = (protein + decoupled_ligand).toSystem() + + # Create three distance restraints + restraint_dict = { + "distance_restraints": [ + { + "l1": decoupled_ligand.getAtoms()[0], + "r1": protein.getAtoms()[0], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + { + "l1": decoupled_ligand.getAtoms()[1], + "r1": protein.getAtoms()[1], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + ], + "permanent_distance_restraint": { + "l1": decoupled_ligand.getAtoms()[2], + "r1": protein.getAtoms()[2], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + } + + return system, restraint_dict + + +@pytest.fixture(scope="session") +def mdr_restraint(mdr_restraint_component): + """Generate the multiple distance restraints restraint object.""" + system, restraint_dict = mdr_restraint_component + + restraint = Restraint( + system, restraint_dict, 300 * kelvin, restraint_type="multiple_distance" + ) + return restraint + + +@pytest.fixture(scope="session") +def mdr_restraint_component_fb_r0(): + """ + Generate a the components required to create a + multiple distance restraints restraint object with + the values for r0 and r_fb set to 0 for the permanent restraint. + """ + ligand = BSS.IO.readMolecules( + [f"{url}/ligand01.prm7.bz2", f"{url}/ligand01.rst7.bz2"] + ).getMolecule(0) + decoupled_ligand = decouple(ligand) + + protein = BSS.IO.readMolecules( + [f"{url}/1jr5.crd.bz2", f"{url}/1jr5.top.bz2"] + ).getMolecule(0) + + system = (protein + decoupled_ligand).toSystem() + + # Create three distance restraints + restraint_dict = { + "distance_restraints": [ + { + "l1": decoupled_ligand.getAtoms()[0], + "r1": protein.getAtoms()[0], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + { + "l1": decoupled_ligand.getAtoms()[1], + "r1": protein.getAtoms()[1], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + ], + "permanent_distance_restraint": { + "l1": decoupled_ligand.getAtoms()[2], + "r1": protein.getAtoms()[2], + "r0": 0 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 0 * angstrom, + }, + } + + return system, restraint_dict + + +@pytest.fixture(scope="session") +def mdr_restraint_fb_r0(mdr_restraint_component_fb_r0): + """ + Generate the multiple distance restraints restraint object with + the values for r0 and r_fb set to 0 for the permanent restraint. + """ + system, restraint_dict = mdr_restraint_component_fb_r0 + + restraint = Restraint( + system, restraint_dict, 300 * kelvin, restraint_type="multiple_distance" + ) + return restraint + + +def test_sanity_mdr(mdr_restraint): + """Sanity check.""" + assert isinstance(mdr_restraint, Restraint) + + +def test_numerical_correction_mdr(mdr_restraint): + dG = mdr_restraint.getCorrection(method="numerical") / kcal_per_mol + assert np.isclose(-0.991, dG, atol=0.001) + + +def test_numerical_correction_mdr_fb0(mdr_restraint_fb_r0): + """ + Check that the numerical multiple distance restraints correction is + consistent with the analytical result when the flat-bottom radius is 0 + and r0 = 0. + """ + # Get the numerical correction + dG_numerical = mdr_restraint_fb_r0.getCorrection(method="numerical") / kcal_per_mol + + # Calculate the analytical correction + # Constants + v0 = ( + ((_Sire_meter3 / 1000) / _Sire_mole) / _Sire_angstrom3 + ).value() # standard state volume in A^3 + R = ( + _k_boltz.value() * kcal_per_mol / kelvin + ).value() # molar gas constant in kcal mol-1 K-1 + + T = mdr_restraint_fb_r0.T / kelvin # Temperature in Kelvin + + kr = mdr_restraint_fb_r0._restraint_dict["permanent_distance_restraint"]["kr"] / ( + kcal_per_mol / angstrom**2 + ) + dG_analytical = ( + R * T * np.log((2 * np.pi * R * T / kr) ** (3 / 2) / v0) + ) # kcal mol-1 + + assert np.isclose(dG_analytical, dG_numerical, atol=0.001) + + +def test_analytical_correction_mdr(mdr_restraint): + with pytest.raises(NotImplementedError): + dG = mdr_restraint.getCorrection(method="analytical") / kcal_per_mol + + +class TestGromacsOutputMDR: + @staticmethod + @pytest.fixture(scope="class") + def getRestraintGromacsFull(mdr_restraint): + """The form of the restraints when the perturbation type is `full`.""" + mdr_str = mdr_restraint.toString( + engine="GROMACS", perturbation_type="full" + ).split("\n") + return mdr_str + + @staticmethod + @pytest.fixture(scope="class") + def getRestraintGromacsRelease(mdr_restraint): + """The form of the restraints when the perturbation type is `release_restraint`.""" + mdr_str = mdr_restraint.toString( + engine="GROMACS", perturbation_type="release_restraint" + ).split("\n") + return mdr_str + + def test_n_lines(self, getRestraintGromacsFull, getRestraintGromacsRelease): + """Test that the correct number of lines have been written.""" + assert len(getRestraintGromacsFull) == 6 + assert len(getRestraintGromacsRelease) == 8 + + def test_section_names(self, getRestraintGromacsFull, getRestraintGromacsRelease): + """Check that the section headings are correct.""" + assert ( + getRestraintGromacsFull[0].strip() + == getRestraintGromacsRelease[0].strip() + == "[ intermolecular_interactions ]" + ) + assert ( + getRestraintGromacsFull[1].strip() + == getRestraintGromacsRelease[1].strip() + == "[ bonds ]" + ) + assert getRestraintGromacsRelease[5].strip() == "[ distance_restraints ]" + + def test_comments(self, getRestraintGromacsFull, getRestraintGromacsRelease): + """Check that the value label comments are as expected.""" + bond_restr_str = "; ai aj type lowA up1A up2A kdrA lowB up1B up2B kdrB" + distance_restr_str = "; ai aj type index type' low up1 up2 fac" + assert ( + getRestraintGromacsFull[2].strip() + == getRestraintGromacsRelease[2].strip() + == bond_restr_str + ) + assert getRestraintGromacsRelease[6].strip() == distance_restr_str + + def test_at_nums(self, getRestraintGromacsFull, getRestraintGromacsRelease): + """Check that the anchor point atom numbers are correct.""" + full_restr_lines = getRestraintGromacsFull[3:] + release_restr_lines = getRestraintGromacsRelease[3:5] + [ + getRestraintGromacsRelease[7] + ] + for lines in [full_restr_lines, release_restr_lines]: + ais = [x.split()[0] for x in lines] + ajs = [x.split()[1] for x in lines] + assert ais == ["1", "2", "3"] + assert ajs == ["1496", "1497", "1498"] + + def test_restraint_vals_bond_restraint( + self, getRestraintGromacsFull, getRestraintGromacsRelease + ): + """Check that the bond restraint values are correct.""" + full_restr_lines = getRestraintGromacsFull[3:] + release_restr_lines = getRestraintGromacsRelease[3:5] + + # Strip off the atom numbers. + full_restr_lines = [x.split()[2:] for x in full_restr_lines] + release_restr_lines = [x.split()[2:] for x in release_restr_lines] + + # Check that all lines are the same. + assert all([x == full_restr_lines[0] for x in full_restr_lines]) + assert all([x == release_restr_lines[0] for x in release_restr_lines]) + assert full_restr_lines[0] == release_restr_lines[0] + + # Pick one line (equivalence of all lines checked above) and check the values. + line = [x.strip() for x in full_restr_lines[0]] + assert line == [ + "10", + "0.200", + "0.400", + "100.000", + "0.00", + "0.200", + "0.400", + "100.000", + "4184.00", + ] + + def test_restraint_vals_distance_restraint(self, getRestraintGromacsRelease): + """Check that the distance restraint values are correct.""" + restr_vals = [x.strip() for x in getRestraintGromacsRelease[7].split()[2:]] + assert restr_vals == ["2", "0", "2", "0.200", "0.400", "100.000", "1.0"] + + +class TestSomdOutputMDR: + @staticmethod + @pytest.fixture(scope="class") + def getRestraintSomd(mdr_restraint): + """The standard form of the restraints for any perturbation type other than `restraint`.""" + mdr_str = ( + mdr_restraint.toString(engine="SOMD", perturbation_type="release_restraint") + .split("\n")[0] + .split("=")[1] + .strip() + ) + permanent_mdr_str = ( + mdr_restraint.toString(engine="SOMD", perturbation_type="release_restraint") + .split("\n")[1] + .split("=")[1] + .strip() + ) + + mdr_dict = eval(mdr_str) + permanent_mdr_str = eval(permanent_mdr_str) + return mdr_dict, permanent_mdr_str + + @staticmethod + @pytest.fixture(scope="class") + def getRestraintSomdRestraintPert(mdr_restraint): + mdr_str = ( + mdr_restraint.toString(engine="SOMD", perturbation_type="restraint") + .split("=")[1] + .strip() + ) + mdr_dict = eval(mdr_str) + return mdr_dict + + def test_sanity(self, getRestraintSomd): + "Sanity check" + mdr_dict_std = getRestraintSomd[0] + mdr_dict_permanent = getRestraintSomd[1] + assert isinstance(mdr_dict_std, dict) + assert isinstance(mdr_dict_permanent, dict) + + def test_dict_vals(self, getRestraintSomd): + std_mdr_dict = getRestraintSomd[0] + expected_anchors_std = [{0, 1495}, {1, 1496}] + permanent_mdr_dict = getRestraintSomd[1] + expected_anchors_permanent = [{2, 1497}] + + # Test std restraints + for i, restraint in enumerate(std_mdr_dict): + assert set(restraint) == expected_anchors_std[i] + assert std_mdr_dict[restraint][0] == 3.0 # r0 + # kr is halved because of the definition of the force constant in SOMD + assert std_mdr_dict[restraint][1] == 5.0 # kr + assert std_mdr_dict[restraint][2] == 1.0 # r_fb + + # Test permanent restraint + for i, restraint in enumerate(permanent_mdr_dict): + assert set(restraint) == expected_anchors_permanent[i] + assert permanent_mdr_dict[restraint][0] == 3.0 # r0 + # kr is halved because of the definition of the force constant in SOMD + assert permanent_mdr_dict[restraint][1] == 5.0 # kr + assert permanent_mdr_dict[restraint][2] == 1.0 # r_fb + + def test_dict_vals_restraint_pert(self, getRestraintSomdRestraintPert): + expected_anchors = [{0, 1495}, {1, 1496}, {2, 1497}] + + for i, restraint in enumerate(getRestraintSomdRestraintPert): + assert set(restraint) == expected_anchors[i] + assert getRestraintSomdRestraintPert[restraint][0] == 3.0 # r0 + # kr is halved because of the definition of the force constant in SOMD + assert getRestraintSomdRestraintPert[restraint][1] == 5.0 # kr + assert getRestraintSomdRestraintPert[restraint][2] == 1.0 # r_fb diff --git a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py index 4bd2a5762..c7489f8d2 100644 --- a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py +++ b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py @@ -37,7 +37,7 @@ def setup_system(): return ligand, decouple_system, protocol -# Make sure GROMSCS is installed. +# Make sure GROMACS is installed. has_gromacs = BSS._gmx_exe is not None @@ -169,7 +169,7 @@ def _restraint_search(tmp_path_factory): @staticmethod @pytest.fixture(scope="class") - def restraint(_restraint_search): + def boresch_restraint(_restraint_search): restraint_search, outdir = _restraint_search restraint = restraint_search.analyse( method="BSS", restraint_type="Boresch", block=False @@ -178,7 +178,7 @@ def restraint(_restraint_search): @staticmethod @pytest.fixture(scope="class") - def restraint_k20(_restraint_search): + def boresch_restraint_k20(_restraint_search): restraint_search, outdir = _restraint_search restraint = restraint_search.analyse( method="BSS", @@ -188,30 +188,39 @@ def restraint_k20(_restraint_search): ) return restraint, outdir - def test_sanity(self, restraint): - restraint, _ = restraint + @staticmethod + @pytest.fixture(scope="class") + def multiple_distance_restraint(_restraint_search): + restraint_search, outdir = _restraint_search + restraint = restraint_search.analyse( + method="BSS", restraint_type="multiple_distance", block=False + ) + return restraint, outdir + + def test_sanity(self, boresch_restraint): + restraint, _ = boresch_restraint assert isinstance(restraint, Restraint) - def test_plots(self, restraint): + def test_plots_boresch(self, boresch_restraint): """Test if all the plots have been generated correctly""" - restraint, outdir = restraint + restraint, outdir = boresch_restraint assert (outdir / "restraint_idx0_dof_time.png").is_file() assert (outdir / "restraint_idx0_dof_hist.png").is_file() - def test_dG_off(self, restraint): + def test_dG_off_boresch(self, boresch_restraint): """Test if the restraint generated has the same energy""" - restraint, _ = restraint + restraint, _ = boresch_restraint assert np.isclose(-9.8955, restraint.correction.value(), atol=0.01) - def test_bond(self, restraint): - restraint, _ = restraint + def test_bond_boresch(self, boresch_restraint): + restraint, _ = boresch_restraint equilibrium_values_r0 = ( restraint._restraint_dict["equilibrium_values"]["r0"] / nanometer ) assert np.isclose(0.6057, equilibrium_values_r0, atol=0.001) - def test_angles(self, restraint): - restraint, _ = restraint + def test_angles_boresch(self, boresch_restraint): + restraint, _ = boresch_restraint equilibrium_values_thetaA0 = ( restraint._restraint_dict["equilibrium_values"]["thetaA0"] / degree ) @@ -221,8 +230,8 @@ def test_angles(self, restraint): ) assert np.isclose(56.4496, equilibrium_values_thetaB0, atol=0.001) - def test_dihedrals(self, restraint): - restraint, _ = restraint + def test_dihedrals_boresch(self, boresch_restraint): + restraint, _ = boresch_restraint equilibrium_values_phiA0 = ( restraint._restraint_dict["equilibrium_values"]["phiA0"] / degree ) @@ -236,20 +245,20 @@ def test_dihedrals(self, restraint): ) assert np.isclose(71.3148, equilibrium_values_phiC0, atol=0.001) - def test_index(self, restraint): - restraint, _ = restraint + def test_index_boresch(self, boresch_restraint): + restraint, _ = boresch_restraint idxs = { k: restraint._restraint_dict["anchor_points"][k].index() for k in restraint._restraint_dict["anchor_points"] } assert idxs == {"r1": 1560, "r2": 1558, "r3": 1562, "l1": 10, "l2": 9, "l3": 11} - def test_force_constant(self, restraint_k20): - restraint, _ = restraint_k20 + def test_force_constant_boresch(self, boresch_restraint_k20): + restraint, _ = boresch_restraint_k20 for force_constant in restraint._restraint_dict["force_constants"].values(): assert np.isclose(20, force_constant.value(), atol=0.01) - def test_analysis_failure(self, _restraint_search): + def test_analysis_failure_boresch(self, _restraint_search): restraint_search, _ = _restraint_search with pytest.raises(AnalysisError): restraint = restraint_search.analyse( @@ -258,3 +267,75 @@ def test_analysis_failure(self, _restraint_search): block=False, cutoff=0.1 * angstrom, ) + + def test_plots_mdr(self, multiple_distance_restraint): + """Test if all the plots have been generated correctly""" + restraint, outdir = multiple_distance_restraint + assert (outdir / "distance_restraints_time.png").is_file() + assert (outdir / "distance_restraints_hist.png").is_file() + + def test_dG_off_mdr(self, multiple_distance_restraint): + """Test if the restraint generated has the same energy""" + restraint, _ = multiple_distance_restraint + assert np.isclose(-0.0790, restraint.correction.value(), atol=0.001) + + def test_dict_mdr(self, multiple_distance_restraint): + """Check that the MDR restraint dictionary is correct""" + restraint, _ = multiple_distance_restraint + restr_dict = restraint._restraint_dict + # Check the permanent distance restraint - this should always be the same. + assert restr_dict["permanent_distance_restraint"]["r1"].index() == 1539 + assert restr_dict["permanent_distance_restraint"]["l1"].index() == 10 + assert restr_dict["permanent_distance_restraint"]["r0"].unit() == "ANGSTROM" + # Check close + assert restr_dict["permanent_distance_restraint"][ + "r0" + ].value() == pytest.approx(8.9019, abs=1e-4) + assert restr_dict["permanent_distance_restraint"]["kr"].unit() == "M Q-1 T-2" + assert restr_dict["permanent_distance_restraint"]["kr"].value() == 40.0 + assert restr_dict["permanent_distance_restraint"]["r_fb"].unit() == "ANGSTROM" + assert restr_dict["permanent_distance_restraint"][ + "r_fb" + ].value() == pytest.approx(0.5756, abs=1e-4) + # Check the distance restraints - the parameters will vary slightly due to random initialisation + # of the clustering algorithm during restraint searching. + assert len(restr_dict["distance_restraints"]) == 9 + idxs_receptor = [ + pair["r1"].index() + for pair in restraint._restraint_dict["distance_restraints"] + ] + idxs_ligand = [ + pair["l1"].index() + for pair in restraint._restraint_dict["distance_restraints"] + ] + # The search algorithm is not deterministic, so the receptor anchor points + # are not always the same. The ligand anchor points are always the same as + # all heavy atoms are used. + expected_idxs_ligand = [13, 9, 16, 12, 11, 14, 15, 8, 17].sort() + assert len(idxs_receptor) == len(idxs_ligand) + assert idxs_ligand.sort() == expected_idxs_ligand + + def test_parameters_mdr(self, multiple_distance_restraint): + """ + Check the parameters selected for the permanent distance restraint. + Other restraints are not tested as the multiple distance restraint + selection algorithm is not deterministic. + """ + restraint, _ = multiple_distance_restraint + permanent_restraint = restraint._restraint_dict["permanent_distance_restraint"] + assert np.isclose(8.9019, permanent_restraint["r0"] / angstrom, atol=0.001) + assert np.isclose( + 40, + permanent_restraint["kr"] / (kcal_per_mol / angstrom**2), + ) + assert np.isclose(0.5756, permanent_restraint["r_fb"] / angstrom, atol=0.001) + + def test_analysis_failure_mdr(self, _restraint_search): + restraint_search, _ = _restraint_search + with pytest.raises(AnalysisError): + restraint = restraint_search.analyse( + method="BSS", + restraint_type="multiple_distance", + block=False, + cutoff=0.1 * angstrom, + ) diff --git a/tests/Sandpit/Exscientia/IO/test_file_cache.py b/tests/Sandpit/Exscientia/IO/test_file_cache.py index 92e0a8d91..20ed2f9f3 100644 --- a/tests/Sandpit/Exscientia/IO/test_file_cache.py +++ b/tests/Sandpit/Exscientia/IO/test_file_cache.py @@ -88,7 +88,7 @@ def test_file_cache(): has_amber is False or has_openff is False, reason="Requires AMBER and OpenFF to be installed", ) -def test_file_cache_mol_nuums(): +def test_file_cache_mol_nums(): """ Make sure that systems can be cached if they have the same UID, but contain different MolNUms. diff --git a/tests/Sandpit/Exscientia/Process/test_amber.py b/tests/Sandpit/Exscientia/Process/test_amber.py index 60168a707..73866d01a 100644 --- a/tests/Sandpit/Exscientia/Process/test_amber.py +++ b/tests/Sandpit/Exscientia/Process/test_amber.py @@ -21,6 +21,22 @@ def system(): ) +@pytest.fixture(scope="session") +def rna_system(): + """An RNA system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["rna_6e1s.rst7", "rna_6e1s.prm7"]) + ) + + +@pytest.fixture(scope="session") +def large_protein_system(): + """A large protein system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["complex_vac0.prm7", "complex_vac0.rst7"]) + ) + + @pytest.mark.skipif( has_amber is False or has_pyarrow is False, reason="Requires AMBER and pyarrow to be installed.", @@ -213,6 +229,42 @@ def test_args(system): assert arg_string == "-x X -a A -b B -y -e -f 6 -g -h H -k K -z Z" +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") +def test_backbone_restraint_mask_protein(large_protein_system): + """ + Test that the amber backbone restraint mask is correct for a protein system. + We need a large protein system otherwise the logic we want to test will be + skipped, and individual atoms will be specified in the config. + """ + + # Create an equilibration protocol with backbone restraints. + protocol = BSS.Protocol.Equilibration(restraint="backbone") + + # Create the process object. + process = BSS.Process.Amber(large_protein_system, protocol, name="test") + + # Check that the correct restraint mask is in the config. + config = process.getConfig() + assert ' restraintmask="@N,CA,C,O",' in config + + +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER to be installed.") +def test_backbone_restraint_mask_rna(rna_system): + """ + Test that the amber backbone restraint mask is correct for an RNA system. + """ + + # Create an equilibration protocol with backbone restraints. + protocol = BSS.Protocol.Equilibration(restraint="backbone") + + # Create the process object. + process = BSS.Process.Amber(rna_system, protocol, name="test") + + # Check that the correct restraint mask is in the config. + config = process.getConfig() + assert " restraintmask=\"@P,C5',C3',O3',O5'\"," in config + + def run_process(system, protocol, check_data=False): """Helper function to run various simulation protocols.""" diff --git a/tests/Sandpit/Exscientia/Process/test_gromacs.py b/tests/Sandpit/Exscientia/Process/test_gromacs.py index 7d4e57c49..ec3b9b7f7 100644 --- a/tests/Sandpit/Exscientia/Process/test_gromacs.py +++ b/tests/Sandpit/Exscientia/Process/test_gromacs.py @@ -210,7 +210,9 @@ def test_write_restraint(system, tmp_path): ) # Create a short production protocol. - protocol = BSS.Protocol.Production(runtime=BSS.Types.Time(0.0001, "nanoseconds")) + protocol = BSS.Protocol.FreeEnergy( + runtime=BSS.Types.Time(0.0001, "nanoseconds"), perturbation_type="full" + ) # Run the process and check that it finishes without error. run_process(system, protocol, restraint=restraint, work_dir=str(tmp_path)) diff --git a/tests/Sandpit/Exscientia/Process/test_somd.py b/tests/Sandpit/Exscientia/Process/test_somd.py index cf2a4d4de..9147ee3d0 100644 --- a/tests/Sandpit/Exscientia/Process/test_somd.py +++ b/tests/Sandpit/Exscientia/Process/test_somd.py @@ -142,9 +142,11 @@ def test_pert_res_num(perturbable_system): assert unique1[0] == "perturbed residue number = 2" -def test_restraint(system, tmp_path): - """Test if the restraint has been written in a way that can be processed - correctly.""" +def test_restraint_boresch(system, tmp_path): + """ + Test if the Boresch restraint has been written in a way that can be processed + correctly. + """ ligand = BSS.IO.readMolecules( [f"{url}/ligand01.rst7", f"{url}/ligand01.prm7"] ).getMolecule(0) @@ -196,6 +198,77 @@ def test_restraint(system, tmp_path): assert "boresch restraints dictionary" in lines[-1] +@pytest.fixture(scope="module") +def restraint_mdr(system): + """A restraint object for testing.""" + ligand = BSS.IO.readMolecules( + [f"{url}/ligand01.rst7", f"{url}/ligand01.prm7"] + ).getMolecule(0) + decoupled_ligand = decouple(ligand) + + fake_protein = BSS.IO.readMolecules( + [f"{url}/ligand04.rst7", f"{url}/ligand04.prm7"] + ).getMolecule(0) + + system = (decoupled_ligand + fake_protein).toSystem() + + # Create three distance restraints + restraint_dict = { + "distance_restraints": [ + { + "l1": decoupled_ligand.getAtoms()[0], + "r1": fake_protein.getAtoms()[0], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + { + "l1": decoupled_ligand.getAtoms()[1], + "r1": fake_protein.getAtoms()[1], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + ], + "permanent_distance_restraint": { + "l1": decoupled_ligand.getAtoms()[2], + "r1": fake_protein.getAtoms()[2], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + } + restraint = Restraint( + system, restraint_dict, 300 * kelvin, restraint_type="multiple_distance" + ) + + return restraint + + +def test_restraint_mdr(tmp_path, restraint_mdr): + """ + Test if the multiple distance restraints restraint has been + written in a way that can be processed correctly. + """ + # Create a short free energy protocol. + protocol = BSS.Protocol.Production( + runtime=BSS.Types.Time(0.001, "nanoseconds"), + ) + + # Run the process and check that it finishes without error. + run_process( + restraint_mdr._system, protocol, restraint=restraint_mdr, work_dir=str(tmp_path) + ) + + # Check for restraint options in config file. + with open(tmp_path / "test.cfg", "r") as f: + lines = f.readlines() + assert "use distance restraints = True" in lines[-4] + assert "use permanent distance restraints = True" in lines[-3] + assert "distance restraints dictionary" in lines[-2] + assert "permanent distance restraints dictionary" in lines[-1] + + def run_process(system, protocol, **kwargs): """Helper function to run various simulation protocols.""" diff --git a/tests/Sandpit/Exscientia/Protocol/test_config.py b/tests/Sandpit/Exscientia/Protocol/test_config.py index 054714bec..efb52898a 100644 --- a/tests/Sandpit/Exscientia/Protocol/test_config.py +++ b/tests/Sandpit/Exscientia/Protocol/test_config.py @@ -205,6 +205,61 @@ def test_staged_fep_df(self, system): assert "init-lambda-state = 6" in mdp_text +@pytest.mark.skipif( + has_antechamber is False or has_openff is False, + reason="Requires ambertools/antechamber and openff to be installed", +) +@pytest.fixture(scope="module") +def system_and_mdr_restraint(): + # Benzene. + m = BSS.Parameters.openff_unconstrained_2_0_0("c1ccccc1").getMolecule() + m._sire_object = m._sire_object.edit().rename("LIG").molecule().commit() + + # Assign atoms for restraint + atom_1 = m.getAtoms()[0] + atom_2 = m.getAtoms()[1] + atom_3 = m.getAtoms()[2] + atom_4 = m.getAtoms()[3] + atom_5 = m.getAtoms()[4] + atom_6 = m.getAtoms()[5] + + mol = decouple(m) + system = mol.toSystem() + + # Create random restraint dictionary + restraint_dict = { + "distance_restraints": [ + { + "l1": atom_1, + "r1": atom_2, + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + { + "l1": atom_3, + "r1": atom_4, + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + ], + "permanent_distance_restraint": { + "l1": atom_5, + "r1": atom_6, + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + } + + restraint = Restraint( + system, restraint_dict, 298 * kelvin, restraint_type="multiple_distance" + ) + + return system, restraint + + @pytest.mark.skipif( has_antechamber is False or has_openff is False, reason="Requires ambertools/antechamber and openff to be installed", @@ -265,6 +320,30 @@ def test_annihilate_vdw2q(self, system): assert "couple-lambda1 = q" in mdp_text assert "couple-intramol = no" in mdp_text + @pytest.mark.skipif( + has_gromacs is False, reason="Requires GROMACS to be installed." + ) + def test_mdr_force_constant(self, system, system_and_mdr_restraint): + """ + Check that when the restraint type == 'release_restraint', the + force constant of the permanent distance restraint (not affected by + restraint-lambda) is written to the MDP file. + """ + system, restraint = system_and_mdr_restraint + protocol = FreeEnergy( + perturbation_type="release_restraint", + ) + + freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( + system, + protocol, + engine="GROMACS", + restraint=restraint, + ) + with open(f"{freenrg._work_dir}/lambda_6/gromacs.mdp", "r") as f: + mdp_text = f.read() + assert "disre-fc = 4184.0" in mdp_text + @pytest.mark.skipif( has_gromacs is False, reason="Requires GROMACS to be installed." ) @@ -294,7 +373,7 @@ def test_sc_parameters(self, system): class TestSomdABFE: @staticmethod @pytest.fixture(scope="class") - def system_and_restraint(): + def system_and_boresch_restraint(): # Benzene. m = BSS.Parameters.openff_unconstrained_2_0_0("c1ccccc1").getMolecule() @@ -343,9 +422,9 @@ def system_and_restraint(): return system, restraint - def test_turn_on_restraint(self, system_and_restraint): - """Test for turning on the restraint""" - system, restraint = system_and_restraint + def test_turn_on_restraint_boresch(self, system_and_boresch_restraint): + """Test for turning on multiple distance restraints""" + system, restraint = system_and_boresch_restraint protocol = FreeEnergy(perturbation_type="restraint") freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( system, protocol, engine="SOMD", restraint=restraint @@ -387,9 +466,96 @@ def test_turn_on_restraint(self, system_and_restraint): for line in lines: assert line in pert_text - def test_discharge(self, system_and_restraint): + def test_turn_on_restraint_mdr(self, system_and_mdr_restraint): + """Test for turning on the mdr restraint""" + system, restraint = system_and_mdr_restraint + protocol = FreeEnergy(perturbation_type="restraint") + freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( + system, protocol, engine="SOMD", restraint=restraint + ) + + # Test .cfg file + with open(f"{freenrg._work_dir}/lambda_0.0000/somd.cfg", "r") as f: + cfg_text = f.read() + assert "use distance restraints = True" in cfg_text + assert ( + "distance restraints dictionary = {(1, 0): (3.0, 5.0, 1.0), (3, 2): " + "(3.0, 5.0, 1.0), (5, 4): (3.0, 5.0, 1.0)}" + ) in cfg_text + assert "turn on receptor-ligand restraints mode = True" in cfg_text + + # Test .pert file + with open(f"{freenrg._work_dir}/lambda_0.0000/somd.pert", "r") as f: + pert_text = f.read() + # Check all atoms present + carbons = [f"C{i}" for i in range(1, 7)] + hydrogens = [f"H{i}" for i in range(1, 7)] + atoms = carbons + hydrogens + for atom in atoms: + assert atom in pert_text + # Check perturbations are correct + lines = [ + "molecule LIG", + "atom", + "initial_type C1", + "final_type C1", + "initial_LJ 3.48065 0.08688", + "final_LJ 3.48065 0.08688", + "initial_charge -0.13000", + "final_charge -0.13000", + "endatom", + "endmolecule", + ] + for line in lines: + assert line in pert_text + + def test_release_restraint_mdr(self, system_and_mdr_restraint): + """Test for releasing the non-permanent mdr restraints""" + system, restraint = system_and_mdr_restraint + protocol = FreeEnergy(perturbation_type="release_restraint") + freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( + system, protocol, engine="SOMD", restraint=restraint + ) + + # Test .cfg file + with open(f"{freenrg._work_dir}/lambda_0.0000/somd.cfg", "r") as f: + cfg_text = f.read() + assert "use distance restraints = True" in cfg_text + assert "use permanent distance restraints = True" in cfg_text + assert "turn on receptor-ligand restraints mode = True" in cfg_text + assert ( + "distance restraints dictionary = {(1, 0): (3.0, 5.0, 1.0), " + "(3, 2): (3.0, 5.0, 1.0)}" + ) in cfg_text + assert ( + "permanent distance restraints dictionary = {(5, 4): (3.0, 5.0, 1.0)}" + in cfg_text + ) + + # Test .pert file + with open(f"{freenrg._work_dir}/lambda_0.0000/somd.pert", "r") as f: + pert_text = f.read() + # Don't check atoms present as they are given randomised names + # and the atom types are all du + # Check perturbations are correct + lines = [ + "molecule LIG", + "atom", + "initial_type du", + "final_type du", + "initial_LJ 0.00000 0.00000", + "final_LJ 0.00000 0.00000", + "initial_charge 0.00000", + "final_charge 0.00000", + "endatom", + "endmolecule", + ] + for line in lines: + assert line in pert_text + + def test_discharge(self, system_and_boresch_restraint): """Test for discharging the ligand""" - system, restraint = system_and_restraint + system, restraint = system_and_boresch_restraint protocol = FreeEnergy(perturbation_type="discharge_soft") freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( system, protocol, engine="SOMD", restraint=restraint @@ -431,9 +597,9 @@ def test_discharge(self, system_and_restraint): for line in lines: assert line in pert_text - def test_vanish(self, system_and_restraint): + def test_vanish(self, system_and_boresch_restraint): """Test for vanishing the ligand""" - system, restraint = system_and_restraint + system, restraint = system_and_boresch_restraint protocol = FreeEnergy(perturbation_type="vanish_soft") freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( system, protocol, engine="SOMD", restraint=restraint @@ -475,9 +641,9 @@ def test_vanish(self, system_and_restraint): for line in lines: assert line in pert_text - def test_discharge_and_vanish(self, system_and_restraint): + def test_discharge_and_vanish(self, system_and_boresch_restraint): """Test for simultaneously discharging and vanishing the ligand""" - system, restraint = system_and_restraint + system, restraint = system_and_boresch_restraint protocol = FreeEnergy(perturbation_type="full") freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( system, protocol, engine="SOMD", restraint=restraint diff --git a/tests/Sandpit/Exscientia/_SireWrappers/test_system.py b/tests/Sandpit/Exscientia/_SireWrappers/test_system.py index ed0391534..ee98bfac0 100644 --- a/tests/Sandpit/Exscientia/_SireWrappers/test_system.py +++ b/tests/Sandpit/Exscientia/_SireWrappers/test_system.py @@ -17,6 +17,14 @@ def system(): ) +@pytest.fixture(scope="session") +def rna_system(): + """An RNA system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["rna_6e1s.rst7", "rna_6e1s.prm7"]) + ) + + # Parameterise the function with a set of molecule indices. @pytest.mark.parametrize("index", [0, -1]) def test_molecule_equivalence(system, index): @@ -453,6 +461,20 @@ def test_rotate_box_vectors(system): assert math.isclose(c1.z().value(), c0.y().value()) +def test_residue_searches_protein(system): + amino_acids = system.getAminoAcids() + assert isinstance(amino_acids[0], BSS._SireWrappers._residue.Residue) + assert len(amino_acids) == system.nAminoAcids() == 3 + assert len(system.getNucleotides()) == system.nNucleotides() == 0 + + +def test_residue_searches_rna(rna_system): + nucleotides = rna_system.getNucleotides() + assert isinstance(nucleotides[0], BSS._SireWrappers._residue.Residue) + assert len(rna_system.getAminoAcids()) == rna_system.nAminoAcids() == 0 + assert len(nucleotides) == rna_system.nNucleotides() == 29 + + def test_set_water_property_preserve(system): # Make sure that unique molecular properties are preserved when swapping # water topology. diff --git a/tests/Sandpit/Exscientia/conftest.py b/tests/Sandpit/Exscientia/conftest.py index eb88cf301..7facc4362 100644 --- a/tests/Sandpit/Exscientia/conftest.py +++ b/tests/Sandpit/Exscientia/conftest.py @@ -63,7 +63,7 @@ has_alchemlyb = True # Check for parquet support. - major, minor, _ = alchemlyb.__version__.split(".") + major, minor, *_ = alchemlyb.__version__.split(".") major = int(major) minor = int(minor) if major < 2 or (major < 3 and minor < 1): diff --git a/tests/_SireWrappers/test_system.py b/tests/_SireWrappers/test_system.py index e715b637f..81ff55a5f 100644 --- a/tests/_SireWrappers/test_system.py +++ b/tests/_SireWrappers/test_system.py @@ -14,6 +14,14 @@ def system(): return BSS.IO.readMolecules(["tests/input/ala.top", "tests/input/ala.crd"]) +@pytest.fixture(scope="session") +def rna_system(): + """An RNA system for re-use.""" + return BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), ["rna_6e1s.rst7", "rna_6e1s.prm7"]) + ) + + # Parameterise the function with a set of molecule indices. @pytest.mark.parametrize("index", [0, -1]) def test_molecule_equivalence(system, index): @@ -449,6 +457,20 @@ def test_rotate_box_vectors(system): assert math.isclose(c1.z().value(), c0.y().value()) +def test_residue_searches_protein(system): + amino_acids = system.getAminoAcids() + assert isinstance(amino_acids[0], BSS._SireWrappers._residue.Residue) + assert len(amino_acids) == system.nAminoAcids() == 3 + assert len(system.getNucleotides()) == system.nNucleotides() == 0 + + +def test_residue_searches_rna(rna_system): + nucleotides = rna_system.getNucleotides() + assert isinstance(nucleotides[0], BSS._SireWrappers._residue.Residue) + assert len(rna_system.getAminoAcids()) == rna_system.nAminoAcids() == 0 + assert len(nucleotides) == rna_system.nNucleotides() == 29 + + def test_set_water_property_preserve(system): # Make sure that unique molecular properties are preserved when swapping # water topology.