From 8780dd052acc7a26e3c2c10aa2cc0ac9615e0155 Mon Sep 17 00:00:00 2001 From: diogom Date: Wed, 27 Nov 2024 11:09:21 -0800 Subject: [PATCH 1/2] add cycle_break attribute to molsetup Bond --- meeko/macrocycle.py | 6 ++ meeko/molsetup.py | 20 ++++-- meeko/preparation.py | 10 ++- test/macrocycle_data/lorlatinib.mol | 106 ++++++++++++++++++++++++++++ test/macrocycle_test.py | 25 +++++++ 5 files changed, 160 insertions(+), 7 deletions(-) create mode 100644 test/macrocycle_data/lorlatinib.mol diff --git a/meeko/macrocycle.py b/meeko/macrocycle.py index d3cd947b..a342b603 100644 --- a/meeko/macrocycle.py +++ b/meeko/macrocycle.py @@ -36,6 +36,7 @@ def __init__( double_bond_penalty: float = DEFAULT_DOUBLE_BOND_PENALTY, max_breaks: int = DEFAULT_MAX_BREAKS, allow_break_atype_A: bool = False, + untyped: bool = False, ): """ Initialize macrocycle typer. @@ -50,6 +51,8 @@ def __init__( max_breaks: int allow_break_type_A: bool Allow breaking bonds involving atoms typed A, default is False. + untyped: bool + Does not use atom typing, any rotatable bond can break """ self._min_ring_size = min_ring_size self._max_ring_size = max_ring_size @@ -57,6 +60,7 @@ def __init__( self._double_bond_penalty = double_bond_penalty self.max_breaks = max_breaks self.allow_break_atype_A = allow_break_atype_A + self.untyped = untyped self.setup = None self.breakable_rings = None @@ -117,6 +121,8 @@ def _score_bond(self, bond: tuple[int, int]) -> int: bond = Bond.get_bond_id(bond[0], bond[1]) if not self.setup.bond_info[bond].rotatable: return -1 + if self.untyped: + return 100 atom_idx1, atom_idx2 = bond for i in (atom_idx1, atom_idx2): atype = self.setup.get_atom_type(i) diff --git a/meeko/molsetup.py b/meeko/molsetup.py index 9d9216b4..5ee79ed6 100644 --- a/meeko/molsetup.py +++ b/meeko/molsetup.py @@ -45,6 +45,7 @@ DEFAULT_GRAPH = [] DEFAULT_BOND_ROTATABLE = False +DEFAULT_BOND_CYCLE_BREAK = False DEFAULT_RING_CORNER_FLIP = False DEFAULT_RING_GRAPH = [] @@ -271,21 +272,19 @@ def from_json(obj: 
dict): @dataclass class Bond: - canon_id: (int, int) - index1: int - index2: int - rotatable: bool = DEFAULT_BOND_ROTATABLE def __init__( self, index1: int, index2: int, rotatable: bool = DEFAULT_BOND_ROTATABLE, + cycle_break: bool = DEFAULT_BOND_CYCLE_BREAK, ): self.canon_id = self.get_bond_id(index1, index2) self.index1 = index1 self.index2 = index2 self.rotatable = rotatable + self.cycle_break = cycle_break return @staticmethod @@ -332,14 +331,22 @@ def from_json(obj: dict): # Check that all the keys we expect are in the object dictionary as a safety measure expected_json_keys = {"canon_id", "index1", "index2", "rotatable"} - if set(obj.keys()) != expected_json_keys: + # the cycle_break attribute was added after v0.6.1, so we + # fall back to its default value to allow reading .json written + # with v0.6.0 and v0.6.1, at the expense of possibly having + # a macrocycle broken bond that will be incorrectly set with + # cycle_break=False, but JSON is used mostly for the receptor + # and we don't really use macrocycle breaking for the receptor + optional_json_keys = {"cycle_break"} + if set(obj.keys()) - optional_json_keys != expected_json_keys: return obj # Constructs a bond object from the provided keys.
index1 = obj["index1"] index2 = obj["index2"] rotatable = obj["rotatable"] - output_bond = Bond(index1, index2, rotatable) + cycle_break = obj.get("cycle_break", DEFAULT_BOND_CYCLE_BREAK) + output_bond = Bond(index1, index2, rotatable, cycle_break) return output_bond @@ -2227,6 +2234,7 @@ def default(self, obj): "index1": obj.index1, "index2": obj.index2, "rotatable": obj.rotatable, + "cycle_break": obj.cycle_break, } return json.JSONEncoder.default(self, obj) diff --git a/meeko/preparation.py b/meeko/preparation.py index 86120e44..370b5df5 100644 --- a/meeko/preparation.py +++ b/meeko/preparation.py @@ -72,6 +72,7 @@ def __init__( hydrate=False, flexible_amides=False, rigid_macrocycles=False, + untyped_macrocycles=False, min_ring_size=meeko.macrocycle.DEFAULT_MIN_RING_SIZE, max_ring_size=meeko.macrocycle.DEFAULT_MAX_RING_SIZE, keep_chorded_rings=False, @@ -126,6 +127,7 @@ def __init__( self.hydrate = hydrate self.flexible_amides = flexible_amides self.rigid_macrocycles = rigid_macrocycles + self.untyped_macrocycles = untyped_macrocycles self.min_ring_size = min_ring_size self.max_ring_size = max_ring_size self.keep_chorded_rings = keep_chorded_rings @@ -184,6 +186,7 @@ def __init__( self.max_ring_size, self.double_bond_penalty, allow_break_atype_A=self.macrocycle_allow_A, + untyped=self.untyped_macrocycles, ) self._water_builder = HydrateMoleculeLegacy() self._classes_setup = {Chem.rdchem.Mol: RDKitMoleculeSetup} @@ -305,7 +308,12 @@ def calc_flex( setup.flexibility_model = flex_model # add G pseudo atoms and set CG types - update_closure_atoms(setup, bonds_to_break, glue_pseudo_atoms) + if not self.untyped_macrocycles: + update_closure_atoms(setup, bonds_to_break, glue_pseudo_atoms) + + for atom1, atom2 in bonds_to_break: + bond_id = Bond.get_bond_id(atom1, atom2) + setup.bond_info[bond_id].cycle_break = True return diff --git a/test/macrocycle_data/lorlatinib.mol b/test/macrocycle_data/lorlatinib.mol new file mode 100644 index 00000000..6ac3b48f --- /dev/null 
+++ b/test/macrocycle_data/lorlatinib.mol @@ -0,0 +1,106 @@ +_i0 + RDKit 3D + + 49 52 0 0 0 0 0 0 0 0999 V2000 + 2.0287 0.5847 -2.8580 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5178 1.0230 -1.4775 C 0 0 1 0 0 0 0 0 0 0 0 0 + 1.8039 2.1855 -1.0050 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7602 1.9598 -0.1565 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4054 1.3343 -0.5803 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.4241 1.1052 0.3389 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2623 1.6088 1.6263 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1633 2.2777 2.0483 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8288 2.4468 1.1546 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.9774 3.0843 1.6275 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5429 0.2554 -0.0078 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5683 -1.1293 -0.3755 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.4509 -2.1347 -0.5607 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1849 -1.7343 0.0593 N 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1318 -1.7409 1.5196 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9456 -1.6441 -0.7639 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9295 -1.8994 -1.9639 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2456 -1.2536 -0.1647 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8248 -2.2022 0.7057 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0045 -1.9180 1.3883 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6125 -0.6902 1.1947 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.7392 -0.3992 1.8571 F 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0837 0.2356 0.3000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.9075 -0.0357 -0.4407 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.8322 -1.5421 -0.5749 N 0 0 0 0 0 0 0 0 0 0 0 0 + -4.5864 -0.4479 -0.3709 N 0 0 0 0 0 0 0 0 0 0 0 0 + -6.0209 -0.5372 -0.5309 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.8650 0.6556 -0.0159 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.4316 1.9274 0.2613 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.9252 2.9499 0.4950 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9823 0.2838 -2.8717 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.6373 -0.2332 -3.2566 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0917 1.4280 -3.5564 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4903 1.4646 -1.7456 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5136 0.9899 -1.6014 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0157 1.4573 2.3952 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2724 3.8074 0.9805 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.7720 3.4374 
2.5572 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3265 -2.2962 -1.6370 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7367 -3.0982 -0.1209 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1176 -2.7534 1.8503 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6194 -1.0400 1.8902 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1006 -1.4673 1.9447 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3465 -3.1678 0.8574 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4448 -2.6381 2.0701 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6138 1.1780 0.1750 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.2915 -1.5507 -0.8390 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.3284 0.1769 -1.2988 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.4900 -0.3038 0.4282 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 1 0 + 4 5 1 0 + 5 6 2 0 + 6 7 1 0 + 7 8 2 0 + 8 9 1 0 + 9 10 1 0 + 6 11 1 0 + 11 12 1 0 + 12 13 1 0 + 13 14 1 0 + 14 15 1 0 + 14 16 1 0 + 16 17 2 0 + 16 18 1 0 + 18 19 2 0 + 19 20 1 0 + 20 21 2 0 + 21 22 1 0 + 21 23 1 0 + 23 24 2 0 + 12 25 2 0 + 25 26 1 0 + 26 27 1 0 + 26 28 1 0 + 28 29 1 0 + 29 30 3 0 + 24 2 1 0 + 9 4 2 0 + 28 11 2 0 + 24 18 1 0 + 1 31 1 0 + 1 32 1 0 + 1 33 1 0 + 2 34 1 6 + 5 35 1 0 + 7 36 1 0 + 10 37 1 0 + 10 38 1 0 + 13 39 1 0 + 13 40 1 0 + 15 41 1 0 + 15 42 1 0 + 15 43 1 0 + 19 44 1 0 + 20 45 1 0 + 23 46 1 0 + 27 47 1 0 + 27 48 1 0 + 27 49 1 0 +M END diff --git a/test/macrocycle_test.py b/test/macrocycle_test.py index 9bae4b79..acd0988a 100644 --- a/test/macrocycle_test.py +++ b/test/macrocycle_test.py @@ -76,3 +76,28 @@ def run(molname): def test_all(): for molname in num_cycle_breaks: run(molname) + +def test_untyped_macrocycle(): + fn = str(workdir / "macrocycle_data" / "lorlatinib.mol") + mol = Chem.MolFromMolFile(fn, removeHs=False) + + # type based, can only break C-C bonds, but we have none + mk_prep_typed = MoleculePreparation() + molsetup_typed = mk_prep_typed(mol)[0] + count_rotatable = 0 + count_broken = 0 + for bond_id, bond_info in molsetup_typed.bond_info.items(): + count_rotatable += bond_info.rotatable + count_broken += bond_info.cycle_break + assert count_rotatable == 2 + assert count_broken == 0 + + mk_prep_untyped = 
MoleculePreparation(untyped_macrocycles=True) + molsetup_untyped = mk_prep_untyped(mol)[0] + count_rotatable = 0 + count_broken = 0 + for bond_id, bond_info in molsetup_untyped.bond_info.items(): + count_rotatable += bond_info.rotatable + count_broken += bond_info.cycle_break + assert count_rotatable == 10 + assert count_broken == 1 From c563bf97bcff76bd0cdf451c50a2e29eaa7d4d92 Mon Sep 17 00:00:00 2001 From: diogom Date: Sat, 11 Jan 2025 18:42:57 -0800 Subject: [PATCH 2/2] rename Bond's cycle_break to breakable --- meeko/molsetup.py | 12 ++++++------ meeko/preparation.py | 2 +- test/json_serialization_test.py | 15 +++++++++++++++ test/macrocycle_test.py | 12 ++++++------ 4 files changed, 28 insertions(+), 13 deletions(-) diff --git a/meeko/molsetup.py b/meeko/molsetup.py index 88824ab6..5c67fb49 100644 --- a/meeko/molsetup.py +++ b/meeko/molsetup.py @@ -51,7 +51,7 @@ DEFAULT_GRAPH = [] DEFAULT_BOND_ROTATABLE = False -DEFAULT_BOND_CYCLE_BREAK = False +DEFAULT_BOND_BREAKABLE = False DEFAULT_RING_CLOSURE_BONDS_REMOVED = [] DEFAULT_RING_CLOSURE_PSEUDOS_BY_ATOM = defaultdict @@ -279,7 +279,7 @@ class Bond(BaseJSONParsable): index1: int index2: int rotatable: bool = DEFAULT_BOND_ROTATABLE - cycle_break: bool = DEFAULT_BOND_CYCLE_BREAK + breakable: bool = DEFAULT_BOND_BREAKABLE def __post_init__(self): self.canon_id = self.get_bond_id(self.index1, self.index2) @@ -293,12 +293,12 @@ def json_encoder(cls, obj: "Bond") -> Optional[dict[str, Any]]: "index1": obj.index1, "index2": obj.index2, "rotatable": obj.rotatable, - "cycle_break": obj.cycle_break, + "breakable": obj.breakable, } return output_dict # Keys to check for deserialized JSON - expected_json_keys = {"canon_id", "index1", "index2", "rotatable", "cycle_break"} + expected_json_keys = {"canon_id", "index1", "index2", "rotatable"} @classmethod def _decode_object(cls, obj: dict[str, Any]): @@ -307,8 +307,8 @@ def _decode_object(cls, obj: dict[str, Any]): index1 = obj["index1"] index2 = obj["index2"] rotatable = 
obj["rotatable"] - cycle_break = obj["cycle_break"] - output_bond = cls(index1, index2, rotatable, cycle_break) + breakable = obj.get("breakable", DEFAULT_BOND_BREAKABLE) + output_bond = cls(index1, index2, rotatable, breakable) return output_bond # endregion diff --git a/meeko/preparation.py b/meeko/preparation.py index 1b5100bf..2f4cbcf4 100644 --- a/meeko/preparation.py +++ b/meeko/preparation.py @@ -336,7 +336,7 @@ def calc_flex( for atom1, atom2 in bonds_to_break: bond_id = Bond.get_bond_id(atom1, atom2) - setup.bond_info[bond_id].cycle_break = True + setup.bond_info[bond_id].breakable = True return diff --git a/test/json_serialization_test.py b/test/json_serialization_test.py index e153d1cb..b2b3627c 100644 --- a/test/json_serialization_test.py +++ b/test/json_serialization_test.py @@ -341,6 +341,21 @@ def test_dihedral_equality(): check_molsetup_equality(starting_molsetup, decoded_molsetup) return + +def test_broken_bond(): + fn = str(pkgdir / "test" / "macrocycle_data" / "lorlatinib.mol") + mol = Chem.MolFromMolFile(fn, removeHs=False) + mk_prep_untyped = MoleculePreparation(untyped_macrocycles=True) + starting_molsetup = mk_prep_untyped(mol)[0] + decoded_molsetup = RDKitMoleculeSetup.from_json(starting_molsetup.to_json()) + count_rotatable = 0 + count_breakable = 0 + for bond_id, bond_info in decoded_molsetup.bond_info.items(): + count_rotatable += bond_info.rotatable + count_breakable += bond_info.breakable + assert count_rotatable == 10 + assert count_breakable == 1 + # endregion diff --git a/test/macrocycle_test.py b/test/macrocycle_test.py index f1556fe5..28f02512 100644 --- a/test/macrocycle_test.py +++ b/test/macrocycle_test.py @@ -109,19 +109,19 @@ def test_untyped_macrocycle(): mk_prep_typed = MoleculePreparation() molsetup_typed = mk_prep_typed(mol)[0] count_rotatable = 0 - count_broken = 0 + count_breakable = 0 for bond_id, bond_info in molsetup_typed.bond_info.items(): count_rotatable += bond_info.rotatable - count_broken += 
bond_info.cycle_break + count_breakable += bond_info.breakable assert count_rotatable == 2 - assert count_broken == 0 + assert count_breakable == 0 mk_prep_untyped = MoleculePreparation(untyped_macrocycles=True) molsetup_untyped = mk_prep_untyped(mol)[0] count_rotatable = 0 - count_broken = 0 + count_breakable = 0 for bond_id, bond_info in molsetup_untyped.bond_info.items(): count_rotatable += bond_info.rotatable - count_broken += bond_info.cycle_break + count_breakable += bond_info.breakable assert count_rotatable == 10 - assert count_broken == 1 + assert count_breakable == 1