diff --git a/meeko/macrocycle.py b/meeko/macrocycle.py index 22700d1e..69c9b07b 100644 --- a/meeko/macrocycle.py +++ b/meeko/macrocycle.py @@ -36,6 +36,7 @@ def __init__( double_bond_penalty: float = DEFAULT_DOUBLE_BOND_PENALTY, max_breaks: int = DEFAULT_MAX_BREAKS, allow_break_atype_A: bool = False, + untyped: bool = False, ): """ Initialize macrocycle typer. @@ -50,6 +51,8 @@ def __init__( max_breaks: int allow_break_type_A: bool Allow breaking bonds involving atoms typed A, default is False. + untyped: bool + Does not use atom typing, any rotatable bond can break """ self._min_ring_size = min_ring_size self._max_ring_size = max_ring_size @@ -57,6 +60,7 @@ def __init__( self._double_bond_penalty = double_bond_penalty self.max_breaks = max_breaks self.allow_break_atype_A = allow_break_atype_A + self.untyped = untyped self.setup = None self.breakable_rings = None @@ -115,6 +119,8 @@ def _score_bond(self, bond: tuple[int, int]) -> int: bond = Bond.get_bond_id(bond[0], bond[1]) if not self.setup.bond_info[bond].rotatable: return -1 + if self.untyped: + return 100 atom_idx1, atom_idx2 = bond for i in (atom_idx1, atom_idx2): atype = self.setup.get_atom_type(i) diff --git a/meeko/molsetup.py b/meeko/molsetup.py index f473330f..5c67fb49 100644 --- a/meeko/molsetup.py +++ b/meeko/molsetup.py @@ -51,6 +51,7 @@ DEFAULT_GRAPH = [] DEFAULT_BOND_ROTATABLE = False +DEFAULT_BOND_BREAKABLE = False DEFAULT_RING_CLOSURE_BONDS_REMOVED = [] DEFAULT_RING_CLOSURE_PSEUDOS_BY_ATOM = defaultdict @@ -278,6 +279,7 @@ class Bond(BaseJSONParsable): index1: int index2: int rotatable: bool = DEFAULT_BOND_ROTATABLE + breakable: bool = DEFAULT_BOND_BREAKABLE def __post_init__(self): self.canon_id = self.get_bond_id(self.index1, self.index2) @@ -291,6 +293,7 @@ def json_encoder(cls, obj: "Bond") -> Optional[dict[str, Any]]: "index1": obj.index1, "index2": obj.index2, "rotatable": obj.rotatable, + "breakable": obj.breakable, } return output_dict @@ -304,7 +307,8 @@ def _decode_object(cls, obj: dict[str, Any]): index1 = obj["index1"] index2 = obj["index2"] rotatable = obj["rotatable"] - output_bond = cls(index1, index2, rotatable) + breakable = obj.get("breakable", DEFAULT_BOND_BREAKABLE) + output_bond = cls(index1, index2, rotatable, breakable) return output_bond # endregion @@ -329,7 +333,6 @@ def get_bond_id(idx1: int, idx2: int): idx_max = max(idx1, idx2) return idx_min, idx_max - @dataclass class Ring(BaseJSONParsable): ring_id: tuple diff --git a/meeko/preparation.py b/meeko/preparation.py index f60fe2db..2f4cbcf4 100644 --- a/meeko/preparation.py +++ b/meeko/preparation.py @@ -72,6 +72,7 @@ def __init__( hydrate=False, flexible_amides=False, rigid_macrocycles=False, + untyped_macrocycles=False, min_ring_size=meeko.macrocycle.DEFAULT_MIN_RING_SIZE, max_ring_size=meeko.macrocycle.DEFAULT_MAX_RING_SIZE, keep_chorded_rings=False, @@ -127,6 +128,7 @@ def __init__( self.hydrate = hydrate self.flexible_amides = flexible_amides self.rigid_macrocycles = rigid_macrocycles + self.untyped_macrocycles = untyped_macrocycles self.min_ring_size = min_ring_size self.max_ring_size = max_ring_size self.keep_chorded_rings = keep_chorded_rings @@ -203,6 +205,7 @@ def __init__( self.max_ring_size, self.double_bond_penalty, allow_break_atype_A=self.macrocycle_allow_A, + untyped=self.untyped_macrocycles, ) self._water_builder = HydrateMoleculeLegacy() self._classes_setup = {Chem.rdchem.Mol: RDKitMoleculeSetup} @@ -328,7 +331,12 @@ def calc_flex( setup.flexibility_model = flex_model # add G pseudo atoms and set CG types - update_closure_atoms(setup, bonds_to_break, glue_pseudo_atoms) + if not self.untyped_macrocycles: + update_closure_atoms(setup, bonds_to_break, glue_pseudo_atoms) + + for atom1, atom2 in bonds_to_break: + bond_id = Bond.get_bond_id(atom1, atom2) + setup.bond_info[bond_id].breakable = True return diff --git a/test/json_serialization_test.py b/test/json_serialization_test.py index e153d1cb..b2b3627c 100644 --- a/test/json_serialization_test.py +++ b/test/json_serialization_test.py @@ -341,6 +341,21 @@ def test_dihedral_equality(): check_molsetup_equality(starting_molsetup, decoded_molsetup) return + +def test_broken_bond(): + fn = str(pkgdir / "test" / "macrocycle_data" / "lorlatinib.mol") + mol = Chem.MolFromMolFile(fn, removeHs=False) + mk_prep_untyped = MoleculePreparation(untyped_macrocycles=True) + starting_molsetup = mk_prep_untyped(mol)[0] + decoded_molsetup = RDKitMoleculeSetup.from_json(starting_molsetup.to_json()) + count_rotatable = 0 + count_breakable = 0 + for bond_id, bond_info in decoded_molsetup.bond_info.items(): + count_rotatable += bond_info.rotatable + count_breakable += bond_info.breakable + assert count_rotatable == 10 + assert count_breakable == 1 + # endregion diff --git a/test/macrocycle_data/lorlatinib.mol b/test/macrocycle_data/lorlatinib.mol new file mode 100644 index 00000000..6ac3b48f --- /dev/null +++ b/test/macrocycle_data/lorlatinib.mol @@ -0,0 +1,106 @@ +_i0 + RDKit 3D + + 49 52 0 0 0 0 0 0 0 0999 V2000 + 2.0287 0.5847 -2.8580 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5178 1.0230 -1.4775 C 0 0 1 0 0 0 0 0 0 0 0 0 + 1.8039 2.1855 -1.0050 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7602 1.9598 -0.1565 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4054 1.3343 -0.5803 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.4241 1.1052 0.3389 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2623 1.6088 1.6263 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1633 2.2777 2.0483 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8288 2.4468 1.1546 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.9774 3.0843 1.6275 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5429 0.2554 -0.0078 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5683 -1.1293 -0.3755 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.4509 -2.1347 -0.5607 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1849 -1.7343 0.0593 N 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1318 -1.7409 1.5196 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9456 -1.6441 -0.7639 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9295 -1.8994 -1.9639 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2456 -1.2536 -0.1647 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8248 -2.2022 0.7057 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0045 -1.9180 1.3883 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6125 -0.6902 1.1947 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.7392 -0.3992 1.8571 F 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0837 0.2356 0.3000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.9075 -0.0357 -0.4407 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.8322 -1.5421 -0.5749 N 0 0 0 0 0 0 0 0 0 0 0 0 + -4.5864 -0.4479 -0.3709 N 0 0 0 0 0 0 0 0 0 0 0 0 + -6.0209 -0.5372 -0.5309 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.8650 0.6556 -0.0159 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.4316 1.9274 0.2613 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.9252 2.9499 0.4950 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9823 0.2838 -2.8717 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.6373 -0.2332 -3.2566 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0917 1.4280 -3.5564 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4903 1.4646 -1.7456 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5136 0.9899 -1.6014 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0157 1.4573 2.3952 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2724 3.8074 0.9805 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.7720 3.4374 2.5572 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3265 -2.2962 -1.6370 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7367 -3.0982 -0.1209 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1176 -2.7534 1.8503 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6194 -1.0400 1.8902 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1006 -1.4673 1.9447 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3465 -3.1678 0.8574 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4448 -2.6381 2.0701 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6138 1.1780 0.1750 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.2915 -1.5507 -0.8390 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.3284 0.1769 -1.2988 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.4900 -0.3038 0.4282 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 1 0 + 4 5 1 0 + 5 6 2 0 + 6 7 1 0 + 7 8 2 0 + 8 9 1 0 + 9 10 1 0 + 6 11 1 0 + 11 12 1 0 + 12 13 1 0 + 13 14 1 0 + 14 15 1 0 + 14 16 1 0 + 16 17 2 0 + 16 18 1 0 + 18 19 2 0 + 19 20 1 0 + 20 21 2 0 + 21 22 1 0 + 21 23 1 0 + 23 24 2 0 + 12 25 2 0 + 25 26 1 0 + 26 27 1 0 + 26 28 1 0 + 28 29 1 0 + 29 30 3 0 + 24 2 1 0 + 9 4 2 0 + 28 11 2 0 + 24 18 1 0 + 1 31 1 0 + 1 32 1 0 + 1 33 1 0 + 2 34 1 6 + 5 35 1 0 + 7 36 1 0 + 10 37 1 0 + 10 38 1 0 + 13 39 1 0 + 13 40 1 0 + 15 41 1 0 + 15 42 1 0 + 15 43 1 0 + 19 44 1 0 + 20 45 1 0 + 23 46 1 0 + 27 47 1 0 + 27 48 1 0 + 27 49 1 0 +M END diff --git a/test/macrocycle_test.py b/test/macrocycle_test.py index 7463f4cf..28f02512 100644 --- a/test/macrocycle_test.py +++ b/test/macrocycle_test.py @@ -101,3 +101,27 @@ def test_all(): for molname in num_cycle_breaks: run(molname) +def test_untyped_macrocycle(): + fn = str(workdir / "macrocycle_data" / "lorlatinib.mol") + mol = Chem.MolFromMolFile(fn, removeHs=False) + + # type based, can only break C-C bonds, but we have none + mk_prep_typed = MoleculePreparation() + molsetup_typed = mk_prep_typed(mol)[0] + count_rotatable = 0 + count_breakable = 0 + for bond_id, bond_info in molsetup_typed.bond_info.items(): + count_rotatable += bond_info.rotatable + count_breakable += bond_info.breakable + assert count_rotatable == 2 + assert count_breakable == 0 + + mk_prep_untyped = MoleculePreparation(untyped_macrocycles=True) + molsetup_untyped = mk_prep_untyped(mol)[0] + count_rotatable = 0 + count_breakable = 0 + for bond_id, bond_info in molsetup_untyped.bond_info.items(): + count_rotatable += bond_info.rotatable + count_breakable += bond_info.breakable + assert count_rotatable == 10 + assert count_breakable == 1