Skip to content

Commit

Permalink
write main mcss clustering loop
Browse files Browse the repository at this point in the history
  • Loading branch information
apayne97 committed Apr 27, 2024
1 parent fb02eb6 commit f522d16
Show file tree
Hide file tree
Showing 4 changed files with 474 additions and 31 deletions.
204 changes: 199 additions & 5 deletions examples/chemical_series_clustering.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
"outputs": [],
"source": [
"from rdkit import Chem\n",
"mols = Chem.SDMolSupplier(mypath)"
"mols = Chem.SDMolSupplier(str(mypath))"
]
},
{
Expand All @@ -93,7 +93,7 @@
"metadata": {},
"outputs": [],
"source": [
"# define the grid to show the scafffolds\n",
"# define the grid to show the scaffolds\n",
"grid = mols2grid.display(mols)"
]
},
Expand All @@ -106,12 +106,206 @@
"grid"
]
},
{
"cell_type": "markdown",
"source": [
"# MCSS-based Clustering"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"outputs": [],
"source": [
"from harbor.clustering.hierarchical import ClusterResults, ClusterCenter, HeirarchicalClustering\n",
"from openeye import oechem"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"mol: Chem.Mol = mols[0]\n",
"mol.GetPropsAsDict()"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"# Convert the first 20 RDKit molecules to OpenEye molecules via SMILES,\n",
"# keeping `oemols` and `mol_ids` index-aligned.\n",
"oemols = []\n",
"mol_ids = []\n",
"for rdkit_mol in mols[:20]:\n",
"    # RDKit suppliers yield None for records they fail to parse; skip those\n",
"    # instead of crashing in MolToSmiles.\n",
"    if rdkit_mol is None:\n",
"        continue\n",
"    smiles = Chem.MolToSmiles(rdkit_mol)\n",
"    properties = rdkit_mol.GetPropsAsDict()\n",
"    compound_id = properties[\"Compound_ID\"]\n",
"    mol = oechem.OEMol()\n",
"    # OESmilesToMol returns False on an unparsable SMILES; do not cluster a\n",
"    # half-built molecule.\n",
"    if not oechem.OESmilesToMol(mol, smiles):\n",
"        print(f\"Skipping {compound_id}: OpenEye could not parse SMILES {smiles}\")\n",
"        continue\n",
"    mol_ids.append(compound_id)\n",
"    oemols.append(mol)"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"from harbor.clustering import hierarchical as h\n",
"from importlib import reload\n",
"reload(h)"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"clusterer = h.HeirarchicalClustering(molecules=oemols, mol_ids=mol_ids)"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"clusters = clusterer.cluster(max_iterations=10)"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"len(clusters)"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"def get_descendents(cluster):\n",
" descendents = []\n",
" for child in cluster.children:\n",
" if isinstance(child, str):\n",
" descendents.append(cluster)\n",
" else:\n",
" descendents.extend(get_descendents(child))\n",
" return descendents"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"from harbor.plotting import ligands as l\n",
"reload(l)"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"# For each cluster: report its size, plot its descendents with the cluster's own\n",
"# `repr` passed as the MCS molecule, and record the first child of every descendent.\n",
"ids_found = []\n",
"for cluster_id, cluster in clusters.items():\n",
"    print(f\"Cluster {cluster_id}\")\n",
"    descendents = get_descendents(cluster)\n",
"    print(f\"Children: {len(descendents)}\")\n",
"    l.plot_ligands_with_mcs(\n",
"        filename=f\"cluster_{cluster_id}.png\",\n",
"        mols=[desc.repr for desc in descendents],\n",
"        mcs_mol=cluster.repr,\n",
"    )\n",
"    ids_found += [desc.children[0] for desc in descendents]"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"set(ids_found)"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"set(mol_ids) - set(ids_found)"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"def get_row_col(i, max_cols, zero_indexed=True):\n",
" row = i // max_cols + (0 if zero_indexed else 1)\n",
" col = i % max_cols + (0 if zero_indexed else 1)\n",
" return row, col"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"outputs": [],
"source": [
"# Quick check of the one-indexed positions for six items in a 4-column grid.\n",
"for i in range(6):\n",
"    position = get_row_col(i, 4, zero_indexed=False)\n",
"    print(position)"
],
"metadata": {
"collapsed": false
},
"execution_count": null
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
Expand Down
Loading

0 comments on commit f522d16

Please sign in to comment.