From 2db48a9489b53afaa405f03792bea46f44c6173b Mon Sep 17 00:00:00 2001 From: Kohulan Date: Tue, 2 May 2023 15:18:23 +0200 Subject: [PATCH 1/6] Added usage of SMILES for the main functions --- chembl_structure_pipeline/checker.py | 14 ++++++++ chembl_structure_pipeline/standardizer.py | 39 +++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/chembl_structure_pipeline/checker.py b/chembl_structure_pipeline/checker.py index 4d0e68d..bb2ea3e 100644 --- a/chembl_structure_pipeline/checker.py +++ b/chembl_structure_pipeline/checker.py @@ -508,3 +508,17 @@ def check_molblock(mb): if tpl: res.append(tpl) return tuple(sorted(res, reverse=True)) + + +def check_mol_from_smiles(smiles: str, sanitize=False): + """ + Use the check_molblock function to determine + if the molecule has any issues based on + a given SMILES string. + Args (str): SMILES string. + Returns (tuple): Issues with smiles + """ + mol = Chem.MolFromSmiles(smiles, sanitize) + if mol: + mol_block = Chem.MolToMolBlock(mol) + return check_molblock(mol_block) diff --git a/chembl_structure_pipeline/standardizer.py b/chembl_structure_pipeline/standardizer.py index a71429b..3ab776f 100644 --- a/chembl_structure_pipeline/standardizer.py +++ b/chembl_structure_pipeline/standardizer.py @@ -453,6 +453,25 @@ def get_parent_molblock(ctab, neutralize=True, check_exclusion=True, verbose=Fal return Chem.MolToMolBlock(parent, kekulize=False), exclude +def get_parent_mol_from_smiles( + smiles: str, get_smiles: bool = False, sanitize: bool = False +): + """ + Use the get_parent_molblock function to retrieve the parent molblock + using the given SMILES string + Args (str): SMILES string. + Returns (mol): parent mol. 
+ """ + mol = Chem.MolFromSmiles(smiles, sanitize) + if mol: + mol_block = Chem.MolToMolBlock(mol) + if get_smiles: + parent_molblock = get_parent_molblock(mol_block) + parentsmiles = Chem.MolToSmiles(parent_molblock) + return parentsmiles + return get_parent_molblock(mol_block) + + def standardize_mol(m, check_exclusion=True, sanitize=True): if check_exclusion: exclude = exclude_flag(m, includeRDKitSanitization=False) @@ -512,3 +531,23 @@ def standardize_molblock(ctab, check_exclusion=True): if exclude_flag(m, includeRDKitSanitization=False): return ctab return Chem.MolToMolBlock(standardize_mol(m, check_exclusion=False, sanitize=False)) + + +def standardize_molblock_from_smiles( + smiles: str, get_smiles: bool = False, sanitize: bool = False +): + """ + Use the standardize_molblock function to identify issues and fix it for + a molecule based on the given SMILES string. + Args (str): SMILES string. + Returns (mol): Fixed molecule. + """ + mol = Chem.MolFromSmiles(smiles, sanitize) + if mol: + mol_block = Chem.MolToMolBlock(mol) + if get_smiles: + standardized_mol = standardize_molblock(mol_block) + rdkit_mol = Chem.MolFromMolBlock(standardized_mol) + standardizedsmiles = Chem.MolToSmiles(rdkit_mol) + return standardizedsmiles + return standardize_molblock(mol_block) From 6070d59ab239f55a2998d3578fb0b396da9c168c Mon Sep 17 00:00:00 2001 From: Kohulan Rajan Date: Tue, 2 May 2023 15:28:37 +0200 Subject: [PATCH 2/6] Update README.md --- README.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/README.md b/README.md index c56dc19..efe0d1d 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,16 @@ M END """ std_molblock = standardizer.standardize_molblock(o_molblock) +``` +> **Note** +> Using SMILES: + +```python +from chembl_structure_pipeline import standardizer + +SMILES = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" +std_smiles = standardizer.standardize_molblock_from_smiles(SMILES, get_smiles=True) + ``` ### Get the parent compound 
[(info)](https://github.com/chembl/ChEMBL_Structure_Pipeline/wiki/Work-done-by-each-step#get_parent_molblock) @@ -74,6 +84,17 @@ M END parent_molblock, _ = standardizer.get_parent_molblock(o_molblock) ``` +> **Note** +> Using SMILES: + +```python +from chembl_structure_pipeline import standardizer + +SMILES = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" +parent_smiles = standardizer.get_parent_mol_from_smiles(SMILES, get_smiles=True) + +``` + ### Check a compound [(info)](https://github.com/chembl/ChEMBL_Structure_Pipeline/wiki/Work-done-by-each-step#checkmolecule) The checker assesses the quality of a structure. It highlights specific features or issues in the structure that may need to be revised. Together with the description of the issue, the checker process returns a penalty score (between 0-9) which reflects the seriousness of the issue (the higher the score, the more critical is the issue) @@ -96,6 +117,16 @@ M END """ issues = checker.check_molblock(o_molblock) +``` +> **Note** +> Using SMILES: + +```python +from chembl_structure_pipeline import checker + +SMILES = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" +parent_smiles = checker.check_mol_from_smiles(SMILES) + ``` ## References From 9996f284758b5b96b7f26035f7dff18b4db2b9fd Mon Sep 17 00:00:00 2001 From: Kohulan Rajan Date: Tue, 2 May 2023 15:30:40 +0200 Subject: [PATCH 3/6] updates for using SMILES as input --- chembl_structure_pipeline/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chembl_structure_pipeline/__init__.py b/chembl_structure_pipeline/__init__.py index 17b81c5..2ee8d9d 100644 --- a/chembl_structure_pipeline/__init__.py +++ b/chembl_structure_pipeline/__init__.py @@ -101,7 +101,7 @@ from .standardizer import standardize_molblock, standardize_mol from .standardizer import get_parent_molblock, get_parent_mol -__version__ = "1.2.0" +__version__ = "1.2.1" # # Copyright (c) 2019 Greg Landrum From 7cc18adfa3e4e8d076e02aa8a3aaed4f05c51a4b Mon Sep 17 00:00:00 2001 From: Kohulan Rajan Date: Tue, 
2 May 2023 15:33:23 +0200 Subject: [PATCH 4/6] fix: get parent mol from SMILES --- chembl_structure_pipeline/standardizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chembl_structure_pipeline/standardizer.py b/chembl_structure_pipeline/standardizer.py index 3ab776f..9a3169b 100644 --- a/chembl_structure_pipeline/standardizer.py +++ b/chembl_structure_pipeline/standardizer.py @@ -466,7 +466,7 @@ def get_parent_mol_from_smiles( if mol: mol_block = Chem.MolToMolBlock(mol) if get_smiles: - parent_molblock = get_parent_molblock(mol_block) + parent_molblock,_ = get_parent_molblock(mol_block) parentsmiles = Chem.MolToSmiles(parent_molblock) return parentsmiles return get_parent_molblock(mol_block) From d7c62334bc5812ca7c6f0d1a2179483fa910e8a2 Mon Sep 17 00:00:00 2001 From: Kohulan Rajan Date: Tue, 2 May 2023 15:37:54 +0200 Subject: [PATCH 5/6] fix: MolFromMolBlock before parsing to SMILES --- chembl_structure_pipeline/standardizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chembl_structure_pipeline/standardizer.py b/chembl_structure_pipeline/standardizer.py index 9a3169b..207de9e 100644 --- a/chembl_structure_pipeline/standardizer.py +++ b/chembl_structure_pipeline/standardizer.py @@ -467,7 +467,8 @@ def get_parent_mol_from_smiles( mol_block = Chem.MolToMolBlock(mol) if get_smiles: parent_molblock,_ = get_parent_molblock(mol_block) - parentsmiles = Chem.MolToSmiles(parent_molblock) + rdkit_mol = Chem.MolFromMolBlock(parent_molblock) + parentsmiles = Chem.MolToSmiles(rdkit_mol) return parentsmiles return get_parent_molblock(mol_block) From 8a1100a4c9b227cfa99d2baacf1c502b5a72b5bb Mon Sep 17 00:00:00 2001 From: Kohulan Rajan Date: Tue, 2 May 2023 15:38:37 +0200 Subject: [PATCH 6/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index efe0d1d..18d3e50 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ issues = checker.check_molblock(o_molblock) 
from chembl_structure_pipeline import checker SMILES = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" -parent_smiles = checker.check_mol_from_smiles(SMILES) +issues = checker.check_mol_from_smiles(SMILES) ```