Skip to content

Commit

Permalink
Merge pull request #445 from Steinbeck-Lab/dev-kohulan
Browse files Browse the repository at this point in the history
feat: Add ertl functional groups and test
  • Loading branch information
Kohulan authored Jan 11, 2024
2 parents 371f845 + 2029334 commit 5e432ad
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 2 deletions.
34 changes: 32 additions & 2 deletions app/modules/toolkits/rdkit_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from rdkit.Chem import rdmolops
from rdkit.Chem.FilterCatalog import FilterCatalog
from rdkit.Chem.FilterCatalog import FilterCatalogParams
from rdkit.Contrib.IFG import ifg
from rdkit.Contrib.SA_Score import sascorer


Expand Down Expand Up @@ -434,10 +435,10 @@ def get_VeberFilter(molecule: any) -> bool:
drug-like if it has 10 or fewer rotatable bonds and a TPSA of 140 or less.
Parameters:
molecule (any): A molecule represented as an RDKit Mol object.
molecule (any): A molecule represented as an RDKit Mol object.
Returns:
bool: True if the molecule passes the Veber filter criteria, indicating
bool: True if the molecule passes the Veber filter criteria, indicating
drug-likeness; False otherwise.
Note:
Expand Down Expand Up @@ -534,3 +535,32 @@ def get_RuleofThree(molecule: any) -> bool:
return True
else:
return False


def get_ertl_functional_groups(molecule: any) -> list:
    """
    Identify the functional groups of a molecule with Peter Ertl's algorithm.

    The molecule is passed to the RDKit contrib IFG implementation
    (``ifg.identify_functional_groups``) and its result is returned unchanged.

    Parameters:
        molecule (any): A molecule represented as an RDKit Mol object.

    Returns:
        list: The functional groups reported by
            ``ifg.identify_functional_groups``. When no molecule is supplied,
            or no functional groups are found, a list with a single
            placeholder element is returned instead:
            [{'None': 'No fragments found'}]

    References:
        - Ertl, Peter. "Implementation of an algorithm to identify functional
          groups in organic molecules."
          Journal of Cheminformatics 9.1 (2017): 9.
          https://jcheminf.springeropen.com/articles/10.1186/s13321-017-0225-z
    """
    no_fragments = [{"None": "No fragments found"}]

    # Without this guard a missing molecule fell off the end of the function
    # and returned None, contradicting the declared ``list`` return type.
    if not molecule:
        return no_fragments

    fragments = ifg.identify_functional_groups(molecule)
    return fragments if fragments else no_fragments
58 changes: 58 additions & 0 deletions app/routers/chem.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from app.modules.toolkits.cdk_wrapper import get_tanimoto_similarity_CDK
from app.modules.toolkits.helpers import parse_input
from app.modules.toolkits.rdkit_wrapper import check_RO5_violations
from app.modules.toolkits.rdkit_wrapper import get_ertl_functional_groups
from app.modules.toolkits.rdkit_wrapper import get_GhoseFilter
from app.modules.toolkits.rdkit_wrapper import get_PAINS
from app.modules.toolkits.rdkit_wrapper import get_properties
Expand All @@ -45,6 +46,7 @@
from app.schemas import HealthCheck
from app.schemas.chem_schema import FilteredMoleculesResponse
from app.schemas.chem_schema import GenerateDescriptorsResponse
from app.schemas.chem_schema import GenerateFunctionalGroupResponse
from app.schemas.chem_schema import GenerateHOSECodeResponse
from app.schemas.chem_schema import GenerateMultipleDescriptorsResponse
from app.schemas.chem_schema import GenerateStandardizeResponse
Expand Down Expand Up @@ -1042,3 +1044,59 @@ async def all_filter_molecules(
all_smiles.append(final_results)

return all_smiles


@router.get(
    "/ertlfunctionalgroup",
    summary="using the algorithm proposed by Peter Ertl to identify functional groups",
    responses={
        200: {
            "description": "Successful response",
            "model": GenerateFunctionalGroupResponse,
        },
        400: {"description": "Bad Request", "model": BadRequestModel},
        404: {"description": "Not Found", "model": NotFoundModel},
        422: {"description": "Unprocessable Entity", "model": ErrorResponse},
    },
)
async def get_functional_groups(
    smiles: str = Query(
        title="SMILES",
        # The previous text ("to be enumerated") was copied from another
        # endpoint and did not describe what this parameter is used for.
        description="SMILES string to be checked for functional groups",
        openapi_examples={
            "example1": {
                "summary": "Example: Caffeine",
                "value": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
            },
            "example2": {
                "summary": "Example: Topiramate-13C6",
                "value": "CC1(C)OC2COC3(COS(N)(=O)=O)OC(C)(C)OC3C2O1",
            },
        },
    ),
):
    """
    For a given SMILES string this function generates a list of identified functional groups.

    Parameters:
    - **SMILES**: required (query parameter): The SMILES string to be checked for functional groups.

    Returns:
    - list: The functional groups identified by `get_ertl_functional_groups`.

    Raises:
    - HTTPException (422): If no molecule could be read from the SMILES string.
    - HTTPException (500): If the functional group identification itself fails.
    """
    mol = parse_input(smiles, "rdkit", False)

    # Guard clause: reject input that did not yield a molecule.
    if not mol:
        raise HTTPException(
            status_code=422,
            detail="Error reading SMILES string, please check again.",
        )

    try:
        return get_ertl_functional_groups(mol)
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise HTTPException(status_code=500, detail=str(e)) from e
38 changes: 38 additions & 0 deletions app/schemas/chem_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,41 @@ class Config:
},
],
}


class GenerateFunctionalGroupResponse(BaseModel):
    """
    Represents a response containing a list of identified functional groups in the molecule.

    Properties:
    - stereoisomers (list): a list of identified functional groups in the molecule
    """

    # NOTE(review): the field is named `stereoisomers` although its title and
    # description talk about functional groups — presumably carried over from
    # another response model; confirm before renaming, since the name is part
    # of the schema.
    stereoisomers: list = Field(
        ...,
        title="FunctionalGroups",
        description="A list of identified functional groups.",
    )

    class Config:
        """
        Pydantic model configuration.

        JSON Schema Extra:
        - Includes examples of the response structure.
        """

        # NOTE(review): the example uses the keys input/message/output, none
        # of which is a field declared on this model — verify which shape the
        # endpoint actually returns.
        json_schema_extra = {
            "examples": [
                {
                    "input": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
                    "message": "Success",
                    "output": """[IFG(atomIds=(1,), atoms='n', type='cn(c)C'),
IFG(atomIds=(3,), atoms='n', type='cnc'),
IFG(atomIds=(7,), atoms='O', type='c=O'),
IFG(atomIds=(8,), atoms='n', type='cn(c)C'),
IFG(atomIds=(10,), atoms='O', type='c=O'),
IFG(atomIds=(11,), atoms='n', type='cn(c)C')]""",
                },
            ],
        }
21 changes: 21 additions & 0 deletions tests/test_chem.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,3 +358,24 @@ def test_all_filter_molecules(test_smiles):
headers={"Content-Type": "text/plain"},
)
assert response.status_code == 200


def test_get_ertl_functional_groups_invalid_molecule():
    """A truncated SMILES string sent to the endpoint must yield HTTP 422."""
    truncated_smiles = "CN1C=NC2=C1C(=O)N("
    reply = client.get(f"/latest/chem/ertlfunctionalgroup?smiles={truncated_smiles}")
    assert reply.status_code == 422


def test_get_functional_groups_endpoint(test_smiles):
    """The endpoint answers a fixed SMILES query with HTTP 200 and a JSON list."""
    # The request uses a hard-coded SMILES string; the fixture argument is
    # accepted but not read here.
    url = "/latest/chem/ertlfunctionalgroup?smiles=CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
    reply = client.get(url)
    assert reply.status_code == 200
    payload = reply.json()
    assert isinstance(payload, list)


def test_get_functional_groups_endpoint_invalid_input():
    """A non-SMILES query value must yield HTTP 422 with an explanatory detail."""
    bad_request_url = "/latest/chem/ertlfunctionalgroup?smiles=invalid_smiles"
    reply = client.get(bad_request_url)
    assert reply.status_code == 422
    payload = reply.json()
    # NOTE(review): the expected text is lower-case "smiles"; confirm which
    # layer produces this message.
    assert "Error reading smiles" in payload["detail"]
20 changes: 20 additions & 0 deletions tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from app.modules.toolkits.helpers import parse_input
from app.modules.toolkits.rdkit_wrapper import check_RO5_violations
from app.modules.toolkits.rdkit_wrapper import get_3d_conformers
from app.modules.toolkits.rdkit_wrapper import get_ertl_functional_groups
from app.modules.toolkits.rdkit_wrapper import get_tanimoto_similarity_rdkit


Expand Down Expand Up @@ -326,3 +327,22 @@ def test_valid_cdk_smiles(test_smiles):
def test_valid_openbabel_smiles(test_smiles):
    """Parsing the fixture SMILES with the openbabel framework returns an object."""
    parsed = parse_input(test_smiles, framework="openbabel")
    assert parsed is not None


def test_get_ertl_functional_groups_valid_molecule(test_smiles):
    """The fixture molecule yields a non-empty list with a known first group."""
    rdkit_mol = parse_input(test_smiles, framework="rdkit")
    groups = get_ertl_functional_groups(rdkit_mol)

    assert isinstance(groups, list)
    assert len(groups) > 0
    # The expected text depends on which molecule the fixture provides.
    expected_first = "IFG(atomIds=(1,), atoms='n', type='cn(c)C')"
    assert str(groups[0]) == expected_first


def test_get_ertl_functional_groups_no_fragments():
    """For the SMILES "CC" the placeholder entry is expected as the only element."""
    placeholder = {"None": "No fragments found"}
    groups = get_ertl_functional_groups(parse_input("CC", framework="rdkit"))

    assert isinstance(groups, list)
    assert len(groups) == 1
    assert groups[0] == placeholder

0 comments on commit 5e432ad

Please sign in to comment.