Skip to content

Commit

Permalink
Merge pull request #489 from Steinbeck-Lab/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
CS76 authored May 2, 2024
2 parents 6904f57 + 22f9e5c commit ba07ee8
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dev-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
password: ${{ env.DOCKER_HUB_PASSWORD }}

- name: Build and push Docker image
uses: docker/build-push-action@v4
uses: docker/build-push-action@v5.3.0
with:
context: .
file: ./Dockerfile
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4.1.4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ RUN pip3 install --no-cache-dir chembl_structure_pipeline --no-deps

COPY ./app /code/app

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80", "--workers", "4"]
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80", "--workers", "8"]
69 changes: 60 additions & 9 deletions app/modules/coconut/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def get_parent_smiles(molecule: Chem.Mol) -> str:
parent_mol = Chem.MolFromMolBlock(parent)

if parent_mol:
[a.SetAtomMapNum(0) for i, a in enumerate(parent_mol.GetAtoms())]
parent_smiles = Chem.MolToSmiles(
parent_mol, isomericSmiles=False, kekuleSmiles=True
)
Expand All @@ -72,6 +73,62 @@ def get_parent_smiles(molecule: Chem.Mol) -> str:
return "Error Check input SMILES"


def get_smiles(molecule: Chem.Mol, isomeric: bool = True) -> str:
    """
    Retrieves the SMILES string (Isomeric or Canonical) for a given RDKit molecule object.

    The molecule is written as a kekulized SMILES string, passed through
    ``Chem.CanonSmiles``, re-parsed, and the re-parsed molecule is written
    out once more with the same flags.

    Note:
        Atom-map numbers are reset to 0 on ``molecule`` itself, i.e. the
        caller's object is modified in place.

    Args:
        molecule (Chem.Mol): An RDKit molecule object representing the molecular structure.
        isomeric (bool, optional): Forwarded to ``Chem.MolToSmiles`` as ``isomericSmiles``;
            True yields the Isomeric SMILES, False the Canonical SMILES.
            Defaults to True.

    Returns:
        str: The Isomeric or Canonical SMILES string for the given molecule, or
        the literal ``"Error Check input SMILES"`` when ``molecule`` is falsy or
        the intermediate SMILES cannot be parsed back into a molecule.
    """
    if not molecule:
        return "Error Check input SMILES"

    # Plain loop instead of a throwaway list comprehension: the call is made
    # only for its side effect (clearing atom-map numbers before writing).
    for atom in molecule.GetAtoms():
        atom.SetAtomMapNum(0)

    initial_smiles = Chem.MolToSmiles(
        molecule, isomericSmiles=isomeric, kekuleSmiles=True
    )
    canonical_mol = Chem.MolFromSmiles(Chem.CanonSmiles(initial_smiles))
    if not canonical_mol:
        return "Error Check input SMILES"

    return Chem.MolToSmiles(
        canonical_mol, isomericSmiles=isomeric, kekuleSmiles=True
    )


def get_standardized_smiles(standardized_mol_block: str) -> str:
    """
    Get the standardized SMILES representation of a molecule.

    This function takes a standardized molecular structure represented as a MolBlock and generates the corresponding
    standardized SMILES representation. Atom-map numbers are cleared, the
    molecule is written as a kekulized SMILES string, passed through
    ``Chem.CanonSmiles``, re-parsed, and written out again as a kekulized
    isomeric SMILES string.

    Args:
        standardized_mol_block (str): The standardized molecular structure in MolBlock format.

    Returns:
        str: The standardized SMILES representation of the molecule, or the
        literal ``"Error Check input SMILES"`` when the mol block is empty,
        cannot be parsed, or the intermediate SMILES cannot be parsed back
        into a molecule.
    """
    # An empty / missing mol block can never yield a molecule; report it with
    # the same sentinel used for the other failure paths.
    if not standardized_mol_block:
        return "Error Check input SMILES"

    mol = Chem.MolFromMolBlock(standardized_mol_block)
    # MolFromMolBlock returns None for unparsable input; without this guard
    # the atom loop below would fail with an AttributeError on None.
    if not mol:
        return "Error Check input SMILES"

    # Plain loop: the call is made only for its side effect.
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(0)

    standardized_smiles = Chem.MolToSmiles(mol, kekuleSmiles=True)
    canonical_mol = Chem.MolFromSmiles(Chem.CanonSmiles(standardized_smiles))
    if not canonical_mol:
        return "Error Check input SMILES"

    return Chem.MolToSmiles(
        canonical_mol, isomericSmiles=True, kekuleSmiles=True
    )


def get_molecule_hash(molecule: Chem.Mol) -> dict:
"""Return various molecule hashes for the provided SMILES.
Expand All @@ -83,12 +140,8 @@ def get_molecule_hash(molecule: Chem.Mol) -> dict:
"""
if molecule:
Formula = Chem.rdMolDescriptors.CalcMolFormula(molecule)
Isomeric_SMILES = Chem.MolToSmiles(molecule, kekuleSmiles=True)
Canonical_SMILES = Chem.MolToSmiles(
molecule,
kekuleSmiles=True,
isomericSmiles=False,
)
Isomeric_SMILES = get_smiles(molecule, isomeric=True)
Canonical_SMILES = get_smiles(molecule, isomeric=False)
Parent_SMILES = get_parent_smiles(molecule)
return {
"Formula": Formula,
Expand Down Expand Up @@ -152,9 +205,7 @@ def get_COCONUT_preprocessing(

# Standardized molecule
standardized_mol_block = standardizer.standardize_molblock(original_mol_block)
standardized_SMILES = Chem.MolToSmiles(
Chem.MolFromMolBlock(standardized_mol_block), kekuleSmiles=True
)
standardized_SMILES = get_standardized_smiles(standardized_mol_block)
standardized_mol = parse_input(standardized_SMILES, "rdkit", False)
standardized_representations = get_representations(standardized_mol)

Expand Down
9 changes: 7 additions & 2 deletions tests/test_classyfire.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def test_valid_classyfire(valid_smiles):
assert result_["query_type"] == "STRUCTURE"
id_ = result_["id"]
classified = loop.run_until_complete(result(id_))
assert classified["classification_status"] == "In Queue"
assert classified["classification_status"] == "Done"
assert classified["entities"][0]["class"]["name"] == "Imidazopyrimidines"


def test_invalid_classyfire(invalid_smiles):
Expand All @@ -28,4 +29,8 @@ def test_invalid_classyfire(invalid_smiles):
assert result_["query_input"] == "invalid_smiles"
id_ = result_["id"]
classified = loop.run_until_complete(result(id_))
assert classified["classification_status"] == "In Queue"
assert classified["classification_status"] == "Done"
assert (
classified["invalid_entities"][0]["report"][0]
== "Cannot process the input SMILES string, please check again"
)

0 comments on commit ba07ee8

Please sign in to comment.