Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read Partial Charges from mol2 and SDF #258

Merged
merged 20 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
726ae0b
add args to read charges from atom prop
rwxayheee Nov 30, 2024
63b0fff
edit init_atom to support read charges from atom prop
rwxayheee Nov 30, 2024
bedbdb7
finish draft charges from mol2
rwxayheee Nov 30, 2024
2d9492e
change default read_charge_from_propname to _TriposPartialCharge
rwxayheee Nov 30, 2024
31c0d4b
fixup! change default read_charge_from_propname to _TriposPartialCharge
rwxayheee Nov 30, 2024
f6699e7
fixup! change default read_charge_from_propname to _TriposPartialCharge
rwxayheee Dec 1, 2024
e0f7c53
set default atom property name to PartialCharge; revise error handlin…
rwxayheee Dec 5, 2024
6741616
rename charge_propname to charge_atom_prop
rwxayheee Dec 5, 2024
2bfa6a2
add option charge_from_prop to mk_prepare_ligand.py
rwxayheee Dec 5, 2024
c7f0fb4
use arguments from cli to override config, before
rwxayheee Dec 5, 2024
7f1380f
fixup! use arguments from cli to override config, before preparator =…
rwxayheee Dec 5, 2024
0568941
fix mistakes in handling args charge_from_prop and config;
rwxayheee Dec 5, 2024
772c8d9
Only explicitly provided command line arguments override config
rwxayheee Dec 6, 2024
3c4422c
remove special & redundant handling of explicitly prvided args;
rwxayheee Dec 6, 2024
5995773
clean up unused backend options; redirect error msg to file=sys.stderr
rwxayheee Dec 6, 2024
d0fd018
clearify error msg; make them all begin with Error:
rwxayheee Dec 6, 2024
86b0eb2
clean up lines added in testing
rwxayheee Dec 6, 2024
36c3af4
revert unecessary changes to make diffs prettier
rwxayheee Dec 6, 2024
81151af
clean up changes; move --charge_atom_prop to config_group
rwxayheee Dec 6, 2024
a2a7984
clarify argparse overriding config file
diogomart Dec 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 45 additions & 30 deletions meeko/cli/mk_prepare_ligand.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@


def cmd_lineparser():
backend = "rdkit"

conf_parser = argparse.ArgumentParser(
description=__doc__,
Expand All @@ -52,7 +51,11 @@ def cmd_lineparser():
if confargs.config_file is not None:
with open(confargs.config_file) as f:
c = json.load(f)
config.update(c)
if any(key not in config for key in c):
print(f"Error: Got unsupported keyword ({key}) for MoleculePreparation from the config file ({confargs.config_file}). ",
Copy link
Contributor

@diogomart diogomart Dec 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

key is not defined in the f string (that's why we iterate over the keys in MoleculePreparation.from_config)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm not sure we need to test the keys here, MoleculePreparation.from_config does a check

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right on both. I made the unnecessary changes but I forgot to revert.

file=sys.stderr,)
sys.exit(2)
config.update(c)

parser = (
argparse.ArgumentParser()
Expand All @@ -78,6 +81,10 @@ def cmd_lineparser():
"--name_from_prop",
help="set molecule name from RDKit/SDF property",
)
io_group.add_argument(
"--charge_atom_prop",
help="set atom partial charges from an RDKit atom property based on the input file. The default is 'PartialCharge' for SDF and '_TriposPartialCharge' for MOL2 unless overriden by a user defined property name. ",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would move this next to --charge_model in the Molecule Preparation group. While it is certainly related to I/O, being next to charge_model is more important, and charge_model belongs in molecule preparation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

)
io_group.add_argument(
"-o",
"--out",
Expand Down Expand Up @@ -214,7 +221,7 @@ def cmd_lineparser():
)
config_group.add_argument(
"--charge_model",
choices=("gasteiger", "espaloma", "zero"),
choices=("gasteiger", "espaloma", "zero", "read"),
help="default is 'gasteiger', 'zero' sets all zeros",
default="gasteiger",
)
Expand Down Expand Up @@ -293,13 +300,10 @@ def cmd_lineparser():
sys.exit(2)
args.reactive_smarts_idx -= 1 # convert from 1- to 0-index

# command line arguments override config
for key in config:
if key in args.__dict__:
for key in args.__dict__:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i would like the "command line arguments override config" comment back :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unnecessary, I would revert that

if key in config:
config[key] = args.__dict__[key]

config["load_atom_params"] = args.load_atom_params

if args.add_atom_types_json is not None:
additional_ats = []
for at in args.add_atom_types_json:
Expand All @@ -308,7 +312,7 @@ def cmd_lineparser():
additional_ats.append(at)
elif type(at) == list:
additional_ats.extend(at)
config["add_atom_types"] = additional_ats
config["add_atom_types"] += additional_ats

if args.multimol_output_dir is not None or args.multimol_prefix is not None:
if args.output_pdbqt_filename is not None:
Expand All @@ -329,7 +333,8 @@ def cmd_lineparser():
num_required_covalent_args += int(args.tether_smarts is not None)
if num_required_covalent_args not in [0, 3]:
print(
"Error: --receptor, --rec_residue, and --tether_smarts are all required for covalent docking."
"Error: --receptor, --rec_residue, and --tether_smarts are all required for covalent docking.",
file=sys.stderr,
)
sys.exit(2)
is_covalent = num_required_covalent_args == 3
Expand Down Expand Up @@ -364,7 +369,7 @@ def cmd_lineparser():
indices[0] = indices[0] - 1 # convert from 1- to 0-index
indices[1] = indices[1] - 1

return args, config, backend, is_covalent
return args, config, is_covalent


class Output:
Expand Down Expand Up @@ -495,27 +500,27 @@ def get_suffixes(molsetups):
return tuple("mk%d" % (i + 1) for i in range(len(molsetups)))

def main():
args, config, backend, is_covalent = cmd_lineparser()
args, config, is_covalent = cmd_lineparser()
input_molecule_filename = args.input_molecule_filename

# read input
input_fname, ext = os.path.splitext(input_molecule_filename)
ext = ext[1:].lower()
if backend == "rdkit":
parsers = {
"sdf": Chem.SDMolSupplier,
"mol2": rdkitutils.Mol2MolSupplier,
"mol": Chem.SDMolSupplier,
}
if not ext in parsers:
print(
"*ERROR* Format [%s] not in supported formats [%s]"
% (ext, "/".join(list(parsers.keys())))
)
sys.exit(1)
mol_supplier = parsers[ext](
input_molecule_filename, removeHs=False
) # input must have explicit H

parsers = {
"sdf": Chem.SDMolSupplier,
"mol2": rdkitutils.Mol2MolSupplier,
"mol": Chem.SDMolSupplier,
}
if not ext in parsers:
print(
"Error: Format [%s] not in supported formats [%s]"
% (ext, "/".join(list(parsers.keys())))
)
sys.exit(1)
mol_supplier = parsers[ext](
input_molecule_filename, removeHs=False
) # input must have explicit H

# configure output writer
if args.output_pdbqt_filename is None:
Expand Down Expand Up @@ -543,13 +548,24 @@ def main():
rec_prody_mol = prody_parser(rec_filename)
covalent_builder = CovalentBuilder(rec_prody_mol, args.rec_residue)

input_mol_counter = 0
input_mol_skipped = 0
input_mol_with_failure = (
0 # if reactive or covalent, each mol can yield multiple PDBQT
)
nr_failures = 0
is_after_first = False

if config["charge_atom_prop"] is not None:
if config["charge_model"] != "read":
print(f'Error: --charge_atom_prop must be used with --charge_model "read", but the current charge_model is "{config["charge_model"]}". ',
file=sys.stderr,)
sys.exit(1)
elif config["charge_model"] == "read":
if ext=="sdf":
config["charge_atom_prop"] = "PartialCharge"
elif ext=="mol2":
config["charge_atom_prop"] = "_TriposPartialCharge"

preparator = MoleculePreparation.from_config(config)
for mol in mol_supplier:
if is_after_first and output.is_multimol == False:
Expand All @@ -561,8 +577,7 @@ def main():
break

# check that molecule was successfully loaded
if backend == "rdkit":
is_valid = mol is not None
is_valid = mol is not None
input_mol_skipped += int(is_valid == False)
if not is_valid:
continue
Expand Down
40 changes: 32 additions & 8 deletions meeko/molsetup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from collections import defaultdict
from dataclasses import asdict, dataclass, field
import json
from os import linesep as eol
import sys
import warnings

Expand Down Expand Up @@ -1480,7 +1481,7 @@ def add_dihedral_interaction(self, fourier_series):
return index

@abstractmethod
def init_atom(self, assign_charges, coords):
def init_atom(self, compute_gasteiger_charges, read_charges_from_prop, coords):
pass

@abstractmethod
Expand Down Expand Up @@ -1561,7 +1562,8 @@ def from_mol(
mol: Chem.Mol,
keep_chorded_rings: bool = False,
keep_equivalent_rings: bool = False,
assign_charges: bool = True,
compute_gasteiger_charges: bool = True,
read_charges_from_prop: str = None,
conformer_id: int = -1,
):
"""
Expand All @@ -1572,7 +1574,8 @@ def from_mol(
RDKit Mol object to build the RDKitMoleculeSetup from.
keep_chorded_rings: bool
keep_equivalent_rings: bool
assign_charges: bool
compute_gasteiger_charges: bool
read_charges_from_prop: str
conformer_id: int

Returns
Expand Down Expand Up @@ -1615,7 +1618,7 @@ def from_mol(
molsetup.atom_true_count = molsetup.get_num_mol_atoms()
molsetup.name = molsetup.get_mol_name()
coords = rdkit_conformer.GetPositions()
molsetup.init_atom(assign_charges, coords)
molsetup.init_atom(compute_gasteiger_charges, read_charges_from_prop, coords)
molsetup.init_bond()
molsetup.perceive_rings(keep_chorded_rings, keep_equivalent_rings)
molsetup.rmsd_symmetry_indices = cls.get_symmetries_for_rmsd(mol)
Expand Down Expand Up @@ -1659,14 +1662,14 @@ def remove_elements(mol, to_rm=(12, 20, 25, 26, 30)):
return mol, idx_to_rm, rm_to_neigh


def init_atom(self, assign_charges: bool, coords: list[np.ndarray]):
def init_atom(self, compute_gasteiger_charges: bool, read_charges_from_prop: str, coords: list[np.ndarray]):
"""
Generates information about the atoms in an RDKit Mol and adds them to an RDKitMoleculeSetup.

Parameters
----------
assign_charges: bool
Indicates whether we should extract/generate charges.
compute_gasteiger_charges: bool
Indicates whether we should compute gasteiger charges.
coords: list[np.ndarray]
Atom coordinates for the RDKit Mol.

Expand All @@ -1675,7 +1678,11 @@ def init_atom(self, assign_charges: bool, coords: list[np.ndarray]):
None
"""
# extract/generate charges
if assign_charges:
if compute_gasteiger_charges:
if read_charges_from_prop is not None:
raise ValueError(
"Conflicting options: compute_gasteiger_charges and read_charges_from_prop cannot both be set. "
)
things = self.remove_elements(self.mol)
copy_mol, idx_rm_to_formal_charge, rm_to_neigh = things
for atom in copy_mol.GetAtoms():
Expand Down Expand Up @@ -1716,6 +1723,23 @@ def init_atom(self, assign_charges: bool, coords: list[np.ndarray]):
# print(f"{idx=} {newidx=}")
ok_charges[i] += chrg_by_heavy_atom[newidx]
charges = ok_charges
elif read_charges_from_prop is not None:
if not isinstance(read_charges_from_prop, str) or not read_charges_from_prop:
raise ValueError(
f"Invalid atom property name for read_charges_from_prop: expected a nonempty string (str), but got {type(read_charges_from_prop).__name__} instead. "
)
charges = [
float(atom.GetProp(read_charges_from_prop))
if atom.HasProp(read_charges_from_prop) else None
for atom in self.mol.GetAtoms()
]
if None in charges:
for idx, charge in enumerate(charges):
if charge is None:
print(f"Charge at index {idx} is None.")
raise ValueError(
f"The list of charges based on atom property name {read_charges_from_prop} contains None. "
)
else:
charges = [0.0] * self.mol.GetNumAtoms()
# register atom
Expand Down
26 changes: 23 additions & 3 deletions meeko/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def __init__(
input_offatom_params=None,
load_offatom_params=None,
charge_model="gasteiger",
charge_atom_prop=None,
dihedral_model=None,
reactive_smarts=None,
reactive_smarts_idx=None,
Expand Down Expand Up @@ -147,15 +148,33 @@ def __init__(
raise NotImplementedError("load_offatom_params not implemented")
self.load_offatom_params = load_offatom_params

allowed_charge_models = ["espaloma", "gasteiger", "zero"]
allowed_charge_models = ["espaloma", "gasteiger", "zero", "read"]
if charge_model not in allowed_charge_models:
raise ValueError(
"unrecognized charge_model: %s, allowed options are: %s"
% (charge_model, allowed_charge_models)
)

self.charge_model = charge_model
self.charge_atom_prop = charge_atom_prop

if self.charge_model!="read" and self.charge_atom_prop:
raise ValueError(
f"A charge_atom_prop ({charge_atom_prop}) is given to MoleculePreparation but its current charge_model is {charge_model}. " + eol +
"To read charges from atom properties in the input mol, set charge_model to 'read' and name the property as 'charge_atom_prop'. "
)
if self.charge_model=="read":
if not self.charge_atom_prop:
self.charge_atom_prop = "PartialCharge"
warnings.warn(
"The charge_model of MoleculePreparation is set to be 'read', but a valid charge_atom_prop is not given. " + eol +
"The default atom property ('PartialCharge') will be used. "
)
elif not isinstance(self.charge_atom_prop, str):
raise ValueError(
f"Invalid value for charge_atom_prop: expected a string (str), but got {type(self.charge_atom_prop).__name__} instead. "
)

allowed_dihedral_models = [None, "openff", "espaloma"]
if dihedral_model in (None, "espaloma"):
dihedral_list = []
Expand Down Expand Up @@ -490,7 +509,8 @@ def prepare(
mol,
keep_chorded_rings=self.keep_chorded_rings,
keep_equivalent_rings=self.keep_equivalent_rings,
assign_charges=self.charge_model == "gasteiger",
compute_gasteiger_charges=self.charge_model == "gasteiger",
read_charges_from_prop=self.charge_atom_prop,
conformer_id=conformer_id,
)

Expand All @@ -500,7 +520,7 @@ def prepare(
AtomTyper.type_everything(
setup,
self.atom_params,
self.charge_model,
self.charge_model, # charge_model is not accessed in type_everything
self.offatom_params,
self.dihedral_params,
)
Expand Down
Loading