diff --git a/meeko/cli/mk_prepare_ligand.py b/meeko/cli/mk_prepare_ligand.py index 834dac8d..d2de2468 100755 --- a/meeko/cli/mk_prepare_ligand.py +++ b/meeko/cli/mk_prepare_ligand.py @@ -33,7 +33,6 @@ def cmd_lineparser(): - backend = "rdkit" conf_parser = argparse.ArgumentParser( description=__doc__, @@ -47,13 +46,6 @@ def cmd_lineparser(): ) confargs, remaining_argv = conf_parser.parse_known_args() - config = MoleculePreparation.get_defaults_dict() - - if confargs.config_file is not None: - with open(confargs.config_file) as f: - c = json.load(f) - config.update(c) - parser = ( argparse.ArgumentParser() ) # parents=[conf_parser]) # parents shows --config_file in help msg @@ -214,10 +206,14 @@ def cmd_lineparser(): ) config_group.add_argument( "--charge_model", - choices=("gasteiger", "espaloma", "zero"), + choices=("gasteiger", "espaloma", "zero", "read"), help="default is 'gasteiger', 'zero' sets all zeros", default="gasteiger", ) + config_group.add_argument( + "--charge_atom_prop", + help="set atom partial charges from an RDKit atom property based on the input file. The default is 'PartialCharge' for SDF and '_TriposPartialCharge' for MOL2 unless overriden by a user defined property name. ", + ) config_group.add_argument( "--bad_charge_ok", help="NaN and Inf charges allowed in PDBQT", @@ -273,6 +269,18 @@ def cmd_lineparser(): help="indices (1-based) of the SMARTS atoms that will be attached (default: 1 2)", ) + config = MoleculePreparation.get_defaults_dict() + + if confargs.config_file is not None: + with open(confargs.config_file) as f: + c = json.load(f) + config.update(c) + + # Command line arguments should override the config file only for options + # set explicitly by the user. The config file still has priority over the + # defaults set in argparse. To achieve this, we reset the argparse + # defaults to the values from the config file. Then, we can just update + # variable `config` with the values parsed with argparse parser.set_defaults(**config) args = parser.parse_args(remaining_argv) @@ -293,13 +301,14 @@ def cmd_lineparser(): sys.exit(2) args.reactive_smarts_idx -= 1 # convert from 1- to 0-index - # command line arguments override config + # This is where command line arguments override config file. + # Relies on key/parameter names being equal. + # Deliberate mismatch for add_atom_types/add_atom_types_json, as these + # are extended below instead of being replaced for key in config: if key in args.__dict__: config[key] = args.__dict__[key] - config["load_atom_params"] = args.load_atom_params - if args.add_atom_types_json is not None: additional_ats = [] for at in args.add_atom_types_json: @@ -308,7 +317,7 @@ def cmd_lineparser(): additional_ats.append(at) elif type(at) == list: additional_ats.extend(at) - config["add_atom_types"] = additional_ats + config["add_atom_types"] += additional_ats if args.multimol_output_dir is not None or args.multimol_prefix is not None: if args.output_pdbqt_filename is not None: @@ -329,7 +338,8 @@ def cmd_lineparser(): num_required_covalent_args += int(args.tether_smarts is not None) if num_required_covalent_args not in [0, 3]: print( - "Error: --receptor, --rec_residue, and --tether_smarts are all required for covalent docking." + "Error: --receptor, --rec_residue, and --tether_smarts are all required for covalent docking.", + file=sys.stderr, ) sys.exit(2) is_covalent = num_required_covalent_args == 3 @@ -364,7 +374,7 @@ def cmd_lineparser(): indices[0] = indices[0] - 1 # convert from 1- to 0-index indices[1] = indices[1] - 1 - return args, config, backend, is_covalent + return args, config, is_covalent class Output: @@ -495,27 +505,27 @@ def get_suffixes(molsetups): return tuple("mk%d" % (i + 1) for i in range(len(molsetups))) def main(): - args, config, backend, is_covalent = cmd_lineparser() + args, config, is_covalent = cmd_lineparser() input_molecule_filename = args.input_molecule_filename # read input input_fname, ext = os.path.splitext(input_molecule_filename) ext = ext[1:].lower() - if backend == "rdkit": - parsers = { - "sdf": Chem.SDMolSupplier, - "mol2": rdkitutils.Mol2MolSupplier, - "mol": Chem.SDMolSupplier, - } - if not ext in parsers: - print( - "*ERROR* Format [%s] not in supported formats [%s]" - % (ext, "/".join(list(parsers.keys()))) - ) - sys.exit(1) - mol_supplier = parsers[ext]( - input_molecule_filename, removeHs=False - ) # input must have explicit H + + parsers = { + "sdf": Chem.SDMolSupplier, + "mol2": rdkitutils.Mol2MolSupplier, + "mol": Chem.SDMolSupplier, + } + if not ext in parsers: + print( + "Error: Format [%s] not in supported formats [%s]" + % (ext, "/".join(list(parsers.keys()))) + ) + sys.exit(1) + mol_supplier = parsers[ext]( + input_molecule_filename, removeHs=False + ) # input must have explicit H # configure output writer if args.output_pdbqt_filename is None: @@ -543,13 +553,24 @@ def main(): rec_prody_mol = prody_parser(rec_filename) covalent_builder = CovalentBuilder(rec_prody_mol, args.rec_residue) - input_mol_counter = 0 input_mol_skipped = 0 input_mol_with_failure = ( 0 # if reactive or covalent, each mol can yield multiple PDBQT ) nr_failures = 0 is_after_first = False + + if config["charge_atom_prop"] is not None: + if config["charge_model"] != "read": + print(f'Error: --charge_atom_prop must be used with --charge_model "read", but the current charge_model is "{config["charge_model"]}". ', + file=sys.stderr,) + sys.exit(1) + elif config["charge_model"] == "read": + if ext=="sdf": + config["charge_atom_prop"] = "PartialCharge" + elif ext=="mol2": + config["charge_atom_prop"] = "_TriposPartialCharge" + preparator = MoleculePreparation.from_config(config) for mol in mol_supplier: if is_after_first and output.is_multimol == False: @@ -561,8 +582,7 @@ def main(): break # check that molecule was successfully loaded - if backend == "rdkit": - is_valid = mol is not None + is_valid = mol is not None input_mol_skipped += int(is_valid == False) if not is_valid: continue diff --git a/meeko/molsetup.py b/meeko/molsetup.py index 9d9216b4..882ec8d4 100644 --- a/meeko/molsetup.py +++ b/meeko/molsetup.py @@ -9,6 +9,7 @@ from collections import defaultdict from dataclasses import asdict, dataclass, field import json +from os import linesep as eol import sys import warnings @@ -1480,7 +1481,7 @@ def add_dihedral_interaction(self, fourier_series): return index @abstractmethod - def init_atom(self, assign_charges, coords): + def init_atom(self, compute_gasteiger_charges, read_charges_from_prop, coords): pass @abstractmethod @@ -1561,7 +1562,8 @@ def from_mol( mol: Chem.Mol, keep_chorded_rings: bool = False, keep_equivalent_rings: bool = False, - assign_charges: bool = True, + compute_gasteiger_charges: bool = True, + read_charges_from_prop: str = None, conformer_id: int = -1, ): """ @@ -1572,7 +1574,8 @@ def from_mol( RDKit Mol object to build the RDKitMoleculeSetup from. keep_chorded_rings: bool keep_equivalent_rings: bool - assign_charges: bool + compute_gasteiger_charges: bool + read_charges_from_prop: str conformer_id: int Returns @@ -1615,7 +1618,7 @@ def from_mol( molsetup.atom_true_count = molsetup.get_num_mol_atoms() molsetup.name = molsetup.get_mol_name() coords = rdkit_conformer.GetPositions() - molsetup.init_atom(assign_charges, coords) + molsetup.init_atom(compute_gasteiger_charges, read_charges_from_prop, coords) molsetup.init_bond() molsetup.perceive_rings(keep_chorded_rings, keep_equivalent_rings) molsetup.rmsd_symmetry_indices = cls.get_symmetries_for_rmsd(mol) @@ -1659,14 +1662,14 @@ def remove_elements(mol, to_rm=(12, 20, 25, 26, 30)): return mol, idx_to_rm, rm_to_neigh - def init_atom(self, assign_charges: bool, coords: list[np.ndarray]): + def init_atom(self, compute_gasteiger_charges: bool, read_charges_from_prop: str, coords: list[np.ndarray]): """ Generates information about the atoms in an RDKit Mol and adds them to an RDKitMoleculeSetup. Parameters ---------- - assign_charges: bool - Indicates whether we should extract/generate charges. + compute_gasteiger_charges: bool + Indicates whether we should compute gasteiger charges. coords: list[np.ndarray] Atom coordinates for the RDKit Mol. @@ -1675,7 +1678,11 @@ def init_atom(self, assign_charges: bool, coords: list[np.ndarray]): None """ # extract/generate charges - if assign_charges: + if compute_gasteiger_charges: + if read_charges_from_prop is not None: + raise ValueError( + "Conflicting options: compute_gasteiger_charges and read_charges_from_prop cannot both be set. " + ) things = self.remove_elements(self.mol) copy_mol, idx_rm_to_formal_charge, rm_to_neigh = things for atom in copy_mol.GetAtoms(): @@ -1716,6 +1723,23 @@ def init_atom(self, assign_charges: bool, coords: list[np.ndarray]): # print(f"{idx=} {newidx=}") ok_charges[i] += chrg_by_heavy_atom[newidx] charges = ok_charges + elif read_charges_from_prop is not None: + if not isinstance(read_charges_from_prop, str) or not read_charges_from_prop: + raise ValueError( + f"Invalid atom property name for read_charges_from_prop: expected a nonempty string (str), but got {type(read_charges_from_prop).__name__} instead. " + ) + charges = [ + float(atom.GetProp(read_charges_from_prop)) + if atom.HasProp(read_charges_from_prop) else None + for atom in self.mol.GetAtoms() + ] + if None in charges: + for idx, charge in enumerate(charges): + if charge is None: + print(f"Charge at index {idx} is None.") + raise ValueError( + f"The list of charges based on atom property name {read_charges_from_prop} contains None. " + ) else: charges = [0.0] * self.mol.GetNumAtoms() # register atom diff --git a/meeko/preparation.py b/meeko/preparation.py index 86120e44..a02a50c0 100644 --- a/meeko/preparation.py +++ b/meeko/preparation.py @@ -86,6 +86,7 @@ def __init__( input_offatom_params=None, load_offatom_params=None, charge_model="gasteiger", + charge_atom_prop=None, dihedral_model=None, reactive_smarts=None, reactive_smarts_idx=None, @@ -147,7 +148,7 @@ def __init__( raise NotImplementedError("load_offatom_params not implemented") self.load_offatom_params = load_offatom_params - allowed_charge_models = ["espaloma", "gasteiger", "zero"] + allowed_charge_models = ["espaloma", "gasteiger", "zero", "read"] if charge_model not in allowed_charge_models: raise ValueError( "unrecognized charge_model: %s, allowed options are: %s" @@ -155,7 +156,25 @@ def __init__( ) self.charge_model = charge_model + self.charge_atom_prop = charge_atom_prop + if self.charge_model!="read" and self.charge_atom_prop: + raise ValueError( + f"A charge_atom_prop ({charge_atom_prop}) is given to MoleculePreparation but its current charge_model is {charge_model}. " + eol + + "To read charges from atom properties in the input mol, set charge_model to 'read' and name the property as 'charge_atom_prop'. " + ) + if self.charge_model=="read": + if not self.charge_atom_prop: + self.charge_atom_prop = "PartialCharge" + warnings.warn( + "The charge_model of MoleculePreparation is set to be 'read', but a valid charge_atom_prop is not given. " + eol + + "The default atom property ('PartialCharge') will be used. " + ) + elif not isinstance(self.charge_atom_prop, str): + raise ValueError( + f"Invalid value for charge_atom_prop: expected a string (str), but got {type(self.charge_atom_prop).__name__} instead. " + ) + allowed_dihedral_models = [None, "openff", "espaloma"] if dihedral_model in (None, "espaloma"): dihedral_list = [] @@ -490,7 +509,8 @@ def prepare( mol, keep_chorded_rings=self.keep_chorded_rings, keep_equivalent_rings=self.keep_equivalent_rings, - assign_charges=self.charge_model == "gasteiger", + compute_gasteiger_charges=self.charge_model == "gasteiger", + read_charges_from_prop=self.charge_atom_prop, conformer_id=conformer_id, ) @@ -500,7 +520,7 @@ def prepare( AtomTyper.type_everything( setup, self.atom_params, - self.charge_model, + self.charge_model, # charge_model is not accessed in type_everything self.offatom_params, self.dihedral_params, )