diff --git a/examples/15-chelpg.py b/examples/15-chelpg.py new file mode 100644 index 00000000..f102741b --- /dev/null +++ b/examples/15-chelpg.py @@ -0,0 +1,39 @@ +# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from pyscf import gto +from gpu4pyscf.dft import rks +from gpu4pyscf.qmmm import chelpg + + +mol = gto.Mole() +mol.verbose = 0 +mol.output = None +mol.atom = [ + [1 , (1. , 0. , 0.000)], + [1 , (0. , 1. , 0.000)], + [1 , (0. , -1.517 , 1.177)], + [1 , (0. , 1.517 , 1.177)] ] +mol.basis = '631g' +mol.unit = 'B' +mol.build() +mol.verbose = 6 + +xc = 'b3lyp' +mf = rks.RKS(mol, xc=xc) +mf.grids.level = 5 +mf.kernel() +q = chelpg.eval_chelpg_layer_gpu(mf) +print(q) # [ 0.04402311 0.11333945 -0.25767919 0.10031663] \ No newline at end of file diff --git a/gpu4pyscf/__init__.py b/gpu4pyscf/__init__.py index 7fb02e52..ca735418 100644 --- a/gpu4pyscf/__init__.py +++ b/gpu4pyscf/__init__.py @@ -1,2 +1,2 @@ from . import lib, grad, hessian, solvent, scf, dft -__version__ = '0.6.3' +__version__ = '0.6.4' diff --git a/gpu4pyscf/df/int3c2e.py b/gpu4pyscf/df/int3c2e.py index df43dfdd..fc755a27 100644 --- a/gpu4pyscf/df/int3c2e.py +++ b/gpu4pyscf/df/int3c2e.py @@ -769,6 +769,7 @@ def get_int3c2e_ip1_vjk(intopt, rhoj, rhok, dm0_tag, aoslices, with_k=True, omeg vk1_ao += contract('xpio,pki->xiko', int3c_ip1_occ, rhok0_slice) vk1 += contract('xiko,ia->axko', vk1_ao, ao2atom) count += 1 + return vj1_buf, vk1_buf, vj1, vk1 def get_int3c2e_ip2_vjk(intopt, rhoj, rhok, dm0_tag, auxslices, with_k=True, omega=None): @@ -822,6 +823,7 @@ def get_int3c2e_ip1_wjk(intopt, dm0_tag, with_k=True, omega=None): naux_sph = len(intopt.sph_aux_idx) orbo = cupy.asarray(dm0_tag.occ_coeff, order='C') nocc = orbo.shape[1] + wj = cupy.zeros([nao_sph,naux_sph,3]) avail_mem = get_avail_mem() use_gpu_memory = True diff --git a/gpu4pyscf/qmmm/__init__.py b/gpu4pyscf/qmmm/__init__.py new file mode 100644 index 00000000..b165366a --- /dev/null +++ b/gpu4pyscf/qmmm/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from gpu4pyscf.qmmm import chelpg \ No newline at end of file diff --git a/gpu4pyscf/qmmm/chelpg.py b/gpu4pyscf/qmmm/chelpg.py new file mode 100644 index 00000000..3851acc0 --- /dev/null +++ b/gpu4pyscf/qmmm/chelpg.py @@ -0,0 +1,371 @@ +# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import pyscf +import time +import cupy +import numpy as np +import scipy +import ctypes +from pyscf import lib, gto +from pyscf.scf import _vhf +from gpu4pyscf.df import int3c2e +from gpu4pyscf.scf.hf import BasisProdCache +from gpu4pyscf.lib.cupy_helper import load_library, block_c2s_diag +libgint = load_library('libgint') +libgvhf = load_library('libgvhf') +lib.num_threads(8) + + +def get_j_int3c2e_pass1(intopt, dm0): + ''' + get rhoj pass1 for int3c2e + ''' + n_dm = 1 + + naux = intopt.naux + rhoj = cupy.zeros([naux]) + coeff = intopt.coeff + dm_cart = cupy.einsum('pi,ij,qj->pq', coeff, dm0, coeff) + + num_cp_ij = [len(log_qs) for log_qs in intopt.log_qs] + num_cp_kl = [len(log_qs) for log_qs in intopt.aux_log_qs] + + bins_locs_ij = np.append(0, np.cumsum(num_cp_ij)).astype(np.int32) + bins_locs_kl = np.append(0, np.cumsum(num_cp_kl)).astype(np.int32) + + ncp_ij = len(intopt.log_qs) + ncp_kl = len(intopt.aux_log_qs) + norb = dm_cart.shape[0] + err = libgvhf.GINTbuild_j_int3c2e_pass1( + intopt.bpcache, + ctypes.cast(dm_cart.data.ptr, ctypes.c_void_p), + ctypes.cast(rhoj.data.ptr, ctypes.c_void_p), + ctypes.c_int(norb), + ctypes.c_int(naux), + ctypes.c_int(n_dm), + bins_locs_ij.ctypes.data_as(ctypes.c_void_p), + bins_locs_kl.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(ncp_ij), + ctypes.c_int(ncp_kl)) + if err != 0: + raise RuntimeError('CUDA error in get_j_pass1') + return rhoj + + +class VHFOpt(_vhf.VHFOpt): + def __init__(self, mol, auxmol, intor, prescreen='CVHFnoscreen', + qcondname='CVHFsetnr_direct_scf', dmcondname=None): + # use local basis_seg_contraction for efficiency + self.mol = int3c2e.basis_seg_contraction(mol, allow_replica=True) + self.auxmol = int3c2e.basis_seg_contraction(auxmol, allow_replica=True) + ''' + # Note mol._bas will be sorted in .build() method. VHFOpt should be + # initialized after mol._bas updated. + ''' + self.nao = self.mol.nao + self.naux = self.auxmol.nao + + self._intor = intor + self._prescreen = prescreen + self._qcondname = qcondname + self._dmcondname = dmcondname + + self.bpcache = None + + self.sorted_auxmol = None + self.sorted_mol = None + + self.cart_ao_idx = None + self.sph_ao_idx = None + self.cart_aux_idx = None + self.sph_aux_idx = None + + self.cart_ao_loc = [] + self.cart_aux_loc = [] + self.sph_ao_loc = [] + self.sph_aux_loc = [] + + self.cart2sph = None + self.aux_cart2sph = None + + self.angular = None + self.aux_angular = None + + self.cp_idx = None + self.cp_jdx = None + + self.log_qs = None + self.aux_log_qs = None + + def clear(self): + _vhf.VHFOpt.__del__(self) + libgvhf.GINTdel_basis_prod(ctypes.byref(self.bpcache)) + return self + + def __del__(self): + try: + self.clear() + except AttributeError: + pass + + def build(self, cutoff=1e-14, group_size=None, + group_size_aux=None, diag_block_with_triu=False, aosym=False): + ''' + int3c2e is based on int2e with (ao,ao|aux,1) + a tot_mol is created with concatenating [mol, fake_mol, aux_mol] + we will pair (ao,ao) and (aux,1) separately. + ''' + sorted_mol, sorted_idx, uniq_l_ctr, l_ctr_counts = int3c2e.sort_mol( + self.mol) + if group_size is not None: + uniq_l_ctr, l_ctr_counts = int3c2e._split_l_ctr_groups( + uniq_l_ctr, l_ctr_counts, group_size) + self.sorted_mol = sorted_mol + + # sort fake mol + fake_mol = int3c2e.make_fake_mol() + _, _, fake_uniq_l_ctr, fake_l_ctr_counts = int3c2e.sort_mol(fake_mol) + + # sort auxiliary mol + sorted_auxmol, sorted_aux_idx, aux_uniq_l_ctr, aux_l_ctr_counts = int3c2e.sort_mol( + self.auxmol) + if group_size_aux is not None: + aux_uniq_l_ctr, aux_l_ctr_counts = int3c2e._split_l_ctr_groups( + aux_uniq_l_ctr, aux_l_ctr_counts, group_size_aux) + self.sorted_auxmol = sorted_auxmol + tmp_mol = gto.mole.conc_mol(fake_mol, sorted_auxmol) + tot_mol = gto.mole.conc_mol(sorted_mol, tmp_mol) + + # Initialize vhfopt after reordering mol._bas + _vhf.VHFOpt.__init__(self, sorted_mol, self._intor, self._prescreen, + self._qcondname, self._dmcondname) + self.direct_scf_tol = cutoff + + # TODO: is it more accurate to filter with overlap_cond (or exp_cond)? + q_cond = self.get_q_cond() + l_ctr_offsets = np.append(0, np.cumsum(l_ctr_counts)) + log_qs, pair2bra, pair2ket = int3c2e.get_pairing( + l_ctr_offsets, l_ctr_offsets, q_cond, + diag_block_with_triu=diag_block_with_triu, aosym=aosym) + self.log_qs = log_qs.copy() + + # contraction coefficient for ao basis + cart_ao_loc = self.sorted_mol.ao_loc_nr(cart=True) + sph_ao_loc = self.sorted_mol.ao_loc_nr(cart=False) + self.cart_ao_loc = [cart_ao_loc[cp] for cp in l_ctr_offsets] + self.sph_ao_loc = [sph_ao_loc[cp] for cp in l_ctr_offsets] + self.angular = [l[0] for l in uniq_l_ctr] + + cart_ao_loc = self.mol.ao_loc_nr(cart=True) + sph_ao_loc = self.mol.ao_loc_nr(cart=False) + nao = sph_ao_loc[-1] + ao_idx = np.array_split(np.arange(nao), sph_ao_loc[1:-1]) + self.sph_ao_idx = np.hstack([ao_idx[i] for i in sorted_idx]) + + # cartesian ao index + nao = cart_ao_loc[-1] + ao_idx = np.array_split(np.arange(nao), cart_ao_loc[1:-1]) + self.cart_ao_idx = np.hstack([ao_idx[i] for i in sorted_idx]) + ncart = cart_ao_loc[-1] + nsph = sph_ao_loc[-1] + self.cart2sph = block_c2s_diag(ncart, nsph, self.angular, l_ctr_counts) + inv_idx = np.argsort(self.sph_ao_idx, kind='stable').astype(np.int32) + self.coeff = self.cart2sph[:, inv_idx] + + # pairing auxiliary basis with fake basis set + fake_l_ctr_offsets = np.append(0, np.cumsum(fake_l_ctr_counts)) + fake_l_ctr_offsets += l_ctr_offsets[-1] + + aux_l_ctr_offsets = np.append(0, np.cumsum(aux_l_ctr_counts)) + + # contraction coefficient for auxiliary basis + cart_aux_loc = self.sorted_auxmol.ao_loc_nr(cart=True) + sph_aux_loc = self.sorted_auxmol.ao_loc_nr(cart=False) + self.cart_aux_loc = [cart_aux_loc[cp] for cp in aux_l_ctr_offsets] + self.sph_aux_loc = [sph_aux_loc[cp] for cp in aux_l_ctr_offsets] + self.aux_angular = [l[0] for l in aux_uniq_l_ctr] + + cart_aux_loc = self.auxmol.ao_loc_nr(cart=True) + sph_aux_loc = self.auxmol.ao_loc_nr(cart=False) + ncart = cart_aux_loc[-1] + nsph = sph_aux_loc[-1] + # inv_idx = np.argsort(self.sph_aux_idx, kind='stable').astype(np.int32) + aux_l_ctr_offsets += fake_l_ctr_offsets[-1] + + # hardcoded for grids + aux_pair2bra = [np.arange(aux_l_ctr_offsets[0], aux_l_ctr_offsets[-1])] + aux_pair2ket = [np.ones(ncart) * fake_l_ctr_offsets[0]] + aux_log_qs = [np.ones(ncart)] + + self.aux_log_qs = aux_log_qs.copy() + pair2bra += aux_pair2bra + pair2ket += aux_pair2ket + + uniq_l_ctr = np.concatenate( + [uniq_l_ctr, fake_uniq_l_ctr, aux_uniq_l_ctr]) + l_ctr_offsets = np.concatenate([ + l_ctr_offsets, + fake_l_ctr_offsets[1:], + aux_l_ctr_offsets[1:]]) + + bas_pair2shls = np.hstack( + pair2bra + pair2ket).astype(np.int32).reshape(2, -1) + bas_pairs_locs = np.append(0, np.cumsum( + [x.size for x in pair2bra])).astype(np.int32) + log_qs = log_qs + aux_log_qs + ao_loc = tot_mol.ao_loc_nr(cart=True) + ncptype = len(log_qs) + + self.bpcache = ctypes.POINTER(BasisProdCache)() + if diag_block_with_triu: + scale_shellpair_diag = 1. + else: + scale_shellpair_diag = 0.5 + libgint.GINTinit_basis_prod( + ctypes.byref(self.bpcache), ctypes.c_double(scale_shellpair_diag), + ao_loc.ctypes.data_as(ctypes.c_void_p), + bas_pair2shls.ctypes.data_as(ctypes.c_void_p), + bas_pairs_locs.ctypes.data_as( + ctypes.c_void_p), ctypes.c_int(ncptype), + tot_mol._atm.ctypes.data_as( + ctypes.c_void_p), ctypes.c_int(tot_mol.natm), + tot_mol._bas.ctypes.data_as( + ctypes.c_void_p), ctypes.c_int(tot_mol.nbas), + tot_mol._env.ctypes.data_as(ctypes.c_void_p)) + self.bas_pairs_locs = bas_pairs_locs + ncptype = len(self.log_qs) + if aosym: + self.cp_idx, self.cp_jdx = np.tril_indices(ncptype) + else: + nl = int(round(np.sqrt(ncptype))) + self.cp_idx, self.cp_jdx = np.unravel_index( + np.arange(ncptype), (nl, nl)) + + +def eval_chelpg_layer_gpu(mf, deltaR=0.3, Rhead=2.8, ifqchem=True): + """Cal chelpg charge + + Args: + mf: mean field object in pyscf + deltaR (float, optional): the intervel in the cube. Defaults to 0.3. + Rhead (float, optional): the head length. Defaults to 3.0. + ifqchem (bool, optional): whether use the modification in qchem. Defaults to True. + + Returns: + numpy.array: charges + """ + t0 = time.process_time() + t0w = time.time() + BOHR = 0.52917721092 # Angstroms + atomcoords = mf.mol.atom_coords(unit='B') + dm = cupy.array(mf.make_rdm1()) + RVDW_bondi = {1: 1.1/BOHR, 2: 1.40/BOHR, + 3: 1.82/BOHR, 6: 1.70/BOHR, 7: 1.55/BOHR, 8: 1.52/BOHR, 9: 1.47/BOHR, 10: 1.54/BOHR, + 11: 2.27/BOHR, 12: 1.73/BOHR, 14: 2.10/BOHR, 15: 1.80/BOHR, 16: 1.80/BOHR, 17: 1.75/BOHR, 18: 1.88/BOHR, + 19: 2.75/BOHR, 35: 1.85/BOHR} + + Roff = Rhead/BOHR + Deltar = 0.1 + + # smoothing function + def tau_f(R, Rcut, Roff): + return (R - Rcut)**2 * (3*Roff - Rcut - 2*R) / (Roff - Rcut)**3 + + Rshort = np.array([RVDW_bondi[iatom] for iatom in mf.mol._atm[:, 0]]) + idxxmin = np.argmin(atomcoords[:, 0] - Rshort) + idxxmax = np.argmax(atomcoords[:, 0] + Rshort) + idxymin = np.argmin(atomcoords[:, 1] - Rshort) + idxymax = np.argmax(atomcoords[:, 1] + Rshort) + idxzmin = np.argmin(atomcoords[:, 2] - Rshort) + idxzmax = np.argmax(atomcoords[:, 2] + Rshort) + atomtypes = np.array(mf.mol._atm[:, 0]) + # Generate the grids in the cube + xmin = atomcoords[:, 0].min() - Rhead/BOHR - RVDW_bondi[atomtypes[idxxmin]] + xmax = atomcoords[:, 0].max() + Rhead/BOHR + RVDW_bondi[atomtypes[idxxmax]] + ymin = atomcoords[:, 1].min() - Rhead/BOHR - RVDW_bondi[atomtypes[idxymin]] + ymax = atomcoords[:, 1].max() + Rhead/BOHR + RVDW_bondi[atomtypes[idxymax]] + zmin = atomcoords[:, 2].min() - Rhead/BOHR - RVDW_bondi[atomtypes[idxzmin]] + zmax = atomcoords[:, 2].max() + Rhead/BOHR + RVDW_bondi[atomtypes[idxzmax]] + x = np.arange(xmin, xmax, deltaR/BOHR) + y = np.arange(ymin, ymax, deltaR/BOHR) + z = np.arange(zmin, zmax, deltaR/BOHR) + gridcoords = np.meshgrid(x, y, z) + gridcoords = np.vstack(list(map(np.ravel, gridcoords))).T + + # [natom, ngrids] distance between an atom and a grid + r_pX = scipy.spatial.distance.cdist(atomcoords, gridcoords) + # delete the grids in the vdw surface and out the Rhead surface. + # the minimum distance to any atom + Rkmin = (r_pX - np.expand_dims(Rshort, axis=1)).min(axis=0) + Ron = Rshort + Deltar + Rlong = Roff - Deltar + AJk = np.ones(r_pX.shape) # the short-range weight + idx = r_pX < np.expand_dims(Rshort, axis=1) + AJk[idx] = 0 + if ifqchem: + idx2 = (r_pX < np.expand_dims(Ron, axis=1)) * \ + (r_pX >= np.expand_dims(Rshort, axis=1)) + AJk[idx2] = tau_f(r_pX, np.expand_dims(Rshort, axis=1), + np.expand_dims(Ron, axis=1))[idx2] + wLR = 1 - tau_f(Rkmin, Rlong, Roff) # the long-range weight + idx1 = Rkmin < Rlong + idx2 = Rkmin > Roff + wLR[idx1] = 1 + wLR[idx2] = 0 + else: + wLR = np.ones(r_pX.shape[-1]) # the long-range weight + idx = Rkmin > Roff + wLR[idx] = 0 + w = wLR*np.prod(AJk, axis=0) # weight for a specific poing + idx = w <= 1.0E-14 + w = np.delete(w, idx) + r_pX = np.delete(r_pX, idx, axis=1) + gridcoords = np.delete(gridcoords, idx, axis=0) + + ngrids = gridcoords.shape[0] + r_pX = cupy.array(r_pX) + r_pX_potential = 1/r_pX + potential_real = cupy.dot(cupy.array( + mf.mol.atom_charges()), r_pX_potential) + nbatch = 256*256 + + # assert nbatch < ngrids + fmol = pyscf.gto.fakemol_for_charges(gridcoords[:nbatch]) + intopt = VHFOpt(mf.mol, fmol, 'int2e') + for ibatch in range(0, ngrids, nbatch): + max_grid = min(ibatch+nbatch, ngrids) + num_grids = max_grid - ibatch + ptr = intopt.auxmol._atm[:num_grids, gto.PTR_COORD] + intopt.auxmol._env[np.vstack( + (ptr, ptr+1, ptr+2)).T] = gridcoords[ibatch:max_grid] + intopt.build(1e-14, diag_block_with_triu=False, aosym=True) + potential_real[ibatch:max_grid] -= 2.0 * \ + get_j_int3c2e_pass1(intopt, dm)[:num_grids] + + w = cupy.array(w) + r_pX_potential_omega = r_pX_potential*w + GXA = r_pX_potential_omega@r_pX_potential.T + eX = r_pX_potential_omega@potential_real + GXA_inv = cupy.linalg.inv(GXA) + g = GXA_inv@eX + alpha = (g.sum() - mf.mol.charge)/(GXA_inv.sum()) + q = g - alpha*GXA_inv@cupy.ones((mf.mol.natm)) + t6 = time.process_time() + t6w = time.time() + print("Total cpu time: ", t6 - t0) + print("Total wall time: ", t6w - t0w) + return q + diff --git a/gpu4pyscf/qmmm/test/test_chelpg.py b/gpu4pyscf/qmmm/test/test_chelpg.py new file mode 100644 index 00000000..836ce15e --- /dev/null +++ b/gpu4pyscf/qmmm/test/test_chelpg.py @@ -0,0 +1,85 @@ +# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import unittest +import numpy as np +import pyscf +from pyscf import lib +from gpu4pyscf.dft import rks +from gpu4pyscf.qmmm import chelpg + +lib.num_threads(8) + +atom = ''' +O 0.0000000000 -0.0000000000 0.1174000000 +H -0.7570000000 -0.0000000000 -0.4696000000 +H 0.7570000000 0.0000000000 -0.4696000000 +''' + +bas='def2tzvpp' +grids_level = 5 + +def setUpModule(): + global mol + mol = pyscf.M(atom=atom, basis=bas, max_memory=32000) + mol.output = '/dev/null' + mol.build() + mol.verbose = 1 + +def tearDownModule(): + global mol + mol.stdout.close() + del mol + +def run_dft_chelpg(xc, deltaR): + mf = rks.RKS(mol, xc=xc) + mf.grids.level = grids_level + e_dft = mf.kernel() + q = chelpg.eval_chelpg_layer_gpu(mf, deltaR=deltaR) + return e_dft, q + + +class KnownValues(unittest.TestCase): + ''' + known values are obtained by Q-Chem + $rem + JOBTYP SP + METHOD b3lyp + BASIS def2-tzvpp + XC_GRID 000099000590 + CHELPG_DX 2 + CHELPG TRUE + SCF_CONVERGENCE 10 + $end + + Ground-State ChElPG Net Atomic Charges + + Atom Charge (a.u.) + ---------------------------------------- + 1 O -0.712558 + 2 H 0.356292 + 3 H 0.356266 + ---------------------------------------- + ''' + def test_rks_b3lyp(self): + print('-------- B3LYP -------------') + e_tot, q = run_dft_chelpg('B3LYP', 0.1) + assert np.allclose(e_tot, -76.4666495181) + assert np.allclose(q, np.array([-0.712558, 0.356292, 0.356266])) + + +if __name__ == "__main__": + print("Full Tests for SCF") + unittest.main() \ No newline at end of file