From ffdc3b170e1e0b8cf490a0291cf554a16ce09ec5 Mon Sep 17 00:00:00 2001 From: "xiaojie.wu" Date: Sun, 19 Nov 2023 08:39:10 +0800 Subject: [PATCH] transpose grad pcm matrices --- examples/00-h2o.py | 4 +- gpu4pyscf/__init__.py | 4 ++ gpu4pyscf/df/hessian/rhf.py | 17 +++--- gpu4pyscf/df/int3c2e.py | 56 ++++++++++------- gpu4pyscf/lib/cutensor.py | 4 +- gpu4pyscf/lib/logger.py | 1 - gpu4pyscf/solvent/grad/pcm.py | 100 ++++++++++++++++++++----------- gpu4pyscf/solvent/hessian/pcm.py | 66 ++++++++++++++++++-- gpu4pyscf/solvent/pcm.py | 4 +- 9 files changed, 184 insertions(+), 72 deletions(-) diff --git a/examples/00-h2o.py b/examples/00-h2o.py index 7df44258..5ac37b6f 100644 --- a/examples/00-h2o.py +++ b/examples/00-h2o.py @@ -32,9 +32,9 @@ screen_tol = 1e-14 grids_level = 3 -mol = pyscf.M(atom=atom, basis=bas, max_memory=32000) +mol = pyscf.M(atom=atom, basis=bas, max_memory=32000, output='./pyscf.log') -mol.verbose = 6 +mol.verbose = 4 mf_GPU = rks.RKS(mol, xc=xc).density_fit(auxbasis=auxbasis) mf_GPU.grids.level = grids_level mf_GPU.conv_tol = scf_tol diff --git a/gpu4pyscf/__init__.py b/gpu4pyscf/__init__.py index 550ac39c..73b45500 100644 --- a/gpu4pyscf/__init__.py +++ b/gpu4pyscf/__init__.py @@ -1,2 +1,6 @@ from . import lib, grad, hessian, solvent, scf, dft __version__ = '0.6.6' + +# monkey patch libxc reference due to a bug in nvcc +from pyscf.dft import libxc +libxc.__reference__ = 'unable to decode the reference due to https://github.com/NVIDIA/cuda-python/issues/29' \ No newline at end of file diff --git a/gpu4pyscf/df/hessian/rhf.py b/gpu4pyscf/df/hessian/rhf.py index 1141de1f..2a06692f 100644 --- a/gpu4pyscf/df/hessian/rhf.py +++ b/gpu4pyscf/df/hessian/rhf.py @@ -188,33 +188,36 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, cupy.get_default_memory_pool().free_all_blocks() # int3c_ipip1 contributions - hj_ao_diag, hk_ao_diag = int3c2e.get_int3c2e_ipip1_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega) + hj_ao_diag, hk_ao_diag = int3c2e.get_int3c2e_ipip1_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega, with_k=with_k) hj_ao_diag *= 2.0 t1 = log.timer_debug1('intermediate variables with int3c2e_ipip1', *t1) # int3c_ipvip1 contributions # (11|0), (0|00) without response of RI basis - hj, hk = int3c2e.get_int3c2e_ipvip1_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega) + hj, hk = int3c2e.get_int3c2e_ipvip1_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega, with_k=with_k) hj_ao_ao += 2.0*hj - hk_ao_ao += hk + if with_k: + hk_ao_ao += hk hj = hk = None t1 = log.timer_debug1('intermediate variables with int3c2e_ipvip1', *t1) # int3c_ip1ip2 contributions # (10|1), (0|0)(0|00) if hessobj.auxbasis_response: - hj, hk = int3c2e.get_int3c2e_ip1ip2_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega) + hj, hk = int3c2e.get_int3c2e_ip1ip2_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega, with_k=with_k) hj_ao_aux += hj - hk_ao_aux += hk + if with_k: + hk_ao_aux += hk hj = hk = None t1 = log.timer_debug1('intermediate variables with int3c2e_ip1ip2', *t1) # int3c_ipip2 contributions if hessobj.auxbasis_response > 1: # (00|2), (0|0)(0|00) - hj, hk = int3c2e.get_int3c2e_ipip2_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega) + hj, hk = int3c2e.get_int3c2e_ipip2_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega, with_k=with_k) hj_aux_diag = hj - hk_aux_diag = .5*hk + if with_k: + hk_aux_diag = .5*hk hj = hk = None t1 = log.timer_debug1('intermediate variables with int3c2e_ipip2', *t1) diff --git a/gpu4pyscf/df/int3c2e.py b/gpu4pyscf/df/int3c2e.py index f3a43442..a542a7fb 100644 --- a/gpu4pyscf/df/int3c2e.py +++ b/gpu4pyscf/df/int3c2e.py @@ -873,15 +873,19 @@ def get_int3c2e_ipip1_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None): nao_sph = dm0_tag.shape[0] orbo = cupy.asarray(dm0_tag.occ_coeff, order='C') hj = cupy.zeros([nao_sph,9]) - hk = cupy.zeros([nao_sph,9]) + hk = None + if with_k: + hk = cupy.zeros([nao_sph,9]) for i0,i1,j0,j1,k0,k1,int3c_blk in loop_int3c2e_general(intopt, ip_type='ipip1', omega=omega): - rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1]) - rhok_tmp = contract('pio,jo->pij', rhok_tmp, orbo[j0:j1]) tmp = contract('xpji,ij->xpi', int3c_blk, dm0_tag[i0:i1,j0:j1]) hj[i0:i1] += contract('xpi,p->ix', tmp, rhoj[k0:k1]) - hk[i0:i1] += contract('xpji,pij->ix', int3c_blk, rhok_tmp) + if with_k: + rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1]) + rhok_tmp = contract('pio,jo->pij', rhok_tmp, orbo[j0:j1]) + hk[i0:i1] += contract('xpji,pij->ix', int3c_blk, rhok_tmp) hj = hj.reshape([nao_sph,3,3]) - hk = hk.reshape([nao_sph,3,3]) + if with_k: + hk = hk.reshape([nao_sph,3,3]) return hj, hk def get_int3c2e_ipvip1_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None): @@ -891,15 +895,19 @@ def get_int3c2e_ipvip1_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None) nao_sph = dm0_tag.shape[0] orbo = cupy.asarray(dm0_tag.occ_coeff, order='C') hj = cupy.zeros([nao_sph,nao_sph,9]) - hk = cupy.zeros([nao_sph,nao_sph,9]) + hk = None + if with_k: + hk = cupy.zeros([nao_sph,nao_sph,9]) for i0,i1,j0,j1,k0,k1,int3c_blk in loop_int3c2e_general(intopt, ip_type='ipvip1', omega=omega): - rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1]) - rhok_tmp = contract('pio,jo->pji', rhok_tmp, orbo[j0:j1]) tmp = contract('xpji,ij->xpij', int3c_blk, dm0_tag[i0:i1,j0:j1]) hj[i0:i1,j0:j1] += contract('xpij,p->ijx', tmp, rhoj[k0:k1]) - hk[i0:i1,j0:j1] += contract('xpji,pji->ijx', int3c_blk, rhok_tmp) + if with_k: + rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1]) + rhok_tmp = contract('pio,jo->pji', rhok_tmp, orbo[j0:j1]) + hk[i0:i1,j0:j1] += contract('xpji,pji->ijx', int3c_blk, rhok_tmp) hj = hj.reshape([nao_sph,nao_sph,3,3]) - hk = hk.reshape([nao_sph,nao_sph,3,3]) + if with_k: + hk = hk.reshape([nao_sph,nao_sph,3,3]) return hj, hk def get_int3c2e_ip1ip2_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None): @@ -910,15 +918,19 @@ def get_int3c2e_ip1ip2_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None) naux_sph = rhok.shape[0] orbo = cupy.asarray(dm0_tag.occ_coeff, order='C') hj = cupy.zeros([nao_sph,naux_sph,9]) - hk = cupy.zeros([nao_sph,naux_sph,9]) + hk = None + if with_k: + hk = cupy.zeros([nao_sph,naux_sph,9]) for i0,i1,j0,j1,k0,k1,int3c_blk in loop_int3c2e_general(intopt, ip_type='ip1ip2', omega=omega): - rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1]) - rhok_tmp = contract('pio,jo->pij', rhok_tmp, orbo[j0:j1]) tmp = contract('xpji,ij->xpi', int3c_blk, dm0_tag[i0:i1,j0:j1]) hj[i0:i1,k0:k1] += contract('xpi,p->ipx', tmp, rhoj[k0:k1]) - hk[i0:i1,k0:k1] += contract('xpji,pij->ipx', int3c_blk, rhok_tmp) + if with_k: + rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1]) + rhok_tmp = contract('pio,jo->pij', rhok_tmp, orbo[j0:j1]) + hk[i0:i1,k0:k1] += contract('xpji,pij->ipx', int3c_blk, rhok_tmp) hj = hj.reshape([nao_sph,naux_sph,3,3]) - hk = hk.reshape([nao_sph,naux_sph,3,3]) + if with_k: + hk = hk.reshape([nao_sph,naux_sph,3,3]) return hj, hk def get_int3c2e_ipip2_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None): @@ -928,15 +940,19 @@ def get_int3c2e_ipip2_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None): naux_sph = rhok.shape[0] orbo = cupy.asarray(dm0_tag.occ_coeff, order='C') hj = cupy.zeros([naux_sph,9]) - hk = cupy.zeros([naux_sph,9]) + hk = None + if with_k: + hk = cupy.zeros([naux_sph,9]) for i0,i1,j0,j1,k0,k1,int3c_blk in loop_int3c2e_general(intopt, ip_type='ipip2', omega=omega): - rhok_tmp = contract('por,jr->pjo', rhok[k0:k1], orbo[j0:j1]) - rhok_tmp = contract('pjo,io->pji', rhok_tmp, orbo[i0:i1]) tmp = contract('xpji,ij->xp', int3c_blk, dm0_tag[i0:i1,j0:j1]) hj[k0:k1] += contract('xp,p->px', tmp, rhoj[k0:k1]) - hk[k0:k1] += contract('xpji,pji->px', int3c_blk, rhok_tmp) + if with_k: + rhok_tmp = contract('por,jr->pjo', rhok[k0:k1], orbo[j0:j1]) + rhok_tmp = contract('pjo,io->pji', rhok_tmp, orbo[i0:i1]) + hk[k0:k1] += contract('xpji,pji->px', int3c_blk, rhok_tmp) hj = hj.reshape([naux_sph,3,3]) - hk = hk.reshape([naux_sph,3,3]) + if with_k: + hk = hk.reshape([naux_sph,3,3]) return hj, hk def get_int3c2e_ip_slice(intopt, cp_aux_id, ip_type, out=None, omega=None, stream=None): diff --git a/gpu4pyscf/lib/cutensor.py b/gpu4pyscf/lib/cutensor.py index aca3b082..c62d6c1a 100644 --- a/gpu4pyscf/lib/cutensor.py +++ b/gpu4pyscf/lib/cutensor.py @@ -165,8 +165,8 @@ def contraction(pattern, a, b, alpha, beta, out=None): if contract_engine == 'opt_einsum': import opt_einsum einsum = opt_einsum.contract - elif contract_engine == 'cuquantum': - from cuquantum import contract as einsum + #elif contract_engine == 'cuquantum': + # from cuquantum import contract as einsum elif contract_engine == 'cupy': einsum = cupy.einsum else: diff --git a/gpu4pyscf/lib/logger.py b/gpu4pyscf/lib/logger.py index 6367b440..7b46fb27 100644 --- a/gpu4pyscf/lib/logger.py +++ b/gpu4pyscf/lib/logger.py @@ -119,4 +119,3 @@ def new_logger(rec=None, verbose=None): else: log = Logger(rec.stdout, rec.verbose) return log - diff --git a/gpu4pyscf/solvent/grad/pcm.py b/gpu4pyscf/solvent/grad/pcm.py index e3929830..d4420488 100644 --- a/gpu4pyscf/solvent/grad/pcm.py +++ b/gpu4pyscf/solvent/grad/pcm.py @@ -170,27 +170,16 @@ def grad_nuc(pcmobj, dm): de -= numpy.asarray([numpy.sum(dv_g[p0:p1], axis=0) for p0,p1 in gridslice]) return de -def grad_elec(pcmobj, dm): +def grad_qv(pcmobj, dm): ''' - dE = 0.5*v* d(K^-1 R) *v + q*dv - v^T* d(K^-1 R)v = v^T*K^-1(dR - dK K^-1R)v = v^T K^-1(dR - dK q) + contributions due to integrals ''' if not pcmobj._intermediates or 'q_sym' not in pcmobj._intermediates: pcmobj._get_vind(dm) gridslice = pcmobj.surface['gslice_by_atom'] - v_grids = pcmobj._intermediates['v_grids'] - A = pcmobj._intermediates['A'] - D = pcmobj._intermediates['D'] - S = pcmobj._intermediates['S'] - K = pcmobj._intermediates['K'] - q = pcmobj._intermediates['q'] q_sym = pcmobj._intermediates['q_sym'] - vK_1 = cupy.linalg.solve(K.T, v_grids) - - # ----------------- potential response ----------------------- - intopt = pcmobj.intopt intopt.clear() # rebuild with aosym @@ -209,15 +198,32 @@ def grad_elec(pcmobj, dm): dq = cupy.asarray([cupy.sum(dq[:,p0:p1], axis=1) for p0,p1 in gridslice]) dvj= 2.0 * cupy.asarray([cupy.sum(dvj[:,p0:p1], axis=1) for p0,p1 in aoslice[:,2:]]) de = dq + dvj + return de.get() + +def grad_solver(pcmobj, dm): + ''' + dE = 0.5*v* d(K^-1 R) *v + q*dv + v^T* d(K^-1 R)v = v^T*K^-1(dR - dK K^-1R)v = v^T K^-1(dR - dK q) + ''' + if not pcmobj._intermediates or 'q_sym' not in pcmobj._intermediates: + pcmobj._get_vind(dm) + + gridslice = pcmobj.surface['gslice_by_atom'] + v_grids = pcmobj._intermediates['v_grids'] + A = pcmobj._intermediates['A'] + D = pcmobj._intermediates['D'] + S = pcmobj._intermediates['S'] + K = pcmobj._intermediates['K'] + q = pcmobj._intermediates['q'] + + vK_1 = cupy.linalg.solve(K.T, v_grids) - ## --------------- response from stiffness matrices ---------------- - gridslice = pcmobj.surface['gslice_by_atom'] dF, dA = get_dF_dA(pcmobj.surface) - with_D = pcmobj.method.upper() == 'IEF-PCM' or pcmobj.method.upper() == 'SS(V)PE' + with_D = pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE'] dD, dS, dSii = get_dD_dS(pcmobj.surface, dF, with_D=with_D, with_S=True) - if pcmobj.method.upper() == 'IEF-PCM' or pcmobj.method.upper() == 'SS(V)PE': + if pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE']: DA = D*A epsilon = pcmobj.eps @@ -225,13 +231,31 @@ def grad_elec(pcmobj, dm): #de_dF = v0 * -dSii_dF * q #de += 0.5*numpy.einsum('i,inx->nx', de_dF, dF) # dQ = v^T K^-1 (dR - dK K^-1 R) v - if pcmobj.method.upper() == 'C-PCM' or pcmobj.method.upper() == 'COSMO': + de = cupy.zeros([pcmobj.mol.natm,3]) + if pcmobj.method.upper() in ['C-PCM', 'CPCM', 'COSMO']: + dS = dS.transpose([2,0,1]) + dSii = dSii.transpose([2,0,1]) + # dR = 0, dK = dS - de_dS = cupy.einsum('i,ijx,j->ix', vK_1, dS, q) + de_dS = (vK_1 * dS.dot(q)).T # cupy.einsum('i,xij,j->ix', vK_1, dS, q) de -= cupy.asarray([cupy.sum(de_dS[p0:p1], axis=0) for p0,p1, in gridslice]) - de -= 0.5*cupy.einsum('i,ijx,i->jx', vK_1, dSii, q) + de -= 0.5*contract('i,xij->jx', vK_1*q, dSii) # 0.5*cupy.einsum('i,xij,i->jx', vK_1, dSii, q) + + elif pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE']: + dD = dD.transpose([2,0,1]) + dS = dS.transpose([2,0,1]) + dSii = dSii.transpose([2,0,1]) + dA = dA.transpose([2,0,1]) + def contract_bra(a, B, c): + ''' i,xij,j->jx ''' + tmp = contract('i,xij->xj', a, B) + return (tmp * c).T + + def contract_ket(a, B, c): + ''' i,xij,j->ix ''' + tmp = B.dot(c) + return (a*tmp).T - elif pcmobj.method.upper() == 'IEF-PCM' or pcmobj.method.upper() == 'SS(V)PE': # IEF-PCM and SS(V)PE formally are the same in gradient calculation # dR = f_eps/(2*pi) * (dD*A + D*dA), # dK = dS - f_eps/(2*pi) * (dD*A*S + D*dA*S + D*A*dS) @@ -239,30 +263,37 @@ def grad_elec(pcmobj, dm): fac = f_epsilon/(2.0*PI) Av = A*v_grids - de_dR = 0.5*fac * cupy.einsum('i,ijx,j->ix', vK_1, dD, Av) - de_dR -= 0.5*fac * cupy.einsum('i,ijx,j->jx', vK_1, dD, Av) + de_dR = 0.5*fac * contract_ket(vK_1, dD, Av) + de_dR -= 0.5*fac * contract_bra(vK_1, dD, Av) de_dR = cupy.asarray([cupy.sum(de_dR[p0:p1], axis=0) for p0,p1 in gridslice]) - de_dR += 0.5*fac * cupy.einsum('i,ij,jnx,j->nx', vK_1, D, dA, v_grids) - de_dS0 = 0.5*cupy.einsum('i,ijx,j->ix', vK_1, dS, q) - de_dS0 -= 0.5*cupy.einsum('i,ijx,j->jx', vK_1, dS, q) + vK_1_D = vK_1.dot(D) + vK_1_Dv = vK_1_D * v_grids + de_dR += 0.5*fac * cupy.einsum('j,xjn->nx', vK_1_Dv, dA) + + de_dS0 = 0.5*contract_ket(vK_1, dS, q) + de_dS0 -= 0.5*contract_bra(vK_1, dS, q) de_dS0 = cupy.asarray([cupy.sum(de_dS0[p0:p1], axis=0) for p0,p1 in gridslice]) - de_dS0 += 0.5*cupy.einsum('i,inx,i->nx', vK_1, dSii, q) + + vK_1_q = vK_1 * q + de_dS0 += 0.5*contract('i,xin->nx', vK_1_q, dSii) vK_1_DA = cupy.dot(vK_1, DA) - de_dS1 = 0.5*cupy.einsum('j,jkx,k->jx', vK_1_DA, dS, q) - de_dS1 -= 0.5*cupy.einsum('j,jkx,k->kx', vK_1_DA, dS, q) + de_dS1 = 0.5*contract_ket(vK_1_DA, dS, q) + de_dS1 -= 0.5*contract_bra(vK_1_DA, dS, q) de_dS1 = cupy.asarray([cupy.sum(de_dS1[p0:p1], axis=0) for p0,p1 in gridslice]) - de_dS1 += 0.5*cupy.einsum('j,jnx,j->nx', vK_1_DA, dSii, q) + + vK_1_DAq = vK_1_DA*q + de_dS1 += 0.5*contract('j,xjn->nx', vK_1_DAq, dSii) Sq = cupy.dot(S,q) ASq = A*Sq - de_dD = 0.5*cupy.einsum('i,ijx,j->ix', vK_1, dD, ASq) - de_dD -= 0.5*cupy.einsum('i,ijx,j->jx', vK_1, dD, ASq) + de_dD = 0.5*contract_ket(vK_1, dD, ASq) + de_dD -= 0.5*contract_bra(vK_1, dD, ASq) de_dD = cupy.asarray([cupy.sum(de_dD[p0:p1], axis=0) for p0,p1 in gridslice]) vK_1_D = cupy.dot(vK_1, D) - de_dA = 0.5*cupy.einsum('j,jnx,j->nx', vK_1_D, dA, Sq) + de_dA = 0.5*contract('j,xjn->nx', vK_1_D*Sq, dA) # 0.5*cupy.einsum('j,xjn,j->nx', vK_1_D, dA, Sq) de_dK = de_dS0 - fac * (de_dD + de_dA + de_dS1) de += de_dR - de_dK @@ -288,7 +319,8 @@ def kernel(self, *args, dm=None, atmlst=None, **kwargs): if dm is None: dm = self.base.make_rdm1(ao_repr=True) - self.de_solvent = grad_elec(self.base.with_solvent, dm) + self.de_solvent = grad_qv(self.base.with_solvent, dm) + self.de_solvent+= grad_solver(self.base.with_solvent, dm) self.de_solvent+= grad_nuc(self.base.with_solvent, dm) self.de_solute = grad_method_class.kernel(self, *args, **kwargs) diff --git a/gpu4pyscf/solvent/hessian/pcm.py b/gpu4pyscf/solvent/hessian/pcm.py index 668dff3f..d0a3e702 100644 --- a/gpu4pyscf/solvent/hessian/pcm.py +++ b/gpu4pyscf/solvent/hessian/pcm.py @@ -25,7 +25,7 @@ from pyscf import gto, df from pyscf.grad import rhf as rhf_grad from gpu4pyscf.solvent.pcm import PI, switch_h -from gpu4pyscf.solvent.grad.pcm import grad_switch_h, get_dF_dA, get_dD_dS, grad_elec, grad_nuc +from gpu4pyscf.solvent.grad.pcm import grad_switch_h, get_dF_dA, get_dD_dS, grad_qv, grad_solver, grad_nuc from gpu4pyscf.df import int3c2e from gpu4pyscf.lib.cupy_helper import contract from gpu4pyscf.lib import logger @@ -86,6 +86,34 @@ def hess_nuc(pcmobj): return de +def hess_qv(pcmobj, dm, verbose=None): + if not pcmobj._intermediates or 'q_sym' not in pcmobj._intermediates: + pcmobj._get_vind(dm) + gridslice = pcmobj.surface['gslice_by_atom'] + q_sym = pcmobj._intermediates['q_sym'] + + intopt = pcmobj.intopt + intopt.clear() + # rebuild with aosym + intopt.build(1e-14, diag_block_with_triu=True, aosym=False) + coeff = intopt.coeff + dm_cart = cupy.einsum('pi,ij,qj->pq', coeff, dm, coeff) + + dvj, _ = int3c2e.get_int3c2e_ipip1_hjk(intopt, q_sym, None, dm_cart, with_k=False) + dq, _ = int3c2e.get_int3c2e_ipvip1_hjk(intopt, q_sym, None, dm_cart, with_k=False) + dvj, _ = int3c2e.get_int3c2e_ip1ip2_hjk(intopt, q_sym, None, dm_cart, with_k=False) + dq, _ = int3c2e.get_int3c2e_ipip2_hjk(intopt, q_sym, None, dm_cart, with_k=False) + + cart_ao_idx = intopt.cart_ao_idx + rev_cart_ao_idx = numpy.argsort(cart_ao_idx) + dvj = dvj[:,rev_cart_ao_idx] + + aoslice = intopt.mol.aoslice_by_atom() + dq = cupy.asarray([cupy.sum(dq[:,p0:p1], axis=1) for p0,p1 in gridslice]) + dvj= 2.0 * cupy.asarray([cupy.sum(dvj[:,p0:p1], axis=1) for p0,p1 in aoslice[:,2:]]) + de = dq + dvj + return de.get() + def hess_elec(pcmobj, dm, verbose=None): ''' slow version with finite difference @@ -98,10 +126,11 @@ def hess_elec(pcmobj, dm, verbose=None): coords = mol.atom_coords(unit='Bohr') def pcm_grad_scanner(mol): + # TODO: use more analytical forms pcmobj.reset(mol) e, v = pcmobj._get_vind(dm) #return grad_elec(pcmobj, dm) - return grad_nuc(pcmobj, dm) + grad_elec(pcmobj, dm) + return grad_nuc(pcmobj, dm) + grad_solver(pcmobj, dm) + grad_qv(pcmobj, dm) de = numpy.zeros([mol.natm, mol.natm, 3, 3]) eps = 1e-3 @@ -121,7 +150,7 @@ def pcm_grad_scanner(mol): pcmobj.reset(pmol) return de -def grad_qv(pcmobj, mo_coeff, mo_occ, atmlst=None, verbose=None): +def fd_grad_vmat(pcmobj, mo_coeff, mo_occ, atmlst=None, verbose=None): ''' dv_solv / da slow version with finite difference @@ -164,6 +193,35 @@ def pcm_vmat_scanner(mol): pcmobj.reset(pmol) return vmat +''' +def analytic_grad_vmat(pcmobj, mo_coeff, mo_occ, atmlst=None, verbose=None): + ''' + dv_solv / da + slow version with finite difference + ''' + log = logger.new_logger(pcmobj, verbose) + t1 = log.init_timer() + pmol = pcmobj.mol.copy() + mol = pmol.copy() + if atmlst is None: + atmlst = range(mol.natm) + nao, nmo = mo_coeff.shape + mocc = mo_coeff[:,mo_occ>0] + nocc = mocc.shape[1] + dm = cupy.dot(mocc, mocc.T) * 2 + coords = mol.atom_coords(unit='Bohr') + + # TODO: add those contributions + # contribution due to _get_v + # contribution due to linear solver + # contribution due to _get_vmat + + vmat = cupy.zeros([len(atmlst), 3, nao, nocc]) + + t1 = log.timer_debug1('computing solvent grad veff', *t1) + pcmobj.reset(pmol) + return vmat +''' def make_hess_object(hess_method): ''' return solvent hessian object @@ -193,7 +251,7 @@ def make_h1(self, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): if atmlst is None: atmlst = range(self.mol.natm) h1ao = hess_method_class.make_h1(self, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose) - dv = grad_qv(self.base.with_solvent, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose) + dv = fd_grad_vmat(self.base.with_solvent, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose) for i0, ia in enumerate(atmlst): h1ao[i0] += dv[i0] return h1ao diff --git a/gpu4pyscf/solvent/pcm.py b/gpu4pyscf/solvent/pcm.py index e9634628..0604b046 100644 --- a/gpu4pyscf/solvent/pcm.py +++ b/gpu4pyscf/solvent/pcm.py @@ -243,7 +243,7 @@ def build(self, ng=None): D, S = get_D_S(self.surface, with_S=True, with_D=True) epsilon = self.eps - if self.method.upper() == 'C-PCM': + if self.method.upper() in ['C-PCM', 'CPCM']: f_epsilon = (epsilon-1.)/epsilon K = S R = -f_epsilon * cupy.eye(K.shape[0]) @@ -251,7 +251,7 @@ def build(self, ng=None): f_epsilon = (epsilon - 1.0)/(epsilon + 1.0/2.0) K = S R = -f_epsilon * cupy.eye(K.shape[0]) - elif self.method.upper() == 'IEF-PCM': + elif self.method.upper() in ['IEF-PCM', 'IEFPCM']: f_epsilon = (epsilon - 1.0)/(epsilon + 1.0) DA = D*A DAS = cupy.dot(DA, S)