Skip to content

Commit

Permalink
transpose grad pcm matrices
Browse files Browse the repository at this point in the history
  • Loading branch information
wxj6000 committed Nov 19, 2023
1 parent 6791770 commit ffdc3b1
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 72 deletions.
4 changes: 2 additions & 2 deletions examples/00-h2o.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
screen_tol = 1e-14
grids_level = 3

mol = pyscf.M(atom=atom, basis=bas, max_memory=32000)
mol = pyscf.M(atom=atom, basis=bas, max_memory=32000, output='./pyscf.log')

mol.verbose = 6
mol.verbose = 4
mf_GPU = rks.RKS(mol, xc=xc).density_fit(auxbasis=auxbasis)
mf_GPU.grids.level = grids_level
mf_GPU.conv_tol = scf_tol
Expand Down
4 changes: 4 additions & 0 deletions gpu4pyscf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
from . import lib, grad, hessian, solvent, scf, dft
__version__ = '0.6.6'

# monkey patch libxc reference due to a bug in nvcc
from pyscf.dft import libxc
libxc.__reference__ = 'unable to decode the reference due to https://github.com/NVIDIA/cuda-python/issues/29'
17 changes: 10 additions & 7 deletions gpu4pyscf/df/hessian/rhf.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,33 +188,36 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None,

cupy.get_default_memory_pool().free_all_blocks()
# int3c_ipip1 contributions
hj_ao_diag, hk_ao_diag = int3c2e.get_int3c2e_ipip1_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega)
hj_ao_diag, hk_ao_diag = int3c2e.get_int3c2e_ipip1_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega, with_k=with_k)
hj_ao_diag *= 2.0
t1 = log.timer_debug1('intermediate variables with int3c2e_ipip1', *t1)

# int3c_ipvip1 contributions
# (11|0), (0|00) without response of RI basis
hj, hk = int3c2e.get_int3c2e_ipvip1_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega)
hj, hk = int3c2e.get_int3c2e_ipvip1_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega, with_k=with_k)
hj_ao_ao += 2.0*hj
hk_ao_ao += hk
if with_k:
hk_ao_ao += hk
hj = hk = None
t1 = log.timer_debug1('intermediate variables with int3c2e_ipvip1', *t1)

# int3c_ip1ip2 contributions
# (10|1), (0|0)(0|00)
if hessobj.auxbasis_response:
hj, hk = int3c2e.get_int3c2e_ip1ip2_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega)
hj, hk = int3c2e.get_int3c2e_ip1ip2_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega, with_k=with_k)
hj_ao_aux += hj
hk_ao_aux += hk
if with_k:
hk_ao_aux += hk
hj = hk = None
t1 = log.timer_debug1('intermediate variables with int3c2e_ip1ip2', *t1)

# int3c_ipip2 contributions
if hessobj.auxbasis_response > 1:
# (00|2), (0|0)(0|00)
hj, hk = int3c2e.get_int3c2e_ipip2_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega)
hj, hk = int3c2e.get_int3c2e_ipip2_hjk(intopt, rhoj0_P, rhok0_P__, dm0_tag, omega=omega, with_k=with_k)
hj_aux_diag = hj
hk_aux_diag = .5*hk
if with_k:
hk_aux_diag = .5*hk
hj = hk = None
t1 = log.timer_debug1('intermediate variables with int3c2e_ipip2', *t1)

Expand Down
56 changes: 36 additions & 20 deletions gpu4pyscf/df/int3c2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -873,15 +873,19 @@ def get_int3c2e_ipip1_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None):
nao_sph = dm0_tag.shape[0]
orbo = cupy.asarray(dm0_tag.occ_coeff, order='C')
hj = cupy.zeros([nao_sph,9])
hk = cupy.zeros([nao_sph,9])
hk = None
if with_k:
hk = cupy.zeros([nao_sph,9])
for i0,i1,j0,j1,k0,k1,int3c_blk in loop_int3c2e_general(intopt, ip_type='ipip1', omega=omega):
rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1])
rhok_tmp = contract('pio,jo->pij', rhok_tmp, orbo[j0:j1])
tmp = contract('xpji,ij->xpi', int3c_blk, dm0_tag[i0:i1,j0:j1])
hj[i0:i1] += contract('xpi,p->ix', tmp, rhoj[k0:k1])
hk[i0:i1] += contract('xpji,pij->ix', int3c_blk, rhok_tmp)
if with_k:
rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1])
rhok_tmp = contract('pio,jo->pij', rhok_tmp, orbo[j0:j1])
hk[i0:i1] += contract('xpji,pij->ix', int3c_blk, rhok_tmp)
hj = hj.reshape([nao_sph,3,3])
hk = hk.reshape([nao_sph,3,3])
if with_k:
hk = hk.reshape([nao_sph,3,3])
return hj, hk

def get_int3c2e_ipvip1_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None):
Expand All @@ -891,15 +895,19 @@ def get_int3c2e_ipvip1_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None)
nao_sph = dm0_tag.shape[0]
orbo = cupy.asarray(dm0_tag.occ_coeff, order='C')
hj = cupy.zeros([nao_sph,nao_sph,9])
hk = cupy.zeros([nao_sph,nao_sph,9])
hk = None
if with_k:
hk = cupy.zeros([nao_sph,nao_sph,9])
for i0,i1,j0,j1,k0,k1,int3c_blk in loop_int3c2e_general(intopt, ip_type='ipvip1', omega=omega):
rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1])
rhok_tmp = contract('pio,jo->pji', rhok_tmp, orbo[j0:j1])
tmp = contract('xpji,ij->xpij', int3c_blk, dm0_tag[i0:i1,j0:j1])
hj[i0:i1,j0:j1] += contract('xpij,p->ijx', tmp, rhoj[k0:k1])
hk[i0:i1,j0:j1] += contract('xpji,pji->ijx', int3c_blk, rhok_tmp)
if with_k:
rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1])
rhok_tmp = contract('pio,jo->pji', rhok_tmp, orbo[j0:j1])
hk[i0:i1,j0:j1] += contract('xpji,pji->ijx', int3c_blk, rhok_tmp)
hj = hj.reshape([nao_sph,nao_sph,3,3])
hk = hk.reshape([nao_sph,nao_sph,3,3])
if with_k:
hk = hk.reshape([nao_sph,nao_sph,3,3])
return hj, hk

def get_int3c2e_ip1ip2_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None):
Expand All @@ -910,15 +918,19 @@ def get_int3c2e_ip1ip2_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None)
naux_sph = rhok.shape[0]
orbo = cupy.asarray(dm0_tag.occ_coeff, order='C')
hj = cupy.zeros([nao_sph,naux_sph,9])
hk = cupy.zeros([nao_sph,naux_sph,9])
hk = None
if with_k:
hk = cupy.zeros([nao_sph,naux_sph,9])
for i0,i1,j0,j1,k0,k1,int3c_blk in loop_int3c2e_general(intopt, ip_type='ip1ip2', omega=omega):
rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1])
rhok_tmp = contract('pio,jo->pij', rhok_tmp, orbo[j0:j1])
tmp = contract('xpji,ij->xpi', int3c_blk, dm0_tag[i0:i1,j0:j1])
hj[i0:i1,k0:k1] += contract('xpi,p->ipx', tmp, rhoj[k0:k1])
hk[i0:i1,k0:k1] += contract('xpji,pij->ipx', int3c_blk, rhok_tmp)
if with_k:
rhok_tmp = contract('por,ir->pio', rhok[k0:k1], orbo[i0:i1])
rhok_tmp = contract('pio,jo->pij', rhok_tmp, orbo[j0:j1])
hk[i0:i1,k0:k1] += contract('xpji,pij->ipx', int3c_blk, rhok_tmp)
hj = hj.reshape([nao_sph,naux_sph,3,3])
hk = hk.reshape([nao_sph,naux_sph,3,3])
if with_k:
hk = hk.reshape([nao_sph,naux_sph,3,3])
return hj, hk

def get_int3c2e_ipip2_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None):
Expand All @@ -928,15 +940,19 @@ def get_int3c2e_ipip2_hjk(intopt, rhoj, rhok, dm0_tag, with_k=True, omega=None):
naux_sph = rhok.shape[0]
orbo = cupy.asarray(dm0_tag.occ_coeff, order='C')
hj = cupy.zeros([naux_sph,9])
hk = cupy.zeros([naux_sph,9])
hk = None
if with_k:
hk = cupy.zeros([naux_sph,9])
for i0,i1,j0,j1,k0,k1,int3c_blk in loop_int3c2e_general(intopt, ip_type='ipip2', omega=omega):
rhok_tmp = contract('por,jr->pjo', rhok[k0:k1], orbo[j0:j1])
rhok_tmp = contract('pjo,io->pji', rhok_tmp, orbo[i0:i1])
tmp = contract('xpji,ij->xp', int3c_blk, dm0_tag[i0:i1,j0:j1])
hj[k0:k1] += contract('xp,p->px', tmp, rhoj[k0:k1])
hk[k0:k1] += contract('xpji,pji->px', int3c_blk, rhok_tmp)
if with_k:
rhok_tmp = contract('por,jr->pjo', rhok[k0:k1], orbo[j0:j1])
rhok_tmp = contract('pjo,io->pji', rhok_tmp, orbo[i0:i1])
hk[k0:k1] += contract('xpji,pji->px', int3c_blk, rhok_tmp)
hj = hj.reshape([naux_sph,3,3])
hk = hk.reshape([naux_sph,3,3])
if with_k:
hk = hk.reshape([naux_sph,3,3])
return hj, hk

def get_int3c2e_ip_slice(intopt, cp_aux_id, ip_type, out=None, omega=None, stream=None):
Expand Down
4 changes: 2 additions & 2 deletions gpu4pyscf/lib/cutensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ def contraction(pattern, a, b, alpha, beta, out=None):
if contract_engine == 'opt_einsum':
import opt_einsum
einsum = opt_einsum.contract
elif contract_engine == 'cuquantum':
from cuquantum import contract as einsum
#elif contract_engine == 'cuquantum':
# from cuquantum import contract as einsum
elif contract_engine == 'cupy':
einsum = cupy.einsum
else:
Expand Down
1 change: 0 additions & 1 deletion gpu4pyscf/lib/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,4 +119,3 @@ def new_logger(rec=None, verbose=None):
else:
log = Logger(rec.stdout, rec.verbose)
return log

100 changes: 66 additions & 34 deletions gpu4pyscf/solvent/grad/pcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,27 +170,16 @@ def grad_nuc(pcmobj, dm):
de -= numpy.asarray([numpy.sum(dv_g[p0:p1], axis=0) for p0,p1 in gridslice])
return de

def grad_elec(pcmobj, dm):
def grad_qv(pcmobj, dm):
'''
dE = 0.5*v* d(K^-1 R) *v + q*dv
v^T* d(K^-1 R)v = v^T*K^-1(dR - dK K^-1R)v = v^T K^-1(dR - dK q)
contributions due to integrals
'''
if not pcmobj._intermediates or 'q_sym' not in pcmobj._intermediates:
pcmobj._get_vind(dm)

gridslice = pcmobj.surface['gslice_by_atom']
v_grids = pcmobj._intermediates['v_grids']
A = pcmobj._intermediates['A']
D = pcmobj._intermediates['D']
S = pcmobj._intermediates['S']
K = pcmobj._intermediates['K']
q = pcmobj._intermediates['q']
q_sym = pcmobj._intermediates['q_sym']

vK_1 = cupy.linalg.solve(K.T, v_grids)

# ----------------- potential response -----------------------

intopt = pcmobj.intopt
intopt.clear()
# rebuild with aosym
Expand All @@ -209,60 +198,102 @@ def grad_elec(pcmobj, dm):
dq = cupy.asarray([cupy.sum(dq[:,p0:p1], axis=1) for p0,p1 in gridslice])
dvj= 2.0 * cupy.asarray([cupy.sum(dvj[:,p0:p1], axis=1) for p0,p1 in aoslice[:,2:]])
de = dq + dvj
return de.get()

def grad_solver(pcmobj, dm):
'''
dE = 0.5*v* d(K^-1 R) *v + q*dv
v^T* d(K^-1 R)v = v^T*K^-1(dR - dK K^-1R)v = v^T K^-1(dR - dK q)
'''
if not pcmobj._intermediates or 'q_sym' not in pcmobj._intermediates:
pcmobj._get_vind(dm)

gridslice = pcmobj.surface['gslice_by_atom']
v_grids = pcmobj._intermediates['v_grids']
A = pcmobj._intermediates['A']
D = pcmobj._intermediates['D']
S = pcmobj._intermediates['S']
K = pcmobj._intermediates['K']
q = pcmobj._intermediates['q']

vK_1 = cupy.linalg.solve(K.T, v_grids)

## --------------- response from stiffness matrices ----------------
gridslice = pcmobj.surface['gslice_by_atom']
dF, dA = get_dF_dA(pcmobj.surface)

with_D = pcmobj.method.upper() == 'IEF-PCM' or pcmobj.method.upper() == 'SS(V)PE'
with_D = pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE']
dD, dS, dSii = get_dD_dS(pcmobj.surface, dF, with_D=with_D, with_S=True)

if pcmobj.method.upper() == 'IEF-PCM' or pcmobj.method.upper() == 'SS(V)PE':
if pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE']:
DA = D*A

epsilon = pcmobj.eps

#de_dF = v0 * -dSii_dF * q
#de += 0.5*numpy.einsum('i,inx->nx', de_dF, dF)
# dQ = v^T K^-1 (dR - dK K^-1 R) v
if pcmobj.method.upper() == 'C-PCM' or pcmobj.method.upper() == 'COSMO':
de = cupy.zeros([pcmobj.mol.natm,3])
if pcmobj.method.upper() in ['C-PCM', 'CPCM', 'COSMO']:
dS = dS.transpose([2,0,1])
dSii = dSii.transpose([2,0,1])

# dR = 0, dK = dS
de_dS = cupy.einsum('i,ijx,j->ix', vK_1, dS, q)
de_dS = (vK_1 * dS.dot(q)).T # cupy.einsum('i,xij,j->ix', vK_1, dS, q)
de -= cupy.asarray([cupy.sum(de_dS[p0:p1], axis=0) for p0,p1, in gridslice])
de -= 0.5*cupy.einsum('i,ijx,i->jx', vK_1, dSii, q)
de -= 0.5*contract('i,xij->jx', vK_1*q, dSii) # 0.5*cupy.einsum('i,xij,i->jx', vK_1, dSii, q)

elif pcmobj.method.upper() in ['IEF-PCM', 'IEFPCM', 'SS(V)PE']:
dD = dD.transpose([2,0,1])
dS = dS.transpose([2,0,1])
dSii = dSii.transpose([2,0,1])
dA = dA.transpose([2,0,1])
def contract_bra(a, B, c):
''' i,xij,j->jx '''
tmp = contract('i,xij->xj', a, B)
return (tmp * c).T

def contract_ket(a, B, c):
''' i,xij,j->ix '''
tmp = B.dot(c)
return (a*tmp).T

elif pcmobj.method.upper() == 'IEF-PCM' or pcmobj.method.upper() == 'SS(V)PE':
# IEF-PCM and SS(V)PE formally are the same in gradient calculation
# dR = f_eps/(2*pi) * (dD*A + D*dA),
# dK = dS - f_eps/(2*pi) * (dD*A*S + D*dA*S + D*A*dS)
f_epsilon = (epsilon - 1.0)/(epsilon + 1.0)
fac = f_epsilon/(2.0*PI)

Av = A*v_grids
de_dR = 0.5*fac * cupy.einsum('i,ijx,j->ix', vK_1, dD, Av)
de_dR -= 0.5*fac * cupy.einsum('i,ijx,j->jx', vK_1, dD, Av)
de_dR = 0.5*fac * contract_ket(vK_1, dD, Av)
de_dR -= 0.5*fac * contract_bra(vK_1, dD, Av)
de_dR = cupy.asarray([cupy.sum(de_dR[p0:p1], axis=0) for p0,p1 in gridslice])
de_dR += 0.5*fac * cupy.einsum('i,ij,jnx,j->nx', vK_1, D, dA, v_grids)

de_dS0 = 0.5*cupy.einsum('i,ijx,j->ix', vK_1, dS, q)
de_dS0 -= 0.5*cupy.einsum('i,ijx,j->jx', vK_1, dS, q)
vK_1_D = vK_1.dot(D)
vK_1_Dv = vK_1_D * v_grids
de_dR += 0.5*fac * cupy.einsum('j,xjn->nx', vK_1_Dv, dA)

de_dS0 = 0.5*contract_ket(vK_1, dS, q)
de_dS0 -= 0.5*contract_bra(vK_1, dS, q)
de_dS0 = cupy.asarray([cupy.sum(de_dS0[p0:p1], axis=0) for p0,p1 in gridslice])
de_dS0 += 0.5*cupy.einsum('i,inx,i->nx', vK_1, dSii, q)

vK_1_q = vK_1 * q
de_dS0 += 0.5*contract('i,xin->nx', vK_1_q, dSii)

vK_1_DA = cupy.dot(vK_1, DA)
de_dS1 = 0.5*cupy.einsum('j,jkx,k->jx', vK_1_DA, dS, q)
de_dS1 -= 0.5*cupy.einsum('j,jkx,k->kx', vK_1_DA, dS, q)
de_dS1 = 0.5*contract_ket(vK_1_DA, dS, q)
de_dS1 -= 0.5*contract_bra(vK_1_DA, dS, q)
de_dS1 = cupy.asarray([cupy.sum(de_dS1[p0:p1], axis=0) for p0,p1 in gridslice])
de_dS1 += 0.5*cupy.einsum('j,jnx,j->nx', vK_1_DA, dSii, q)

vK_1_DAq = vK_1_DA*q
de_dS1 += 0.5*contract('j,xjn->nx', vK_1_DAq, dSii)

Sq = cupy.dot(S,q)
ASq = A*Sq
de_dD = 0.5*cupy.einsum('i,ijx,j->ix', vK_1, dD, ASq)
de_dD -= 0.5*cupy.einsum('i,ijx,j->jx', vK_1, dD, ASq)
de_dD = 0.5*contract_ket(vK_1, dD, ASq)
de_dD -= 0.5*contract_bra(vK_1, dD, ASq)
de_dD = cupy.asarray([cupy.sum(de_dD[p0:p1], axis=0) for p0,p1 in gridslice])

vK_1_D = cupy.dot(vK_1, D)
de_dA = 0.5*cupy.einsum('j,jnx,j->nx', vK_1_D, dA, Sq)
de_dA = 0.5*contract('j,xjn->nx', vK_1_D*Sq, dA) # 0.5*cupy.einsum('j,xjn,j->nx', vK_1_D, dA, Sq)

de_dK = de_dS0 - fac * (de_dD + de_dA + de_dS1)
de += de_dR - de_dK
Expand All @@ -288,7 +319,8 @@ def kernel(self, *args, dm=None, atmlst=None, **kwargs):
if dm is None:
dm = self.base.make_rdm1(ao_repr=True)

self.de_solvent = grad_elec(self.base.with_solvent, dm)
self.de_solvent = grad_qv(self.base.with_solvent, dm)
self.de_solvent+= grad_solver(self.base.with_solvent, dm)
self.de_solvent+= grad_nuc(self.base.with_solvent, dm)

self.de_solute = grad_method_class.kernel(self, *args, **kwargs)
Expand Down
Loading

0 comments on commit ffdc3b1

Please sign in to comment.