Skip to content

Commit

Permalink
aggressive config and take_last2d
Browse files Browse the repository at this point in the history
  • Loading branch information
wxj6000 committed Nov 21, 2023
1 parent c4a5274 commit 7af3fff
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 38 deletions.
6 changes: 3 additions & 3 deletions gpu4pyscf/__config__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
number_of_threads = 1024 * 80
# such as A30-24GB
elif props['totalGlobalMem'] >= 16 * GB:
min_ao_blksize = 64
min_grid_blksize = 64*64
ao_aligned = 16
min_ao_blksize = 128
min_grid_blksize = 128*128
ao_aligned = 32
grid_aligned = 128
mem_fraction = 0.9
number_of_threads = 1024 * 80
Expand Down
6 changes: 3 additions & 3 deletions gpu4pyscf/df/df.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from pyscf import lib
from pyscf.df import df, addons
from gpu4pyscf.lib.cupy_helper import (
cholesky, tag_array, get_avail_mem, cart2sph)
cholesky, tag_array, get_avail_mem, cart2sph, take_last2d)
from gpu4pyscf.df import int3c2e, df_jk
from gpu4pyscf.lib import logger
from gpu4pyscf import __config__
Expand Down Expand Up @@ -77,13 +77,13 @@ def build(self, direct_scf_tol=1e-14, omega=None):
j2c_cpu = auxmol.intor('int2c2e', hermi=1)
else:
j2c_cpu = auxmol.intor('int2c2e', hermi=1)
j2c = cupy.asarray(j2c_cpu)
j2c = cupy.asarray(j2c_cpu, order='C')
t0 = log.timer_debug1('2c2e', *t0)
intopt = int3c2e.VHFOpt(mol, auxmol, 'int2e')
intopt.build(direct_scf_tol, diag_block_with_triu=False, aosym=True, group_size=256)
log.timer_debug1('prepare intopt', *t0)
self.j2c = j2c.copy()
j2c = j2c[cupy.ix_(intopt.sph_aux_idx, intopt.sph_aux_idx)]
j2c = take_last2d(j2c, intopt.sph_aux_idx)
try:
self.cd_low = cholesky(j2c)
self.cd_low = tag_array(self.cd_low, tag='cd')
Expand Down
6 changes: 3 additions & 3 deletions gpu4pyscf/df/grad/rhf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from pyscf.df.grad import rhf
from pyscf import lib, scf, gto
from gpu4pyscf.df import int3c2e
from gpu4pyscf.lib.cupy_helper import print_mem_info, tag_array, unpack_tril, contract, load_library
from gpu4pyscf.lib.cupy_helper import print_mem_info, tag_array, unpack_tril, contract, load_library, take_last2d
from gpu4pyscf.grad.rhf import grad_elec
from gpu4pyscf import __config__
from gpu4pyscf.lib import logger
Expand Down Expand Up @@ -59,7 +59,7 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
mo_coeff = cupy.asarray(mf_grad.base.mo_coeff)
mo_occ = cupy.asarray(mf_grad.base.mo_occ)
sph_ao_idx = intopt.sph_ao_idx
dm = dm0[numpy.ix_(sph_ao_idx, sph_ao_idx)]
dm = take_last2d(dm0, sph_ao_idx)
orbo = contract('pi,i->pi', mo_coeff[:,mo_occ>0], numpy.sqrt(mo_occ[mo_occ>0]))
orbo = orbo[sph_ao_idx, :]
nocc = orbo.shape[-1]
Expand Down Expand Up @@ -119,7 +119,7 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
#rhok = solve_triangular(low_t, rhok, lower=False)
rhok = solve_triangular(low_t, rhok.reshape(naux, -1), lower=False, overwrite_b=True).reshape(naux, nocc, nocc)
tmp = contract('pij,qij->pq', rhok, rhok)
tmp = tmp[cupy.ix_(rev_aux_idx, rev_aux_idx)]
tmp = take_last2d(tmp, rev_aux_idx)
vkaux = -contract('xpq,pq->xp', int2c_e1, tmp)
vkaux_2c = cupy.array([-vkaux[:,p0:p1].sum(axis=1) for p0, p1 in auxslices[:,2:]])
vkaux = tmp = None
Expand Down
49 changes: 24 additions & 25 deletions gpu4pyscf/df/hessian/rhf.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import numpy as np
from pyscf import lib, df
from gpu4pyscf.hessian import rhf as rhf_hess
from gpu4pyscf.lib.cupy_helper import contract, tag_array, release_gpu_stack, print_mem_info
from gpu4pyscf.lib.cupy_helper import contract, tag_array, release_gpu_stack, print_mem_info, take_last2d
from gpu4pyscf.df import int3c2e
from gpu4pyscf.lib import logger

Expand All @@ -65,7 +65,7 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None,
if mo_coeff is None: mo_coeff = mf.mo_coeff
if atmlst is None: atmlst = range(mol.natm)

mo_coeff = cupy.asarray(mo_coeff)
mo_coeff = cupy.asarray(mo_coeff, order='C')
nao, nmo = mo_coeff.shape
mocc = mo_coeff[:,mo_occ>0]
mocc_2 = cupy.einsum('pi,i->pi', mocc, mo_occ[mo_occ>0]**.5)
Expand All @@ -77,8 +77,8 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None,
# overlap matrix contributions
# ------------------------------------
s1aa, s1ab, _ = rhf_hess.get_ovlp(mol)
s1aa = cupy.asarray(s1aa)
s1ab = cupy.asarray(s1ab)
s1aa = cupy.asarray(s1aa, order='C')
s1ab = cupy.asarray(s1ab, order='C')

auxmol = df.addons.make_auxmol(mol, auxbasis=mf.with_df.auxbasis)
naux = auxmol.nao
Expand All @@ -101,15 +101,15 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None,
sph_aux_idx = intopt.sph_aux_idx

mocc_2 = mocc_2[sph_ao_idx, :]
dm0 = dm0[cupy.ix_(sph_ao_idx, sph_ao_idx)]
dm0 = take_last2d(dm0, sph_ao_idx)
dm0_tag = tag_array(dm0, occ_coeff=mocc_2)

int2c = cupy.asarray(int2c)
int2c = int2c[cupy.ix_(sph_aux_idx, sph_aux_idx)]
int2c = cupy.asarray(int2c, order='C')
int2c = take_last2d(int2c, sph_aux_idx)
int2c_inv = cupy.linalg.pinv(int2c, rcond=1e-12)

int2c_ip1 = cupy.asarray(int2c_ip1)
int2c_ip1 = int2c_ip1[cupy.ix_(np.arange(3), sph_aux_idx, sph_aux_idx)]
int2c_ip1 = cupy.asarray(int2c_ip1, order='C')
int2c_ip1 = take_last2d(int2c_ip1, sph_aux_idx)
int2c_ip1_inv = contract('yqp,pr->yqr', int2c_ip1, int2c_inv)

hj_ao_ao = cupy.zeros([nao,nao,3,3])
Expand Down Expand Up @@ -223,14 +223,13 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None,

# int2c contributions
if hessobj.auxbasis_response > 1:
aux_aux_9 = np.ix_(np.arange(9), sph_aux_idx, sph_aux_idx)
if omega and omega > 1e-10:
with auxmol.with_range_coulomb(omega):
int2c_ipip1 = auxmol.intor('int2c2e_ipip1', aosym='s1')
else:
int2c_ipip1 = auxmol.intor('int2c2e_ipip1', aosym='s1')
int2c_ipip1 = cupy.asarray(int2c_ipip1)
int2c_ipip1 = int2c_ipip1[aux_aux_9]
int2c_ipip1 = cupy.asarray(int2c_ipip1, order='C')
int2c_ipip1 = take_last2d(int2c_ipip1, sph_aux_idx)
rhoj2c_P = contract('xpq,q->xp', int2c_ipip1, rhoj0_P)
# (00|0)(2|0)(0|00)
hj_aux_diag -= cupy.einsum('p,xp->px', rhoj0_P, rhoj2c_P).reshape(-1,3,3)
Expand All @@ -244,13 +243,13 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None,
int2c_ip1ip2 = auxmol.intor('int2c2e_ip1ip2', aosym='s1')
else:
int2c_ip1ip2 = auxmol.intor('int2c2e_ip1ip2', aosym='s1')
int2c_ip1ip2 = cupy.asarray(int2c_ip1ip2)
int2c_ip1ip2 = int2c_ip1ip2[aux_aux_9]
int2c_ip1ip2 = cupy.asarray(int2c_ip1ip2, order='C')
int2c_ip1ip2 = take_last2d(int2c_ip1ip2, sph_aux_idx)
hj_aux_aux = -.5 * cupy.einsum('p,xpq,q->pqx', rhoj0_P, int2c_ip1ip2, rhoj0_P).reshape(naux, naux,3,3)
if with_k:
hk_aux_aux = -.5 * contract('xpq,pq->pqx', int2c_ip1ip2, rho2c_0).reshape(naux,naux,3,3)
t1 = log.timer_debug1('intermediate variables with int2c_*', *t1)
int2c_ip1ip2 = aux_aux_9 = None
int2c_ip1ip2 = None

cupy.get_default_memory_pool().free_all_blocks()
release_gpu_stack()
Expand Down Expand Up @@ -402,8 +401,8 @@ def _gen_jk(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None,
atmlst = range(mol.natm)
# FIXME
with_k = True
mo_coeff = cupy.asarray(mo_coeff)
mo_occ = cupy.asarray(mo_occ)
mo_coeff = cupy.asarray(mo_coeff, order='C')
mo_occ = cupy.asarray(mo_occ, order='C')

mf = hessobj.base
#auxmol = hessobj.base.with_df.auxmol
Expand All @@ -420,7 +419,7 @@ def _gen_jk(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None,
int2c = auxmol.intor('int2c2e', aosym='s1')
else:
int2c = auxmol.intor('int2c2e', aosym='s1')
int2c = cupy.asarray(int2c)
int2c = cupy.asarray(int2c, order='C')
# ======================= sorted AO begin ======================================
intopt = int3c2e.VHFOpt(mol, auxmol, 'int2e')
intopt.build(mf.direct_scf_tol, diag_block_with_triu=True, aosym=False, group_size_aux=BLKSIZE, group_size=BLKSIZE)
Expand All @@ -430,10 +429,10 @@ def _gen_jk(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None,

mocc = mocc[sph_ao_idx, :]
mo_coeff = mo_coeff[sph_ao_idx,:]
dm0 = dm0[cupy.ix_(sph_ao_idx, sph_ao_idx)]
dm0 = take_last2d(dm0, sph_ao_idx)
dm0_tag = tag_array(dm0, occ_coeff=mocc)

int2c = int2c[cupy.ix_(sph_aux_idx, sph_aux_idx)]
int2c = take_last2d(int2c, sph_aux_idx)
int2c_inv = cupy.linalg.pinv(int2c, rcond=1e-12)

wj, wk_Pl_, wk_P__ = int3c2e.get_int3c2e_wjk(mol, auxmol, dm0_tag, omega=omega)
Expand All @@ -451,8 +450,8 @@ def _gen_jk(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None,

# int3c_ip1 contributions
vj1_buf, vk1_buf, vj1_ao, vk1_ao = int3c2e.get_int3c2e_ip1_vjk(intopt, rhoj0, rhok0_Pl_, dm0_tag, aoslices, omega=omega)
vj1_buf = vj1_buf[cupy.ix_(numpy.arange(3), rev_ao_idx, rev_ao_idx)]
vk1_buf = vk1_buf[cupy.ix_(numpy.arange(3), rev_ao_idx, rev_ao_idx)]
vj1_buf = take_last2d(vj1_buf, rev_ao_idx)
vk1_buf = take_last2d(vk1_buf, rev_ao_idx)

vj1_int3c_ip1 = -contract('nxiq,ip->nxpq', vj1_ao, mo_coeff)
vk1_int3c_ip1 = -contract('nxiq,ip->nxpq', vk1_ao, mo_coeff)
Expand All @@ -472,8 +471,8 @@ def _gen_jk(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None,
int2c_ip1 = auxmol.intor('int2c2e_ip1', aosym='s1')
else:
int2c_ip1 = auxmol.intor('int2c2e_ip1', aosym='s1')
int2c_ip1 = cupy.asarray(int2c_ip1)
int2c_ip1 = int2c_ip1[cupy.ix_(np.arange(3), sph_aux_idx, sph_aux_idx)]
int2c_ip1 = cupy.asarray(int2c_ip1, order='C')
int2c_ip1 = take_last2d(int2c_ip1, sph_aux_idx)

wj0_10 = contract('xpq,q->xp', int2c_ip1, rhoj0)
wk0_10_P__ = contract('xqp,pro->xqro', int2c_ip1, rhok0_P__)
Expand Down Expand Up @@ -527,7 +526,7 @@ def _ao2mo(mat):
vk1_ao[:,:,p0:p1] -= vk1_buf[:,p0:p1,:].transpose(0,2,1)

h1 = hcore_deriv(ia)
h1 = _ao2mo(cupy.asarray(h1))
h1 = _ao2mo(cupy.asarray(h1, order='C'))
vj1 = vj1_int3c[ia] + _ao2mo(vj1_ao)
if with_k:
vk1 = vk1_int3c[ia] + _ao2mo(vk1_ao)
Expand Down
6 changes: 3 additions & 3 deletions gpu4pyscf/df/int3c2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from pyscf.scf import _vhf
from gpu4pyscf.scf.hf import BasisProdCache, _make_s_index_offsets
from gpu4pyscf.lib.cupy_helper import (
block_c2s_diag, cart2sph, block_diag, contract, load_library, c2s_l, get_avail_mem, print_mem_info)
block_c2s_diag, cart2sph, block_diag, contract, load_library, c2s_l, get_avail_mem, print_mem_info, take_last2d)
from gpu4pyscf.lib import logger

LMAX_ON_GPU = 8
Expand Down Expand Up @@ -1183,8 +1183,8 @@ def get_dh1e(mol, dm0):
fakemol = gto.fakemol_for_charges(coords)
intopt = VHFOpt(mol, fakemol, 'int2e')
intopt.build(1e-14, diag_block_with_triu=True, aosym=False, group_size=BLKSIZE, group_size_aux=BLKSIZE)
dm0_sorted = dm0[cupy.ix_(intopt.sph_ao_idx, intopt.sph_ao_idx)]

#dm0_sorted = dm0[cupy.ix_(intopt.sph_ao_idx, intopt.sph_ao_idx)]
dm0_sorted = take_last2d(dm0, intopt.sph_ao_idx)
dh1e = cupy.zeros([natm,3])
for i0,i1,j0,j1,k0,k1,int3c_blk in loop_int3c2e_general(intopt, ip_type='ip1'):
dh1e[k0:k1,:3] += cupy.einsum('xkji,ij->kx', int3c_blk, dm0_sorted[i0:i1,j0:j1])
Expand Down
1 change: 1 addition & 0 deletions gpu4pyscf/df/tests/test_df_hessian.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def setUpModule():

def tearDownModule():
global mol
mol.stdout.close()
del mol

def _make_rhf():
Expand Down
2 changes: 1 addition & 1 deletion gpu4pyscf/scf/cphf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from gpu4pyscf.lib import logger

def solve(fvind, mo_energy, mo_occ, h1, s1=None,
max_cycle=50, tol=1e-9, hermi=False, verbose=logger.WARN):
max_cycle=50, tol=1e-7, hermi=False, verbose=logger.WARN):
'''
Args:
fvind : function
Expand Down

0 comments on commit 7af3fff

Please sign in to comment.