Skip to content

Commit

Permalink
added unit test for to_gpu
Browse files Browse the repository at this point in the history
  • Loading branch information
wxj6000 committed Dec 22, 2023
1 parent b0a761b commit db7ff97
Show file tree
Hide file tree
Showing 11 changed files with 238 additions and 56 deletions.
22 changes: 20 additions & 2 deletions examples/00-h2o.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import numpy as np
import pyscf
from pyscf import lib
from pyscf.hessian import thermo
from gpu4pyscf.dft import rks
lib.num_threads(8)

Expand All @@ -30,13 +32,15 @@
scf_tol = 1e-10
max_scf_cycles = 50
screen_tol = 1e-14
grids_level = 3
grids_level = 5

mol = pyscf.M(atom=atom, basis=bas, max_memory=32000, output='./pyscf.log')
mol = pyscf.M(atom=atom, basis=bas, max_memory=32000)

mol.verbose = 4
mf_GPU = rks.RKS(mol, xc=xc).density_fit(auxbasis=auxbasis)
mf_GPU.grids.level = grids_level
mf_GPU.grids.atom_grid = (99,590)
mf_GPU.small_rho_cutoff = 1e-10
mf_GPU.conv_tol = scf_tol
mf_GPU.max_cycle = max_scf_cycles
mf_GPU.screen_tol = screen_tol
Expand All @@ -55,3 +59,17 @@
h = mf_GPU.Hessian()
h.auxbasis_response = 2
h_dft = h.kernel()

# harmonic analysis
results = thermo.harmonic_analysis(mol, h_dft)
thermo.dump_normal_mode(mol, results)

results = thermo.thermo(mf_GPU, results['freq_au'], 298.15, 101325)
thermo.dump_thermo(mol, results)

# force translational symmetry
natm = mol.natm
h_dft = h_dft.transpose([0,2,1,3]).reshape(3*natm,3*natm)
h_diag = h_dft.sum(axis=0)
h_dft -= np.diag(h_diag)
print(h_dft[:3,:3])
5 changes: 3 additions & 2 deletions examples/dft_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@
max_memory=32000)
# set verbose >= 6 for debugging timer

mol.verbose = 6
mol.verbose = 4

mf_df = rks.RKS(mol, xc=args.xc).density_fit(auxbasis=args.auxbasis)
mf_df.verbose = 6
mf_df.verbose = 4

if args.solvent:
mf_df = mf_df.PCM()
Expand Down Expand Up @@ -69,3 +69,4 @@
h_dft = h.kernel()
hess_time = time.time() - start_time
print(f'compute time for hessian: {hess_time:.3f} s')
print(h_dft)
10 changes: 8 additions & 2 deletions gpu4pyscf/df/df.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
class DF(df.DF):
from gpu4pyscf.lib.utils import to_gpu, device

_keys = {'intopt'}

def __init__(self, mol, auxbasis=None):
super().__init__(mol, auxbasis)
self.auxmol = None
Expand Down Expand Up @@ -210,8 +212,12 @@ def cholesky_eri_gpu(intopt, mol, auxmol, cd_low, omega=None, sr_only=False):
if(not use_gpu_memory):
log.debug("Not enough GPU memory")
# TODO: async allocate memory
mem = cupy.cuda.alloc_pinned_memory(naux * npair * 8)
cderi = np.ndarray([naux, npair], dtype=np.float64, order='C', buffer=mem)
try:
mem = cupy.cuda.alloc_pinned_memory(naux * npair * 8)
cderi = np.ndarray([naux, npair], dtype=np.float64, order='C', buffer=mem)
except Exception:
raise RuntimeError('Out of CPU memory')

data_stream = cupy.cuda.stream.Stream(non_blocking=False)
count = 0
nq = len(intopt.log_qs)
Expand Down
8 changes: 6 additions & 2 deletions gpu4pyscf/df/grad/rhf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


import copy
import numpy
import cupy
from cupyx.scipy.linalg import solve_triangular
Expand Down Expand Up @@ -44,9 +44,13 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
if key in mf_grad.base.with_df._rsh_df:
with_df = mf_grad.base.with_df._rsh_df[key]
else:
raise RuntimeError(f'omega={omega} is not calculated in SCF')
dfobj = mf_grad.base.with_df
with_df = dfobj._rsh_df[key] = copy.copy(dfobj).reset()
#raise RuntimeError(f'omega={omega} is not calculated in SCF')

auxmol = with_df.auxmol
if not hasattr(with_df, 'intopt') or with_df._cderi is None:
with_df.build(omega=omega)
intopt = with_df.intopt

naux = with_df.naux
Expand Down
7 changes: 6 additions & 1 deletion gpu4pyscf/df/hessian/rhf.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,16 +383,21 @@ def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None):
for ia, h1, vj1, vk1 in _gen_jk(hessobj, mo_coeff, mo_occ, chkfile,
atmlst, verbose, True):
h1 += vj1 - vk1 * .5
h1ao[ia] = h1
'''
if chkfile is None:
h1ao[ia] = h1
else:
key = 'scf_f1ao/%d' % ia
lib.chkfile.save(chkfile, key, h1)
'''
return h1ao
'''
if chkfile is None:
return h1ao
else:
return chkfile

'''
def _gen_jk(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None,
verbose=None, with_k=True, omega=None):
log = logger.new_logger(hessobj, verbose)
Expand Down
6 changes: 5 additions & 1 deletion gpu4pyscf/df/hessian/rks.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,17 @@ def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None):
for ia, h1, vj1_lr, vk1_lr in df_rhf_hess._gen_jk(hessobj, mo_coeff, mo_occ, chkfile,
atmlst, verbose, True, omega=omega):
h1ao[ia] -= .5 * (alpha - hyb) * vk1_lr
return h1ao

# support chkfile?
'''
if chkfile is None:
return h1ao
else:
for ia in atmlst:
lib.chkfile.save(chkfile, 'scf_f1ao/%d'%ia, h1ao[ia])
return chkfile

'''

class Hessian(rks_hess.Hessian):
'''Non-relativistic RKS hessian'''
Expand Down
2 changes: 2 additions & 0 deletions gpu4pyscf/dft/numint.py
Original file line number Diff line number Diff line change
Expand Up @@ -1306,6 +1306,8 @@ def _block_loop(ni, mol, grids, nao=None, deriv=0, max_memory=2000,
class NumInt(numint.NumInt):
from gpu4pyscf.lib.utils import to_cpu, to_gpu, device

_keys = {'screen_idx', 'xcfuns', 'gdftopt'}

def __init__(self, xc='LDA'):
super().__init__()
self.gdftopt = None
Expand Down
76 changes: 39 additions & 37 deletions gpu4pyscf/hessian/rks.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ def _get_vxc_diag(hessobj, mo_coeff, mo_occ, max_memory):

opt = getattr(ni, 'gdftopt', None)
if opt is None:
raise RuntimeError("DFT Options are not initialized")
ni.build(mol, grids.coords)
opt = ni.gdftopt

coeff = cupy.asarray(opt.coeff)
mo_coeff = coeff @ mo_coeff
Expand All @@ -224,12 +225,11 @@ def _get_vxc_diag(hessobj, mo_coeff, mo_occ, max_memory):
aow = None

elif xctype == 'GGA':
def contract_(mat, ao, aoidx, wv, mask):
def contract_(ao, aoidx, wv, mask):
aow = numint._scale_ao(ao[aoidx[0]], wv[1])
aow+= numint._scale_ao(ao[aoidx[1]], wv[2])
aow+= numint._scale_ao(ao[aoidx[2]], wv[3])
mat_tmp = numint._dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc)
add_sparse(mat, mat_tmp, mask)
return numint._dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc)

ao_deriv = 3
for ao, mask, weight, coords \
Expand All @@ -241,24 +241,25 @@ def contract_(mat, ao, aoidx, wv, mask):
#:aow = numpy.einsum('npi,np->pi', ao[:4], wv[:4])
aow = numint._scale_ao(ao[:4], wv[:4])

vmat_tmp = [0]*6
for i in range(6):
vmat_tmp = numint._dot_ao_ao(mol, ao[i+4], aow, mask, shls_slice, ao_loc)
add_sparse(vmat[i], vmat_tmp, mask)
contract_(vmat[0], ao, [XXX,XXY,XXZ], wv, mask)
contract_(vmat[1], ao, [XXY,XYY,XYZ], wv, mask)
contract_(vmat[2], ao, [XXZ,XYZ,XZZ], wv, mask)
contract_(vmat[3], ao, [XYY,YYY,YYZ], wv, mask)
contract_(vmat[4], ao, [XYZ,YYZ,YZZ], wv, mask)
contract_(vmat[5], ao, [XZZ,YZZ,ZZZ], wv, mask)

vmat_tmp[i] = numint._dot_ao_ao(mol, ao[i+4], aow, mask, shls_slice, ao_loc)

vmat_tmp[0] += contract_(ao, [XXX,XXY,XXZ], wv, mask)
vmat_tmp[1] += contract_(ao, [XXY,XYY,XYZ], wv, mask)
vmat_tmp[2] += contract_(ao, [XXZ,XYZ,XZZ], wv, mask)
vmat_tmp[3] += contract_(ao, [XYY,YYY,YYZ], wv, mask)
vmat_tmp[4] += contract_(ao, [XYZ,YYZ,YZZ], wv, mask)
vmat_tmp[5] += contract_(ao, [XZZ,YZZ,ZZZ], wv, mask)
for i in range(6):
add_sparse(vmat[i], vmat_tmp[i], mask)
rho = vxc = wv = aow = None
elif xctype == 'MGGA':
def contract_(mat, ao, aoidx, wv, mask):
def contract_(ao, aoidx, wv, mask):
aow = numint._scale_ao(ao[aoidx[0]], wv[1])
aow+= numint._scale_ao(ao[aoidx[1]], wv[2])
aow+= numint._scale_ao(ao[aoidx[2]], wv[3])
mat_tmp = numint._dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc)
add_sparse(mat, mat_tmp, mask)
return numint._dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc)

ao_deriv = 3
for ao, mask, weight, coords \
Expand All @@ -269,28 +270,31 @@ def contract_(mat, ao, aoidx, wv, mask):
wv = weight * vxc
wv[4] *= .5 # for the factor 1/2 in tau
#:aow = numpy.einsum('npi,np->pi', ao[:4], wv[:4])
vmat_tmp = [0]*6
aow = numint._scale_ao(ao[:4], wv[:4])
for i in range(6):
vmat_tmp = numint._dot_ao_ao(mol, ao[i+4], aow, mask, shls_slice, ao_loc)
add_sparse(vmat[i], vmat_tmp, mask)
vmat_tmp[i] = numint._dot_ao_ao(mol, ao[i+4], aow, mask, shls_slice, ao_loc)

contract_(vmat[0], ao, [XXX,XXY,XXZ], wv, mask)
contract_(vmat[1], ao, [XXY,XYY,XYZ], wv, mask)
contract_(vmat[2], ao, [XXZ,XYZ,XZZ], wv, mask)
contract_(vmat[3], ao, [XYY,YYY,YYZ], wv, mask)
contract_(vmat[4], ao, [XYZ,YYZ,YZZ], wv, mask)
contract_(vmat[5], ao, [XZZ,YZZ,ZZZ], wv, mask)
vmat_tmp[0] += contract_(ao, [XXX,XXY,XXZ], wv, mask)
vmat_tmp[1] += contract_(ao, [XXY,XYY,XYZ], wv, mask)
vmat_tmp[2] += contract_(ao, [XXZ,XYZ,XZZ], wv, mask)
vmat_tmp[3] += contract_(ao, [XYY,YYY,YYZ], wv, mask)
vmat_tmp[4] += contract_(ao, [XYZ,YYZ,YZZ], wv, mask)
vmat_tmp[5] += contract_(ao, [XZZ,YZZ,ZZZ], wv, mask)

aow = [numint._scale_ao(ao[i], wv[4]) for i in range(1, 4)]
for i, j in enumerate([XXX, XXY, XXZ, XYY, XYZ, XZZ]):
vmat_tmp = numint._dot_ao_ao(mol, ao[j], aow[0], mask, shls_slice, ao_loc)
add_sparse(vmat[i], vmat_tmp, mask)
vmat_tmp[i] += numint._dot_ao_ao(mol, ao[j], aow[0], mask, shls_slice, ao_loc)

for i, j in enumerate([XXY, XYY, XYZ, YYY, YYZ, YZZ]):
vmat_tmp = numint._dot_ao_ao(mol, ao[j], aow[1], mask, shls_slice, ao_loc)
add_sparse(vmat[i], vmat_tmp, mask)
vmat_tmp[i] += numint._dot_ao_ao(mol, ao[j], aow[1], mask, shls_slice, ao_loc)

for i, j in enumerate([XXZ, XYZ, XZZ, YYZ, YZZ, ZZZ]):
vmat_tmp = numint._dot_ao_ao(mol, ao[j], aow[2], mask, shls_slice, ao_loc)
add_sparse(vmat[i], vmat_tmp, mask)
vmat_tmp[i] += numint._dot_ao_ao(mol, ao[j], aow[2], mask, shls_slice, ao_loc)

for i in range(6):
add_sparse(vmat[i], vmat_tmp[i], mask)

vmat = vmat[[0,1,2,
1,3,4,
2,4,5]]
Expand Down Expand Up @@ -380,15 +384,11 @@ def _get_vxc_deriv2(hessobj, mo_coeff, mo_occ, max_memory):

opt = getattr(ni, 'gdftopt', None)
if opt is None:
raise RuntimeError("DFT Options are not initialized")
ni.build(mol, grids.coords)
opt = ni.gdftopt
coeff = cupy.asarray(opt.coeff)
dm0 = mf.make_rdm1(mo_coeff, mo_occ)

## transform object in sorted AO
#mo_coeff = coeff @ mo_coeff
#dm0 = coeff @ dm0
#dm0 = dm0 @ coeff.T

vmat = cupy.zeros((mol.natm,3,3,nao,nao))
ipip = cupy.zeros((3,3,nao,nao))
if xctype == 'LDA':
Expand Down Expand Up @@ -559,7 +559,9 @@ def _get_vxc_deriv1(hessobj, mo_coeff, mo_occ, max_memory):

opt = getattr(ni, 'gdftopt', None)
if opt is None:
raise RuntimeError("DFT Options are not initialized")
ni.build(mol, grids.coords)
opt = ni.gdftopt

coeff = cupy.asarray(opt.coeff)
dm0 = mf.make_rdm1(mo_coeff, mo_occ)

Expand Down
15 changes: 9 additions & 6 deletions gpu4pyscf/lib/tests/test_cupy_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
import unittest
import numpy
import cupy
from gpu4pyscf.lib.cupy_helper import *
from gpu4pyscf.lib.cupy_helper import (
take_last2d, transpose_sum, krylov, unpack_sparse,
add_sparse)

class KnownValues(unittest.TestCase):
def test_take_last2d(self):
Expand All @@ -26,16 +28,16 @@ def test_take_last2d(self):
numpy.random.shuffle(indices)
a = cupy.random.rand(count,n,n)
b = take_last2d(a, indices)
assert(cupy.linalg.norm(a[:,indices][:,:,indices] - b) < 1e-10)
assert(cupy.linalg.norm(a[:,indices][:,:,indices] - b) < 1e-10)

def test_transpose_sum(self):
n = 3
count = 4
a = cupy.random.rand(count,n,n)
b = a + a.transpose(0,2,1)
transpose_sum(a)
assert(cupy.linalg.norm(a - b) < 1e-10)

def test_krylov(self):
a = cupy.random.random((10,10)) * 1e-2
b = cupy.random.random(10)
Expand All @@ -53,14 +55,15 @@ def test_cderi_sparse(self):

row, col = cupy.tril_indices(nao)
cderi_sparse = cderi[row,col,:]
p0 = 1; p1 = 3
p0 = 1
p1 = 3
out = unpack_sparse(cderi_sparse, row, col, p0, p1, nao)
assert cupy.linalg.norm(out - cderi[:,:,p0:p1]) < 1e-10

def test_sparse(self):
a = cupy.random.rand(20, 20)
b = cupy.random.rand(5,5)
indices = cupy.array([3,4,8,10,12]).astype(np.int32)
indices = cupy.array([3,4,8,10,12]).astype(numpy.int32)
a0 = a.copy()
a0[cupy.ix_(indices, indices)] += b
add_sparse(a, b, indices)
Expand Down
Loading

0 comments on commit db7ff97

Please sign in to comment.