added unit test for to_gpu

bytedance · Dec 22, 2023 · db7ff97 · db7ff97
1 parent b0a761b
commit db7ff97
Show file tree

Hide file tree

Showing 11 changed files with 238 additions and 56 deletions.
diff --git a/examples/00-h2o.py b/examples/00-h2o.py
@@ -13,8 +13,10 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+import numpy as np
 import pyscf
 from pyscf import lib
+from pyscf.hessian import thermo
 from gpu4pyscf.dft import rks
 lib.num_threads(8)
 
@@ -30,13 +32,15 @@
 scf_tol = 1e-10
 max_scf_cycles = 50
 screen_tol = 1e-14
-grids_level = 3
+grids_level = 5
 
-mol = pyscf.M(atom=atom, basis=bas, max_memory=32000, output='./pyscf.log')
+mol = pyscf.M(atom=atom, basis=bas, max_memory=32000)
 
 mol.verbose = 4
 mf_GPU = rks.RKS(mol, xc=xc).density_fit(auxbasis=auxbasis)
 mf_GPU.grids.level = grids_level
+mf_GPU.grids.atom_grid = (99,590)
+mf_GPU.small_rho_cutoff = 1e-10
 mf_GPU.conv_tol = scf_tol
 mf_GPU.max_cycle = max_scf_cycles
 mf_GPU.screen_tol = screen_tol
@@ -55,3 +59,17 @@
 h = mf_GPU.Hessian()
 h.auxbasis_response = 2
 h_dft = h.kernel()
+
+# harmonic analysis
+results = thermo.harmonic_analysis(mol, h_dft)
+thermo.dump_normal_mode(mol, results)
+
+results = thermo.thermo(mf_GPU, results['freq_au'], 298.15, 101325)
+thermo.dump_thermo(mol, results)
+
+# force translational symmetry
+natm = mol.natm
+h_dft = h_dft.transpose([0,2,1,3]).reshape(3*natm,3*natm)
+h_diag = h_dft.sum(axis=0)
+h_dft -= np.diag(h_diag)
+print(h_dft[:3,:3])
diff --git a/examples/dft_driver.py b/examples/dft_driver.py
@@ -35,10 +35,10 @@
     max_memory=32000)
 # set verbose >= 6 for debugging timer
 
-mol.verbose = 6
+mol.verbose = 4
 
 mf_df = rks.RKS(mol, xc=args.xc).density_fit(auxbasis=args.auxbasis)
-mf_df.verbose = 6
+mf_df.verbose = 4
 
 if args.solvent:
     mf_df = mf_df.PCM()
@@ -69,3 +69,4 @@
 h_dft = h.kernel()
 hess_time = time.time() - start_time
 print(f'compute time for hessian: {hess_time:.3f} s')
+print(h_dft)
diff --git a/gpu4pyscf/df/df.py b/gpu4pyscf/df/df.py
@@ -35,6 +35,8 @@
 class DF(df.DF):
     from gpu4pyscf.lib.utils import to_gpu, device
 
+    _keys = {'intopt'}
+
     def __init__(self, mol, auxbasis=None):
         super().__init__(mol, auxbasis)
         self.auxmol = None
@@ -210,8 +212,12 @@ def cholesky_eri_gpu(intopt, mol, auxmol, cd_low, omega=None, sr_only=False):
     if(not use_gpu_memory):
         log.debug("Not enough GPU memory")
         # TODO: async allocate memory
-        mem = cupy.cuda.alloc_pinned_memory(naux * npair * 8)
-        cderi = np.ndarray([naux, npair], dtype=np.float64, order='C', buffer=mem)
+        try:
+            mem = cupy.cuda.alloc_pinned_memory(naux * npair * 8)
+            cderi = np.ndarray([naux, npair], dtype=np.float64, order='C', buffer=mem)
+        except Exception:
+            raise RuntimeError('Out of CPU memory')
+
     data_stream = cupy.cuda.stream.Stream(non_blocking=False)
     count = 0
     nq = len(intopt.log_qs)

diff --git a/gpu4pyscf/df/grad/rhf.py b/gpu4pyscf/df/grad/rhf.py
@@ -13,7 +13,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-
+import copy
 import numpy
 import cupy
 from cupyx.scipy.linalg import solve_triangular
@@ -44,9 +44,13 @@ def get_jk(mf_grad, mol=None, dm0=None, hermi=0, with_j=True, with_k=True, omega
         if key in mf_grad.base.with_df._rsh_df:
             with_df = mf_grad.base.with_df._rsh_df[key]
         else:
-            raise RuntimeError(f'omega={omega} is not calculated in SCF')
+            dfobj = mf_grad.base.with_df
+            with_df = dfobj._rsh_df[key] = copy.copy(dfobj).reset()
+            #raise RuntimeError(f'omega={omega} is not calculated in SCF')
 
     auxmol = with_df.auxmol
+    if not hasattr(with_df, 'intopt') or with_df._cderi is None:
+        with_df.build(omega=omega)
     intopt = with_df.intopt
 
     naux = with_df.naux

diff --git a/gpu4pyscf/df/hessian/rhf.py b/gpu4pyscf/df/hessian/rhf.py
@@ -383,16 +383,21 @@ def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None):
     for ia, h1, vj1, vk1 in _gen_jk(hessobj, mo_coeff, mo_occ, chkfile,
                                     atmlst, verbose, True):
         h1 += vj1 - vk1 * .5
+        h1ao[ia] = h1
+        '''
         if chkfile is None:
             h1ao[ia] = h1
         else:
             key = 'scf_f1ao/%d' % ia
             lib.chkfile.save(chkfile, key, h1)
+        '''
+    return h1ao
+    '''
     if chkfile is None:
         return h1ao
     else:
         return chkfile
-
+    '''
 def _gen_jk(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None,
             verbose=None, with_k=True, omega=None):
     log = logger.new_logger(hessobj, verbose)

diff --git a/gpu4pyscf/df/hessian/rks.py b/gpu4pyscf/df/hessian/rks.py
@@ -103,13 +103,17 @@ def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None):
         for ia, h1, vj1_lr, vk1_lr in df_rhf_hess._gen_jk(hessobj, mo_coeff, mo_occ, chkfile,
                                                 atmlst, verbose, True, omega=omega):
             h1ao[ia] -= .5 * (alpha - hyb) * vk1_lr
+    return h1ao
+
+    # support chkfile?
+    '''
     if chkfile is None:
         return h1ao
     else:
         for ia in atmlst:
             lib.chkfile.save(chkfile, 'scf_f1ao/%d'%ia, h1ao[ia])
         return chkfile
-
+    '''
 
 class Hessian(rks_hess.Hessian):
     '''Non-relativistic RKS hessian'''

diff --git a/gpu4pyscf/dft/numint.py b/gpu4pyscf/dft/numint.py
@@ -1306,6 +1306,8 @@ def _block_loop(ni, mol, grids, nao=None, deriv=0, max_memory=2000,
 class NumInt(numint.NumInt):
     from gpu4pyscf.lib.utils import to_cpu, to_gpu, device
 
+    _keys = {'screen_idx', 'xcfuns', 'gdftopt'}
+
     def __init__(self, xc='LDA'):
         super().__init__()
         self.gdftopt = None

diff --git a/gpu4pyscf/hessian/rks.py b/gpu4pyscf/hessian/rks.py
@@ -202,7 +202,8 @@ def _get_vxc_diag(hessobj, mo_coeff, mo_occ, max_memory):
 
     opt = getattr(ni, 'gdftopt', None)
     if opt is None:
-        raise RuntimeError("DFT Options are not initialized")
+        ni.build(mol, grids.coords)
+        opt = ni.gdftopt
 
     coeff = cupy.asarray(opt.coeff)
     mo_coeff = coeff @ mo_coeff
@@ -224,12 +225,11 @@ def _get_vxc_diag(hessobj, mo_coeff, mo_occ, max_memory):
             aow = None
 
     elif xctype == 'GGA':
-        def contract_(mat, ao, aoidx, wv, mask):
+        def contract_(ao, aoidx, wv, mask):
             aow = numint._scale_ao(ao[aoidx[0]], wv[1])
             aow+= numint._scale_ao(ao[aoidx[1]], wv[2])
             aow+= numint._scale_ao(ao[aoidx[2]], wv[3])
-            mat_tmp = numint._dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc)
-            add_sparse(mat, mat_tmp, mask)
+            return numint._dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc)
 
         ao_deriv = 3
         for ao, mask, weight, coords \
@@ -241,24 +241,25 @@ def contract_(mat, ao, aoidx, wv, mask):
             #:aow = numpy.einsum('npi,np->pi', ao[:4], wv[:4])
             aow = numint._scale_ao(ao[:4], wv[:4])
 
+            vmat_tmp = [0]*6
             for i in range(6):
-                vmat_tmp = numint._dot_ao_ao(mol, ao[i+4], aow, mask, shls_slice, ao_loc)
-                add_sparse(vmat[i], vmat_tmp, mask)
-            contract_(vmat[0], ao, [XXX,XXY,XXZ], wv, mask)
-            contract_(vmat[1], ao, [XXY,XYY,XYZ], wv, mask)
-            contract_(vmat[2], ao, [XXZ,XYZ,XZZ], wv, mask)
-            contract_(vmat[3], ao, [XYY,YYY,YYZ], wv, mask)
-            contract_(vmat[4], ao, [XYZ,YYZ,YZZ], wv, mask)
-            contract_(vmat[5], ao, [XZZ,YZZ,ZZZ], wv, mask)
-
+                vmat_tmp[i] = numint._dot_ao_ao(mol, ao[i+4], aow, mask, shls_slice, ao_loc)
+
+            vmat_tmp[0] += contract_(ao, [XXX,XXY,XXZ], wv, mask)
+            vmat_tmp[1] += contract_(ao, [XXY,XYY,XYZ], wv, mask)
+            vmat_tmp[2] += contract_(ao, [XXZ,XYZ,XZZ], wv, mask)
+            vmat_tmp[3] += contract_(ao, [XYY,YYY,YYZ], wv, mask)
+            vmat_tmp[4] += contract_(ao, [XYZ,YYZ,YZZ], wv, mask)
+            vmat_tmp[5] += contract_(ao, [XZZ,YZZ,ZZZ], wv, mask)
+            for i in range(6):
+                add_sparse(vmat[i], vmat_tmp[i], mask)
             rho = vxc = wv = aow = None
     elif xctype == 'MGGA':
-        def contract_(mat, ao, aoidx, wv, mask):
+        def contract_(ao, aoidx, wv, mask):
             aow = numint._scale_ao(ao[aoidx[0]], wv[1])
             aow+= numint._scale_ao(ao[aoidx[1]], wv[2])
             aow+= numint._scale_ao(ao[aoidx[2]], wv[3])
-            mat_tmp = numint._dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc)
-            add_sparse(mat, mat_tmp, mask)
+            return numint._dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc)
 
         ao_deriv = 3
         for ao, mask, weight, coords \
@@ -269,28 +270,31 @@ def contract_(mat, ao, aoidx, wv, mask):
             wv = weight * vxc
             wv[4] *= .5  # for the factor 1/2 in tau
             #:aow = numpy.einsum('npi,np->pi', ao[:4], wv[:4])
+            vmat_tmp = [0]*6
             aow = numint._scale_ao(ao[:4], wv[:4])
             for i in range(6):
-                vmat_tmp = numint._dot_ao_ao(mol, ao[i+4], aow, mask, shls_slice, ao_loc)
-                add_sparse(vmat[i], vmat_tmp, mask)
+                vmat_tmp[i] = numint._dot_ao_ao(mol, ao[i+4], aow, mask, shls_slice, ao_loc)
 
-            contract_(vmat[0], ao, [XXX,XXY,XXZ], wv, mask)
-            contract_(vmat[1], ao, [XXY,XYY,XYZ], wv, mask)
-            contract_(vmat[2], ao, [XXZ,XYZ,XZZ], wv, mask)
-            contract_(vmat[3], ao, [XYY,YYY,YYZ], wv, mask)
-            contract_(vmat[4], ao, [XYZ,YYZ,YZZ], wv, mask)
-            contract_(vmat[5], ao, [XZZ,YZZ,ZZZ], wv, mask)
+            vmat_tmp[0] += contract_(ao, [XXX,XXY,XXZ], wv, mask)
+            vmat_tmp[1] += contract_(ao, [XXY,XYY,XYZ], wv, mask)
+            vmat_tmp[2] += contract_(ao, [XXZ,XYZ,XZZ], wv, mask)
+            vmat_tmp[3] += contract_(ao, [XYY,YYY,YYZ], wv, mask)
+            vmat_tmp[4] += contract_(ao, [XYZ,YYZ,YZZ], wv, mask)
+            vmat_tmp[5] += contract_(ao, [XZZ,YZZ,ZZZ], wv, mask)
 
             aow = [numint._scale_ao(ao[i], wv[4]) for i in range(1, 4)]
             for i, j in enumerate([XXX, XXY, XXZ, XYY, XYZ, XZZ]):
-                vmat_tmp = numint._dot_ao_ao(mol, ao[j], aow[0], mask, shls_slice, ao_loc)
-                add_sparse(vmat[i], vmat_tmp, mask)
+                vmat_tmp[i] += numint._dot_ao_ao(mol, ao[j], aow[0], mask, shls_slice, ao_loc)
+
             for i, j in enumerate([XXY, XYY, XYZ, YYY, YYZ, YZZ]):
-                vmat_tmp = numint._dot_ao_ao(mol, ao[j], aow[1], mask, shls_slice, ao_loc)
-                add_sparse(vmat[i], vmat_tmp, mask)
+                vmat_tmp[i] += numint._dot_ao_ao(mol, ao[j], aow[1], mask, shls_slice, ao_loc)
+
             for i, j in enumerate([XXZ, XYZ, XZZ, YYZ, YZZ, ZZZ]):
-                vmat_tmp = numint._dot_ao_ao(mol, ao[j], aow[2], mask, shls_slice, ao_loc)
-                add_sparse(vmat[i], vmat_tmp, mask)
+                vmat_tmp[i] += numint._dot_ao_ao(mol, ao[j], aow[2], mask, shls_slice, ao_loc)
+
+            for i in range(6):
+                add_sparse(vmat[i], vmat_tmp[i], mask)
+
     vmat = vmat[[0,1,2,
                  1,3,4,
                  2,4,5]]
@@ -380,15 +384,11 @@ def _get_vxc_deriv2(hessobj, mo_coeff, mo_occ, max_memory):
 
     opt = getattr(ni, 'gdftopt', None)
     if opt is None:
-        raise RuntimeError("DFT Options are not initialized")
+        ni.build(mol, grids.coords)
+        opt = ni.gdftopt
     coeff = cupy.asarray(opt.coeff)
     dm0 = mf.make_rdm1(mo_coeff, mo_occ)
 
-    ## transform object in sorted AO
-    #mo_coeff = coeff @ mo_coeff
-    #dm0 = coeff @ dm0
-    #dm0 = dm0 @ coeff.T
-
     vmat = cupy.zeros((mol.natm,3,3,nao,nao))
     ipip = cupy.zeros((3,3,nao,nao))
     if xctype == 'LDA':
@@ -559,7 +559,9 @@ def _get_vxc_deriv1(hessobj, mo_coeff, mo_occ, max_memory):
 
     opt = getattr(ni, 'gdftopt', None)
     if opt is None:
-        raise RuntimeError("DFT Options are not initialized")
+        ni.build(mol, grids.coords)
+        opt = ni.gdftopt
+
     coeff = cupy.asarray(opt.coeff)
     dm0 = mf.make_rdm1(mo_coeff, mo_occ)
 

diff --git a/gpu4pyscf/lib/tests/test_cupy_helper.py b/gpu4pyscf/lib/tests/test_cupy_helper.py
@@ -16,7 +16,9 @@
 import unittest
 import numpy
 import cupy
-from gpu4pyscf.lib.cupy_helper import *
+from gpu4pyscf.lib.cupy_helper import (
+    take_last2d, transpose_sum, krylov, unpack_sparse,
+    add_sparse)
 
 class KnownValues(unittest.TestCase):
     def test_take_last2d(self):
@@ -26,16 +28,16 @@ def test_take_last2d(self):
         numpy.random.shuffle(indices)
         a = cupy.random.rand(count,n,n)
         b = take_last2d(a, indices)
-        assert(cupy.linalg.norm(a[:,indices][:,:,indices] - b) < 1e-10)    
-    
+        assert(cupy.linalg.norm(a[:,indices][:,:,indices] - b) < 1e-10)
+
     def test_transpose_sum(self):
         n = 3
         count = 4
         a = cupy.random.rand(count,n,n)
         b = a + a.transpose(0,2,1)
         transpose_sum(a)
         assert(cupy.linalg.norm(a - b) < 1e-10)
-    
+
     def test_krylov(self):
         a = cupy.random.random((10,10)) * 1e-2
         b = cupy.random.random(10)
@@ -53,14 +55,15 @@ def test_cderi_sparse(self):
 
         row, col = cupy.tril_indices(nao)
         cderi_sparse = cderi[row,col,:]
-        p0 = 1; p1 = 3
+        p0 = 1
+        p1 = 3
         out = unpack_sparse(cderi_sparse, row, col, p0, p1, nao)
         assert cupy.linalg.norm(out - cderi[:,:,p0:p1]) < 1e-10
 
     def test_sparse(self):
         a = cupy.random.rand(20, 20)
         b = cupy.random.rand(5,5)
-        indices = cupy.array([3,4,8,10,12]).astype(np.int32)
+        indices = cupy.array([3,4,8,10,12]).astype(numpy.int32)
         a0 = a.copy()
         a0[cupy.ix_(indices, indices)] += b
         add_sparse(a, b, indices)