Merge branch 'master' into mep_integral

bytedance · Nov 21, 2024 · 74c2bf2 · 74c2bf2
2 parents 46f3b3f + ce3de23
commit 74c2bf2
Show file tree

Hide file tree

Showing 99 changed files with 13,953 additions and 1,235 deletions.
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
@@ -24,7 +24,7 @@ jobs:
         pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
         python3 -m pip install --upgrade pip
         pip3 install flake8 pytest coverage pytest-cov pyscf-dispersion
-        pip3 install "pyscf>2.5"
+        pip3 install pyscf --upgrade
         pip3 install numpy --upgrade
         pip3 install h5py --upgrade
         pip3 install gpu4pyscf-libxc-cuda12x --upgrade

diff --git a/README.md b/README.md
@@ -43,6 +43,7 @@ Features
 - Density fitting scheme and direct SCF scheme;
 - SCF, analytical Gradient, and analytical Hessian calculations for Hartree-Fock and DFT;
 - LDA, GGA, mGGA, hybrid, and range-separated functionals via [libXC](https://gitlab.com/libxc/libxc/-/tree/master/);
+- Spin-conserved and spin-flip TDA and TDDFT for excitated states
 - Geometry optimization and transition state search via [geomeTRIC](https://geometric.readthedocs.io/en/latest/);
 - Dispersion corrections via [DFTD3](https://github.com/dftd3/simple-dftd3) and [DFTD4](https://github.com/dftd4/dftd4);
 - Nonlocal functional correction (vv10) for SCF and gradient;

diff --git a/examples/00-h2o.py b/examples/00-h2o.py
@@ -60,6 +60,7 @@
 # Compute Hessian
 h = mf_GPU.Hessian()
 h.auxbasis_response = 2                # 0: no aux contribution, 1: some contributions, 2: all
+mf_GPU.cphf_grids.atom_grid = (50,194) # customize grids for solving CPSCF equation, SG1 by default
 h_dft = h.kernel()
 
 # harmonic analysis

diff --git a/examples/24-cp_bsse.py b/examples/24-cp_bsse.py
@@ -0,0 +1,67 @@
+# Copyright 2024 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+####################################################
+# Example of interaction energy with counterpoise correction
+####################################################
+
+import pyscf
+from gpu4pyscf.dft import rks
+
+atom_A = [
+('O', (0.000000, 0.000000, 0.000000)),
+('H', (0.000000, 0.757160, 0.586260)),
+('H', (0.000000, -0.757160, 0.586260))
+]
+
+atom_B = [
+('O', (0.000000, 0.000000, 2.913530)),
+('H', (0.000000, 0.757160, 3.499790)),
+('H', (0.000000, -0.757160, 3.499790))
+]
+
+atom_AB = atom_A + atom_B
+
+mol_A = pyscf.M(atom=atom_A, basis='cc-pVDZ').build()
+mol_B = pyscf.M(atom=atom_B, basis='cc-pVDZ').build()
+mol_AB = pyscf.M(atom=atom_AB, basis='cc-pVDZ').build()
+
+# Monomer A in the dimer basis
+mol_A_ghost = mol_A.copy()
+ghost_atoms_B = mol_B.atom
+mol_A_ghost.atom.extend([('X-' + atom[0], atom[1]) for atom in ghost_atoms_B])
+mol_A_ghost.build()
+
+# Monomer B in the dimer basis
+mol_B_ghost = mol_B.copy()
+ghost_atoms_A = mol_A.atom
+mol_B_ghost.atom.extend([('X-' + atom[0], atom[1]) for atom in ghost_atoms_A])
+mol_B_ghost.build()
+
+def solve_dft(mol, xc='b3lyp'):
+    mf = rks.RKS(mol, xc='b3lyp').density_fit()
+    mf.grids.atom_grid = (99,590)
+    return mf.kernel()
+
+E_AB = solve_dft(mol_AB)
+E_A = solve_dft(mol_A)
+E_B = solve_dft(mol_B)
+interaction_energy_no_bsse = E_AB - (E_A + E_B)
+print(f"Interaction Energy without BSSE Correction: {interaction_energy_no_bsse:.6f} Hartree")
+
+E_A_ghost = solve_dft(mol_A_ghost)
+E_B_ghost = solve_dft(mol_B_ghost)
+interaction_energy_bsse = E_AB - (E_A_ghost + E_B_ghost)
+print(f"Interaction Energy with BSSE Correction: {interaction_energy_bsse:.6f} Hartree")
diff --git a/gpu4pyscf/__config__.py b/gpu4pyscf/__config__.py
@@ -2,37 +2,16 @@
 
 props = cupy.cuda.runtime.getDeviceProperties(0)
 GB = 1024*1024*1024
-# such as A100-80G
-if props['totalGlobalMem'] >= 64 * GB:
-    min_ao_blksize = 128
-    min_grid_blksize = 128*128
-    ao_aligned = 32
-    grid_aligned = 256
-    mem_fraction = 0.9
-    number_of_threads = 2048 * 108
-# such as V100-32G
-elif props['totalGlobalMem'] >= 32 * GB:
-    min_ao_blksize = 128
-    min_grid_blksize = 128*128
-    ao_aligned = 32
-    grid_aligned = 256
-    mem_fraction = 0.9
-    number_of_threads = 1024 * 80
-# such as A30-24GB
-elif props['totalGlobalMem'] >= 16 * GB:
-    min_ao_blksize = 128
-    min_grid_blksize = 128*128
-    ao_aligned = 32
-    grid_aligned = 256
-    mem_fraction = 0.9
-    number_of_threads = 1024 * 80
-# other gaming cards
-else:
+min_ao_blksize = 128
+min_grid_blksize = 128*128
+ao_aligned = 32
+grid_aligned = 256
+
+# Use smaller blksize for old gaming GPUs
+if props['totalGlobalMem'] < 16 * GB:
     min_ao_blksize = 64
     min_grid_blksize = 64*64
-    ao_aligned = 32
-    grid_aligned = 128
-    mem_fraction = 0.9
-    number_of_threads = 1024 * 80
 
+# Use 90% of the global memory for CuPy memory pool
+mem_fraction = 0.9
 cupy.get_default_memory_pool().set_limit(fraction=mem_fraction)
diff --git a/gpu4pyscf/df/df.py b/gpu4pyscf/df/df.py
@@ -91,7 +91,7 @@ def build(self, direct_scf_tol=1e-14, omega=None):
         log.timer_debug1('prepare intopt', *t0)
         self.j2c = j2c.copy()
 
-        j2c = take_last2d(j2c, intopt.aux_ao_idx)
+        j2c = intopt.sort_orbitals(j2c, aux_axis=[0,1])
         try:
             self.cd_low = cholesky(j2c)
             self.cd_low = tag_array(self.cd_low, tag='cd')
@@ -108,6 +108,7 @@ def build(self, direct_scf_tol=1e-14, omega=None):
         self._cderi = cholesky_eri_gpu(intopt, mol, auxmol, self.cd_low, omega=omega)
         log.timer_debug1('cholesky_eri', *t0)
         self.intopt = intopt
+        return self
 
     def get_jk(self, dm, hermi=1, with_j=True, with_k=True,
                direct_scf_tol=getattr(__config__, 'scf_hf_SCF_direct_scf_tol', 1e-13),

diff --git a/gpu4pyscf/df/df_jk.py b/gpu4pyscf/df/df_jk.py
@@ -1,17 +1,18 @@
-#!/usr/bin/env python
-# Copyright 2014-2019 The PySCF Developers. All Rights Reserved.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Copyright 2024 The GPU4PySCF Developers. All Rights Reserved.
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 # Author: Qiming Sun <[email protected]>
 # Modified by Xiaojie Wu <[email protected]>
@@ -242,7 +243,7 @@ def to_cpu(self):
         obj = self.undo_df().to_cpu().density_fit()
         return utils.to_cpu(self, obj)
 
-def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-14, omega=None):
+def get_jk(dfobj, dms_tag, hermi=0, with_j=True, with_k=True, direct_scf_tol=1e-14, omega=None):
     '''
     get jk with density fitting
     outputs and input are on the same device
@@ -268,39 +269,45 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
 
     assert nao == dfobj.nao
     vj = vk = None
-    ao_idx = dfobj.intopt.ao_idx
-    dms = take_last2d(dms, ao_idx)
+    intopt = dfobj.intopt
+    dms = intopt.sort_orbitals(dms, axis=[1,2])
     dms_shape = dms.shape
-    rows = dfobj.intopt.cderi_row
-    cols = dfobj.intopt.cderi_col
-    
+    rows = intopt.cderi_row
+    cols = intopt.cderi_col
+
     if with_j:
         dm_sparse = dms[:,rows,cols]
-        dm_sparse[:, dfobj.intopt.cderi_diag] *= .5
+        if hermi == 0:
+            dm_sparse += dms[:,cols,rows]
+        else:
+            dm_sparse *= 2
+        dm_sparse[:, intopt.cderi_diag] *= .5
 
     if with_k:
         vk = cupy.zeros_like(dms)
-    
+
     # SCF K matrix with occ
     if getattr(dms_tag, 'mo_coeff', None) is not None:
+        assert hermi == 1
         mo_occ = dms_tag.mo_occ
         mo_coeff = dms_tag.mo_coeff
         nmo = mo_occ.shape[-1]
         mo_coeff = mo_coeff.reshape(-1,nao,nmo)
         mo_occ   = mo_occ.reshape(-1,nmo)
+        mo_coeff = intopt.sort_orbitals(mo_coeff, axis=[1])
         nocc = 0
         occ_coeff = [0]*nset
         for i in range(nset):
             occ_idx = mo_occ[i] > 0
-            occ_coeff[i] = mo_coeff[i][:,occ_idx][ao_idx] * mo_occ[i][occ_idx]**0.5
+            occ_coeff[i] = mo_coeff[i][:,occ_idx] * mo_occ[i][occ_idx]**0.5
             nocc += mo_occ[i].sum()
         blksize = dfobj.get_blksize(extra=nao*nocc)
         if with_j:
             vj_packed = cupy.zeros_like(dm_sparse)
         for cderi, cderi_sparse in dfobj.loop(blksize=blksize, unpack=with_k):
             # leading dimension is 1
             if with_j:
-                rhoj = 2.0*dm_sparse.dot(cderi_sparse)
+                rhoj = dm_sparse.dot(cderi_sparse)
                 vj_packed += cupy.dot(rhoj, cderi_sparse.T)
             cderi_sparse = rhoj = None
             for i in range(nset):
@@ -316,18 +323,18 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
             vj[:,rows,cols] = vj_packed
             vj[:,cols,rows] = vj_packed
 
-    # CP-HF K matrix
     elif hasattr(dms_tag, 'mo1'):
+        # K matrix in CP-HF or TDDFT
         occ_coeffs = dms_tag.occ_coeff
         mo1s = dms_tag.mo1
-        mo_occ = dms_tag.mo_occ
-        if not isinstance(occ_coeffs, list):
-            occ_coeffs = [occ_coeffs * 2.0] # For restricted
-        if not isinstance(mo1s, list):
+        if not isinstance(occ_coeffs, (tuple, list)):
+            # *2 for double occupancy in RHF/RKS
+            occ_coeffs = [occ_coeffs * 2.0]
+        if not isinstance(mo1s, (tuple, list)):
             mo1s = [mo1s]
 
-        occ_coeffs = [occ_coeff[ao_idx] for occ_coeff in occ_coeffs]
-        mo1s = [mo1[:,ao_idx] for mo1 in mo1s]
+        occ_coeffs = [intopt.sort_orbitals(occ_coeff, axis=[0]) for occ_coeff in occ_coeffs]
+        mo1s = [intopt.sort_orbitals(mo1, axis=[1]) for mo1 in mo1s]
 
         if with_j:
             vj_sparse = cupy.zeros_like(dm_sparse)
@@ -336,7 +343,7 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
         blksize = dfobj.get_blksize(extra=2*nao*nocc)
         for cderi, cderi_sparse in dfobj.loop(blksize=blksize, unpack=with_k):
             if with_j:
-                rhoj = 2.0*dm_sparse.dot(cderi_sparse)
+                rhoj = dm_sparse.dot(cderi_sparse)
                 vj_sparse += cupy.dot(rhoj, cderi_sparse.T)
                 rhoj = None
             cderi_sparse = None
@@ -346,8 +353,8 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
                     rhok = contract('Lij,jk->Lki', cderi, occ_coeff).reshape([-1,nao])
                     for i in range(mo1.shape[0]):
                         rhok1 = contract('Lij,jk->Lki', cderi, mo1[i]).reshape([-1,nao])
-                        #contract('Lki,Lkj->ij', rhok, rhok1, alpha=1.0, beta=1.0, out=vk[iset])
-                        vk[iset] += cupy.dot(rhok.T, rhok1)
+                        #contract('Lki,Lkj->ij', rhok1, rhok, alpha=1.0, beta=1.0, out=vk[iset])
+                        vk[iset] += cupy.dot(rhok1.T, rhok)
                         iset += 1
                 mo1 = rhok1 = rhok = None
             cderi = None
@@ -356,7 +363,7 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
             vj = cupy.zeros(dms_shape)
             vj[:,rows,cols] = vj_sparse
             vj[:,cols,rows] = vj_sparse
-        if with_k:
+        if with_k and hermi:
             transpose_sum(vk)
         vj_sparse = None
     # general K matrix with density matrix
@@ -366,25 +373,24 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
         blksize = dfobj.get_blksize()
         for cderi, cderi_sparse in dfobj.loop(blksize=blksize, unpack=with_k):
             if with_j:
-                rhoj = 2.0*dm_sparse.dot(cderi_sparse)
+                rhoj = dm_sparse.dot(cderi_sparse)
                 vj_sparse += cupy.dot(rhoj, cderi_sparse.T)
             if with_k:
                 for k in range(nset):
                     rhok = contract('Lij,jk->Lki', cderi, dms[k]).reshape([-1,nao])
-                    #vk[k] += contract('Lki,Lkj->ij', cderi, rhok)
-                    vk[k] += cupy.dot(cderi.reshape([-1,nao]).T, rhok)
+                    #vk[k] += contract('Lki,Lkj->ij', rhok, cderi)
+                    vk[k] += cupy.dot(rhok.T, cderi.reshape([-1,nao]))
         if with_j:
             vj = cupy.zeros(dms_shape)
             vj[:,rows,cols] = vj_sparse
             vj[:,cols,rows] = vj_sparse
         rhok = None
 
-    rev_ao_idx = dfobj.intopt.rev_ao_idx
     if with_j:
-        vj = take_last2d(vj, rev_ao_idx)
+        vj = intopt.unsort_orbitals(vj, axis=[1,2])
         vj = vj.reshape(out_shape)
     if with_k:
-        vk = take_last2d(vk, rev_ao_idx)
+        vk = intopt.unsort_orbitals(vk, axis=[1,2])
         vk = vk.reshape(out_shape)
     t1 = log.timer_debug1('vj and vk', *t1)
     if out_cupy: