Skip to content

Commit

Permalink
Merge branch 'master' into mep_integral
Browse files Browse the repository at this point in the history
  • Loading branch information
henryw7 committed Nov 21, 2024
2 parents 46f3b3f + ce3de23 commit 74c2bf2
Show file tree
Hide file tree
Showing 99 changed files with 13,953 additions and 1,235 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
python3 -m pip install --upgrade pip
pip3 install flake8 pytest coverage pytest-cov pyscf-dispersion
pip3 install "pyscf>2.5"
pip3 install pyscf --upgrade
pip3 install numpy --upgrade
pip3 install h5py --upgrade
pip3 install gpu4pyscf-libxc-cuda12x --upgrade
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ Features
- Density fitting scheme and direct SCF scheme;
- SCF, analytical Gradient, and analytical Hessian calculations for Hartree-Fock and DFT;
- LDA, GGA, mGGA, hybrid, and range-separated functionals via [libXC](https://gitlab.com/libxc/libxc/-/tree/master/);
- Spin-conserved and spin-flip TDA and TDDFT for excitated states
- Geometry optimization and transition state search via [geomeTRIC](https://geometric.readthedocs.io/en/latest/);
- Dispersion corrections via [DFTD3](https://github.com/dftd3/simple-dftd3) and [DFTD4](https://github.com/dftd4/dftd4);
- Nonlocal functional correction (vv10) for SCF and gradient;
Expand Down
1 change: 1 addition & 0 deletions examples/00-h2o.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
# Compute Hessian
h = mf_GPU.Hessian()
h.auxbasis_response = 2 # 0: no aux contribution, 1: some contributions, 2: all
mf_GPU.cphf_grids.atom_grid = (50,194) # customize grids for solving CPSCF equation, SG1 by default
h_dft = h.kernel()

# harmonic analysis
Expand Down
67 changes: 67 additions & 0 deletions examples/24-cp_bsse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright 2024 The GPU4PySCF Authors. All Rights Reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

####################################################
# Example of interaction energy with counterpoise correction
####################################################

import pyscf
from gpu4pyscf.dft import rks

atom_A = [
('O', (0.000000, 0.000000, 0.000000)),
('H', (0.000000, 0.757160, 0.586260)),
('H', (0.000000, -0.757160, 0.586260))
]

atom_B = [
('O', (0.000000, 0.000000, 2.913530)),
('H', (0.000000, 0.757160, 3.499790)),
('H', (0.000000, -0.757160, 3.499790))
]

atom_AB = atom_A + atom_B

mol_A = pyscf.M(atom=atom_A, basis='cc-pVDZ').build()
mol_B = pyscf.M(atom=atom_B, basis='cc-pVDZ').build()
mol_AB = pyscf.M(atom=atom_AB, basis='cc-pVDZ').build()

# Monomer A in the dimer basis
mol_A_ghost = mol_A.copy()
ghost_atoms_B = mol_B.atom
mol_A_ghost.atom.extend([('X-' + atom[0], atom[1]) for atom in ghost_atoms_B])
mol_A_ghost.build()

# Monomer B in the dimer basis
mol_B_ghost = mol_B.copy()
ghost_atoms_A = mol_A.atom
mol_B_ghost.atom.extend([('X-' + atom[0], atom[1]) for atom in ghost_atoms_A])
mol_B_ghost.build()

def solve_dft(mol, xc='b3lyp'):
mf = rks.RKS(mol, xc='b3lyp').density_fit()
mf.grids.atom_grid = (99,590)
return mf.kernel()

E_AB = solve_dft(mol_AB)
E_A = solve_dft(mol_A)
E_B = solve_dft(mol_B)
interaction_energy_no_bsse = E_AB - (E_A + E_B)
print(f"Interaction Energy without BSSE Correction: {interaction_energy_no_bsse:.6f} Hartree")

E_A_ghost = solve_dft(mol_A_ghost)
E_B_ghost = solve_dft(mol_B_ghost)
interaction_energy_bsse = E_AB - (E_A_ghost + E_B_ghost)
print(f"Interaction Energy with BSSE Correction: {interaction_energy_bsse:.6f} Hartree")
39 changes: 9 additions & 30 deletions gpu4pyscf/__config__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,16 @@

props = cupy.cuda.runtime.getDeviceProperties(0)
GB = 1024*1024*1024
# such as A100-80G
if props['totalGlobalMem'] >= 64 * GB:
min_ao_blksize = 128
min_grid_blksize = 128*128
ao_aligned = 32
grid_aligned = 256
mem_fraction = 0.9
number_of_threads = 2048 * 108
# such as V100-32G
elif props['totalGlobalMem'] >= 32 * GB:
min_ao_blksize = 128
min_grid_blksize = 128*128
ao_aligned = 32
grid_aligned = 256
mem_fraction = 0.9
number_of_threads = 1024 * 80
# such as A30-24GB
elif props['totalGlobalMem'] >= 16 * GB:
min_ao_blksize = 128
min_grid_blksize = 128*128
ao_aligned = 32
grid_aligned = 256
mem_fraction = 0.9
number_of_threads = 1024 * 80
# other gaming cards
else:
min_ao_blksize = 128
min_grid_blksize = 128*128
ao_aligned = 32
grid_aligned = 256

# Use smaller blksize for old gaming GPUs
if props['totalGlobalMem'] < 16 * GB:
min_ao_blksize = 64
min_grid_blksize = 64*64
ao_aligned = 32
grid_aligned = 128
mem_fraction = 0.9
number_of_threads = 1024 * 80

# Use 90% of the global memory for CuPy memory pool
mem_fraction = 0.9
cupy.get_default_memory_pool().set_limit(fraction=mem_fraction)
3 changes: 2 additions & 1 deletion gpu4pyscf/df/df.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def build(self, direct_scf_tol=1e-14, omega=None):
log.timer_debug1('prepare intopt', *t0)
self.j2c = j2c.copy()

j2c = take_last2d(j2c, intopt.aux_ao_idx)
j2c = intopt.sort_orbitals(j2c, aux_axis=[0,1])
try:
self.cd_low = cholesky(j2c)
self.cd_low = tag_array(self.cd_low, tag='cd')
Expand All @@ -108,6 +108,7 @@ def build(self, direct_scf_tol=1e-14, omega=None):
self._cderi = cholesky_eri_gpu(intopt, mol, auxmol, self.cd_low, omega=omega)
log.timer_debug1('cholesky_eri', *t0)
self.intopt = intopt
return self

def get_jk(self, dm, hermi=1, with_j=True, with_k=True,
direct_scf_tol=getattr(__config__, 'scf_hf_SCF_direct_scf_tol', 1e-13),
Expand Down
82 changes: 44 additions & 38 deletions gpu4pyscf/df/df_jk.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
#!/usr/bin/env python
# Copyright 2014-2019 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Copyright 2024 The GPU4PySCF Developers. All Rights Reserved.
#
# http://www.apache.org/licenses/LICENSE-2.0
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Author: Qiming Sun <[email protected]>
# Modified by Xiaojie Wu <[email protected]>
Expand Down Expand Up @@ -242,7 +243,7 @@ def to_cpu(self):
obj = self.undo_df().to_cpu().density_fit()
return utils.to_cpu(self, obj)

def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-14, omega=None):
def get_jk(dfobj, dms_tag, hermi=0, with_j=True, with_k=True, direct_scf_tol=1e-14, omega=None):
'''
get jk with density fitting
outputs and input are on the same device
Expand All @@ -268,39 +269,45 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-

assert nao == dfobj.nao
vj = vk = None
ao_idx = dfobj.intopt.ao_idx
dms = take_last2d(dms, ao_idx)
intopt = dfobj.intopt
dms = intopt.sort_orbitals(dms, axis=[1,2])
dms_shape = dms.shape
rows = dfobj.intopt.cderi_row
cols = dfobj.intopt.cderi_col
rows = intopt.cderi_row
cols = intopt.cderi_col

if with_j:
dm_sparse = dms[:,rows,cols]
dm_sparse[:, dfobj.intopt.cderi_diag] *= .5
if hermi == 0:
dm_sparse += dms[:,cols,rows]
else:
dm_sparse *= 2
dm_sparse[:, intopt.cderi_diag] *= .5

if with_k:
vk = cupy.zeros_like(dms)

# SCF K matrix with occ
if getattr(dms_tag, 'mo_coeff', None) is not None:
assert hermi == 1
mo_occ = dms_tag.mo_occ
mo_coeff = dms_tag.mo_coeff
nmo = mo_occ.shape[-1]
mo_coeff = mo_coeff.reshape(-1,nao,nmo)
mo_occ = mo_occ.reshape(-1,nmo)
mo_coeff = intopt.sort_orbitals(mo_coeff, axis=[1])
nocc = 0
occ_coeff = [0]*nset
for i in range(nset):
occ_idx = mo_occ[i] > 0
occ_coeff[i] = mo_coeff[i][:,occ_idx][ao_idx] * mo_occ[i][occ_idx]**0.5
occ_coeff[i] = mo_coeff[i][:,occ_idx] * mo_occ[i][occ_idx]**0.5
nocc += mo_occ[i].sum()
blksize = dfobj.get_blksize(extra=nao*nocc)
if with_j:
vj_packed = cupy.zeros_like(dm_sparse)
for cderi, cderi_sparse in dfobj.loop(blksize=blksize, unpack=with_k):
# leading dimension is 1
if with_j:
rhoj = 2.0*dm_sparse.dot(cderi_sparse)
rhoj = dm_sparse.dot(cderi_sparse)
vj_packed += cupy.dot(rhoj, cderi_sparse.T)
cderi_sparse = rhoj = None
for i in range(nset):
Expand All @@ -316,18 +323,18 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
vj[:,rows,cols] = vj_packed
vj[:,cols,rows] = vj_packed

# CP-HF K matrix
elif hasattr(dms_tag, 'mo1'):
# K matrix in CP-HF or TDDFT
occ_coeffs = dms_tag.occ_coeff
mo1s = dms_tag.mo1
mo_occ = dms_tag.mo_occ
if not isinstance(occ_coeffs, list):
occ_coeffs = [occ_coeffs * 2.0] # For restricted
if not isinstance(mo1s, list):
if not isinstance(occ_coeffs, (tuple, list)):
# *2 for double occupancy in RHF/RKS
occ_coeffs = [occ_coeffs * 2.0]
if not isinstance(mo1s, (tuple, list)):
mo1s = [mo1s]

occ_coeffs = [occ_coeff[ao_idx] for occ_coeff in occ_coeffs]
mo1s = [mo1[:,ao_idx] for mo1 in mo1s]
occ_coeffs = [intopt.sort_orbitals(occ_coeff, axis=[0]) for occ_coeff in occ_coeffs]
mo1s = [intopt.sort_orbitals(mo1, axis=[1]) for mo1 in mo1s]

if with_j:
vj_sparse = cupy.zeros_like(dm_sparse)
Expand All @@ -336,7 +343,7 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
blksize = dfobj.get_blksize(extra=2*nao*nocc)
for cderi, cderi_sparse in dfobj.loop(blksize=blksize, unpack=with_k):
if with_j:
rhoj = 2.0*dm_sparse.dot(cderi_sparse)
rhoj = dm_sparse.dot(cderi_sparse)
vj_sparse += cupy.dot(rhoj, cderi_sparse.T)
rhoj = None
cderi_sparse = None
Expand All @@ -346,8 +353,8 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
rhok = contract('Lij,jk->Lki', cderi, occ_coeff).reshape([-1,nao])
for i in range(mo1.shape[0]):
rhok1 = contract('Lij,jk->Lki', cderi, mo1[i]).reshape([-1,nao])
#contract('Lki,Lkj->ij', rhok, rhok1, alpha=1.0, beta=1.0, out=vk[iset])
vk[iset] += cupy.dot(rhok.T, rhok1)
#contract('Lki,Lkj->ij', rhok1, rhok, alpha=1.0, beta=1.0, out=vk[iset])
vk[iset] += cupy.dot(rhok1.T, rhok)
iset += 1
mo1 = rhok1 = rhok = None
cderi = None
Expand All @@ -356,7 +363,7 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
vj = cupy.zeros(dms_shape)
vj[:,rows,cols] = vj_sparse
vj[:,cols,rows] = vj_sparse
if with_k:
if with_k and hermi:
transpose_sum(vk)
vj_sparse = None
# general K matrix with density matrix
Expand All @@ -366,25 +373,24 @@ def get_jk(dfobj, dms_tag, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-
blksize = dfobj.get_blksize()
for cderi, cderi_sparse in dfobj.loop(blksize=blksize, unpack=with_k):
if with_j:
rhoj = 2.0*dm_sparse.dot(cderi_sparse)
rhoj = dm_sparse.dot(cderi_sparse)
vj_sparse += cupy.dot(rhoj, cderi_sparse.T)
if with_k:
for k in range(nset):
rhok = contract('Lij,jk->Lki', cderi, dms[k]).reshape([-1,nao])
#vk[k] += contract('Lki,Lkj->ij', cderi, rhok)
vk[k] += cupy.dot(cderi.reshape([-1,nao]).T, rhok)
#vk[k] += contract('Lki,Lkj->ij', rhok, cderi)
vk[k] += cupy.dot(rhok.T, cderi.reshape([-1,nao]))
if with_j:
vj = cupy.zeros(dms_shape)
vj[:,rows,cols] = vj_sparse
vj[:,cols,rows] = vj_sparse
rhok = None

rev_ao_idx = dfobj.intopt.rev_ao_idx
if with_j:
vj = take_last2d(vj, rev_ao_idx)
vj = intopt.unsort_orbitals(vj, axis=[1,2])
vj = vj.reshape(out_shape)
if with_k:
vk = take_last2d(vk, rev_ao_idx)
vk = intopt.unsort_orbitals(vk, axis=[1,2])
vk = vk.reshape(out_shape)
t1 = log.timer_debug1('vj and vk', *t1)
if out_cupy:
Expand Down
Loading

0 comments on commit 74c2bf2

Please sign in to comment.