sync with local changes

bytedance · Nov 9, 2023 · 3b3cb5b · 3b3cb5b
2 parents b7cc20a + e110e9a
commit 3b3cb5b
Show file tree

Hide file tree

Showing 69 changed files with 2,261 additions and 1,005 deletions.
diff --git a/.github/workflows/libxc_wheel.yml b/.github/workflows/libxc_wheel.yml
@@ -6,15 +6,19 @@ on:
 jobs:
   release-pypi-linux:
     runs-on: ubuntu-latest
-    env:
-      img: wxj6000/manylinux2014:cuda118
+    strategy:
+      fail-fast: false
+      matrix:
+        cuda-version:
+        - cuda118
+        - cuda121
     steps:
     - name: Checkout
       uses: actions/checkout@v3
     - name: Build wheels
       run: |
         docker run --rm -v ${{ github.workspace }}:/gpu4pyscf:rw \
-        ${{ env.img }} \
+        wxj6000/manylinux2014:${{ matrix.cuda-version }} \
         bash -exc 'sh /gpu4pyscf/builder/build_libxc.sh'
     - name: List available wheels
       run: |

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -2,13 +2,14 @@ include MANIFEST.in
 include README.md setup.py CHANGELOG AUTHORS LICENSE NOTICE
 
 global-exclude *.py[cod]
+global-exclude *~
 #global-exclude *.cu
 #global-exclude *.h
 #global-exclude *.c
 #global-exclude *.cuh
 #global-exclude *.sh
 
-prune */__pycache__ 
+prune */__pycache__
 recursive-exclude */__pycache__ *
 
 prune gpu4pyscf/lib/build

diff --git a/README.md b/README.md
@@ -4,40 +4,40 @@ Installation
 --------
 
 For **CUDA 11.x**
-```
+```sh
 pip3 install gpu4pyscf-cuda11x
 ```
 and install cutensor
-```
+```sh
 python -m cupyx.tools.install_library --cuda 11.x --library cutensor
 ```
 
 For **CUDA 12.x**
-```
+```sh
 pip3 install gpu4pyscf-cuda12x
 ```
 and install cutensor
-```
+```sh
 python -m cupyx.tools.install_library --cuda 12.x --library cutensor
 ```
 
 Compilation
 --------
 The package provides ```dockerfiles/compile/Dockerfile``` for creating the CUDA environment. One can compile the package with
-```
+```sh
 sh build.sh
 ```
 This script automatically downloads LibXC, compiles it with CUDA, and builds the wheel for installation. The compilation can take more than 5 minutes. Afterwards, one can either install the wheel with
-```
+```sh
 cd output
 pip3 install gpu4pyscf-*
 ```
 or simply add it to ```PYTHONPATH```
-```
+```sh
 export PYTHONPATH="${PYTHONPATH}:/your-local-path/gpu4pyscf"
 ```
 Then install cutensor for acceleration
-```
+```sh
 python -m cupyx.tools.install_library --cuda 11.x --library cutensor
 ```
 
@@ -47,9 +47,10 @@ Features
 - SCF, analytical Gradient, and analytical Hessian calculations for Hartree-Fock and DFT;
 - LDA, GGA, mGGA, hybrid, and range-separated functionals via [libXC](https://gitlab.com/libxc/libxc/-/tree/master/);
 - Geometry optimization and transition state search via [geomeTRIC](https://geometric.readthedocs.io/en/latest/);
-- Dispersion corrections via [DFT3](https://github.com/dftd3/simple-dftd3) and [DFT4](https://github.com/dftd4/dftd4);
+- Dispersion corrections via [DFTD3](https://github.com/dftd3/simple-dftd3) and [DFTD4](https://github.com/dftd4/dftd4);
 - Nonlocal functional correction (vv10) for SCF and gradient;
 - ECP is supported and calculated on CPU;
+- PCM solvent models and their analytical gradients;
 
 Limitations
 --------
@@ -63,7 +64,7 @@ Limitations
 
 Examples
 --------
-```
+```python
 import pyscf
 from gpu4pyscf.dft import rks
 

diff --git a/benchmarks/df/dft_driver.py b/benchmarks/df/dft_driver.py
@@ -16,6 +16,8 @@
 parser.add_argument('--input_path',   type=str, default='./')
 parser.add_argument('--output_path',  type=str, default='./')
 parser.add_argument('--with_hessian', type=bool, default=False)
+parser.add_argument('--solvent',      type=str, default='')
+
 args = parser.parse_args()
 bas = args.basis
 verbose = args.verbose
@@ -39,13 +41,18 @@
     output_file = 'PySCF-16-cores-CPU.csv'
 output_file = args.output_path + output_file
 
-def run_dft(filename):  
+def run_dft(filename):
     mol = pyscf.M(atom=filename, basis=bas, max_memory=64000)
-    start_time = time.time()  
+    start_time = time.time()
     # set verbose >= 6 for debugging timer
     mol.verbose = 4 #verbose
     mol.max_memory = 40000
     mf = rks.RKS(mol, xc=xc).density_fit(auxbasis='def2-universal-jkfit')
+    if args.solvent:
+        mf = mf.PCM()
+        mf.lebedev_order = 29
+        mf.method = 'IEF-PCM'
+
     mf.grids.atom_grid = (99,590)
     mf.chkfile = None
     prep_time = time.time() - start_time
@@ -75,7 +82,7 @@ def run_dft(filename):
     # calculate hessian
     if args.device == 'GPU':
         cupy.get_default_memory_pool().free_all_blocks()
-    
+
     hess_time = -1
     if args.with_hessian:
         try:

diff --git a/benchmarks/df/organic/solvent/def2-tzvpp/020_Vitamin_C.xyz.npz b/benchmarks/df/organic/solvent/def2-tzvpp/020_Vitamin_C.xyz.npz
diff --git a/benchmarks/df/organic/solvent/def2-tzvpp/031_Inosine.xyz.npz b/benchmarks/df/organic/solvent/def2-tzvpp/031_Inosine.xyz.npz
diff --git a/benchmarks/df/organic/solvent/def2-tzvpp/033_Bisphenol_A.xyz.npz b/benchmarks/df/organic/solvent/def2-tzvpp/033_Bisphenol_A.xyz.npz
diff --git a/benchmarks/df/organic/solvent/def2-tzvpp/037_Mg_Porphin.xyz.npz b/benchmarks/df/organic/solvent/def2-tzvpp/037_Mg_Porphin.xyz.npz
diff --git a/benchmarks/df/organic/solvent/def2-tzvpp/042_Penicillin_V.xyz.npz b/benchmarks/df/organic/solvent/def2-tzvpp/042_Penicillin_V.xyz.npz
diff --git a/benchmarks/df/organic/solvent/def2-tzvpp/045_Ochratoxin_A.xyz.npz b/benchmarks/df/organic/solvent/def2-tzvpp/045_Ochratoxin_A.xyz.npz
diff --git a/benchmarks/df/organic/solvent/def2-tzvpp/052_Cetirizine.xyz.npz b/benchmarks/df/organic/solvent/def2-tzvpp/052_Cetirizine.xyz.npz
diff --git a/benchmarks/df/organic/solvent/def2-tzvpp/057_Tamoxifen.xyz.npz b/benchmarks/df/organic/solvent/def2-tzvpp/057_Tamoxifen.xyz.npz
diff --git a/benchmarks/df/organic/solvent/sto-3g/020_Vitamin_C.xyz.npz b/benchmarks/df/organic/solvent/sto-3g/020_Vitamin_C.xyz.npz
diff --git a/benchmarks/df/organic/solvent/sto-3g/031_Inosine.xyz.npz b/benchmarks/df/organic/solvent/sto-3g/031_Inosine.xyz.npz
diff --git a/benchmarks/df/organic/solvent/sto-3g/033_Bisphenol_A.xyz.npz b/benchmarks/df/organic/solvent/sto-3g/033_Bisphenol_A.xyz.npz
diff --git a/benchmarks/df/organic/solvent/sto-3g/037_Mg_Porphin.xyz.npz b/benchmarks/df/organic/solvent/sto-3g/037_Mg_Porphin.xyz.npz
diff --git a/benchmarks/df/organic/solvent/sto-3g/042_Penicillin_V.xyz.npz b/benchmarks/df/organic/solvent/sto-3g/042_Penicillin_V.xyz.npz
diff --git a/benchmarks/df/organic/solvent/sto-3g/045_Ochratoxin_A.xyz.npz b/benchmarks/df/organic/solvent/sto-3g/045_Ochratoxin_A.xyz.npz
diff --git a/benchmarks/df/run_gpu4pyscf.sh b/benchmarks/df/run_gpu4pyscf.sh
@@ -3,7 +3,7 @@
 DIR="./organic/xc"
 [ ! -d "$DIR" ] && mkdir -p "$DIR"
 for xc in LDA PBE B3LYP M06 wB97m-v
-do 
+do
     python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/xc/$xc/ --xc $xc
 done
 exit

diff --git a/examples/00-h2o.py b/examples/00-h2o.py
@@ -18,13 +18,13 @@
 from gpu4pyscf.dft import rks
 lib.num_threads(8)
 
-atom =''' 
+atom ='''
 O       0.0000000000    -0.0000000000     0.1174000000
 H      -0.7570000000    -0.0000000000    -0.4696000000
 H       0.7570000000     0.0000000000    -0.4696000000
 '''
 
-xc='LDA'
+xc='B3LYP'
 bas='def2-tzvpp'
 auxbasis='def2-tzvpp-jkfit'
 scf_tol = 1e-10
@@ -34,7 +34,7 @@
 
 mol = pyscf.M(atom=atom, basis=bas, max_memory=32000)
 
-mol.verbose = 1
+mol.verbose = 6
 mf_GPU = rks.RKS(mol, xc=xc).density_fit(auxbasis=auxbasis)
 mf_GPU.grids.level = grids_level
 mf_GPU.conv_tol = scf_tol

diff --git a/examples/13-einsum_engine.py b/examples/13-einsum_engine.py
@@ -0,0 +1,34 @@
+# gpu4pyscf is a plugin for using NVIDIA GPUs in the PySCF package
+#
+# Copyright (C) 2022 Qiming Sun
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import os
+os.environ['CONTRACT_ENGINE'] = 'opt_einsum' # 'cupy', 'cuquantum'
+
+import pyscf
+from gpu4pyscf.dft import rks
+
+atom ='''
+O       0.0000000000    -0.0000000000     0.1174000000
+H      -0.7570000000    -0.0000000000    -0.4696000000
+H       0.7570000000     0.0000000000    -0.4696000000
+'''
+
+mol = pyscf.M(atom=atom, basis='def2-tzvpp')
+mf = rks.RKS(mol, xc='LDA').density_fit()
+
+e_dft = mf.kernel()  # compute total energy
+print(f"total energy = {e_dft}")
diff --git a/examples/15-chelpg.py b/examples/15-chelpg.py
@@ -0,0 +1,39 @@
+# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from pyscf import gto
+from gpu4pyscf.dft import rks
+from gpu4pyscf.qmmm import chelpg
+
+
+mol = gto.Mole()
+mol.verbose = 0
+mol.output = None
+mol.atom = [
+    [1 , (1. ,  0.     , 0.000)],
+    [1 , (0. ,  1.     , 0.000)],
+    [1 , (0. , -1.517  , 1.177)],
+    [1 , (0. ,  1.517  , 1.177)] ]
+mol.basis = '631g'
+mol.unit = 'B'
+mol.build()
+mol.verbose = 6
+
+xc = 'b3lyp'
+mf = rks.RKS(mol, xc=xc)
+mf.grids.level = 5
+mf.kernel()
+q = chelpg.eval_chelpg_layer_gpu(mf)
+print(q) # [ 0.04402311  0.11333945 -0.25767919  0.10031663]
diff --git a/examples/dft_driver.py b/examples/dft_driver.py
@@ -15,17 +15,16 @@
 
 import pyscf
 import time
+import argparse
 from pyscf import lib
-
 from gpu4pyscf.dft import rks
 lib.num_threads(8)
 
-import argparse
-
 parser = argparse.ArgumentParser(description='Run DFT with GPU4PySCF for molecules')
 parser.add_argument("--input",    type=str,  default='benzene/coord')
 parser.add_argument("--basis",    type=str,  default='def2-tzvpp')
 parser.add_argument("--auxbasis", type=str,  default='def2-tzvpp-jkfit')
+parser.add_argument("--xc",       type=str,  default='B3LYP')
 parser.add_argument("--solvent",  type=bool, default=False)
 args = parser.parse_args()
 
@@ -36,23 +35,28 @@
     basis=bas,
     max_memory=32000)
 # set verbose >= 6 for debugging timer
-mol.verbose = 4
+mol.verbose = 1
 
-mf_df = rks.RKS(mol, xc='HYB_GGA_XC_B3LYP').density_fit(auxbasis=args.auxbasis)
+mf_df = rks.RKS(mol, xc=args.xc).density_fit(auxbasis=args.auxbasis)
 if args.solvent:
     mf_df = mf_df.PCM()
+    mf_df.lebedev_order = 29
+    mf_df.method = 'IEF-PCM'
 mf_df.grids.atom_grid = (99,590)
 mf_df.kernel()
+scf_time = time.time() - start_time
+print(f'compute time for energy: {scf_time:.3f} s')
 
-print('compute time for energy: {}s'.format((time.time() - start_time)))
 start_time = time.time()
 g = mf_df.nuc_grad_method()
 g.auxbasis_response = True
 f = g.kernel()
-print('compute time for gradient: {}s'.format((time.time() - start_time)))
+grad_time = time.time() - start_time
+print(f'compute time for gradient: {grad_time:.3f} s')
 
 start_time = time.time()
 h = mf_df.Hessian()
 h.auxbasis_response = 2
 h_dft = h.kernel()
-print('compute time for hessian: {}s'.format((time.time() - start_time)))
+hess_time = time.time() - start_time
+print(f'compute time for hessian: {hess_time:.3f} s')
diff --git a/examples/sp.in b/examples/sp.in
diff --git a/gpu4pyscf/__init__.py b/gpu4pyscf/__init__.py
@@ -1,2 +1,2 @@
 from . import lib, grad, hessian, solvent, scf, dft
-__version__ = '0.6.1'
+__version__ = '0.6.5'
diff --git a/gpu4pyscf/df/__init__.py b/gpu4pyscf/df/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
diff --git a/gpu4pyscf/df/cderi.py b/gpu4pyscf/df/cderi.py
@@ -40,7 +40,7 @@ def __init__(self, nao, naux, nblocks) -> None:
             ctypes.c_int(nblocks),
             ctypes.c_int(nao))
         return
-    
+
     def __del__(self):
         self.row = []
         self.col = []
@@ -57,8 +57,8 @@ def add_block(self, data, rows, cols):
         assert rows.dtype == cupy.int64 and cols.dtype == cupy.int64
         nij = len(rows)
         err = libcupy_helper.add_block(
-            ctypes.byref(self.handle), 
-            ctypes.c_int(nij), 
+            ctypes.byref(self.handle),
+            ctypes.c_int(nij),
             ctypes.c_int(self.naux),
             ctypes.cast(rows.data.ptr, ctypes.c_void_p),
             ctypes.cast(cols.data.ptr, ctypes.c_void_p),