diff --git a/.gitignore b/.gitignore
index e0b0b159..9a680274 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 **/__pycache__
 *.so
+*.npz
+*.out
 *current_revision
 **/build
 **/launch_logs
@@ -13,6 +15,7 @@ output/
 *.swo
 *.swp
 *~
+**/qchem_input.in
 #*
 dockerfiles/manylinux/wheelhouse
 gpu4pyscf/lib/bin
diff --git a/benchmarks/df/dft_driver.py b/benchmarks/df/dft_driver.py
index ea979df8..69cc7621 100644
--- a/benchmarks/df/dft_driver.py
+++ b/benchmarks/df/dft_driver.py
@@ -3,6 +3,7 @@
 import pyscf
 import time
 import argparse
+import numpy as np
 from pyscf import lib
 from pyscf.dft import rks
 
@@ -41,18 +42,19 @@
     output_file = 'PySCF-16-cores-CPU.csv'
 output_file = args.output_path + output_file
 
-def run_dft(filename):
-    mol = pyscf.M(atom=filename, basis=bas, max_memory=64000)
+def run_dft(path, filename):
+    mol = pyscf.M(atom=path+filename, basis=bas, max_memory=64000)
     start_time = time.time()
     # set verbose >= 6 for debugging timer
-    mol.verbose = 4 #verbose
+    mol.verbose = 1 #verbose
     mol.max_memory = 40000
     mf = rks.RKS(mol, xc=xc).density_fit(auxbasis='def2-universal-jkfit')
     if args.solvent:
         mf = mf.PCM()
-        mf.lebedev_order = 29
-        mf.method = 'IEF-PCM'
-
+        mf.with_solvent.lebedev_order = 29
+        mf.with_solvent.method = 'IEF-PCM'
+        mf.with_solvent.eps = 78.3553
+    mf.verbose = 6
     mf.grids.atom_grid = (99,590)
     mf.chkfile = None
     prep_time = time.time() - start_time
@@ -62,7 +64,7 @@ def run_dft(filename):
     try:
         e_dft = mf.kernel()
         scf_time = time.time() - start_time
-    except:
+    except Exception:
         scf_time = -1
         e_dft = 0
 
@@ -76,8 +78,9 @@ def run_dft(filename):
         g.auxbasis_response = True
         f = g.kernel()
         grad_time = time.time() - start_time
-    except:
+    except Exception:
         grad_time = -1
+        f = -1
 
     # calculate hessian
     if args.device == 'GPU':
@@ -85,16 +88,18 @@ def run_dft(filename):
 
     hess_time = -1
     if args.with_hessian:
-        try:
-            start_time = time.time()
-            h = mf.Hessian()
-            h.auxbasis_response = 1
-            h.max_memory = 40000
-            hess = h.kernel()
-            hess_time = time.time() - start_time
-        except:
-            hess_time = -1
+        #try:
+        start_time = time.time()
+        h = mf.Hessian()
+        h.auxbasis_response = 1
+        h.max_memory = 40000
+        hess = h.kernel().reshape([3*mol.natm, 3*mol.natm])
+        hess_time = time.time() - start_time
+        #except Exception:
+        #   hess_time = -1
+        #    hess = -1
 
+    np.savez(args.output_path+filename+'.npz', e_dft=e_dft, grad=f, hess=hess)
     return mol.natm, mol.nao, scf_time, grad_time, hess_time, e_dft
 
 fields = ['mol','natm', 'nao', 't_scf', 't_gradient', 't_hessian', 'e_tot']
@@ -105,7 +110,7 @@ def run_dft(filename):
 for filename in sorted(os.listdir(args.input_path)):
     if filename.endswith(".xyz"):
         print(f'running DFT {filename}')
-        info = run_dft(args.input_path+filename)
+        info = run_dft(args.input_path, filename)
         row = [filename[:-4]]+list(info)
         csvwriter.writerow(row)
         csvfile.flush()
diff --git a/benchmarks/df/organic/basis/sto-3g/Tesla V100-SXM2-32GB.csv b/benchmarks/df/organic/basis/sto-3g/Tesla V100-SXM2-32GB.csv
index 18603814..54b1e178 100644
--- a/benchmarks/df/organic/basis/sto-3g/Tesla V100-SXM2-32GB.csv	
+++ b/benchmarks/df/organic/basis/sto-3g/Tesla V100-SXM2-32GB.csv	
@@ -1,14 +1,14 @@
 mol,natm,nao,t_scf,t_gradient,t_hessian,e_tot
-020_Vitamin_C,20,68,7.735399961471558,0.8264245986938477,-1,-675.5191427445513
-031_Inosine,31,107,12.40164589881897,1.2864620685577393,-1,-970.3601703535105
-033_Bisphenol_A,33,101,9.470605850219727,1.3010294437408447,-1,-722.4575094545307
-037_Mg_Porphin,37,141,10.585906982421875,1.7864344120025635,-1,-1173.7927754921361
-042_Penicillin_V,42,142,14.566765546798706,1.8530471324920654,-1,-1485.4649683739317
-045_Ochratoxin_A,45,157,16.409618377685547,2.151444673538208,-1,-1643.4719863900054
-052_Cetirizine,52,164,16.940945386886597,2.433612108230591,-1,-1591.0513291828825
-057_Tamoxifen,57,169,17.164642333984375,2.5976877212524414,-1,-1123.9167963056734
-066_Raffinose,66,202,19.061853408813477,3.485790491104126,-1,-1882.9880554750498
-084_Sphingomyelin,84,220,70.13539099693298,4.33745265007019,-1,-1787.52705174446
-095_Azadirachtin,95,299,35.65800380706787,7.805527210235596,-1,-2530.024523612371
-113_Taxol,113,361,41.06178140640259,12.37047815322876,-1,-2891.8561844202995
-168_Valinomycin,168,480,66.3832175731659,26.61207914352417,-1,-3744.922364373644
+020_Vitamin_C,20,68,7.868626832962036,1.0551254749298096,160.80799293518066,-675.538636198446
+031_Inosine,31,107,13.161744117736816,1.6693317890167236,409.3673324584961,-970.3940011476416
+033_Bisphenol_A,33,101,11.190348148345947,1.703892707824707,454.6797456741333,-722.4680797386372
+037_Mg_Porphin,37,141,13.220695495605469,2.3627896308898926,613.8060190677643,-1173.8015084893464
+042_Penicillin_V,42,142,16.221434831619263,2.5661509037017822,806.1365261077881,-1485.48745890511
+045_Ochratoxin_A,45,157,17.118950366973877,2.869781255722046,942.4001016616821,-1643.4889926123333
+052_Cetirizine,52,164,18.82169008255005,3.2975871562957764,1308.8825283050537,-1591.0707662509653
+057_Tamoxifen,57,169,20.3020281791687,3.712913751602173,1614.187731742859,-1123.9277518506542
+066_Raffinose,66,202,22.84470796585083,4.8171586990356445,2398.439987182617,-1883.0349428068212
+084_Sphingomyelin,84,220,35.961305141448975,-1,-1,-1787.6089723002701
+095_Azadirachtin,95,299,41.4524290561676,10.98064112663269,-1,-2530.0502561379244
+113_Taxol,113,361,56.471648931503296,-1,-1,-2891.887299140597
+168_Valinomycin,168,480,-1,-1,-1,0
diff --git a/benchmarks/df/organic/solvent/def2-tzvpp/Tesla V100-SXM2-32GB.csv b/benchmarks/df/organic/solvent/def2-tzvpp/Tesla V100-SXM2-32GB.csv
new file mode 100644
index 00000000..97c74f34
--- /dev/null
+++ b/benchmarks/df/organic/solvent/def2-tzvpp/Tesla V100-SXM2-32GB.csv	
@@ -0,0 +1,9 @@
+mol,natm,nao,t_scf,t_gradient,t_hessian,e_tot
+020_Vitamin_C,20,484,10.453456163406372,2.7370195388793945,363.0008842945099,-685.0347463979249
+031_Inosine,31,757,20.68114447593689,6.736889600753784,1316.5888483524323,-983.7176439065447
+033_Bisphenol_A,33,751,17.637565851211548,6.877225160598755,1420.3841083049774,-731.960347742662
+037_Mg_Porphin,37,944,24.36361074447632,11.089943408966064,2439.1287682056427,-1188.8981396236977
+042_Penicillin_V,42,1007,30.862167358398438,11.582371473312378,3181.5790650844574,-1504.6450542609673
+045_Ochratoxin_A,45,1100,33.41982841491699,14.166359901428223,4029.220572948456,-1664.4396913351384
+052_Cetirizine,52,1198,40.039607763290405,18.70956540107727,6276.552805900574,-1611.0213497063874
+057_Tamoxifen,57,1274,44.171112298965454,20.529136180877686,7396.354176044464,-1138.3891845703356
diff --git a/benchmarks/df/organic/solvent/sto-3g/Tesla V100-SXM2-32GB.csv b/benchmarks/df/organic/solvent/sto-3g/Tesla V100-SXM2-32GB.csv
new file mode 100644
index 00000000..bf7be591
--- /dev/null
+++ b/benchmarks/df/organic/solvent/sto-3g/Tesla V100-SXM2-32GB.csv	
@@ -0,0 +1,7 @@
+mol,natm,nao,t_scf,t_gradient,t_hessian,e_tot
+020_Vitamin_C,20,68,7.757320165634155,1.040503740310669,174.4666724205017,-675.5386361984456
+031_Inosine,31,107,13.179906129837036,1.6433496475219727,417.5237069129944,-970.3940011476411
+033_Bisphenol_A,33,101,11.242647647857666,1.7435667514801025,456.6348719596863,-722.4680797386372
+037_Mg_Porphin,37,141,13.297797203063965,2.399493455886841,637.2147407531738,-1173.8015084893468
+042_Penicillin_V,42,142,17.28551435470581,2.550989866256714,803.3315801620483,-1485.4874589051096
+045_Ochratoxin_A,45,157,16.84193205833435,2.799733877182007,933.1838977336884,-1643.4889926123315
diff --git a/benchmarks/df/organic/xc/LDA/qchem-32-cores-cpu.csv b/benchmarks/df/organic/xc/LDA/qchem-32-cores-cpu.csv
index 9a2de937..5471ca66 100644
--- a/benchmarks/df/organic/xc/LDA/qchem-32-cores-cpu.csv
+++ b/benchmarks/df/organic/xc/LDA/qchem-32-cores-cpu.csv
@@ -1,13 +1 @@
 mol,t_scf,t_gradient,e_tot
-020_Vitamin_C,14.0,3.56,-679.7314745086
-031_Inosine,49.0,7.96,-975.8114577115
-033_Bisphenol_A,46.0,8.89,-725.553763622
-037_Mg_Porphin,66.0,14.14,-1179.2748793144
-042_Penicillin_V,64.0,13.64,-1494.103579632
-045_Ochratoxin_A,94.0,17.73,-1652.8352834053
-052_Cetirizine,141.0,23.98,-1599.5921033435
-057_Tamoxifen,130.0,28.07,-1128.1503311128
-066_Raffinose,159.0,38.63,-1894.4283011989
-084_Sphingomyelin,181.0,37.76,-1796.8793820306
-095_Azadirachtin,503.0,108.83,-2543.5362671667
-113_Taxol,815.0,175.2,-2906.2707301461
diff --git a/benchmarks/df/qchem.py b/benchmarks/df/qchem.py
index 9225997a..236e89d3 100644
--- a/benchmarks/df/qchem.py
+++ b/benchmarks/df/qchem.py
@@ -8,6 +8,7 @@
 parser.add_argument('--xc',          type=str, default='B3LYP')
 parser.add_argument('--input_path',  type=str, default='./')
 parser.add_argument('--output_path', type=str, default='./')
+
 args = parser.parse_args()
 bas = args.basis
 xc = args.xc
@@ -43,9 +44,9 @@ def run_dft(filename):
         input.write("THRESH        14\n")
         input.write("BASIS_LIN_DEP_THRESH 12\n")
         input.write("$end\n")
-    
+
     filename = args.xc + '_' + args.basis
-    subprocess.run(['qchem', '-np', '32', 'qchem_input.in', filename])
+    subprocess.run(['qchem', '-save', '-np', '32', 'qchem_input.in', filename, args.output_path+'/qcarchive_'+filename])
     with open(filename, 'w') as output_file:
         lines = output_file.readlines()
         for line in lines:
diff --git a/benchmarks/df/qchem_input.in b/benchmarks/df/qchem_input.in
new file mode 100644
index 00000000..9f357459
--- /dev/null
+++ b/benchmarks/df/qchem_input.in
@@ -0,0 +1,86 @@
+$molecule
+0 1
+C                 -1.42958937   -1.01658249    2.44831265
+C                 -1.54149463   -0.27943595    1.12935913
+C                 -2.95243912    1.47129501    2.22492550
+C                 -2.84134324    0.73400280    3.54388810
+C                 -1.55797242   -0.06742235    3.62210684
+H                 -0.66497820    0.41085733    1.01283439
+H                 -0.44314689   -1.54644194    2.49994231
+H                 -3.93813646    2.00251835    2.17350554
+H                 -3.71865156    0.04459988    3.66018043
+H                 -0.68133448    0.63238265    3.63327796
+O                 -1.92499204    2.46279929    2.14636482
+C                 -1.46076301    4.67520648    3.08258818
+C                 -1.65192906    4.60593616    4.58432062
+H                 -0.74117334    5.43388332    2.85565168
+C                 -3.37658473    6.27667868    2.91318631
+C                 -2.26541279    5.88367074    5.11970508
+H                 -2.31747310    3.73866848    4.83580579
+C                 -3.56636040    6.20921206    4.41472333
+H                 -4.36600055    6.45426127    2.41719619
+H                 -1.54350069    6.73075622    4.97913735
+H                 -4.32958469    5.42429404    4.65913754
+O                 -2.52952388    7.38269576    2.59051909
+C                 -3.31900286    8.44866624    2.05635630
+C                 -3.26458455    9.66301263    2.97202724
+C                 -2.80523815   10.85434814    2.14390783
+H                 -4.26911658    9.86120175    3.42267320
+C                 -2.45049436   10.34868118    0.75291070
+H                 -1.92149828   11.34764146    2.62047963
+H                 -3.01626404   10.91739387   -0.02697788
+C                 -4.76356005    7.92754855    1.94107026
+H                 -4.78266313    7.06714393    1.30527236
+H                 -5.12531018    7.66145994    2.91227245
+O                 -2.82580898    0.52071700    1.05150767
+C                 -0.93849427    3.31732760    2.57761263
+H                 -0.77727115    3.36776157    1.52103158
+H                 -0.01653199    3.08491476    3.06839754
+C                 -1.49224313   -1.28985314   -0.03177454
+H                 -0.58145107   -1.84899344    0.02037852
+H                 -2.32526415   -1.95746693    0.04080516
+C                 -0.95163774   10.56175833    0.47072820
+H                 -0.66603955    9.99216789   -0.38886496
+H                 -0.38034325   10.24135666    1.31681648
+O                 -1.55044662   -0.58860344   -1.27666860
+H                 -1.40118565   -1.20385620   -1.99832308
+O                 -2.45542951   -2.00981905    2.52598600
+H                 -2.07814582   -2.83668451    2.83509848
+O                 -1.53066740   -0.81010098    4.84381994
+H                 -0.61998842   -0.98424670    5.09269150
+O                 -2.88646987    1.67226139    4.62209700
+H                 -3.42291955    1.31644411    5.33428785
+O                 -0.39050984    4.37790095    5.21813565
+H                 -0.43907755    4.65272412    6.13667438
+O                 -2.50206790    5.74947404    6.52358741
+H                 -2.24542728    6.55931660    6.97067871
+O                 -4.06906224    7.46048270    4.89065678
+H                 -4.76357327    7.30283533    5.53439721
+O                 -2.76235824    4.99928466    2.37789525
+O                 -5.59594331    8.94915410    1.38576835
+H                 -6.48998576    8.61436213    1.28476127
+O                 -2.79571817    8.86766013    0.68976834
+O                 -2.35757225    9.42976003    4.05268533
+H                 -2.22828280   10.24429373    4.54402949
+O                 -3.84292711   11.83519815    2.06622328
+H                 -3.45759742   12.71343457    2.10890596
+O                 -0.70503591   11.94904429    0.22675035
+H                  0.04258943   12.04182451   -0.36826913
+
+$end
+$rem
+JOBTYPE force
+METHOD LDA
+BASIS def2-tzvpp
+SYMMETRY      FALSE
+SYM_IGNORE    TRUE
+XC_GRID       000099000590
+NL_GRID       000050000194
+MAX_SCF_CYCLES 100
+ri_j        True
+ri_k        True
+aux_basis     RIJK-def2-tzvp
+SCF_CONVERGENCE 9
+THRESH        14
+BASIS_LIN_DEP_THRESH 12
+$end
diff --git a/benchmarks/df/run_gpu4pyscf.sh b/benchmarks/df/run_gpu4pyscf.sh
index c50cfceb..e158ba14 100644
--- a/benchmarks/df/run_gpu4pyscf.sh
+++ b/benchmarks/df/run_gpu4pyscf.sh
@@ -2,16 +2,23 @@
 
 DIR="./organic/xc"
 [ ! -d "$DIR" ] && mkdir -p "$DIR"
-for xc in LDA PBE B3LYP M06 wB97m-v
-do
-    python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/xc/$xc/ --xc $xc
-done
-exit
+
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/xc/LDA/ --xc LDA
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/xc/PBE/ --xc PBE
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/xc/B3LYP/ --xc B3LYP
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/xc/M06/ --xc M06
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/xc/wB97m-v/ --xc wB97m-v
 
 DIR="./organic/basis"
 [ ! -d "$DIR" ] && mkdir -p "$DIR"
 
-for basis in def2-svp def2-tzvpp def2-tzvpd sto-3g 6-31g
-do
-    python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/basis/$basis/ --basis $basis
-done
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/basis/def2-svp/ --basis def2-svp
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/basis/def2-tzvpp/ --basis def2-tzvpp
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/basis/def2-tzvpd/ --basis def2-tzvpd
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/basis/sto-3g/ --basis sto-3g
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/basis/6-31g/ --basis 6-31g
+
+DIR="./organic/solvent"
+[ ! -d "$DIR" ] && mkdir -p "$DIR"
+
+run --cpu 64 --memory 128 --gpu 1 -- python3 dft_driver.py --input_path ../molecules/organic/ --output_path ./organic/solvent/def2-tzvpp/ --basis def2-tzvpp --with_hessian True --solvent C-PCM
diff --git a/benchmarks/df/run_qchem.sh b/benchmarks/df/run_qchem.sh
index adc8731f..b29015c9 100644
--- a/benchmarks/df/run_qchem.sh
+++ b/benchmarks/df/run_qchem.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+export QCSCRATCH=/tmp/
+
 DIR="./organic/xc"
 [ ! -d "$DIR" ] && mkdir -p "$DIR"
 
@@ -8,9 +10,9 @@ DIR="./organic/basis"
 
 #for xc in LDA PBE B3LYP M06 wB97m-v
 run --cpu 64 --memory 128 --gpu 0 -- python3 qchem.py --input_path ../molecules/organic/ --output_path ./organic/xc/LDA/ --xc LDA &&
-run --cpu 64 --memory 128 --gpu 0 -- python3 qchem.py --input_path ../molecules/organic/ --output_path ./organic/xc/PBE/ --xc PBE && 
+run --cpu 64 --memory 128 --gpu 0 -- python3 qchem.py --input_path ../molecules/organic/ --output_path ./organic/xc/PBE/ --xc PBE &&
 run --cpu 64 --memory 128 --gpu 0 -- python3 qchem.py --input_path ../molecules/organic/ --output_path ./organic/xc/B3LYP/ --xc B3LYP &&
-run --cpu 64 --memory 128 --gpu 0 -- python3 qchem.py --input_path ../molecules/organic/ --output_path ./organic/xc/M06/ --xc M06 && 
+run --cpu 64 --memory 128 --gpu 0 -- python3 qchem.py --input_path ../molecules/organic/ --output_path ./organic/xc/M06/ --xc M06 &&
 run --cpu 64 --memory 128 --gpu 0 -- python3 qchem.py --input_path ../molecules/organic/ --output_path ./organic/xc/wB97m-v/ --xc wB97m-v &&
 
 #for basis in def2-svp def2-tzvpp def2-tzvpd sto-3g 6-31g 6-31g*
diff --git a/benchmarks/scf/dft_driver.py b/benchmarks/scf/dft_driver.py
index 0a63d350..5cc05a66 100644
--- a/benchmarks/scf/dft_driver.py
+++ b/benchmarks/scf/dft_driver.py
@@ -15,7 +15,10 @@
 parser.add_argument('--device',       type=str, default='GPU')
 parser.add_argument('--input_path',   type=str, default='./')
 parser.add_argument('--output_path',  type=str, default='./')
+parser.add_argument('--with_gradient', type=bool, default=False)
 parser.add_argument('--with_hessian', type=bool, default=False)
+parser.add_argument("--solvent",  type=bool, default=False)
+
 args = parser.parse_args()
 bas = args.basis
 verbose = args.verbose
@@ -39,23 +42,29 @@
     output_file = 'PySCF-16-cores-CPU.csv'
 output_file = args.output_path + output_file
 
-def run_dft(filename):  
+def run_dft(filename):
     mol = pyscf.M(atom=filename, basis=bas, max_memory=64000)
-    start_time = time.time()  
+    start_time = time.time()
     # set verbose >= 6 for debugging timer
     mol.verbose = 4 #verbose
     mol.max_memory = 40000
     mf = rks.RKS(mol, xc=xc)
+    if args.solvent:
+        mf = mf.PCM()
+        mf.with_solvent.lebedev_order = 29
+        mf.with_solvent.method = 'IEF-PCM'
+        mf.with_solvent.eps = 78.3553
     mf.grids.atom_grid = (99,590)
     mf.chkfile = None
     prep_time = time.time() - start_time
     mf.conv_tol = 1e-9
     mf.nlcgrids.atom_grid = (50,194)
     mf.max_cycle = 100
+    print(mf.scf_summary)
     try:
         e_dft = mf.kernel()
         scf_time = time.time() - start_time
-    except:
+    except Exception:
         scf_time = -1
         e_dft = 0
 
@@ -68,13 +77,13 @@ def run_dft(filename):
         g.max_memory = 40000
         f = g.kernel()
         grad_time = time.time() - start_time
-    except:
+    except Exception:
         grad_time = -1
 
     # calculate hessian
     if args.device == 'GPU':
         cupy.get_default_memory_pool().free_all_blocks()
-    
+
     hess_time = -1
     if args.with_hessian:
         try:
@@ -83,7 +92,7 @@ def run_dft(filename):
             h.max_memory = 40000
             hess = h.kernel()
             hess_time = time.time() - start_time
-        except:
+        except Exception:
             hess_time = -1
 
     return mol.natm, mol.nao, scf_time, grad_time, hess_time, e_dft
diff --git a/examples/01-h2o_dftd3.py b/examples/01-h2o_dftd3.py
index f22c745e..63e432cc 100644
--- a/examples/01-h2o_dftd3.py
+++ b/examples/01-h2o_dftd3.py
@@ -21,7 +21,7 @@
 
 lib.num_threads(8)
 
-atom =''' 
+atom ='''
 O       0.0000000000    -0.0000000000     0.1174000000
 H      -0.7570000000    -0.0000000000    -0.4696000000
 H       0.7570000000     0.0000000000    -0.4696000000
@@ -46,7 +46,7 @@
 
 # Compute Energy
 print('------------------- Energy -----------------------------')
-e_dft = mf_GPU.kernel() 
+e_dft = mf_GPU.kernel()
 print('DFT energy by GPU4PySCF')
 print(e_dft)
 print('DFT energy by Q-Chem')
diff --git a/examples/14-pcm_solvent.py b/examples/14-pcm_solvent.py
index 3def0ee4..8eaabbf8 100644
--- a/examples/14-pcm_solvent.py
+++ b/examples/14-pcm_solvent.py
@@ -13,8 +13,9 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+import numpy as np
 import pyscf
-from pyscf import lib
+from pyscf import lib, df
 from gpu4pyscf.dft import rks
 lib.num_threads(8)
 
@@ -23,16 +24,30 @@
 H      -0.7570000000    -0.0000000000    -0.4696000000
 H       0.7570000000     0.0000000000    -0.4696000000
 '''
-mol = pyscf.M(atom=atom, basis='def2-tzvpp', verbose=4)
+
+mol = pyscf.M(atom=atom, basis='def2-tzvpp', verbose=0)
 
 mf = rks.RKS(mol, xc='HYB_GGA_XC_B3LYP').density_fit()
 mf = mf.PCM()
+mf.verbose = 6
 mf.grids.atom_grid = (99,590)
+mf.small_rho_cutoff = 1e-10
 mf.with_solvent.lebedev_order = 29 # 302 Lebedev grids
-mf.with_solvent.method = 'IEF-PCM'
+mf.with_solvent.method = 'C-PCM'
 mf.with_solvent.eps = 78.3553
 mf.kernel()
 
-g = mf.nuc_grad_method()
-g.auxbasis_response = True
-f = g.kernel()
+gradobj = mf.nuc_grad_method()
+gradobj.auxbasis_response = True
+f = gradobj.kernel()
+
+hessobj = mf.Hessian()
+hess = hessobj.kernel()
+
+# mass weighted hessian
+mass = [15.99491, 1.00783, 1.00783]
+for i in range(3):
+    for j in range(3):
+        hess[i,j] = hess[i,j]/np.sqrt(mass[i]*mass[j])
+
+
diff --git a/examples/15-chelpg.py b/examples/15-chelpg.py
index f102741b..473d500d 100644
--- a/examples/15-chelpg.py
+++ b/examples/15-chelpg.py
@@ -17,7 +17,6 @@
 from gpu4pyscf.dft import rks
 from gpu4pyscf.qmmm import chelpg
 
-    
 mol = gto.Mole()
 mol.verbose = 0
 mol.output = None
@@ -30,7 +29,7 @@
 mol.unit = 'B'
 mol.build()
 mol.verbose = 6
-    
+
 xc = 'b3lyp'
 mf = rks.RKS(mol, xc=xc)
 mf.grids.level = 5
diff --git a/examples/density.npz b/examples/density.npz
deleted file mode 100644
index 3c044d03..00000000
Binary files a/examples/density.npz and /dev/null differ
diff --git a/examples/dft_driver.py b/examples/dft_driver.py
index 12628086..96b1718c 100644
--- a/examples/dft_driver.py
+++ b/examples/dft_driver.py
@@ -18,14 +18,13 @@
 import argparse
 from pyscf import lib
 from gpu4pyscf.dft import rks
-lib.num_threads(8)
 
 parser = argparse.ArgumentParser(description='Run DFT with GPU4PySCF for molecules')
 parser.add_argument("--input",    type=str,  default='benzene/coord')
 parser.add_argument("--basis",    type=str,  default='def2-tzvpp')
 parser.add_argument("--auxbasis", type=str,  default='def2-tzvpp-jkfit')
 parser.add_argument("--xc",       type=str,  default='B3LYP')
-parser.add_argument("--solvent",  type=bool, default=False)
+parser.add_argument("--solvent",  type=str, default='')
 args = parser.parse_args()
 
 start_time = time.time()
@@ -35,15 +34,23 @@
     basis=bas,
     max_memory=32000)
 # set verbose >= 6 for debugging timer
-mol.verbose = 1
+
+mol.verbose = 0
 
 mf_df = rks.RKS(mol, xc=args.xc).density_fit(auxbasis=args.auxbasis)
+mf_df.verbose = 6
+
 if args.solvent:
     mf_df = mf_df.PCM()
-    mf_df.lebedev_order = 29
-    mf_df.method = 'IEF-PCM'
+    mf_df.with_solvent.lebedev_order = 29
+    mf_df.with_solvent.method = args.solvent
+    mf_df.with_solvent.eps = 78.3553
+
 mf_df.grids.atom_grid = (99,590)
-mf_df.kernel()
+mf_df.direct_scf_tol = 1e-14
+mf_df.direct_scf = 1e-14
+mf_df.conv_tol = 1e-12
+e_tot = mf_df.kernel()
 scf_time = time.time() - start_time
 print(f'compute time for energy: {scf_time:.3f} s')
 
@@ -60,3 +67,6 @@
 h_dft = h.kernel()
 hess_time = time.time() - start_time
 print(f'compute time for hessian: {hess_time:.3f} s')
+
+import numpy
+numpy.savez('gpu4pyscf_out.npz', e_tot=e_tot, f=f, h_dft=h_dft)
\ No newline at end of file
diff --git a/gpu4pyscf/__init__.py b/gpu4pyscf/__init__.py
index 681a2e88..550ac39c 100644
--- a/gpu4pyscf/__init__.py
+++ b/gpu4pyscf/__init__.py
@@ -1,2 +1,2 @@
 from . import lib, grad, hessian, solvent, scf, dft
-__version__ = '0.6.5'
+__version__ = '0.6.6'
diff --git a/gpu4pyscf/df/hessian/rks.py b/gpu4pyscf/df/hessian/rks.py
index b7432314..77593b94 100644
--- a/gpu4pyscf/df/hessian/rks.py
+++ b/gpu4pyscf/df/hessian/rks.py
@@ -33,6 +33,7 @@
 from gpu4pyscf.hessian import rks as rks_hess
 from gpu4pyscf.df.hessian import rhf as df_rhf_hess
 from gpu4pyscf.lib import logger
+from gpu4pyscf.lib.cupy_helper import contract
 
 def partial_hess_elec(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None,
                       atmlst=None, max_memory=4000, verbose=None):
@@ -74,10 +75,10 @@ def partial_hess_elec(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None,
     for i0, ia in enumerate(atmlst):
         shl0, shl1, p0, p1 = aoslices[ia]
         veff = vxc[ia]
-        de2[i0,i0] += cupy.einsum('xypq,pq->xy', veff_diag[:,:,p0:p1], dm0[p0:p1])*2
+        de2[i0,i0] += contract('xypq,pq->xy', veff_diag[:,:,p0:p1], dm0[p0:p1])*2
         for j0, ja in enumerate(atmlst[:i0+1]):
             q0, q1 = aoslices[ja][2:]
-            de2[i0,j0] += cupy.einsum('xypq,pq->xy', veff[:,:,q0:q1], dm0[q0:q1])*2
+            de2[i0,j0] += contract('xypq,pq->xy', veff[:,:,q0:q1], dm0[q0:q1])*2
         for j0 in range(i0):
             de2[j0,i0] = de2[i0,j0].T
     log.timer('RKS partial hessian', *time0)
diff --git a/gpu4pyscf/grad/rks.py b/gpu4pyscf/grad/rks.py
index 316478cc..82b0befd 100644
--- a/gpu4pyscf/grad/rks.py
+++ b/gpu4pyscf/grad/rks.py
@@ -22,13 +22,13 @@
 import cupy
 import pyscf
 from pyscf import lib, gto
-from pyscf.lib import logger
 from pyscf.dft import radi
 from gpu4pyscf.lib.utils import patch_cpu_kernel
 from gpu4pyscf.grad import rhf as rhf_grad
 from gpu4pyscf.dft import numint, xc_deriv, rks
 from gpu4pyscf.dft.numint import _GDFTOpt, AO_THRESHOLD
 from gpu4pyscf.lib.cupy_helper import contract, get_avail_mem, add_sparse, tag_array, load_library
+from gpu4pyscf.lib import logger
 from pyscf import __config__
 
 MIN_BLK_SIZE = getattr(__config__, 'min_grid_blksize', 128*128)
diff --git a/gpu4pyscf/lib/logger.py b/gpu4pyscf/lib/logger.py
index 60497816..6367b440 100644
--- a/gpu4pyscf/lib/logger.py
+++ b/gpu4pyscf/lib/logger.py
@@ -54,19 +54,20 @@ def timer(rec, msg, cpu0=None, wall0=None, gpu0=None):
         if rec.verbose >= TIMER_LEVEL:
             rec._e0.record()
             rec._e0.synchronize()
-            flush(rec, '    CPU time for %20s %9.2f sec, wall time %9.2f sec, GPU time for %9.2f ms'
+
+            flush(rec, '    CPU time for %50s %9.2f sec, wall time %9.2f sec, GPU time for %9.2f ms'
                   % (msg, rec._t0-cpu0, rec._w0-wall0, cupy.cuda.get_elapsed_time(gpu0,rec._e0)))
         return rec._t0, rec._w0, rec._e0
     elif wall0:
         rec._t0, rec._w0 = process_clock(), perf_counter()
         if rec.verbose >= TIMER_LEVEL:
-            flush(rec, '    CPU time for %20s %9.2f sec, wall time %9.2f sec'
+            flush(rec, '    CPU time for %50s %9.2f sec, wall time %9.2f sec'
                   % (msg, rec._t0-cpu0, rec._w0-wall0))
         return rec._t0, rec._w0
     else:
         rec._t0 = process_clock()
         if rec.verbose >= TIMER_LEVEL:
-            flush(rec, '    CPU time for %20s %9.2f sec' % (msg, rec._t0-cpu0))
+            flush(rec, '    CPU time for %50s %9.2f sec' % (msg, rec._t0-cpu0))
         return rec._t0,
 
 def _timer_debug1(rec, msg, cpu0=None, wall0=None, gpu0=None, sync=True):
@@ -84,6 +85,7 @@ def _timer_debug1(rec, msg, cpu0=None, wall0=None, gpu0=None, sync=True):
         return rec._t0,
 
 info = lib.logger.info
+note = lib.logger.note
 debug = lib.logger.debug
 debug1 = lib.logger.debug1
 debug2 = lib.logger.debug2
diff --git a/gpu4pyscf/scf/cphf.py b/gpu4pyscf/scf/cphf.py
index ddfb6f67..fdd45453 100644
--- a/gpu4pyscf/scf/cphf.py
+++ b/gpu4pyscf/scf/cphf.py
@@ -26,8 +26,9 @@
 from pyscf import lib
 from gpu4pyscf.lib.cupy_helper import krylov
 from gpu4pyscf.lib import logger
+
 def solve(fvind, mo_energy, mo_occ, h1, s1=None,
-          max_cycle=20, tol=1e-9, hermi=False, verbose=logger.WARN):
+          max_cycle=50, tol=1e-9, hermi=False, verbose=logger.WARN):
     '''
     Args:
         fvind : function
@@ -36,7 +37,7 @@ def solve(fvind, mo_energy, mo_occ, h1, s1=None,
         hermi : boolean
             Whether the matrix defined by fvind is Hermitian or not.
     '''
-    
+
     if s1 is None:
         return solve_nos1(fvind, mo_energy, mo_occ, h1,
                           max_cycle, tol, hermi, verbose)
@@ -69,7 +70,7 @@ def vind_vo(mo1):
 
 # h1 shape is (:,nocc+nvir,nocc)
 def solve_withs1(fvind, mo_energy, mo_occ, h1, s1,
-                 max_cycle=20, tol=1e-9, hermi=False, verbose=logger.WARN):
+                 max_cycle=50, tol=1e-9, hermi=False, verbose=logger.WARN):
     '''For field dependent basis. First order overlap matrix is non-zero.
     The first order orbitals are set to
     C^1_{ij} = -1/2 S1
@@ -96,7 +97,7 @@ def solve_withs1(fvind, mo_energy, mo_occ, h1, s1,
     s1 = s1.reshape(-1,nmo,nocc)
     hs = mo1base = h1.reshape(-1,nmo,nocc) - s1*e_i
     mo_e1 = hs[:,occidx,:].copy()
-    
+
     mo1base[:,viridx] *= -e_ai
     mo1base[:,occidx] = -s1[:,occidx] * .5
 
diff --git a/gpu4pyscf/scf/diis.py b/gpu4pyscf/scf/diis.py
index c39c9dfc..b1e4c1f7 100644
--- a/gpu4pyscf/scf/diis.py
+++ b/gpu4pyscf/scf/diis.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 # Author: Qiming Sun <osirpt.sun@gmail.com>
-# 
+#
 # modified by Xiaojie Wu <wxj6000@gmail.com>
 
 """
@@ -26,9 +26,10 @@
 import cupy
 import scipy.linalg
 import scipy.optimize
-import gpu4pyscf.lib as lib
-from pyscf.lib import logger
 import pyscf.scf.diis as cpu_diis
+import gpu4pyscf.lib as lib
+from gpu4pyscf.lib import logger
+
 DEBUG = False
 
 # J. Mol. Struct. 114, 31-34 (1984); DOI:10.1016/S0022-2860(84)87198-7
diff --git a/gpu4pyscf/scf/hf.py b/gpu4pyscf/scf/hf.py
index 8e1a9855..8bcfd65e 100644
--- a/gpu4pyscf/scf/hf.py
+++ b/gpu4pyscf/scf/hf.py
@@ -467,7 +467,6 @@ def _gen_rhf_response(mf, mo_coeff=None, mo_occ=None,
             orbital hessian or CPHF will be generated. If singlet is boolean,
             it is used in TDDFT response kernel.
     '''
-
     if mo_coeff is None: mo_coeff = mf.mo_coeff
     if mo_occ is None: mo_occ = mf.mo_occ
     mol = mf.mol
diff --git a/gpu4pyscf/solvent/_attach_solvent.py b/gpu4pyscf/solvent/_attach_solvent.py
index da556302..23e08af2 100644
--- a/gpu4pyscf/solvent/_attach_solvent.py
+++ b/gpu4pyscf/solvent/_attach_solvent.py
@@ -13,6 +13,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+import cupy
 from pyscf import lib
 from pyscf.lib import logger
 from pyscf.solvent._attach_solvent import _Solvation
@@ -98,6 +99,10 @@ def nuc_grad_method(self):
 
         Gradients = nuc_grad_method
 
+        def Hessian(self):
+            hess_method = oldMF.Hessian(self)
+            return self.with_solvent.Hessian(hess_method)
+
         def gen_response(self, *args, **kwargs):
             vind = oldMF.gen_response(self, *args, **kwargs)
             is_uhf = isinstance(self, scf.uhf.UHF)
diff --git a/gpu4pyscf/solvent/grad/pcm.py b/gpu4pyscf/solvent/grad/pcm.py
index 4df17558..88001e35 100644
--- a/gpu4pyscf/solvent/grad/pcm.py
+++ b/gpu4pyscf/solvent/grad/pcm.py
@@ -22,12 +22,14 @@
 import cupy
 from cupyx import scipy
 from pyscf import lib
-from pyscf.lib import logger
 from pyscf import gto, df
 from pyscf.grad import rhf as rhf_grad
 from gpu4pyscf.solvent.pcm import PI, switch_h
 from gpu4pyscf.df import int3c2e
 
+from gpu4pyscf.lib.cupy_helper import contract
+from gpu4pyscf.lib import logger
+
 libdft = lib.load_library('libdft')
 
 def grad_switch_h(x):
@@ -69,9 +71,9 @@ def get_dF_dA(surface):
         riJ = cupy.linalg.norm(ri_rJ, axis=-1)
         diJ = (riJ - R_in_J) / R_sw_J
         diJ[:,ia] = 1.0
-        diJ[diJ < 1e-8] = 0.0
+        diJ[diJ < 1e-12] = 0.0
         ri_rJ[:,ia,:] = 0.0
-        ri_rJ[diJ < 1e-8] = 0.0
+        ri_rJ[diJ < 1e-12] = 0.0
 
         fiJ = switch_h(diJ)
         dfiJ = grad_switch_h(diJ) / (fiJ * riJ * R_sw_J)
@@ -138,16 +140,44 @@ def get_dD_dS(surface, dF, with_S=True, with_D=False):
 
     return dD, dS, dSii
 
-def grad_kernel(pcmobj, dm):
+def grad_nuc(pcmobj):
+    if not pcmobj._intermediates:
+        pcmobj.build()
+    mol = pcmobj.mol
+    q_sym        = pcmobj._intermediates['q_sym'].get()
+    gridslice    = pcmobj.surface['gslice_by_atom']
+    grid_coords  = pcmobj.surface['grid_coords'].get()
+    exponents    = pcmobj.surface['charge_exp'].get()
+
+    atom_coords = mol.atom_coords(unit='B')
+    atom_charges = numpy.asarray(mol.atom_charges(), dtype=numpy.float64)
+    fakemol_nuc = gto.fakemol_for_charges(atom_coords)
+    fakemol = gto.fakemol_for_charges(grid_coords, expnt=exponents**2)
+
+    int2c2e_ip1 = mol._add_suffix('int2c2e_ip1')
+
+    v_ng_ip1 = gto.mole.intor_cross(int2c2e_ip1, fakemol_nuc, fakemol)
+
+    dv_g = numpy.einsum('g,xng->nx', q_sym, v_ng_ip1)
+    de = -numpy.einsum('nx,n->nx', dv_g, atom_charges)
+
+    v_ng_ip1 = gto.mole.intor_cross(int2c2e_ip1, fakemol, fakemol_nuc)
+
+    dv_g = numpy.einsum('n,xgn->gx', atom_charges, v_ng_ip1)
+    dv_g = numpy.einsum('gx,g->gx', dv_g, q_sym)
+
+    de -= numpy.asarray([numpy.sum(dv_g[p0:p1], axis=0) for p0,p1 in gridslice])
+    return de
+
+def grad_elec(pcmobj, dm):
     '''
     dE = 0.5*v* d(K^-1 R) *v + q*dv
     v^T* d(K^-1 R)v = v^T*K^-1(dR - dK K^-1R)v = v^T K^-1(dR - dK q)
     '''
-    mol = pcmobj.mol
+    if not pcmobj._intermediates:
+        pcmobj.build()
 
     gridslice    = pcmobj.surface['gslice_by_atom']
-    grid_coords  = pcmobj.surface['grid_coords']
-    exponents    = pcmobj.surface['charge_exp']
     v_grids      = pcmobj._intermediates['v_grids']
     A            = pcmobj._intermediates['A']
     D            = pcmobj._intermediates['D']
@@ -159,7 +189,6 @@ def grad_kernel(pcmobj, dm):
     vK_1 = cupy.linalg.solve(K.T, v_grids)
 
     # ----------------- potential response -----------------------
-    atom_coords = mol.atom_coords(unit='B')
 
     intopt = pcmobj.intopt
     intopt.clear()
@@ -180,25 +209,6 @@ def grad_kernel(pcmobj, dm):
     dvj= 2.0 * cupy.asarray([cupy.sum(dvj[:,p0:p1], axis=1) for p0,p1 in aoslice[:,2:]])
     de = dq + dvj
 
-    atom_charges = mol.atom_charges()
-    fakemol_nuc = gto.fakemol_for_charges(atom_coords)
-    fakemol = gto.fakemol_for_charges(grid_coords.get(), expnt=exponents.get()**2)
-
-    # nuclei response
-    int2c2e_ip1 = mol._add_suffix('int2c2e_ip1')
-    v_ng_ip1 = gto.mole.intor_cross(int2c2e_ip1, fakemol_nuc, fakemol)
-    v_ng_ip1 = cupy.asarray(v_ng_ip1)
-    dv_g = cupy.einsum('g,xng->nx', q_sym, v_ng_ip1)
-    de -= cupy.einsum('nx,n->nx', dv_g, atom_charges)
-
-    # nuclei potential response
-    int2c2e_ip2 = mol._add_suffix('int2c2e_ip2')
-    v_ng_ip2 = gto.mole.intor_cross(int2c2e_ip2, fakemol_nuc, fakemol)
-    v_ng_ip2 = cupy.asarray(v_ng_ip2)
-    dv_g = cupy.einsum('n,xng->gx', atom_charges, v_ng_ip2)
-    dv_g = cupy.einsum('gx,g->gx', dv_g, q_sym)
-    de -= cupy.asarray([cupy.sum(dv_g[p0:p1], axis=0) for p0,p1 in gridslice])
-
     ## --------------- response from stiffness matrices ----------------
     gridslice = pcmobj.surface['gslice_by_atom']
     dF, dA = get_dF_dA(pcmobj.surface)
@@ -277,7 +287,9 @@ def kernel(self, *args, dm=None, atmlst=None, **kwargs):
             if dm is None:
                 dm = self.base.make_rdm1(ao_repr=True)
 
-            self.de_solvent = grad_kernel(self.base.with_solvent, dm)
+            self.de_solvent = grad_elec(self.base.with_solvent, dm)
+            self.de_solvent+= grad_nuc(self.base.with_solvent)
+
             self.de_solute = grad_method_class.kernel(self, *args, **kwargs)
             self.de = self.de_solute + self.de_solvent
 
diff --git a/gpu4pyscf/solvent/hessian/__init__.py b/gpu4pyscf/solvent/hessian/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/gpu4pyscf/solvent/hessian/pcm.py b/gpu4pyscf/solvent/hessian/pcm.py
new file mode 100644
index 00000000..5d2c4dac
--- /dev/null
+++ b/gpu4pyscf/solvent/hessian/pcm.py
@@ -0,0 +1,207 @@
+# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+'''
+Gradient of PCM family solvent model
+'''
+# pylint: disable=C0103
+
+import numpy
+import cupy
+from cupyx import scipy
+from pyscf import lib
+from pyscf import gto, df
+from pyscf.grad import rhf as rhf_grad
+from gpu4pyscf.solvent.pcm import PI, switch_h
+from gpu4pyscf.solvent.grad.pcm import grad_switch_h, get_dF_dA, get_dD_dS, grad_elec, grad_nuc
+from gpu4pyscf.df import int3c2e
+from gpu4pyscf.lib.cupy_helper import contract
+from gpu4pyscf.lib import logger
+
+libdft = lib.load_library('libdft')
+
+def hess_nuc(pcmobj):
+    if not pcmobj._intermediates:
+        pcmobj.build()
+    mol = pcmobj.mol
+    q_sym        = pcmobj._intermediates['q_sym'].get()
+    gridslice    = pcmobj.surface['gslice_by_atom']
+    grid_coords  = pcmobj.surface['grid_coords'].get()
+    exponents    = pcmobj.surface['charge_exp'].get()
+
+    atom_coords = mol.atom_coords(unit='B')
+    atom_charges = numpy.asarray(mol.atom_charges(), dtype=numpy.float64)
+    fakemol_nuc = gto.fakemol_for_charges(atom_coords)
+    fakemol = gto.fakemol_for_charges(grid_coords, expnt=exponents**2)
+
+    # nuclei potential response
+    int2c2e_ip1ip2 = mol._add_suffix('int2c2e_ip1ip2')
+    v_ng_ip1ip2 = gto.mole.intor_cross(int2c2e_ip1ip2, fakemol_nuc, fakemol).reshape([3,3,mol.natm,-1])
+    dv_g = numpy.einsum('n,xyng->ngxy', atom_charges, v_ng_ip1ip2)
+    dv_g = numpy.einsum('ngxy,g->ngxy', dv_g, q_sym)
+
+    de = numpy.zeros([mol.natm, mol.natm, 3, 3])
+    for ia in range(mol.natm):
+        p0, p1 = gridslice[ia]
+        de_tmp = numpy.sum(dv_g[:,p0:p1], axis=1)
+        de[:,ia] -= de_tmp
+        #de[ia,:] -= de_tmp.transpose([0,2,1])
+
+
+    int2c2e_ip1ip2 = mol._add_suffix('int2c2e_ip1ip2')
+    v_ng_ip1ip2 = gto.mole.intor_cross(int2c2e_ip1ip2, fakemol, fakemol_nuc).reshape([3,3,-1,mol.natm])
+    dv_g = numpy.einsum('n,xygn->gnxy', atom_charges, v_ng_ip1ip2)
+    dv_g = numpy.einsum('gnxy,g->gnxy', dv_g, q_sym)
+
+    for ia in range(mol.natm):
+        p0, p1 = gridslice[ia]
+        de_tmp = numpy.sum(dv_g[p0:p1], axis=0)
+        de[ia,:] -= de_tmp
+        #de[ia,:] -= de_tmp.transpose([0,2,1])
+
+    int2c2e_ipip1 = mol._add_suffix('int2c2e_ipip1')
+    v_ng_ipip1 = gto.mole.intor_cross(int2c2e_ipip1, fakemol_nuc, fakemol).reshape([3,3,mol.natm,-1])
+    dv_g = numpy.einsum('g,xyng->nxy', q_sym, v_ng_ipip1)
+    for ia in range(mol.natm):
+        de[ia,ia] -= dv_g[ia] * atom_charges[ia]
+
+    v_ng_ipip1 = gto.mole.intor_cross(int2c2e_ipip1, fakemol, fakemol_nuc).reshape([3,3,-1,mol.natm])
+    dv_g = numpy.einsum('n,xygn->gxy', atom_charges, v_ng_ipip1)
+    dv_g = numpy.einsum('g,gxy->gxy', q_sym, dv_g)
+    for ia in range(mol.natm):
+        p0, p1 = gridslice[ia]
+        de[ia,ia] -= numpy.sum(dv_g[p0:p1], axis=0)
+
+    return de
+
+def hess_elec(pcmobj, dm, verbose=None):
+    '''
+    slow version with finite difference
+    TODO: use analytical hess_nuc
+    '''
+    log = logger.new_logger(pcmobj, verbose)
+    t1 = log.init_timer()
+    pmol = pcmobj.mol.copy()
+    mol = pmol.copy()
+    coords = mol.atom_coords(unit='Bohr')
+
+    def pcm_grad_scanner(mol):
+        pcmobj.reset(mol)
+        e, v = pcmobj._get_vind(dm)
+        #return grad_elec(pcmobj, dm)
+        return grad_nuc(pcmobj) + grad_elec(pcmobj, dm)
+
+    de = numpy.zeros([mol.natm, mol.natm, 3, 3])
+    eps = 1e-3
+    for ia in range(mol.natm):
+        for ix in range(3):
+            dv = numpy.zeros_like(coords)
+            dv[ia,ix] = eps
+            mol.set_geom_(coords + dv, unit='Bohr')
+            mol.build()
+            g0 = pcm_grad_scanner(mol)
+
+            mol.set_geom_(coords - dv, unit='Bohr')
+            mol.build()
+            g1 = pcm_grad_scanner(mol)
+            de[ia,:,ix] = (g0 - g1)/2.0/eps
+    t1 = log.timer_debug1('solvent energy', *t1)
+    pcmobj.reset(pmol)
+    return de
+
+def grad_qv(pcmobj, mo_coeff, mo_occ, atmlst=None, verbose=None):
+    '''
+    dv_solv / da
+    slow version with finite difference
+    '''
+    log = logger.new_logger(pcmobj, verbose)
+    t1 = log.init_timer()
+    pmol = pcmobj.mol.copy()
+    mol = pmol.copy()
+    if atmlst is None:
+        atmlst = range(mol.natm)
+    nao, nmo = mo_coeff.shape
+    mocc = mo_coeff[:,mo_occ>0]
+    nocc = mocc.shape[1]
+    dm = cupy.dot(mocc, mocc.T) * 2
+    coords = mol.atom_coords(unit='Bohr')
+    def pcm_vmat_scanner(mol):
+        pcmobj.reset(mol)
+        e, v = pcmobj._get_vind(dm)
+        return v
+
+    vmat = cupy.zeros([len(atmlst), 3, nao, nocc])
+    eps = 1e-3
+    for i0, ia in enumerate(atmlst):
+        for ix in range(3):
+            dv = numpy.zeros_like(coords)
+            dv[ia,ix] = eps
+            mol.set_geom_(coords + dv, unit='Bohr')
+            mol.build()
+            vmat0 = pcm_vmat_scanner(mol)
+
+            mol.set_geom_(coords - dv, unit='Bohr')
+            mol.build()
+            vmat1 = pcm_vmat_scanner(mol)
+
+            grad_vmat = (vmat0 - vmat1)/2.0/eps
+            grad_vmat = contract("ij,jq->iq", grad_vmat, mocc)
+            grad_vmat = contract("iq,ip->pq", grad_vmat, mo_coeff)
+            vmat[i0,ix] = grad_vmat
+    t1 = log.timer_debug1('computing solvent grad veff', *t1)
+    pcmobj.reset(pmol)
+    return vmat
+
+def make_hess_object(hess_method):
+    '''
+    return solvent hessian object
+    '''
+    hess_method_class = hess_method.__class__
+    class WithSolventHess(hess_method_class):
+        def __init__(self, hess_method):
+            self.__dict__.update(hess_method.__dict__)
+            self.de_solvent = None
+            self.de_solute = None
+            self._keys = self._keys.union(['de_solvent', 'de_solute'])
+
+        def kernel(self, *args, dm=None, atmlst=None, **kwargs):
+            dm = kwargs.pop('dm', None)
+            if dm is None:
+                dm = self.base.make_rdm1(ao_repr=True)
+            is_equilibrium = self.base.with_solvent.equilibrium_solvation
+            self.base.with_solvent.equilibrium_solvation = True
+            self.de_solvent = hess_elec(self.base.with_solvent, dm, verbose=self.verbose)
+            #self.de_solvent+= hess_nuc(self.base.with_solvent)
+            self.de_solute = hess_method_class.kernel(self, *args, **kwargs)
+            self.de = self.de_solute + self.de_solvent
+            self.base.with_solvent.equilibrium_solvation = is_equilibrium
+            return self.de
+
+        def make_h1(self, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None):
+            if atmlst is None:
+                atmlst = range(self.mol.natm)
+            h1ao = hess_method_class.make_h1(self, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose)
+            dv = grad_qv(self.base.with_solvent, mo_coeff, mo_occ, atmlst=atmlst, verbose=verbose)
+            for i0, ia in enumerate(atmlst):
+                h1ao[i0] += dv[i0]
+            return h1ao
+
+        def _finalize(self):
+            # disable _finalize. It is called in grad_method.kernel method
+            # where self.de was not yet initialized.
+            pass
+
+    return WithSolventHess(hess_method)
+
diff --git a/gpu4pyscf/solvent/pcm.py b/gpu4pyscf/solvent/pcm.py
index fd3b36e4..e9634628 100644
--- a/gpu4pyscf/solvent/pcm.py
+++ b/gpu4pyscf/solvent/pcm.py
@@ -22,13 +22,13 @@
 import cupy
 import cupyx.scipy as scipy
 from pyscf import lib
-from pyscf.lib import logger
 from pyscf import gto, df
 from pyscf.dft import gen_grid
 from pyscf.data import radii
 from pyscf.solvent import ddcosmo
 from gpu4pyscf.solvent import _attach_solvent
 from gpu4pyscf.df import int3c2e
+from gpu4pyscf.lib import logger
 
 libdft = lib.load_library('libdft')
 
@@ -124,14 +124,15 @@ def gen_surface(mol, ng=302, vdw_scale=1.2):
         riJ = cupy.sum((atom_grid[:,None,:] - atom_coords[None,:,:])**2, axis=2)**0.5
         diJ = (riJ - R_in_J) / R_sw_J
         diJ[:,ia] = 1.0
-        diJ[diJ<1e-8] = 0.0
+        diJ[diJ<1e-12] = 0.0
+
         fiJ = switch_h(diJ)
 
         w = unit_sphere[:,3] * 4.0 * PI
         swf = cupy.prod(fiJ, axis=1)
         idx = w*swf > 1e-16
 
-        p0, p1 = p1, p1+sum(idx)
+        p0, p1 = p1, p1+sum(idx).get()
         gslice_by_atom.append([p0,p1])
         grid_coords.append(atom_grid[idx,:3])
         weights.append(w[idx])
@@ -301,34 +302,46 @@ def _get_vind(self, dms):
 
         K = self._intermediates['K']
         R = self._intermediates['R']
-        v_grids = self._get_v(dms)
-        b = cupy.dot(R, v_grids)
-        q = cupy.linalg.solve(K, b)
+        v_grids_e = self._get_v(dms)
+        v_grids = self.v_grids_n - v_grids_e
 
-        vK_1 = cupy.linalg.solve(K.T, v_grids)
-        q_sym = (q + cupy.dot(R.T, vK_1))/2.0
+        b = cupy.dot(R, v_grids.T)
+        q = cupy.linalg.solve(K, b).T
+
+        vK_1 = cupy.linalg.solve(K.T, v_grids.T)
+        qt = cupy.dot(R.T, vK_1).T
+        q_sym = (q + qt)/2.0
 
         vmat = self._get_vmat(q_sym)
-        epcm = 0.5 * cupy.dot(q_sym, v_grids)
+        epcm = 0.5 * cupy.dot(v_grids[0], q_sym[0])
 
         self._intermediates['K'] = K
         self._intermediates['R'] = R
-        self._intermediates['q'] = q
-        self._intermediates['q_sym'] = q_sym
-        self._intermediates['v_grids'] = v_grids
+        self._intermediates['q'] = q[0]
+        self._intermediates['q_sym'] = q_sym[0]
+        self._intermediates['v_grids'] = v_grids[0]
 
-        return epcm, vmat
+        return epcm, vmat[0]
 
     def _get_v(self, dms):
         '''
         return electrostatic potential on surface
         '''
-        v_grids_e = 2.0*int3c2e.get_j_int3c2e_pass1(self.intopt, dms[0])
-        v_grids = self.v_grids_n - v_grids_e
-        return v_grids
+        nset = dms.shape[0]
+        ngrids = self.surface['grid_coords'].shape[0]
+        v_grids_e = cupy.empty([nset, ngrids])
+        for i in range(nset):
+            v_grids_e[i] = 2.0*int3c2e.get_j_int3c2e_pass1(self.intopt, dms[i])
+        return v_grids_e
 
     def _get_vmat(self, q):
-        return -int3c2e.get_j_int3c2e_pass2(self.intopt, q)
+        assert q.ndim == 2
+        nset = q.shape[0]
+        nao = self.mol.nao
+        vmat = cupy.empty([nset, nao, nao])
+        for i in range(nset):
+            vmat[i] = -int3c2e.get_j_int3c2e_pass2(self.intopt, q[i])
+        return vmat
 
     def nuc_grad_method(self, grad_method):
         from gpu4pyscf.solvent.grad import pcm as pcm_grad
@@ -340,5 +353,41 @@ def nuc_grad_method(self, grad_method):
         else:
             raise RuntimeError('Only SCF gradient is supported')
 
-    def Hessian(self):
-        raise NotImplementedError('not implemented yet')
+    def Hessian(self, hess_method):
+        from gpu4pyscf.solvent.hessian import pcm as pcm_hess
+        if self.frozen:
+            raise RuntimeError('Frozen solvent model is not supported')
+        from gpu4pyscf import scf
+        if isinstance(hess_method.base, scf.hf.RHF):
+            return pcm_hess.make_hess_object(hess_method)
+        else:
+            raise RuntimeError('Only SCF gradient is supported')
+
+    def reset(self, mol=None):
+        self.surface = None
+        self.intopt = None
+        super().reset(mol)
+        return self
+
+    def _B_dot_x(self, dms):
+        if not self._intermediates:
+            self.build()
+
+        nao = dms.shape[-1]
+        dms = dms.reshape(-1,nao,nao)
+
+        K = self._intermediates['K']
+        R = self._intermediates['R']
+        v_grids = -self._get_v(dms)
+
+        b = cupy.dot(R, v_grids.T)
+        q = cupy.linalg.solve(K, b).T
+
+        vK_1 = cupy.linalg.solve(K.T, v_grids.T)
+        qt = cupy.dot(R.T, vK_1).T
+        q_sym = (q + qt)/2.0
+
+        vmat = self._get_vmat(q_sym)
+
+        return vmat
+
diff --git a/gpu4pyscf/solvent/tests/test_pcm_hessian.py b/gpu4pyscf/solvent/tests/test_pcm_hessian.py
new file mode 100644
index 00000000..ff6074d4
--- /dev/null
+++ b/gpu4pyscf/solvent/tests/test_pcm_hessian.py
@@ -0,0 +1,100 @@
+# Copyright 2023 The GPU4PySCF Authors. All Rights Reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import unittest
+import numpy as np
+from pyscf import gto
+from gpu4pyscf import scf, dft
+from gpu4pyscf.solvent import pcm
+from gpu4pyscf.solvent.grad import pcm as pcm_grad
+
+def setUpModule():
+    global mol, epsilon, lebedev_order, eps, xc, tol
+    mol = gto.Mole()
+    mol.atom = '''
+O       0.0000000000    -0.0000000000     0.1174000000
+H      -0.7570000000    -0.0000000000    -0.4696000000
+H       0.7570000000     0.0000000000    -0.4696000000
+    '''
+    mol.basis = 'def2-tzvpp'
+    mol.output = '/dev/null'
+    mol.build(verbose=0)
+    epsilon = 78.3553
+    lebedev_order = 29
+    eps = 1e-3
+    xc = 'B3LYP'
+    tol = 1e-3
+
+def tearDownModule():
+    global mol
+    mol.stdout.close()
+    del mol
+
+def _check_hessian(method='C-PCM', ix=0, iy=0):
+    pmol = mol.copy()
+    pmol.build()
+
+    mf = dft.rks.RKS(pmol, xc=xc).density_fit().PCM()
+    mf.with_solvent.method = method
+    mf.with_solvent.eps = epsilon
+    mf.with_solvent.lebedev_order = lebedev_order
+    mf.conv_tol = 1e-12
+    mf.grids.atom_grid = (99,590)
+    mf.verbose = 0
+    mf.kernel()
+
+    g = mf.nuc_grad_method()
+    g.auxbasis_response = True
+    g.kernel()
+    g_scanner = g.as_scanner()
+
+    coords = pmol.atom_coords()
+    v = np.zeros_like(coords)
+    v[ix,iy] = eps
+    pmol.set_geom_(coords + v, unit='Bohr')
+    pmol.build()
+    _, g0 = g_scanner(pmol)
+
+    pmol.set_geom_(coords - v, unit='Bohr')
+    pmol.build()
+    _, g1 = g_scanner(pmol)
+
+    h_fd = (g0 - g1)/2.0/eps
+    pmol.set_geom_(coords, unit='Bohr')
+    pmol.build()
+
+    hobj = mf.Hessian()
+    hobj.set(auxbasis_response=2)
+    h = hobj.kernel()
+
+    print(f"analytical Hessian H({ix},{iy})")
+    print(h[ix,:,iy,:])
+    print(f"finite different Hessian H({ix},{iy})")
+    print(h_fd)
+    print('Norm of diff', np.linalg.norm(h[ix,:,iy,:] - h_fd))
+    assert(np.linalg.norm(h[ix,:,iy,:] - h_fd) < tol)
+
+class KnownValues(unittest.TestCase):
+    def test_hess_cpcm(self):
+        _check_hessian(method='C-PCM', ix=0, iy=0)
+        _check_hessian(method='C-PCM', ix=0, iy=1)
+
+    def test_hess_iefpcm(self):
+        _check_hessian(method='IEF-PCM', ix=0, iy=0)
+        _check_hessian(method='IEF-PCM', ix=0, iy=1)
+
+if __name__ == "__main__":
+    print("Full Tests for Hessian of PCMs")
+    unittest.main()
\ No newline at end of file