Skip to content

Commit

Permalink
Minor optimization in Rys integrals (pyscf#262)
Browse files Browse the repository at this point in the history
* some minor optimization

* ruff

* ruff

* merge master

* bugfix

* remove temp changes

* remove comments
  • Loading branch information
wxj6000 authored Dec 4, 2024
1 parent 8a897f3 commit d4dce92
Show file tree
Hide file tree
Showing 46 changed files with 2,450 additions and 84,886 deletions.
3 changes: 2 additions & 1 deletion gpu4pyscf/df/hessian/rhf.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None,

# int3c contributions
wj, wk_P__ = int3c2e.get_int3c2e_jk(mol, auxmol, dm0_tag, omega=omega)
t1 = log.timer_debug1('intermediate variables with int3c2e', *t1)
rhoj0_P = solve_j2c(wj)
rhok0_P__ = solve_j2c(wk_P__)
wj = wk_P__ = None
Expand Down Expand Up @@ -242,7 +243,7 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None,
hk_aux_diag = .5*hk
hj = hk = None
t1 = log.timer_debug1('intermediate variables with int3c2e_ipip2', *t1)

# int2c contributions
if hessobj.auxbasis_response > 1:
if omega and omega > 1e-10:
Expand Down
15 changes: 9 additions & 6 deletions gpu4pyscf/df/int3c2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,6 @@ def loop_int3c2e_general(intopt, ip_type='', omega=None, stream=None):
opt = make_cintopt(pmol._atm, pmol._bas, pmol._env, intor)

nbins = 1

for aux_id, log_q_kl in enumerate(intopt.aux_log_qs):
cp_kl_id = aux_id + len(intopt.log_qs)
lk = intopt.aux_angular[aux_id]
Expand All @@ -518,7 +517,6 @@ def loop_int3c2e_general(intopt, ip_type='', omega=None, stream=None):

ao_offsets = np.array([i0,j0,nao+1+k0,nao], dtype=np.int32)
strides = np.array([1, ni, ni*nj, ni*nj*nk], dtype=np.int32)

# Use GPU kernels for low-angular momentum
if (li + lj + lk + order)//2 + 1 < NROOT_ON_GPU:
int3c_blk = cupy.zeros([comp, nk, nj, ni], order='C', dtype=np.float64)
Expand All @@ -545,13 +543,11 @@ def loop_int3c2e_general(intopt, ip_type='', omega=None, stream=None):
shls_slice = np.array([ishl0, ishl1, jshl0, jshl1, kshl0, kshl1], dtype=np.int64)
int3c_cpu = getints(intor, pmol._atm, pmol._bas, pmol._env, shls_slice, cintopt=opt).transpose([0,3,2,1])
int3c_blk = cupy.asarray(int3c_cpu)

if not intopt.auxmol.cart:
int3c_blk = cart2sph(int3c_blk, axis=1, ang=lk)
if not intopt.mol.cart:
int3c_blk = cart2sph(int3c_blk, axis=2, ang=lj)
int3c_blk = cart2sph(int3c_blk, axis=3, ang=li)

i0, i1 = ao_loc[cpi], ao_loc[cpi+1]
j0, j1 = ao_loc[cpj], ao_loc[cpj+1]
k0, k1 = aux_ao_loc[aux_id], aux_ao_loc[aux_id+1]
Expand Down Expand Up @@ -657,10 +653,12 @@ def get_aux2atom(intopt, auxslices):
aux2atom[p0:p1,ia] = 1.0
return intopt.sort_orbitals(aux2atom, aux_axis=[0])

def get_j_int3c2e_pass1(intopt, dm0, sort_j=True):
def get_j_int3c2e_pass1(intopt, dm0, sort_j=True, stream=None):
'''
get rhoj pass1 for int3c2e
'''
if stream is None: stream = cupy.cuda.get_current_stream()

n_dm = 1

naux = intopt._sorted_auxmol.nao
Expand All @@ -681,7 +679,9 @@ def get_j_int3c2e_pass1(intopt, dm0, sort_j=True):
norb = dm_cart.shape[0]

rhoj = cupy.zeros([naux])

err = libgvhf.GINTbuild_j_int3c2e_pass1(
ctypes.cast(stream.ptr, ctypes.c_void_p),
intopt.bpcache,
ctypes.cast(dm_cart.data.ptr, ctypes.c_void_p),
ctypes.cast(rhoj.data.ptr, ctypes.c_void_p),
Expand All @@ -700,10 +700,12 @@ def get_j_int3c2e_pass1(intopt, dm0, sort_j=True):
rhoj = cupy.dot(rhoj, aux_coeff)
return rhoj

def get_j_int3c2e_pass2(intopt, rhoj):
def get_j_int3c2e_pass2(intopt, rhoj, stream=None):
'''
get vj pass2 for int3c2e
'''
if stream is None: stream = cupy.cuda.get_current_stream()

n_dm = 1
norb = intopt._sorted_mol.nao
naux = intopt._sorted_auxmol.nao
Expand All @@ -723,6 +725,7 @@ def get_j_int3c2e_pass2(intopt, rhoj):
rhoj = intopt.aux_cart2sph @ rhoj

err = libgvhf.GINTbuild_j_int3c2e_pass2(
ctypes.cast(stream.ptr, ctypes.c_void_p),
intopt.bpcache,
ctypes.cast(vj.data.ptr, ctypes.c_void_p),
ctypes.cast(rhoj.data.ptr, ctypes.c_void_p),
Expand Down
1 change: 1 addition & 0 deletions gpu4pyscf/df/tests/test_df_hessian.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def test_hessian_rks_D3(self):
hobj = mf.Hessian()
hobj.set(auxbasis_response=2)
h = hobj.kernel()
print(np.linalg.norm(h))
_check_dft_hessian(mf, h, ix=0,iy=0)

def test_hessian_rks_D4(self):
Expand Down
4 changes: 2 additions & 2 deletions gpu4pyscf/gto/tests/test_int1e_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def setUpModule():
grid_points = lib.cartesian_prod([xs, ys, zs])

# All of the following thresholds bound the max value of the corresponding matrix / tensor.
integral_threshold = 1e-12
density_contraction_threshold = 1e-10
integral_threshold = 1e-10
density_contraction_threshold = 1e-8

def tearDownModule():
global mol_sph, mol_cart, grid_points
Expand Down
9 changes: 3 additions & 6 deletions gpu4pyscf/lib/gint/cint2e.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@

//extern __constant__ GINTEnvVars c_envs;
extern __constant__ BasisProdCache c_bpcache;
extern __constant__ int16_t c_idx4c[NFffff*3];
//extern __constant__ int16_t c_idx4c[NFffff*3];

/*
__constant__ GINTEnvVars c_envs;
__constant__ BasisProdCache c_bpcache;
__constant__ int16_t c_idx4c[NFffff*3];
*/
extern __constant__ int c_idx[TOT_NF*3];
extern __constant__ int c_l_locs[GPU_LMAX+2];
20 changes: 17 additions & 3 deletions gpu4pyscf/lib/gint/constant.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,20 @@

//__constant__ GINTEnvVars c_envs;
__constant__ BasisProdCache c_bpcache;
__constant__ int16_t c_idx4c[NFffff*3];
__constant__ int c_idx[TOT_NF*3];
__constant__ int c_l_locs[GPU_LMAX+1];
//__constant__ int16_t c_idx4c[NFffff*3];

// Generated with GINTinit_index1d_xyz
__constant__ int c_idx[TOT_NF*3] = {
0, 1, 0, 0, 2, 1, 1, 0, 0, 0, 3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 4, 3, 3,
2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2,
2, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2,
1, 0, 0, 1, 0, 2, 1, 0, 3, 2, 1, 0, 0, 1, 0, 2, 1, 0, 3, 2, 1, 0, 4,
3, 2, 1, 0, 0, 1, 0, 2, 1, 0, 3, 2, 1, 0, 4, 3, 2, 1, 0, 5, 4, 3, 2,
1, 0, 0, 1, 0, 2, 1, 0, 3, 2, 1, 0, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1, 0,
6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 2, 0, 0, 1, 0, 1, 2,
0, 1, 2, 3, 0, 0, 1, 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 4, 0, 0, 1, 0,
1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 0, 1, 0, 1, 2,
0, 1, 2, 3, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 6};

__constant__ int c_l_locs[GPU_LMAX+2] = {0, 1, 4, 10, 20, 35, 56, 84};
4 changes: 2 additions & 2 deletions gpu4pyscf/lib/gint/g2e.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ void GINTinit_EnvVars(GINTEnvVars *envs,
envs->nrys_roots = nroots;
envs->fac = fac;

int ibase = 1;//i_l >= j_l; //li_ceil >= lj_ceil;
int kbase = 1;//k_l >= l_l; //lk_ceil >= ll_ceil;
int ibase = 1;//li_ceil >= lj_ceil;
int kbase = 1;//lk_ceil >= ll_ceil;
envs->ibase = ibase;
envs->kbase = kbase;

Expand Down
Loading

0 comments on commit d4dce92

Please sign in to comment.