diff --git a/gpu4pyscf/gto/moleintor.py b/gpu4pyscf/gto/moleintor.py index 0a880c0c..fb060d9a 100644 --- a/gpu4pyscf/gto/moleintor.py +++ b/gpu4pyscf/gto/moleintor.py @@ -24,6 +24,7 @@ from gpu4pyscf.scf.int4c2e import BasisProdCache from gpu4pyscf.df.int3c2e import sort_mol, _split_l_ctr_groups, get_pairing from gpu4pyscf.gto.mole import basis_seg_contraction +from gpu4pyscf.__config__ import _num_devices, _streams BLKSIZE = 128 @@ -31,32 +32,18 @@ libgint = load_library('libgint') class VHFOpt(_vhf.VHFOpt): - def __init__(self, mol, intor, prescreen='CVHFnoscreen', + def __init__(self, mol, intor='int2e', prescreen='CVHFnoscreen', qcondname='CVHFsetnr_direct_scf', dmcondname=None): - # use local basis_seg_contraction for efficiency - # TODO: switch _mol and mol - self.mol = basis_seg_contraction(mol,allow_replica=True) - self._mol = mol - - ''' - # Note mol._bas will be sorted in .build() method. VHFOpt should be - # initialized after mol._bas updated. - ''' - self.nao = self.mol.nao + self.mol = mol + self._sorted_mol = None self._intor = intor self._prescreen = prescreen self._qcondname = qcondname self._dmcondname = dmcondname - self.bpcache = None - - self.cart_ao_idx = None - self.sph_ao_idx = None - self.cart_ao_loc = [] self.sph_ao_loc = [] - self.cart2sph = None self.angular = None @@ -69,8 +56,8 @@ def __init__(self, mol, intor, prescreen='CVHFnoscreen', def clear(self): _vhf.VHFOpt.__del__(self) - if self.bpcache is not None: - libgvhf.GINTdel_basis_prod(ctypes.byref(self.bpcache)) + for n, bpcache in self._bpcache.items(): + libgvhf.GINTdel_basis_prod(ctypes.byref(bpcache)) return self def __del__(self): @@ -79,20 +66,21 @@ def __del__(self): except AttributeError: pass - def build(self, cutoff=1e-14, group_size=None, diag_block_with_triu=False, aosym=False): - _mol = self._mol - mol = self.mol + def build(self, cutoff=1e-13, group_size=BLKSIZE, diag_block_with_triu=False, aosym=True): + original_mol = self.mol + mol = basis_seg_contraction(original_mol, allow_replica=True) - log = logger.new_logger(_mol, _mol.verbose) + log = logger.new_logger(original_mol, original_mol.verbose) cput0 = log.init_timer() - sorted_mol, sorted_idx, uniq_l_ctr, l_ctr_counts = sort_mol(mol, log=log) - self.sorted_mol = sorted_mol + _sorted_mol, sorted_idx, uniq_l_ctr, l_ctr_counts = sort_mol(mol, log=log) + self._sorted_mol = _sorted_mol + if group_size is not None : uniq_l_ctr, l_ctr_counts = _split_l_ctr_groups(uniq_l_ctr, l_ctr_counts, group_size) - self.nctr = len(uniq_l_ctr) + self.l_ctr_counts = l_ctr_counts # Initialize vhfopt after reordering mol._bas - _vhf.VHFOpt.__init__(self, sorted_mol, self._intor, self._prescreen, + _vhf.VHFOpt.__init__(self, _sorted_mol, self._intor, self._prescreen, self._qcondname, self._dmcondname) self.direct_scf_tol = cutoff @@ -104,39 +92,22 @@ def build(self, cutoff=1e-14, group_size=None, diag_block_with_triu=False, aosym l_ctr_offsets, l_ctr_offsets, q_cond, diag_block_with_triu=diag_block_with_triu, aosym=aosym) self.log_qs = log_qs.copy() - cput1 = log.timer_debug1('Get pairing', *cput1) + cput1 = log.timer_debug1('Get AO pairing', *cput1) # contraction coefficient for ao basis - cart_ao_loc = sorted_mol.ao_loc_nr(cart=True) - sph_ao_loc = sorted_mol.ao_loc_nr(cart=False) + cart_ao_loc = _sorted_mol.ao_loc_nr(cart=True) + sph_ao_loc = _sorted_mol.ao_loc_nr(cart=False) self.cart_ao_loc = [cart_ao_loc[cp] for cp in l_ctr_offsets] self.sph_ao_loc = [sph_ao_loc[cp] for cp in l_ctr_offsets] self.angular = [l[0] for l in uniq_l_ctr] - cart_ao_loc = mol.ao_loc_nr(cart=True) - sph_ao_loc = mol.ao_loc_nr(cart=False) - nao = sph_ao_loc[-1] - ao_idx = np.array_split(np.arange(nao), sph_ao_loc[1:-1]) - self.sph_ao_idx = np.hstack([ao_idx[i] for i in sorted_idx]) - - # cartesian ao index - nao = cart_ao_loc[-1] - ao_idx = np.array_split(np.arange(nao), cart_ao_loc[1:-1]) - self.cart_ao_idx = np.hstack([ao_idx[i] for i in sorted_idx]) - self.cart2sph = block_c2s_diag(self.angular, l_ctr_counts) - cput1 = log.timer_debug1('AO cart2sph coeff', *cput1) - - if _mol.cart: - ncart = cart_ao_loc[-1] - inv_idx = np.argsort(self.cart_ao_idx, kind='stable').astype(np.int32) - self.coeff = cp.eye(ncart)[:,inv_idx] - else: - inv_idx = np.argsort(self.sph_ao_idx, kind='stable').astype(np.int32) - self.coeff = self.cart2sph[:, inv_idx] - cput1 = log.timer_debug1('AO cart2sph coeff', *cput1) + # Sorted AO indices + ao_loc = mol.ao_loc_nr(cart=original_mol.cart) + ao_idx = np.array_split(np.arange(original_mol.nao), ao_loc[1:-1]) + self._ao_idx = np.hstack([ao_idx[i] for i in sorted_idx]) + cput1 = log.timer_debug1('AO indices', *cput1) - ao_loc = sorted_mol.ao_loc_nr(cart=True) - cput1 = log.timer_debug1('Get AO pairs', *cput1) + ao_loc = cart_ao_loc self.pair2bra = pair2bra self.pair2ket = pair2ket @@ -156,16 +127,20 @@ def get_n_hermite_density_of_angular_pair(l): n_density_per_angular_pair = (bas_pairs_locs[1:] - bas_pairs_locs[:-1]) * n_density_per_pair self.density_offset = np.append(0, np.cumsum(n_density_per_angular_pair)).astype(np.int32) - self.bpcache = ctypes.POINTER(BasisProdCache)() - scale_shellpair_diag = 1.0 - libgint.GINTinit_basis_prod( - ctypes.byref(self.bpcache), ctypes.c_double(scale_shellpair_diag), - ao_loc.ctypes.data_as(ctypes.c_void_p), - bas_pair2shls.ctypes.data_as(ctypes.c_void_p), - bas_pairs_locs.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(ncptype), - sorted_mol._atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(sorted_mol.natm), - sorted_mol._bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(sorted_mol.nbas), - sorted_mol._env.ctypes.data_as(ctypes.c_void_p)) + self._bpcache = {} + for n in range(_num_devices): + with cp.cuda.Device(n), _streams[n]: + bpcache = ctypes.POINTER(BasisProdCache)() + scale_shellpair_diag = 1.0 + libgint.GINTinit_basis_prod( + ctypes.byref(bpcache), ctypes.c_double(scale_shellpair_diag), + ao_loc.ctypes.data_as(ctypes.c_void_p), + bas_pair2shls.ctypes.data_as(ctypes.c_void_p), + bas_pairs_locs.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(ncptype), + _sorted_mol._atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(_sorted_mol.natm), + _sorted_mol._bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(_sorted_mol.nbas), + _sorted_mol._env.ctypes.data_as(ctypes.c_void_p)) + self._bpcache[n] = bpcache cput1 = log.timer_debug1('Initialize GPU cache', *cput1) ncptype = len(self.log_qs) @@ -176,22 +151,44 @@ def get_n_hermite_density_of_angular_pair(l): nl = int(round(np.sqrt(ncptype))) self.cp_idx, self.cp_jdx = np.unravel_index(np.arange(ncptype), (nl, nl)) - if _mol.cart: + if original_mol.cart: self.ao_loc = self.cart_ao_loc - self.ao_idx = self.cart_ao_idx else: self.ao_loc = self.sph_ao_loc - self.ao_idx = self.sph_ao_idx + + def sort_orbitals(self, mat, axis=[]): + ''' Transform given axis of a matrix into sorted AO, + and transform given auxiliary axis of a matrix into sorted auxiliary AO + ''' + idx = self._ao_idx + shape_ones = (1,) * mat.ndim + fancy_index = [] + for dim, n in enumerate(mat.shape): + if dim in axis: + assert n == len(idx) + indices = idx + else: + indices = np.arange(n) + idx_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:] + fancy_index.append(indices.reshape(idx_shape)) + return mat[tuple(fancy_index)] + + @property + def bpcache(self): + device_id = cp.cuda.Device().id + bpcache = self._bpcache[device_id] + return bpcache + + @property + def cart2sph(self): + return block_c2s_diag(self.angular, self.l_ctr_counts) # end of class VHFOpt -def get_int3c1e(mol, grids, direct_scf_tol): +def get_int3c1e(mol, grids, intopt): omega = mol.omega assert omega >= 0.0, "Short-range one electron integrals with GPU acceleration is not implemented." - intopt = VHFOpt(mol, 'int2e') - intopt.build(direct_scf_tol, diag_block_with_triu=True, aosym=True, group_size=BLKSIZE) - nao = mol.nao ngrids = grids.shape[0] total_double_number = ngrids * nao * nao @@ -221,7 +218,7 @@ def get_int3c1e(mol, grids, direct_scf_tol): lj = intopt.angular[cpj] stream = cp.cuda.get_current_stream() - nao_cart = intopt.mol.nao + nao_cart = intopt._sorted_mol.nao log_q_ij = intopt.log_qs[cp_ij_id] @@ -265,7 +262,7 @@ def get_int3c1e(mol, grids, direct_scf_tol): row, col = np.tril_indices(nao) int3c_grid_slice[:, row, col] = int3c_grid_slice[:, col, row] - ao_idx = np.argsort(intopt.ao_idx) + ao_idx = np.argsort(intopt._ao_idx) grid_idx = np.arange(ngrids_of_split) int3c_grid_slice = int3c_grid_slice[np.ix_(grid_idx, ao_idx, ao_idx)] @@ -273,13 +270,10 @@ def get_int3c1e(mol, grids, direct_scf_tol): return int3c -def get_int3c1e_charge_contracted(mol, grids, charges, direct_scf_tol): +def get_int3c1e_charge_contracted(mol, grids, charges, intopt): omega = mol.omega assert omega >= 0.0, "Short-range one electron integrals with GPU acceleration is not implemented." - intopt = VHFOpt(mol, 'int2e') - intopt.build(direct_scf_tol, diag_block_with_triu=True, aosym=True, group_size=BLKSIZE) - nao = mol.nao assert charges.ndim == 1 and charges.shape[0] == grids.shape[0] @@ -296,7 +290,7 @@ def get_int3c1e_charge_contracted(mol, grids, charges, direct_scf_tol): lj = intopt.angular[cpj] stream = cp.cuda.get_current_stream() - nao_cart = intopt.mol.nao + nao_cart = intopt._sorted_mol.nao log_q_ij = intopt.log_qs[cp_ij_id] @@ -312,8 +306,8 @@ def get_int3c1e_charge_contracted(mol, grids, charges, direct_scf_tol): strides = np.array([ni, ni*nj], dtype=np.int32) ngrids = grids.shape[0] - # n_charge_sum_per_thread = 1 means every thread processes one pair and one grid - # n_charge_sum_per_thread = ngrids or larger number gaurantees one thread processes one pair and all grid points + # n_charge_sum_per_thread = 1 # means every thread processes one pair and one grid + # n_charge_sum_per_thread = ngrids # or larger number gaurantees one thread processes one pair and all grid points n_charge_sum_per_thread = 10 int1e_angular_slice = cp.zeros([j1-j0, i1-i0], order='C') @@ -346,26 +340,30 @@ def get_int3c1e_charge_contracted(mol, grids, charges, direct_scf_tol): row, col = np.tril_indices(nao) int1e[row, col] = int1e[col, row] - ao_idx = np.argsort(intopt.ao_idx) + ao_idx = np.argsort(intopt._ao_idx) int1e = int1e[np.ix_(ao_idx, ao_idx)] - return cp.asnumpy(int1e) + return int1e -def get_int3c1e_density_contracted(mol, grids, dm, direct_scf_tol): +def get_int3c1e_density_contracted(mol, grids, dm, intopt): omega = mol.omega assert omega >= 0.0, "Short-range one electron integrals with GPU acceleration is not implemented." dm = cp.asarray(dm) + if dm.ndim == 3: + if dm.shape[0] > 2: + print("Warning: There are more than two density matrices to contract with one electron integrals, " + "it's not from an unrestricted calculation, and we're unsure about your purpose. " + "We sum the density matrices up, please check if that's expected.") + dm = cp.einsum("ijk->jk", dm) + assert dm.ndim == 2 assert dm.shape[0] == dm.shape[1] and dm.shape[0] == mol.nao - intopt = VHFOpt(mol, 'int2e') - intopt.build(direct_scf_tol, diag_block_with_triu=False, aosym=True, group_size=BLKSIZE) - - nao_cart = intopt.mol.nao + nao_cart = intopt._sorted_mol.nao ngrids = grids.shape[0] - dm = dm[np.ix_(intopt.ao_idx, intopt.ao_idx)] # intopt.ao_idx is in spherical basis + dm = intopt.sort_orbitals(dm, [0,1]) if not mol.cart: cart2sph_transformation_matrix = intopt.cart2sph # TODO: This part is inefficient (O(N^3)), should be changed to the O(N^2) algorithm @@ -374,9 +372,9 @@ def get_int3c1e_density_contracted(mol, grids, dm, direct_scf_tol): dm = cp.asnumpy(dm) - ao_loc_sorted_order = intopt.sorted_mol.ao_loc_nr(cart = True) + ao_loc_sorted_order = intopt._sorted_mol.ao_loc_nr(cart = True) l_ij = intopt.l_ij.T.flatten() - bas_coords = intopt.sorted_mol.atom_coords()[intopt.sorted_mol._bas[:, ATOM_OF]].flatten() + bas_coords = intopt._sorted_mol.atom_coords()[intopt._sorted_mol._bas[:, ATOM_OF]].flatten() n_total_hermite_density = intopt.density_offset[-1] dm_pair_ordered = np.zeros(n_total_hermite_density) @@ -412,8 +410,8 @@ def get_int3c1e_density_contracted(mol, grids, dm, direct_scf_tol): nbins = 1 bins_locs_ij = np.array([0, len(log_q_ij)], dtype=np.int32) - # n_pair_sum_per_thread = 1 means every thread processes one pair and one grid - # n_pair_sum_per_thread = nao_cart or larger number gaurantees one thread processes one grid and all pairs of the same type + # n_pair_sum_per_thread = 1 # means every thread processes one pair and one grid + # n_pair_sum_per_thread = nao_cart # or larger number gaurantees one thread processes one grid and all pairs of the same type n_pair_sum_per_thread = nao_cart err = libgint.GINTfill_int3c1e_density_contracted( @@ -433,19 +431,28 @@ def get_int3c1e_density_contracted(mol, grids, dm, direct_scf_tol): if err != 0: raise RuntimeError('GINTfill_int3c1e_density_contracted failed') - return cp.asnumpy(int3c_density_contracted) + return int3c_density_contracted -def intor(mol, intor, grids, dm=None, charges=None, direct_scf_tol=1e-13): - assert intor == 'int1e_grids' and grids is not None +def intor(mol, intor, grids, dm=None, charges=None, direct_scf_tol=1e-13, intopt=None): + assert intor == 'int1e_grids' + assert grids is not None assert dm is None or charges is None, \ "Are you sure you want to contract the one electron integrals with both charge and density? " + \ "If so, pass in density, obtain the result with n_charge and contract with the charges yourself." + if intopt is None: + intopt = VHFOpt(mol) + intopt.build(direct_scf_tol) + else: + assert isinstance(intopt, VHFOpt), \ + f"Please make sure intopt is a {VHFOpt.__module__}.{VHFOpt.__name__} object." + assert hasattr(intopt, "density_offset"), "Please call build() function for VHFOpt object first." + if dm is None and charges is None: - return get_int3c1e(mol, grids, direct_scf_tol) + return get_int3c1e(mol, grids, intopt) elif dm is not None: - return get_int3c1e_density_contracted(mol, grids, dm, direct_scf_tol) + return get_int3c1e_density_contracted(mol, grids, dm, intopt) elif charges is not None: - return get_int3c1e_charge_contracted(mol, grids, charges, direct_scf_tol) + return get_int3c1e_charge_contracted(mol, grids, charges, intopt) else: raise ValueError(f"Logic error in {__file__} {__name__}") diff --git a/gpu4pyscf/gto/tests/test_int1e_grids.py b/gpu4pyscf/gto/tests/test_int1e_grids.py index 56ee9065..53d7dbad 100644 --- a/gpu4pyscf/gto/tests/test_int1e_grids.py +++ b/gpu4pyscf/gto/tests/test_int1e_grids.py @@ -15,12 +15,13 @@ import unittest import numpy as np +import cupy as cp import pyscf from pyscf import lib from gpu4pyscf.gto.moleintor import intor def setUpModule(): - global mol_sph, mol_cart, grid_points, integral_threshold, density_contraction_threshold + global mol_sph, mol_cart, grid_points, integral_threshold, density_contraction_threshold, charge_contraction_threshold atom = ''' O 0.0000 0.7375 -0.0528 O 0.0000 -0.7375 -0.0528 @@ -45,8 +46,9 @@ def setUpModule(): grid_points = lib.cartesian_prod([xs, ys, zs]) # All of the following thresholds bound the max value of the corresponding matrix / tensor. - integral_threshold = 1e-10 - density_contraction_threshold = 1e-8 + integral_threshold = 1e-12 + density_contraction_threshold = 1e-10 + charge_contraction_threshold = 1e-12 def tearDownModule(): global mol_sph, mol_cart, grid_points @@ -56,17 +58,17 @@ def tearDownModule(): class KnownValues(unittest.TestCase): ''' - known values are obtained by Q-Chem + Values are compared to PySCF CPU intor() function ''' def test_int1e_grids_full_tensor_cart(self): ref_int1e = mol_cart.intor('int1e_grids', grids=grid_points) test_int1e = intor(mol_cart, 'int1e_grids', grid_points) - assert np.abs(ref_int1e - test_int1e).max() < integral_threshold + np.testing.assert_allclose(ref_int1e, test_int1e, atol = integral_threshold) def test_int1e_grids_full_tensor_sph(self): ref_int1e = mol_sph.intor('int1e_grids', grids=grid_points) test_int1e = intor(mol_sph, 'int1e_grids', grid_points) - assert np.abs(ref_int1e - test_int1e).max() < integral_threshold + np.testing.assert_allclose(ref_int1e, test_int1e, atol = integral_threshold) def test_int1e_grids_density_contracted_cart_symmetric(self): np.random.seed(12345) @@ -74,7 +76,8 @@ def test_int1e_grids_density_contracted_cart_symmetric(self): dm = 0.5 * (dm + dm.T) ref_int1e_dot_D = np.einsum('pij,ij->p', mol_cart.intor('int1e_grids', grids=grid_points), dm) test_int1e_dot_D = intor(mol_cart, 'int1e_grids', grid_points, dm = dm) - assert np.abs(ref_int1e_dot_D - test_int1e_dot_D).max() < density_contraction_threshold + assert isinstance(test_int1e_dot_D, cp.ndarray) + cp.testing.assert_allclose(ref_int1e_dot_D, test_int1e_dot_D, atol = density_contraction_threshold) def test_int1e_grids_density_contracted_sph_symmetric(self): np.random.seed(12346) @@ -82,35 +85,42 @@ def test_int1e_grids_density_contracted_sph_symmetric(self): dm = 0.5 * (dm + dm.T) ref_int1e_dot_D = np.einsum('pij,ij->p', mol_sph.intor('int1e_grids', grids=grid_points), dm) test_int1e_dot_D = intor(mol_sph, 'int1e_grids', grid_points, dm = dm) - assert np.abs(ref_int1e_dot_D - test_int1e_dot_D).max() < density_contraction_threshold + assert isinstance(test_int1e_dot_D, cp.ndarray) + cp.testing.assert_allclose(ref_int1e_dot_D, test_int1e_dot_D, atol = density_contraction_threshold) def test_int1e_grids_density_contracted_cart_asymmetric(self): np.random.seed(12347) dm = np.random.uniform(-2.0, 2.0, (mol_cart.nao, mol_cart.nao)) ref_int1e_dot_D = np.einsum('pij,ij->p', mol_cart.intor('int1e_grids', grids=grid_points), dm) test_int1e_dot_D = intor(mol_cart, 'int1e_grids', grid_points, dm = dm) - assert np.abs(ref_int1e_dot_D - test_int1e_dot_D).max() < density_contraction_threshold + assert isinstance(test_int1e_dot_D, cp.ndarray) + cp.testing.assert_allclose(ref_int1e_dot_D, test_int1e_dot_D, atol = density_contraction_threshold) def test_int1e_grids_density_contracted_sph_asymmetric(self): np.random.seed(12348) dm = np.random.uniform(-2.0, 2.0, (mol_sph.nao, mol_sph.nao)) ref_int1e_dot_D = np.einsum('pij,ij->p', mol_sph.intor('int1e_grids', grids=grid_points), dm) test_int1e_dot_D = intor(mol_sph, 'int1e_grids', grid_points, dm = dm) - assert np.abs(ref_int1e_dot_D - test_int1e_dot_D).max() < density_contraction_threshold + assert isinstance(test_int1e_dot_D, cp.ndarray) + cp.testing.assert_allclose(ref_int1e_dot_D, test_int1e_dot_D, atol = density_contraction_threshold) def test_int1e_grids_charge_contracted_cart(self): np.random.seed(12347) charges = np.random.uniform(-2.0, 2.0, grid_points.shape[0]) ref_int1e_dot_q = np.einsum('pij,p->ij', mol_cart.intor('int1e_grids', grids=grid_points), charges) test_int1e_dot_q = intor(mol_cart, 'int1e_grids', grid_points, charges = charges) - assert np.abs(ref_int1e_dot_q - test_int1e_dot_q).max() < density_contraction_threshold + assert isinstance(test_int1e_dot_q, cp.ndarray) + cp.testing.assert_allclose(ref_int1e_dot_q, test_int1e_dot_q, atol = charge_contraction_threshold) def test_int1e_grids_charge_contracted_sph(self): np.random.seed(12348) charges = np.random.uniform(-2.0, 2.0, grid_points.shape[0]) ref_int1e_dot_q = np.einsum('pij,p->ij', mol_sph.intor('int1e_grids', grids=grid_points), charges) test_int1e_dot_q = intor(mol_sph, 'int1e_grids', grid_points, charges = charges) - assert np.abs(ref_int1e_dot_q - test_int1e_dot_q).max() < density_contraction_threshold + assert isinstance(test_int1e_dot_q, cp.ndarray) + cp.testing.assert_allclose(ref_int1e_dot_q, test_int1e_dot_q, atol = charge_contraction_threshold) + + # Range separated integrals def test_int1e_grids_full_tensor_omega(self): omega = 0.8 @@ -119,7 +129,7 @@ def test_int1e_grids_full_tensor_omega(self): ref_int1e = mol_sph_omega.intor('int1e_grids', grids=grid_points) test_int1e = intor(mol_sph_omega, 'int1e_grids', grid_points) - assert np.abs(ref_int1e - test_int1e).max() < integral_threshold + np.testing.assert_allclose(ref_int1e, test_int1e, atol = integral_threshold) def test_int1e_grids_density_contracted_omega(self): np.random.seed(12349) @@ -131,7 +141,8 @@ def test_int1e_grids_density_contracted_omega(self): ref_int1e_dot_D = np.einsum('pij,ij->p', mol_sph_omega.intor('int1e_grids', grids=grid_points), dm) test_int1e_dot_D = intor(mol_sph_omega, 'int1e_grids', grid_points, dm = dm) - assert np.abs(ref_int1e_dot_D - test_int1e_dot_D).max() < integral_threshold + assert isinstance(test_int1e_dot_D, cp.ndarray) + cp.testing.assert_allclose(ref_int1e_dot_D, test_int1e_dot_D, atol = density_contraction_threshold) def test_int1e_grids_charge_contracted_omega(self): np.random.seed(12349) @@ -143,7 +154,8 @@ def test_int1e_grids_charge_contracted_omega(self): ref_int1e_dot_q = np.einsum('pij,p->ij', mol_sph_omega.intor('int1e_grids', grids=grid_points), charges) test_int1e_dot_q = intor(mol_sph_omega, 'int1e_grids', grid_points, charges = charges) - assert np.abs(ref_int1e_dot_q - test_int1e_dot_q).max() < integral_threshold + assert isinstance(test_int1e_dot_q, cp.ndarray) + cp.testing.assert_allclose(ref_int1e_dot_q, test_int1e_dot_q, atol = charge_contraction_threshold) if __name__ == "__main__": print("Full Tests for One Electron Coulomb Integrals") diff --git a/gpu4pyscf/hessian/rks.py b/gpu4pyscf/hessian/rks.py index 66daf960..96e57279 100644 --- a/gpu4pyscf/hessian/rks.py +++ b/gpu4pyscf/hessian/rks.py @@ -374,8 +374,8 @@ def _get_vxc_deriv2(hessobj, mo_coeff, mo_occ, max_memory): vmat_dm[ia][:,:,mask] += contract('yjg,xjg->xyj', ao_mask[1:4], aow) ao_dm0 = aow = None t1 = log.timer_debug2('integration', *t1) + vmat_dm = opt.unsort_orbitals(vmat_dm, axis=[3]) for ia in range(_sorted_mol.natm): - vmat_dm[ia][:,:,opt._ao_idx] = vmat_dm[ia] p0, p1 = aoslices[ia][2:] vmat_dm[ia] += contract('xypq,pq->xyp', ipip[:,:,:,p0:p1], dm0[:,p0:p1]) elif xctype == 'GGA': @@ -411,8 +411,8 @@ def _get_vxc_deriv2(hessobj, mo_coeff, mo_occ, max_memory): vmat_dm[ia][:,:,mask] += vmat_dm_tmp ao_dm0 = aow = None t1 = log.timer_debug2('integration', *t1) + vmat_dm = opt.unsort_orbitals(vmat_dm, axis=[3]) for ia in range(_sorted_mol.natm): - vmat_dm[ia][:,:,opt._ao_idx] = vmat_dm[ia] p0, p1 = aoslices[ia][2:] vmat_dm[ia] += contract('xypq,pq->xyp', ipip[:,:,:,p0:p1], dm0[:,p0:p1]) vmat_dm[ia] += contract('yxqp,pq->xyp', ipip[:,:,p0:p1], dm0[:,p0:p1]) @@ -478,8 +478,8 @@ def _get_vxc_deriv2(hessobj, mo_coeff, mo_occ, max_memory): vmat_dm[ia][:,:,mask] += vmat_dm_tmp t1 = log.timer_debug2('integration', *t1) + vmat_dm = opt.unsort_orbitals(vmat_dm, axis=[3]) for ia in range(_sorted_mol.natm): - vmat_dm[ia][:,:,opt._ao_idx] = vmat_dm[ia] p0, p1 = aoslices[ia][2:] vmat_dm[ia] += contract('xypq,pq->xyp', ipip[:,:,:,p0:p1], dm0[:,p0:p1]) vmat_dm[ia] += contract('yxqp,pq->xyp', ipip[:,:,p0:p1], dm0[:,p0:p1]) diff --git a/gpu4pyscf/hessian/uks.py b/gpu4pyscf/hessian/uks.py index d7537cc4..d42c8f76 100644 --- a/gpu4pyscf/hessian/uks.py +++ b/gpu4pyscf/hessian/uks.py @@ -446,10 +446,10 @@ def _get_vxc_deriv2(hessobj, mo_coeff, mo_occ, max_memory): vmatb_dm[ia][:,:,mask] += contract('yjg,xjg->xyj', ao_mask[1:4], aow) ao_dm0a = ao_dm0b = aow = None t1 = log.timer_debug2('integration', *t1) + vmata_dm = opt.unsort_orbitals(vmata_dm, axis=[3]) + vmatb_dm = opt.unsort_orbitals(vmatb_dm, axis=[3]) for ia in range(_sorted_mol.natm): p0, p1 = aoslices[ia][2:] - vmata_dm[ia][:,:,opt._ao_idx] = vmata_dm[ia] - vmatb_dm[ia][:,:,opt._ao_idx] = vmatb_dm[ia] vmata_dm[ia] += contract('xypq,pq->xyp', ipipa[:,:,:,p0:p1], dm0a[:,p0:p1]) vmatb_dm[ia] += contract('xypq,pq->xyp', ipipb[:,:,:,p0:p1], dm0b[:,p0:p1]) elif xctype == 'GGA': @@ -503,9 +503,9 @@ def _get_vxc_deriv2(hessobj, mo_coeff, mo_occ, max_memory): vmatb_dm[ia][:,:,mask] += vmatb_dm_tmp ao_dm0a = ao_dm0b = aow = None t1 = log.timer_debug2('integration', *t1) + vmata_dm = opt.unsort_orbitals(vmata_dm, axis=[3]) + vmatb_dm = opt.unsort_orbitals(vmatb_dm, axis=[3]) for ia in range(_sorted_mol.natm): - vmata_dm[ia][:,:,opt._ao_idx] = vmata_dm[ia] - vmatb_dm[ia][:,:,opt._ao_idx] = vmatb_dm[ia] p0, p1 = aoslices[ia][2:] vmata_dm[ia] += contract('xypq,pq->xyp', ipipa[:,:,:,p0:p1], dm0a[:,p0:p1]) vmata_dm[ia] += contract('yxqp,pq->xyp', ipipa[:,:,p0:p1], dm0a[:,p0:p1]) @@ -618,9 +618,9 @@ def _get_vxc_deriv2(hessobj, mo_coeff, mo_occ, max_memory): vmata_dm[ia][:,:,mask] += vmata_dm_tmp vmatb_dm[ia][:,:,mask] += vmatb_dm_tmp t1 = log.timer_debug2('integration', *t1) + vmata_dm = opt.unsort_orbitals(vmata_dm, axis=[3]) + vmatb_dm = opt.unsort_orbitals(vmatb_dm, axis=[3]) for ia in range(_sorted_mol.natm): - vmata_dm[ia][:,:,opt._ao_idx] = vmata_dm[ia] - vmatb_dm[ia][:,:,opt._ao_idx] = vmatb_dm[ia] p0, p1 = aoslices[ia][2:] vmata_dm[ia] += contract('xypq,pq->xyp', ipipa[:,:,:,p0:p1], dm0a[:,p0:p1]) vmata_dm[ia] += contract('yxqp,pq->xyp', ipipa[:,:,p0:p1], dm0a[:,p0:p1]) diff --git a/gpu4pyscf/pop/esp.py b/gpu4pyscf/pop/esp.py index 4934e4e2..1b1f0f6f 100644 --- a/gpu4pyscf/pop/esp.py +++ b/gpu4pyscf/pop/esp.py @@ -23,7 +23,7 @@ import cupy from pyscf import gto from pyscf.data import radii -from gpu4pyscf.df import int3c2e +from gpu4pyscf.gto.moleintor import intor from gpu4pyscf.lib.cupy_helper import dist_matrix #modified_Bondi = radii.VDW.copy() @@ -137,10 +137,7 @@ def build_ab(mol, dm, A[:natm, :natm] = rinv.dot(rinv.T) # For right hand side B - auxmol = gto.fakemol_for_charges(surface_points) - intopt = int3c2e.VHFOpt(mol, auxmol, 'int2e') - intopt.build(1e-14, diag_block_with_triu=False, aosym=True, group_size=256) - v_grids_e = 2.0*int3c2e.get_j_int3c2e_pass1(intopt, dm, sort_j=False) + v_grids_e = intor(mol, 'int1e_grids', surface_points, dm=dm, direct_scf_tol=1e-14) v_grids_n = cupy.dot(charges, rinv) B = cupy.empty([dim]) diff --git a/gpu4pyscf/qmmm/chelpg.py b/gpu4pyscf/qmmm/chelpg.py index 11d2687c..12c27552 100644 --- a/gpu4pyscf/qmmm/chelpg.py +++ b/gpu4pyscf/qmmm/chelpg.py @@ -13,163 +13,16 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import pyscf -import time import cupy import numpy as np import scipy -import ctypes -from pyscf import lib, gto -from pyscf.scf import _vhf -from gpu4pyscf.df import int3c2e -from gpu4pyscf.scf.int4c2e import BasisProdCache, libgint, libgvhf -from gpu4pyscf.lib.cupy_helper import load_library, block_c2s_diag +from gpu4pyscf.gto.moleintor import intor, VHFOpt from gpu4pyscf.lib import logger -from gpu4pyscf.__config__ import _num_devices, _streams from pyscf.data import radii modified_Bondi = radii.VDW.copy() modified_Bondi[1] = 1.1/radii.BOHR # modified version -# TODO: replace int3c2e.get_j_int3c2e_pass1 with int1e_grids -def _build_VHFOpt(intopt, cutoff=1e-14, group_size=None, - group_size_aux=None, diag_block_with_triu=False, aosym=False): - ''' - Implement the similar functionality as VHFOpt.build, - but without transformation for auxiliary basis. - ''' - sorted_mol, sorted_idx, uniq_l_ctr, l_ctr_counts = int3c2e.sort_mol( - intopt.mol) - if group_size is not None: - uniq_l_ctr, l_ctr_counts = int3c2e._split_l_ctr_groups( - uniq_l_ctr, l_ctr_counts, group_size) - intopt.l_ctr_counts = l_ctr_counts - - # sort fake mol - fake_mol = int3c2e.make_fake_mol() - _, _, fake_uniq_l_ctr, fake_l_ctr_counts = int3c2e.sort_mol(fake_mol) - - # sort auxiliary mol - sorted_auxmol, _, aux_uniq_l_ctr, aux_l_ctr_counts = int3c2e.sort_mol( - intopt.auxmol) - if group_size_aux is not None: - aux_uniq_l_ctr, aux_l_ctr_counts = int3c2e._split_l_ctr_groups( - aux_uniq_l_ctr, aux_l_ctr_counts, group_size_aux) - intopt.aux_l_ctr_counts = aux_l_ctr_counts - - tmp_mol = gto.mole.conc_mol(fake_mol, sorted_auxmol) - tot_mol = gto.mole.conc_mol(sorted_mol, tmp_mol) - - # Initialize vhfopt after reordering mol._bas - _vhf.VHFOpt.__init__(intopt, sorted_mol, intopt._intor, intopt._prescreen, - intopt._qcondname, intopt._dmcondname) - intopt.direct_scf_tol = cutoff - - # TODO: is it more accurate to filter with overlap_cond (or exp_cond)? - q_cond = intopt.get_q_cond() - l_ctr_offsets = np.append(0, np.cumsum(l_ctr_counts)) - log_qs, pair2bra, pair2ket = int3c2e.get_pairing( - l_ctr_offsets, l_ctr_offsets, q_cond, - diag_block_with_triu=diag_block_with_triu, aosym=aosym) - intopt.log_qs = log_qs.copy() - - # contraction coefficient for ao basis - cart_ao_loc = sorted_mol.ao_loc_nr(cart=True) - sph_ao_loc = sorted_mol.ao_loc_nr(cart=False) - intopt.cart_ao_loc = [cart_ao_loc[cp] for cp in l_ctr_offsets] - intopt.sph_ao_loc = [sph_ao_loc[cp] for cp in l_ctr_offsets] - intopt.angular = [l[0] for l in uniq_l_ctr] - - cart_ao_loc = intopt.mol.ao_loc_nr(cart=True) - sph_ao_loc = intopt.mol.ao_loc_nr(cart=False) - nao = sph_ao_loc[-1] - ao_idx = np.array_split(np.arange(nao), sph_ao_loc[1:-1]) - intopt.sph_ao_idx = np.hstack([ao_idx[i] for i in sorted_idx]) - - # cartesian ao index - nao = cart_ao_loc[-1] - ao_idx = np.array_split(np.arange(nao), cart_ao_loc[1:-1]) - intopt.cart_ao_idx = np.hstack([ao_idx[i] for i in sorted_idx]) - ncart = cart_ao_loc[-1] - - # pairing auxiliary basis with fake basis set - fake_l_ctr_offsets = np.append(0, np.cumsum(fake_l_ctr_counts)) - fake_l_ctr_offsets += l_ctr_offsets[-1] - - aux_l_ctr_offsets = np.append(0, np.cumsum(aux_l_ctr_counts)) - - # contraction coefficient for auxiliary basis - cart_aux_loc = sorted_auxmol.ao_loc_nr(cart=True) - sph_aux_loc = sorted_auxmol.ao_loc_nr(cart=False) - intopt.cart_aux_loc = [cart_aux_loc[cp] for cp in aux_l_ctr_offsets] - intopt.sph_aux_loc = [sph_aux_loc[cp] for cp in aux_l_ctr_offsets] - intopt.aux_angular = [l[0] for l in aux_uniq_l_ctr] - - cart_aux_loc = intopt.auxmol.ao_loc_nr(cart=True) - sph_aux_loc = intopt.auxmol.ao_loc_nr(cart=False) - ncart = cart_aux_loc[-1] - # inv_idx = np.argsort(intopt.sph_aux_idx, kind='stable').astype(np.int32) - aux_l_ctr_offsets += fake_l_ctr_offsets[-1] - - # hardcoded for grids - aux_pair2bra = [np.arange(aux_l_ctr_offsets[0], aux_l_ctr_offsets[-1])] - aux_pair2ket = [np.ones(ncart) * fake_l_ctr_offsets[0]] - aux_log_qs = [np.ones(ncart)] - - intopt.aux_log_qs = aux_log_qs.copy() - pair2bra += aux_pair2bra - pair2ket += aux_pair2ket - - uniq_l_ctr = np.concatenate( - [uniq_l_ctr, fake_uniq_l_ctr, aux_uniq_l_ctr]) - l_ctr_offsets = np.concatenate([ - l_ctr_offsets, - fake_l_ctr_offsets[1:], - aux_l_ctr_offsets[1:]]) - - bas_pair2shls = np.hstack( - pair2bra + pair2ket).astype(np.int32).reshape(2, -1) - bas_pairs_locs = np.append(0, np.cumsum( - [x.size for x in pair2bra])).astype(np.int32) - log_qs = log_qs + aux_log_qs - ao_loc = tot_mol.ao_loc_nr(cart=True) - ncptype = len(log_qs) - - intopt._bpcache = {} - for n in range(_num_devices): - with cupy.cuda.Device(n), _streams[n]: - bpcache = ctypes.POINTER(BasisProdCache)() - scale_shellpair_diag = 1. - - libgint.GINTinit_basis_prod( - ctypes.byref(bpcache), - ctypes.c_double(scale_shellpair_diag), - ao_loc.ctypes.data_as(ctypes.c_void_p), - bas_pair2shls.ctypes.data_as(ctypes.c_void_p), - bas_pairs_locs.ctypes.data_as(ctypes.c_void_p), - ctypes.c_int(ncptype), - tot_mol._atm.ctypes.data_as(ctypes.c_void_p), - ctypes.c_int(tot_mol.natm), - tot_mol._bas.ctypes.data_as(ctypes.c_void_p), - ctypes.c_int(tot_mol.nbas), - tot_mol._env.ctypes.data_as(ctypes.c_void_p)) - intopt._bpcache[n] = bpcache - intopt.bas_pairs_locs = bas_pairs_locs - ncptype = len(intopt.log_qs) - if aosym: - intopt.cp_idx, intopt.cp_jdx = np.tril_indices(ncptype) - else: - nl = int(round(np.sqrt(ncptype))) - intopt.cp_idx, intopt.cp_jdx = np.unravel_index( - np.arange(ncptype), (nl, nl)) - - intopt._sorted_mol = sorted_mol - intopt._sorted_auxmol = sorted_auxmol - if intopt.mol.cart: - intopt._ao_idx = intopt.cart_ao_idx - else: - intopt._ao_idx = intopt.sph_ao_idx - def eval_chelpg_layer_gpu(mf, deltaR=0.3, Rhead=2.8, ifqchem=True, Rvdw=modified_Bondi, verbose=None): """Cal chelpg charge @@ -254,17 +107,12 @@ def tau_f(R, Rcut, Roff): nbatch = 256*256 # assert nbatch < ngrids - fmol = pyscf.gto.fakemol_for_charges(gridcoords[:nbatch]) - intopt = int3c2e.VHFOpt(mf.mol, fmol, 'int2e') + intopt = VHFOpt(mf.mol) + intopt.build(cutoff=1e-14) for ibatch in range(0, ngrids, nbatch): max_grid = min(ibatch+nbatch, ngrids) - num_grids = max_grid - ibatch - ptr = intopt.auxmol._atm[:num_grids, gto.PTR_COORD] - intopt.auxmol._env[np.vstack( - (ptr, ptr+1, ptr+2)).T] = gridcoords[ibatch:max_grid] - _build_VHFOpt(intopt, 1e-14, diag_block_with_triu=False, aosym=True) - potential_real[ibatch:max_grid] -= 2.0 * \ - int3c2e.get_j_int3c2e_pass1(intopt, dm, sort_j=False)[:num_grids] + potential_real[ibatch:max_grid] -= \ + intor(mf.mol, 'int1e_grids', gridcoords[ibatch:max_grid], dm=dm, intopt=intopt) w = cupy.array(w) r_pX_potential_omega = r_pX_potential*w