diff --git a/gpu4pyscf/__init__.py b/gpu4pyscf/__init__.py index b823b43b..af2c2982 100644 --- a/gpu4pyscf/__init__.py +++ b/gpu4pyscf/__init__.py @@ -14,4 +14,4 @@ __version__ = '1.3.0' -from . import lib, grad, hessian, solvent, scf, dft +from . import lib, grad, hessian, solvent, scf, dft, tdscf diff --git a/gpu4pyscf/grad/__init__.py b/gpu4pyscf/grad/__init__.py index 89353466..45654153 100644 --- a/gpu4pyscf/grad/__init__.py +++ b/gpu4pyscf/grad/__init__.py @@ -4,3 +4,4 @@ from gpu4pyscf.grad.rks import Gradients as RKS from gpu4pyscf.grad.uhf import Gradients as UHF from gpu4pyscf.grad.uks import Gradients as UKS +from . import tdrhf \ No newline at end of file diff --git a/gpu4pyscf/grad/tdrhf.py b/gpu4pyscf/grad/tdrhf.py new file mode 100644 index 00000000..e8b9fcfb --- /dev/null +++ b/gpu4pyscf/grad/tdrhf.py @@ -0,0 +1,361 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from functools import reduce +import cupy as cp +import numpy as np +from pyscf import gto +from pyscf import lib +from pyscf.lib import logger +from gpu4pyscf.grad import rhf as rhf_grad +from gpu4pyscf.df import int3c2e +from gpu4pyscf.lib.cupy_helper import contract +from gpu4pyscf.scf import cphf +from gpu4pyscf import lib as lib_gpu +from pyscf import __config__ +from pyscf.scf import _vhf + + +def get_jk(mol, dm): + '''J = ((-nabla i) j| kl) D_lk + K = ((-nabla i) j| kl) D_jk + ''' + if not isinstance(dm, np.ndarray): dm = dm.get() + # vhfopt = _VHFOpt(mol, 'int2e_ip1').build() + intor = mol._add_suffix('int2e_ip1') + vj, vk = _vhf.direct_mapdm(intor, # (nabla i,j|k,l) + 's2kl', # ip1_sph has k>=l, + ('lk->s1ij', 'jk->s1il'), + dm, 3, # xyz, 3 components + mol._atm, mol._bas, mol._env) + return -vj, -vk + +def grad_elec(td_grad, x_y, singlet=True, atmlst=None, + max_memory=2000, verbose=logger.INFO): + ''' + Electronic part of TDA, TDHF nuclear gradients + + Args: + td_grad : grad.tdrhf.Gradients or grad.tdrks.Gradients object. + + x_y : a two-element list of numpy arrays + TDDFT X and Y amplitudes. If Y is set to 0, this function computes + TDA energy gradients. + ''' + log = logger.new_logger(td_grad, verbose) + time0 = logger.process_clock(), logger.perf_counter() + + mol = td_grad.mol + mf = td_grad.base._scf + mo_coeff = cp.asarray(mf.mo_coeff) + mo_energy = cp.asarray(mf.mo_energy) + mo_occ = cp.asarray(mf.mo_occ) + nao, nmo = mo_coeff.shape + nocc = int((mo_occ>0).sum()) + nvir = nmo - nocc + x, y = x_y + x = cp.asarray(x) + y = cp.asarray(y) + xpy = (x+y).reshape(nocc,nvir).T + xmy = (x-y).reshape(nocc,nvir).T + orbv = mo_coeff[:,nocc:] + orbo = mo_coeff[:,:nocc] + dvv = cp.einsum('ai,bi->ab', xpy, xpy) + cp.einsum('ai,bi->ab', xmy, xmy) # 2 T_{ab} + doo =-cp.einsum('ai,aj->ij', xpy, xpy) - cp.einsum('ai,aj->ij', xmy, xmy) # 2 T_{ij} + dmxpy = reduce(cp.dot, (orbv, xpy, orbo.T)) # (X+Y) in ao basis + dmxmy = reduce(cp.dot, (orbv, xmy, orbo.T)) # (X-Y) in ao basis + dmzoo = reduce(cp.dot, (orbo, doo, orbo.T)) # T_{ij}*2 in ao basis + dmzoo+= reduce(cp.dot, (orbv, dvv, orbv.T)) # T_{ij}*2 + T_{ab}*2 in ao basis + + vj, vk = mf.get_jk(mol, (dmzoo, dmxpy+dmxpy.T, dmxmy-dmxmy.T), hermi=0) + if not isinstance(vj, cp.ndarray): vj = cp.asarray(vj) + if not isinstance(vk, cp.ndarray): vk = cp.asarray(vk) + veff0doo = vj[0] * 2 - vk[0] # 2 for alpha and beta + wvo = reduce(cp.dot, (orbv.T, veff0doo, orbo)) * 2 + if singlet: + veff = vj[1] * 2 - vk[1] + else: + veff = -vk[1] + veff0mop = reduce(cp.dot, (mo_coeff.T, veff, mo_coeff)) + wvo -= cp.einsum('ki,ai->ak', veff0mop[:nocc,:nocc], xpy) * 2 # 2 for dm + dm.T + wvo += cp.einsum('ac,ai->ci', veff0mop[nocc:,nocc:], xpy) * 2 + veff = -vk[2] + veff0mom = reduce(cp.dot, (mo_coeff.T, veff, mo_coeff)) + wvo -= cp.einsum('ki,ai->ak', veff0mom[:nocc,:nocc], xmy) * 2 + wvo += cp.einsum('ac,ai->ci', veff0mom[nocc:,nocc:], xmy) * 2 + + # set singlet=None, generate function for CPHF type response kernel + vresp = mf.gen_response(singlet=None, hermi=1) + def fvind(x): # For singlet, closed shell ground state + dm = reduce(cp.dot, (orbv, x.reshape(nvir,nocc)*2, orbo.T)) # 2 for double occupancy + v1ao = vresp(dm+dm.T) # for the upused 2 + return reduce(cp.dot, (orbv.T, v1ao, orbo)).ravel() + z1 = cphf.solve(fvind, mo_energy, mo_occ, wvo, + max_cycle=td_grad.cphf_max_cycle, + tol=td_grad.cphf_conv_tol)[0] + z1 = z1.reshape(nvir,nocc) + time1 = log.timer('Z-vector using CPHF solver', *time0) + + z1ao = reduce(cp.dot, (orbv, z1, orbo.T)) + veff = vresp(z1ao+z1ao.T) + + im0 = cp.zeros((nmo,nmo)) + # in the following, all should be doubled, due to double occupancy + # and 0.5 for i<=j and a<= b + # but this is reduced. + im0[:nocc,:nocc] = reduce(cp.dot, (orbo.T, veff0doo+veff, orbo)) # H_{ij}^+[T] + H_{ij}^+[Z] # + im0[:nocc,:nocc]+= cp.einsum('ak,ai->ki', veff0mop[nocc:,:nocc], xpy) # H_{ij}^+[T] + H_{ij}^+[Z] + sum_{a} (X+Y)_{aj}H_{ai}^+[(X+Y)] + im0[:nocc,:nocc]+= cp.einsum('ak,ai->ki', veff0mom[nocc:,:nocc], xmy) # H_{ij}^+[T] + H_{ij}^+[Z] + sum_{a} (X+Y)_{aj}H_{ai}^+[(X+Y)] + sum_{a} (X-Y)_{aj}H_{ai}^-[(X-Y)] + im0[nocc:,nocc:] = cp.einsum('ci,ai->ac', veff0mop[nocc:,:nocc], xpy) # sum_{i} (X+Y)_{ci}H_{ai}^+[(X+Y)] + im0[nocc:,nocc:]+= cp.einsum('ci,ai->ac', veff0mom[nocc:,:nocc], xmy) # sum_{i} (X+Y)_{ci}H_{ai}^+[(X+Y)] + sum_{i} (X-Y)_{cj}H_{ai}^-[(X-Y)] + im0[nocc:,:nocc] = cp.einsum('ki,ai->ak', veff0mop[:nocc,:nocc], xpy)*2 # sum_{i} (X+Y)_{ki}H_{ai}^+[(X+Y)] * 2 + im0[nocc:,:nocc]+= cp.einsum('ki,ai->ak', veff0mom[:nocc,:nocc], xmy)*2 # sum_{i} (X+Y)_{ki}H_{ai}^+[(X+Y)] + sum_{i} (X-Y)_{ki}H_{ai}^-[(X-Y)] * 2 + + zeta = lib_gpu.cupy_helper.direct_sum('i+j->ij', mo_energy, mo_energy) * .5 + zeta[nocc:,:nocc] = mo_energy[:nocc] + zeta[:nocc,nocc:] = mo_energy[nocc:] + dm1 = cp.zeros((nmo,nmo)) + dm1[:nocc,:nocc] = doo + dm1[nocc:,nocc:] = dvv + dm1[nocc:,:nocc] = z1 + dm1[:nocc,:nocc] += cp.eye(nocc)*2 # for ground state + im0 = reduce(cp.dot, (mo_coeff, im0+zeta*dm1, mo_coeff.T)) + + # Initialize hcore_deriv with the underlying SCF object because some + # extensions (e.g. QM/MM, solvent) modifies the SCF object only. + mf_grad = td_grad.base._scf.nuc_grad_method() + s1 = mf_grad.get_ovlp(mol) + + dmz1doo = z1ao + dmzoo # P + oo0 = reduce(cp.dot, (orbo, orbo.T)) #D + + if atmlst is None: + atmlst = range(mol.natm) + # offsetdic = mol.offset_nr_by_atom() + de = cp.zeros((len(atmlst),3)) + h1 = cp.asarray(mf_grad.get_hcore(mol)) # without 1/r like terms + s1 = cp.asarray(mf_grad.get_ovlp(mol)) + dh_ground = contract('xij,ij->xi', h1, oo0*2) + dh_td = contract('xij,ij->xi', h1, (dmz1doo+dmz1doo.T)*0.5) + ds = contract('xij,ij->xi', s1, (im0+im0.T)*0.5) + + dh1e_ground = int3c2e.get_dh1e(mol, oo0*2) # 1/r like terms + if mol.has_ecp(): + dh1e_ground += rhf_grad.get_dh1e_ecp(mol, oo0*2) # 1/r like terms + dh1e_td = int3c2e.get_dh1e(mol, (dmz1doo+dmz1doo.T)*0.5) # 1/r like terms + if mol.has_ecp(): + dh1e_td += rhf_grad.get_dh1e_ecp(mol, (dmz1doo+dmz1doo.T)*0.5) # 1/r like terms + + dvhf_DD_DP = mf_grad.get_veff(mol, (dmz1doo+dmz1doo.T)*0.5 + oo0*2) + dvhf_DD_DP -= mf_grad.get_veff(mol, (dmz1doo+dmz1doo.T)*0.5) + dvhf_xpy = mf_grad.get_veff(mol, dmxpy+dmxpy.T)*2 + dvhf_xmy = mf_grad.get_veff(mol, dmxmy-dmxmy.T)*2 + vj, vk = get_jk(mol, (dmxmy-dmxmy.T)) #D, P, (X+Y), (X-Y) + if not isinstance(vj, cp.ndarray): vj = cp.asarray(vj) + if not isinstance(vk, cp.ndarray): vk = cp.asarray(vk) + vj = vj.reshape(3,nao,nao) + vk = vk.reshape(3,nao,nao) + vhf1 = -vk + if singlet: + vhf1 += vj * 2 + else: + vhf1 += vj*2 + extra_force = cp.zeros((len(atmlst),3)) + for k, ia in enumerate(atmlst): + extra_force[k] += mf_grad.extra_force(ia, locals()) + + delec = 2.0*(dh_ground + dh_td - ds) + aoslices = mol.aoslice_by_atom() + delec= cp.asarray([cp.sum(delec[:, p0:p1], axis=1) for p0, p1 in aoslices[:,2:]]) + de = 2.0 * (dvhf_DD_DP + dvhf_xpy) + dh1e_ground + dh1e_td + delec + extra_force + if atmlst is None: + atmlst = range(mol.natm) + offsetdic = mol.offset_nr_by_atom() + for k, ia in enumerate(atmlst): + shl0, shl1, p0, p1 = offsetdic[ia] + de[k] += cp.einsum('xij,ij->x', vhf1[:,p0:p1], dmxmy[p0:p1,:]) * 2 + de[k] -= cp.einsum('xji,ij->x', vhf1[:,p0:p1], dmxmy[:,p0:p1]) * 2 + + log.timer('TDHF nuclear gradients', *time0) + return de.get() + + +# def as_scanner(td_grad, state=1): +# '''Generating a nuclear gradients scanner/solver (for geometry optimizer). + +# The returned solver is a function. This function requires one argument +# "mol" as input and returns energy and first order nuclear derivatives. + +# The solver will automatically use the results of last calculation as the +# initial guess of the new calculation. All parameters assigned in the +# nuc-grad object and SCF object (DIIS, conv_tol, max_memory etc) are +# automatically applied in the solver. + +# Note scanner has side effects. It may change many underlying objects +# (_scf, with_df, with_x2c, ...) during calculation. + +# Examples:: + +# >>> from pyscf import gto, scf, tdscf, grad +# >>> mol = gto.M(atom='H 0 0 0; F 0 0 1') +# >>> td_grad_scanner = scf.RHF(mol).apply(tdscf.TDA).nuc_grad_method().as_scanner() +# >>> e_tot, grad = td_grad_scanner(gto.M(atom='H 0 0 0; F 0 0 1.1')) +# >>> e_tot, grad = td_grad_scanner(gto.M(atom='H 0 0 0; F 0 0 1.5')) +# ''' +# from pyscf import gto +# if isinstance(td_grad, lib.GradScanner): +# return td_grad + +# if state == 0: +# return td_grad.base._scf.nuc_grad_method().as_scanner() + +# logger.info(td_grad, 'Create scanner for %s', td_grad.__class__) +# name = td_grad.__class__.__name__ + TDSCF_GradScanner.__name_mixin__ +# return lib.set_class(TDSCF_GradScanner(td_grad, state), +# (TDSCF_GradScanner, td_grad.__class__), name) + +# class TDSCF_GradScanner(lib.GradScanner): +# _keys = {'e_tot'} + +# def __init__(self, g, state): +# lib.GradScanner.__init__(self, g) +# if state is not None: +# self.state = state + +# def __call__(self, mol_or_geom, state=None, **kwargs): +# if isinstance(mol_or_geom, gto.MoleBase): +# assert mol_or_geom.__class__ == gto.Mole +# mol = mol_or_geom +# else: +# mol = self.mol.set_geom_(mol_or_geom, inplace=False) +# self.reset(mol) + +# if state is None: +# state = self.state +# else: +# self.state = state + +# td_scanner = self.base +# td_scanner(mol) +# # TODO: Check root flip. Maybe avoid the initial guess in TDHF otherwise +# # large error may be found in the excited states amplitudes +# de = self.kernel(state=state, **kwargs) +# e_tot = self.e_tot[state-1] +# return e_tot, de + +# @property +# def converged(self): +# td_scanner = self.base +# return all((td_scanner._scf.converged, +# td_scanner.converged[self.state])) + + +class Gradients(rhf_grad.GradientsBase): + + cphf_max_cycle = getattr(__config__, 'grad_tdrhf_Gradients_cphf_max_cycle', 20) + cphf_conv_tol = getattr(__config__, 'grad_tdrhf_Gradients_cphf_conv_tol', 1e-8) + + _keys = { + 'cphf_max_cycle', 'cphf_conv_tol', + 'mol', 'base', 'chkfile', 'state', 'atmlst', 'de', + } + + def __init__(self, td): + self.verbose = td.verbose + self.stdout = td.stdout + self.mol = td.mol + self.base = td + self.chkfile = td.chkfile + self.max_memory = td.max_memory + self.state = 1 # of which the gradients to be computed. + self.atmlst = None + self.de = None + + def dump_flags(self, verbose=None): + log = logger.new_logger(self, verbose) + log.info('\n') + log.info('******** LR %s gradients for %s ********', + self.base.__class__, self.base._scf.__class__) + log.info('cphf_conv_tol = %g', self.cphf_conv_tol) + log.info('cphf_max_cycle = %d', self.cphf_max_cycle) + log.info('chkfile = %s', self.chkfile) + log.info('State ID = %d', self.state) + log.info('max_memory %d MB (current use %d MB)', + self.max_memory, lib.current_memory()[0]) + log.info('\n') + return self + + @lib.with_doc(grad_elec.__doc__) + def grad_elec(self, xy, singlet, atmlst=None): + return grad_elec(self, xy, singlet, atmlst, self.max_memory, self.verbose) + + def kernel(self, xy=None, state=None, singlet=None, atmlst=None): + ''' + Args: + state : int + Excited state ID. state = 1 means the first excited state. + ''' + if xy is None: + if state is None: + state = self.state + else: + self.state = state + + if state == 0: + logger.warn(self, 'state=0 found in the input. ' + 'Gradients of ground state is computed.') + return self.base._scf.nuc_grad_method().kernel(atmlst=atmlst) + + xy = self.base.xy[state-1] + + if singlet is None: singlet = self.base.singlet + if atmlst is None: + atmlst = self.atmlst + else: + self.atmlst = atmlst + + if self.verbose >= logger.WARN: + self.check_sanity() + if self.verbose >= logger.INFO: + self.dump_flags() + + de = self.grad_elec(xy, singlet, atmlst) + self.de = de = de + self.grad_nuc(atmlst=atmlst) + if self.mol.symmetry: + self.de = self.symmetrize(self.de, atmlst) + self._finalize() + return self.de + + # Calling the underlying SCF nuclear gradients because it may be modified + # by external modules (e.g. QM/MM, solvent) + def grad_nuc(self, mol=None, atmlst=None): + mf_grad = self.base._scf.nuc_grad_method() + return mf_grad.grad_nuc(mol, atmlst) + + def _finalize(self): + if self.verbose >= logger.NOTE: + logger.note(self, '--------- %s gradients for state %d ----------', + self.base.__class__.__name__, self.state) + self._write(self.mol, self.de, self.atmlst) + logger.note(self, '----------------------------------------------') + + # as_scanner = as_scanner + + to_gpu = lib.to_gpu + +Grad = Gradients + +from pyscf import tdscf +tdscf.rhf.TDA.Gradients = tdscf.rhf.TDHF.Gradients = lib.class_as_method(Gradients) \ No newline at end of file diff --git a/gpu4pyscf/lib/CMakeLists.txt b/gpu4pyscf/lib/CMakeLists.txt index 91e1d87a..521c2361 100644 --- a/gpu4pyscf/lib/CMakeLists.txt +++ b/gpu4pyscf/lib/CMakeLists.txt @@ -157,7 +157,7 @@ if(BUILD_LIBXC) PREFIX ${PROJECT_BINARY_DIR}/deps INSTALL_DIR ${PROJECT_SOURCE_DIR}/deps CMAKE_ARGS -DBUILD_SHARED_LIBS=ON -DENABLE_CUDA=ON - -DENABLE_FORTRAN=OFF -DDISABLE_KXC=ON -DDISABLE_LXC=ON -DDISABLE_FHC=ON + -DENABLE_FORTRAN=OFF -DDISABLE_KXC=OFF -DDISABLE_LXC=ON -DDISABLE_FHC=ON -DCMAKE_INSTALL_PREFIX:PATH= -DCMAKE_INSTALL_LIBDIR:PATH=lib -DCMAKE_C_CREATE_SHARED_LIBRARY=${C_LINK_TEMPLATE} diff --git a/gpu4pyscf/lib/cupy_helper.py b/gpu4pyscf/lib/cupy_helper.py index d68cbcff..fe197c71 100644 --- a/gpu4pyscf/lib/cupy_helper.py +++ b/gpu4pyscf/lib/cupy_helper.py @@ -1023,3 +1023,71 @@ def sandwich_dot(a, c, out=None): if a_ndim == 2: out = out[0] return out + + +def direct_sum(subscripts, *operands): + '''Apply the summation over many operands with the einsum fashion. + Examples: + >>> a = numpy.random.random((6,5)) + >>> b = numpy.random.random((4,3,2)) + >>> direct_sum('ij,klm->ijklm', a, b).shape + (6, 5, 4, 3, 2) + >>> direct_sum('ij,klm', a, b).shape + (6, 5, 4, 3, 2) + >>> direct_sum('i,j,klm->mjlik', a[0], a[:,0], b).shape + (2, 6, 3, 5, 4) + >>> direct_sum('ij-klm->ijklm', a, b).shape + (6, 5, 4, 3, 2) + >>> direct_sum('ij+klm', a, b).shape + (6, 5, 4, 3, 2) + >>> direct_sum('-i-j+klm->mjlik', a[0], a[:,0], b).shape + (2, 6, 3, 5, 4) + >>> c = numpy.random((3,5)) + >>> z = direct_sum('ik+jk->kij', a, c).shape # This is slow + >>> abs(a.T.reshape(5,6,1) + c.reshape(5,1,3) - z).sum() + 0.0 + ''' + + def sign_and_symbs(subscript): + ''' sign list and notation list''' + subscript = subscript.replace(' ', '').replace(',', '+') + + if subscript[0] not in '+-': + subscript = '+' + subscript + sign = [x for x in subscript if x in '+-'] + + symbs = subscript[1:].replace('-', '+').split('+') + #s = ''.join(symbs) + #assert (len(set(s)) == len(s)) # make sure no duplicated symbols + return sign, symbs + + if '->' in subscripts: + src, dest = subscripts.split('->') + sign, src = sign_and_symbs(src) + dest = dest.replace(' ', '') + else: + sign, src = sign_and_symbs(subscripts) + dest = ''.join(src) + assert (len(src) == len(operands)) + + for i, symb in enumerate(src): + op = cupy.asarray(operands[i]) + assert (len(symb) == op.ndim) + unisymb = set(symb) + if len(unisymb) != len(symb): + unisymb = ''.join(unisymb) + op = cupy.einsum('->'.join((symb, unisymb)), op) + src[i] = unisymb + if i == 0: + if sign[i] == '+': + out = op + else: + out = -op + elif sign[i] == '+': + out = out.reshape(out.shape+(1,)*op.ndim) + op + else: + out = out.reshape(out.shape+(1,)*op.ndim) - op + + out = cupy.einsum('->'.join((''.join(src), dest)), out) + out.flags.writeable = True # old numpy has this issue + return out \ No newline at end of file