forked from pyscf/gpu4pyscf
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add ss special case for int1e derivative
- Loading branch information
Showing
4 changed files
with
126 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
/* Copyright 2024 The GPU4PySCF Authors. All Rights Reserved. | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License | ||
* along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include <math.h> | ||
#include "cint2e.cuh" | ||
|
||
__global__ | ||
static void GINTfill_int3c1e_ip_kernel00(double* output, const BasisProdOffsets offsets, const int i_l, const int j_l, const int nprim_ij, | ||
const int stride_j, const int stride_ij, const int ao_offsets_i, const int ao_offsets_j, | ||
const double omega, const double* grid_points, const double* charge_exponents) | ||
{ | ||
const int ntasks_ij = offsets.ntasks_ij; | ||
const int ngrids = offsets.ntasks_kl; | ||
const int task_ij = blockIdx.x * blockDim.x + threadIdx.x; | ||
const int task_grid = blockIdx.y * blockDim.y + threadIdx.y; | ||
|
||
if (task_ij >= ntasks_ij || task_grid >= ngrids) { | ||
return; | ||
} | ||
const int bas_ij = offsets.bas_ij + task_ij; | ||
const int prim_ij = offsets.primitive_ij + task_ij * nprim_ij; | ||
const int* bas_pair2bra = c_bpcache.bas_pair2bra; | ||
const int* bas_pair2ket = c_bpcache.bas_pair2ket; | ||
const int ish = bas_pair2bra[bas_ij]; | ||
const int jsh = bas_pair2ket[bas_ij]; | ||
|
||
const double* __restrict__ a12 = c_bpcache.a12; | ||
const double* __restrict__ e12 = c_bpcache.e12; | ||
const double* __restrict__ x12 = c_bpcache.x12; | ||
const double* __restrict__ y12 = c_bpcache.y12; | ||
const double* __restrict__ z12 = c_bpcache.z12; | ||
|
||
const double* __restrict__ a_exponents = c_bpcache.a1; | ||
const int nbas = c_bpcache.nbas; | ||
const double* __restrict__ bas_x = c_bpcache.bas_coords; | ||
const double* __restrict__ bas_y = bas_x + nbas; | ||
const double* __restrict__ bas_z = bas_y + nbas; | ||
const double Ax = bas_x[ish]; | ||
const double Ay = bas_y[ish]; | ||
const double Az = bas_z[ish]; | ||
|
||
const double* grid_point = grid_points + task_grid * 3; | ||
const double Cx = grid_point[0]; | ||
const double Cy = grid_point[1]; | ||
const double Cz = grid_point[2]; | ||
const double charge_exponent = (charge_exponents != NULL) ? charge_exponents[task_grid] : 0.0; | ||
|
||
double deri_dAx = 0; | ||
double deri_dAy = 0; | ||
double deri_dAz = 0; | ||
double deri_dCx = 0; | ||
double deri_dCy = 0; | ||
double deri_dCz = 0; | ||
for (int ij = prim_ij; ij < prim_ij + nprim_ij; ij++) { | ||
const double aij = a12[ij]; | ||
const double eij = e12[ij]; | ||
const double Px = x12[ij]; | ||
const double Py = y12[ij]; | ||
const double Pz = z12[ij]; | ||
const double PCx = Px - Cx; | ||
const double PCy = Py - Cy; | ||
const double PCz = Pz - Cz; | ||
const double PAx = Px - Ax; | ||
const double PAy = Py - Ay; | ||
const double PAz = Pz - Az; | ||
const double minus_two_a = -2.0 * a_exponents[ij]; | ||
const double one_over_two_p = 0.5 / aij; | ||
double a0 = aij; | ||
const double q_over_p_plus_q = charge_exponent > 0.0 ? charge_exponent / (aij + charge_exponent) : 1.0; | ||
const double sqrt_q_over_p_plus_q = charge_exponent > 0.0 ? sqrt(q_over_p_plus_q) : 1.0; | ||
a0 *= q_over_p_plus_q; | ||
const double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0; | ||
const double sqrt_theta = omega > 0.0 ? sqrt(theta) : 1.0; | ||
a0 *= theta; | ||
|
||
const double prefactor = 2.0 * M_PI / aij * eij * sqrt_theta * sqrt_q_over_p_plus_q; | ||
const double boys_input = a0 * (PCx * PCx + PCy * PCy + PCz * PCz); | ||
if (boys_input > 3.e-7) { | ||
const double sqrt_boys_input = sqrt(boys_input); | ||
const double R000_0 = SQRTPIE4 / sqrt_boys_input * erf(sqrt_boys_input); | ||
const double R000_1 = -a0 * (R000_0 - exp(-boys_input)) / boys_input; | ||
const double R100_0 = R000_1 * PCx; | ||
const double R010_0 = R000_1 * PCy; | ||
const double R001_0 = R000_1 * PCz; | ||
deri_dAx += prefactor * minus_two_a * (PAx * R000_0 + one_over_two_p * R100_0); | ||
deri_dAy += prefactor * minus_two_a * (PAy * R000_0 + one_over_two_p * R010_0); | ||
deri_dAz += prefactor * minus_two_a * (PAz * R000_0 + one_over_two_p * R001_0); | ||
deri_dCx += prefactor * R100_0; | ||
deri_dCy += prefactor * R010_0; | ||
deri_dCz += prefactor * R001_0; | ||
} | ||
} | ||
|
||
const int* ao_loc = c_bpcache.ao_loc; | ||
const int i0 = ao_loc[ish] - ao_offsets_i; | ||
const int j0 = ao_loc[jsh] - ao_offsets_j; | ||
output[i0 + j0 * stride_j + task_grid * stride_ij + 0 * stride_ij * ngrids] = deri_dAx; | ||
output[i0 + j0 * stride_j + task_grid * stride_ij + 1 * stride_ij * ngrids] = deri_dAy; | ||
output[i0 + j0 * stride_j + task_grid * stride_ij + 2 * stride_ij * ngrids] = deri_dAz; | ||
output[i0 + j0 * stride_j + task_grid * stride_ij + 3 * stride_ij * ngrids] = deri_dCx; | ||
output[i0 + j0 * stride_j + task_grid * stride_ij + 4 * stride_ij * ngrids] = deri_dCy; | ||
output[i0 + j0 * stride_j + task_grid * stride_ij + 5 * stride_ij * ngrids] = deri_dCz; | ||
} |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters