Skip to content

Commit

Permalink
Add ss special case for int1e derivative
Browse files Browse the repository at this point in the history
  • Loading branch information
henryw7 committed Dec 11, 2024
1 parent 06bf584 commit ed0ef9e
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 12 deletions.
116 changes: 116 additions & 0 deletions gpu4pyscf/lib/gint/g1e_ip_root_1.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/* Copyright 2024 The GPU4PySCF Authors. All Rights Reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <math.h>
#include "cint2e.cuh"

__global__
static void GINTfill_int3c1e_ip_kernel00(double* output, const BasisProdOffsets offsets, const int i_l, const int j_l, const int nprim_ij,
const int stride_j, const int stride_ij, const int ao_offsets_i, const int ao_offsets_j,
const double omega, const double* grid_points, const double* charge_exponents)
{
const int ntasks_ij = offsets.ntasks_ij;
const int ngrids = offsets.ntasks_kl;
const int task_ij = blockIdx.x * blockDim.x + threadIdx.x;
const int task_grid = blockIdx.y * blockDim.y + threadIdx.y;

if (task_ij >= ntasks_ij || task_grid >= ngrids) {
return;
}
const int bas_ij = offsets.bas_ij + task_ij;
const int prim_ij = offsets.primitive_ij + task_ij * nprim_ij;
const int* bas_pair2bra = c_bpcache.bas_pair2bra;
const int* bas_pair2ket = c_bpcache.bas_pair2ket;
const int ish = bas_pair2bra[bas_ij];
const int jsh = bas_pair2ket[bas_ij];

const double* __restrict__ a12 = c_bpcache.a12;
const double* __restrict__ e12 = c_bpcache.e12;
const double* __restrict__ x12 = c_bpcache.x12;
const double* __restrict__ y12 = c_bpcache.y12;
const double* __restrict__ z12 = c_bpcache.z12;

const double* __restrict__ a_exponents = c_bpcache.a1;
const int nbas = c_bpcache.nbas;
const double* __restrict__ bas_x = c_bpcache.bas_coords;
const double* __restrict__ bas_y = bas_x + nbas;
const double* __restrict__ bas_z = bas_y + nbas;
const double Ax = bas_x[ish];
const double Ay = bas_y[ish];
const double Az = bas_z[ish];

const double* grid_point = grid_points + task_grid * 3;
const double Cx = grid_point[0];
const double Cy = grid_point[1];
const double Cz = grid_point[2];
const double charge_exponent = (charge_exponents != NULL) ? charge_exponents[task_grid] : 0.0;

double deri_dAx = 0;
double deri_dAy = 0;
double deri_dAz = 0;
double deri_dCx = 0;
double deri_dCy = 0;
double deri_dCz = 0;
for (int ij = prim_ij; ij < prim_ij + nprim_ij; ij++) {
const double aij = a12[ij];
const double eij = e12[ij];
const double Px = x12[ij];
const double Py = y12[ij];
const double Pz = z12[ij];
const double PCx = Px - Cx;
const double PCy = Py - Cy;
const double PCz = Pz - Cz;
const double PAx = Px - Ax;
const double PAy = Py - Ay;
const double PAz = Pz - Az;
const double minus_two_a = -2.0 * a_exponents[ij];
const double one_over_two_p = 0.5 / aij;
double a0 = aij;
const double q_over_p_plus_q = charge_exponent > 0.0 ? charge_exponent / (aij + charge_exponent) : 1.0;
const double sqrt_q_over_p_plus_q = charge_exponent > 0.0 ? sqrt(q_over_p_plus_q) : 1.0;
a0 *= q_over_p_plus_q;
const double theta = omega > 0.0 ? omega * omega / (omega * omega + a0) : 1.0;
const double sqrt_theta = omega > 0.0 ? sqrt(theta) : 1.0;
a0 *= theta;

const double prefactor = 2.0 * M_PI / aij * eij * sqrt_theta * sqrt_q_over_p_plus_q;
const double boys_input = a0 * (PCx * PCx + PCy * PCy + PCz * PCz);
if (boys_input > 3.e-7) {
const double sqrt_boys_input = sqrt(boys_input);
const double R000_0 = SQRTPIE4 / sqrt_boys_input * erf(sqrt_boys_input);
const double R000_1 = -a0 * (R000_0 - exp(-boys_input)) / boys_input;
const double R100_0 = R000_1 * PCx;
const double R010_0 = R000_1 * PCy;
const double R001_0 = R000_1 * PCz;
deri_dAx += prefactor * minus_two_a * (PAx * R000_0 + one_over_two_p * R100_0);
deri_dAy += prefactor * minus_two_a * (PAy * R000_0 + one_over_two_p * R010_0);
deri_dAz += prefactor * minus_two_a * (PAz * R000_0 + one_over_two_p * R001_0);
deri_dCx += prefactor * R100_0;
deri_dCy += prefactor * R010_0;
deri_dCz += prefactor * R001_0;
}
}

const int* ao_loc = c_bpcache.ao_loc;
const int i0 = ao_loc[ish] - ao_offsets_i;
const int j0 = ao_loc[jsh] - ao_offsets_j;
output[i0 + j0 * stride_j + task_grid * stride_ij + 0 * stride_ij * ngrids] = deri_dAx;
output[i0 + j0 * stride_j + task_grid * stride_ij + 1 * stride_ij * ngrids] = deri_dAy;
output[i0 + j0 * stride_j + task_grid * stride_ij + 2 * stride_ij * ngrids] = deri_dAz;
output[i0 + j0 * stride_j + task_grid * stride_ij + 3 * stride_ij * ngrids] = deri_dCx;
output[i0 + j0 * stride_j + task_grid * stride_ij + 4 * stride_ij * ngrids] = deri_dCy;
output[i0 + j0 * stride_j + task_grid * stride_ij + 5 * stride_ij * ngrids] = deri_dCz;
}
File renamed without changes.
2 changes: 1 addition & 1 deletion gpu4pyscf/lib/gint/nr_fill_ao_int3c1e.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

#include "rys_roots.cu"
#include "g1e.cu"
#include "g1e_root_123.cu"
#include "g1e_root_1.cu"
#include "g3c1e.cu"

static int GINTfill_int3c1e_tasks(double* output, const BasisProdOffsets offsets, const int i_l, const int j_l, const int nprim_ij,
Expand Down
20 changes: 9 additions & 11 deletions gpu4pyscf/lib/gint/nr_fill_ao_int3c1e_ip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

#include "rys_roots.cu"
#include "g1e.cu"
// #include "g1e_root_123.cu"
#include "g1e_ip_root_1.cu"
#include "g3c1e_ip.cu"

static int GINTfill_int3c1e_ip_tasks(double* output, const BasisProdOffsets offsets, const int i_l, const int j_l, const int nprim_ij,
Expand All @@ -42,16 +42,14 @@ static int GINTfill_int3c1e_ip_tasks(double* output, const BasisProdOffsets offs
const dim3 blocks((ntasks_ij+THREADSX-1)/THREADSX, (ngrids+THREADSY-1)/THREADSY);
int type_ijkl;
switch (nrys_roots) {
// case 1:
// type_ijkl = (i_l << 2) | j_l;
// switch (type_ijkl) {
// case (0<<2)|0: GINTfill_int3c1e_kernel00<<<blocks, threads, 0, stream>>>(output, offsets, nprim_ij, stride_j, stride_ij, ao_offsets_i, ao_offsets_j, omega, grid_points, charge_exponents); break;
// case (1<<2)|0: GINTfill_int3c1e_kernel10<<<blocks, threads, 0, stream>>>(output, offsets, nprim_ij, stride_j, stride_ij, ao_offsets_i, ao_offsets_j, omega, grid_points, charge_exponents); break;
// default:
// fprintf(stderr, "roots=1 type_ijkl %d\n", type_ijkl);
// }
// break;
case 1: GINTfill_int3c1e_ip_kernel_general<1, GSIZE1_INT3C_1E> <<<blocks, threads, 0, stream>>>(output, offsets, i_l, j_l, nprim_ij, stride_j, stride_ij, ao_offsets_i, ao_offsets_j, omega, grid_points, charge_exponents); break;
case 1:
type_ijkl = (i_l + 1) * 10 + j_l;
switch (type_ijkl) {
case 10: GINTfill_int3c1e_ip_kernel00<<<blocks, threads, 0, stream>>>(output, offsets, i_l, j_l, nprim_ij, stride_j, stride_ij, ao_offsets_i, ao_offsets_j, omega, grid_points, charge_exponents); break;
default:
fprintf(stderr, "roots=1 type_ijkl %d\n", type_ijkl);
}
break;
case 2: GINTfill_int3c1e_ip_kernel_general<2, GSIZE2_INT3C_1E> <<<blocks, threads, 0, stream>>>(output, offsets, i_l, j_l, nprim_ij, stride_j, stride_ij, ao_offsets_i, ao_offsets_j, omega, grid_points, charge_exponents); break;
case 3: GINTfill_int3c1e_ip_kernel_general<3, GSIZE3_INT3C_1E> <<<blocks, threads, 0, stream>>>(output, offsets, i_l, j_l, nprim_ij, stride_j, stride_ij, ao_offsets_i, ao_offsets_j, omega, grid_points, charge_exponents); break;
case 4: GINTfill_int3c1e_ip_kernel_general<4, GSIZE4_INT3C_1E> <<<blocks, threads, 0, stream>>>(output, offsets, i_l, j_l, nprim_ij, stride_j, stride_ij, ao_offsets_i, ao_offsets_j, omega, grid_points, charge_exponents); break;
Expand Down

0 comments on commit ed0ef9e

Please sign in to comment.