From d80662ec70e812f7860f65b4fa882306312de203 Mon Sep 17 00:00:00 2001 From: Leighton Wilson Date: Sun, 19 Apr 2020 13:41:52 -0700 Subject: [PATCH 01/95] Fixing spacing --- src/interaction_lists/interaction_lists.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/interaction_lists/interaction_lists.c b/src/interaction_lists/interaction_lists.c index 143d0d2e..61ca0df4 100644 --- a/src/interaction_lists/interaction_lists.c +++ b/src/interaction_lists/interaction_lists.c @@ -444,7 +444,8 @@ void cc_compute_interaction_list_1( if (direct_index_counter[target_tree_node] >= sizeof_direct_list[target_tree_node]) { sizeof_direct_list[target_tree_node] *= 1.5; - target_direct_list[target_tree_node] = realloc_vector(target_direct_list[target_tree_node], sizeof_direct_list[target_tree_node]); + target_direct_list[target_tree_node] = realloc_vector(target_direct_list[target_tree_node], + sizeof_direct_list[target_tree_node]); } target_direct_list[target_tree_node][direct_index_counter[target_tree_node]] = source_tree_node; @@ -523,7 +524,8 @@ void cc_compute_interaction_list_2( if (direct_index_counter[target_tree_node] >= sizeof_direct_list[target_tree_node]) { sizeof_direct_list[target_tree_node] *= 1.5; - target_direct_list[target_tree_node] = realloc_vector(target_direct_list[target_tree_node], sizeof_direct_list[target_tree_node]); + target_direct_list[target_tree_node] = realloc_vector(target_direct_list[target_tree_node], + sizeof_direct_list[target_tree_node]); } target_direct_list[target_tree_node][direct_index_counter[target_tree_node]] = source_tree_node; From 8aca6941aa81a1357ae39828f232f569441c1604 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Tue, 21 Apr 2020 14:42:47 -0500 Subject: [PATCH 02/95] Adding gaussian and exponential distribution capabilities to random cube example --- examples/example.in | 1 + examples/random_cube.c | 52 +++---- examples/random_cube_reproducible.c | 6 +- examples/support_fns.c | 204 ++++++++++++++++++++++++---- examples/support_fns.h | 20 ++- 5 files changed, 233 insertions(+), 50 deletions(-) diff --git a/examples/example.in b/examples/example.in index bba8fbc7..29109a46 100644 --- a/examples/example.in +++ b/examples/example.in @@ -9,6 +9,7 @@ kernel_name coulomb kernel_params 1.0 approximation lagrange compute_type particle-cluster +distribution uniform run_direct 1 slice 10 verbosity 1 diff --git a/examples/random_cube.c b/examples/random_cube.c index d3d0cb74..babaf332 100644 --- a/examples/random_cube.c +++ b/examples/random_cube.c @@ -33,10 +33,18 @@ int main(int argc, char **argv) /* run parameters */ int N, M, run_direct, slice; - struct RunParams *run_params = NULL; + double xyz_limits[6]; + DISTRIBUTION distribution; int sample_size = 10000; + + struct RunParams *run_params = NULL; + FILE *fp = fopen(argv[1], "r"); - Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice); + Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution); + + double xmin = xyz_limits[0], xmax = xyz_limits[1]; + double ymin = xyz_limits[2], ymax = xyz_limits[3]; + double zmin = xyz_limits[4], zmax = xyz_limits[5]; /* Zoltan variables */ int rc; @@ -98,11 +106,9 @@ int main(int argc, char **argv) srand(1); for (int i = 0; i < sample_size; ++i) { - mySources.x[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; - mySources.y[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; - mySources.z[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; - mySources.q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; - mySources.w[i] = ((double)rand()/(double)(RAND_MAX)) * 2. 
- 1.; + mySources.x[i] = Point_Set_Init(distribution); + mySources.y[i] = Point_Set_Init(distribution); + mySources.z[i] = Point_Set_Init(distribution); mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); mySources.b[i] = 1.0; // dummy weighting scheme @@ -158,8 +164,6 @@ int main(int argc, char **argv) mySources.x[i] = mySources.x[mySources.numMyPoints-1]; mySources.y[i] = mySources.y[mySources.numMyPoints-1]; mySources.z[i] = mySources.z[mySources.numMyPoints-1]; - mySources.q[i] = mySources.q[mySources.numMyPoints-1]; - mySources.w[i] = mySources.w[mySources.numMyPoints-1]; mySources.myGlobalIDs[i] = mySources.myGlobalIDs[mySources.numMyPoints-1]; mySources.numMyPoints--; } else { @@ -174,12 +178,12 @@ int main(int argc, char **argv) exit(0); } - double xmin = minval(mySources.x, mySources.numMyPoints); - double ymin = minval(mySources.y, mySources.numMyPoints); - double zmin = minval(mySources.z, mySources.numMyPoints); - double xmax = maxval(mySources.x, mySources.numMyPoints); - double ymax = maxval(mySources.y, mySources.numMyPoints); - double zmax = maxval(mySources.z, mySources.numMyPoints); + double zz_bound_x_min = minval(mySources.x, mySources.numMyPoints); + double zz_bound_y_min = minval(mySources.y, mySources.numMyPoints); + double zz_bound_z_min = minval(mySources.z, mySources.numMyPoints); + double zz_bound_x_max = maxval(mySources.x, mySources.numMyPoints); + double zz_bound_y_max = maxval(mySources.y, mySources.numMyPoints); + double zz_bound_z_max = maxval(mySources.z, mySources.numMyPoints); Zoltan_LB_Free_Part(&importGlobalGids, &importLocalGids, @@ -224,11 +228,11 @@ int main(int argc, char **argv) /* Generating sources and targets based on Zoltan bounding box */ for (int i = 0; i < sources->num; ++i) { - sources->x[i] = ((double)rand()/(double)(RAND_MAX)) * (xmax-xmin) + xmin; - sources->y[i] = ((double)rand()/(double)(RAND_MAX)) * (ymax-ymin) + ymin; - sources->z[i] = ((double)rand()/(double)(RAND_MAX)) * (zmax-zmin) + zmin; - 
sources->q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; - sources->w[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; + sources->x[i] = Point_Set(distribution, zz_bound_x_min, zz_bound_x_max) * (xmax-xmin) + xmin; + sources->y[i] = Point_Set(distribution, zz_bound_y_min, zz_bound_y_max) * (ymax-ymin) + ymin; + sources->z[i] = Point_Set(distribution, zz_bound_z_min, zz_bound_z_max) * (zmax-zmin) + zmin; + sources->q[i] = Point_Set(UNIFORM, -1., 1.); + sources->w[i] = Point_Set(UNIFORM, -1., 1.); } /* MPI-allocated target arrays for RMA use */ @@ -241,10 +245,10 @@ int main(int argc, char **argv) /* Generating targets based on Zoltan bounding box */ for (int i = 0; i < targets->num; ++i) { - targets->x[i] = ((double)rand()/(double)(RAND_MAX)) * (xmax-xmin) + xmin; - targets->y[i] = ((double)rand()/(double)(RAND_MAX)) * (ymax-ymin) + ymin; - targets->z[i] = ((double)rand()/(double)(RAND_MAX)) * (zmax-zmin) + zmin; - targets->q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; + targets->x[i] = Point_Set(distribution, zz_bound_x_min, zz_bound_x_max) * (xmax-xmin) + xmin; + targets->y[i] = Point_Set(distribution, zz_bound_y_min, zz_bound_y_max) * (ymax-ymin) + ymin; + targets->z[i] = Point_Set(distribution, zz_bound_z_min, zz_bound_z_max) * (zmax-zmin) + zmin; + targets->q[i] = Point_Set(UNIFORM, -1., 1.); } #ifdef OPENACC_ENABLED diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index dcff9c51..005de00e 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -30,9 +30,13 @@ int main(int argc, char **argv) /* run parameters */ int N, M, run_direct, slice; + double xyz_limits[6]; + DISTRIBUTION distribution; + struct RunParams *run_params = NULL; + FILE *fp = fopen(argv[1], "r"); - Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice); + Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution); if (N != M) { if (rank == 0) printf("[random cube example] 
ERROR! This executable requires sources and targets " diff --git a/examples/support_fns.c b/examples/support_fns.c index 984af7f6..ff1ae2d3 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -10,24 +10,29 @@ #include "support_fns.h" +static double erfinv (double x); -void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int *run_direct, int *slice) + +void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int *run_direct, int *slice, + double *xyz_limits, DISTRIBUTION *distribution) { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); + /* BaryTree params */ int verbosity = 0; int interp_order = 5; double theta = 0.5; int max_per_source_leaf = 500; int max_per_target_leaf = 500; double size_check_factor = 1.0; - + char kernel_string[256] = "COULOMB"; char singularity_string[256] = "SKIPPING"; char approximation_string[256] = "LAGRANGE"; char compute_type_string[256] = "PARTICLE_CLUSTER"; char run_direct_string[256] = "OFF"; + char distribution_string[256] = "UNIFORM"; KERNEL kernel; SINGULARITY singularity; @@ -37,9 +42,17 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * int num_kernel_params = 0; double kernel_params[32]; + /* random_cube_example params */ *N = 10000; *M = 10000; *slice = 1; + + xyz_limits[0] = -1.; + xyz_limits[1] = 1.; + xyz_limits[2] = -1.; + xyz_limits[3] = 1.; + xyz_limits[4] = -1.; + xyz_limits[5] = 1.; char c[256], c1[256], c2[256]; @@ -47,7 +60,7 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * while (fgets(c, 256, fp) != NULL) { sscanf(c, "%s %s", c1, c2); - // Parameters for the RunParam struct + /* Parameters for the RunParam struct */ if (strcmp(c1, "order") == 0) { interp_order = atoi(c2); @@ -86,7 +99,7 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } else if (strcmp(c1, "verbosity") == 0) { verbosity = atoi(c2); - // Other run parameters + /* Other run parameters */ } else 
if (strcmp(c1, "num_particles") == 0) { *N = atoi(c2); *M = atoi(c2); @@ -102,6 +115,9 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } else if (strcmp(c1, "slice") == 0) { *slice = atoi(c2); + + } else if (strcmp(c1, "distribution") == 0) { + strcpy(distribution_string, c2); } else { if (rank == 0) { @@ -111,25 +127,8 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } } - - if ((strcasecmp(run_direct_string, "ON") == 0) - || (strcasecmp(run_direct_string, "YES") == 0) - || (strcasecmp(run_direct_string, "1") == 0)) { - *run_direct = 1; - - } else if ((strcasecmp(run_direct_string, "OFF") == 0) - || (strcasecmp(run_direct_string, "NO") == 0) - || (strcasecmp(run_direct_string, "0") == 0)) { - *run_direct = 0; - } else { - if (rank == 0) { - printf("[random cube example] ERROR! Undefined run direct token \"%s\". Exiting.\n", - run_direct_string); - } - exit(1); - } - + /* Validating tokens for RunParam struct */ if (strcasecmp(kernel_string, "COULOMB") == 0) { kernel = COULOMB; @@ -217,6 +216,44 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } exit(1); } + + + /* Validating other tokens */ + if ((strcasecmp(run_direct_string, "ON") == 0) + || (strcasecmp(run_direct_string, "YES") == 0) + || (strcasecmp(run_direct_string, "1") == 0)) { + *run_direct = 1; + + } else if ((strcasecmp(run_direct_string, "OFF") == 0) + || (strcasecmp(run_direct_string, "NO") == 0) + || (strcasecmp(run_direct_string, "0") == 0)) { + *run_direct = 0; + } else { + if (rank == 0) { + printf("[random cube example] ERROR! Undefined run direct token \"%s\". 
Exiting.\n", + run_direct_string); + } + exit(1); + } + + + if (strcasecmp(distribution_string, "UNIFORM") == 0) { + *distribution = UNIFORM; + + } else if ((strcasecmp(distribution_string, "GAUSSIAN") == 0) + || (strcasecmp(distribution_string, "NORMAL") == 0)) { + *distribution = GAUSSIAN; + + } else if (strcasecmp(distribution_string, "EXPONENTIAL") == 0) { + *distribution = EXPONENTIAL; + + } else { + if (rank == 0) { + printf("[random cube example] ERROR! Undefined distribution token \"%s\". Exiting.\n", + distribution_string); + } + exit(1); + } RunParams_Setup(run_params, @@ -231,6 +268,59 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * +/*----------------------------------------------------------------------------*/ +double Point_Set_Init(DISTRIBUTION distribution) +{ + if (distribution == UNIFORM) { + return (double)rand()/(double)(RAND_MAX); + + } else if (distribution == GAUSSIAN) { + double sum = 0.; + + for (int i = 0; i < 12; ++i) { + sum += (double)rand()/(double)(RAND_MAX); + } + + return sum / 12.; + + } else if (distribution == EXPONENTIAL) { + double u; + double x = 10.; + + while (x > 1.) { + u = (double)rand()/(1. + (double)(RAND_MAX)); + x = -log(1. - u) / sqrt(12.); + } + + return x; + } +} + + +/*----------------------------------------------------------------------------*/ +double Point_Set(DISTRIBUTION distribution, double xmin, double xmax) +{ + if (distribution == UNIFORM) { + return (double)rand()/(double)(RAND_MAX) * (xmax - xmin) + xmin; + + } else if (distribution == GAUSSIAN) { + double sigma = 1. / sqrt(12.); + double mu = 0.5; + double u = (double)rand()/(double)(RAND_MAX) * (xmax - xmin) + xmin; + + return mu + sigma * sqrt(2.) * erfinv(2. * u - 1.); + + } else if (distribution == EXPONENTIAL) { + double lambda = sqrt(12.) / (xmax - xmin); + double u = (double)rand()/(1. + (double)(RAND_MAX)) * (xmax - xmin) + xmin; + + return -log(1. 
- u) / lambda; + } +} + + + +/*----------------------------------------------------------------------------*/ void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], double time_run[4], double time_tree[13], double time_direct[4]) { @@ -250,7 +340,7 @@ void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], } - +/*----------------------------------------------------------------------------*/ void Timing_Print(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], int run_direct, struct RunParams *run_params) { @@ -449,6 +539,7 @@ void Timing_Print(double time_run_glob[3][4], double time_tree_glob[3][13], doub +/*----------------------------------------------------------------------------*/ void Accuracy_Calculate(double *potential_engy_glob, double *potential_engy_direct_glob, double *glob_inf_err, double *glob_relinf_err, double *glob_n2_err, double *glob_reln2_err, double *potential, double *potential_direct, int targets_num, int slice) @@ -488,7 +579,7 @@ void Accuracy_Calculate(double *potential_engy_glob, double *potential_engy_dire } - +/*----------------------------------------------------------------------------*/ void Accuracy_Print(double potential_engy_glob, double potential_engy_direct_glob, double glob_inf_err, double glob_relinf_err, double glob_n2_err, double glob_reln2_err, int slice) @@ -517,6 +608,7 @@ void Accuracy_Print(double potential_engy_glob, double potential_engy_direct_glo +/*----------------------------------------------------------------------------*/ void CSV_Print(int N, int M, struct RunParams *run_params, double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], double potential_engy_glob, double potential_engy_direct_glob, @@ -596,3 +688,67 @@ void CSV_Print(int N, int M, struct RunParams *run_params, return; } + + + 
+/*----------------------------------------------------------------------------*/ +#define erfinv_a3 -0.140543331 +#define erfinv_a2 0.914624893 +#define erfinv_a1 -1.645349621 +#define erfinv_a0 0.886226899 + +#define erfinv_b4 0.012229801 +#define erfinv_b3 -0.329097515 +#define erfinv_b2 1.442710462 +#define erfinv_b1 -2.118377725 +#define erfinv_b0 1 + +#define erfinv_c3 1.641345311 +#define erfinv_c2 3.429567803 +#define erfinv_c1 -1.62490649 +#define erfinv_c0 -1.970840454 + +#define erfinv_d2 1.637067800 +#define erfinv_d1 3.543889200 +#define erfinv_d0 1 + +double erfinv (double x) +{ + double x2, r, y; + int sign_x; + + if (x < -1 || x > 1) + return NAN; + + if (x == 0) + return 0; + + if (x > 0) + sign_x = 1; + else { + sign_x = -1; + x = -x; + } + + if (x <= 0.7) { + + x2 = x * x; + r = + x * (((erfinv_a3 * x2 + erfinv_a2) * x2 + erfinv_a1) * x2 + erfinv_a0); + r /= (((erfinv_b4 * x2 + erfinv_b3) * x2 + erfinv_b2) * x2 + + erfinv_b1) * x2 + erfinv_b0; + } + else { + y = sqrt (-log ((1 - x) / 2)); + r = (((erfinv_c3 * y + erfinv_c2) * y + erfinv_c1) * y + erfinv_c0); + r /= ((erfinv_d2 * y + erfinv_d1) * y + erfinv_d0); + } + + r = r * sign_x; + x = x * sign_x; + + r -= (erf (r) - x) / (2 / sqrt (M_PI) * exp (-r * r)); + r -= (erf (r) - x) / (2 / sqrt (M_PI) * exp (-r * r)); + + return r; +} diff --git a/examples/support_fns.h b/examples/support_fns.h index 8532ef2d..80d3dce1 100644 --- a/examples/support_fns.h +++ b/examples/support_fns.h @@ -6,7 +6,23 @@ #include "../src/run_params/struct_run_params.h" -void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int *run_direct, int *slice); +typedef enum DISTRIBUTION +{ + NO_DISTRIBUTION, + UNIFORM, + GAUSSIAN, + EXPONENTIAL +} DISTRIBUTION; + + +void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int *run_direct, int *slice, + double *xyz_limits, DISTRIBUTION *distribution); + + +double Point_Set_Init(DISTRIBUTION distribution); + +double Point_Set(DISTRIBUTION 
distribution, double xmin, double xmax); + void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], double time_run[4], double time_tree[13], double time_direct[4]); @@ -14,6 +30,7 @@ void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], void Timing_Print(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], int run_direct, struct RunParams *run_params); + void Accuracy_Calculate(double *potential_engy_glob, double *potential_engy_direct_glob, double *glob_inf_err, double *glob_relinf_err, double *glob_n2_err, double *glob_reln2_err, double *potential, double *potential_direct, int targets_num, int slice); @@ -22,6 +39,7 @@ void Accuracy_Print(double potential_engy_glob, double potential_engy_direct_glo double glob_inf_err, double glob_relinf_err, double glob_n2_err, double glob_reln2_err, int slice); + void CSV_Print(int N, int M, struct RunParams *run_params, double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], double potential_engy_glob, double potential_engy_direct_glob, From e640c3b063d22d705cf5ae729a031287ea35f12b Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Fri, 24 Apr 2020 14:14:45 -0500 Subject: [PATCH 03/95] Fixing exponential and gaussian distributions --- examples/random_cube.c | 16 +++++++++++-- examples/support_fns.c | 53 ++++++++++++++++++++++++++---------------- 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/examples/random_cube.c b/examples/random_cube.c index babaf332..456cfcec 100644 --- a/examples/random_cube.c +++ b/examples/random_cube.c @@ -35,7 +35,7 @@ int main(int argc, char **argv) int N, M, run_direct, slice; double xyz_limits[6]; DISTRIBUTION distribution; - int sample_size = 10000; + int sample_size = 1000000; struct RunParams *run_params = NULL; @@ -103,7 +103,8 @@ int main(int argc, char **argv) unsigned t_hashed = (unsigned) t; t_hashed = mrand * t_hashed + crand; srand(t_hashed ^ rank); - srand(1); + srandom(t_hashed ^ rank); + //srand(1); for (int i = 0; i < sample_size; ++i) { mySources.x[i] = Point_Set_Init(distribution); @@ -227,14 +228,25 @@ int main(int argc, char **argv) /* Generating sources and targets based on Zoltan bounding box */ + printf("zz xmin and xmax: %f, %f\n", zz_bound_x_min, zz_bound_x_max); + for (int i = 0; i < sources->num; ++i) { sources->x[i] = Point_Set(distribution, zz_bound_x_min, zz_bound_x_max) * (xmax-xmin) + xmin; sources->y[i] = Point_Set(distribution, zz_bound_y_min, zz_bound_y_max) * (ymax-ymin) + ymin; sources->z[i] = Point_Set(distribution, zz_bound_z_min, zz_bound_z_max) * (zmax-zmin) + zmin; + sources->q[i] = Point_Set(UNIFORM, -1., 1.); sources->w[i] = Point_Set(UNIFORM, -1., 1.); } + char points_file[256]; + sprintf(points_file, "points_rank_%d.csv", rank); + FILE *points_fp = fopen(points_file, "w"); + for (int i = 0; i < sources->num; ++i) { + fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]); + } + fclose(points_fp); + /* MPI-allocated target arrays for RMA use */ MPI_Alloc_mem(targets->num * sizeof(double), MPI_INFO_NULL, &(targets->x)); diff --git a/examples/support_fns.c 
b/examples/support_fns.c index 93ce3799..a43df075 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -276,22 +276,14 @@ double Point_Set_Init(DISTRIBUTION distribution) return (double)rand()/(double)(RAND_MAX); } else if (distribution == GAUSSIAN) { - double sum = 0.; - for (int i = 0; i < 12; ++i) { - sum += (double)rand()/(double)(RAND_MAX); - } - - return sum / 12.; + double u = (double)random()/(1.+ (double)(RAND_MAX)); + double x = 1. / sqrt(6.) * erfinv(2. * u - 1.); } else if (distribution == EXPONENTIAL) { - double u; - double x = 10.; - while (x > 1.) { - u = (double)rand()/(1. + (double)(RAND_MAX)); - x = -log(1. - u) / sqrt(12.); - } + double u = (double)random()/(1.+ (double)(RAND_MAX)); + double x = -log(1. - u) / sqrt(12.); return x; } @@ -301,21 +293,42 @@ double Point_Set_Init(DISTRIBUTION distribution) /*----------------------------------------------------------------------------*/ double Point_Set(DISTRIBUTION distribution, double xmin, double xmax) { + double cdf_min, cdf_max; + if (distribution == UNIFORM) { return (double)rand()/(double)(RAND_MAX) * (xmax - xmin) + xmin; } else if (distribution == GAUSSIAN) { - double sigma = 1. / sqrt(12.); - double mu = 0.5; - double u = (double)rand()/(double)(RAND_MAX) * (xmax - xmin) + xmin; - return mu + sigma * sqrt(2.) * erfinv(2. * u - 1.); + if (xmin < -1) { + cdf_min = 0; + } else { + cdf_min = 0.5 * (1. + erf((xmin) * sqrt(6.))); + } + + if (xmax > 1) { + cdf_max = 1; + } else { + cdf_max = 0.5 * (1. + erf((xmax) * sqrt(6.))); + } + + double u = (double)random()/(double)(RAND_MAX) * (cdf_max - cdf_min) + cdf_min; + + return 0.5 + 1. / sqrt(6.) * erfinv(2. * u - 1.); } else if (distribution == EXPONENTIAL) { - double lambda = sqrt(12.) / (xmax - xmin); - double u = (double)rand()/(1. + (double)(RAND_MAX)) * (xmax - xmin) + xmin; - return -log(1. 
- u) / lambda; + cdf_min = 1 - exp(-sqrt(12) * xmin); + if (xmax > 1) { + cdf_max = 1; + } else { + cdf_max = 1 - exp(-sqrt(12) * xmax); + } + + double u = (double)random()/(1. + (double)(RAND_MAX)) * (cdf_max - cdf_min) + cdf_min; + + return -log(1. - u) / sqrt(12.); + } } @@ -750,6 +763,6 @@ double erfinv (double x) r -= (erf (r) - x) / (2 / sqrt (M_PI) * exp (-r * r)); r -= (erf (r) - x) / (2 / sqrt (M_PI) * exp (-r * r)); - + return r; } From f60b47d58ce3f4d48a92d1661eb288a75169b24b Mon Sep 17 00:00:00 2001 From: Leighton Wilson Date: Fri, 24 Apr 2020 13:54:04 -0700 Subject: [PATCH 04/95] Fixing gaussian dist. --- examples/support_fns.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/examples/support_fns.c b/examples/support_fns.c index a43df075..2d0305e6 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -279,6 +279,8 @@ double Point_Set_Init(DISTRIBUTION distribution) double u = (double)random()/(1.+ (double)(RAND_MAX)); double x = 1. / sqrt(6.) * erfinv(2. * u - 1.); + + return x; } else if (distribution == EXPONENTIAL) { @@ -300,17 +302,8 @@ double Point_Set(DISTRIBUTION distribution, double xmin, double xmax) } else if (distribution == GAUSSIAN) { - if (xmin < -1) { - cdf_min = 0; - } else { - cdf_min = 0.5 * (1. + erf((xmin) * sqrt(6.))); - } - - if (xmax > 1) { - cdf_max = 1; - } else { - cdf_max = 0.5 * (1. + erf((xmax) * sqrt(6.))); - } + cdf_min = 0.5 * (1. + erf((xmin) * sqrt(6.))); + cdf_max = 0.5 * (1. + erf((xmax) * sqrt(6.))); double u = (double)random()/(double)(RAND_MAX) * (cdf_max - cdf_min) + cdf_min; From 87f57823b137f312d7d94c6f5a7c588b16100888 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Sun, 26 Apr 2020 01:50:46 -0500 Subject: [PATCH 05/95] Removing print --- examples/random_cube.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/random_cube.c b/examples/random_cube.c index 456cfcec..9fb0c88e 100644 --- a/examples/random_cube.c +++ b/examples/random_cube.c @@ -228,8 +228,6 @@ int main(int argc, char **argv) /* Generating sources and targets based on Zoltan bounding box */ - printf("zz xmin and xmax: %f, %f\n", zz_bound_x_min, zz_bound_x_max); - for (int i = 0; i < sources->num; ++i) { sources->x[i] = Point_Set(distribution, zz_bound_x_min, zz_bound_x_max) * (xmax-xmin) + xmin; sources->y[i] = Point_Set(distribution, zz_bound_y_min, zz_bound_y_max) * (ymax-ymin) + ymin; @@ -239,6 +237,7 @@ int main(int argc, char **argv) sources->w[i] = Point_Set(UNIFORM, -1., 1.); } +/* char points_file[256]; sprintf(points_file, "points_rank_%d.csv", rank); FILE *points_fp = fopen(points_file, "w"); @@ -246,6 +245,7 @@ int main(int argc, char **argv) fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]); } fclose(points_fp); +*/ /* MPI-allocated target arrays for RMA use */ From 63e777aa9f542d04e15311421f8c4ebf12ffa0aa Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Tue, 28 Apr 2020 12:47:43 -0500 Subject: [PATCH 06/95] Reads in a file for testing --- examples/CMakeLists.txt | 8 + examples/run_readin.c | 357 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 365 insertions(+) create mode 100644 examples/run_readin.c diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5c696224..156469d0 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -14,6 +14,10 @@ if(BUILD_EXAMPLES) target_link_libraries(random_cube_reproducible_cpu PRIVATE BaryTree_cpu Zoltan_Interface) install(TARGETS random_cube_reproducible_cpu DESTINATION bin) + add_executable(run_readin_cpu run_readin.c ${AUX_SRCS}) + target_link_libraries(run_readin_cpu PRIVATE BaryTree_cpu Zoltan_Interface) + install(TARGETS run_readin_cpu DESTINATION bin) + add_executable(test_BaryTreeInterface_cpu test_BaryTreeInterface.c) target_link_libraries(test_BaryTreeInterface_cpu PRIVATE BaryTree_cpu) install(TARGETS test_BaryTreeInterface_cpu DESTINATION bin) @@ -27,6 +31,10 @@ if(BUILD_EXAMPLES) target_link_libraries(random_cube_reproducible_gpu PRIVATE BaryTree_gpu Zoltan_Interface) install(TARGETS random_cube_reproducible_gpu DESTINATION bin) + add_executable(run_readin_gpu run_readin.c ${AUX_SRCS}) + target_link_libraries(run_readin_gpu PRIVATE BaryTree_gpu Zoltan_Interface) + install(TARGETS run_readin_gpu DESTINATION bin) + add_executable(test_BaryTreeInterface_gpu test_BaryTreeInterface.c) target_link_libraries(test_BaryTreeInterface_gpu PRIVATE BaryTree_gpu) install(TARGETS test_BaryTreeInterface_gpu DESTINATION bin) diff --git a/examples/run_readin.c b/examples/run_readin.c new file mode 100644 index 00000000..24b82186 --- /dev/null +++ b/examples/run_readin.c @@ -0,0 +1,357 @@ +#include +#include +#include +#include +#include +#include + +#include "../src/utilities/tools.h" +#include "../src/utilities/timers.h" + +#include "../src/particles/struct_particles.h" +#include "../src/run_params/struct_run_params.h" +#include 
"../src/run_params/run_params.h" + +#include "../src/drivers/treedriver.h" +#include "../src/drivers/directdriver.h" + +#include "zoltan_fns.h" +#include "support_fns.h" + + +int main(int argc, char **argv) +{ + /* MPI initialization */ + int rank, numProcs; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &numProcs); + if (rank == 0) printf("[random cube example] Beginning random cube example with %d ranks.\n", numProcs); + + /* run parameters */ + int N, M, run_direct, slice; + double xyz_limits[6]; + DISTRIBUTION distribution; + + struct RunParams *run_params = NULL; + + FILE *fp = fopen(argv[1], "r"); + Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution); + + if (N != M) { + if (rank == 0) printf("[random cube example] ERROR! This executable requires sources and targets " + "be equivalent. Exiting.\n"); + exit(1); + } + + /* Zoltan variables */ + int rc; + float ver; + struct Zoltan_Struct *zz; + int changes, numGidEntries, numLidEntries, numImport, numExport; + ZOLTAN_ID_PTR importGlobalGids, importLocalGids, exportGlobalGids, exportLocalGids; + int *importProcs, *importToPart, *exportProcs, *exportToPart; + int *parts; + MESH_DATA mySources, myTargets; + + /* data structures for BaryTree calculation and comparison */ + struct Particles *sources = NULL; + struct Particles *targets = NULL; + struct Particles *targets_sample = NULL; + double *potential = NULL, *potential_direct = NULL; + + /* variables for collecting accuracy info */ + double potential_engy = 0, potential_engy_glob = 0; + double potential_engy_direct = 0, potential_engy_direct_glob = 0; + double glob_inf_err = 0, glob_n2_err = 0, glob_relinf_err = 0, glob_reln2_err = 0; + + /* variables for date-time calculation */ + double time_run[4], time_tree[13], time_direct[4]; + double time_run_glob[3][4], time_tree_glob[3][13], time_direct_glob[3][4]; + + + /* Beginning total runtime timer */ + START_TIMER(&time_run[3]); + 
+ + //~~~~~~~~~~~~~~~~~~~~~~~~~~ + // Setup + //~~~~~~~~~~~~~~~~~~~~~~~~~~ + + START_TIMER(&time_run[0]); + + /* Zoltan initialization */ + if (Zoltan_Initialize(argc, argv, &ver) != ZOLTAN_OK) { + if (rank == 0) printf("[random cube example] Zoltan failed to initialize. Exiting.\n"); + MPI_Finalize(); + exit(0); + } + + zz = Zoltan_Create(MPI_COMM_WORLD); + + /* General parameters */ + + Zoltan_Set_Param(zz, "DEBUG_LEVEL", "0"); + Zoltan_Set_Param(zz, "LB_METHOD", "RCB"); + Zoltan_Set_Param(zz, "NUM_GID_ENTRIES", "1"); + Zoltan_Set_Param(zz, "NUM_LID_ENTRIES", "1"); + Zoltan_Set_Param(zz, "OBJ_WEIGHT_DIM", "1"); + Zoltan_Set_Param(zz, "RETURN_LISTS", "ALL"); + Zoltan_Set_Param(zz, "AUTO_MIGRATE", "TRUE"); + + /* RCB parameters */ + + Zoltan_Set_Param(zz, "RCB_OUTPUT_LEVEL", "0"); + Zoltan_Set_Param(zz, "RCB_RECTILINEAR_BLOCKS", "1"); + + /* Setting up sources and load balancing */ + + srand(1); + mySources.numGlobalPoints = N * numProcs; + mySources.numMyPoints = N; + mySources.x = malloc(N*sizeof(double)); + mySources.y = malloc(N*sizeof(double)); + mySources.z = malloc(N*sizeof(double)); + mySources.q = malloc(N*sizeof(double)); + mySources.w = malloc(N*sizeof(double)); + mySources.b = malloc(N*sizeof(double)); // load balancing weights + mySources.myGlobalIDs = (ZOLTAN_ID_TYPE *)malloc(sizeof(ZOLTAN_ID_TYPE) * N); + + for (int j = 0; j < rank+1; ++j) { + for (int i = 0; i < N; ++i) { + mySources.q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. 
- 1.; + mySources.w[i] = 1.0; + mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); + mySources.b[i] = 1.0; // dummy weighting scheme + } + } + + + FILE *points_fp = fopen(argv[2], "r"); + for (int i = 0; i < N; ++i) { + fscanf(points_fp, "%lf %lf %lf", &mySources.x[i], &mySources.y[i], &mySources.z[i]); + } + fclose(points_fp); + + + /* Query functions, to provide geometry to Zoltan */ + + Zoltan_Set_Num_Obj_Fn(zz, ztn_get_number_of_objects, &mySources); + Zoltan_Set_Obj_List_Fn(zz, ztn_get_object_list, &mySources); + Zoltan_Set_Num_Geom_Fn(zz, ztn_get_num_geometry, &mySources); + Zoltan_Set_Geom_Multi_Fn(zz, ztn_get_geometry_list, &mySources); + Zoltan_Set_Obj_Size_Fn(zz, ztn_obj_size, &mySources); + Zoltan_Set_Pack_Obj_Fn(zz, ztn_pack, &mySources); + Zoltan_Set_Unpack_Obj_Fn(zz, ztn_unpack, &mySources); + + rc = Zoltan_LB_Partition(zz, /* input (all remaining fields are output) */ + &changes, /* 1 if partitioning was changed, 0 otherwise */ + &numGidEntries, /* Number of integers used for a global ID */ + &numLidEntries, /* Number of integers used for a local ID */ + &numImport, /* Number of vertices to be sent to me */ + &importGlobalGids, /* Global IDs of vertices to be sent to me */ + &importLocalGids, /* Local IDs of vertices to be sent to me */ + &importProcs, /* Process rank for source of each incoming vertex */ + &importToPart, /* New partition for each incoming vertex */ + &numExport, /* Number of vertices I must send to other processes*/ + &exportGlobalGids, /* Global IDs of the vertices I must send */ + &exportLocalGids, /* Local IDs of the vertices I must send */ + &exportProcs, /* Process to which I send each of the vertices */ + &exportToPart); /* Partition to which each vertex will belong */ + + int i = 0; + while (i < mySources.numMyPoints) { + if ((int)mySources.myGlobalIDs[i] < 0) { + mySources.x[i] = mySources.x[mySources.numMyPoints-1]; + mySources.y[i] = mySources.y[mySources.numMyPoints-1]; + mySources.z[i] = 
mySources.z[mySources.numMyPoints-1]; + mySources.q[i] = mySources.q[mySources.numMyPoints-1]; + mySources.w[i] = mySources.w[mySources.numMyPoints-1]; + mySources.myGlobalIDs[i] = mySources.myGlobalIDs[mySources.numMyPoints-1]; + mySources.numMyPoints--; + } else { + i++; + } + } + + if (rc != ZOLTAN_OK) { + printf("[random cube example] Error! Zoltan has failed. Exiting. \n"); + MPI_Finalize(); + Zoltan_Destroy(&zz); + exit(1); + } + + Zoltan_LB_Free_Part(&importGlobalGids, &importLocalGids, &importProcs, &importToPart); + Zoltan_LB_Free_Part(&exportGlobalGids, &exportLocalGids, &exportProcs, &exportToPart); + Zoltan_Destroy(&zz); + + /* Setting up sources with MPI-allocated source arrays for RMA use */ + + sources = malloc(sizeof(struct Particles)); + sources->num = mySources.numMyPoints; + + + MPI_Alloc_mem(sources->num * sizeof(double), MPI_INFO_NULL, &(sources->x)); + MPI_Alloc_mem(sources->num * sizeof(double), MPI_INFO_NULL, &(sources->y)); + MPI_Alloc_mem(sources->num * sizeof(double), MPI_INFO_NULL, &(sources->z)); + MPI_Alloc_mem(sources->num * sizeof(double), MPI_INFO_NULL, &(sources->q)); + MPI_Alloc_mem(sources->num * sizeof(double), MPI_INFO_NULL, &(sources->w)); + memcpy(sources->x, mySources.x, sources->num * sizeof(double)); + memcpy(sources->y, mySources.y, sources->num * sizeof(double)); + memcpy(sources->z, mySources.z, sources->num * sizeof(double)); + memcpy(sources->q, mySources.q, sources->num * sizeof(double)); + memcpy(sources->w, mySources.w, sources->num * sizeof(double)); + + char points_file[256]; + sprintf(points_file, "points_rank_%d.csv", rank); + points_fp = fopen(points_file, "w"); + for (int i = 0; i < sources->num; ++i) { + fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]); + } + fclose(points_fp); + + /* Setting up targets */ + + targets = malloc(sizeof(struct Particles)); + targets->num = mySources.numMyPoints; + + MPI_Alloc_mem(targets->num * sizeof(double), MPI_INFO_NULL, &(targets->x)); + 
MPI_Alloc_mem(targets->num * sizeof(double), MPI_INFO_NULL, &(targets->y)); + MPI_Alloc_mem(targets->num * sizeof(double), MPI_INFO_NULL, &(targets->z)); + MPI_Alloc_mem(targets->num * sizeof(double), MPI_INFO_NULL, &(targets->q)); + memcpy(targets->x, mySources.x, targets->num * sizeof(double)); + memcpy(targets->y, mySources.y, targets->num * sizeof(double)); + memcpy(targets->z, mySources.z, targets->num * sizeof(double)); + memcpy(targets->q, mySources.q, targets->num * sizeof(double)); + + /* Deallocating arrays used for Zoltan load balancing */ + + free(mySources.x); + free(mySources.y); + free(mySources.z); + free(mySources.q); + free(mySources.w); + free(mySources.b); + free(mySources.myGlobalIDs); + + if (rank == 0) printf("[random cube example] Zoltan load balancing has finished.\n"); + + /* Initializing direct and treedriver runs */ + + targets_sample = malloc(sizeof(struct Particles)); + + potential = malloc(sizeof(double) * mySources.numMyPoints); + potential_direct = malloc(sizeof(double) * mySources.numMyPoints); + + memset(potential, 0, targets->num * sizeof(double)); + memset(potential_direct, 0, targets->num * sizeof(double)); + + +#ifdef OPENACC_ENABLED + #pragma acc set device_num(rank) device_type(acc_device_nvidia) + #pragma acc init device_type(acc_device_nvidia) +#endif + + STOP_TIMER(&time_run[0]); + MPI_Barrier(MPI_COMM_WORLD); + + + //~~~~~~~~~~~~~~~~~~~~~~~~~~ + // Running direct comparison + //~~~~~~~~~~~~~~~~~~~~~~~~~~ + + if (run_direct == 1) { + + targets_sample->num = targets->num / slice; + targets_sample->x = malloc(targets_sample->num * sizeof(double)); + targets_sample->y = malloc(targets_sample->num * sizeof(double)); + targets_sample->z = malloc(targets_sample->num * sizeof(double)); + targets_sample->q = malloc(targets_sample->num * sizeof(double)); + + for (int i = 0; i < targets_sample->num; i++) { + targets_sample->x[i] = targets->x[i*slice]; + targets_sample->y[i] = targets->y[i*slice]; + targets_sample->z[i] = 
targets->z[i*slice]; + targets_sample->q[i] = targets->q[i*slice]; + } + + if (rank == 0) printf("[random cube example] Running direct comparison...\n"); + + START_TIMER(&time_run[1]); + directdriver(sources, targets_sample, run_params, potential_direct, time_direct); + STOP_TIMER(&time_run[1]); + + free(targets_sample->x); + free(targets_sample->y); + free(targets_sample->z); + free(targets_sample->q); + free(targets_sample); + + } + + MPI_Barrier(MPI_COMM_WORLD); + + + //~~~~~~~~~~~~~~~~~~~~~~~~~~ + // Running treecode + //~~~~~~~~~~~~~~~~~~~~~~~~~~ + + if (rank == 0) printf("[random cube example] Running treedriver...\n"); + + START_TIMER(&time_run[2]); + treedriver(sources, targets, run_params, potential, time_tree); + STOP_TIMER(&time_run[2]); + + + MPI_Barrier(MPI_COMM_WORLD); + /* Ending total runtime timer */ + STOP_TIMER(&time_run[3]); + + + //~~~~~~~~~~~~~~~~~~~~~~~~~~ + // Calculate results + //~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Timing_Calculate(time_run_glob, time_tree_glob, time_direct_glob, + time_run, time_tree, time_direct); + Timing_Print(time_run_glob, time_tree_glob, time_direct_glob, run_direct, run_params); + + if (run_direct == 1) { + Accuracy_Calculate(&potential_engy_glob, &potential_engy_direct_glob, + &glob_inf_err, &glob_relinf_err, &glob_n2_err, &glob_reln2_err, + potential, potential_direct, targets->num, slice); + Accuracy_Print(potential_engy_glob, potential_engy_direct_glob, + glob_inf_err, glob_relinf_err, glob_n2_err, glob_reln2_err, slice); + } + + CSV_Print(N, M, run_params, time_run_glob, time_tree_glob, time_direct_glob, + potential_engy_glob, potential_engy_direct_glob, + glob_inf_err, glob_relinf_err, glob_n2_err, glob_reln2_err); + + + //~~~~~~~~~~~~~~~~~~~~~~~~~~ + // Cleanup + //~~~~~~~~~~~~~~~~~~~~~~~~~~ + + MPI_Free_mem(sources->x); + MPI_Free_mem(sources->y); + MPI_Free_mem(sources->z); + MPI_Free_mem(sources->q); + MPI_Free_mem(sources->w); + free(sources); + + MPI_Free_mem(targets->x); + MPI_Free_mem(targets->y); + 
MPI_Free_mem(targets->z); + MPI_Free_mem(targets->q); + free(targets); + + free(potential); + free(potential_direct); + + RunParams_Free(&run_params); + + MPI_Finalize(); + + return 0; +} From f5969dac81fa8a806bffde5a3eda8683fcb8f683 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Tue, 28 Apr 2020 12:53:40 -0500 Subject: [PATCH 07/95] Changing rands to random --- examples/random_cube.c | 3 +-- examples/random_cube_reproducible.c | 10 +++++----- examples/support_fns.c | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/examples/random_cube.c b/examples/random_cube.c index 9fb0c88e..547249c4 100644 --- a/examples/random_cube.c +++ b/examples/random_cube.c @@ -102,9 +102,8 @@ int main(int argc, char **argv) time_t t = time(NULL); unsigned t_hashed = (unsigned) t; t_hashed = mrand * t_hashed + crand; - srand(t_hashed ^ rank); srandom(t_hashed ^ rank); - //srand(1); + //srandom(1); for (int i = 0; i < sample_size; ++i) { mySources.x[i] = Point_Set_Init(distribution); diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index 005de00e..66816ab2 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -106,7 +106,7 @@ int main(int argc, char **argv) /* Setting up sources and load balancing */ - srand(1); + srandom(1); mySources.numGlobalPoints = N * numProcs; mySources.numMyPoints = N; mySources.x = malloc(N*sizeof(double)); @@ -119,10 +119,10 @@ int main(int argc, char **argv) for (int j = 0; j < rank+1; ++j) { for (int i = 0; i < N; ++i) { - mySources.x[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; - mySources.y[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; - mySources.z[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; - mySources.q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; + mySources.x[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.y[i] = ((double)random()/(double)(RAND_MAX)) * 2. 
- 1.; + mySources.z[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.q[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; mySources.w[i] = 1.0; mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); mySources.b[i] = 1.0; // dummy weighting scheme diff --git a/examples/support_fns.c b/examples/support_fns.c index 2d0305e6..dee724e7 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -273,7 +273,7 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * double Point_Set_Init(DISTRIBUTION distribution) { if (distribution == UNIFORM) { - return (double)rand()/(double)(RAND_MAX); + return (double)random()/(double)(RAND_MAX); } else if (distribution == GAUSSIAN) { @@ -298,7 +298,7 @@ double Point_Set(DISTRIBUTION distribution, double xmin, double xmax) double cdf_min, cdf_max; if (distribution == UNIFORM) { - return (double)rand()/(double)(RAND_MAX) * (xmax - xmin) + xmin; + return (double)random()/(double)(RAND_MAX) * (xmax - xmin) + xmin; } else if (distribution == GAUSSIAN) { From 53c3324e539a08682ff1077f2495b901facffb16 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Tue, 12 May 2020 03:50:18 -0500 Subject: [PATCH 08/95] Modifying approach to interaction lists for cc --- src/interaction_lists/interaction_lists.c | 110 ++++------------------ 1 file changed, 18 insertions(+), 92 deletions(-) diff --git a/src/interaction_lists/interaction_lists.c b/src/interaction_lists/interaction_lists.c index 61ca0df4..e84fcc87 100644 --- a/src/interaction_lists/interaction_lists.c +++ b/src/interaction_lists/interaction_lists.c @@ -25,7 +25,7 @@ void pc_compute_interaction_list(int tree_node, const int *tree_numpar, const do const struct RunParams *run_params); -void cc_compute_interaction_list_1( +void cc_compute_interaction_list( int source_tree_node, const int *source_tree_numpar, const double *source_tree_radius, const double *source_tree_x_mid, const double *source_tree_y_mid, const double *source_tree_z_mid, const int *source_tree_num_children, const int *source_tree_children, @@ -38,22 +38,6 @@ void cc_compute_interaction_list_1( int *sizeof_approx_list, int *sizeof_direct_list, int *approx_index_counter, int *direct_index_counter, const struct RunParams *run_params); - - -void cc_compute_interaction_list_2( - int target_tree_node, const int *target_tree_numpar, const double *target_tree_radius, - const double *target_tree_x_mid, const double *target_tree_y_mid, const double *target_tree_z_mid, - const int *target_tree_num_children, const int *target_tree_children, - - int source_tree_node, const int *source_tree_numpar, const double *source_tree_radius, - const double *source_tree_x_mid, const double *source_tree_y_mid, const double *source_tree_z_mid, - const int *source_tree_num_children, const int *source_tree_children, - - int **target_approx_list, int **target_direct_list, - int *sizeof_approx_list, int *sizeof_direct_list, - int *approx_index_counter, int *direct_index_counter, - const struct RunParams *run_params); - void InteractionLists_Make(struct InteractionLists **interaction_list_addr, @@ -143,7 +127,7 @@ 
void InteractionLists_Make(struct InteractionLists **interaction_list_addr, } else if (run_params->compute_type == CLUSTER_CLUSTER) { - cc_compute_interaction_list_1( + cc_compute_interaction_list( 0, source_tree_numpar, source_tree_radius, source_tree_x_mid, source_tree_y_mid, source_tree_z_mid, source_tree_num_children, source_tree_children, @@ -264,7 +248,7 @@ void InteractionLists_MakeRemote(const struct Tree *source_tree, } else if (run_params->compute_type == CLUSTER_CLUSTER) { - cc_compute_interaction_list_1( + cc_compute_interaction_list( 0, source_tree_numpar, source_tree_radius, source_tree_x_mid, source_tree_y_mid, source_tree_z_mid, source_tree_num_children, source_tree_children, @@ -396,7 +380,7 @@ void pc_compute_interaction_list( -void cc_compute_interaction_list_1( +void cc_compute_interaction_list( int source_tree_node, const int *source_tree_numpar, const double *source_tree_radius, const double *source_tree_x_mid, const double *source_tree_y_mid, const double *source_tree_z_mid, const int *source_tree_num_children, const int *source_tree_children, @@ -440,7 +424,8 @@ void cc_compute_interaction_list_1( * If MAC fails check to see if there are children. If not, perform direct * calculation. If there are children, call routine recursively for each. 
*/ - if (target_tree_num_children[target_tree_node] == 0) { + if ((target_tree_num_children[target_tree_node] == 0) && + (source_tree_num_children[source_tree_node] == 0)) { if (direct_index_counter[target_tree_node] >= sizeof_direct_list[target_tree_node]) { sizeof_direct_list[target_tree_node] *= 1.5; @@ -451,89 +436,30 @@ void cc_compute_interaction_list_1( target_direct_list[target_tree_node][direct_index_counter[target_tree_node]] = source_tree_node; direct_index_counter[target_tree_node]++; - } else { + } else if (target_tree_num_children[target_tree_node] > + source_tree_num_children[source_tree_node]) { + for (int i = 0; i < target_tree_num_children[target_tree_node]; i++) { - cc_compute_interaction_list_2(target_tree_children[8*target_tree_node + i], - target_tree_numpar, target_tree_radius, - target_tree_x_mid, target_tree_y_mid, target_tree_z_mid, - target_tree_num_children, target_tree_children, - + cc_compute_interaction_list( source_tree_node, source_tree_numpar, source_tree_radius, source_tree_x_mid, source_tree_y_mid, source_tree_z_mid, source_tree_num_children, source_tree_children, + + target_tree_children[8*target_tree_node + i], + target_tree_numpar, target_tree_radius, + target_tree_x_mid, target_tree_y_mid, target_tree_z_mid, + target_tree_num_children, target_tree_children, target_tree_list, target_direct_list, sizeof_tree_list, sizeof_direct_list, tree_index_counter, direct_index_counter, run_params); } - } - } - - return; - -} - - - -void cc_compute_interaction_list_2( - int target_tree_node, const int *target_tree_numpar, const double *target_tree_radius, - const double *target_tree_x_mid, const double *target_tree_y_mid, const double *target_tree_z_mid, - const int *target_tree_num_children, const int *target_tree_children, - - int source_tree_node, const int *source_tree_numpar, const double *source_tree_radius, - const double *source_tree_x_mid, const double *source_tree_y_mid, const double *source_tree_z_mid, - const int 
*source_tree_num_children, const int *source_tree_children, - - int **target_tree_list, int **target_direct_list, - int *sizeof_tree_list, int *sizeof_direct_list, - int *tree_index_counter, int *direct_index_counter, - const struct RunParams *run_params) -{ - - /* determine DIST for MAC test */ - double tx = target_tree_x_mid[target_tree_node] - source_tree_x_mid[source_tree_node]; - double ty = target_tree_y_mid[target_tree_node] - source_tree_y_mid[source_tree_node]; - double tz = target_tree_z_mid[target_tree_node] - source_tree_z_mid[source_tree_node]; - double dist = sqrt(tx*tx + ty*ty + tz*tz); - - if (((source_tree_radius[source_tree_node] + target_tree_radius[target_tree_node]) < dist * run_params->theta) - && (target_tree_radius[source_tree_node] != 0.00) - && (pow(run_params->size_check_factor * run_params->interp_pts_per_cluster, 2) - < source_tree_numpar[source_tree_node] * target_tree_numpar[target_tree_node])) { - /* - * If MAC is accepted and there is more than 1 particle - * in the box, use the expansion for the approximation. - */ - - if (tree_index_counter[target_tree_node] >= sizeof_tree_list[target_tree_node]) { - sizeof_tree_list[target_tree_node] *= 1.5; - target_tree_list[target_tree_node] = realloc_vector(target_tree_list[target_tree_node], - sizeof_tree_list[target_tree_node]); - } - - target_tree_list[target_tree_node][tree_index_counter[target_tree_node]] = source_tree_node; - tree_index_counter[target_tree_node]++; - - } else { - /* - * If MAC fails check to see if there are children. If not, perform direct - * calculation. If there are children, call routine recursively for each. 
- */ - if (source_tree_num_children[source_tree_node] == 0) { - - if (direct_index_counter[target_tree_node] >= sizeof_direct_list[target_tree_node]) { - sizeof_direct_list[target_tree_node] *= 1.5; - target_direct_list[target_tree_node] = realloc_vector(target_direct_list[target_tree_node], - sizeof_direct_list[target_tree_node]); - } - - target_direct_list[target_tree_node][direct_index_counter[target_tree_node]] = source_tree_node; - direct_index_counter[target_tree_node]++; - + } else { for (int i = 0; i < source_tree_num_children[source_tree_node]; i++) { - cc_compute_interaction_list_1(source_tree_children[8*source_tree_node + i], + cc_compute_interaction_list( + source_tree_children[8*source_tree_node + i], source_tree_numpar, source_tree_radius, source_tree_x_mid, source_tree_y_mid, source_tree_z_mid, source_tree_num_children, source_tree_children, From 68e07e809b4552473c42faca2c2b0498ef692941 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Thu, 14 May 2020 22:22:35 -0500 Subject: [PATCH 09/95] Adding plummer to random cube reproducible --- examples/random_cube_reproducible.c | 41 ++++++++++++++++++++++------- examples/support_fns.c | 34 ++++++++++++++++++++++++ examples/support_fns.h | 5 +++- 3 files changed, 70 insertions(+), 10 deletions(-) diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index 66816ab2..a6e2eb8a 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -117,16 +117,39 @@ int main(int argc, char **argv) mySources.b = malloc(N*sizeof(double)); // load balancing weights mySources.myGlobalIDs = (ZOLTAN_ID_TYPE *)malloc(sizeof(ZOLTAN_ID_TYPE) * N); - for (int j = 0; j < rank+1; ++j) { - for (int i = 0; i < N; ++i) { - mySources.x[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; - mySources.y[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; - mySources.z[i] = ((double)random()/(double)(RAND_MAX)) * 2. 
- 1.; - mySources.q[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; - mySources.w[i] = 1.0; - mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); - mySources.b[i] = 1.0; // dummy weighting scheme + if (distribution == UNIFORM) { + + for (int j = 0; j < rank+1; ++j) { // Cycle to generate same particle no matter num ranks + for (int i = 0; i < N; ++i) { + mySources.x[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.y[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.z[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.q[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.w[i] = 1.0; + mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); + mySources.b[i] = 1.0; // dummy weighting scheme + } } + + } else if (distribution == PLUMMER) { + + double plummer_R = 1.0; + double plummer_M = 1.0; + + for (int j = 0; j < rank+1; ++j) { //Cycle to generate same particle no matter num ranks + for (int i = 0; i < N; ++i) { + Point_Plummer(plummer_R , &mySources.x[i], &mySources.y[i], &mySources.z[i]); + mySources.q[i] = plummer_M / N; + mySources.w[i] = 1.0; + mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); + mySources.b[i] = 1.0; + } + } + + } else { + printf("[random cube example] ERROR! Distribution %d undefined in this " + "context. Exiting.\n", distribution); + exit(1); } /* Query functions, to provide geometry to Zoltan */ diff --git a/examples/support_fns.c b/examples/support_fns.c index dee724e7..b97f9488 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -247,6 +247,9 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } else if (strcasecmp(distribution_string, "EXPONENTIAL") == 0) { *distribution = EXPONENTIAL; + + } else if (strcasecmp(distribution_string, "PLUMMER") == 0) { + *distribution = PLUMMER; } else { if (rank == 0) { @@ -288,6 +291,12 @@ double Point_Set_Init(DISTRIBUTION distribution) double x = -log(1. 
- u) / sqrt(12.); return x; + + } else { + + printf("[random cube example] ERROR! Distribution %d undefined in this " + "context. Exiting.\n", distribution); + exit(1); } } @@ -322,10 +331,35 @@ double Point_Set(DISTRIBUTION distribution, double xmin, double xmax) return -log(1. - u) / sqrt(12.); + } else { + + printf("[random cube example] ERROR! Distribution %d undefined in this " + "context. Exiting.\n", distribution); + exit(1); } } +/*----------------------------------------------------------------------------*/ +void Point_Plummer(double R, double *x, double *y, double *z) +{ + double u = (double)random()/(1.+ (double)(RAND_MAX)); + double radius = R / sqrt(pow(u, (-2.0/3.0)) - 1.0); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + double theta = acos(-1 + u * 2.0); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + double phi = u * 2.0 * M_PI; + + *x = radius * sin(theta) * cos(phi); + *y = radius * sin(theta) * sin(phi); + *z = radius * cos(theta); + + return; +} + + /*----------------------------------------------------------------------------*/ void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], diff --git a/examples/support_fns.h b/examples/support_fns.h index 80d3dce1..9304128e 100644 --- a/examples/support_fns.h +++ b/examples/support_fns.h @@ -11,7 +11,8 @@ typedef enum DISTRIBUTION NO_DISTRIBUTION, UNIFORM, GAUSSIAN, - EXPONENTIAL + EXPONENTIAL, + PLUMMER } DISTRIBUTION; @@ -23,6 +24,8 @@ double Point_Set_Init(DISTRIBUTION distribution); double Point_Set(DISTRIBUTION distribution, double xmin, double xmax); +void Point_Plummer(double R, double *x, double *y, double *z); + void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], double time_run[4], double time_tree[13], double time_direct[4]); From b5c7539a974d214892a5de7641236cd9fc8648ab Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Thu, 14 May 2020 22:43:41 -0500 Subject: [PATCH 10/95] Adding plummer to random cube reproducible --- examples/random_cube_reproducible.c | 41 ++++++++++++++++++++++------- examples/support_fns.c | 34 ++++++++++++++++++++++++ examples/support_fns.h | 5 +++- 3 files changed, 70 insertions(+), 10 deletions(-) diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index 66816ab2..a6e2eb8a 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -117,16 +117,39 @@ int main(int argc, char **argv) mySources.b = malloc(N*sizeof(double)); // load balancing weights mySources.myGlobalIDs = (ZOLTAN_ID_TYPE *)malloc(sizeof(ZOLTAN_ID_TYPE) * N); - for (int j = 0; j < rank+1; ++j) { - for (int i = 0; i < N; ++i) { - mySources.x[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; - mySources.y[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; - mySources.z[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; - mySources.q[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; - mySources.w[i] = 1.0; - mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); - mySources.b[i] = 1.0; // dummy weighting scheme + if (distribution == UNIFORM) { + + for (int j = 0; j < rank+1; ++j) { // Cycle to generate same particle no matter num ranks + for (int i = 0; i < N; ++i) { + mySources.x[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.y[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.z[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.q[i] = ((double)random()/(double)(RAND_MAX)) * 2. 
- 1.; + mySources.w[i] = 1.0; + mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); + mySources.b[i] = 1.0; // dummy weighting scheme + } } + + } else if (distribution == PLUMMER) { + + double plummer_R = 1.0; + double plummer_M = 1.0; + + for (int j = 0; j < rank+1; ++j) { //Cycle to generate same particle no matter num ranks + for (int i = 0; i < N; ++i) { + Point_Plummer(plummer_R , &mySources.x[i], &mySources.y[i], &mySources.z[i]); + mySources.q[i] = plummer_M / N; + mySources.w[i] = 1.0; + mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); + mySources.b[i] = 1.0; + } + } + + } else { + printf("[random cube example] ERROR! Distribution %d undefined in this " + "context. Exiting.\n", distribution); + exit(1); } /* Query functions, to provide geometry to Zoltan */ diff --git a/examples/support_fns.c b/examples/support_fns.c index dee724e7..b97f9488 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -247,6 +247,9 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } else if (strcasecmp(distribution_string, "EXPONENTIAL") == 0) { *distribution = EXPONENTIAL; + + } else if (strcasecmp(distribution_string, "PLUMMER") == 0) { + *distribution = PLUMMER; } else { if (rank == 0) { @@ -288,6 +291,12 @@ double Point_Set_Init(DISTRIBUTION distribution) double x = -log(1. - u) / sqrt(12.); return x; + + } else { + + printf("[random cube example] ERROR! Distribution %d undefined in this " + "context. Exiting.\n", distribution); + exit(1); } } @@ -322,10 +331,35 @@ double Point_Set(DISTRIBUTION distribution, double xmin, double xmax) return -log(1. - u) / sqrt(12.); + } else { + + printf("[random cube example] ERROR! Distribution %d undefined in this " + "context. 
Exiting.\n", distribution); + exit(1); } } +/*----------------------------------------------------------------------------*/ +void Point_Plummer(double R, double *x, double *y, double *z) +{ + double u = (double)random()/(1.+ (double)(RAND_MAX)); + double radius = R / sqrt(pow(u, (-2.0/3.0)) - 1.0); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + double theta = acos(-1 + u * 2.0); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + double phi = u * 2.0 * M_PI; + + *x = radius * sin(theta) * cos(phi); + *y = radius * sin(theta) * sin(phi); + *z = radius * cos(theta); + + return; +} + + /*----------------------------------------------------------------------------*/ void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], diff --git a/examples/support_fns.h b/examples/support_fns.h index 80d3dce1..9304128e 100644 --- a/examples/support_fns.h +++ b/examples/support_fns.h @@ -11,7 +11,8 @@ typedef enum DISTRIBUTION NO_DISTRIBUTION, UNIFORM, GAUSSIAN, - EXPONENTIAL + EXPONENTIAL, + PLUMMER } DISTRIBUTION; @@ -23,6 +24,8 @@ double Point_Set_Init(DISTRIBUTION distribution); double Point_Set(DISTRIBUTION distribution, double xmin, double xmax); +void Point_Plummer(double R, double *x, double *y, double *z); + void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], double time_run[4], double time_tree[13], double time_direct[4]); From b4096c0d7e9ae8c2e159e58867f1623c67a2e434 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Thu, 14 May 2020 23:59:59 -0500 Subject: [PATCH 11/95] Printing out points --- examples/random_cube_reproducible.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index a6e2eb8a..d679af0a 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -218,6 +218,16 @@ int main(int argc, char **argv) memcpy(sources->z, mySources.z, sources->num * sizeof(double)); memcpy(sources->q, mySources.q, sources->num * sizeof(double)); memcpy(sources->w, mySources.w, sources->num * sizeof(double)); + + /* Output load balanced points */ + + char points_file[256]; + sprintf(points_file, "points_rank_%d.csv", rank); + FILE *points_fp = fopen(points_file, "w"); + for (int i = 0; i < sources->num; ++i) { + fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]); + } + fclose(points_fp); /* Setting up targets */ From c138de44f171f1ca138767ca0e579f5eecdad7b5 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Fri, 15 May 2020 00:01:10 -0500 Subject: [PATCH 12/95] Keep the commit commented out --- examples/random_cube_reproducible.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index d679af0a..4332b893 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -221,6 +221,7 @@ int main(int argc, char **argv) /* Output load balanced points */ + /* char points_file[256]; sprintf(points_file, "points_rank_%d.csv", rank); FILE *points_fp = fopen(points_file, "w"); @@ -228,6 +229,7 @@ int main(int argc, char **argv) fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]); } fclose(points_fp); + */ /* Setting up targets */ From 388c883daeaee62fd2d6232c5e6c2eff79d7d9e3 Mon Sep 17 00:00:00 2001 From: Leighton Wilson Date: Sat, 16 May 2020 00:21:55 -0700 Subject: [PATCH 13/95] Importing plummer --- examples/random_cube_reproducible.c | 53 ++++++++++++++++++++++++----- examples/support_fns.c | 34 ++++++++++++++++++ examples/support_fns.h | 5 ++- 3 files changed, 82 insertions(+), 10 deletions(-) diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index 66816ab2..4332b893 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -117,16 +117,39 @@ int main(int argc, char **argv) mySources.b = malloc(N*sizeof(double)); // load balancing weights mySources.myGlobalIDs = (ZOLTAN_ID_TYPE *)malloc(sizeof(ZOLTAN_ID_TYPE) * N); - for (int j = 0; j < rank+1; ++j) { - for (int i = 0; i < N; ++i) { - mySources.x[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; - mySources.y[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; - mySources.z[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; - mySources.q[i] = ((double)random()/(double)(RAND_MAX)) * 2. 
- 1.; - mySources.w[i] = 1.0; - mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); - mySources.b[i] = 1.0; // dummy weighting scheme + if (distribution == UNIFORM) { + + for (int j = 0; j < rank+1; ++j) { // Cycle to generate same particle no matter num ranks + for (int i = 0; i < N; ++i) { + mySources.x[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.y[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.z[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.q[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.w[i] = 1.0; + mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); + mySources.b[i] = 1.0; // dummy weighting scheme + } } + + } else if (distribution == PLUMMER) { + + double plummer_R = 1.0; + double plummer_M = 1.0; + + for (int j = 0; j < rank+1; ++j) { //Cycle to generate same particle no matter num ranks + for (int i = 0; i < N; ++i) { + Point_Plummer(plummer_R , &mySources.x[i], &mySources.y[i], &mySources.z[i]); + mySources.q[i] = plummer_M / N; + mySources.w[i] = 1.0; + mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); + mySources.b[i] = 1.0; + } + } + + } else { + printf("[random cube example] ERROR! Distribution %d undefined in this " + "context. 
Exiting.\n", distribution); + exit(1); } /* Query functions, to provide geometry to Zoltan */ @@ -195,6 +218,18 @@ int main(int argc, char **argv) memcpy(sources->z, mySources.z, sources->num * sizeof(double)); memcpy(sources->q, mySources.q, sources->num * sizeof(double)); memcpy(sources->w, mySources.w, sources->num * sizeof(double)); + + /* Output load balanced points */ + + /* + char points_file[256]; + sprintf(points_file, "points_rank_%d.csv", rank); + FILE *points_fp = fopen(points_file, "w"); + for (int i = 0; i < sources->num; ++i) { + fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]); + } + fclose(points_fp); + */ /* Setting up targets */ diff --git a/examples/support_fns.c b/examples/support_fns.c index dee724e7..b97f9488 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -247,6 +247,9 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } else if (strcasecmp(distribution_string, "EXPONENTIAL") == 0) { *distribution = EXPONENTIAL; + + } else if (strcasecmp(distribution_string, "PLUMMER") == 0) { + *distribution = PLUMMER; } else { if (rank == 0) { @@ -288,6 +291,12 @@ double Point_Set_Init(DISTRIBUTION distribution) double x = -log(1. - u) / sqrt(12.); return x; + + } else { + + printf("[random cube example] ERROR! Distribution %d undefined in this " + "context. Exiting.\n", distribution); + exit(1); } } @@ -322,10 +331,35 @@ double Point_Set(DISTRIBUTION distribution, double xmin, double xmax) return -log(1. - u) / sqrt(12.); + } else { + + printf("[random cube example] ERROR! Distribution %d undefined in this " + "context. 
Exiting.\n", distribution); + exit(1); } } +/*----------------------------------------------------------------------------*/ +void Point_Plummer(double R, double *x, double *y, double *z) +{ + double u = (double)random()/(1.+ (double)(RAND_MAX)); + double radius = R / sqrt(pow(u, (-2.0/3.0)) - 1.0); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + double theta = acos(-1 + u * 2.0); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + double phi = u * 2.0 * M_PI; + + *x = radius * sin(theta) * cos(phi); + *y = radius * sin(theta) * sin(phi); + *z = radius * cos(theta); + + return; +} + + /*----------------------------------------------------------------------------*/ void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], diff --git a/examples/support_fns.h b/examples/support_fns.h index 80d3dce1..9304128e 100644 --- a/examples/support_fns.h +++ b/examples/support_fns.h @@ -11,7 +11,8 @@ typedef enum DISTRIBUTION NO_DISTRIBUTION, UNIFORM, GAUSSIAN, - EXPONENTIAL + EXPONENTIAL, + PLUMMER } DISTRIBUTION; @@ -23,6 +24,8 @@ double Point_Set_Init(DISTRIBUTION distribution); double Point_Set(DISTRIBUTION distribution, double xmin, double xmax); +void Point_Plummer(double R, double *x, double *y, double *z); + void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], double time_run[4], double time_tree[13], double time_direct[4]); From 9c5e586eb1b58d24a93d7c928a5faa66479d5856 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Sat, 16 May 2020 21:56:30 -0500 Subject: [PATCH 14/95] Re-engineering CC interaction lists to include PC and CP interactions --- src/interaction_lists/interaction_lists.c | 422 ++++++++++++++---- .../struct_interaction_lists.h | 6 + 2 files changed, 340 insertions(+), 88 deletions(-) diff --git a/src/interaction_lists/interaction_lists.c b/src/interaction_lists/interaction_lists.c index e84fcc87..eb66b219 100644 --- a/src/interaction_lists/interaction_lists.c +++ b/src/interaction_lists/interaction_lists.c @@ -19,9 +19,9 @@ void pc_compute_interaction_list(int tree_node, const int *tree_numpar, const do double batch_radius, double batch_x_mid, double batch_y_mid, double batch_z_mid, - int **batch_tree_list, int **batch_direct_list, - int *sizeof_tree_list, int *sizeof_direct_list, - int *tree_index_counter, int *direct_index_counter, + int **batch_approx_list, int **batch_direct_list, + int *sizeof_approx_list, int *sizeof_direct_list, + int *approx_index_counter, int *direct_index_counter, const struct RunParams *run_params); @@ -37,6 +37,11 @@ void cc_compute_interaction_list( int **target_approx_list, int **target_direct_list, int *sizeof_approx_list, int *sizeof_direct_list, int *approx_index_counter, int *direct_index_counter, + + int **cc_source_approx_list, int **cc_target_approx_list, + int *sizeof_source_approx_list, int *sizeof_target_approx_list, + int *cc_source_approx_index_counter, int *cc_target_approx_index_counter, + const struct RunParams *run_params); @@ -48,6 +53,24 @@ void InteractionLists_Make(struct InteractionLists **interaction_list_addr, *interaction_list_addr = malloc(sizeof(struct InteractionLists)); struct InteractionLists *interaction_list = *interaction_list_addr; + + + /* Nullify unallocated arrays in interaction_list struct */ + + interaction_list->approx_interactions = NULL; + interaction_list->direct_interactions = NULL; + + interaction_list->num_approx = NULL; + interaction_list->num_direct = NULL; + + 
interaction_list->cc_source_approx_interactions = NULL; + interaction_list->cc_target_approx_interactions = NULL; + + interaction_list->num_cc_source_approx = NULL; + interaction_list->num_cc_target_approx = NULL; + + + /* Set addresses for interaction lists common to PC, CP, and CC */ int ***approx_inter_list_addr = &(interaction_list->approx_interactions); int ***direct_inter_list_addr = &(interaction_list->direct_interactions); @@ -56,61 +79,67 @@ void InteractionLists_Make(struct InteractionLists **interaction_list_addr, int **num_direct_addr = &(interaction_list->num_direct); - int source_tree_numnodes = source_tree->numnodes; - const int *source_tree_numpar = source_tree->numpar; - const double *source_tree_radius = source_tree->radius; - const double *source_tree_x_mid = source_tree->x_mid; - const double *source_tree_y_mid = source_tree->y_mid; - const double *source_tree_z_mid = source_tree->z_mid; + /* Set addresses for variables pointing to source and target tree struct members */ + + int source_tree_numnodes = source_tree->numnodes; + const int *source_tree_numpar = source_tree->numpar; + const double *source_tree_radius = source_tree->radius; + const double *source_tree_x_mid = source_tree->x_mid; + const double *source_tree_y_mid = source_tree->y_mid; + const double *source_tree_z_mid = source_tree->z_mid; - const int *source_tree_num_children = source_tree->num_children; - const int *source_tree_children = source_tree->children; + const int *source_tree_num_children = source_tree->num_children; + const int *source_tree_children = source_tree->children; - int target_tree_numnodes = target_tree->numnodes; - const int *target_tree_numpar = target_tree->numpar; - const double *target_tree_radius = target_tree->radius; - const double *target_tree_x_mid = target_tree->x_mid; - const double *target_tree_y_mid = target_tree->y_mid; - const double *target_tree_z_mid = target_tree->z_mid; + int target_tree_numnodes = target_tree->numnodes; + const int 
*target_tree_numpar = target_tree->numpar; + const double *target_tree_radius = target_tree->radius; + const double *target_tree_x_mid = target_tree->x_mid; + const double *target_tree_y_mid = target_tree->y_mid; + const double *target_tree_z_mid = target_tree->z_mid; - const int *target_tree_num_children = target_tree->num_children; - const int *target_tree_children = target_tree->children; + const int *target_tree_num_children = target_tree->num_children; + const int *target_tree_children = target_tree->children; - make_matrix(*approx_inter_list_addr, target_tree_numnodes, 50); - make_matrix(*direct_inter_list_addr, target_tree_numnodes, 50); - int **approx_inter_list = *approx_inter_list_addr; - int **direct_inter_list = *direct_inter_list_addr; + /* Allocate and initialize interaction lists common to PC, CP, and CC */ + + make_matrix(*approx_inter_list_addr, target_tree_numnodes, 50); + make_matrix(*direct_inter_list_addr, target_tree_numnodes, 50); + int **approx_inter_list = *approx_inter_list_addr; + int **direct_inter_list = *direct_inter_list_addr; - make_vector(*num_approx_addr, target_tree_numnodes); - make_vector(*num_direct_addr, target_tree_numnodes); - int *num_approx_inter = *num_approx_addr; - int *num_direct_inter = *num_direct_addr; + make_vector(*num_approx_addr, target_tree_numnodes); + make_vector(*num_direct_addr, target_tree_numnodes); + int *num_approx_inter = *num_approx_addr; + int *num_direct_inter = *num_direct_addr; - int *sizeof_approx_inter_list, *sizeof_direct_inter_list; - make_vector(sizeof_approx_inter_list, target_tree_numnodes); - make_vector(sizeof_direct_inter_list, target_tree_numnodes); + int *sizeof_approx_inter_list, *sizeof_direct_inter_list; + make_vector(sizeof_approx_inter_list, target_tree_numnodes); + make_vector(sizeof_direct_inter_list, target_tree_numnodes); - for (int i = 0; i < target_tree_numnodes; i++) sizeof_approx_inter_list[i] = 50; - for (int i = 0; i < target_tree_numnodes; i++) 
sizeof_direct_inter_list[i] = 50; + for (int i = 0; i < target_tree_numnodes; i++) sizeof_approx_inter_list[i] = 50; + for (int i = 0; i < target_tree_numnodes; i++) sizeof_direct_inter_list[i] = 50; - for (int i = 0; i < target_tree_numnodes; i++) - for (int j = 0; j < 50; j++) - approx_inter_list[i][j] = -1; + for (int i = 0; i < target_tree_numnodes; i++) + for (int j = 0; j < 50; j++) + approx_inter_list[i][j] = -1; - for (int i = 0; i < target_tree_numnodes; i++) - for (int j = 0; j < 50; j++) - direct_inter_list[i][j] = -1; + for (int i = 0; i < target_tree_numnodes; i++) + for (int j = 0; j < 50; j++) + direct_inter_list[i][j] = -1; - for (int i = 0; i < target_tree_numnodes; i++) num_approx_inter[i] = 0; - for (int i = 0; i < target_tree_numnodes; i++) num_direct_inter[i] = 0; + for (int i = 0; i < target_tree_numnodes; i++) num_approx_inter[i] = 0; + for (int i = 0; i < target_tree_numnodes; i++) num_direct_inter[i] = 0; if (run_params->compute_type == PARTICLE_CLUSTER || run_params->compute_type == CLUSTER_PARTICLE) { + /* Build PC and CP interaction lists */ + for (int i = 0; i < target_tree_numnodes; i++) { pc_compute_interaction_list( 0, source_tree_numpar, source_tree_radius, @@ -127,6 +156,30 @@ void InteractionLists_Make(struct InteractionLists **interaction_list_addr, } else if (run_params->compute_type == CLUSTER_CLUSTER) { + /* Allocate interaction lists exclusive to CC */ + + int ***cc_source_approx_inter_list_addr = &(interaction_list->cc_source_approx_interactions); + int ***cc_target_approx_inter_list_addr = &(interaction_list->cc_target_approx_interactions); + + int **num_cc_source_approx_addr = &(interaction_list->num_cc_source_approx); + int **num_cc_target_approx_addr = &(interaction_list->num_cc_target_approx); + + make_matrix(*cc_source_approx_inter_list_addr, target_tree_numnodes, 50); + make_matrix(*cc_target_approx_inter_list_addr, target_tree_numnodes, 50); + int **cc_source_approx_inter_list = *cc_source_approx_inter_list_addr; + 
int **cc_target_approx_inter_list = *cc_target_approx_inter_list_addr; + + make_vector(*num_cc_source_approx_addr, target_tree_numnodes); + make_vector(*num_cc_target_approx_addr, target_tree_numnodes); + int *num_cc_source_approx_inter = *num_cc_source_approx_addr; + int *num_cc_target_approx_inter = *num_cc_target_approx_addr; + + int *sizeof_cc_source_approx_inter_list, *sizeof_cc_target_approx_inter_list; + make_vector(sizeof_cc_source_approx_inter_list, target_tree_numnodes); + make_vector(sizeof_cc_target_approx_inter_list, target_tree_numnodes); + + /* Build CC interaction lists */ + cc_compute_interaction_list( 0, source_tree_numpar, source_tree_radius, source_tree_x_mid, source_tree_y_mid, source_tree_z_mid, @@ -139,13 +192,20 @@ void InteractionLists_Make(struct InteractionLists **interaction_list_addr, approx_inter_list, direct_inter_list, sizeof_approx_inter_list, sizeof_direct_inter_list, num_approx_inter, num_direct_inter, + + cc_source_approx_inter_list, cc_target_approx_inter_list, + sizeof_cc_source_approx_inter_list, sizeof_cc_target_approx_inter_list, + num_cc_source_approx_inter, num_cc_target_approx_inter, + run_params); + + free_vector(sizeof_cc_source_approx_inter_list); + free_vector(sizeof_cc_target_approx_inter_list); } free_vector(sizeof_approx_inter_list); free_vector(sizeof_direct_inter_list); - return; } /* END of function Interaction_MakeList */ @@ -158,8 +218,16 @@ void InteractionLists_Free(struct InteractionLists **interaction_list_addr) free_matrix(interaction_list->approx_interactions); free_matrix(interaction_list->direct_interactions); + free_vector(interaction_list->num_approx); free_vector(interaction_list->num_direct); + + free_matrix(interaction_list->cc_source_approx_interactions); + free_matrix(interaction_list->cc_target_approx_interactions); + + free_vector(interaction_list->num_cc_source_approx); + free_vector(interaction_list->num_cc_target_approx); + free(interaction_list); interaction_list = NULL; @@ -232,8 +300,8 
@@ void InteractionLists_MakeRemote(const struct Tree *source_tree, if (run_params->compute_type == PARTICLE_CLUSTER) { - for (int i = 0; i < target_tree_numnodes; i++) { - pc_compute_interaction_list( + for (int i = 0; i < target_tree_numnodes; i++) { + pc_compute_interaction_list( 0, source_tree_numpar, source_tree_radius, source_tree_x_mid, source_tree_y_mid, source_tree_z_mid, source_tree_num_children, source_tree_children, @@ -248,6 +316,33 @@ void InteractionLists_MakeRemote(const struct Tree *source_tree, } else if (run_params->compute_type == CLUSTER_CLUSTER) { + int **temp_cc_source_approx_inter_list, **temp_cc_target_approx_inter_list; + int *sizeof_cc_source_approx_inter_list, *sizeof_cc_target_approx_inter_list; + int *num_cc_source_approx_inter, *num_cc_target_approx_inter; + + make_matrix(temp_cc_source_approx_inter_list, target_tree_numnodes, 50); + make_matrix(temp_cc_target_approx_inter_list, target_tree_numnodes, 50); + + make_vector(sizeof_cc_source_approx_inter_list, target_tree_numnodes); + make_vector(sizeof_cc_target_approx_inter_list, target_tree_numnodes); + + make_vector(num_cc_source_approx_inter, target_tree_numnodes); + make_vector(num_cc_target_approx_inter, target_tree_numnodes); + + for (int i = 0; i < target_tree_numnodes; i++) sizeof_cc_source_approx_inter_list[i] = 50; + for (int i = 0; i < target_tree_numnodes; i++) sizeof_cc_target_approx_inter_list[i] = 50; + + for (int i = 0; i < target_tree_numnodes; i++) + for(int j = 0; j < 50; j++) + temp_cc_source_approx_inter_list[i][j] = -1; + + for (int i = 0; i < target_tree_numnodes; i++) + for(int j = 0; j < 50; j++) + temp_cc_target_approx_inter_list[i][j] = -1; + + for (int i = 0; i < target_tree_numnodes; i++) num_cc_source_approx_inter[i] = 0; + for (int i = 0; i < target_tree_numnodes; i++) num_cc_target_approx_inter[i] = 0; + cc_compute_interaction_list( 0, source_tree_numpar, source_tree_radius, source_tree_x_mid, source_tree_y_mid, source_tree_z_mid, @@ -260,24 +355,61 @@ 
void InteractionLists_MakeRemote(const struct Tree *source_tree, temp_approx_inter_list, temp_direct_inter_list, sizeof_approx_inter_list, sizeof_direct_inter_list, num_approx_inter, num_direct_inter, + + temp_cc_source_approx_inter_list, temp_cc_target_approx_inter_list, + sizeof_cc_source_approx_inter_list, sizeof_cc_target_approx_inter_list, + num_cc_source_approx_inter, num_cc_target_approx_inter, + run_params); - } + + for (int j = 0; j < target_tree_numnodes; j++) { + + /* CC source approx is a PC interaction and requires remote interpolation points */ + + for (int i = 0; i < num_cc_source_approx_inter[j]; i++) { + int source_node_index = temp_cc_source_approx_inter_list[j][i]; + approx_list_unpacked[source_node_index] = source_node_index; + } + /* CC target approx is a CP interaction and requires remote sources */ + + for (int i = 0; i < num_cc_target_approx_inter[j]; i++) { + int source_node_index = temp_cc_target_approx_inter_list[j][i]; + direct_list[source_node_index] = source_node_index; + } + } + + free_matrix(temp_cc_source_approx_inter_list); + free_matrix(temp_cc_target_approx_inter_list); + free_vector(sizeof_cc_source_approx_inter_list); + free_vector(sizeof_cc_target_approx_inter_list); + + free_vector(num_cc_source_approx_inter); + free_vector(num_cc_target_approx_inter); + + } + + + /* Fill in unpacked approx list and direct list for communication from interaction + * lists common to PC, CP, and CC + */ + for (int j = 0; j < target_tree_numnodes; j++) { + for (int i = 0; i < num_approx_inter[j]; i++) { - int source_node_index = temp_approx_inter_list[j][i]; approx_list_unpacked[source_node_index] = source_node_index; } for (int i = 0; i < num_direct_inter[j]; i++) { - int source_node_index = temp_direct_inter_list[j][i]; direct_list[source_node_index] = source_node_index; } } + + /* Build the packed approx list for remote communication of interpolation points */ int approx_counter = 0; for (int i = 0; i < source_tree_numnodes; i++) { @@ -287,7 
+419,8 @@ void InteractionLists_MakeRemote(const struct Tree *source_tree, } } - + /* Free temp lists common to PC, CP, and CC */ + free_matrix(temp_approx_inter_list); free_matrix(temp_direct_inter_list); @@ -314,9 +447,9 @@ void pc_compute_interaction_list( double batch_radius, double batch_x_mid, double batch_y_mid, double batch_z_mid, - int **batch_tree_list, int **batch_direct_list, - int *sizeof_tree_list, int *sizeof_direct_list, - int *tree_index_counter, int *direct_index_counter, + int **batch_approx_list, int **batch_direct_list, + int *sizeof_approx_list, int *sizeof_direct_list, + int *approx_index_counter, int *direct_index_counter, const struct RunParams *run_params) { @@ -334,13 +467,13 @@ void pc_compute_interaction_list( * If MAC is accepted use the expansion for the approximation. */ - if (*tree_index_counter >= *sizeof_tree_list) { - (*sizeof_tree_list) *= 1.5; - (*batch_tree_list) = realloc_vector(*batch_tree_list, *sizeof_tree_list); + if (*approx_index_counter >= *sizeof_approx_list) { + (*sizeof_approx_list) *= 1.5; + (*batch_approx_list) = realloc_vector(*batch_approx_list, *sizeof_approx_list); } - (*batch_tree_list)[*tree_index_counter] = tree_node; - (*tree_index_counter)++; + (*batch_approx_list)[*approx_index_counter] = tree_node; + (*approx_index_counter)++; } else { /* @@ -366,9 +499,9 @@ void pc_compute_interaction_list( batch_radius, batch_x_mid, batch_y_mid, batch_z_mid, - batch_tree_list, batch_direct_list, - sizeof_tree_list, sizeof_direct_list, - tree_index_counter, direct_index_counter, + batch_approx_list, batch_direct_list, + sizeof_approx_list, sizeof_direct_list, + approx_index_counter, direct_index_counter, run_params); } } @@ -389,11 +522,18 @@ void cc_compute_interaction_list( const double *target_tree_x_mid, const double *target_tree_y_mid, const double *target_tree_z_mid, const int *target_tree_num_children, const int *target_tree_children, - int **target_tree_list, int **target_direct_list, - int *sizeof_tree_list, 
int *sizeof_direct_list, - int *tree_index_counter, int *direct_index_counter, + int **approx_list, int **direct_list, + int *sizeof_approx_list, int *sizeof_direct_list, + int *approx_index_counter, int *direct_index_counter, + + int **source_approx_list, int **target_approx_list, + int *sizeof_source_approx_list, int *sizeof_target_approx_list, + int *source_approx_index_counter, int *target_approx_index_counter, + const struct RunParams *run_params) { + + int size_check = run_params->size_check_factor * run_params->interp_pts_per_cluster; /* determine DIST for MAC test */ double tx = target_tree_x_mid[target_tree_node] - source_tree_x_mid[source_tree_node]; @@ -401,23 +541,60 @@ void cc_compute_interaction_list( double tz = target_tree_z_mid[target_tree_node] - source_tree_z_mid[source_tree_node]; double dist = sqrt(tx*tx + ty*ty + tz*tz); - if (((source_tree_radius[source_tree_node] + target_tree_radius[target_tree_node]) < dist * run_params->theta) - && (source_tree_radius[source_tree_node] != 0.00) - && (pow(run_params->size_check_factor * run_params->interp_pts_per_cluster, 2) - < source_tree_numpar[source_tree_node] * target_tree_numpar[target_tree_node])) { - /* - * If MAC is accepted and there is more than 1 particle - * in the box, use the expansion for the approximation. 
- */ - - if (tree_index_counter[target_tree_node] >= sizeof_tree_list[target_tree_node]) { - sizeof_tree_list[target_tree_node] *= 1.5; - target_tree_list[target_tree_node] = realloc_vector(target_tree_list[target_tree_node], - sizeof_tree_list[target_tree_node]); + if ((source_tree_radius[source_tree_node] + target_tree_radius[target_tree_node]) + < dist * run_params->theta) { + + if ((source_tree_numpar[source_tree_node] < size_check) && + (target_tree_numpar[target_tree_node] < size_check)) { + + /* add to direct list */ + + if (direct_index_counter[target_tree_node] >= sizeof_direct_list[target_tree_node]) { + sizeof_direct_list[target_tree_node] *= 1.5; + direct_list[target_tree_node] = realloc_vector(direct_list[target_tree_node], + sizeof_direct_list[target_tree_node]); + } + direct_list[target_tree_node][direct_index_counter[target_tree_node]] = source_tree_node; + direct_index_counter[target_tree_node]++; + + } else if (source_tree_numpar[source_tree_node] < size_check) { + + /* add to CP approx list */ + + if (target_approx_index_counter[target_tree_node] >= sizeof_target_approx_list[target_tree_node]) { + sizeof_target_approx_list[target_tree_node] *= 1.5; + target_approx_list[target_tree_node] = realloc_vector(target_approx_list[target_tree_node], + sizeof_target_approx_list[target_tree_node]); + } + target_approx_list[target_tree_node][target_approx_index_counter[target_tree_node]] = source_tree_node; + target_approx_index_counter[target_tree_node]++; + + } else if (target_tree_numpar[target_tree_node] < size_check) { + + /* add to PC approx list */ + + if (source_approx_index_counter[target_tree_node] >= sizeof_source_approx_list[target_tree_node]) { + sizeof_source_approx_list[target_tree_node] *= 1.5; + source_approx_list[target_tree_node] = realloc_vector(source_approx_list[target_tree_node], + sizeof_source_approx_list[target_tree_node]); + } + source_approx_list[target_tree_node][source_approx_index_counter[target_tree_node]] = source_tree_node; 
+ source_approx_index_counter[target_tree_node]++; + + } else { + + /* add to CC approx list */ + + if (approx_index_counter[target_tree_node] >= sizeof_approx_list[target_tree_node]) { + sizeof_approx_list[target_tree_node] *= 1.5; + approx_list[target_tree_node] = realloc_vector(approx_list[target_tree_node], + sizeof_approx_list[target_tree_node]); + } + approx_list[target_tree_node][approx_index_counter[target_tree_node]] = source_tree_node; + approx_index_counter[target_tree_node]++; + } - - target_tree_list[target_tree_node][tree_index_counter[target_tree_node]] = source_tree_node; - tree_index_counter[target_tree_node]++; + } else { /* @@ -426,18 +603,74 @@ void cc_compute_interaction_list( */ if ((target_tree_num_children[target_tree_node] == 0) && (source_tree_num_children[source_tree_node] == 0)) { + + /* add to direct list */ if (direct_index_counter[target_tree_node] >= sizeof_direct_list[target_tree_node]) { sizeof_direct_list[target_tree_node] *= 1.5; - target_direct_list[target_tree_node] = realloc_vector(target_direct_list[target_tree_node], - sizeof_direct_list[target_tree_node]); + direct_list[target_tree_node] = realloc_vector(direct_list[target_tree_node], + sizeof_direct_list[target_tree_node]); } - target_direct_list[target_tree_node][direct_index_counter[target_tree_node]] = source_tree_node; + direct_list[target_tree_node][direct_index_counter[target_tree_node]] = source_tree_node; direct_index_counter[target_tree_node]++; + + } else if (source_tree_num_children[source_tree_node] == 0) { + + /* traverse target tree */ + + for (int i = 0; i < target_tree_num_children[target_tree_node]; i++) { + cc_compute_interaction_list( + source_tree_node, source_tree_numpar, source_tree_radius, + source_tree_x_mid, source_tree_y_mid, source_tree_z_mid, + source_tree_num_children, source_tree_children, + + target_tree_children[8*target_tree_node + i], + target_tree_numpar, target_tree_radius, + target_tree_x_mid, target_tree_y_mid, target_tree_z_mid, + 
target_tree_num_children, target_tree_children, - } else if (target_tree_num_children[target_tree_node] > - source_tree_num_children[source_tree_node]) { + approx_list, direct_list, + sizeof_approx_list, sizeof_direct_list, + approx_index_counter, direct_index_counter, + + source_approx_list, target_approx_list, + sizeof_source_approx_list, sizeof_target_approx_list, + source_approx_index_counter, target_approx_index_counter, + + run_params); + } + + } else if (target_tree_num_children[target_tree_node] == 0) { + + /* traverse source tree */ + + for (int i = 0; i < source_tree_num_children[source_tree_node]; i++) { + cc_compute_interaction_list( + source_tree_children[8*source_tree_node + i], + source_tree_numpar, source_tree_radius, + source_tree_x_mid, source_tree_y_mid, source_tree_z_mid, + source_tree_num_children, source_tree_children, + + target_tree_node, target_tree_numpar, target_tree_radius, + target_tree_x_mid, target_tree_y_mid, target_tree_z_mid, + target_tree_num_children, target_tree_children, + + approx_list, direct_list, + sizeof_approx_list, sizeof_direct_list, + approx_index_counter, direct_index_counter, + + source_approx_list, target_approx_list, + sizeof_source_approx_list, sizeof_target_approx_list, + source_approx_index_counter, target_approx_index_counter, + + run_params); + } + + } else if (source_tree_numpar[source_tree_node] < + target_tree_numpar[target_tree_node]) { + + /* traverse target tree */ for (int i = 0; i < target_tree_num_children[target_tree_node]; i++) { cc_compute_interaction_list( @@ -450,13 +683,21 @@ void cc_compute_interaction_list( target_tree_x_mid, target_tree_y_mid, target_tree_z_mid, target_tree_num_children, target_tree_children, - target_tree_list, target_direct_list, - sizeof_tree_list, sizeof_direct_list, - tree_index_counter, direct_index_counter, + approx_list, direct_list, + sizeof_approx_list, sizeof_direct_list, + approx_index_counter, direct_index_counter, + + source_approx_list, target_approx_list, + 
sizeof_source_approx_list, sizeof_target_approx_list, + source_approx_index_counter, target_approx_index_counter, + run_params); } } else { + + /* traverse source tree */ + for (int i = 0; i < source_tree_num_children[source_tree_node]; i++) { cc_compute_interaction_list( source_tree_children[8*source_tree_node + i], @@ -468,9 +709,14 @@ void cc_compute_interaction_list( target_tree_x_mid, target_tree_y_mid, target_tree_z_mid, target_tree_num_children, target_tree_children, - target_tree_list, target_direct_list, - sizeof_tree_list, sizeof_direct_list, - tree_index_counter, direct_index_counter, + approx_list, direct_list, + sizeof_approx_list, sizeof_direct_list, + approx_index_counter, direct_index_counter, + + source_approx_list, target_approx_list, + sizeof_source_approx_list, sizeof_target_approx_list, + source_approx_index_counter, target_approx_index_counter, + run_params); } } diff --git a/src/interaction_lists/struct_interaction_lists.h b/src/interaction_lists/struct_interaction_lists.h index 3b65406d..59397c73 100644 --- a/src/interaction_lists/struct_interaction_lists.h +++ b/src/interaction_lists/struct_interaction_lists.h @@ -9,6 +9,12 @@ struct InteractionLists int *num_approx; int *num_direct; + + int **cc_source_approx_interactions; + int **cc_target_approx_interactions; + + int *num_cc_source_approx; + int *num_cc_target_approx; }; From a538a1975241810fe3ec7764239fa2334cc45cc5 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Sun, 17 May 2020 01:54:10 -0500 Subject: [PATCH 15/95] It works --- src/drivers/treedriver.c | 53 +- .../interaction_compute_cc.c | 580 ++++++++++++++++-- src/interaction_lists/interaction_lists.c | 14 + 3 files changed, 584 insertions(+), 63 deletions(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index eff466dc..e858a3fb 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -58,10 +58,14 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run int total_num_direct = 0; int total_num_approx = 0; int total_num_inter = 0; + + // These types of interactions only occur for CC + int total_num_source_approx = 0; + int total_num_target_approx = 0; //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - + //-------------------------------------------------------------------- //-------------------------------------------------------------------- @@ -290,7 +294,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run int get_from = (num_procs + rank - proc_id) % num_procs; CommWindows_Lock(comm_windows, get_from); - //This is a non-blocking call! + // This is a non-blocking call! 
CommWindows_GetData(let_clusters, let_sources, comm_types, comm_windows, get_from, run_params); CommWindows_Unlock(comm_windows, get_from); } @@ -503,6 +507,11 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run if (run_params->verbosity > 0) { total_num_approx += sum_int(local_interaction_list->num_approx, target_tree->numnodes); total_num_direct += sum_int(local_interaction_list->num_direct, target_tree->numnodes); + + total_num_source_approx += sum_int(local_interaction_list->num_cc_source_approx, + target_tree->numnodes); + total_num_target_approx += sum_int(local_interaction_list->num_cc_target_approx, + target_tree->numnodes); } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -533,6 +542,11 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run if (run_params->verbosity > 0) { total_num_approx += sum_int(let_interaction_list->num_approx, target_tree->numnodes); total_num_direct += sum_int(let_interaction_list->num_direct, target_tree->numnodes); + + total_num_source_approx += sum_int(let_interaction_list->num_cc_source_approx, + target_tree->numnodes); + total_num_target_approx += sum_int(let_interaction_list->num_cc_target_approx, + target_tree->numnodes); } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -608,8 +622,13 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run int global_num_inter, max_num_inter, min_num_inter; int global_num_direct, max_num_direct, min_num_direct; int global_num_approx, max_num_approx, min_num_approx; + + int global_num_source_approx, max_num_source_approx, min_num_source_approx; + int global_num_target_approx, max_num_target_approx, min_num_target_approx; - total_num_inter = total_num_direct + total_num_approx; + total_num_inter = total_num_direct + total_num_approx + + total_num_source_approx + total_num_target_approx; + MPI_Reduce(&total_num_inter, &global_num_inter, 1, MPI_INT, 
MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&total_num_inter, &max_num_inter, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&total_num_inter, &min_num_inter, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); @@ -622,6 +641,17 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run MPI_Reduce(&total_num_approx, &max_num_approx, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&total_num_approx, &min_num_approx, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + // These types of interactions only occur for CC + if (run_params->compute_type == CLUSTER_CLUSTER) { + MPI_Reduce(&total_num_source_approx, &global_num_source_approx, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_source_approx, &max_num_source_approx, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_source_approx, &min_num_source_approx, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + + MPI_Reduce(&total_num_target_approx, &global_num_target_approx, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_target_approx, &max_num_target_approx, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_target_approx, &min_num_target_approx, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + } + if (rank == 0) { printf("[BaryTree]\n"); printf("[BaryTree] Interaction information: \n"); @@ -644,6 +674,23 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run printf("[BaryTree] Ratio: %f\n", (double)max_num_approx / (double)min_num_approx); printf("[BaryTree]\n"); + + // These types of interactions only occur for CC + if (run_params->compute_type == CLUSTER_CLUSTER) { + printf("[BaryTree] Cumulative source approx inter across all ranks: %d\n", global_num_source_approx); + printf("[BaryTree] Maximum source approx inter across all ranks: %d\n", max_num_source_approx); + printf("[BaryTree] Minimum source approx inter across all ranks: %d\n", min_num_source_approx); + printf("[BaryTree] Ratio: %f\n", + (double)max_num_source_approx / 
(double)min_num_source_approx); + printf("[BaryTree]\n"); + printf("[BaryTree] Cumulative target approx inter across all ranks: %d\n", global_num_target_approx); + printf("[BaryTree] Maximum target approx inter across all ranks: %d\n", max_num_target_approx); + printf("[BaryTree] Minimum target approx inter across all ranks: %d\n", min_num_target_approx); + printf("[BaryTree] Ratio: %f\n", + (double)max_num_target_approx / (double)min_num_target_approx); + printf("[BaryTree]\n"); + } + printf("[BaryTree] BaryTree has finished.\n"); printf("[BaryTree]\n"); } diff --git a/src/interaction_compute/interaction_compute_cc.c b/src/interaction_compute/interaction_compute_cc.c index 0be6de17..bf92d831 100644 --- a/src/interaction_compute/interaction_compute_cc.c +++ b/src/interaction_compute/interaction_compute_cc.c @@ -32,6 +32,12 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T int *num_approx = interaction_list->num_approx; int *num_direct = interaction_list->num_direct; + + int **source_approx_inter_list = interaction_list->cc_source_approx_interactions; + int **target_approx_inter_list = interaction_list->cc_target_approx_interactions; + + int *num_source_approx = interaction_list->num_cc_source_approx; + int *num_target_approx = interaction_list->num_cc_target_approx; int source_tree_numnodes = source_tree->numnodes; int target_tree_numnodes = target_tree->numnodes; @@ -105,11 +111,14 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T int num_approx_in_cluster = num_approx[i]; int num_direct_in_cluster = num_direct[i]; + + int num_source_approx_in_cluster = num_source_approx[i]; + int num_target_approx_in_cluster = num_target_approx[i]; -/**********************************************************/ -/************** POTENTIAL FROM APPROX *********************/ -/**********************************************************/ +/* * ********************************************************/ +/* * ************ 
POTENTIAL FROM APPROX *********************/ +/* * ********************************************************/ for (int j = 0; j < num_approx_in_cluster; j++) { int source_node_index = approx_inter_list[i][j]; @@ -117,9 +126,9 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T int stream_id = j%3; - /***********************************************/ - /***************** Coulomb *********************/ - /***********************************************/ + /* * *********************************************/ + /* * *************** Coulomb *********************/ + /* * *********************************************/ if (run_params->kernel == COULOMB) { if (run_params->approximation == LAGRANGE) { @@ -151,14 +160,6 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T if (run_params->singularity == SKIPPING) { - //K_Coulomb_CP_Hermite(interp_pts_per_cluster, interp_pts_per_cluster, - // source_cluster_start, target_cluster_start, - // source_cluster_x, source_cluster_y, source_cluster_z, - // source_cluster_q, source_cluster_w, - // target_cluster_x, target_cluster_y, target_cluster_z, - // target_cluster_q, - // run_params, stream_id); - } else if (run_params->singularity == SUBTRACTION) { printf("**ERROR** NOT SET UP FOR CC COULOMB SS. 
EXITING.\n"); @@ -174,9 +175,9 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T exit(1); } - /***********************************************/ - /***************** Yukawa **********************/ - /***********************************************/ + /* * *********************************************/ + /* * *************** Yukawa **********************/ + /* * *********************************************/ } else if (run_params->kernel == YUKAWA) { @@ -209,14 +210,6 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T if (run_params->singularity == SKIPPING) { - //K_Yukawa_CP_Hermite(interp_pts_per_cluster, interp_pts_per_cluster, - // source_cluster_start, target_cluster_start, - // source_cluster_x, source_cluster_y, source_cluster_z, - // source_cluster_q, source_cluster_w, - // target_cluster_x, target_cluster_y, target_cluster_z, - // target_cluster_q, - // run_params, stream_id); - } else if (run_params->singularity == SUBTRACTION) { printf("**ERROR** NOT SET UP FOR CC YUKAWA SS. 
EXITING.\n"); @@ -232,9 +225,9 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T exit(1); } - /***********************************************/ - /********* Regularized Coulomb *****************/ - /***********************************************/ + /* * *********************************************/ + /* * ******* Regularized Coulomb *****************/ + /* * *********************************************/ } else if (run_params->kernel == REGULARIZED_COULOMB) { @@ -267,14 +260,6 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T if (run_params->singularity == SKIPPING) { - //K_RegularizedCoulomb_CP_Hermite(interp_pts_per_cluster, interp_pts_per_cluster, - // source_cluster_start, target_cluster_start, - // source_cluster_x, source_cluster_y, source_cluster_z, - // source_cluster_q, source_cluster_w, - // target_cluster_x, target_cluster_y, target_cluster_z, - // target_cluster_q, - // run_params, stream_id); - } else if (run_params->singularity == SUBTRACTION) { printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB SS. 
EXITING.\n"); @@ -290,9 +275,9 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T exit(1); } - /***********************************************/ - /********* Regularized Yukawa ******************/ - /***********************************************/ + /* * *********************************************/ + /* * ******* Regularized Yukawa ******************/ + /* * *********************************************/ } else if (run_params->kernel == REGULARIZED_YUKAWA) { @@ -343,9 +328,9 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T exit(1); } - /***********************************************/ - /********* Sin Over R **************************/ - /***********************************************/ + /* * *********************************************/ + /* * ******* Sin Over R **************************/ + /* * *********************************************/ } else if (run_params->kernel == SIN_OVER_R) { @@ -369,12 +354,487 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } } // end loop over cluster approximations + + + +/* * ********************************************************/ +/* * ************ POTENTIAL FROM SOURCE APPROX (PC) *********/ +/* * ********************************************************/ + + for (int j = 0; j < num_source_approx_in_cluster; j++) { + int source_node_index = source_approx_inter_list[i][j]; + int source_cluster_start = interp_pts_per_cluster * source_tree_cluster_ind[source_node_index]; + int stream_id = j%3; + + /* * *********************************************/ + /* * *************** Coulomb *********************/ + /* * *********************************************/ + if (run_params->kernel == COULOMB) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_Coulomb_PC_Lagrange(num_targets_in_cluster, interp_pts_per_cluster, + target_start, source_cluster_start, + target_x, target_y, 
target_z, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, + run_params, potential, stream_id); + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else if (run_params->approximation == HERMITE) { + + printf("**ERROR** CC HERMITE CURRENTLY INOPERABLE. EXITING. \n"); + exit(1); + + if (run_params->singularity == SKIPPING) { + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else { + printf("**ERROR** INVALID CHOICE OF APPROXIMATION. EXITING. \n"); + exit(1); + } + + /* * *********************************************/ + /* * *************** Yukawa **********************/ + /* * *********************************************/ + + } else if (run_params->kernel == YUKAWA) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_Yukawa_PC_Lagrange(num_targets_in_cluster, interp_pts_per_cluster, + target_start, source_cluster_start, + target_x, target_y, target_z, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, + run_params, potential, stream_id); + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC YUKAWA SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else if (run_params->approximation == HERMITE) { + + printf("**ERROR** CC HERMITE CURRENTLY INOPERABLE. EXITING. \n"); + exit(1); + + if (run_params->singularity == SKIPPING) { + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC YUKAWA SS. 
EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else { + printf("**ERROR** INVALID CHOICE OF APPROXIMATION. EXITING. \n"); + exit(1); + } + + /* * *********************************************/ + /* * ******* Regularized Coulomb *****************/ + /* * *********************************************/ + + } else if (run_params->kernel == REGULARIZED_COULOMB) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_RegularizedCoulomb_PC_Lagrange(num_targets_in_cluster, interp_pts_per_cluster, + target_start, source_cluster_start, + target_x, target_y, target_z, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, + run_params, potential, stream_id); + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else if (run_params->approximation == HERMITE) { + + printf("**ERROR** CC HERMITE CURRENTLY INOPERABLE. EXITING. \n"); + exit(1); + + if (run_params->singularity == SKIPPING) { + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else { + printf("**ERROR** INVALID CHOICE OF APPROXIMATION. EXITING. 
\n"); + exit(1); + } + + /* * *********************************************/ + /* * ******* Regularized Yukawa ******************/ + /* * *********************************************/ + + } else if (run_params->kernel == REGULARIZED_YUKAWA) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_RegularizedYukawa_PC_Lagrange(num_targets_in_cluster, interp_pts_per_cluster, + target_start, source_cluster_start, + target_x, target_y, target_z, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, + run_params, potential, stream_id); + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else if (run_params->approximation == HERMITE) { + + printf("**ERROR** CC HERMITE CURRENTLY INOPERABLE. EXITING. \n"); + exit(1); + + if (run_params->singularity == SKIPPING) { + + printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB HERMITE. EXITING.\n"); + exit(1); + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else { + printf("**ERROR** INVALID CHOICE OF APPROXIMATION. EXITING. 
\n"); + exit(1); + } + + /* * *********************************************/ + /* * ******* Sin Over R **************************/ + /* * *********************************************/ + + } else if (run_params->kernel == SIN_OVER_R) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_SinOverR_PC_Lagrange(num_targets_in_cluster, interp_pts_per_cluster, + target_start, source_cluster_start, + target_x, target_y, target_z, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, + run_params, potential, stream_id); + } + } + + } else { + printf("**ERROR** INVALID KERNEL. EXITING.\n"); + exit(1); + } + + } // end loop over cluster approximations + + + +/* * ********************************************************/ +/* * ************ POTENTIAL FROM TARGET APPROX (PC) *********/ +/* * ********************************************************/ + + for (int j = 0; j < num_target_approx_in_cluster; j++) { + + int source_node_index = target_approx_inter_list[i][j]; + int source_ibeg = source_tree_ibeg[source_node_index]; + int source_iend = source_tree_iend[source_node_index]; + + int num_sources_in_cluster = source_iend - source_ibeg + 1; + int source_start = source_ibeg - 1; + int stream_id = j%3; + + /* * *********************************************/ + /* * *************** Coulomb *********************/ + /* * *********************************************/ + if (run_params->kernel == COULOMB) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_Coulomb_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, + source_start, target_cluster_start, + source_x, source_y, source_z, source_q, source_w, + target_cluster_x, target_cluster_y, target_cluster_z, + target_cluster_q, + run_params, stream_id); + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC COULOMB SS. 
EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else if (run_params->approximation == HERMITE) { + + printf("**ERROR** CC HERMITE CURRENTLY INOPERABLE. EXITING. \n"); + exit(1); + + if (run_params->singularity == SKIPPING) { + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else { + printf("**ERROR** INVALID CHOICE OF APPROXIMATION. EXITING. \n"); + exit(1); + } + + /* * *********************************************/ + /* * *************** Yukawa **********************/ + /* * *********************************************/ + + } else if (run_params->kernel == YUKAWA) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_Yukawa_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, + source_start, target_cluster_start, + source_x, source_y, source_z, source_q, source_w, + target_cluster_x, target_cluster_y, target_cluster_z, + target_cluster_q, + run_params, stream_id); + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC YUKAWA SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else if (run_params->approximation == HERMITE) { + + printf("**ERROR** CC HERMITE CURRENTLY INOPERABLE. EXITING. \n"); + exit(1); + + if (run_params->singularity == SKIPPING) { + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC YUKAWA SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else { + printf("**ERROR** INVALID CHOICE OF APPROXIMATION. EXITING. 
\n"); + exit(1); + } + + /* * *********************************************/ + /* * ******* Regularized Coulomb *****************/ + /* * *********************************************/ + + } else if (run_params->kernel == REGULARIZED_COULOMB) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_RegularizedCoulomb_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, + source_start, target_cluster_start, + source_x, source_y, source_z, source_q, source_w, + target_cluster_x, target_cluster_y, target_cluster_z, + target_cluster_q, + run_params, stream_id); + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else if (run_params->approximation == HERMITE) { + + printf("**ERROR** CC HERMITE CURRENTLY INOPERABLE. EXITING. \n"); + exit(1); + + if (run_params->singularity == SKIPPING) { + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else { + printf("**ERROR** INVALID CHOICE OF APPROXIMATION. EXITING. 
\n"); + exit(1); + } + + /* * *********************************************/ + /* * ******* Regularized Yukawa ******************/ + /* * *********************************************/ + + } else if (run_params->kernel == REGULARIZED_YUKAWA) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_RegularizedYukawa_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, + source_start, target_cluster_start, + source_x, source_y, source_z, source_q, source_w, + target_cluster_x, target_cluster_y, target_cluster_z, + target_cluster_q, + run_params, stream_id); + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else if (run_params->approximation == HERMITE) { + + printf("**ERROR** CC HERMITE CURRENTLY INOPERABLE. EXITING. \n"); + exit(1); + + if (run_params->singularity == SKIPPING) { + + printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB HERMITE. EXITING.\n"); + exit(1); + + } else if (run_params->singularity == SUBTRACTION) { + + printf("**ERROR** NOT SET UP FOR CC REGULARIZED COULOMB SS. EXITING.\n"); + exit(1); + + } else { + printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); + exit(1); + } + + } else { + printf("**ERROR** INVALID CHOICE OF APPROXIMATION. EXITING. 
\n"); + exit(1); + } + + /* * *********************************************/ + /* * ******* Sin Over R **************************/ + /* * *********************************************/ + + } else if (run_params->kernel == SIN_OVER_R) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_SinOverR_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, + source_start, target_cluster_start, + source_x, source_y, source_z, source_q, source_w, + target_cluster_x, target_cluster_y, target_cluster_z, + target_cluster_q, + run_params, stream_id); + } + } + + } else { + printf("**ERROR** INVALID KERNEL. EXITING.\n"); + exit(1); + } + + } // end loop over cluster approximations -/**********************************************************/ -/************** POTENTIAL FROM DIRECT *********************/ -/**********************************************************/ +/* * ********************************************************/ +/* * ************ POTENTIAL FROM DIRECT *********************/ +/* * ********************************************************/ for (int j = 0; j < num_direct_in_cluster; j++) { @@ -386,9 +846,9 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T int source_start = source_ibeg - 1; int stream_id = j%3; - /***********************************************/ - /***************** Coulomb *********************/ - /***********************************************/ + /* * *********************************************/ + /* * *************** Coulomb *********************/ + /* * *********************************************/ if (run_params->kernel == COULOMB) { @@ -413,9 +873,9 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T exit(1); } - /***********************************************/ - /***************** Yukawa **********************/ - /***********************************************/ + /* * *********************************************/ + /* 
* *************** Yukawa **********************/ + /* * *********************************************/ } else if (run_params->kernel == YUKAWA) { @@ -440,9 +900,9 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T exit(1); } - /***********************************************/ - /************ Regularized Coulomb **************/ - /***********************************************/ + /* * *********************************************/ + /* * ********** Regularized Coulomb **************/ + /* * *********************************************/ } else if (run_params->kernel == REGULARIZED_COULOMB) { @@ -467,9 +927,9 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T exit(1); } - /***********************************************/ - /************ Regularized Yukawa ***************/ - /***********************************************/ + /* * *********************************************/ + /* * ********** Regularized Yukawa ***************/ + /* * *********************************************/ } else if (run_params->kernel == REGULARIZED_YUKAWA) { @@ -494,9 +954,9 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T exit(1); } - /***********************************************/ - /************ Sin Over R ***********************/ - /***********************************************/ + /* * *********************************************/ + /* * ********** Sin Over R ***********************/ + /* * *********************************************/ } else if (run_params->kernel == SIN_OVER_R) { diff --git a/src/interaction_lists/interaction_lists.c b/src/interaction_lists/interaction_lists.c index eb66b219..40f208f4 100644 --- a/src/interaction_lists/interaction_lists.c +++ b/src/interaction_lists/interaction_lists.c @@ -177,6 +177,20 @@ void InteractionLists_Make(struct InteractionLists **interaction_list_addr, int *sizeof_cc_source_approx_inter_list, *sizeof_cc_target_approx_inter_list; 
make_vector(sizeof_cc_source_approx_inter_list, target_tree_numnodes); make_vector(sizeof_cc_target_approx_inter_list, target_tree_numnodes); + + for (int i = 0; i < target_tree_numnodes; i++) sizeof_cc_source_approx_inter_list[i] = 50; + for (int i = 0; i < target_tree_numnodes; i++) sizeof_cc_target_approx_inter_list[i] = 50; + + for (int i = 0; i < target_tree_numnodes; i++) + for (int j = 0; j < 50; j++) + cc_source_approx_inter_list[i][j] = -1; + + for (int i = 0; i < target_tree_numnodes; i++) + for (int j = 0; j < 50; j++) + cc_target_approx_inter_list[i][j] = -1; + + for (int i = 0; i < target_tree_numnodes; i++) num_cc_source_approx_inter[i] = 0; + for (int i = 0; i < target_tree_numnodes; i++) num_cc_target_approx_inter[i] = 0; /* Build CC interaction lists */ From e68fa3741348d943f1360fbc319da9ed9a6a74b0 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Wed, 20 May 2020 23:20:51 -0400 Subject: [PATCH 16/95] Adding exp and gaussian to random cube reproducible --- examples/random_cube_reproducible.c | 28 ++++++++++++++++++++++-- examples/support_fns.c | 34 ++++++++++++++++++++++++++++- examples/support_fns.h | 4 ++++ 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index 4332b893..6531d54d 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -145,6 +145,30 @@ int main(int argc, char **argv) mySources.b[i] = 1.0; } } + + } else if (distribution == GAUSSIAN) { + + for (int j = 0; j < rank+1; ++j) { //Cycle to generate same particle no matter num ranks + for (int i = 0; i < N; ++i) { + Point_Gaussian(&mySources.x[i], &mySources.y[i], &mySources.z[i]); + mySources.q[i] = ((double)random()/(double)(RAND_MAX)) * 2. 
- 1.; + mySources.w[i] = 1.0; + mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); + mySources.b[i] = 1.0; + } + } + + } else if (distribution == EXPONENTIAL) { + + for (int j = 0; j < rank+1; ++j) { //Cycle to generate same particle no matter num ranks + for (int i = 0; i < N; ++i) { + Point_Exponential(&mySources.x[i], &mySources.y[i], &mySources.z[i]); + mySources.q[i] = ((double)random()/(double)(RAND_MAX)) * 2. - 1.; + mySources.w[i] = 1.0; + mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); + mySources.b[i] = 1.0; + } + } } else { printf("[random cube example] ERROR! Distribution %d undefined in this " @@ -221,7 +245,7 @@ int main(int argc, char **argv) /* Output load balanced points */ - /* +/* char points_file[256]; sprintf(points_file, "points_rank_%d.csv", rank); FILE *points_fp = fopen(points_file, "w"); @@ -229,7 +253,7 @@ int main(int argc, char **argv) fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]); } fclose(points_fp); - */ +*/ /* Setting up targets */ diff --git a/examples/support_fns.c b/examples/support_fns.c index b97f9488..3db0ec99 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -283,7 +283,7 @@ double Point_Set_Init(DISTRIBUTION distribution) double u = (double)random()/(1.+ (double)(RAND_MAX)); double x = 1. / sqrt(6.) * erfinv(2. * u - 1.); - return x; + return x; } else if (distribution == EXPONENTIAL) { @@ -360,6 +360,38 @@ void Point_Plummer(double R, double *x, double *y, double *z) } +/*----------------------------------------------------------------------------*/ +void Point_Gaussian(double *x, double *y, double *z) +{ + double u = (double)random()/(1.+ (double)(RAND_MAX)); + *x = 1. / sqrt(6.) * erfinv(2. * u - 1.); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + *y = 1. / sqrt(6.) * erfinv(2. * u - 1.); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + *z = 1. / sqrt(6.) * erfinv(2. 
* u - 1.); + + return; +} + + +/*----------------------------------------------------------------------------*/ +void Point_Exponential(double *x, double *y, double *z) +{ + double u = (double)random()/(1.+ (double)(RAND_MAX)); + *x = -log(1. - u) / sqrt(12.); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + *y = -log(1. - u) / sqrt(12.); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + *z = -log(1. - u) / sqrt(12.); + + return; +} + + /*----------------------------------------------------------------------------*/ void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], diff --git a/examples/support_fns.h b/examples/support_fns.h index 9304128e..fe7a15da 100644 --- a/examples/support_fns.h +++ b/examples/support_fns.h @@ -26,6 +26,10 @@ double Point_Set(DISTRIBUTION distribution, double xmin, double xmax); void Point_Plummer(double R, double *x, double *y, double *z); +void Point_Gaussian(double *x, double *y, double *z); + +void Point_Exponential(double *x, double *y, double *z); + void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], double time_run[4], double time_tree[13], double time_direct[4]); From abfa3311a4e6bf60e098517c9fe85569571da452 Mon Sep 17 00:00:00 2001 From: Leighton Wilson Date: Sat, 23 May 2020 18:45:42 -0700 Subject: [PATCH 17/95] Adding plummer symmetric to random_cube_reproducible --- examples/random_cube_reproducible.c | 34 +++++++++++++++++++++++++++-- examples/support_fns.c | 23 +++++++++++++++++++ examples/support_fns.h | 5 ++++- 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index 6531d54d..c05a875c 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -145,6 +145,36 @@ int main(int argc, char **argv) mySources.b[i] = 1.0; } } + + } else if (distribution == PLUMMER_SYMMETRIC) { + + double 
plummer_R = 1.0; + double plummer_M = 1.0; + + for (int j = 0; j < rank+1; ++j) { //Cycle to generate same particle no matter num ranks + for (int i = 0; i < N; ++i) { + mySources.q[i] = plummer_M / N; + mySources.w[i] = 1.0; + mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i); + mySources.b[i] = 1.0; + } + + for (int i = 0; i < N/8; ++i) { + double xx, yy, zz; + Point_Plummer_Octant(plummer_R , &xx, &yy, &zz); + + for (int ii = 0; ii < 2; ++ii) { + for (int jj = 0; jj < 2; ++jj) { + for (int kk = 0; kk < 2; ++kk) { + int index = (N/8) * (ii*4 + jj*2 + kk) + i; + mySources.x[index] = xx * pow(-1, ii); + mySources.y[index] = yy * pow(-1, jj); + mySources.z[index] = zz * pow(-1, kk); + } + } + } + } + } } else if (distribution == GAUSSIAN) { @@ -245,7 +275,7 @@ int main(int argc, char **argv) /* Output load balanced points */ -/* + char points_file[256]; sprintf(points_file, "points_rank_%d.csv", rank); FILE *points_fp = fopen(points_file, "w"); @@ -253,7 +283,7 @@ int main(int argc, char **argv) fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]); } fclose(points_fp); -*/ + /* Setting up targets */ diff --git a/examples/support_fns.c b/examples/support_fns.c index 3db0ec99..6e853856 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -251,6 +251,9 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } else if (strcasecmp(distribution_string, "PLUMMER") == 0) { *distribution = PLUMMER; + } else if (strcasecmp(distribution_string, "PLUMMER_SYMMETRIC") == 0) { + *distribution = PLUMMER_SYMMETRIC; + } else { if (rank == 0) { printf("[random cube example] ERROR! Undefined distribution token \"%s\". 
Exiting.\n", @@ -360,6 +363,26 @@ void Point_Plummer(double R, double *x, double *y, double *z) } +/*----------------------------------------------------------------------------*/ +void Point_Plummer_Octant(double R, double *x, double *y, double *z) +{ + double u = (double)random()/(1.+ (double)(RAND_MAX)); + double radius = R / sqrt(pow(u, (-2.0/3.0)) - 1.0); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + double theta = acos(u); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + double phi = u * M_PI / 2.0; + + *x = radius * sin(theta) * cos(phi); + *y = radius * sin(theta) * sin(phi); + *z = radius * cos(theta); + + return; +} + + /*----------------------------------------------------------------------------*/ void Point_Gaussian(double *x, double *y, double *z) { diff --git a/examples/support_fns.h b/examples/support_fns.h index fe7a15da..93bbc45d 100644 --- a/examples/support_fns.h +++ b/examples/support_fns.h @@ -12,7 +12,8 @@ typedef enum DISTRIBUTION UNIFORM, GAUSSIAN, EXPONENTIAL, - PLUMMER + PLUMMER, + PLUMMER_SYMMETRIC } DISTRIBUTION; @@ -26,6 +27,8 @@ double Point_Set(DISTRIBUTION distribution, double xmin, double xmax); void Point_Plummer(double R, double *x, double *y, double *z); +void Point_Plummer_Octant(double R, double *x, double *y, double *z); + void Point_Gaussian(double *x, double *y, double *z); void Point_Exponential(double *x, double *y, double *z); From 5f8b4d800037944b70bc96babeb3bf4862c2251d Mon Sep 17 00:00:00 2001 From: Leighton Wilson Date: Sat, 23 May 2020 19:59:28 -0700 Subject: [PATCH 18/95] Adding ability to use HSFC in reproducible example --- examples/random_cube.c | 3 ++- examples/random_cube_reproducible.c | 10 ++++++++-- examples/run_readin.c | 3 ++- examples/support_fns.c | 22 +++++++++++++++++++++- examples/support_fns.h | 9 ++++++++- 5 files changed, 41 insertions(+), 6 deletions(-) diff --git a/examples/random_cube.c b/examples/random_cube.c index 547249c4..f095ff2f 100644 --- a/examples/random_cube.c +++ 
b/examples/random_cube.c @@ -35,12 +35,13 @@ int main(int argc, char **argv) int N, M, run_direct, slice; double xyz_limits[6]; DISTRIBUTION distribution; + PARTITION partition; int sample_size = 1000000; struct RunParams *run_params = NULL; FILE *fp = fopen(argv[1], "r"); - Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution); + Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution, &partition); double xmin = xyz_limits[0], xmax = xyz_limits[1]; double ymin = xyz_limits[2], ymax = xyz_limits[3]; diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index c05a875c..b8fa3843 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -32,11 +32,12 @@ int main(int argc, char **argv) int N, M, run_direct, slice; double xyz_limits[6]; DISTRIBUTION distribution; + PARTITION partition; struct RunParams *run_params = NULL; FILE *fp = fopen(argv[1], "r"); - Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution); + Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution, &partition); if (N != M) { if (rank == 0) printf("[random cube example] ERROR! 
This executable requires sources and targets " @@ -92,13 +93,18 @@ int main(int argc, char **argv) /* General parameters */ Zoltan_Set_Param(zz, "DEBUG_LEVEL", "0"); - Zoltan_Set_Param(zz, "LB_METHOD", "RCB"); Zoltan_Set_Param(zz, "NUM_GID_ENTRIES", "1"); Zoltan_Set_Param(zz, "NUM_LID_ENTRIES", "1"); Zoltan_Set_Param(zz, "OBJ_WEIGHT_DIM", "1"); Zoltan_Set_Param(zz, "RETURN_LISTS", "ALL"); Zoltan_Set_Param(zz, "AUTO_MIGRATE", "TRUE"); + if (partition == RCB) { + Zoltan_Set_Param(zz, "LB_METHOD", "RCB"); + } else if (partition == HSFC) { + Zoltan_Set_Param(zz, "LB_METHOD", "HSFC"); + } + /* RCB parameters */ Zoltan_Set_Param(zz, "RCB_OUTPUT_LEVEL", "0"); diff --git a/examples/run_readin.c b/examples/run_readin.c index 24b82186..0a28e973 100644 --- a/examples/run_readin.c +++ b/examples/run_readin.c @@ -32,11 +32,12 @@ int main(int argc, char **argv) int N, M, run_direct, slice; double xyz_limits[6]; DISTRIBUTION distribution; + PARTITION partition; struct RunParams *run_params = NULL; FILE *fp = fopen(argv[1], "r"); - Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution); + Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution, &partition); if (N != M) { if (rank == 0) printf("[random cube example] ERROR! 
This executable requires sources and targets " diff --git a/examples/support_fns.c b/examples/support_fns.c index 6e853856..749630be 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -14,7 +14,7 @@ static double erfinv (double x); void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int *run_direct, int *slice, - double *xyz_limits, DISTRIBUTION *distribution) + double *xyz_limits, DISTRIBUTION *distribution, PARTITION *partition) { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -33,6 +33,7 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * char compute_type_string[256] = "PARTICLE_CLUSTER"; char run_direct_string[256] = "OFF"; char distribution_string[256] = "UNIFORM"; + char partition_string[256] = "RCB"; KERNEL kernel; SINGULARITY singularity; @@ -119,6 +120,9 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } else if (strcmp(c1, "distribution") == 0) { strcpy(distribution_string, c2); + } else if (strcmp(c1, "partition") == 0) { + strcpy(partition_string, c2); + } else { if (rank == 0) { printf("[random cube example] ERROR! Undefined token \"%s\". Exiting.\n", c1); @@ -263,6 +267,22 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } + if (strcasecmp(partition_string, "RCB") == 0) { + *partition = RCB; + + } else if (strcasecmp(partition_string, "HSFC") == 0) { + *partition = HSFC; + + } else { + if (rank == 0) { + printf("[random cube example] ERROR! Undefined distribution token \"%s\". 
Exiting.\n", + distribution_string); + } + exit(1); + } + + + RunParams_Setup(run_params, kernel, num_kernel_params, kernel_params, approximation, singularity, compute_type, diff --git a/examples/support_fns.h b/examples/support_fns.h index 93bbc45d..190a4e10 100644 --- a/examples/support_fns.h +++ b/examples/support_fns.h @@ -16,9 +16,16 @@ typedef enum DISTRIBUTION PLUMMER_SYMMETRIC } DISTRIBUTION; +typedef enum PARTITION +{ + NO_PARTITION, + RCB, + HSFC +} PARTITION; + void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int *run_direct, int *slice, - double *xyz_limits, DISTRIBUTION *distribution); + double *xyz_limits, DISTRIBUTION *distribution, PARTITION *partition); double Point_Set_Init(DISTRIBUTION distribution); From c6a0db358f398845b0ecd1fbd41b5114b8266e29 Mon Sep 17 00:00:00 2001 From: Leighton Wilson Date: Sat, 23 May 2020 20:43:40 -0700 Subject: [PATCH 19/95] Setting max aspect ratio for rcb to 1E9 --- examples/random_cube_reproducible.c | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index b8fa3843..2109472a 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -109,6 +109,7 @@ int main(int argc, char **argv) Zoltan_Set_Param(zz, "RCB_OUTPUT_LEVEL", "0"); Zoltan_Set_Param(zz, "RCB_RECTILINEAR_BLOCKS", "1"); + Zoltan_Set_Param(zz, "RCB_MAX_ASPECT_RATIO", "1000000000"); /* Setting up sources and load balancing */ From 2d24d0da14b292d7a7db5c258970b5f7c816bdf7 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Fri, 12 Jun 2020 23:17:16 -0400 Subject: [PATCH 20/95] printing pointwise interaction info for cc --- src/drivers/treedriver.c | 112 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 2 deletions(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index e858a3fb..834e759f 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -58,10 +58,16 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run int total_num_direct = 0; int total_num_approx = 0; int total_num_inter = 0; + + int total_num_direct_interact = 0; + int total_num_approx_interact = 0; + int total_num_interact = 0; // These types of interactions only occur for CC int total_num_source_approx = 0; int total_num_target_approx = 0; + int total_num_source_approx_interact = 0; + int total_num_target_approx_interact = 0; //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -512,6 +518,24 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run target_tree->numnodes); total_num_target_approx += sum_int(local_interaction_list->num_cc_target_approx, target_tree->numnodes); + + + total_num_approx_interact += sum_int(local_interaction_list->num_approx, target_tree->numnodes) + * pow(run_params->interp_pts_per_cluster, 2); + + for (int i = 0; i < target_tree->numnodes; ++i) { + for (int j = 0; j < local_interaction_list->num_direct[i]; ++j) { + total_num_direct_interact += target_tree->numpar[i] + * source_tree->numpar[local_interaction_list->direct_interactions[i][j]]; + } + for (int j = 0; j < local_interaction_list->num_cc_source_approx[i]; ++j) { + total_num_source_approx_interact += target_tree->numpar[i] * run_params->interp_pts_per_cluster; + } + for (int j = 0; j < local_interaction_list->num_cc_target_approx[i]; ++j) { + total_num_target_approx_interact += run_params->interp_pts_per_cluster + * 
source_tree->numpar[local_interaction_list->cc_target_approx_interactions[i][j]]; + } + } } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -547,6 +571,24 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run target_tree->numnodes); total_num_target_approx += sum_int(let_interaction_list->num_cc_target_approx, target_tree->numnodes); + + + total_num_approx_interact += sum_int(let_interaction_list->num_approx, target_tree->numnodes) + * pow(run_params->interp_pts_per_cluster, 2); + + for (int i = 0; i < target_tree->numnodes; ++i) { + for (int j = 0; j < let_interaction_list->num_direct[i]; ++j) { + total_num_direct_interact += target_tree->numpar[i] + * let_trees[get_from]->numpar[let_interaction_list->direct_interactions[i][j]]; + } + for (int j = 0; j < let_interaction_list->num_cc_source_approx[i]; ++j) { + total_num_source_approx_interact += target_tree->numpar[i] * run_params->interp_pts_per_cluster; + } + for (int j = 0; j < let_interaction_list->num_cc_target_approx[i]; ++j) { + total_num_target_approx_interact += run_params->interp_pts_per_cluster + * let_trees[get_from]->numpar[let_interaction_list->cc_target_approx_interactions[i][j]]; + } + } } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -625,7 +667,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run int global_num_source_approx, max_num_source_approx, min_num_source_approx; int global_num_target_approx, max_num_target_approx, min_num_target_approx; - + total_num_inter = total_num_direct + total_num_approx + total_num_source_approx + total_num_target_approx; @@ -690,6 +732,73 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run (double)max_num_target_approx / (double)min_num_target_approx); printf("[BaryTree]\n"); } + } + + + /* For the pointwise interactions */ + + int global_num_interact, max_num_interact, min_num_interact; + int 
global_num_direct_interact, max_num_direct_interact, min_num_direct_interact; + int global_num_approx_interact, max_num_approx_interact, min_num_approx_interact; + + int global_num_source_approx_interact, max_num_source_approx_interact, min_num_source_approx_interact; + int global_num_target_approx_interact, max_num_target_approx_interact, min_num_target_approx_interact; + + total_num_interact = total_num_direct_interact + total_num_approx_interact + + total_num_source_approx_interact + total_num_target_approx_interact; + + MPI_Reduce(&total_num_interact, &global_num_interact, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_interact, &max_num_interact, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_interact, &min_num_interact, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + + MPI_Reduce(&total_num_direct_interact, &global_num_direct_interact, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_direct_interact, &max_num_direct_interact, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_direct_interact, &min_num_direct_interact, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + + MPI_Reduce(&total_num_approx_interact, &global_num_approx_interact, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_approx_interact, &max_num_approx_interact, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_approx_interact, &min_num_approx_interact, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + + // These types of interactions only occur for CC + if (run_params->compute_type == CLUSTER_CLUSTER) { + MPI_Reduce(&total_num_source_approx_interact, &global_num_source_approx_interact, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_source_approx_interact, &max_num_source_approx_interact, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_source_approx_interact, &min_num_source_approx_interact, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + + MPI_Reduce(&total_num_target_approx_interact, 
&global_num_target_approx_interact, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_target_approx_interact, &max_num_target_approx_interact, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_target_approx_interact, &min_num_target_approx_interact, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + } + + if (rank == 0) { + printf("[BaryTree]\n"); + printf("[BaryTree] Cumulative pointwise interactions across all ranks: %d\n", global_num_interact); + printf("[BaryTree] Maximum pointwise interactions across all ranks: %d\n", max_num_interact); + printf("[BaryTree] Minimum pointwise interactions across all ranks: %d\n", min_num_interact); + printf("[BaryTree]\n"); + + printf("[BaryTree] Cumulative approx pointwise interactions across all ranks: %d\n", global_num_approx_interact); + printf("[BaryTree] Maximum approx pointwise interactions across all ranks: %d\n", max_num_approx_interact); + printf("[BaryTree] Minimum approx pointwise interactions across all ranks: %d\n", min_num_approx_interact); + printf("[BaryTree]\n"); + + printf("[BaryTree] Cumulative direct pointwise interactions across all ranks: %d\n", global_num_direct_interact); + printf("[BaryTree] Maximum direct pointwise interactions across all ranks: %d\n", max_num_direct_interact); + printf("[BaryTree] Minimum direct pointwise interactions across all ranks: %d\n", min_num_direct_interact); + printf("[BaryTree]\n"); + + // These types of interactions only occur for CC + if (run_params->compute_type == CLUSTER_CLUSTER) { + printf("[BaryTree] Cumulative source approx pointwise interactions across all ranks: %d\n", global_num_source_approx_interact); + printf("[BaryTree] Maximum source approx pointwise interactions across all ranks: %d\n", max_num_source_approx_interact); + printf("[BaryTree] Minimum source approx pointwise interactions across all ranks: %d\n", min_num_source_approx_interact); + printf("[BaryTree]\n"); + + printf("[BaryTree] Cumulative target approx pointwise 
interactions across all ranks: %d\n", global_num_target_approx_interact); + printf("[BaryTree] Maximum source approx pointwise interactions across all ranks: %d\n", max_num_target_approx_interact); + printf("[BaryTree] Minimum source approx pointwise interactions across all ranks: %d\n", min_num_target_approx_interact); + printf("[BaryTree]\n"); + } printf("[BaryTree] BaryTree has finished.\n"); printf("[BaryTree]\n"); @@ -697,6 +806,5 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - return; } From e13ceefa32e44eaacaeadf6165541fdb179e3337 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Sat, 13 Jun 2020 01:00:05 -0400 Subject: [PATCH 21/95] Adding printed output to CP and PC --- src/drivers/treedriver.c | 60 +++++++++++++++++++---- src/interaction_lists/interaction_lists.c | 52 ++++++++++---------- 2 files changed, 77 insertions(+), 35 deletions(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 834e759f..52aa9999 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -132,6 +132,16 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run if (run_params->verbosity > 0) { total_num_approx += sum_int(local_interaction_list->num_approx, batches->numnodes); total_num_direct += sum_int(local_interaction_list->num_direct, batches->numnodes); + + for (int i = 0; i < batches->numnodes; ++i) { + for (int j = 0; j < local_interaction_list->num_direct[i]; ++j) { + total_num_direct_interact += batches->numpar[i] + * tree->numpar[local_interaction_list->direct_interactions[i][j]]; + } + for (int j = 0; j < local_interaction_list->num_approx[i]; ++j) { + total_num_approx_interact += batches->numpar[i] * run_params->interp_pts_per_cluster; + } + } } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -163,8 +173,18 @@ void treedriver(struct Particles *sources, struct Particles 
*targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0) { - total_num_approx += sum_int(let_interaction_list->num_approx, batches->numnodes); - total_num_direct += sum_int(let_interaction_list->num_direct, batches->numnodes); + total_num_approx += sum_int(let_interaction_list->num_approx, remote_batches->numnodes); + total_num_direct += sum_int(let_interaction_list->num_direct, remote_batches->numnodes); + + for (int i = 0; i < remote_batches->numnodes; ++i) { + for (int j = 0; j < let_interaction_list->num_direct[i]; ++j) { + total_num_direct_interact += remote_batches->numpar[i] + * tree->numpar[let_interaction_list->direct_interactions[i][j]]; + } + for (int j = 0; j < let_interaction_list->num_approx[i]; ++j) { + total_num_approx_interact += remote_batches->numpar[i] * run_params->interp_pts_per_cluster; + } + } } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -323,6 +343,16 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run if (run_params->verbosity > 0) { total_num_approx += sum_int(local_interaction_list->num_approx, batches->numnodes); total_num_direct += sum_int(local_interaction_list->num_direct, batches->numnodes); + + for (int i = 0; i < batches->numnodes; ++i) { + for (int j = 0; j < local_interaction_list->num_direct[i]; ++j) { + total_num_direct_interact += batches->numpar[i] + * tree->numpar[local_interaction_list->direct_interactions[i][j]]; + } + for (int j = 0; j < local_interaction_list->num_approx[i]; ++j) { + total_num_approx_interact += batches->numpar[i] * run_params->interp_pts_per_cluster; + } + } } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -353,6 +383,16 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run if (run_params->verbosity > 0) { total_num_approx += sum_int(let_interaction_list->num_approx, batches->numnodes); total_num_direct += 
sum_int(let_interaction_list->num_direct, batches->numnodes); + + for (int i = 0; i < batches->numnodes; ++i) { + for (int j = 0; j < let_interaction_list->num_direct[i]; ++j) { + total_num_direct_interact += batches->numpar[i] + * let_trees[get_from]->numpar[let_interaction_list->direct_interactions[i][j]]; + } + for (int j = 0; j < let_interaction_list->num_approx[i]; ++j) { + total_num_approx_interact += batches->numpar[i] * run_params->interp_pts_per_cluster; + } + } } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -521,8 +561,8 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run total_num_approx_interact += sum_int(local_interaction_list->num_approx, target_tree->numnodes) - * pow(run_params->interp_pts_per_cluster, 2); - + * run_params->interp_pts_per_cluster * run_params->interp_pts_per_cluster; + for (int i = 0; i < target_tree->numnodes; ++i) { for (int j = 0; j < local_interaction_list->num_direct[i]; ++j) { total_num_direct_interact += target_tree->numpar[i] @@ -574,7 +614,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run total_num_approx_interact += sum_int(let_interaction_list->num_approx, target_tree->numnodes) - * pow(run_params->interp_pts_per_cluster, 2); + * run_params->interp_pts_per_cluster * run_params->interp_pts_per_cluster; for (int i = 0; i < target_tree->numnodes; ++i) { for (int j = 0; j < let_interaction_list->num_direct[i]; ++j) { @@ -777,16 +817,16 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run printf("[BaryTree] Minimum pointwise interactions across all ranks: %d\n", min_num_interact); printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative approx pointwise interactions across all ranks: %d\n", global_num_approx_interact); - printf("[BaryTree] Maximum approx pointwise interactions across all ranks: %d\n", max_num_approx_interact); - printf("[BaryTree] Minimum approx pointwise interactions across all ranks: 
%d\n", min_num_approx_interact); - printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative direct pointwise interactions across all ranks: %d\n", global_num_direct_interact); printf("[BaryTree] Maximum direct pointwise interactions across all ranks: %d\n", max_num_direct_interact); printf("[BaryTree] Minimum direct pointwise interactions across all ranks: %d\n", min_num_direct_interact); printf("[BaryTree]\n"); + printf("[BaryTree] Cumulative approx pointwise interactions across all ranks: %d\n", global_num_approx_interact); + printf("[BaryTree] Maximum approx pointwise interactions across all ranks: %d\n", max_num_approx_interact); + printf("[BaryTree] Minimum approx pointwise interactions across all ranks: %d\n", min_num_approx_interact); + printf("[BaryTree]\n"); + // These types of interactions only occur for CC if (run_params->compute_type == CLUSTER_CLUSTER) { printf("[BaryTree] Cumulative source approx pointwise interactions across all ranks: %d\n", global_num_source_approx_interact); diff --git a/src/interaction_lists/interaction_lists.c b/src/interaction_lists/interaction_lists.c index 40f208f4..6db47c28 100644 --- a/src/interaction_lists/interaction_lists.c +++ b/src/interaction_lists/interaction_lists.c @@ -558,7 +558,7 @@ void cc_compute_interaction_list( if ((source_tree_radius[source_tree_node] + target_tree_radius[target_tree_node]) < dist * run_params->theta) { - if ((source_tree_numpar[source_tree_node] < size_check) && + if ((source_tree_numpar[source_tree_node] < size_check) || (target_tree_numpar[target_tree_node] < size_check)) { /* add to direct list */ @@ -571,30 +571,32 @@ void cc_compute_interaction_list( direct_list[target_tree_node][direct_index_counter[target_tree_node]] = source_tree_node; direct_index_counter[target_tree_node]++; - } else if (source_tree_numpar[source_tree_node] < size_check) { - - /* add to CP approx list */ - - if (target_approx_index_counter[target_tree_node] >= sizeof_target_approx_list[target_tree_node]) { - 
sizeof_target_approx_list[target_tree_node] *= 1.5; - target_approx_list[target_tree_node] = realloc_vector(target_approx_list[target_tree_node], - sizeof_target_approx_list[target_tree_node]); - } - target_approx_list[target_tree_node][target_approx_index_counter[target_tree_node]] = source_tree_node; - target_approx_index_counter[target_tree_node]++; - - } else if (target_tree_numpar[target_tree_node] < size_check) { - - /* add to PC approx list */ - - if (source_approx_index_counter[target_tree_node] >= sizeof_source_approx_list[target_tree_node]) { - sizeof_source_approx_list[target_tree_node] *= 1.5; - source_approx_list[target_tree_node] = realloc_vector(source_approx_list[target_tree_node], - sizeof_source_approx_list[target_tree_node]); - } - source_approx_list[target_tree_node][source_approx_index_counter[target_tree_node]] = source_tree_node; - source_approx_index_counter[target_tree_node]++; - +// +// } else if (source_tree_numpar[source_tree_node] < size_check) { +// +// /* add to CP approx list */ +// +// if (target_approx_index_counter[target_tree_node] >= sizeof_target_approx_list[target_tree_node]) { +// sizeof_target_approx_list[target_tree_node] *= 1.5; +// target_approx_list[target_tree_node] = realloc_vector(target_approx_list[target_tree_node], +// sizeof_target_approx_list[target_tree_node]); +// } +// target_approx_list[target_tree_node][target_approx_index_counter[target_tree_node]] = source_tree_node; +// target_approx_index_counter[target_tree_node]++; +// +// } else if (target_tree_numpar[target_tree_node] < size_check) { +// +// /* add to PC approx list */ +// +// if (source_approx_index_counter[target_tree_node] >= sizeof_source_approx_list[target_tree_node]) { +// sizeof_source_approx_list[target_tree_node] *= 1.5; +// source_approx_list[target_tree_node] = realloc_vector(source_approx_list[target_tree_node], +// sizeof_source_approx_list[target_tree_node]); +// } +// 
source_approx_list[target_tree_node][source_approx_index_counter[target_tree_node]] = source_tree_node; +// source_approx_index_counter[target_tree_node]++; +// +// } else { /* add to CC approx list */ From f08fce7378a22042fb4eea84e8b6a28e31bae274 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Sat, 13 Jun 2020 01:11:09 -0400 Subject: [PATCH 22/95] Readding CP and PC for CC code --- src/interaction_lists/interaction_lists.c | 52 +++++++++++------------ 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/src/interaction_lists/interaction_lists.c b/src/interaction_lists/interaction_lists.c index 6db47c28..95e28e26 100644 --- a/src/interaction_lists/interaction_lists.c +++ b/src/interaction_lists/interaction_lists.c @@ -558,7 +558,7 @@ void cc_compute_interaction_list( if ((source_tree_radius[source_tree_node] + target_tree_radius[target_tree_node]) < dist * run_params->theta) { - if ((source_tree_numpar[source_tree_node] < size_check) || + if ((source_tree_numpar[source_tree_node] < size_check) && (target_tree_numpar[target_tree_node] < size_check)) { /* add to direct list */ @@ -571,32 +571,30 @@ void cc_compute_interaction_list( direct_list[target_tree_node][direct_index_counter[target_tree_node]] = source_tree_node; direct_index_counter[target_tree_node]++; -// -// } else if (source_tree_numpar[source_tree_node] < size_check) { -// -// /* add to CP approx list */ -// -// if (target_approx_index_counter[target_tree_node] >= sizeof_target_approx_list[target_tree_node]) { -// sizeof_target_approx_list[target_tree_node] *= 1.5; -// target_approx_list[target_tree_node] = realloc_vector(target_approx_list[target_tree_node], -// sizeof_target_approx_list[target_tree_node]); -// } -// target_approx_list[target_tree_node][target_approx_index_counter[target_tree_node]] = source_tree_node; -// target_approx_index_counter[target_tree_node]++; -// -// } else if (target_tree_numpar[target_tree_node] < size_check) { -// -// /* add to PC approx list */ -// 
-// if (source_approx_index_counter[target_tree_node] >= sizeof_source_approx_list[target_tree_node]) { -// sizeof_source_approx_list[target_tree_node] *= 1.5; -// source_approx_list[target_tree_node] = realloc_vector(source_approx_list[target_tree_node], -// sizeof_source_approx_list[target_tree_node]); -// } -// source_approx_list[target_tree_node][source_approx_index_counter[target_tree_node]] = source_tree_node; -// source_approx_index_counter[target_tree_node]++; -// -// + } else if (source_tree_numpar[source_tree_node] < size_check) { + + /* add to CP approx list */ + + if (target_approx_index_counter[target_tree_node] >= sizeof_target_approx_list[target_tree_node]) { + sizeof_target_approx_list[target_tree_node] *= 1.5; + target_approx_list[target_tree_node] = realloc_vector(target_approx_list[target_tree_node], + sizeof_target_approx_list[target_tree_node]); + } + target_approx_list[target_tree_node][target_approx_index_counter[target_tree_node]] = source_tree_node; + target_approx_index_counter[target_tree_node]++; + + } else if (target_tree_numpar[target_tree_node] < size_check) { + + /* add to PC approx list */ + + if (source_approx_index_counter[target_tree_node] >= sizeof_source_approx_list[target_tree_node]) { + sizeof_source_approx_list[target_tree_node] *= 1.5; + source_approx_list[target_tree_node] = realloc_vector(source_approx_list[target_tree_node], + sizeof_source_approx_list[target_tree_node]); + } + source_approx_list[target_tree_node][source_approx_index_counter[target_tree_node]] = source_tree_node; + source_approx_index_counter[target_tree_node]++; + } else { /* add to CC approx list */ From d17389a732a1af6b9618895bed91b656940d9d4b Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Sat, 13 Jun 2020 13:46:41 -0400 Subject: [PATCH 23/95] Printing out num interactions needs to be long long --- src/drivers/treedriver.c | 161 ++++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 77 deletions(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 52aa9999..76718be3 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -55,19 +55,19 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run } double time1; - int total_num_direct = 0; - int total_num_approx = 0; - int total_num_inter = 0; + long long int total_num_direct = 0; + long long int total_num_approx = 0; + long long int total_num_inter = 0; + + long long int total_num_direct_interact = 0; + long long int total_num_approx_interact = 0; + long long int total_num_interact = 0; - int total_num_direct_interact = 0; - int total_num_approx_interact = 0; - int total_num_interact = 0; - // These types of interactions only occur for CC - int total_num_source_approx = 0; - int total_num_target_approx = 0; - int total_num_source_approx_interact = 0; - int total_num_target_approx_interact = 0; + long long int total_num_source_approx = 0; + long long int total_num_target_approx = 0; + long long int total_num_source_approx_interact = 0; + long long int total_num_target_approx_interact = 0; //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -135,11 +135,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run for (int i = 0; i < batches->numnodes; ++i) { for (int j = 0; j < local_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += batches->numpar[i] - * tree->numpar[local_interaction_list->direct_interactions[i][j]]; + total_num_direct_interact += (long long int) batches->numpar[i] + * (long long int) tree->numpar[local_interaction_list->direct_interactions[i][j]]; } for (int j = 0; j < local_interaction_list->num_approx[i]; ++j) { - 
total_num_approx_interact += batches->numpar[i] * run_params->interp_pts_per_cluster; + total_num_approx_interact += (long long int) batches->numpar[i] + * (long long int) run_params->interp_pts_per_cluster; } } } @@ -178,11 +179,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run for (int i = 0; i < remote_batches->numnodes; ++i) { for (int j = 0; j < let_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += remote_batches->numpar[i] - * tree->numpar[let_interaction_list->direct_interactions[i][j]]; + total_num_direct_interact += (long long int) remote_batches->numpar[i] + * (long long int) tree->numpar[let_interaction_list->direct_interactions[i][j]]; } for (int j = 0; j < let_interaction_list->num_approx[i]; ++j) { - total_num_approx_interact += remote_batches->numpar[i] * run_params->interp_pts_per_cluster; + total_num_approx_interact += (long long int) remote_batches->numpar[i] + * (long long int) run_params->interp_pts_per_cluster; } } } @@ -279,10 +281,10 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run STOP_TIMER(&time_tree[2]); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - if (run_params->verbosity > 0) { - Tree_Print(tree); - Batches_Print(batches); - } + if (run_params->verbosity > 0) { + Tree_Print(tree); + Batches_Print(batches); + } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -346,11 +348,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run for (int i = 0; i < batches->numnodes; ++i) { for (int j = 0; j < local_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += batches->numpar[i] - * tree->numpar[local_interaction_list->direct_interactions[i][j]]; + total_num_direct_interact += (long long int) batches->numpar[i] + * (long long int) tree->numpar[local_interaction_list->direct_interactions[i][j]]; } for (int j = 0; j < local_interaction_list->num_approx[i]; 
++j) { - total_num_approx_interact += batches->numpar[i] * run_params->interp_pts_per_cluster; + total_num_approx_interact += (long long int) batches->numpar[i] + * (long long int) run_params->interp_pts_per_cluster; } } } @@ -386,11 +389,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run for (int i = 0; i < batches->numnodes; ++i) { for (int j = 0; j < let_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += batches->numpar[i] - * let_trees[get_from]->numpar[let_interaction_list->direct_interactions[i][j]]; + total_num_direct_interact += (long long int) batches->numpar[i] + * (long long int) let_trees[get_from]->numpar[let_interaction_list->direct_interactions[i][j]]; } for (int j = 0; j < let_interaction_list->num_approx[i]; ++j) { - total_num_approx_interact += batches->numpar[i] * run_params->interp_pts_per_cluster; + total_num_approx_interact += (long long int) batches->numpar[i] + * (long long int) run_params->interp_pts_per_cluster; } } } @@ -560,20 +564,21 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run target_tree->numnodes); - total_num_approx_interact += sum_int(local_interaction_list->num_approx, target_tree->numnodes) - * run_params->interp_pts_per_cluster * run_params->interp_pts_per_cluster; + total_num_approx_interact += (long long int) sum_int(local_interaction_list->num_approx, target_tree->numnodes) + * (long long int) run_params->interp_pts_per_cluster * run_params->interp_pts_per_cluster; for (int i = 0; i < target_tree->numnodes; ++i) { for (int j = 0; j < local_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += target_tree->numpar[i] - * source_tree->numpar[local_interaction_list->direct_interactions[i][j]]; + total_num_direct_interact += (long long int) target_tree->numpar[i] + * (long long int) source_tree->numpar[local_interaction_list->direct_interactions[i][j]]; } for (int j = 0; j < 
local_interaction_list->num_cc_source_approx[i]; ++j) { - total_num_source_approx_interact += target_tree->numpar[i] * run_params->interp_pts_per_cluster; + total_num_source_approx_interact += (long long int) target_tree->numpar[i] + * (long long int) run_params->interp_pts_per_cluster; } for (int j = 0; j < local_interaction_list->num_cc_target_approx[i]; ++j) { - total_num_target_approx_interact += run_params->interp_pts_per_cluster - * source_tree->numpar[local_interaction_list->cc_target_approx_interactions[i][j]]; + total_num_target_approx_interact += (long long int) run_params->interp_pts_per_cluster + * (long long int) source_tree->numpar[local_interaction_list->cc_target_approx_interactions[i][j]]; } } } @@ -613,20 +618,22 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run target_tree->numnodes); - total_num_approx_interact += sum_int(let_interaction_list->num_approx, target_tree->numnodes) - * run_params->interp_pts_per_cluster * run_params->interp_pts_per_cluster; + total_num_approx_interact += (long long int) sum_int(let_interaction_list->num_approx, target_tree->numnodes) + * (long long int) run_params->interp_pts_per_cluster + * (long long int) run_params->interp_pts_per_cluster; for (int i = 0; i < target_tree->numnodes; ++i) { for (int j = 0; j < let_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += target_tree->numpar[i] - * let_trees[get_from]->numpar[let_interaction_list->direct_interactions[i][j]]; + total_num_direct_interact += (long long int) target_tree->numpar[i] + * (long long int) let_trees[get_from]->numpar[let_interaction_list->direct_interactions[i][j]]; } for (int j = 0; j < let_interaction_list->num_cc_source_approx[i]; ++j) { - total_num_source_approx_interact += target_tree->numpar[i] * run_params->interp_pts_per_cluster; + total_num_source_approx_interact += (long long int) target_tree->numpar[i] + * (long long int) run_params->interp_pts_per_cluster; } for (int j = 0; j < 
let_interaction_list->num_cc_target_approx[i]; ++j) { - total_num_target_approx_interact += run_params->interp_pts_per_cluster - * let_trees[get_from]->numpar[let_interaction_list->cc_target_approx_interactions[i][j]]; + total_num_target_approx_interact += (long long int) run_params->interp_pts_per_cluster + * (long long int) let_trees[get_from]->numpar[let_interaction_list->cc_target_approx_interactions[i][j]]; } } } @@ -777,66 +784,66 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run /* For the pointwise interactions */ - int global_num_interact, max_num_interact, min_num_interact; - int global_num_direct_interact, max_num_direct_interact, min_num_direct_interact; - int global_num_approx_interact, max_num_approx_interact, min_num_approx_interact; - - int global_num_source_approx_interact, max_num_source_approx_interact, min_num_source_approx_interact; - int global_num_target_approx_interact, max_num_target_approx_interact, min_num_target_approx_interact; + long long int global_num_interact, max_num_interact, min_num_interact; + long long int global_num_direct_interact, max_num_direct_interact, min_num_direct_interact; + long long int global_num_approx_interact, max_num_approx_interact, min_num_approx_interact; + + long long int global_num_source_approx_interact, max_num_source_approx_interact, min_num_source_approx_interact; + long long int global_num_target_approx_interact, max_num_target_approx_interact, min_num_target_approx_interact; total_num_interact = total_num_direct_interact + total_num_approx_interact + total_num_source_approx_interact + total_num_target_approx_interact; - MPI_Reduce(&total_num_interact, &global_num_interact, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_interact, &max_num_interact, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_interact, &min_num_interact, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_interact, &global_num_interact, 1, 
MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_interact, &max_num_interact, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_interact, &min_num_interact, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_direct_interact, &global_num_direct_interact, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_direct_interact, &max_num_direct_interact, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_direct_interact, &min_num_direct_interact, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_direct_interact, &global_num_direct_interact, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_direct_interact, &max_num_direct_interact, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_direct_interact, &min_num_direct_interact, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_approx_interact, &global_num_approx_interact, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_approx_interact, &max_num_approx_interact, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_approx_interact, &min_num_approx_interact, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_approx_interact, &global_num_approx_interact, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_approx_interact, &max_num_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_approx_interact, &min_num_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); // These types of interactions only occur for CC if (run_params->compute_type == CLUSTER_CLUSTER) { - MPI_Reduce(&total_num_source_approx_interact, &global_num_source_approx_interact, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_source_approx_interact, &max_num_source_approx_interact, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); - 
MPI_Reduce(&total_num_source_approx_interact, &min_num_source_approx_interact, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_source_approx_interact, &global_num_source_approx_interact, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_source_approx_interact, &max_num_source_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_source_approx_interact, &min_num_source_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_target_approx_interact, &global_num_target_approx_interact, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_target_approx_interact, &max_num_target_approx_interact, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_target_approx_interact, &min_num_target_approx_interact, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_target_approx_interact, &global_num_target_approx_interact, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_target_approx_interact, &max_num_target_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&total_num_target_approx_interact, &min_num_target_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); } if (rank == 0) { printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative pointwise interactions across all ranks: %d\n", global_num_interact); - printf("[BaryTree] Maximum pointwise interactions across all ranks: %d\n", max_num_interact); - printf("[BaryTree] Minimum pointwise interactions across all ranks: %d\n", min_num_interact); + printf("[BaryTree] Cumulative pointwise interactions across all ranks: %lld\n", global_num_interact); + printf("[BaryTree] Maximum pointwise interactions across all ranks: %lld\n", max_num_interact); + printf("[BaryTree] Minimum pointwise interactions across all ranks: %lld\n", min_num_interact); printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative direct pointwise 
interactions across all ranks: %d\n", global_num_direct_interact); - printf("[BaryTree] Maximum direct pointwise interactions across all ranks: %d\n", max_num_direct_interact); - printf("[BaryTree] Minimum direct pointwise interactions across all ranks: %d\n", min_num_direct_interact); + printf("[BaryTree] Cumulative direct pointwise interactions across all ranks: %lld\n", global_num_direct_interact); + printf("[BaryTree] Maximum direct pointwise interactions across all ranks: %lld\n", max_num_direct_interact); + printf("[BaryTree] Minimum direct pointwise interactions across all ranks: %lld\n", min_num_direct_interact); printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative approx pointwise interactions across all ranks: %d\n", global_num_approx_interact); - printf("[BaryTree] Maximum approx pointwise interactions across all ranks: %d\n", max_num_approx_interact); - printf("[BaryTree] Minimum approx pointwise interactions across all ranks: %d\n", min_num_approx_interact); + printf("[BaryTree] Cumulative approx pointwise interactions across all ranks: %lld\n", global_num_approx_interact); + printf("[BaryTree] Maximum approx pointwise interactions across all ranks: %lld\n", max_num_approx_interact); + printf("[BaryTree] Minimum approx pointwise interactions across all ranks: %lld\n", min_num_approx_interact); printf("[BaryTree]\n"); // These types of interactions only occur for CC if (run_params->compute_type == CLUSTER_CLUSTER) { - printf("[BaryTree] Cumulative source approx pointwise interactions across all ranks: %d\n", global_num_source_approx_interact); - printf("[BaryTree] Maximum source approx pointwise interactions across all ranks: %d\n", max_num_source_approx_interact); - printf("[BaryTree] Minimum source approx pointwise interactions across all ranks: %d\n", min_num_source_approx_interact); + printf("[BaryTree] Cumulative source approx pointwise interactions across all ranks: %lld\n", global_num_source_approx_interact); + printf("[BaryTree] Maximum 
source approx pointwise interactions across all ranks: %lld\n", max_num_source_approx_interact); + printf("[BaryTree] Minimum source approx pointwise interactions across all ranks: %lld\n", min_num_source_approx_interact); printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative target approx pointwise interactions across all ranks: %d\n", global_num_target_approx_interact); - printf("[BaryTree] Maximum source approx pointwise interactions across all ranks: %d\n", max_num_target_approx_interact); - printf("[BaryTree] Minimum source approx pointwise interactions across all ranks: %d\n", min_num_target_approx_interact); + printf("[BaryTree] Cumulative target approx pointwise interactions across all ranks: %lld\n", global_num_target_approx_interact); + printf("[BaryTree] Maximum source approx pointwise interactions across all ranks: %lld\n", max_num_target_approx_interact); + printf("[BaryTree] Minimum source approx pointwise interactions across all ranks: %lld\n", min_num_target_approx_interact); printf("[BaryTree]\n"); } From 07f989d08d135c0427700d300af130f7b103e7dd Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Sat, 13 Jun 2020 13:53:01 -0400 Subject: [PATCH 24/95] Tightening the size check --- src/interaction_lists/interaction_lists.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/interaction_lists/interaction_lists.c b/src/interaction_lists/interaction_lists.c index 95e28e26..eb4ae017 100644 --- a/src/interaction_lists/interaction_lists.c +++ b/src/interaction_lists/interaction_lists.c @@ -558,8 +558,8 @@ void cc_compute_interaction_list( if ((source_tree_radius[source_tree_node] + target_tree_radius[target_tree_node]) < dist * run_params->theta) { - if ((source_tree_numpar[source_tree_node] < size_check) && - (target_tree_numpar[target_tree_node] < size_check)) { + if ((source_tree_numpar[source_tree_node] <= size_check) && + (target_tree_numpar[target_tree_node] <= size_check)) { /* add to direct list */ @@ -571,7 +571,7 @@ void cc_compute_interaction_list( direct_list[target_tree_node][direct_index_counter[target_tree_node]] = source_tree_node; direct_index_counter[target_tree_node]++; - } else if (source_tree_numpar[source_tree_node] < size_check) { + } else if (source_tree_numpar[source_tree_node] <= size_check) { /* add to CP approx list */ @@ -583,7 +583,7 @@ void cc_compute_interaction_list( target_approx_list[target_tree_node][target_approx_index_counter[target_tree_node]] = source_tree_node; target_approx_index_counter[target_tree_node]++; - } else if (target_tree_numpar[target_tree_node] < size_check) { + } else if (target_tree_numpar[target_tree_node] <= size_check) { /* add to PC approx list */ From f82f3f5c242877353242e85181c94916e0b71884 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 25 Jun 2020 08:41:16 -0400 Subject: [PATCH 25/95] Can now pass Beta accuracy parameter. Under Testing. 
--- examples/support_fns.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/examples/support_fns.c b/examples/support_fns.c index 749630be..ecdb2faa 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -23,6 +23,7 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * int verbosity = 0; int interp_order = 5; double theta = 0.5; + double beta = -1.0; int max_per_source_leaf = 500; int max_per_target_leaf = 500; double size_check_factor = 1.0; @@ -68,6 +69,9 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } else if (strcmp(c1, "theta") == 0) { theta = atof(c2); + } else if (strcmp(c1, "beta") == 0) { + beta = atof(c2); + } else if (strcmp(c1, "max_per_source_leaf") == 0) { max_per_source_leaf = atoi(c2); @@ -283,6 +287,18 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * + if (beta!=-1.0){ + if (rank==0){ + printf("beta = %f\n", beta); + theta = 0.9 - 0.4*pow(beta,2); + interp_order = (int) (12 - 11*pow(1-beta,3) ); + printf("computed theta = %f\n", theta); + printf("computed degree = %i\n", interp_order); + } + } + + + RunParams_Setup(run_params, kernel, num_kernel_params, kernel_params, approximation, singularity, compute_type, From b21515f76d3e2fbaaffb68b819ef8037cc3e235c Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Fri, 26 Jun 2020 10:33:37 -0400 Subject: [PATCH 26/95] Beta included in run_params. Print bug on GL. 
--- examples/example.in | 1 + examples/support_fns.c | 27 +++++++++++++++------------ examples/test_BaryTreeInterface.c | 3 ++- src/interface/BaryTreeInterface.c | 4 ++-- src/interface/BaryTreeInterface.h | 2 +- src/run_params/run_params.c | 3 ++- src/run_params/run_params.h | 2 +- src/run_params/struct_run_params.h | 1 + tests/serial_tests.c | 25 ++++++++++++++++--------- 9 files changed, 41 insertions(+), 27 deletions(-) diff --git a/examples/example.in b/examples/example.in index 29109a46..cc807c99 100644 --- a/examples/example.in +++ b/examples/example.in @@ -2,6 +2,7 @@ num_sources 20000 num_targets 20000 order 2 theta 0.9 +beta -1.0 size_check 0.0 max_per_source_leaf 100 max_per_target_leaf 100 diff --git a/examples/support_fns.c b/examples/support_fns.c index ecdb2faa..8e66a97d 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -287,22 +287,22 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * - if (beta!=-1.0){ - if (rank==0){ - printf("beta = %f\n", beta); - theta = 0.9 - 0.4*pow(beta,2); - interp_order = (int) (12 - 11*pow(1-beta,3) ); - printf("computed theta = %f\n", theta); - printf("computed degree = %i\n", interp_order); - } - } +// if (beta>=0.0){ +// if (rank==0){ +// printf("beta = %f\n", beta); +// theta = 0.9 - 0.4*pow(beta,2); +// interp_order = (int) (12 - 11*pow(1-beta,3) ); +// printf("computed theta = %f\n", theta); +// printf("computed degree = %i\n", interp_order); +// } +// } RunParams_Setup(run_params, kernel, num_kernel_params, kernel_params, approximation, singularity, compute_type, - theta, size_check_factor, interp_order, + theta, beta, size_check_factor, interp_order, max_per_source_leaf, max_per_target_leaf, verbosity); @@ -751,13 +751,16 @@ void CSV_Print(int N, int M, struct RunParams *run_params, MPI_Comm_size(MPI_COMM_WORLD, &numProcs); if (rank == 0) { + printf("ABOUT TO WRITE CSV. 
THETA = %f, ORDER = %d, MAX LEAF = %d\n", run_params->theta, run_params->interp_order, run_params->max_per_source_leaf); + + RunParams_Print(run_params); FILE *fp = fopen("out.csv", "a"); - fprintf(fp, "%d,%d,%d,%f,%d,%d,%d,%d,%d,%d," + fprintf(fp, "%d,%d,%d,%f,%f,%d,%d,%d,%d,%d,%d," "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," "%e,%e,%e,%e,%e,%e,%e,%e\n", - N, M, run_params->interp_order, run_params->theta, + N, M, run_params->interp_order, run_params->theta, run_params->beta, run_params->max_per_source_leaf, run_params->max_per_target_leaf, run_params->kernel, run_params->singularity, run_params->approximation, numProcs, // 1 ends diff --git a/examples/test_BaryTreeInterface.c b/examples/test_BaryTreeInterface.c index 55f59fd6..8c608dd5 100644 --- a/examples/test_BaryTreeInterface.c +++ b/examples/test_BaryTreeInterface.c @@ -25,6 +25,7 @@ int main(int argc, char **argv) int interpOrder = 5; double theta = 0.8; + double beta = 1.0; int maxPerLeaf = 500; int maxPerBatch = 500; @@ -64,7 +65,7 @@ int main(int argc, char **argv) xS, yS, zS, qS, wS, potential, kernel, numParams, kernelParams, singularity, approximation, compute_type, - interpOrder, theta, maxPerLeaf, maxPerBatch, + interpOrder, theta, beta, maxPerLeaf, maxPerBatch, sizeCheck, verbosity); printf("[test BaryTree interface] BaryTree has finished.\n"); diff --git a/src/interface/BaryTreeInterface.c b/src/interface/BaryTreeInterface.c index 67c3abbf..e39fff09 100644 --- a/src/interface/BaryTreeInterface.c +++ b/src/interface/BaryTreeInterface.c @@ -21,7 +21,7 @@ void BaryTreeInterface(int numTargets, int numSources, double *outputArray, KERNEL kernel, int numKernelParams, double *kernelParams, SINGULARITY singularity, APPROXIMATION approximation, COMPUTE_TYPE compute_type, - int interpOrder, double theta, int maxPerSourceLeaf, int maxPerTargetLeaf, + int 
interpOrder, double theta, double beta, int maxPerSourceLeaf, int maxPerTargetLeaf, double sizeCheck, int verbosity) { @@ -32,7 +32,7 @@ void BaryTreeInterface(int numTargets, int numSources, RunParams_Setup(&run_params, kernel, numKernelParams, kernelParams, approximation, singularity, compute_type, - theta, sizeCheck, interpOrder, + theta, beta, sizeCheck, interpOrder, maxPerSourceLeaf, maxPerTargetLeaf, verbosity); diff --git a/src/interface/BaryTreeInterface.h b/src/interface/BaryTreeInterface.h index b7885739..8a0765b9 100644 --- a/src/interface/BaryTreeInterface.h +++ b/src/interface/BaryTreeInterface.h @@ -10,7 +10,7 @@ void BaryTreeInterface(int numTargets, int numSources, double *outputArray, KERNEL kernel, int numKernelParams, double *kernelParams, SINGULARITY singularity, APPROXIMATION approximation, COMPUTE_TYPE compute_type, - int interpOrder, double theta, int maxPerSourceLeaf, int maxPerTargetLeaf, + int interpOrder, double theta, double beta, int maxPerSourceLeaf, int maxPerTargetLeaf, double sizeCheck, int verbosity); diff --git a/src/run_params/run_params.c b/src/run_params/run_params.c index 43093a7a..b093ac86 100644 --- a/src/run_params/run_params.c +++ b/src/run_params/run_params.c @@ -15,7 +15,7 @@ void RunParams_Setup(struct RunParams **run_params_addr, APPROXIMATION approximation, SINGULARITY singularity, COMPUTE_TYPE compute_type, - double theta, double size_check_factor, int interp_order, + double theta, double beta, double size_check_factor, int interp_order, int max_per_source_leaf, int max_per_target_leaf, int verbosity) { @@ -35,6 +35,7 @@ void RunParams_Setup(struct RunParams **run_params_addr, run_params->compute_type = compute_type; run_params->theta = theta; + run_params->beta = beta; run_params->size_check_factor = size_check_factor; run_params->interp_order = interp_order; diff --git a/src/run_params/run_params.h b/src/run_params/run_params.h index f6d3d314..a9212c21 100644 --- a/src/run_params/run_params.h +++ 
b/src/run_params/run_params.h @@ -10,7 +10,7 @@ void RunParams_Setup(struct RunParams **run_params_addr, APPROXIMATION approximation, SINGULARITY singularity, COMPUTE_TYPE compute_type, - double theta, double size_check_factor, int interp_order, + double theta, double beta, double size_check_factor, int interp_order, int max_per_source_leaf, int max_per_target_leaf, int verbosity); diff --git a/src/run_params/struct_run_params.h b/src/run_params/struct_run_params.h index 9c299bcc..8ee90509 100644 --- a/src/run_params/struct_run_params.h +++ b/src/run_params/struct_run_params.h @@ -26,6 +26,7 @@ struct RunParams int max_per_target_leaf; int verbosity; + double beta; }; diff --git a/tests/serial_tests.c b/tests/serial_tests.c index 74249631..ef19caf6 100644 --- a/tests/serial_tests.c +++ b/tests/serial_tests.c @@ -74,7 +74,7 @@ static char *test_direct_sum_on_10_particles() RunParams_Setup(&run_params, COULOMB, numKernelParams, kernelParams, NO_APPROX, SKIPPING, NO_COMPUTE_TYPE, - 0, 0, 0, 0, 0, verbosity); + 0, 0, 0, 0, 0, 0, verbosity); directdriver(sources, targets, run_params, potential, time_tree); @@ -101,7 +101,7 @@ static char *test_direct_sum_on_10_particles() RunParams_Setup(&run_params, COULOMB, numKernelParams, kernelParams, NO_APPROX, SUBTRACTION, NO_COMPUTE_TYPE, - 0, 0, 0, 0, 0, verbosity); + 0, 0, 0, 0, 0, 0, verbosity); fprintf(stderr, "I'm here 3.\n"); directdriver(sources, targets, run_params, potential, time_tree); @@ -130,7 +130,7 @@ static char *test_direct_sum_on_10_particles() RunParams_Setup(&run_params, YUKAWA, numKernelParams, kernelParams, NO_APPROX, SKIPPING, NO_COMPUTE_TYPE, - 0, 0, 0, 0, 0, verbosity); + 0, 0, 0, 0, 0, 0, verbosity); directdriver(sources, targets, run_params, potential, time_tree); @@ -157,7 +157,7 @@ static char *test_direct_sum_on_10_particles() RunParams_Setup(&run_params, YUKAWA, numKernelParams, kernelParams, NO_APPROX, SUBTRACTION, NO_COMPUTE_TYPE, - 0, 0, 0, 0, 0, verbosity); + 0, 0, 0, 0, 0, 0, verbosity); 
directdriver(sources, targets, run_params, potential, time_tree); @@ -207,6 +207,8 @@ static char *test_treecode_on_100_particles() int verbosity = 1; int N = 100; + double beta = -1.0; + struct Particles *sources = NULL; struct Particles *targets = NULL; double *potential = NULL; @@ -257,7 +259,7 @@ static char *test_treecode_on_100_particles() RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - theta, size_check, order, max_per_source_leaf, max_per_target_leaf, verbosity); + theta, beta, size_check, order, max_per_source_leaf, max_per_target_leaf, verbosity); /***********************************************/ @@ -541,6 +543,8 @@ static char *test_treecode_on_1_target_10000_sources() int verbosity = 1; int N = 10000; + double beta = -1.0; + struct Particles *sources = NULL; struct Particles *targets = NULL; double *potential = NULL, *potential_direct = NULL; @@ -598,7 +602,7 @@ static char *test_treecode_on_1_target_10000_sources() RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - theta, size_check, order, max_per_source_leaf, max_per_target_leaf, verbosity); + theta, beta, size_check, order, max_per_source_leaf, max_per_target_leaf, verbosity); /***********************************************/ @@ -895,6 +899,7 @@ static char *test_treecode_wrapper() int order = 4; double theta = 0.8; + double beta = -1.0; double size_check = 1.0; int num_kernel_params = 1; @@ -902,7 +907,7 @@ static char *test_treecode_wrapper() RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - theta, size_check, order, max_per_source_leaf, max_per_target_leaf, verbosity); + theta, beta, size_check, order, max_per_source_leaf, max_per_target_leaf, verbosity); /***********************************************/ @@ -921,7 +926,7 @@ static char *test_treecode_wrapper() 
sources->x,sources->y,sources->z,sources->q,sources->w, potential_wrapper, COULOMB, num_kernel_params, kernel_params, SKIPPING, LAGRANGE, PARTICLE_CLUSTER, - order, theta, max_per_source_leaf, max_per_target_leaf, + order, theta, beta, max_per_source_leaf, max_per_target_leaf, size_check, verbosity); treedriver(sources, targets, run_params, potential, time_tree); @@ -966,6 +971,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() int verbosity = 1; int N = 10000; + double beta = -1.0; struct Particles *sources = NULL; struct Particles *targets = NULL; @@ -1021,9 +1027,10 @@ static char *test_treecode_parameters_on_1_target_10000_sources() int num_kernel_params = 1; double kernel_params[1] = {0.5}; + RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - 0, size_check, 0, max_per_source_leaf, max_per_target_leaf, verbosity); + 0, beta, size_check, 0, max_per_source_leaf, max_per_target_leaf, verbosity); // 3 parameter sets. Set 2 increases order, set 3 reduces MAC. Both should be more accurate than set 1. 
From 3223dc96c9a90ceb0c0e6f7e39ef0ff07cf608e3 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Sun, 28 Jun 2020 11:21:13 -0400 Subject: [PATCH 27/95] updated support_fns and run_params --- examples/support_fns.c | 20 ++++++++++---------- src/run_params/run_params.c | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/examples/support_fns.c b/examples/support_fns.c index 8e66a97d..66e1db18 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -287,15 +287,15 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * -// if (beta>=0.0){ -// if (rank==0){ -// printf("beta = %f\n", beta); -// theta = 0.9 - 0.4*pow(beta,2); -// interp_order = (int) (12 - 11*pow(1-beta,3) ); -// printf("computed theta = %f\n", theta); -// printf("computed degree = %i\n", interp_order); -// } -// } + if (beta>=0.0){ + if (rank==0){ + printf("beta = %f\n", beta); + theta = 0.95 - 0.4*pow(beta,1.75); + interp_order = (int) (12 - 11*pow(1-beta,3) ); + printf("computed theta = %f\n", theta); + printf("computed degree = %i\n", interp_order); + } + } @@ -751,7 +751,7 @@ void CSV_Print(int N, int M, struct RunParams *run_params, MPI_Comm_size(MPI_COMM_WORLD, &numProcs); if (rank == 0) { - printf("ABOUT TO WRITE CSV. THETA = %f, ORDER = %d, MAX LEAF = %d\n", run_params->theta, run_params->interp_order, run_params->max_per_source_leaf); + printf("ABOUT TO WRITE CSV. 
BETA = %f, THETA = %f, ORDER = %d, MAX LEAF = %d\n", run_params->beta, run_params->theta, run_params->interp_order, run_params->max_per_source_leaf); RunParams_Print(run_params); FILE *fp = fopen("out.csv", "a"); diff --git a/src/run_params/run_params.c b/src/run_params/run_params.c index b093ac86..8769fad1 100644 --- a/src/run_params/run_params.c +++ b/src/run_params/run_params.c @@ -109,6 +109,7 @@ void RunParams_Print(struct RunParams *run_params) printf("[BaryTree] singularity = %d\n", run_params->singularity); printf("[BaryTree] compute_type = %d\n", run_params->compute_type); printf("[BaryTree] theta = %f\n", run_params->theta); + printf("[BaryTree] beta = %f\n", run_params->beta); printf("[BaryTree] size_check_factor = %f\n", run_params->size_check_factor); printf("[BaryTree] interp_order = %d\n", run_params->interp_order); printf("[BaryTree] interp_pts_per_cluster = %d\n", run_params->interp_pts_per_cluster); From 4a795bbc9832d8edb17c98362dacd966a5a44df5 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Mon, 29 Jun 2020 15:38:48 -0400 Subject: [PATCH 28/95] started modifications for yukawa cc ss --- src/kernels/yukawa/yukawa_ss_cp.c | 68 +++++++++++++++++++++++++++++++ src/kernels/yukawa/yukawa_ss_cp.h | 15 +++++++ 2 files changed, 83 insertions(+) create mode 100644 src/kernels/yukawa/yukawa_ss_cp.c create mode 100644 src/kernels/yukawa/yukawa_ss_cp.h diff --git a/src/kernels/yukawa/yukawa_ss_cp.c b/src/kernels/yukawa/yukawa_ss_cp.c new file mode 100644 index 00000000..2789eb2a --- /dev/null +++ b/src/kernels/yukawa/yukawa_ss_cp.c @@ -0,0 +1,68 @@ +#include +#include +#include + +#include "../../run_params/struct_run_params.h" +#include "yukawa_cp.h" + + +void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_sources, int starting_index_of_cluster, + double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *cluster_x, double *cluster_y, 
double *cluster_z, double *cluster_q, double *cluster_w, + struct RunParams *run_params, int gpu_async_stream_id) +{ + + double kernel_parameter = run_params->kernel_params[0]; + +#ifdef OPENACC_ENABLED + #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ + cluster_x, cluster_y, cluster_z, cluster_q) + { +#endif +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { + + double temporary_potential = 0.0; + + double cx = cluster_x[starting_index_of_cluster + i]; + double cy = cluster_y[starting_index_of_cluster + i]; + double cz = cluster_z[starting_index_of_cluster + i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:temporary_potential) +#endif + for (int j = 0; j < number_of_sources_in_batch; j++) { +#ifdef OPENACC_ENABLED + #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) +#endif + + int jj = starting_index_of_sources + j; + double dx = cx - source_x[jj]; + double dy = cy - source_y[jj]; + double dz = cz - source_z[jj]; + double r = sqrt(dx*dx + dy*dy + dz*dz); + + if (r > DBL_MIN) { + temporary_potential += source_q[jj] * source_w[jj] * exp(-kernel_parameter * r) / r; + temporary_potential += (source_q[jj] - cluster_q * cluster_w[jj]) * exp(-kernel_parameter*r) /r; + + } + } // end loop over interpolation points +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif + cluster_q[starting_index_of_cluster + i] += temporary_potential; + } +#ifdef OPENACC_ENABLED + } // end kernel +#endif + return; +} + + diff --git a/src/kernels/yukawa/yukawa_ss_cp.h 
b/src/kernels/yukawa/yukawa_ss_cp.h new file mode 100644 index 00000000..7728f40d --- /dev/null +++ b/src/kernels/yukawa/yukawa_ss_cp.h @@ -0,0 +1,15 @@ +/* Interaction Kernels */ +#ifndef H_K_YUKAWA_CP_H +#define H_K_YUKAWA_CP_H + +#include "../../run_params/struct_run_params.h" + + +void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_sources, int starting_index_of_cluster, + double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_w, + struct RunParams *run_params, int gpu_async_stream_id); + + +#endif /* H_K_YUKAWA_CP_H */ From acf19b03a3569921e70aa9bb18602e1345666d46 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Mon, 29 Jun 2020 19:29:13 -0400 Subject: [PATCH 29/95] Adding beta parameter --- examples/support_fns.c | 30 ++----- examples/test_BaryTreeInterface.c | 4 +- interfaces/python/BaryTreeInterface.py | 10 +-- interfaces/python/testBaryTreeInterface.py | 5 +- src/interface/BaryTreeInterface.c | 10 +-- src/interface/BaryTreeInterface.h | 4 +- src/run_params/run_params.c | 98 ++++++++++++++++++---- src/run_params/run_params.h | 6 +- src/run_params/struct_run_params.h | 3 +- tests/serial_tests.c | 22 +++-- 10 files changed, 122 insertions(+), 70 deletions(-) diff --git a/examples/support_fns.c b/examples/support_fns.c index 66e1db18..0102c4e7 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -284,27 +284,14 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } exit(1); } - - - - if (beta>=0.0){ - if (rank==0){ - printf("beta = %f\n", beta); - theta = 0.95 - 0.4*pow(beta,1.75); - interp_order = (int) (12 - 11*pow(1-beta,3) ); - printf("computed theta = %f\n", theta); - printf("computed degree = %i\n", interp_order); - } - } - - + RunParams_Setup(run_params, kernel, num_kernel_params, 
kernel_params, approximation, singularity, compute_type, - theta, beta, size_check_factor, interp_order, + theta, interp_order, max_per_source_leaf, max_per_target_leaf, - verbosity); + size_check_factor, beta, verbosity); return; } @@ -751,18 +738,17 @@ void CSV_Print(int N, int M, struct RunParams *run_params, MPI_Comm_size(MPI_COMM_WORLD, &numProcs); if (rank == 0) { - printf("ABOUT TO WRITE CSV. BETA = %f, THETA = %f, ORDER = %d, MAX LEAF = %d\n", run_params->beta, run_params->theta, run_params->interp_order, run_params->max_per_source_leaf); - RunParams_Print(run_params); FILE *fp = fopen("out.csv", "a"); - fprintf(fp, "%d,%d,%d,%f,%f,%d,%d,%d,%d,%d,%d," + fprintf(fp, "%d,%d,%d,%d,%d,%d,%d,%f,%d,%d,%d,%d,%f," "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," "%e,%e,%e,%e,%e,%e,%e,%e\n", - N, M, run_params->interp_order, run_params->theta, run_params->beta, - run_params->max_per_source_leaf, run_params->max_per_target_leaf, run_params->kernel, - run_params->singularity, run_params->approximation, numProcs, // 1 ends + N, M, numProcs, run_params->kernel, run_params->approximation, run_params->singularity, + run_params->compute_type, run_params->theta, run_params->interp_order, + run_params->max_per_source_leaf, run_params->max_per_target_leaf, + run_params->size_check_factor, run_params->beta, // 1 ends time_run_glob[0][0], time_run_glob[1][0], // min, max, avg pre-process time_run_glob[2][0]/numProcs, diff --git a/examples/test_BaryTreeInterface.c b/examples/test_BaryTreeInterface.c index 8c608dd5..f38d2fa0 100644 --- a/examples/test_BaryTreeInterface.c +++ b/examples/test_BaryTreeInterface.c @@ -65,8 +65,8 @@ int main(int argc, char **argv) xS, yS, zS, qS, wS, potential, kernel, numParams, kernelParams, singularity, approximation, compute_type, - interpOrder, theta, beta, maxPerLeaf, maxPerBatch, - sizeCheck, 
verbosity); + theta, interpOrder, maxPerLeaf, maxPerBatch, + sizeCheck, beta, verbosity); printf("[test BaryTree interface] BaryTree has finished.\n"); diff --git a/interfaces/python/BaryTreeInterface.py b/interfaces/python/BaryTreeInterface.py index 4ea23fc1..c882dec6 100644 --- a/interfaces/python/BaryTreeInterface.py +++ b/interfaces/python/BaryTreeInterface.py @@ -63,7 +63,7 @@ class ComputeType(CEnum): ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), Kernel, ctypes.c_int, ctypes.POINTER(ctypes.c_double), Singularity, Approximation, ComputeType, - ctypes.c_int, ctypes.c_double, ctypes.c_int, ctypes.c_int, ctypes.c_double, ctypes.c_int ) + ctypes.c_double, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_double, ctypes.c_double, ctypes.c_int ) except NameError: print("Warning: Could not set argtypes of _gpu_treecodeRoutines. 
Ignore if not using GPUs.") @@ -72,7 +72,7 @@ class ComputeType(CEnum): ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.c_double), Kernel, ctypes.c_int, ctypes.POINTER(ctypes.c_double), Singularity, Approximation, ComputeType, - ctypes.c_int, ctypes.c_double, ctypes.c_int, ctypes.c_int, ctypes.c_double, ctypes.c_int ) + ctypes.c_double, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_double, ctypes.c_double, ctypes.c_int ) except NameError: print("Could not set argtypes of _cpu_treecodeRoutines.") @@ -83,7 +83,7 @@ def callTreedriver(numTargets, numSources, targetX, targetY, targetZ, targetValue, sourceX, sourceY, sourceZ, sourceValue, sourceWeight, kernelName, numberOfKernelParameters, kernelParameters, singularityHandling, - approximationName, computeType, order, theta, maxParNode, batchSize, GPUpresent, verbosity, sizeCheck=None): + approximationName, computeType, theta, order, maxParNode, batchSize, beta, GPUpresent, verbosity, sizeCheck=None): ''' python function which creates pointers to the arrays and calls treedriverWrapper. returns the results array. 
@@ -119,14 +119,14 @@ def callTreedriver(numTargets, numSources, sourceX_p, sourceY_p, sourceZ_p, sourceValue_p, sourceWeight_p, resultArray_p, kernelName, ctypes.c_int(numberOfKernelParameters), kernelParameters_p, singularityHandling, approximationName, computeType, - ctypes.c_int(order), ctypes.c_double(theta), ctypes.c_int(maxParNode), ctypes.c_int(batchSize), ctypes.c_double(sizeCheck), ctypes.c_int(verbosity) ) + ctypes.c_double(theta), ctypes.c_int(order), ctypes.c_int(maxParNode), ctypes.c_int(batchSize), ctypes.c_double(sizeCheck), ctypes.c_double(beta), ctypes.c_int(verbosity) ) elif GPUpresent==False: # No gpu present _cpu_treecodeRoutines.BaryTreeInterface(ctypes.c_int(numTargets), ctypes.c_int(numSources), targetX_p, targetY_p, targetZ_p, targetValue_p, sourceX_p, sourceY_p, sourceZ_p, sourceValue_p, sourceWeight_p, resultArray_p, kernelName, ctypes.c_int(numberOfKernelParameters), kernelParameters_p, singularityHandling, approximationName, computeType, - ctypes.c_int(order), ctypes.c_double(theta), ctypes.c_int(maxParNode), ctypes.c_int(batchSize), ctypes.c_double(sizeCheck), ctypes.c_int(verbosity) ) + ctypes.c_double(theta), ctypes.c_int(order), ctypes.c_int(maxParNode), ctypes.c_int(batchSize), ctypes.c_double(sizeCheck), ctypes.c_double(beta), ctypes.c_int(verbosity) ) else: print("What should GPUpresent be set to in the wrapper?") exit(-1) diff --git a/interfaces/python/testBaryTreeInterface.py b/interfaces/python/testBaryTreeInterface.py index 66f32d81..54947d20 100644 --- a/interfaces/python/testBaryTreeInterface.py +++ b/interfaces/python/testBaryTreeInterface.py @@ -25,6 +25,7 @@ GPUpresent = False theta = 0.8 treecodeOrder = 4 + beta = -1 gaussianAlpha = 1.0 verbosity = 0 @@ -55,8 +56,8 @@ np.copy(X), np.copy(Y), np.copy(Z), np.copy(RHO), np.copy(W), kernel, numberOfKernelParameters, kernelParameters, singularity, approximation, computeType, - treecodeOrder, theta, maxPerSourceLeaf, maxPerTargetLeaf, - GPUpresent, verbosity, sizeCheck=1.0) + 
theta, treecodeOrder, maxPerSourceLeaf, maxPerTargetLeaf, + beta, GPUpresent, verbosity, sizeCheck=1.0) assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output." print("If no errors printed, then the call to the treecode wrapper worked!") diff --git a/src/interface/BaryTreeInterface.c b/src/interface/BaryTreeInterface.c index e39fff09..3a280784 100644 --- a/src/interface/BaryTreeInterface.c +++ b/src/interface/BaryTreeInterface.c @@ -21,8 +21,8 @@ void BaryTreeInterface(int numTargets, int numSources, double *outputArray, KERNEL kernel, int numKernelParams, double *kernelParams, SINGULARITY singularity, APPROXIMATION approximation, COMPUTE_TYPE compute_type, - int interpOrder, double theta, double beta, int maxPerSourceLeaf, int maxPerTargetLeaf, - double sizeCheck, int verbosity) + double theta, int interpOrder, int maxPerSourceLeaf, int maxPerTargetLeaf, + double sizeCheck, double beta, int verbosity) { double timing[12]; @@ -32,9 +32,9 @@ void BaryTreeInterface(int numTargets, int numSources, RunParams_Setup(&run_params, kernel, numKernelParams, kernelParams, approximation, singularity, compute_type, - theta, beta, sizeCheck, interpOrder, - maxPerSourceLeaf, maxPerTargetLeaf, - verbosity); + theta, interpOrder, + maxPerSourceLeaf, maxPerTargetLeaf, sizeCheck, + beta, verbosity); struct Particles sources, targets; diff --git a/src/interface/BaryTreeInterface.h b/src/interface/BaryTreeInterface.h index 8a0765b9..160c9103 100644 --- a/src/interface/BaryTreeInterface.h +++ b/src/interface/BaryTreeInterface.h @@ -10,8 +10,8 @@ void BaryTreeInterface(int numTargets, int numSources, double *outputArray, KERNEL kernel, int numKernelParams, double *kernelParams, SINGULARITY singularity, APPROXIMATION approximation, COMPUTE_TYPE compute_type, - int interpOrder, double theta, double beta, int maxPerSourceLeaf, int maxPerTargetLeaf, - double sizeCheck, int verbosity); + double theta, int interpOrder, int maxPerSourceLeaf, int maxPerTargetLeaf, 
+ double sizeCheck, double beta, int verbosity); #endif /* H_BARYTREE_INTERFACE_H */ diff --git a/src/run_params/run_params.c b/src/run_params/run_params.c index 8769fad1..245053cb 100644 --- a/src/run_params/run_params.c +++ b/src/run_params/run_params.c @@ -15,14 +15,15 @@ void RunParams_Setup(struct RunParams **run_params_addr, APPROXIMATION approximation, SINGULARITY singularity, COMPUTE_TYPE compute_type, - double theta, double beta, double size_check_factor, int interp_order, + double theta, int interp_order, int max_per_source_leaf, int max_per_target_leaf, - int verbosity) + double size_check_factor, double beta, int verbosity) { RunParams_Free(run_params_addr); *run_params_addr = malloc(sizeof (struct RunParams)); struct RunParams *run_params = *run_params_addr; + run_params->verbosity = verbosity; run_params->kernel = kernel; run_params->num_kernel_params = num_kernel_params; if (run_params->num_kernel_params > 0) make_vector(run_params->kernel_params, num_kernel_params); @@ -33,13 +34,83 @@ void RunParams_Setup(struct RunParams **run_params_addr, run_params->approximation = approximation; run_params->singularity = singularity; run_params->compute_type = compute_type; - - run_params->theta = theta; - run_params->beta = beta; - run_params->size_check_factor = size_check_factor; - - run_params->interp_order = interp_order; - run_params->interp_pts_per_cluster = (interp_order+1) * (interp_order+1) * (interp_order+1); + + if (beta < 0 || beta > 1) { + + run_params->beta = -1; + + run_params->theta = theta; + run_params->interp_order = interp_order; + run_params->size_check_factor = size_check_factor; + + run_params->max_per_source_leaf = max_per_source_leaf; + run_params->max_per_target_leaf = max_per_target_leaf; + + run_params->interp_pts_per_cluster = (interp_order+1) * (interp_order+1) * (interp_order+1); + + } else { + + run_params->beta = beta; + + double theta_min, theta_max; + double n_min, n_max; + double exp_s, exp_t; + + if (compute_type == 
PARTICLE_CLUSTER || compute_type == CLUSTER_PARTICLE) { + if (approximation == LAGRANGE) { + theta_min = 0.55; + theta_max = 0.95; + exp_s = 2; + + n_min = 1; + n_max = 12; + exp_t = 3; + + } else { // HERMITE + theta_min = 0.55; + theta_max = 0.95; + exp_s = 2; + + n_min = 1; + n_max = 9; + exp_t = 3; + } + + } else { // CLUSTER_CLUSTER + theta_min = 0.55; + theta_max = 0.95; + exp_s = 1.75; + + n_min = 1; + n_max = 12; + exp_t = 3; + } + + run_params->theta = theta_max - (theta_max - theta_min) * pow(beta, exp_s); + run_params->interp_order = (int) (n_max - (n_max - n_min) * pow(1. - beta, exp_t)); + + run_params->interp_pts_per_cluster = (run_params->interp_order + 1) + * (run_params->interp_order + 1) + * (run_params->interp_order + 1); + + #ifdef OPENACC_ENABLED + run_params->max_per_source_leaf = 2000; + run_params->max_per_target_leaf = 2000; + #else // CPU + if (compute_type == PARTICLE_CLUSTER) { + run_params->max_per_source_leaf = 50; + run_params->max_per_target_leaf = 5; + + } else if (compute_type == CLUSTER_PARTICLE) { + run_params->max_per_source_leaf = 5; + run_params->max_per_target_leaf = 50; + + } else { // CLUSTER_CLUSTER + run_params->max_per_source_leaf = 50; + run_params->max_per_target_leaf = 50; + } + #endif + } run_params->interp_weights_per_cluster = run_params->interp_pts_per_cluster; run_params->interp_charges_per_cluster = run_params->interp_pts_per_cluster; @@ -49,11 +120,6 @@ void RunParams_Setup(struct RunParams **run_params_addr, if (run_params->approximation == HERMITE && run_params->singularity == SUBTRACTION) run_params->interp_weights_per_cluster *=8; - run_params->max_per_source_leaf = max_per_source_leaf; - run_params->max_per_target_leaf = max_per_target_leaf; - - run_params->verbosity = verbosity; - return; } @@ -109,14 +175,14 @@ void RunParams_Print(struct RunParams *run_params) printf("[BaryTree] singularity = %d\n", run_params->singularity); printf("[BaryTree] compute_type = %d\n", run_params->compute_type); 
printf("[BaryTree] theta = %f\n", run_params->theta); - printf("[BaryTree] beta = %f\n", run_params->beta); - printf("[BaryTree] size_check_factor = %f\n", run_params->size_check_factor); printf("[BaryTree] interp_order = %d\n", run_params->interp_order); printf("[BaryTree] interp_pts_per_cluster = %d\n", run_params->interp_pts_per_cluster); printf("[BaryTree] interp_weights_per_cluster = %d\n", run_params->interp_weights_per_cluster); printf("[BaryTree] interp_charges_per_cluster = %d\n", run_params->interp_charges_per_cluster); printf("[BaryTree] max_per_source_leaf = %d\n", run_params->max_per_source_leaf); printf("[BaryTree] max_per_target_leaf = %d\n", run_params->max_per_target_leaf); + printf("[BaryTree] size_check_factor = %f\n", run_params->size_check_factor); + printf("[BaryTree] beta = %f\n", run_params->beta); printf("[BaryTree] verbosity = %d\n", run_params->verbosity); printf("[BaryTree]\n"); } diff --git a/src/run_params/run_params.h b/src/run_params/run_params.h index a9212c21..e399f0f9 100644 --- a/src/run_params/run_params.h +++ b/src/run_params/run_params.h @@ -10,9 +10,9 @@ void RunParams_Setup(struct RunParams **run_params_addr, APPROXIMATION approximation, SINGULARITY singularity, COMPUTE_TYPE compute_type, - double theta, double beta, double size_check_factor, int interp_order, - int max_per_source_leaf, int max_per_target_leaf, - int verbosity); + double theta, int interp_order, + int max_per_source_leaf, int max_per_target_leaf, double size_check_factor, + double beta, int verbosity); void RunParams_Validate(struct RunParams *run_params); diff --git a/src/run_params/struct_run_params.h b/src/run_params/struct_run_params.h index 8ee90509..2696cc0e 100644 --- a/src/run_params/struct_run_params.h +++ b/src/run_params/struct_run_params.h @@ -24,9 +24,10 @@ struct RunParams int max_per_source_leaf; int max_per_target_leaf; + + double beta; int verbosity; - double beta; }; diff --git a/tests/serial_tests.c b/tests/serial_tests.c index 
ef19caf6..dc4fc154 100644 --- a/tests/serial_tests.c +++ b/tests/serial_tests.c @@ -74,7 +74,7 @@ static char *test_direct_sum_on_10_particles() RunParams_Setup(&run_params, COULOMB, numKernelParams, kernelParams, NO_APPROX, SKIPPING, NO_COMPUTE_TYPE, - 0, 0, 0, 0, 0, 0, verbosity); + 0, 0, 0, 0, 0, -1, verbosity); directdriver(sources, targets, run_params, potential, time_tree); @@ -101,11 +101,9 @@ static char *test_direct_sum_on_10_particles() RunParams_Setup(&run_params, COULOMB, numKernelParams, kernelParams, NO_APPROX, SUBTRACTION, NO_COMPUTE_TYPE, - 0, 0, 0, 0, 0, 0, verbosity); - fprintf(stderr, "I'm here 3.\n"); + 0, 0, 0, 0, 0, -1, verbosity); directdriver(sources, targets, run_params, potential, time_tree); - fprintf(stderr, "I'm here 4.\n"); for (int i=0; inum; i++){ double trueValue=2.0 * M_PI * kappa2 * i; @@ -130,7 +128,7 @@ static char *test_direct_sum_on_10_particles() RunParams_Setup(&run_params, YUKAWA, numKernelParams, kernelParams, NO_APPROX, SKIPPING, NO_COMPUTE_TYPE, - 0, 0, 0, 0, 0, 0, verbosity); + 0, 0, 0, 0, 0, -1, verbosity); directdriver(sources, targets, run_params, potential, time_tree); @@ -157,7 +155,7 @@ static char *test_direct_sum_on_10_particles() RunParams_Setup(&run_params, YUKAWA, numKernelParams, kernelParams, NO_APPROX, SUBTRACTION, NO_COMPUTE_TYPE, - 0, 0, 0, 0, 0, 0, verbosity); + 0, 0, 0, 0, 0, -1, verbosity); directdriver(sources, targets, run_params, potential, time_tree); @@ -259,7 +257,7 @@ static char *test_treecode_on_100_particles() RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - theta, beta, size_check, order, max_per_source_leaf, max_per_target_leaf, verbosity); + theta, order, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); /***********************************************/ @@ -602,7 +600,7 @@ static char *test_treecode_on_1_target_10000_sources() RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, 
kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - theta, beta, size_check, order, max_per_source_leaf, max_per_target_leaf, verbosity); + theta, order, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); /***********************************************/ @@ -907,7 +905,7 @@ static char *test_treecode_wrapper() RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - theta, beta, size_check, order, max_per_source_leaf, max_per_target_leaf, verbosity); + theta, order, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); /***********************************************/ @@ -926,8 +924,8 @@ static char *test_treecode_wrapper() sources->x,sources->y,sources->z,sources->q,sources->w, potential_wrapper, COULOMB, num_kernel_params, kernel_params, SKIPPING, LAGRANGE, PARTICLE_CLUSTER, - order, theta, beta, max_per_source_leaf, max_per_target_leaf, - size_check, verbosity); + theta, order, max_per_source_leaf, max_per_target_leaf, + size_check, beta, verbosity); treedriver(sources, targets, run_params, potential, time_tree); @@ -1030,7 +1028,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - 0, beta, size_check, 0, max_per_source_leaf, max_per_target_leaf, verbosity); + 0, 0, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); // 3 parameter sets. Set 2 increases order, set 3 reduces MAC. Both should be more accurate than set 1. From 6073c3810650d5bc606e3cc5dbf89456b2c0f48b Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Mon, 29 Jun 2020 19:55:21 -0400 Subject: [PATCH 30/95] Add size check --- src/run_params/run_params.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/run_params/run_params.c b/src/run_params/run_params.c index 245053cb..d390b269 100644 --- a/src/run_params/run_params.c +++ b/src/run_params/run_params.c @@ -58,6 +58,8 @@ void RunParams_Setup(struct RunParams **run_params_addr, if (compute_type == PARTICLE_CLUSTER || compute_type == CLUSTER_PARTICLE) { if (approximation == LAGRANGE) { + run_params->size_check_factor = 1.0; + theta_min = 0.55; theta_max = 0.95; exp_s = 2; @@ -67,6 +69,8 @@ void RunParams_Setup(struct RunParams **run_params_addr, exp_t = 3; } else { // HERMITE + run_params->size_check_factor = 4.0; + theta_min = 0.55; theta_max = 0.95; exp_s = 2; @@ -77,6 +81,8 @@ void RunParams_Setup(struct RunParams **run_params_addr, } } else { // CLUSTER_CLUSTER + run_params->size_check_factor = 1.0; + theta_min = 0.55; theta_max = 0.95; exp_s = 1.75; @@ -96,6 +102,7 @@ void RunParams_Setup(struct RunParams **run_params_addr, #ifdef OPENACC_ENABLED run_params->max_per_source_leaf = 2000; run_params->max_per_target_leaf = 2000; + #else // CPU if (compute_type == PARTICLE_CLUSTER) { run_params->max_per_source_leaf = 50; From 96edc8739372398604777b5a4f0885a0f4d05d34 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Tue, 30 Jun 2020 08:24:50 -0400 Subject: [PATCH 31/95] Changing default batch/cluster size from 2000 to 3000 --- src/run_params/run_params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/run_params/run_params.c b/src/run_params/run_params.c index d390b269..a852a92b 100644 --- a/src/run_params/run_params.c +++ b/src/run_params/run_params.c @@ -100,8 +100,8 @@ void RunParams_Setup(struct RunParams **run_params_addr, * (run_params->interp_order + 1); #ifdef OPENACC_ENABLED - run_params->max_per_source_leaf = 2000; - run_params->max_per_target_leaf = 2000; + run_params->max_per_source_leaf = 3000; + 
run_params->max_per_target_leaf = 3000; #else // CPU if (compute_type == PARTICLE_CLUSTER) { From 359c260a4e9a68d56c77dc352c6010d1d6f6d8cd Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Tue, 30 Jun 2020 09:16:26 -0400 Subject: [PATCH 32/95] yukawa ss cp implemented. Needs target field approx stored in cluster->w --- src/kernels/yukawa/yukawa_ss_cp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/kernels/yukawa/yukawa_ss_cp.c b/src/kernels/yukawa/yukawa_ss_cp.c index 2789eb2a..25bbff02 100644 --- a/src/kernels/yukawa/yukawa_ss_cp.c +++ b/src/kernels/yukawa/yukawa_ss_cp.c @@ -30,6 +30,7 @@ void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inter double cx = cluster_x[starting_index_of_cluster + i]; double cy = cluster_y[starting_index_of_cluster + i]; double cz = cluster_z[starting_index_of_cluster + i]; + double cw = cluster_w[starting_index_of_cluster + i]; #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:temporary_potential) @@ -49,9 +50,7 @@ void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inter double r = sqrt(dx*dx + dy*dy + dz*dz); if (r > DBL_MIN) { - temporary_potential += source_q[jj] * source_w[jj] * exp(-kernel_parameter * r) / r; - temporary_potential += (source_q[jj] - cluster_q * cluster_w[jj]) * exp(-kernel_parameter*r) /r; - + temporary_potential += (source_q[jj] - cw) * source_w[jj] * exp(-kernel_parameter*r) /r; } } // end loop over interpolation points #ifdef OPENACC_ENABLED From ca4e405daafd937432891b442f50374f0a0a152b Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Tue, 30 Jun 2020 11:50:09 -0400 Subject: [PATCH 33/95] SS CP for yukawa in progress. Still a bug lurking. 
--- src/CMakeLists.txt | 6 +- src/clusters/clusters.c | 67 +++++++++++++++---- src/clusters/clusters.h | 4 +- src/drivers/treedriver.c | 4 +- .../interaction_compute_cp.c | 10 ++- src/kernels/yukawa/yukawa.h | 2 +- src/kernels/yukawa/yukawa_ss_cp.c | 2 +- src/kernels/yukawa/yukawa_ss_cp.h | 6 +- 8 files changed, 75 insertions(+), 26 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b6ca3240..6bd5b7f0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -99,9 +99,9 @@ SET(SRCS_K_YUKAWA kernels/yukawa/yukawa_ss_correction.h kernels/yukawa/yukawa_ss_correction.c kernels/yukawa/yukawa_ss_pc.h - kernels/yukawa/yukawa_ss_pc.c) -# kernels/yukawa/yukawa_ss_cp.h -# kernels/yukawa/yukawa_ss_cp.c) + kernels/yukawa/yukawa_ss_pc.c + kernels/yukawa/yukawa_ss_cp.h + kernels/yukawa/yukawa_ss_cp.c) SET(SRCS_K_REGULARIZED_COULOMB diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 5959942d..e6a9eb06 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -19,7 +19,7 @@ static void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrder, double *xS, double *yS, double *zS, double *qS, double *wS, - double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); static void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationOrder, double *xS, double *yS, double *zS, double *qS, double *wS, @@ -28,8 +28,7 @@ static void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interp static void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpolationOrder, int totalNumberInterpolationPoints, double *xS, double *yS, double *zS, double *qS, double *wS, - double *clusterX, double *clusterY, double *clusterZ, - double *clusterQ, double *clusterW); + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); static void 
pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int interpolationOrder, int totalNumberInterpolationPoints, @@ -110,7 +109,7 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa if ((approximation == LAGRANGE) && (singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) - pc_comp_ms_modifiedF(tree, i, interpolationOrder, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); + pc_comp_ms_modifiedF(tree, i, interpolationOrder, xS, yS, zS, qS, wS, xC, yC, zC, qC); } else if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { for (int i = 0; i < tree_numnodes; i++) @@ -119,7 +118,7 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa } else if ((approximation == HERMITE) && (singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) pc_comp_ms_modifiedF_hermite(tree, i, interpolationOrder, totalNumberInterpolationPoints, - xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); + xS, yS, zS, qS, wS, xC, yC, zC, qC); } else if ((approximation == HERMITE) && (singularity == SUBTRACTION)) { for (int i = 0; i < tree_numnodes; i++) @@ -141,15 +140,17 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa -void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Tree *tree, - const struct RunParams *run_params) +void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Particles *targets, + const struct Tree *tree, const struct RunParams *run_params) { *clusters_addr = malloc(sizeof(struct Clusters)); struct Clusters *clusters = *clusters_addr; SINGULARITY singularity = run_params->singularity; + APPROXIMATION approximation = run_params->approximation; int tree_numnodes = tree->numnodes; + int totalNumberTargetPoints = targets->num; int interpolationOrder = run_params->interp_order; int interpOrderLim = interpolationOrder + 1; @@ -182,23 +183,65 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const 
struct Tr double *xC = clusters->x; double *yC = clusters->y; double *zC = clusters->z; + double *wC = clusters->w; + + + + /* + * If using singularity subtraction, compute the modified charges on the target cluster, store in clusters->w + */ + + if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { // doing Lagrange SS, need to both construct interpolation points and anterpolate target charge. + double *xT = targets->x; + double *yT = targets->y; + double *zT = targets->z; + double *qT = targets->q; + + double *ones; // initialize an array of ones, needed in the call to pc_comp_ms_modifiedF below. + make_vector(ones,totalNumberTargetPoints); + for (int i=0;iw + for (int i = 0; i < tree_numnodes; i++){ + pc_comp_ms_modifiedF(tree, i, interpolationOrder, xT, yT, zT, qT, ones, xC, yC, zC, wC); // note the final input is w not q array. + } + free_vector(ones); + + + +#ifdef OPENACC_ENABLED + #pragma acc wait + } // end ACC DATA REGION +#endif + } else { // not doing Lagrange singularity subtraction, just need to construct interpolation points #ifdef OPENACC_ENABLED #pragma acc data copyout(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ - zC[0:totalNumberInterpolationPoints]) + zC[0:totalNumberInterpolationPoints], wC[0:totalNumberInterpolationPoints]) { #endif for (int i = 0; i < tree_numnodes; i++) { cp_comp_interp(tree, i, interpolationOrder, xC, yC, zC); } - + #ifdef OPENACC_ENABLED #pragma acc wait } // end ACC DATA REGION #endif - + } return; } @@ -291,7 +334,7 @@ void Clusters_Free_Win(struct Clusters **clusters_addr) void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrder, double *xS, double *yS, double *zS, double *qS, double *wS, - double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ) { int interpOrderLim = interpolationOrder + 1; @@ -714,7 +757,7 @@ void 
pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpolationOrder, int totalNumberInterpolationPoints, double *xS, double *yS, double *zS, double *qS, double *wS, - double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ) { int interpOrderLim = interpolationOrder + 1; diff --git a/src/clusters/clusters.h b/src/clusters/clusters.h index 4de7dcdd..534bb3d4 100644 --- a/src/clusters/clusters.h +++ b/src/clusters/clusters.h @@ -12,8 +12,8 @@ void Clusters_Sources_Construct(struct Clusters **clusters, const struct Particles *sources, const struct Tree *tree, const struct RunParams *run_params); -void Clusters_Targets_Construct(struct Clusters **clusters, const struct Tree *tree, - const struct RunParams *run_params); +void Clusters_Targets_Construct(struct Clusters **clusters, const struct Particles *targets, + const struct Tree *tree, const struct RunParams *run_params); void Clusters_Alloc(struct Clusters **clusters_addr, int length, const struct RunParams *run_params); diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 76718be3..d9c43640 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -101,7 +101,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run STOP_TIMER(&time_tree[1]); START_TIMER(&time_tree[2]); - Clusters_Targets_Construct(&clusters, tree, run_params); + Clusters_Targets_Construct(&clusters, targets, tree, run_params); STOP_TIMER(&time_tree[2]); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -488,7 +488,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[2]); Clusters_Sources_Construct(&source_clusters, sources, source_tree, run_params); - Clusters_Targets_Construct(&target_clusters, target_tree, 
run_params); + Clusters_Targets_Construct(&target_clusters, targets, target_tree, run_params); STOP_TIMER(&time_tree[2]); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ diff --git a/src/interaction_compute/interaction_compute_cp.c b/src/interaction_compute/interaction_compute_cp.c index ca3d16bd..9d7cb6fe 100644 --- a/src/interaction_compute/interaction_compute_cp.c +++ b/src/interaction_compute/interaction_compute_cp.c @@ -168,8 +168,14 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->singularity == SUBTRACTION) { - printf("**ERROR** NOT SET UP FOR CP YUKAWA SS. EXITING.\n"); - exit(1); +// printf("**ERROR** NOT SET UP FOR CP YUKAWA SS. EXITING.\n"); +// exit(1); + + K_Yukawa_SS_CP_Lagrange(num_sources_in_batch, + interp_pts_per_cluster, batch_start, cluster_start, + source_x, source_y, source_z, source_q, source_w, + cluster_x, cluster_y, cluster_z, cluster_q, cluster_w, + run_params, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. 
\n"); diff --git a/src/kernels/yukawa/yukawa.h b/src/kernels/yukawa/yukawa.h index d2901083..91540ddc 100644 --- a/src/kernels/yukawa/yukawa.h +++ b/src/kernels/yukawa/yukawa.h @@ -9,6 +9,6 @@ #include "yukawa_ss_direct.h" #include "yukawa_ss_correction.h" #include "yukawa_ss_pc.h" -//#include "yukawa_ss_cp.h" +#include "yukawa_ss_cp.h" #endif /* H_K_YUKAWA_H */ diff --git a/src/kernels/yukawa/yukawa_ss_cp.c b/src/kernels/yukawa/yukawa_ss_cp.c index 25bbff02..4dac1db8 100644 --- a/src/kernels/yukawa/yukawa_ss_cp.c +++ b/src/kernels/yukawa/yukawa_ss_cp.c @@ -3,7 +3,7 @@ #include #include "../../run_params/struct_run_params.h" -#include "yukawa_cp.h" +#include "yukawa_ss_cp.h" void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, diff --git a/src/kernels/yukawa/yukawa_ss_cp.h b/src/kernels/yukawa/yukawa_ss_cp.h index 7728f40d..2c3427a7 100644 --- a/src/kernels/yukawa/yukawa_ss_cp.h +++ b/src/kernels/yukawa/yukawa_ss_cp.h @@ -1,6 +1,6 @@ /* Interaction Kernels */ -#ifndef H_K_YUKAWA_CP_H -#define H_K_YUKAWA_CP_H +#ifndef H_K_YUKAWA_SS_CP_H +#define H_K_YUKAWA_SS_CP_H #include "../../run_params/struct_run_params.h" @@ -12,4 +12,4 @@ void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inter struct RunParams *run_params, int gpu_async_stream_id); -#endif /* H_K_YUKAWA_CP_H */ +#endif /* H_K_YUKAWA_SS_CP_H */ From 4a7f51b092f45822116b693e7644b840ee076fb2 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Wed, 1 Jul 2020 10:32:51 -0400 Subject: [PATCH 34/95] Bug fix. Target tree was built using max_per_source_leaf. 
--- src/tree/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tree/tree.c b/src/tree/tree.c index 8240b269..7080c890 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -73,7 +73,7 @@ void Tree_Targets_Construct(struct Tree **tree_addr, struct Particles *targets, xyzminmax[5] = maxval(targets->z, targets->num); TreeLinkedList_Targets_Construct(&tree_linked_list, targets, 1, targets->num, - run_params->max_per_source_leaf, xyzminmax, &numnodes, &numleaves, + run_params->max_per_target_leaf, xyzminmax, &numnodes, &numleaves, &min_leaf_size, &max_leaf_size); TreeLinkedList_SetIndex(tree_linked_list, 0); From a31f8519a6f0cb13d143ea4b73f58eda8a97f6b7 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Wed, 1 Jul 2020 17:33:37 -0400 Subject: [PATCH 35/95] CC working for singularity subtraction yukawa kernel. --- src/CMakeLists.txt | 3 +- src/clusters/clusters.c | 87 ++++---- src/drivers/treedriver.c | 4 +- .../interaction_compute_cc.c | 30 ++- .../interaction_compute_cp.c | 3 - .../interaction_compute_downpass.c | 197 +++++++++++++++++- src/kernels/yukawa/yukawa.h | 1 + src/kernels/yukawa/yukawa_ss_cc.c | 69 ++++++ src/kernels/yukawa/yukawa_ss_cc.h | 15 ++ src/kernels/yukawa/yukawa_ss_correction.c | 5 +- src/kernels/yukawa/yukawa_ss_cp.c | 6 +- src/kernels/yukawa/yukawa_ss_pc.c | 2 +- 12 files changed, 359 insertions(+), 63 deletions(-) create mode 100644 src/kernels/yukawa/yukawa_ss_cc.c create mode 100644 src/kernels/yukawa/yukawa_ss_cc.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6bd5b7f0..bf72bee3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -101,7 +101,8 @@ SET(SRCS_K_YUKAWA kernels/yukawa/yukawa_ss_pc.h kernels/yukawa/yukawa_ss_pc.c kernels/yukawa/yukawa_ss_cp.h - kernels/yukawa/yukawa_ss_cp.c) + kernels/yukawa/yukawa_ss_cp.c + kernels/yukawa/yukawa_ss_cc.c) SET(SRCS_K_REGULARIZED_COULOMB diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index e6a9eb06..d1c1e066 100644 --- 
a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -129,6 +129,14 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa exit(1); } + int numZeros=0; + for (int i=0; iq: %i of %i\n", numZeros, totalNumberInterpolationCharges); + + + #ifdef OPENACC_ENABLED #pragma acc wait } // end ACC DATA REGION @@ -191,41 +199,48 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Pa * If using singularity subtraction, compute the modified charges on the target cluster, store in clusters->w */ - if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { // doing Lagrange SS, need to both construct interpolation points and anterpolate target charge. - double *xT = targets->x; - double *yT = targets->y; - double *zT = targets->z; - double *qT = targets->q; - - double *ones; // initialize an array of ones, needed in the call to pc_comp_ms_modifiedF below. - make_vector(ones,totalNumberTargetPoints); - for (int i=0;iw - for (int i = 0; i < tree_numnodes; i++){ - pc_comp_ms_modifiedF(tree, i, interpolationOrder, xT, yT, zT, qT, ones, xC, yC, zC, wC); // note the final input is w not q array. - } - free_vector(ones); - - - -#ifdef OPENACC_ENABLED - #pragma acc wait - } // end ACC DATA REGION -#endif - - } else { // not doing Lagrange singularity subtraction, just need to construct interpolation points +// if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { // doing Lagrange SS, need to both construct interpolation points and anterpolate target charge. +// double *xT = targets->x; +// double *yT = targets->y; +// double *zT = targets->z; +// double *qT = targets->q; +// double *wT = targets->w; +// +// double *ones; // initialize an array of ones, needed in the call to pc_comp_ms_modifiedF below. 
+// make_vector(ones,totalNumberTargetPoints); +// for (int i=0;iw +// for (int i = 0; i < tree_numnodes; i++){ +// pc_comp_ms_modifiedF(tree, i, interpolationOrder, xT, yT, zT, qT, ones, xC, yC, zC, wC); // note the final input is w not q array. +// } +// free_vector(ones); +// +// int numZeros=0; +// for (int i=0; iw: %i of %i\n", numZeros, totalNumberInterpolationCharges); +// +// +// +//#ifdef OPENACC_ENABLED +// #pragma acc wait +// } // end ACC DATA REGION +//#endif +// +// } else { // not doing Lagrange singularity subtraction, just need to construct interpolation points #ifdef OPENACC_ENABLED #pragma acc data copyout(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ @@ -241,7 +256,7 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Pa #pragma acc wait } // end ACC DATA REGION #endif - } +// } return; } diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index d9c43640..93ad801c 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -199,7 +199,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run STOP_TIMER(&time_tree[7]); } - + //------------------------------- //------------------------------- // DOWNPASS @@ -213,7 +213,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //------------------------------- //------------------------------- - // CORRECT AND REORDER + // CORRECT AND REORDER POTENTIAL AT TARGETS //------------------------------- //------------------------------- diff --git a/src/interaction_compute/interaction_compute_cc.c b/src/interaction_compute/interaction_compute_cc.c index bf92d831..980f18db 100644 --- a/src/interaction_compute/interaction_compute_cc.c +++ b/src/interaction_compute/interaction_compute_cc.c @@ -71,6 +71,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T double *target_cluster_y = target_clusters->y; double *target_cluster_z = 
target_clusters->z; double *target_cluster_q = target_clusters->q; + double *target_cluster_w = target_clusters->w; int *source_tree_ibeg = source_tree->ibeg; int *source_tree_iend = source_tree->iend; @@ -80,7 +81,6 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T int *target_tree_iend = target_tree->iend; int *target_tree_cluster_ind = target_tree->cluster_ind; - // NOTE: Not currently setup for SS, thus the target_cluster_w array is not copied out. // Additionally, not setup for Hermite either at the moment. #ifdef OPENACC_ENABLED @@ -96,7 +96,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T target_cluster_x[0:num_target_cluster_points], \ target_cluster_y[0:num_target_cluster_points], \ target_cluster_z[0:num_target_cluster_points]) \ - copy(target_cluster_q[0:num_target_cluster_charges], \ + copy(target_cluster_q[0:num_target_cluster_charges], target_cluster_w[0:num_target_cluster_charges], \ potential[0:num_targets]) #endif { @@ -120,6 +120,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T /* * ************ POTENTIAL FROM APPROX *********************/ /* * ********************************************************/ +// printf("cluster %i, CC = %i, CP = %i, PC = %i, PP = %i\n",i,num_approx_in_cluster,num_target_approx_in_cluster,num_source_approx_in_cluster,num_direct_in_cluster); for (int j = 0; j < num_approx_in_cluster; j++) { int source_node_index = approx_inter_list[i][j]; int source_cluster_start = interp_pts_per_cluster * source_tree_cluster_ind[source_node_index]; @@ -195,8 +196,13 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } else if (run_params->singularity == SUBTRACTION) { - printf("**ERROR** NOT SET UP FOR CC YUKAWA SS. 
EXITING.\n"); - exit(1); + K_Yukawa_SS_CC_Lagrange(interp_pts_per_cluster, + interp_pts_per_cluster, source_cluster_start, target_cluster_start, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, source_cluster_w, + target_cluster_x, target_cluster_y, target_cluster_z, + target_cluster_q, target_cluster_w, + run_params, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); @@ -433,8 +439,12 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } else if (run_params->singularity == SUBTRACTION) { - printf("**ERROR** NOT SET UP FOR CC YUKAWA SS. EXITING.\n"); - exit(1); + K_Yukawa_SS_PC_Lagrange(num_targets_in_cluster, interp_pts_per_cluster, + target_start, source_cluster_start, + target_x, target_y, target_z, target_q, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, source_cluster_w, + run_params, potential, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); @@ -673,8 +683,12 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } else if (run_params->singularity == SUBTRACTION) { - printf("**ERROR** NOT SET UP FOR CC YUKAWA SS. EXITING.\n"); - exit(1); + K_Yukawa_SS_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, + source_start, target_cluster_start, + source_x, source_y, source_z, source_q, source_w, + target_cluster_x, target_cluster_y, target_cluster_z, + target_cluster_q, target_cluster_w, + run_params, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. 
\n"); diff --git a/src/interaction_compute/interaction_compute_cp.c b/src/interaction_compute/interaction_compute_cp.c index 9d7cb6fe..7e8b418d 100644 --- a/src/interaction_compute/interaction_compute_cp.c +++ b/src/interaction_compute/interaction_compute_cp.c @@ -168,9 +168,6 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->singularity == SUBTRACTION) { -// printf("**ERROR** NOT SET UP FOR CP YUKAWA SS. EXITING.\n"); -// exit(1); - K_Yukawa_SS_CP_Lagrange(num_sources_in_batch, interp_pts_per_cluster, batch_start, cluster_start, source_x, source_y, source_z, source_q, source_w, diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index 75ad31b1..f974e3e9 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -14,11 +14,11 @@ static void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order, double *xT, double *yT, double *zT, double *qT, - double *clusterQ, double *clusterW); + double *clusterQ); -//static void cp_comp_pot_SS(struct Tree *tree, int idx, int interp_order, -// double *xT, double *yT, double *zT, double *qT, -// double *clusterQ, double *clusterW); +static void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_order, + double *xT, double *yT, double *zT, double *qT, + double *clusterQ, double *clusterW); static void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int interp_order, double *xT, double *yT, double *zT, double *qT, @@ -60,12 +60,13 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, if ((run_params->approximation == LAGRANGE) && (run_params->singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) cp_comp_pot(tree, i, potential, interp_order, - target_x, target_y, target_z, target_q, cluster_q, cluster_w); + target_x, target_y, target_z, 
target_q, cluster_q); } else if ((run_params->approximation == LAGRANGE) && (run_params->singularity == SUBTRACTION)) { -// for (int i = 0; i < tree_numnodes; i++) -// cp_comp_pot_SS(tree, i, potential interp_order, -// target_x, target_y, target_z, target_q, cluster_q, cluster_w); + for (int i = 0; i < tree_numnodes; i++){ + cp_comp_pot_SS(tree, i, potential, interp_order, + target_x, target_y, target_z, target_q, cluster_q, cluster_w); + } } else if ((run_params->approximation == HERMITE) && (run_params->singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) @@ -73,6 +74,8 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, target_x, target_y, target_z, target_q, cluster_q, cluster_w); } else if ((run_params->approximation == HERMITE) && (run_params->singularity == SUBTRACTION)) { + printf("Not set up to do Hermite SS downpass.\n"); + exit(-1); // for (int i = 0; i < tree_numnodes; i++) // cp_comp_pot_hermite_SS(tree, i, potential, interp_order, // target_x, target_y, target_z, target_q, cluster_q, cluster_w); @@ -97,6 +100,180 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, /************************************/ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order, + double *target_x, double *target_y, double *target_z, double *target_q, + double *cluster_q) +{ + int interp_order_lim = interp_order + 1; + int interp_pts_per_cluster = interp_order_lim * interp_order_lim * interp_order_lim; + + int num_targets_in_cluster = tree->iend[idx] - tree->ibeg[idx] + 1; + int target_start = tree->ibeg[idx] - 1; + int cluster_start = idx * interp_pts_per_cluster; + + double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ; + + make_vector(weights, interp_order_lim); + make_vector(dj, interp_order_lim); + make_vector(tt, interp_order_lim); + make_vector(nodeX, interp_order_lim); + make_vector(nodeY, interp_order_lim); + make_vector(nodeZ, interp_order_lim); + + double x0 = tree->x_min[idx]; + 
double x1 = tree->x_max[idx]; + double y0 = tree->y_min[idx]; + double y1 = tree->y_max[idx]; + double z0 = tree->z_min[idx]; + double z1 = tree->z_max[idx]; + +#ifdef OPENACC_ENABLED + int streamID = rand() % 4; + #pragma acc kernels async(streamID) present(target_x, target_y, target_z, target_q, cluster_q) \ + create(nodeX[0:interp_order_lim], nodeY[0:interp_order_lim], nodeZ[0:interp_order_lim], \ + weights[0:interp_order_lim], dj[0:interp_order_lim], tt[0:interp_order_lim]) + { +#endif + + + // Fill in arrays of unique x, y, and z coordinates for the interpolation points. +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interp_order_lim; i++) { + tt[i] = cos(i * M_PI / interp_order); + nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); + nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); + nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); + } + + // Compute weights +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interp_order_lim; j++){ + dj[j] = 1.0; + if (j == 0) dj[j] = 0.5; + if (j == interp_order) dj[j] = 0.5; + } + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interp_order_lim; j++) { + weights[j] = ((j % 2 == 0)? 
1 : -1) * dj[j]; + } + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < num_targets_in_cluster; i++) { // loop through the target points + + double sumX = 0.0; + double sumY = 0.0; + double sumZ = 0.0; + + double tx = target_x[target_start+i]; + double ty = target_y[target_start+i]; + double tz = target_z[target_start+i]; + + int eix = -1; + int eiy = -1; + int eiz = -1; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:sumX,sumY,sumZ) reduction(max:eix,eiy,eiz) +#endif + for (int j = 0; j < interp_order_lim; j++) { // loop through the degree + + double cx = tx - nodeX[j]; + double cy = ty - nodeY[j]; + double cz = tz - nodeZ[j]; + + if (fabs(cx) +#include +#include + +#include "../../run_params/struct_run_params.h" +#include "yukawa_ss_cc.h" + + +void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_sources, int starting_index_of_cluster, + double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w, + double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_q, double *target_cluster_w, + struct RunParams *run_params, int gpu_async_stream_id) +{ + + double kernel_parameter = run_params->kernel_params[0]; + +#ifdef OPENACC_ENABLED + #pragma acc kernels async(gpu_async_stream_id) present(source_cluster_x, source_cluster_y, source_cluster_z, source_cluster_q, source_cluster_w, \ + target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, target_cluster_w) + { +#endif +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { + + double temporary_potential = 0.0; + double temporary_weight = 0.0; + + double cx = target_cluster_x[starting_index_of_cluster + i]; + double cy = target_cluster_y[starting_index_of_cluster + i]; + double cz = 
target_cluster_z[starting_index_of_cluster + i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:temporary_potential, +:temporary_weight) +#endif + for (int j = 0; j < number_of_sources_in_batch; j++) { +#ifdef OPENACC_ENABLED + #pragma acc cache(source_cluster_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_cluster_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_cluster_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_cluster_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) +#endif + + int jj = starting_index_of_sources + j; + double dx = cx - source_cluster_x[jj]; + double dy = cy - source_cluster_y[jj]; + double dz = cz - source_cluster_z[jj]; + double r = sqrt(dx*dx + dy*dy + dz*dz); + + if (r > DBL_MIN) { + temporary_potential += source_cluster_q[jj] * exp(-kernel_parameter*r) /r; // source_cluster_q already has source_q * source_w + temporary_weight += source_cluster_w[jj] * exp(-kernel_parameter*r) /r; + } + } // end loop over interpolation points +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif + target_cluster_q[starting_index_of_cluster + i] += temporary_potential; + target_cluster_w[starting_index_of_cluster + i] += temporary_weight; + } +#ifdef OPENACC_ENABLED + } // end kernel +#endif + return; +} + + diff --git a/src/kernels/yukawa/yukawa_ss_cc.h b/src/kernels/yukawa/yukawa_ss_cc.h new file mode 100644 index 00000000..789df2a2 --- /dev/null +++ b/src/kernels/yukawa/yukawa_ss_cc.h @@ -0,0 +1,15 @@ +/* Interaction Kernels */ +#ifndef H_K_YUKAWA_SS_CC_H +#define H_K_YUKAWA_SS_CC_H + +#include "../../run_params/struct_run_params.h" + + +void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_sources, int starting_index_of_cluster, + double *source_cluster_x, double 
*source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w, + double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_charge, double *target_cluster_w, + struct RunParams *run_params, int gpu_async_stream_id); + + +#endif /* H_K_YUKAWA_SS_CC_H */ diff --git a/src/kernels/yukawa/yukawa_ss_correction.c b/src/kernels/yukawa/yukawa_ss_correction.c index 50efdcf7..99136686 100644 --- a/src/kernels/yukawa/yukawa_ss_correction.c +++ b/src/kernels/yukawa/yukawa_ss_correction.c @@ -11,7 +11,10 @@ void K_Yukawa_SS_Correction(double *potential, double *target_q, { double kernel_parameter=run_params->kernel_params[0]; double param = 4.0 * M_PI / kernel_parameter / kernel_parameter; - for (int i = 0; i < numTargets; i++) potential[i] += param * target_q[i]; + for (int i = 0; i < numTargets; i++){ + potential[i] += param * target_q[i]; +// printf("target_q[%i], target_w[%i] = %f, %f\n", i, i, potential[i], target_q[i]); + } return; } diff --git a/src/kernels/yukawa/yukawa_ss_cp.c b/src/kernels/yukawa/yukawa_ss_cp.c index 4dac1db8..dbc7f1d5 100644 --- a/src/kernels/yukawa/yukawa_ss_cp.c +++ b/src/kernels/yukawa/yukawa_ss_cp.c @@ -26,11 +26,11 @@ void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inter for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { double temporary_potential = 0.0; + double temporary_weight = 0.0; double cx = cluster_x[starting_index_of_cluster + i]; double cy = cluster_y[starting_index_of_cluster + i]; double cz = cluster_z[starting_index_of_cluster + i]; - double cw = cluster_w[starting_index_of_cluster + i]; #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:temporary_potential) @@ -50,13 +50,15 @@ void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inter double r = sqrt(dx*dx + dy*dy + dz*dz); if (r > DBL_MIN) { - temporary_potential += (source_q[jj] - cw) * source_w[jj] * exp(-kernel_parameter*r) 
/r; + temporary_potential += source_q[jj] * source_w[jj] * exp(-kernel_parameter*r) /r; + temporary_weight += source_w[jj] * exp(-kernel_parameter*r) /r; } } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic #endif cluster_q[starting_index_of_cluster + i] += temporary_potential; + cluster_w[starting_index_of_cluster + i] += temporary_weight; } #ifdef OPENACC_ENABLED } // end kernel diff --git a/src/kernels/yukawa/yukawa_ss_pc.c b/src/kernels/yukawa/yukawa_ss_pc.c index eacc7897..69a17fde 100644 --- a/src/kernels/yukawa/yukawa_ss_pc.c +++ b/src/kernels/yukawa/yukawa_ss_pc.c @@ -44,7 +44,7 @@ void K_Yukawa_SS_PC_Lagrange(int number_of_targets_in_batch, double r = sqrt(dx*dx + dy*dy + dz*dz); if (r > DBL_MIN) { - temporary_potential += (cluster_charge[jj] - tq * cluster_weight[jj]) * exp(-kernel_parameter*r) /r; + temporary_potential += (cluster_charge[jj] - tq * cluster_weight[jj] ) * exp(-kernel_parameter*r) /r; } } // end loop over interpolation points #ifdef OPENACC_ENABLED From 24bacdd7024c8b4df86061a2aebf77bc79deb8ec Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 09:15:28 -0400 Subject: [PATCH 36/95] Coulomb SS implemented for CP and CC. 
--- src/CMakeLists.txt | 9 ++- src/clusters/clusters.c | 1 - .../interaction_compute_cc.c | 26 +++++-- .../interaction_compute_cp.c | 7 +- .../interaction_compute_downpass.c | 2 +- src/kernels/coulomb/coulomb.h | 4 +- src/kernels/coulomb/coulomb_ss_cc.c | 71 +++++++++++++++++++ src/kernels/coulomb/coulomb_ss_cc.h | 15 ++++ src/kernels/coulomb/coulomb_ss_cp.c | 71 +++++++++++++++++++ src/kernels/coulomb/coulomb_ss_cp.h | 16 +++++ src/kernels/yukawa/yukawa_ss_cc.c | 2 +- 11 files changed, 209 insertions(+), 15 deletions(-) create mode 100644 src/kernels/coulomb/coulomb_ss_cc.c create mode 100644 src/kernels/coulomb/coulomb_ss_cc.h create mode 100644 src/kernels/coulomb/coulomb_ss_cp.c create mode 100644 src/kernels/coulomb/coulomb_ss_cp.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bf72bee3..87c441e5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -79,9 +79,11 @@ SET(SRCS_K_COULOMB kernels/coulomb/coulomb_ss_correction.h kernels/coulomb/coulomb_ss_correction.c kernels/coulomb/coulomb_ss_pc.h - kernels/coulomb/coulomb_ss_pc.c) -# kernels/coulomb/coulomb_ss_cp.h -# kernels/coulomb/coulomb_ss_cp.c) + kernels/coulomb/coulomb_ss_pc.c + kernels/coulomb/coulomb_ss_cp.h + kernels/coulomb/coulomb_ss_cp.c + kernels/coulomb/coulomb_ss_cc.h + kernels/coulomb/coulomb_ss_cc.c) SET(SRCS_K_YUKAWA @@ -102,6 +104,7 @@ SET(SRCS_K_YUKAWA kernels/yukawa/yukawa_ss_pc.c kernels/yukawa/yukawa_ss_cp.h kernels/yukawa/yukawa_ss_cp.c + kernels/yukawa/yukawa_ss_cc.h kernels/yukawa/yukawa_ss_cc.c) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index d1c1e066..895addb9 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -133,7 +133,6 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa for (int i=0; iq: %i of %i\n", numZeros, totalNumberInterpolationCharges); diff --git a/src/interaction_compute/interaction_compute_cc.c b/src/interaction_compute/interaction_compute_cc.c index 980f18db..b79aef91 100644 --- 
a/src/interaction_compute/interaction_compute_cc.c +++ b/src/interaction_compute/interaction_compute_cc.c @@ -146,8 +146,13 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } else if (run_params->singularity == SUBTRACTION) { - printf("**ERROR** NOT SET UP FOR CC COULOMB SS. EXITING.\n"); - exit(1); + K_Coulomb_SS_CC_Lagrange(interp_pts_per_cluster, interp_pts_per_cluster, + source_cluster_start, target_cluster_start, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, source_cluster_w, + target_cluster_x, target_cluster_y, target_cluster_z, + target_cluster_q, target_cluster_w, + run_params, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); @@ -390,8 +395,13 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } else if (run_params->singularity == SUBTRACTION) { - printf("**ERROR** NOT SET UP FOR CC COULOMB SS. EXITING.\n"); - exit(1); + K_Coulomb_SS_PC_Lagrange(num_targets_in_cluster, interp_pts_per_cluster, + target_start, source_cluster_start, + target_x, target_y, target_z, target_q, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, source_cluster_w, + run_params, potential, stream_id); + } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); @@ -634,8 +644,12 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } else if (run_params->singularity == SUBTRACTION) { - printf("**ERROR** NOT SET UP FOR CC COULOMB SS. EXITING.\n"); - exit(1); + K_Coulomb_SS_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, + source_start, target_cluster_start, + source_x, source_y, source_z, source_q, source_w, + target_cluster_x, target_cluster_y, target_cluster_z, + target_cluster_q, target_cluster_w, + run_params, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. 
\n"); diff --git a/src/interaction_compute/interaction_compute_cp.c b/src/interaction_compute/interaction_compute_cp.c index 7e8b418d..4dc030d6 100644 --- a/src/interaction_compute/interaction_compute_cp.c +++ b/src/interaction_compute/interaction_compute_cp.c @@ -117,8 +117,11 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->singularity == SUBTRACTION) { - printf("**ERROR** NOT SET UP FOR CP COULOMB SS. EXITING.\n"); - exit(1); + K_Coulomb_SS_CP_Lagrange(num_sources_in_batch, + interp_pts_per_cluster, batch_start, cluster_start, + source_x, source_y, source_z, source_q, source_w, + cluster_x, cluster_y, cluster_z, cluster_q, cluster_w, + run_params, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index f974e3e9..543d0a3b 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -426,7 +426,7 @@ void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_or if (eiz != k3) numerator *= 0; } - temp += numerator * denominator * (cq-tq*cw); + temp += numerator * denominator * (cq-tq*cw); // subtract target_q*cluster_w for singularity subtraction } #ifdef OPENACC_ENABLED diff --git a/src/kernels/coulomb/coulomb.h b/src/kernels/coulomb/coulomb.h index 8919868a..5a413dd3 100644 --- a/src/kernels/coulomb/coulomb.h +++ b/src/kernels/coulomb/coulomb.h @@ -10,7 +10,9 @@ #include "coulomb_ss_direct.h" #include "coulomb_ss_correction.h" #include "coulomb_ss_pc.h" -//#include "coulomb_ss_cp.h" +#include "coulomb_ss_cp.h" +#include "coulomb_ss_cc.h" + #endif /* H_K_COULOMB_H */ diff --git a/src/kernels/coulomb/coulomb_ss_cc.c b/src/kernels/coulomb/coulomb_ss_cc.c new file mode 100644 index 00000000..aea167be --- /dev/null +++ b/src/kernels/coulomb/coulomb_ss_cc.c @@ -0,0 
+1,71 @@ +#include +#include +#include + +#include "../../run_params/struct_run_params.h" +#include "coulomb_ss_cc.h" + + +void K_Coulomb_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_sources, int starting_index_of_cluster, + double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w, + double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_q, double *target_cluster_w, + struct RunParams *run_params, int gpu_async_stream_id) +{ + + double kernel_parameter = run_params->kernel_params[0]; + double kernel_parameter2 = kernel_parameter * kernel_parameter; + +#ifdef OPENACC_ENABLED + #pragma acc kernels async(gpu_async_stream_id) present(source_cluster_x, source_cluster_y, source_cluster_z, source_cluster_q, \ + target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q) + { +#endif +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { + + double temporary_potential = 0.0; + double temporary_weight = 0.0; + + double cx = target_cluster_x[starting_index_of_cluster + i]; + double cy = target_cluster_y[starting_index_of_cluster + i]; + double cz = target_cluster_z[starting_index_of_cluster + i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:temporary_potential) +#endif + for (int j = 0; j < number_of_sources_in_batch; j++) { +#ifdef OPENACC_ENABLED + #pragma acc cache(source_cluster_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_cluster_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_cluster_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_cluster_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) 
+#endif + + int jj = starting_index_of_sources + j; + double dx = cx - source_cluster_x[jj]; + double dy = cy - source_cluster_y[jj]; + double dz = cz - source_cluster_z[jj]; + double r = sqrt(dx*dx + dy*dy + dz*dz); + + if (r > DBL_MIN) { + temporary_potential += source_cluster_q[jj] / r; + temporary_weight += exp(-r*r/kernel_parameter2) * source_cluster_w[jj] / r; + } + } // end loop over interpolation points +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif + target_cluster_q[starting_index_of_cluster + i] += temporary_potential; + target_cluster_w[starting_index_of_cluster + i] += temporary_weight; + } +#ifdef OPENACC_ENABLED + } // end kernel +#endif + return; +} + + + diff --git a/src/kernels/coulomb/coulomb_ss_cc.h b/src/kernels/coulomb/coulomb_ss_cc.h new file mode 100644 index 00000000..f545529c --- /dev/null +++ b/src/kernels/coulomb/coulomb_ss_cc.h @@ -0,0 +1,15 @@ +/* Interaction Kernels */ +#ifndef H_K_COULOMB_SS_CC_H +#define H_K_COULOMB_SS_CC_H + +#include "../../run_params/struct_run_params.h" + + +void K_Coulomb_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_sources, int starting_index_of_cluster, + double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w, + double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_charge, double *target_cluster_weight, + struct RunParams *run_params, int gpu_async_stream_id); + + +#endif /* H_K_COULOMB_SS_CC_H */ diff --git a/src/kernels/coulomb/coulomb_ss_cp.c b/src/kernels/coulomb/coulomb_ss_cp.c new file mode 100644 index 00000000..8b4f958b --- /dev/null +++ b/src/kernels/coulomb/coulomb_ss_cp.c @@ -0,0 +1,71 @@ +#include +#include +#include + +#include "../../run_params/struct_run_params.h" +#include "coulomb_ss_cp.h" + + +void K_Coulomb_SS_CP_Lagrange(int number_of_sources_in_batch, int 
number_of_interpolation_points_in_cluster, + int starting_index_of_sources, int starting_index_of_cluster, + double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, double *cluster_w, + struct RunParams *run_params, int gpu_async_stream_id) +{ + + double kernel_parameter = run_params->kernel_params[0]; + double kernel_parameter2 = kernel_parameter * kernel_parameter; + +#ifdef OPENACC_ENABLED + #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ + cluster_x, cluster_y, cluster_z, cluster_q) + { +#endif +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { + + double temporary_potential = 0.0; + double temporary_weight = 0.0; + + double cx = cluster_x[starting_index_of_cluster + i]; + double cy = cluster_y[starting_index_of_cluster + i]; + double cz = cluster_z[starting_index_of_cluster + i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:temporary_potential) +#endif + for (int j = 0; j < number_of_sources_in_batch; j++) { +#ifdef OPENACC_ENABLED + #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) +#endif + + int jj = starting_index_of_sources + j; + double dx = cx - source_x[jj]; + double dy = cy - source_y[jj]; + double dz = cz - source_z[jj]; + double r = sqrt(dx*dx + dy*dy + dz*dz); + + if (r > DBL_MIN) { + temporary_potential += source_q[jj] * source_w[jj] / r; + temporary_weight += exp(-r*r/kernel_parameter2) * source_w[jj] / r; + } + } // end loop over 
interpolation points +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif + cluster_q[starting_index_of_cluster + i] += temporary_potential; + cluster_w[starting_index_of_cluster + i] += temporary_weight; + } +#ifdef OPENACC_ENABLED + } // end kernel +#endif + return; +} + + + diff --git a/src/kernels/coulomb/coulomb_ss_cp.h b/src/kernels/coulomb/coulomb_ss_cp.h new file mode 100644 index 00000000..28e9774e --- /dev/null +++ b/src/kernels/coulomb/coulomb_ss_cp.h @@ -0,0 +1,16 @@ +/* Interaction Kernels */ +#ifndef H_K_COULOMB_SS_CP_H +#define H_K_COULOMB_SS_CP_H + +#include "../../run_params/struct_run_params.h" + + +void K_Coulomb_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_sources, int starting_index_of_cluster, + double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, + struct RunParams *run_params, int gpu_async_stream_id); + + + +#endif /* H_K_COULOMB_SS_CP_H */ diff --git a/src/kernels/yukawa/yukawa_ss_cc.c b/src/kernels/yukawa/yukawa_ss_cc.c index 46c5d9df..06dbd4b8 100644 --- a/src/kernels/yukawa/yukawa_ss_cc.c +++ b/src/kernels/yukawa/yukawa_ss_cc.c @@ -51,7 +51,7 @@ void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_inter if (r > DBL_MIN) { temporary_potential += source_cluster_q[jj] * exp(-kernel_parameter*r) /r; // source_cluster_q already has source_q * source_w - temporary_weight += source_cluster_w[jj] * exp(-kernel_parameter*r) /r; + temporary_weight += source_cluster_w[jj] * exp(-kernel_parameter*r) /r; } } // end loop over interpolation points #ifdef OPENACC_ENABLED From f1bd059dbd376c8c442873c91930d8c4db11e556 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 09:36:47 -0400 Subject: [PATCH 37/95] updated coulomb ss pragmas. 
--- src/kernels/coulomb/coulomb_ss_cc.c | 16 ++++++++-------- src/kernels/coulomb/coulomb_ss_cp.c | 16 ++++++++-------- src/kernels/coulomb/coulomb_ss_pc.c | 5 ++--- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/kernels/coulomb/coulomb_ss_cc.c b/src/kernels/coulomb/coulomb_ss_cc.c index aea167be..ad445aaf 100644 --- a/src/kernels/coulomb/coulomb_ss_cc.c +++ b/src/kernels/coulomb/coulomb_ss_cc.c @@ -17,8 +17,8 @@ void K_Coulomb_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_inte double kernel_parameter2 = kernel_parameter * kernel_parameter; #ifdef OPENACC_ENABLED - #pragma acc kernels async(gpu_async_stream_id) present(source_cluster_x, source_cluster_y, source_cluster_z, source_cluster_q, \ - target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q) + #pragma acc kernels async(gpu_async_stream_id) present(source_cluster_x, source_cluster_y, source_cluster_z, source_cluster_q, source_cluster_w, \ + target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, target_cluster_w) { #endif #ifdef OPENACC_ENABLED @@ -34,14 +34,15 @@ void K_Coulomb_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_inte double cz = target_cluster_z[starting_index_of_cluster + i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temporary_potential) + #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight) #endif for (int j = 0; j < number_of_sources_in_batch; j++) { #ifdef OPENACC_ENABLED #pragma acc cache(source_cluster_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_cluster_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_cluster_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_cluster_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) + source_cluster_q[starting_index_of_sources : 
starting_index_of_sources+number_of_sources_in_batch], \ + source_cluster_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) #endif int jj = starting_index_of_sources + j; @@ -50,10 +51,9 @@ void K_Coulomb_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_inte double dz = cz - source_cluster_z[jj]; double r = sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - temporary_potential += source_cluster_q[jj] / r; - temporary_weight += exp(-r*r/kernel_parameter2) * source_cluster_w[jj] / r; - } + temporary_potential += source_cluster_q[jj] / r; + temporary_weight += exp(-r*r/kernel_parameter2) * source_cluster_w[jj] / r; + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic diff --git a/src/kernels/coulomb/coulomb_ss_cp.c b/src/kernels/coulomb/coulomb_ss_cp.c index 8b4f958b..45ef6630 100644 --- a/src/kernels/coulomb/coulomb_ss_cp.c +++ b/src/kernels/coulomb/coulomb_ss_cp.c @@ -17,8 +17,8 @@ void K_Coulomb_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inte double kernel_parameter2 = kernel_parameter * kernel_parameter; #ifdef OPENACC_ENABLED - #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ - cluster_x, cluster_y, cluster_z, cluster_q) + #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, source_w, \ + cluster_x, cluster_y, cluster_z, cluster_q, cluster_w) { #endif #ifdef OPENACC_ENABLED @@ -34,14 +34,15 @@ void K_Coulomb_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inte double cz = cluster_z[starting_index_of_cluster + i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temporary_potential) + #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight) #endif for (int j = 0; j < number_of_sources_in_batch; j++) { #ifdef OPENACC_ENABLED #pragma acc cache(source_x[starting_index_of_sources : 
starting_index_of_sources+number_of_sources_in_batch], \ source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) + source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) #endif int jj = starting_index_of_sources + j; @@ -50,10 +51,9 @@ void K_Coulomb_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inte double dz = cz - source_z[jj]; double r = sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - temporary_potential += source_q[jj] * source_w[jj] / r; - temporary_weight += exp(-r*r/kernel_parameter2) * source_w[jj] / r; - } + temporary_potential += source_q[jj] * source_w[jj] / r; + temporary_weight += exp(-r*r/kernel_parameter2) * source_w[jj] / r; + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic diff --git a/src/kernels/coulomb/coulomb_ss_pc.c b/src/kernels/coulomb/coulomb_ss_pc.c index 00ab5960..d93b7762 100644 --- a/src/kernels/coulomb/coulomb_ss_pc.c +++ b/src/kernels/coulomb/coulomb_ss_pc.c @@ -44,9 +44,8 @@ void K_Coulomb_SS_PC_Lagrange(int number_of_targets_in_batch, double dz = tz - cluster_z[jj]; double r = sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - temporary_potential += (cluster_charge[jj] - tq * cluster_weight[jj] * exp(-r*r/kernel_parameter2)) / r; - } + temporary_potential += (cluster_charge[jj] - tq * cluster_weight[jj] * exp(-r*r/kernel_parameter2)) / r; + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic From cd7ac2ac156882c898ecafcfe84ebde9a13cee8b Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 09:38:51 -0400 Subject: [PATCH 38/95] updated yukawa ss pragmas. 
--- src/kernels/yukawa/yukawa_ss_cc.c | 12 ++++++------ src/kernels/yukawa/yukawa_ss_cp.c | 16 ++++++++-------- src/kernels/yukawa/yukawa_ss_pc.c | 5 ++--- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/kernels/yukawa/yukawa_ss_cc.c b/src/kernels/yukawa/yukawa_ss_cc.c index 06dbd4b8..1ff2a61e 100644 --- a/src/kernels/yukawa/yukawa_ss_cc.c +++ b/src/kernels/yukawa/yukawa_ss_cc.c @@ -33,14 +33,15 @@ void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_inter double cz = target_cluster_z[starting_index_of_cluster + i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temporary_potential, +:temporary_weight) + #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight) #endif for (int j = 0; j < number_of_sources_in_batch; j++) { #ifdef OPENACC_ENABLED #pragma acc cache(source_cluster_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_cluster_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_cluster_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_cluster_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) + source_cluster_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_cluster_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) #endif int jj = starting_index_of_sources + j; @@ -49,10 +50,9 @@ void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_inter double dz = cz - source_cluster_z[jj]; double r = sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - temporary_potential += source_cluster_q[jj] * exp(-kernel_parameter*r) /r; // source_cluster_q already has source_q * source_w - temporary_weight += source_cluster_w[jj] * exp(-kernel_parameter*r) /r; - } + temporary_potential += source_cluster_q[jj] * 
exp(-kernel_parameter*r) /r; // source_cluster_q already has source_q * source_w + temporary_weight += source_cluster_w[jj] * exp(-kernel_parameter*r) /r; + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic diff --git a/src/kernels/yukawa/yukawa_ss_cp.c b/src/kernels/yukawa/yukawa_ss_cp.c index dbc7f1d5..7742014d 100644 --- a/src/kernels/yukawa/yukawa_ss_cp.c +++ b/src/kernels/yukawa/yukawa_ss_cp.c @@ -16,8 +16,8 @@ void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inter double kernel_parameter = run_params->kernel_params[0]; #ifdef OPENACC_ENABLED - #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ - cluster_x, cluster_y, cluster_z, cluster_q) + #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, source_w,\ + cluster_x, cluster_y, cluster_z, cluster_q, cluster_w) { #endif #ifdef OPENACC_ENABLED @@ -33,14 +33,15 @@ void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inter double cz = cluster_z[starting_index_of_cluster + i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temporary_potential) + #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight) #endif for (int j = 0; j < number_of_sources_in_batch; j++) { #ifdef OPENACC_ENABLED #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) + source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) #endif int jj = 
starting_index_of_sources + j; @@ -49,10 +50,9 @@ void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inter double dz = cz - source_z[jj]; double r = sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - temporary_potential += source_q[jj] * source_w[jj] * exp(-kernel_parameter*r) /r; - temporary_weight += source_w[jj] * exp(-kernel_parameter*r) /r; - } + temporary_potential += source_q[jj] * source_w[jj] * exp(-kernel_parameter*r) /r; + temporary_weight += source_w[jj] * exp(-kernel_parameter*r) /r; + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic diff --git a/src/kernels/yukawa/yukawa_ss_pc.c b/src/kernels/yukawa/yukawa_ss_pc.c index 69a17fde..9cd68972 100644 --- a/src/kernels/yukawa/yukawa_ss_pc.c +++ b/src/kernels/yukawa/yukawa_ss_pc.c @@ -43,9 +43,8 @@ void K_Yukawa_SS_PC_Lagrange(int number_of_targets_in_batch, double dz = tz - cluster_z[jj]; double r = sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - temporary_potential += (cluster_charge[jj] - tq * cluster_weight[jj] ) * exp(-kernel_parameter*r) /r; - } + temporary_potential += (cluster_charge[jj] - tq * cluster_weight[jj] ) * exp(-kernel_parameter*r) /r; + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic From f1f0a70868c2b98f42f75c6352d33f5f1c7035f1 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 10:29:16 -0400 Subject: [PATCH 39/95] Erroneously copying out garbage clusters_w from GPU. Fixed. 
--- src/clusters/clusters.c | 52 +---------------------------------------- 1 file changed, 1 insertion(+), 51 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 895addb9..a6273f74 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -190,60 +190,10 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Pa double *xC = clusters->x; double *yC = clusters->y; double *zC = clusters->z; - double *wC = clusters->w; - - - - /* - * If using singularity subtraction, compute the modified charges on the target cluster, store in clusters->w - */ - -// if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { // doing Lagrange SS, need to both construct interpolation points and anterpolate target charge. -// double *xT = targets->x; -// double *yT = targets->y; -// double *zT = targets->z; -// double *qT = targets->q; -// double *wT = targets->w; -// -// double *ones; // initialize an array of ones, needed in the call to pc_comp_ms_modifiedF below. -// make_vector(ones,totalNumberTargetPoints); -// for (int i=0;iw -// for (int i = 0; i < tree_numnodes; i++){ -// pc_comp_ms_modifiedF(tree, i, interpolationOrder, xT, yT, zT, qT, ones, xC, yC, zC, wC); // note the final input is w not q array. 
-// } -// free_vector(ones); -// -// int numZeros=0; -// for (int i=0; iw: %i of %i\n", numZeros, totalNumberInterpolationCharges); -// -// -// -//#ifdef OPENACC_ENABLED -// #pragma acc wait -// } // end ACC DATA REGION -//#endif -// -// } else { // not doing Lagrange singularity subtraction, just need to construct interpolation points #ifdef OPENACC_ENABLED #pragma acc data copyout(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ - zC[0:totalNumberInterpolationPoints], wC[0:totalNumberInterpolationPoints]) + zC[0:totalNumberInterpolationPoints]) { #endif From 13852876383dd28228c1c0ac92363b43c3909c5c Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 10:32:47 -0400 Subject: [PATCH 40/95] added atomics for both cluster_q and cluster_w for Coulomb SS. --- src/kernels/coulomb/coulomb_ss_cc.c | 3 +++ src/kernels/coulomb/coulomb_ss_cp.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/kernels/coulomb/coulomb_ss_cc.c b/src/kernels/coulomb/coulomb_ss_cc.c index ad445aaf..a8abe453 100644 --- a/src/kernels/coulomb/coulomb_ss_cc.c +++ b/src/kernels/coulomb/coulomb_ss_cc.c @@ -59,6 +59,9 @@ void K_Coulomb_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_inte #pragma acc atomic #endif target_cluster_q[starting_index_of_cluster + i] += temporary_potential; +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif target_cluster_w[starting_index_of_cluster + i] += temporary_weight; } #ifdef OPENACC_ENABLED diff --git a/src/kernels/coulomb/coulomb_ss_cp.c b/src/kernels/coulomb/coulomb_ss_cp.c index 45ef6630..864cd3fd 100644 --- a/src/kernels/coulomb/coulomb_ss_cp.c +++ b/src/kernels/coulomb/coulomb_ss_cp.c @@ -59,6 +59,9 @@ void K_Coulomb_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inte #pragma acc atomic #endif cluster_q[starting_index_of_cluster + i] += temporary_potential; +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif cluster_w[starting_index_of_cluster + i] += temporary_weight; } #ifdef 
OPENACC_ENABLED From ae9326ec2244d8c403fcb3e12f22c52cb01caf8f Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 10:39:05 -0400 Subject: [PATCH 41/95] singularity subtraction Yuk and Coulomb working on GPUs --- src/interaction_compute/interaction_compute_cc.c | 3 ++- src/interaction_compute/interaction_compute_downpass.c | 2 +- src/kernels/yukawa/yukawa_ss_cc.c | 5 ++++- src/kernels/yukawa/yukawa_ss_cp.c | 3 +++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/interaction_compute/interaction_compute_cc.c b/src/interaction_compute/interaction_compute_cc.c index b79aef91..65b22437 100644 --- a/src/interaction_compute/interaction_compute_cc.c +++ b/src/interaction_compute/interaction_compute_cc.c @@ -96,7 +96,8 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T target_cluster_x[0:num_target_cluster_points], \ target_cluster_y[0:num_target_cluster_points], \ target_cluster_z[0:num_target_cluster_points]) \ - copy(target_cluster_q[0:num_target_cluster_charges], target_cluster_w[0:num_target_cluster_charges], \ + copy(target_cluster_q[0:num_target_cluster_charges], \ + target_cluster_w[0:num_target_cluster_charges], \ potential[0:num_targets]) #endif { diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index 543d0a3b..ceeba9dc 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -302,7 +302,7 @@ void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_or #ifdef OPENACC_ENABLED int streamID = rand() % 4; - #pragma acc kernels async(streamID) present(target_x, target_y, target_z, target_q, cluster_q) \ + #pragma acc kernels async(streamID) present(target_x, target_y, target_z, target_q, cluster_q, cluster_w) \ create(nodeX[0:interp_order_lim], nodeY[0:interp_order_lim], nodeZ[0:interp_order_lim], \ weights[0:interp_order_lim], 
dj[0:interp_order_lim], tt[0:interp_order_lim]) { diff --git a/src/kernels/yukawa/yukawa_ss_cc.c b/src/kernels/yukawa/yukawa_ss_cc.c index 1ff2a61e..c5c6d4a8 100644 --- a/src/kernels/yukawa/yukawa_ss_cc.c +++ b/src/kernels/yukawa/yukawa_ss_cc.c @@ -33,7 +33,7 @@ void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_inter double cz = target_cluster_z[starting_index_of_cluster + i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight) + #pragma acc loop independent reduction(+:temporary_potential,temporary_weight) #endif for (int j = 0; j < number_of_sources_in_batch; j++) { #ifdef OPENACC_ENABLED @@ -58,6 +58,9 @@ void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_inter #pragma acc atomic #endif target_cluster_q[starting_index_of_cluster + i] += temporary_potential; +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif target_cluster_w[starting_index_of_cluster + i] += temporary_weight; } #ifdef OPENACC_ENABLED diff --git a/src/kernels/yukawa/yukawa_ss_cp.c b/src/kernels/yukawa/yukawa_ss_cp.c index 7742014d..ff4fcd39 100644 --- a/src/kernels/yukawa/yukawa_ss_cp.c +++ b/src/kernels/yukawa/yukawa_ss_cp.c @@ -58,6 +58,9 @@ void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_inter #pragma acc atomic #endif cluster_q[starting_index_of_cluster + i] += temporary_potential; +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif cluster_w[starting_index_of_cluster + i] += temporary_weight; } #ifdef OPENACC_ENABLED From c646ade7323fcd4104718424c8ac191b918cab71 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 11:07:54 -0400 Subject: [PATCH 42/95] Changed interpolation order to interpolation degree. 
--- examples/support_fns.c | 10 +- examples/test_BaryTreeInterface.c | 4 +- interfaces/python/BaryTreeInterface.py | 6 +- interfaces/python/testBaryTreeInterface.py | 4 +- src/clusters/clusters.c | 248 +++++++++--------- .../interaction_compute_downpass.c | 154 +++++------ src/interface/BaryTreeInterface.c | 4 +- src/run_params/run_params.c | 20 +- src/run_params/run_params.h | 2 +- src/run_params/struct_run_params.h | 2 +- tests/serial_tests.c | 86 +++--- 11 files changed, 270 insertions(+), 270 deletions(-) diff --git a/examples/support_fns.c b/examples/support_fns.c index 0102c4e7..216af515 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -21,7 +21,7 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * /* BaryTree params */ int verbosity = 0; - int interp_order = 5; + int interp_degree = 5; double theta = 0.5; double beta = -1.0; int max_per_source_leaf = 500; @@ -63,8 +63,8 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * sscanf(c, "%s %s", c1, c2); /* Parameters for the RunParam struct */ - if (strcmp(c1, "order") == 0) { - interp_order = atoi(c2); + if (strcmp(c1, "degree") == 0) { + interp_degree = atoi(c2); } else if (strcmp(c1, "theta") == 0) { theta = atof(c2); @@ -289,7 +289,7 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * RunParams_Setup(run_params, kernel, num_kernel_params, kernel_params, approximation, singularity, compute_type, - theta, interp_order, + theta, interp_degree, max_per_source_leaf, max_per_target_leaf, size_check_factor, beta, verbosity); @@ -746,7 +746,7 @@ void CSV_Print(int N, int M, struct RunParams *run_params, "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," "%e,%e,%e,%e,%e,%e,%e,%e\n", N, M, numProcs, run_params->kernel, run_params->approximation, run_params->singularity, - run_params->compute_type, run_params->theta, run_params->interp_order, + run_params->compute_type, 
run_params->theta, run_params->interp_degree, run_params->max_per_source_leaf, run_params->max_per_target_leaf, run_params->size_check_factor, run_params->beta, // 1 ends diff --git a/examples/test_BaryTreeInterface.c b/examples/test_BaryTreeInterface.c index f38d2fa0..f3c9ba77 100644 --- a/examples/test_BaryTreeInterface.c +++ b/examples/test_BaryTreeInterface.c @@ -23,7 +23,7 @@ int main(int argc, char **argv) int numParams = 1; double kernelParams[1] = {0.5}; - int interpOrder = 5; + int interpDegree = 5; double theta = 0.8; double beta = 1.0; @@ -65,7 +65,7 @@ int main(int argc, char **argv) xS, yS, zS, qS, wS, potential, kernel, numParams, kernelParams, singularity, approximation, compute_type, - theta, interpOrder, maxPerLeaf, maxPerBatch, + theta, interpDegree, maxPerLeaf, maxPerBatch, sizeCheck, beta, verbosity); printf("[test BaryTree interface] BaryTree has finished.\n"); diff --git a/interfaces/python/BaryTreeInterface.py b/interfaces/python/BaryTreeInterface.py index c882dec6..e5105c67 100644 --- a/interfaces/python/BaryTreeInterface.py +++ b/interfaces/python/BaryTreeInterface.py @@ -83,7 +83,7 @@ def callTreedriver(numTargets, numSources, targetX, targetY, targetZ, targetValue, sourceX, sourceY, sourceZ, sourceValue, sourceWeight, kernelName, numberOfKernelParameters, kernelParameters, singularityHandling, - approximationName, computeType, theta, order, maxParNode, batchSize, beta, GPUpresent, verbosity, sizeCheck=None): + approximationName, computeType, theta, degree, maxParNode, batchSize, beta, GPUpresent, verbosity, sizeCheck=None): ''' python function which creates pointers to the arrays and calls treedriverWrapper. returns the results array. 
@@ -119,14 +119,14 @@ def callTreedriver(numTargets, numSources, sourceX_p, sourceY_p, sourceZ_p, sourceValue_p, sourceWeight_p, resultArray_p, kernelName, ctypes.c_int(numberOfKernelParameters), kernelParameters_p, singularityHandling, approximationName, computeType, - ctypes.c_double(theta), ctypes.c_int(order), ctypes.c_int(maxParNode), ctypes.c_int(batchSize), ctypes.c_double(sizeCheck), ctypes.c_double(beta), ctypes.c_int(verbosity) ) + ctypes.c_double(theta), ctypes.c_int(degree), ctypes.c_int(maxParNode), ctypes.c_int(batchSize), ctypes.c_double(sizeCheck), ctypes.c_double(beta), ctypes.c_int(verbosity) ) elif GPUpresent==False: # No gpu present _cpu_treecodeRoutines.BaryTreeInterface(ctypes.c_int(numTargets), ctypes.c_int(numSources), targetX_p, targetY_p, targetZ_p, targetValue_p, sourceX_p, sourceY_p, sourceZ_p, sourceValue_p, sourceWeight_p, resultArray_p, kernelName, ctypes.c_int(numberOfKernelParameters), kernelParameters_p, singularityHandling, approximationName, computeType, - ctypes.c_double(theta), ctypes.c_int(order), ctypes.c_int(maxParNode), ctypes.c_int(batchSize), ctypes.c_double(sizeCheck), ctypes.c_double(beta), ctypes.c_int(verbosity) ) + ctypes.c_double(theta), ctypes.c_int(degree), ctypes.c_int(maxParNode), ctypes.c_int(batchSize), ctypes.c_double(sizeCheck), ctypes.c_double(beta), ctypes.c_int(verbosity) ) else: print("What should GPUpresent be set to in the wrapper?") exit(-1) diff --git a/interfaces/python/testBaryTreeInterface.py b/interfaces/python/testBaryTreeInterface.py index 54947d20..6dab5be7 100644 --- a/interfaces/python/testBaryTreeInterface.py +++ b/interfaces/python/testBaryTreeInterface.py @@ -24,7 +24,7 @@ maxPerTargetLeaf = 10 GPUpresent = False theta = 0.8 - treecodeOrder = 4 + treecodeDegree = 4 beta = -1 gaussianAlpha = 1.0 verbosity = 0 @@ -56,7 +56,7 @@ np.copy(X), np.copy(Y), np.copy(Z), np.copy(RHO), np.copy(W), kernel, numberOfKernelParameters, kernelParameters, singularity, approximation, computeType, - theta, 
treecodeOrder, maxPerSourceLeaf, maxPerTargetLeaf, + theta, treecodeDegree, maxPerSourceLeaf, maxPerTargetLeaf, beta, GPUpresent, verbosity, sizeCheck=1.0) assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output." diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index a6273f74..a3df4c62 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -17,26 +17,26 @@ #include "clusters.h" -static void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrder, +static void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); -static void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationOrder, +static void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); -static void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpolationOrder, +static void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpolationDegree, int totalNumberInterpolationPoints, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); -static void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int interpolationOrder, +static void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int interpolationDegree, int totalNumberInterpolationPoints, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); -static void cp_comp_interp(const struct Tree *tree, int idx, int interpolationOrder, +static void cp_comp_interp(const struct Tree 
*tree, int idx, int interpolationDegree, double *clusterX, double *clusterY, double *clusterZ); @@ -53,8 +53,8 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa int tree_numnodes = tree->numnodes; int totalNumberSourcePoints = sources->num; - int interpolationOrder = run_params->interp_order; - int interpOrderLim = interpolationOrder + 1; + int interpolationDegree = run_params->interp_degree; + int interpDegreeLim = interpolationDegree + 1; int interpolationPointsPerCluster = run_params->interp_pts_per_cluster; int totalNumberInterpolationPoints = tree_numnodes * interpolationPointsPerCluster; @@ -109,20 +109,20 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa if ((approximation == LAGRANGE) && (singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) - pc_comp_ms_modifiedF(tree, i, interpolationOrder, xS, yS, zS, qS, wS, xC, yC, zC, qC); + pc_comp_ms_modifiedF(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); } else if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { for (int i = 0; i < tree_numnodes; i++) - pc_comp_ms_modifiedF_SS(tree, i, interpolationOrder, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); + pc_comp_ms_modifiedF_SS(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); } else if ((approximation == HERMITE) && (singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) - pc_comp_ms_modifiedF_hermite(tree, i, interpolationOrder, totalNumberInterpolationPoints, + pc_comp_ms_modifiedF_hermite(tree, i, interpolationDegree, totalNumberInterpolationPoints, xS, yS, zS, qS, wS, xC, yC, zC, qC); } else if ((approximation == HERMITE) && (singularity == SUBTRACTION)) { for (int i = 0; i < tree_numnodes; i++) - pc_comp_ms_modifiedF_hermite_SS(tree, i, interpolationOrder, totalNumberInterpolationPoints, + pc_comp_ms_modifiedF_hermite_SS(tree, i, interpolationDegree, totalNumberInterpolationPoints, xS, yS, zS, qS, wS, xC, yC, zC, qC, 
wC); } else { @@ -159,8 +159,8 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Pa int tree_numnodes = tree->numnodes; int totalNumberTargetPoints = targets->num; - int interpolationOrder = run_params->interp_order; - int interpOrderLim = interpolationOrder + 1; + int interpolationDegree = run_params->interp_degree; + int interpDegreeLim = interpolationDegree + 1; int interpolationPointsPerCluster = run_params->interp_pts_per_cluster; int totalNumberInterpolationPoints = tree_numnodes * interpolationPointsPerCluster; @@ -198,7 +198,7 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Pa #endif for (int i = 0; i < tree_numnodes; i++) { - cp_comp_interp(tree, i, interpolationOrder, xC, yC, zC); + cp_comp_interp(tree, i, interpolationDegree, xC, yC, zC); } #ifdef OPENACC_ENABLED @@ -296,13 +296,13 @@ void Clusters_Free_Win(struct Clusters **clusters_addr) /***** LOCAL FUNCTIONS **************/ /************************************/ -void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrder, +void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ) { - int interpOrderLim = interpolationOrder + 1; - int interpolationPointsPerCluster = interpOrderLim * interpOrderLim * interpOrderLim; + int interpDegreeLim = interpolationDegree + 1; + int interpolationPointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; int sourcePointsInCluster = tree->iend[idx] - tree->ibeg[idx] + 1; int startingIndexInClustersArray = idx * interpolationPointsPerCluster; int startingIndexInSourcesArray = tree->ibeg[idx]-1; @@ -310,12 +310,12 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrd double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ, *modifiedF; int *exactIndX, *exactIndY, *exactIndZ; - 
make_vector(weights, interpOrderLim); - make_vector(dj, interpOrderLim); - make_vector(tt, interpOrderLim); - make_vector(nodeX, interpOrderLim); - make_vector(nodeY, interpOrderLim); - make_vector(nodeZ, interpOrderLim); + make_vector(weights, interpDegreeLim); + make_vector(dj, interpDegreeLim); + make_vector(tt, interpDegreeLim); + make_vector(nodeX, interpDegreeLim); + make_vector(nodeY, interpDegreeLim); + make_vector(nodeZ, interpDegreeLim); make_vector(modifiedF, sourcePointsInCluster); make_vector(exactIndX, sourcePointsInCluster); make_vector(exactIndY, sourcePointsInCluster); @@ -333,9 +333,9 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrd #pragma acc kernels async(streamID) present(xS, yS, zS, qS, wS, clusterX, clusterY, clusterZ, clusterQ) \ create(modifiedF[0:sourcePointsInCluster], exactIndX[0:sourcePointsInCluster], \ exactIndY[0:sourcePointsInCluster], exactIndZ[0:sourcePointsInCluster], \ - nodeX[0:interpOrderLim], nodeY[0:interpOrderLim], \ - nodeZ[0:interpOrderLim], weights[0:interpOrderLim], \ - dj[0:interpOrderLim], tt[0:interpOrderLim]) + nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], \ + nodeZ[0:interpDegreeLim], weights[0:interpDegreeLim], \ + dj[0:interpDegreeLim], tt[0:interpDegreeLim]) { #endif @@ -353,8 +353,8 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrd #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int i = 0; i < interpOrderLim; i++) { - tt[i] = cos(i * M_PI / interpolationOrder); + for (int i = 0; i < interpDegreeLim; i++) { + tt[i] = cos(i * M_PI / interpolationDegree); nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); @@ -364,16 +364,16 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrd #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interpOrderLim; j++) { + for (int j = 0; j < 
interpDegreeLim; j++) { dj[j] = 1.0; if (j == 0) dj[j] = 0.5; - if (j == interpolationOrder) dj[j] = 0.5; + if (j == interpolationDegree) dj[j] = 0.5; } #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interpOrderLim; j++) { + for (int j = 0; j < interpDegreeLim; j++) { weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; } @@ -394,7 +394,7 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrd #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif - for (int j = 0; j < (interpolationOrder+1); j++) { // loop through the degree + for (int j = 0; j < (interpolationDegree+1); j++) { // loop through the degree double cx = sx - nodeX[j]; double cy = sy - nodeY[j]; @@ -425,11 +425,11 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrd #pragma acc loop independent #endif for (int j = 0; j < interpolationPointsPerCluster; j++) { - int k1 = j%(interpolationOrder+1); - int kk = (j-k1)/(interpolationOrder+1); - int k2 = kk%(interpolationOrder+1); + int k1 = j%(interpolationDegree+1); + int kk = (j-k1)/(interpolationDegree+1); + int k2 = kk%(interpolationDegree+1); kk = kk - k2; - int k3 = kk / (interpolationOrder+1); + int k3 = kk / (interpolationDegree+1); double cz = nodeZ[k3]; double w3 = weights[k3]; @@ -504,12 +504,12 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationOrd -void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationOrder, +void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) { - int interpOrderLim = interpolationOrder + 1; - int pointsPerCluster = interpOrderLim * interpOrderLim * interpOrderLim; + int interpDegreeLim = interpolationDegree + 1; + int pointsPerCluster = interpDegreeLim 
* interpDegreeLim * interpDegreeLim; int pointsInNode = tree->iend[idx] - tree->ibeg[idx] + 1; int startingIndexInClusters = idx * pointsPerCluster; int startingIndexInSources = tree->ibeg[idx]-1; @@ -517,12 +517,12 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ, *modifiedF, *modifiedF2; int *exactIndX, *exactIndY, *exactIndZ; - make_vector(weights, interpOrderLim); - make_vector(dj, interpOrderLim); - make_vector(tt, interpOrderLim); - make_vector(nodeX, interpOrderLim); - make_vector(nodeY, interpOrderLim); - make_vector(nodeZ, interpOrderLim); + make_vector(weights, interpDegreeLim); + make_vector(dj, interpDegreeLim); + make_vector(tt, interpDegreeLim); + make_vector(nodeX, interpDegreeLim); + make_vector(nodeY, interpDegreeLim); + make_vector(nodeZ, interpDegreeLim); make_vector(modifiedF, pointsInNode); make_vector(modifiedF2, pointsInNode); make_vector(exactIndX, pointsInNode); @@ -542,8 +542,8 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation clusterX, clusterY, clusterZ, clusterQ, clusterW) \ create(modifiedF[0:pointsInNode], modifiedF2[0:pointsInNode], exactIndX[0:pointsInNode], \ exactIndY[0:pointsInNode], exactIndZ[0:pointsInNode], \ - nodeX[0:interpOrderLim], nodeY[0:interpOrderLim], nodeZ[0:interpOrderLim], \ - weights[0:interpOrderLim], dj[0:interpOrderLim], tt[0:interpOrderLim]) + nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], nodeZ[0:interpDegreeLim], \ + weights[0:interpDegreeLim], dj[0:interpDegreeLim], tt[0:interpDegreeLim]) { #endif @@ -562,8 +562,8 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int i = 0; i < interpOrderLim; i++) { - tt[i] = cos(i * M_PI / interpolationOrder); + for (int i = 0; i < interpDegreeLim; i++) { + tt[i] = cos(i * M_PI / interpolationDegree); nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); 
nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); @@ -573,16 +573,16 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interpOrderLim; j++) { + for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; if (j == 0) dj[j] = 0.5; - if (j == interpolationOrder) dj[j] = 0.5; + if (j == interpolationDegree) dj[j] = 0.5; } #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interpOrderLim; j++) { + for (int j = 0; j < interpDegreeLim; j++) { weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; } @@ -603,7 +603,7 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif - for (int j = 0; j < interpOrderLim; j++) { // loop through the degree + for (int j = 0; j < interpDegreeLim; j++) { // loop through the degree double cx = sx - nodeX[j]; double cy = sy - nodeY[j]; @@ -637,11 +637,11 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation #endif for (int j = 0; j < pointsPerCluster; j++) { // loop over interpolation points, set (cx,cy,cz) for this point // compute k1, k2, k3 from j - int k1 = j % interpOrderLim; - int kk = (j-k1) / interpOrderLim; - int k2 = kk % interpOrderLim; + int k1 = j % interpDegreeLim; + int kk = (j-k1) / interpDegreeLim; + int k2 = kk % interpDegreeLim; kk = kk - k2; - int k3 = kk / interpOrderLim; + int k3 = kk / interpDegreeLim; double cz = nodeZ[k3]; double w3 = weights[k3]; @@ -719,13 +719,13 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation -void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpolationOrder, +void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpolationDegree, int totalNumberInterpolationPoints, double 
*xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ) { - int interpOrderLim = interpolationOrder + 1; - int interpolationPointsPerCluster = interpOrderLim * interpOrderLim * interpOrderLim; + int interpDegreeLim = interpolationDegree + 1; + int interpolationPointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; int sourcePointsInCluster = tree->iend[idx] - tree->ibeg[idx] + 1; int startingIndexInSourcesArray = tree->ibeg[idx] - 1; @@ -736,15 +736,15 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol double *nodeX, *nodeY, *nodeZ, *modifiedF; int *exactIndX, *exactIndY, *exactIndZ; - make_vector(dj, interpOrderLim); - make_vector(tt, interpOrderLim); - make_vector(ww, interpOrderLim); - make_vector(wx, interpOrderLim); - make_vector(wy, interpOrderLim); - make_vector(wz, interpOrderLim); - make_vector(nodeX, interpOrderLim); - make_vector(nodeY, interpOrderLim); - make_vector(nodeZ, interpOrderLim); + make_vector(dj, interpDegreeLim); + make_vector(tt, interpDegreeLim); + make_vector(ww, interpDegreeLim); + make_vector(wx, interpDegreeLim); + make_vector(wy, interpDegreeLim); + make_vector(wz, interpDegreeLim); + make_vector(nodeX, interpDegreeLim); + make_vector(nodeY, interpDegreeLim); + make_vector(nodeZ, interpDegreeLim); make_vector(modifiedF, sourcePointsInCluster); make_vector(exactIndX, sourcePointsInCluster); make_vector(exactIndY, sourcePointsInCluster); @@ -764,9 +764,9 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol clusterX, clusterY, clusterZ, clusterQ) \ create(modifiedF[0:sourcePointsInCluster], exactIndX[0:sourcePointsInCluster], \ exactIndY[0:sourcePointsInCluster], exactIndZ[0:sourcePointsInCluster], \ - nodeX[0:interpOrderLim], nodeY[0:interpOrderLim], nodeZ[0:interpOrderLim], \ - dj[0:interpOrderLim], tt[0:interpOrderLim], ww[0:interpOrderLim], \ - wx[0:interpOrderLim], 
wy[0:interpOrderLim], wz[0:interpOrderLim]) + nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], nodeZ[0:interpDegreeLim], \ + dj[0:interpDegreeLim], tt[0:interpDegreeLim], ww[0:interpDegreeLim], \ + wx[0:interpDegreeLim], wy[0:interpDegreeLim], wz[0:interpDegreeLim]) { #endif @@ -784,29 +784,29 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int i = 0; i < interpOrderLim; i++) { - double xx = i * M_PI / interpolationOrder; + for (int i = 0; i < interpDegreeLim; i++) { + double xx = i * M_PI / interpolationDegree; tt[i] = cos(xx); ww[i] = -cos(xx) / (2 * sin(xx) * sin(xx)); nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); } - ww[0] = 0.25 * (interpolationOrder*interpolationOrder/3.0 + 1.0/6.0); - ww[interpolationOrder] = -ww[0]; + ww[0] = 0.25 * (interpolationDegree*interpolationDegree/3.0 + 1.0/6.0); + ww[interpolationDegree] = -ww[0]; // Compute weights #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interpOrderLim; j++) { + for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; wx[j] = -4.0 * ww[j] / (x1 - x0); wy[j] = -4.0 * ww[j] / (y1 - y0); wz[j] = -4.0 * ww[j] / (z1 - z0); } dj[0] = 0.25; - dj[interpolationOrder] = 0.25; + dj[interpolationDegree] = 0.25; @@ -826,7 +826,7 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif - for (int j = 0; j < interpOrderLim; j++) { // loop through the degree + for (int j = 0; j < interpDegreeLim; j++) { // loop through the degree double dx = sx - nodeX[j]; double dy = sy - nodeY[j]; @@ -857,11 +857,11 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol #endif for (int j = 0; j < interpolationPointsPerCluster; j++) { // 
compute k1, k2, k3 from j - int k1 = j % interpOrderLim; - int kk = (j-k1) / interpOrderLim; - int k2 = kk % interpOrderLim; + int k1 = j % interpDegreeLim; + int kk = (j-k1) / interpDegreeLim; + int k2 = kk % interpDegreeLim; kk = kk - k2; - int k3 = kk / interpOrderLim; + int k3 = kk / interpDegreeLim; double cz = nodeZ[k3]; double cy = nodeY[k2]; @@ -1013,13 +1013,13 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol -void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int interpolationOrder, +void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int interpolationDegree, int totalNumberInterpolationPoints, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) { - int interpOrderLim = interpolationOrder + 1; - int interpolationPointsPerCluster = interpOrderLim * interpOrderLim * interpOrderLim; + int interpDegreeLim = interpolationDegree + 1; + int interpolationPointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; int sourcePointsInCluster = tree->iend[idx] - tree->ibeg[idx] + 1; int startingIndexInSourcesArray = tree->ibeg[idx] - 1; @@ -1031,15 +1031,15 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter double *nodeX, *nodeY, *nodeZ, *modifiedF, *modifiedF2; int *exactIndX, *exactIndY, *exactIndZ; - make_vector(dj, interpOrderLim); - make_vector(tt, interpOrderLim); - make_vector(ww, interpOrderLim); - make_vector(wx, interpOrderLim); - make_vector(wy, interpOrderLim); - make_vector(wz, interpOrderLim); - make_vector(nodeX, interpOrderLim); - make_vector(nodeY, interpOrderLim); - make_vector(nodeZ, interpOrderLim); + make_vector(dj, interpDegreeLim); + make_vector(tt, interpDegreeLim); + make_vector(ww, interpDegreeLim); + make_vector(wx, interpDegreeLim); + make_vector(wy, interpDegreeLim); + make_vector(wz, interpDegreeLim); + make_vector(nodeX, 
interpDegreeLim); + make_vector(nodeY, interpDegreeLim); + make_vector(nodeZ, interpDegreeLim); make_vector(modifiedF, sourcePointsInCluster); make_vector(modifiedF2, sourcePointsInCluster); make_vector(exactIndX, sourcePointsInCluster); @@ -1061,9 +1061,9 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter create(modifiedF[0:sourcePointsInCluster], modifiedF2[0:sourcePointsInCluster], \ exactIndX[0:sourcePointsInCluster], exactIndY[0:sourcePointsInCluster], \ exactIndZ[0:sourcePointsInCluster], \ - nodeX[0:interpOrderLim], nodeY[0:interpOrderLim], nodeZ[0:interpOrderLim], \ - dj[0:interpOrderLim], tt[0:interpOrderLim], ww[0:interpOrderLim], \ - wx[0:interpOrderLim], wy[0:interpOrderLim], wz[0:interpOrderLim]) + nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], nodeZ[0:interpDegreeLim], \ + dj[0:interpDegreeLim], tt[0:interpDegreeLim], ww[0:interpDegreeLim], \ + wx[0:interpDegreeLim], wy[0:interpDegreeLim], wz[0:interpDegreeLim]) { #endif @@ -1082,29 +1082,29 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int i = 0; i < interpOrderLim; i++) { - double xx = i * M_PI / interpolationOrder; + for (int i = 0; i < interpDegreeLim; i++) { + double xx = i * M_PI / interpolationDegree; tt[i] = cos(xx); ww[i] = -cos(xx) / (2 * sin(xx) * sin(xx)); nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); } - ww[0] = 0.25 * (interpolationOrder*interpolationOrder/3.0 + 1.0/6.0); - ww[interpolationOrder] = -ww[0]; + ww[0] = 0.25 * (interpolationDegree*interpolationDegree/3.0 + 1.0/6.0); + ww[interpolationDegree] = -ww[0]; // Compute weights #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interpOrderLim; j++) { + for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; wx[j] = -4.0 * ww[j] / (x1 - x0); wy[j] = -4.0 * ww[j] / (y1 - 
y0); wz[j] = -4.0 * ww[j] / (z1 - z0); } dj[0] = 0.25; - dj[interpolationOrder] = 0.25; + dj[interpolationDegree] = 0.25; #ifdef OPENACC_ENABLED @@ -1123,7 +1123,7 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif - for (int j = 0; j < interpOrderLim; j++) { // loop through the degree + for (int j = 0; j < interpDegreeLim; j++) { // loop through the degree double dx = sx - nodeX[j]; double dy = sy - nodeY[j]; @@ -1155,11 +1155,11 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter #endif for (int j = 0; j < interpolationPointsPerCluster; j++) { // compute k1, k2, k3 from j - int k1 = j % interpOrderLim; - int kk = (j-k1) / interpOrderLim; - int k2 = kk % interpOrderLim; + int k1 = j % interpDegreeLim; + int kk = (j-k1) / interpDegreeLim; + int k2 = kk % interpDegreeLim; kk = kk - k2; - int k3 = kk / interpOrderLim; + int k3 = kk / interpDegreeLim; double cz = nodeZ[k3]; double cy = nodeY[k2]; @@ -1335,20 +1335,20 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter -void cp_comp_interp(const struct Tree *tree, int idx, int interpolationOrder, +void cp_comp_interp(const struct Tree *tree, int idx, int interpolationDegree, double *clusterX, double *clusterY, double *clusterZ) { - int interpOrderLim = interpolationOrder + 1; - int interpolationPointsPerCluster = interpOrderLim * interpOrderLim * interpOrderLim; + int interpDegreeLim = interpolationDegree + 1; + int interpolationPointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; int startingIndexInClustersArray = idx * interpolationPointsPerCluster; double *tt, *nodeX, *nodeY, *nodeZ; - make_vector(tt, interpOrderLim); - make_vector(nodeX, interpOrderLim); - make_vector(nodeY, interpOrderLim); - make_vector(nodeZ, interpOrderLim); + make_vector(tt, interpDegreeLim); + make_vector(nodeX, 
interpDegreeLim); + make_vector(nodeY, interpDegreeLim); + make_vector(nodeZ, interpDegreeLim); double x0 = tree->x_min[idx]; double x1 = tree->x_max[idx]; @@ -1360,8 +1360,8 @@ void cp_comp_interp(const struct Tree *tree, int idx, int interpolationOrder, #ifdef OPENACC_ENABLED int streamID = rand() % 4; #pragma acc kernels async(streamID) present(clusterX, clusterY, clusterZ) \ - create(nodeX[0:interpOrderLim], nodeY[0:interpOrderLim], \ - nodeZ[0:interpOrderLim], tt[0:interpOrderLim]) + create(nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], \ + nodeZ[0:interpDegreeLim], tt[0:interpDegreeLim]) { #endif @@ -1370,8 +1370,8 @@ void cp_comp_interp(const struct Tree *tree, int idx, int interpolationOrder, #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int i = 0; i < interpOrderLim; i++) { - tt[i] = cos(i * M_PI / interpolationOrder); + for (int i = 0; i < interpDegreeLim; i++) { + tt[i] = cos(i * M_PI / interpolationDegree); nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); @@ -1382,11 +1382,11 @@ void cp_comp_interp(const struct Tree *tree, int idx, int interpolationOrder, #pragma acc loop independent #endif for (int j = 0; j < interpolationPointsPerCluster; j++) { - int k1 = j%(interpolationOrder+1); - int kk = (j-k1)/(interpolationOrder+1); - int k2 = kk%(interpolationOrder+1); + int k1 = j%(interpolationDegree+1); + int kk = (j-k1)/(interpolationDegree+1); + int k2 = kk%(interpolationDegree+1); kk = kk - k2; - int k3 = kk / (interpolationOrder+1); + int k3 = kk / (interpolationDegree+1); // Fill cluster X, Y, and Z arrays clusterX[startingIndexInClustersArray + j] = nodeX[k1]; diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index ceeba9dc..6c6be795 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ 
-12,19 +12,19 @@ #include "interaction_compute.h" -static void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order, +static void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_degree, double *xT, double *yT, double *zT, double *qT, double *clusterQ); -static void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_order, +static void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_degree, double *xT, double *yT, double *zT, double *qT, double *clusterQ, double *clusterW); -static void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int interp_order, +static void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int interp_degree, double *xT, double *yT, double *zT, double *qT, double *clusterQ, double *clusterW); -//static void cp_comp_pot_hermite_SS(struct Tree *tree, int idx, int interp_order, +//static void cp_comp_pot_hermite_SS(struct Tree *tree, int idx, int interp_degree, // int totalNumberInterpolationPoints, // double *xT, double *yT, double *zT, double *qT, // double *clusterQ, double *clusterW); @@ -46,7 +46,7 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, double *cluster_w = clusters->w; int tree_numnodes = tree->numnodes; - int interp_order = run_params->interp_order; + int interp_degree = run_params->interp_degree; #ifdef OPENACC_ENABLED #pragma acc data copyin(target_x[0:num_targets], target_y[0:num_targets], \ @@ -59,25 +59,25 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, if ((run_params->approximation == LAGRANGE) && (run_params->singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) - cp_comp_pot(tree, i, potential, interp_order, + cp_comp_pot(tree, i, potential, interp_degree, target_x, target_y, target_z, target_q, cluster_q); } else if ((run_params->approximation == LAGRANGE) && (run_params->singularity == SUBTRACTION)) { for (int i = 0; i < 
tree_numnodes; i++){ - cp_comp_pot_SS(tree, i, potential, interp_order, + cp_comp_pot_SS(tree, i, potential, interp_degree, target_x, target_y, target_z, target_q, cluster_q, cluster_w); } } else if ((run_params->approximation == HERMITE) && (run_params->singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) - cp_comp_pot_hermite(tree, i, potential, interp_order, + cp_comp_pot_hermite(tree, i, potential, interp_degree, target_x, target_y, target_z, target_q, cluster_q, cluster_w); } else if ((run_params->approximation == HERMITE) && (run_params->singularity == SUBTRACTION)) { printf("Not set up to do Hermite SS downpass.\n"); exit(-1); // for (int i = 0; i < tree_numnodes; i++) -// cp_comp_pot_hermite_SS(tree, i, potential, interp_order, +// cp_comp_pot_hermite_SS(tree, i, potential, interp_degree, // target_x, target_y, target_z, target_q, cluster_q, cluster_w); } else { @@ -99,12 +99,12 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, /***** LOCAL FUNCTIONS **************/ /************************************/ -void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order, +void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_degree, double *target_x, double *target_y, double *target_z, double *target_q, double *cluster_q) { - int interp_order_lim = interp_order + 1; - int interp_pts_per_cluster = interp_order_lim * interp_order_lim * interp_order_lim; + int interp_degree_lim = interp_degree + 1; + int interp_pts_per_cluster = interp_degree_lim * interp_degree_lim * interp_degree_lim; int num_targets_in_cluster = tree->iend[idx] - tree->ibeg[idx] + 1; int target_start = tree->ibeg[idx] - 1; @@ -112,12 +112,12 @@ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ; - make_vector(weights, interp_order_lim); - make_vector(dj, interp_order_lim); - make_vector(tt, interp_order_lim); - make_vector(nodeX, 
interp_order_lim); - make_vector(nodeY, interp_order_lim); - make_vector(nodeZ, interp_order_lim); + make_vector(weights, interp_degree_lim); + make_vector(dj, interp_degree_lim); + make_vector(tt, interp_degree_lim); + make_vector(nodeX, interp_degree_lim); + make_vector(nodeY, interp_degree_lim); + make_vector(nodeZ, interp_degree_lim); double x0 = tree->x_min[idx]; double x1 = tree->x_max[idx]; @@ -129,8 +129,8 @@ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order #ifdef OPENACC_ENABLED int streamID = rand() % 4; #pragma acc kernels async(streamID) present(target_x, target_y, target_z, target_q, cluster_q) \ - create(nodeX[0:interp_order_lim], nodeY[0:interp_order_lim], nodeZ[0:interp_order_lim], \ - weights[0:interp_order_lim], dj[0:interp_order_lim], tt[0:interp_order_lim]) + create(nodeX[0:interp_degree_lim], nodeY[0:interp_degree_lim], nodeZ[0:interp_degree_lim], \ + weights[0:interp_degree_lim], dj[0:interp_degree_lim], tt[0:interp_degree_lim]) { #endif @@ -139,8 +139,8 @@ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int i = 0; i < interp_order_lim; i++) { - tt[i] = cos(i * M_PI / interp_order); + for (int i = 0; i < interp_degree_lim; i++) { + tt[i] = cos(i * M_PI / interp_degree); nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); @@ -150,16 +150,16 @@ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interp_order_lim; j++){ + for (int j = 0; j < interp_degree_lim; j++){ dj[j] = 1.0; if (j == 0) dj[j] = 0.5; - if (j == interp_order) dj[j] = 0.5; + if (j == interp_degree) dj[j] = 0.5; } #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interp_order_lim; j++) { + for (int j = 0; j < 
interp_degree_lim; j++) { weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; } @@ -183,7 +183,7 @@ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:sumX,sumY,sumZ) reduction(max:eix,eiy,eiz) #endif - for (int j = 0; j < interp_order_lim; j++) { // loop through the degree + for (int j = 0; j < interp_degree_lim; j++) { // loop through the degree double cx = tx - nodeX[j]; double cy = ty - nodeY[j]; @@ -213,11 +213,11 @@ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order #endif for (int j = 0; j < interp_pts_per_cluster; j++) { // loop over interpolation points, set (cx,cy,cz) for this point - int k1 = j%interp_order_lim; - int kk = (j-k1)/interp_order_lim; - int k2 = kk%interp_order_lim; + int k1 = j%interp_degree_lim; + int kk = (j-k1)/interp_degree_lim; + int k2 = kk%interp_degree_lim; kk = kk - k2; - int k3 = kk / interp_order_lim; + int k3 = kk / interp_degree_lim; double w3 = weights[k3]; double w2 = weights[k2]; @@ -273,12 +273,12 @@ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_order } -void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_order, +void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_degree, double *target_x, double *target_y, double *target_z, double *target_q, double *cluster_q, double *cluster_w) { - int interp_order_lim = interp_order + 1; - int interp_pts_per_cluster = interp_order_lim * interp_order_lim * interp_order_lim; + int interp_degree_lim = interp_degree + 1; + int interp_pts_per_cluster = interp_degree_lim * interp_degree_lim * interp_degree_lim; int num_targets_in_cluster = tree->iend[idx] - tree->ibeg[idx] + 1; int target_start = tree->ibeg[idx] - 1; @@ -286,12 +286,12 @@ void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_or double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ; - make_vector(weights, 
interp_order_lim); - make_vector(dj, interp_order_lim); - make_vector(tt, interp_order_lim); - make_vector(nodeX, interp_order_lim); - make_vector(nodeY, interp_order_lim); - make_vector(nodeZ, interp_order_lim); + make_vector(weights, interp_degree_lim); + make_vector(dj, interp_degree_lim); + make_vector(tt, interp_degree_lim); + make_vector(nodeX, interp_degree_lim); + make_vector(nodeY, interp_degree_lim); + make_vector(nodeZ, interp_degree_lim); double x0 = tree->x_min[idx]; double x1 = tree->x_max[idx]; @@ -303,8 +303,8 @@ void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_or #ifdef OPENACC_ENABLED int streamID = rand() % 4; #pragma acc kernels async(streamID) present(target_x, target_y, target_z, target_q, cluster_q, cluster_w) \ - create(nodeX[0:interp_order_lim], nodeY[0:interp_order_lim], nodeZ[0:interp_order_lim], \ - weights[0:interp_order_lim], dj[0:interp_order_lim], tt[0:interp_order_lim]) + create(nodeX[0:interp_degree_lim], nodeY[0:interp_degree_lim], nodeZ[0:interp_degree_lim], \ + weights[0:interp_degree_lim], dj[0:interp_degree_lim], tt[0:interp_degree_lim]) { #endif @@ -313,8 +313,8 @@ void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_or #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int i = 0; i < interp_order_lim; i++) { - tt[i] = cos(i * M_PI / interp_order); + for (int i = 0; i < interp_degree_lim; i++) { + tt[i] = cos(i * M_PI / interp_degree); nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); @@ -324,16 +324,16 @@ void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_or #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interp_order_lim; j++){ + for (int j = 0; j < interp_degree_lim; j++){ dj[j] = 1.0; if (j == 0) dj[j] = 0.5; - if (j == interp_order) dj[j] = 0.5; + if (j == interp_degree) dj[j] = 0.5; } #ifdef 
OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interp_order_lim; j++) { + for (int j = 0; j < interp_degree_lim; j++) { weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; } @@ -358,7 +358,7 @@ void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_or #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:sumX,sumY,sumZ) reduction(max:eix,eiy,eiz) #endif - for (int j = 0; j < interp_order_lim; j++) { // loop through the degree + for (int j = 0; j < interp_degree_lim; j++) { // loop through the degree double cx = tx - nodeX[j]; double cy = ty - nodeY[j]; @@ -388,11 +388,11 @@ void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_or #endif for (int j = 0; j < interp_pts_per_cluster; j++) { // loop over interpolation points, set (cx,cy,cz) for this point - int k1 = j%interp_order_lim; - int kk = (j-k1)/interp_order_lim; - int k2 = kk%interp_order_lim; + int k1 = j%interp_degree_lim; + int kk = (j-k1)/interp_degree_lim; + int k2 = kk%interp_degree_lim; kk = kk - k2; - int k3 = kk / interp_order_lim; + int k3 = kk / interp_degree_lim; double w3 = weights[k3]; double w2 = weights[k2]; @@ -450,11 +450,11 @@ void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_or -void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int interp_order, +void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int interp_degree, double *target_x, double *target_y, double *target_z, double *target_q, double *cluster_q, double *cluster_w) { - int interp_order_lim = interp_order + 1; - int interp_pts_per_cluster = interp_order_lim * interp_order_lim * interp_order_lim; + int interp_degree_lim = interp_degree + 1; + int interp_pts_per_cluster = interp_degree_lim * interp_degree_lim * interp_degree_lim; int num_targets_in_cluster = tree->iend[idx] - tree->ibeg[idx] + 1; int target_start = tree->ibeg[idx] - 1; @@ -462,15 +462,15 @@ void cp_comp_pot_hermite(struct 
Tree *tree, int idx, double *potential, int inte double *dj, *tt, *ww, *wx, *wy, *wz, *nodeX, *nodeY, *nodeZ; - make_vector(dj, interp_order_lim); - make_vector(tt, interp_order_lim); - make_vector(ww, interp_order_lim); - make_vector(wx, interp_order_lim); - make_vector(wy, interp_order_lim); - make_vector(wz, interp_order_lim); - make_vector(nodeX, interp_order_lim); - make_vector(nodeY, interp_order_lim); - make_vector(nodeZ, interp_order_lim); + make_vector(dj, interp_degree_lim); + make_vector(tt, interp_degree_lim); + make_vector(ww, interp_degree_lim); + make_vector(wx, interp_degree_lim); + make_vector(wy, interp_degree_lim); + make_vector(wz, interp_degree_lim); + make_vector(nodeX, interp_degree_lim); + make_vector(nodeY, interp_degree_lim); + make_vector(nodeZ, interp_degree_lim); double *cluster_q_ = &cluster_q[8*cluster_start + 0*interp_pts_per_cluster]; double *cluster_q_dx = &cluster_q[8*cluster_start + 1*interp_pts_per_cluster]; @@ -494,9 +494,9 @@ void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int inte cluster_q_, cluster_q_dx, cluster_q_dy, cluster_q_dz, \ cluster_q_dxy, cluster_q_dyz, cluster_q_dxz, \ cluster_q_dxyz) \ - create(nodeX[0:interp_order_lim], nodeY[0:interp_order_lim], nodeZ[0:interp_order_lim], \ - dj[0:interp_order_lim], tt[0:interp_order_lim], ww[0:interp_order_lim], \ - wx[0:interp_order_lim], wy[0:interp_order_lim], wz[0:interp_order_lim]) + create(nodeX[0:interp_degree_lim], nodeY[0:interp_degree_lim], nodeZ[0:interp_degree_lim], \ + dj[0:interp_degree_lim], tt[0:interp_degree_lim], ww[0:interp_degree_lim], \ + wx[0:interp_degree_lim], wy[0:interp_degree_lim], wz[0:interp_degree_lim]) { #endif @@ -504,29 +504,29 @@ void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int inte #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int i = 0; i < interp_order_lim; i++) { - double xx = i * M_PI / interp_order; + for (int i = 0; i < interp_degree_lim; i++) { + double xx = i * 
M_PI / interp_degree; tt[i] = cos(xx); ww[i] = -cos(xx) / (2 * sin(xx) * sin(xx)); nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); } - ww[0] = 0.25 * (interp_order*interp_order/3.0 + 1.0/6.0); - ww[interp_order] = -ww[0]; + ww[0] = 0.25 * (interp_degree*interp_degree/3.0 + 1.0/6.0); + ww[interp_degree] = -ww[0]; // Compute weights #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < interp_order_lim; j++){ + for (int j = 0; j < interp_degree_lim; j++){ dj[j] = 1.0; wx[j] = -4.0 * ww[j] / (x1 - x0); wy[j] = -4.0 * ww[j] / (y1 - y0); wz[j] = -4.0 * ww[j] / (z1 - z0); } dj[0] = 0.25; - dj[interp_order] = 0.25; + dj[interp_degree] = 0.25; #ifdef OPENACC_ENABLED #pragma acc loop independent @@ -548,7 +548,7 @@ void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int inte #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:sumX,sumY,sumZ) reduction(max:eix,eiy,eiz) #endif - for (int j = 0; j < interp_order_lim; j++) { // loop through the degree + for (int j = 0; j < interp_degree_lim; j++) { // loop through the degree double cx = tx - nodeX[j]; double cy = ty - nodeY[j]; @@ -577,11 +577,11 @@ void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int inte #endif for (int j = 0; j < interp_pts_per_cluster; j++) { // loop over interpolation points, set (cx,cy,cz) for this point - int k1 = j%interp_order_lim; - int kk = (j-k1)/interp_order_lim; - int k2 = kk%interp_order_lim; + int k1 = j%interp_degree_lim; + int kk = (j-k1)/interp_degree_lim; + int k2 = kk%interp_degree_lim; kk = kk - k2; - int k3 = kk / interp_order_lim; + int k3 = kk / interp_degree_lim; double dx = tx - nodeX[k1]; double dy = ty - nodeY[k2]; diff --git a/src/interface/BaryTreeInterface.c b/src/interface/BaryTreeInterface.c index 3a280784..0d2a8e33 100644 --- a/src/interface/BaryTreeInterface.c +++ b/src/interface/BaryTreeInterface.c @@ 
-21,7 +21,7 @@ void BaryTreeInterface(int numTargets, int numSources, double *outputArray, KERNEL kernel, int numKernelParams, double *kernelParams, SINGULARITY singularity, APPROXIMATION approximation, COMPUTE_TYPE compute_type, - double theta, int interpOrder, int maxPerSourceLeaf, int maxPerTargetLeaf, + double theta, int interpDegree, int maxPerSourceLeaf, int maxPerTargetLeaf, double sizeCheck, double beta, int verbosity) { @@ -32,7 +32,7 @@ void BaryTreeInterface(int numTargets, int numSources, RunParams_Setup(&run_params, kernel, numKernelParams, kernelParams, approximation, singularity, compute_type, - theta, interpOrder, + theta, interpDegree, maxPerSourceLeaf, maxPerTargetLeaf, sizeCheck, beta, verbosity); diff --git a/src/run_params/run_params.c b/src/run_params/run_params.c index a852a92b..8c9bdfdc 100644 --- a/src/run_params/run_params.c +++ b/src/run_params/run_params.c @@ -15,7 +15,7 @@ void RunParams_Setup(struct RunParams **run_params_addr, APPROXIMATION approximation, SINGULARITY singularity, COMPUTE_TYPE compute_type, - double theta, int interp_order, + double theta, int interp_degree, int max_per_source_leaf, int max_per_target_leaf, double size_check_factor, double beta, int verbosity) { @@ -40,13 +40,13 @@ void RunParams_Setup(struct RunParams **run_params_addr, run_params->beta = -1; run_params->theta = theta; - run_params->interp_order = interp_order; + run_params->interp_degree = interp_degree; run_params->size_check_factor = size_check_factor; run_params->max_per_source_leaf = max_per_source_leaf; run_params->max_per_target_leaf = max_per_target_leaf; - run_params->interp_pts_per_cluster = (interp_order+1) * (interp_order+1) * (interp_order+1); + run_params->interp_pts_per_cluster = (interp_degree+1) * (interp_degree+1) * (interp_degree+1); } else { @@ -93,11 +93,11 @@ void RunParams_Setup(struct RunParams **run_params_addr, } run_params->theta = theta_max - (theta_max - theta_min) * pow(beta, exp_s); - run_params->interp_order = (int) 
(n_max - (n_max - n_min) * pow(1. - beta, exp_t)); + run_params->interp_degree = (int) (n_max - (n_max - n_min) * pow(1. - beta, exp_t)); - run_params->interp_pts_per_cluster = (run_params->interp_order + 1) - * (run_params->interp_order + 1) - * (run_params->interp_order + 1); + run_params->interp_pts_per_cluster = (run_params->interp_degree + 1) + * (run_params->interp_degree + 1) + * (run_params->interp_degree + 1); #ifdef OPENACC_ENABLED run_params->max_per_source_leaf = 3000; @@ -134,8 +134,8 @@ void RunParams_Setup(struct RunParams **run_params_addr, void RunParams_Validate(struct RunParams *run_params) { - int interp_order_lim = run_params->interp_order + 1; - run_params->interp_pts_per_cluster = interp_order_lim * interp_order_lim * interp_order_lim; + int interp_degree_lim = run_params->interp_degree + 1; + run_params->interp_pts_per_cluster = interp_degree_lim * interp_degree_lim * interp_degree_lim; run_params->interp_weights_per_cluster = run_params->interp_pts_per_cluster; run_params->interp_charges_per_cluster = run_params->interp_pts_per_cluster; @@ -182,7 +182,7 @@ void RunParams_Print(struct RunParams *run_params) printf("[BaryTree] singularity = %d\n", run_params->singularity); printf("[BaryTree] compute_type = %d\n", run_params->compute_type); printf("[BaryTree] theta = %f\n", run_params->theta); - printf("[BaryTree] interp_order = %d\n", run_params->interp_order); + printf("[BaryTree] interp_degree = %d\n", run_params->interp_degree); printf("[BaryTree] interp_pts_per_cluster = %d\n", run_params->interp_pts_per_cluster); printf("[BaryTree] interp_weights_per_cluster = %d\n", run_params->interp_weights_per_cluster); printf("[BaryTree] interp_charges_per_cluster = %d\n", run_params->interp_charges_per_cluster); diff --git a/src/run_params/run_params.h b/src/run_params/run_params.h index e399f0f9..5c22f26c 100644 --- a/src/run_params/run_params.h +++ b/src/run_params/run_params.h @@ -10,7 +10,7 @@ void RunParams_Setup(struct RunParams 
**run_params_addr, APPROXIMATION approximation, SINGULARITY singularity, COMPUTE_TYPE compute_type, - double theta, int interp_order, + double theta, int interp_degree, int max_per_source_leaf, int max_per_target_leaf, double size_check_factor, double beta, int verbosity); diff --git a/src/run_params/struct_run_params.h b/src/run_params/struct_run_params.h index 2696cc0e..ef5c40b1 100644 --- a/src/run_params/struct_run_params.h +++ b/src/run_params/struct_run_params.h @@ -17,7 +17,7 @@ struct RunParams double theta; double size_check_factor; - int interp_order; + int interp_degree; int interp_pts_per_cluster; int interp_charges_per_cluster; int interp_weights_per_cluster; diff --git a/tests/serial_tests.c b/tests/serial_tests.c index dc4fc154..1fa0d2de 100644 --- a/tests/serial_tests.c +++ b/tests/serial_tests.c @@ -247,7 +247,7 @@ static char *test_treecode_on_100_particles() int max_per_source_leaf = 3; int max_per_target_leaf = 3; - int order = 2; + int degree = 2; double theta = 0.7; double size_check = 1.0; @@ -257,7 +257,7 @@ static char *test_treecode_on_100_particles() RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - theta, order, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); + theta, degree, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); /***********************************************/ @@ -591,7 +591,7 @@ static char *test_treecode_on_1_target_10000_sources() int max_per_source_leaf = 100; int max_per_target_leaf = 100; - int order = 4; + int degree = 4; double theta = 0.8; double size_check = 0.0; @@ -600,7 +600,7 @@ static char *test_treecode_on_1_target_10000_sources() RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - theta, order, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); + theta, degree, max_per_source_leaf, max_per_target_leaf, 
size_check, beta, verbosity); /***********************************************/ @@ -895,7 +895,7 @@ static char *test_treecode_wrapper() int max_per_source_leaf = 100; int max_per_target_leaf = 100; - int order = 4; + int degree = 4; double theta = 0.8; double beta = -1.0; double size_check = 1.0; @@ -905,7 +905,7 @@ static char *test_treecode_wrapper() RunParams_Setup(&run_params, NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, PARTICLE_CLUSTER, - theta, order, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); + theta, degree, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); /***********************************************/ @@ -924,7 +924,7 @@ static char *test_treecode_wrapper() sources->x,sources->y,sources->z,sources->q,sources->w, potential_wrapper, COULOMB, num_kernel_params, kernel_params, SKIPPING, LAGRANGE, PARTICLE_CLUSTER, - theta, order, max_per_source_leaf, max_per_target_leaf, + theta, degree, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); treedriver(sources, targets, run_params, potential, time_tree); @@ -1031,14 +1031,14 @@ static char *test_treecode_parameters_on_1_target_10000_sources() 0, 0, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); - // 3 parameter sets. Set 2 increases order, set 3 reduces MAC. Both should be more accurate than set 1. - int order1=3; + // 3 parameter sets. Set 2 increases degree, set 3 reduces MAC. Both should be more accurate than set 1. 
+ int degree1=3; double theta1=0.9; - int order2=6; + int degree2=6; double theta2=0.9; - int order3=3; + int degree3=3; double theta3=0.4; /***********************************************/ @@ -1057,15 +1057,15 @@ static char *test_treecode_parameters_on_1_target_10000_sources() directdriver(sources, targets, run_params, potential_direct, time_tree); - run_params->interp_order = order1; + run_params->interp_degree = degree1; run_params->theta = theta1; treedriver(sources, targets, run_params, potential1, time_tree); - run_params->interp_order = order2; + run_params->interp_degree = degree2; run_params->theta = theta2; treedriver(sources, targets, run_params, potential2, time_tree); - run_params->interp_order = order3; + run_params->interp_degree = degree3; run_params->theta = theta3; treedriver(sources, targets, run_params, potential3, time_tree); @@ -1078,7 +1078,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() if (verbosity>0) printf("err1 = %1.4e\n", err1); if (verbosity>0) printf("err2 = %1.4e\n", err2); if (verbosity>0) printf("err3 = %1.4e\n", err3); - mu_assert("TEST FAILED: increasing order didn't improve accuracy for: lagrange-coulomb-skipping", \ + mu_assert("TEST FAILED: increasing degree didn't improve accuracy for: lagrange-coulomb-skipping", \ err2 < err1); mu_assert("TEST FAILED: decreasing theta didn't improve accuracy for: lagrange-coulomb-skipping", \ err3 < err1); @@ -1101,15 +1101,15 @@ static char *test_treecode_parameters_on_1_target_10000_sources() directdriver(sources, targets, run_params, potential_direct, time_tree); - run_params->interp_order = order1; + run_params->interp_degree = degree1; run_params->theta = theta1; treedriver(sources, targets, run_params, potential1, time_tree); - run_params->interp_order = order2; + run_params->interp_degree = degree2; run_params->theta = theta2; treedriver(sources, targets, run_params, potential2, time_tree); - run_params->interp_order = order3; + run_params->interp_degree = 
degree3; run_params->theta = theta3; treedriver(sources, targets, run_params, potential3, time_tree); @@ -1121,7 +1121,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() if (verbosity>0) printf("err1 = %1.4e\n", err1); if (verbosity>0) printf("err2 = %1.4e\n", err2); if (verbosity>0) printf("err3 = %1.4e\n", err3); - mu_assert("TEST FAILED: increasing order didn't improve accuracy for: lagrange-coulomb-subtraction", \ + mu_assert("TEST FAILED: increasing degree didn't improve accuracy for: lagrange-coulomb-subtraction", \ err2 < err1); mu_assert("TEST FAILED: decreasing theta didn't improve accuracy for: lagrange-coulomb-subtraction", \ err3 < err1); @@ -1144,15 +1144,15 @@ static char *test_treecode_parameters_on_1_target_10000_sources() directdriver(sources, targets, run_params, potential_direct, time_tree); - run_params->interp_order = order1; + run_params->interp_degree = degree1; run_params->theta = theta1; treedriver(sources, targets, run_params, potential1, time_tree); - run_params->interp_order = order2; + run_params->interp_degree = degree2; run_params->theta = theta2; treedriver(sources, targets, run_params, potential2, time_tree); - run_params->interp_order = order3; + run_params->interp_degree = degree3; run_params->theta = theta3; treedriver(sources, targets, run_params, potential3, time_tree); @@ -1164,7 +1164,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() if (verbosity>0) printf("err1 = %1.4e\n", err1); if (verbosity>0) printf("err2 = %1.4e\n", err2); if (verbosity>0) printf("err3 = %1.4e\n", err3); - mu_assert("TEST FAILED: increasing order didn't improve accuracy for: lagrange-yukawa-skipping", \ + mu_assert("TEST FAILED: increasing degree didn't improve accuracy for: lagrange-yukawa-skipping", \ err2 < err1); mu_assert("TEST FAILED: decreasing theta didn't improve accuracy for: lagrange-yukawa-skipping", \ err3 < err1); @@ -1187,15 +1187,15 @@ static char 
*test_treecode_parameters_on_1_target_10000_sources() directdriver(sources, targets, run_params, potential_direct, time_tree); - run_params->interp_order = order1; + run_params->interp_degree = degree1; run_params->theta = theta1; treedriver(sources, targets, run_params, potential1, time_tree); - run_params->interp_order = order2; + run_params->interp_degree = degree2; run_params->theta = theta2; treedriver(sources, targets, run_params, potential2, time_tree); - run_params->interp_order = order3; + run_params->interp_degree = degree3; run_params->theta = theta3; treedriver(sources, targets, run_params, potential3, time_tree); @@ -1207,7 +1207,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() if (verbosity>0) printf("err1 = %1.4e\n", err1); if (verbosity>0) printf("err2 = %1.4e\n", err2); if (verbosity>0) printf("err3 = %1.4e\n", err3); - mu_assert("TEST FAILED: increasing order didn't improve accuracy for: lagrange-yukawa-subtraction", \ + mu_assert("TEST FAILED: increasing degree didn't improve accuracy for: lagrange-yukawa-subtraction", \ err2 < err1); mu_assert("TEST FAILED: decreasing theta didn't improve accuracy for: lagrange-yukawa-subtraction", \ err3 < err1); @@ -1230,15 +1230,15 @@ static char *test_treecode_parameters_on_1_target_10000_sources() directdriver(sources, targets, run_params, potential_direct, time_tree); - run_params->interp_order = order1; + run_params->interp_degree = degree1; run_params->theta = theta1; treedriver(sources, targets, run_params, potential1, time_tree); - run_params->interp_order = order2; + run_params->interp_degree = degree2; run_params->theta = theta2; treedriver(sources, targets, run_params, potential2, time_tree); - run_params->interp_order = order3; + run_params->interp_degree = degree3; run_params->theta = theta3; treedriver(sources, targets, run_params, potential3, time_tree); @@ -1252,7 +1252,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() if (verbosity>0) printf("err2 = 
%1.4e\n", err2); if (verbosity>0) printf("err3 = %1.4e\n", err3); verbosity=0; - mu_assert("TEST FAILED: increasing order didn't improve accuracy for: hermite-coulomb-skipping", \ + mu_assert("TEST FAILED: increasing degree didn't improve accuracy for: hermite-coulomb-skipping", \ err2 < err1); mu_assert("TEST FAILED: decreasing theta didn't improve accuracy for: hermite-coulomb-skipping", \ err3 < err1); @@ -1275,15 +1275,15 @@ static char *test_treecode_parameters_on_1_target_10000_sources() directdriver(sources, targets, run_params, potential_direct, time_tree); - run_params->interp_order = order1; + run_params->interp_degree = degree1; run_params->theta = theta1; treedriver(sources, targets, run_params, potential1, time_tree); - run_params->interp_order = order2; + run_params->interp_degree = degree2; run_params->theta = theta2; treedriver(sources, targets, run_params, potential2, time_tree); - run_params->interp_order = order3; + run_params->interp_degree = degree3; run_params->theta = theta3; treedriver(sources, targets, run_params, potential3, time_tree); @@ -1295,7 +1295,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() if (verbosity>0) printf("err1 = %1.4e\n", err1); if (verbosity>0) printf("err2 = %1.4e\n", err2); if (verbosity>0) printf("err3 = %1.4e\n", err3); - mu_assert("TEST FAILED: increasing order didn't improve accuracy for: hermite-coulomb-subtraction", \ + mu_assert("TEST FAILED: increasing degree didn't improve accuracy for: hermite-coulomb-subtraction", \ err2 < err1); mu_assert("TEST FAILED: decreasing theta didn't improve accuracy for: hermite-coulomb-subtraction", \ err3 < err1); @@ -1318,15 +1318,15 @@ static char *test_treecode_parameters_on_1_target_10000_sources() directdriver(sources, targets, run_params, potential_direct, time_tree); - run_params->interp_order = order1; + run_params->interp_degree = degree1; run_params->theta = theta1; treedriver(sources, targets, run_params, potential1, time_tree); - 
run_params->interp_order = order2; + run_params->interp_degree = degree2; run_params->theta = theta2; treedriver(sources, targets, run_params, potential2, time_tree); - run_params->interp_order = order3; + run_params->interp_degree = degree3; run_params->theta = theta3; treedriver(sources, targets, run_params, potential3, time_tree); @@ -1338,7 +1338,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() if (verbosity>0) printf("err1 = %1.4e\n", err1); if (verbosity>0) printf("err2 = %1.4e\n", err2); if (verbosity>0) printf("err3 = %1.4e\n", err3); - mu_assert("TEST FAILED: increasing order didn't improve accuracy for: hermite-yukawa-skipping", \ + mu_assert("TEST FAILED: increasing degree didn't improve accuracy for: hermite-yukawa-skipping", \ err2 < err1); mu_assert("TEST FAILED: decreasing theta didn't improve accuracy for: hermite-yukawa-skipping", \ err3 < err1); @@ -1361,15 +1361,15 @@ static char *test_treecode_parameters_on_1_target_10000_sources() directdriver(sources, targets, run_params, potential_direct, time_tree); - run_params->interp_order = order1; + run_params->interp_degree = degree1; run_params->theta = theta1; treedriver(sources, targets, run_params, potential1, time_tree); - run_params->interp_order = order2; + run_params->interp_degree = degree2; run_params->theta = theta2; treedriver(sources, targets, run_params, potential2, time_tree); - run_params->interp_order = order3; + run_params->interp_degree = degree3; run_params->theta = theta3; treedriver(sources, targets, run_params, potential3, time_tree); @@ -1381,7 +1381,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() if (verbosity>0) printf("err1 = %1.4e\n", err1); if (verbosity>0) printf("err2 = %1.4e\n", err2); if (verbosity>0) printf("err3 = %1.4e\n", err3); - mu_assert("TEST FAILED: increasing order didn't improve accuracy for: hermite-yukawa-subtraction", \ + mu_assert("TEST FAILED: increasing degree didn't improve accuracy for: 
hermite-yukawa-subtraction", \ err2 < err1); mu_assert("TEST FAILED: decreasing theta didn't improve accuracy for: hermite-yukawa-subtraction", \ err3 < err1); From c36df316f1ff58dc784036d4413743cbf164ceba Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 11:28:33 -0400 Subject: [PATCH 43/95] Coulomb and Yukawa kernels updated to use PP instead of Direct --- src/CMakeLists.txt | 32 +++++++++---------- .../interaction_compute_cc.c | 16 +++++----- .../interaction_compute_cp.c | 16 +++++----- .../interaction_compute_direct.c | 16 +++++----- .../interaction_compute_pc.c | 16 +++++----- src/kernels/coulomb/coulomb.h | 4 +-- .../{coulomb_direct.c => coulomb_pp.c} | 4 +-- .../yukawa_direct.h => coulomb/coulomb_pp.h} | 9 +++--- .../{coulomb_ss_direct.c => coulomb_ss_pp.c} | 4 +-- .../{coulomb_ss_direct.h => coulomb_ss_pp.h} | 8 ++--- .../regularized-coulomb/regularized-coulomb.h | 4 +-- ...lomb_direct.c => regularized-coulomb_PP.c} | 4 +-- .../regularized-coulomb_PP.h} | 8 ++--- ...s_direct.c => regularized-coulomb_ss_pp.c} | 4 +-- ...s_direct.h => regularized-coulomb_ss_pp.h} | 8 ++--- .../regularized-yukawa/regularized-yukawa.h | 4 +-- ...ukawa_direct.c => regularized-yukawa_pp.c} | 4 +-- .../regularized-yukawa_pp.h} | 8 ++--- ...ss_direct.c => regularized-yukawa_ss_pp.c} | 4 +-- ...ss_direct.h => regularized-yukawa_ss_pp.h} | 8 ++--- src/kernels/yukawa/yukawa.h | 4 +-- .../yukawa/{yukawa_direct.c => yukawa_pp.c} | 4 +-- .../coulomb_direct.h => yukawa/yukawa_pp.h} | 9 +++--- .../{yukawa_ss_direct.c => yukawa_ss_pp.c} | 4 +-- .../{yukawa_ss_direct.h => yukawa_ss_pp.h} | 8 ++--- 25 files changed, 105 insertions(+), 105 deletions(-) rename src/kernels/coulomb/{coulomb_direct.c => coulomb_pp.c} (95%) rename src/kernels/{yukawa/yukawa_direct.h => coulomb/coulomb_pp.h} (67%) rename src/kernels/coulomb/{coulomb_ss_direct.c => coulomb_ss_pp.c} (96%) rename src/kernels/coulomb/{coulomb_ss_direct.h => coulomb_ss_pp.h} (68%) rename 
src/kernels/regularized-coulomb/{regularized-coulomb_direct.c => regularized-coulomb_PP.c} (94%) rename src/kernels/{regularized-yukawa/regularized-yukawa_direct.h => regularized-coulomb/regularized-coulomb_PP.h} (63%) rename src/kernels/regularized-coulomb/{regularized-coulomb_ss_direct.c => regularized-coulomb_ss_pp.c} (92%) rename src/kernels/regularized-coulomb/{regularized-coulomb_ss_direct.h => regularized-coulomb_ss_pp.h} (63%) rename src/kernels/regularized-yukawa/{regularized-yukawa_direct.c => regularized-yukawa_pp.c} (93%) rename src/kernels/{regularized-coulomb/regularized-coulomb_direct.h => regularized-yukawa/regularized-yukawa_pp.h} (62%) rename src/kernels/regularized-yukawa/{regularized-yukawa_ss_direct.c => regularized-yukawa_ss_pp.c} (92%) rename src/kernels/regularized-yukawa/{regularized-yukawa_ss_direct.h => regularized-yukawa_ss_pp.h} (63%) rename src/kernels/yukawa/{yukawa_direct.c => yukawa_pp.c} (94%) rename src/kernels/{coulomb/coulomb_direct.h => yukawa/yukawa_pp.h} (67%) rename src/kernels/yukawa/{yukawa_ss_direct.c => yukawa_ss_pp.c} (93%) rename src/kernels/yukawa/{yukawa_ss_direct.h => yukawa_ss_pp.h} (68%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87c441e5..f6774008 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -65,8 +65,8 @@ SET(SRCS_TREEDRIVER SET(SRCS_K_COULOMB # Singularity skipping kernels/coulomb/coulomb.h - kernels/coulomb/coulomb_direct.h - kernels/coulomb/coulomb_direct.c + kernels/coulomb/coulomb_pp.h + kernels/coulomb/coulomb_pp.c kernels/coulomb/coulomb_pc.h kernels/coulomb/coulomb_pc.c kernels/coulomb/coulomb_cp.h @@ -74,8 +74,8 @@ SET(SRCS_K_COULOMB kernels/coulomb/coulomb_cc_hermite.h kernels/coulomb/coulomb_cc_hermite.c # Singularity subtraction - kernels/coulomb/coulomb_ss_direct.h - kernels/coulomb/coulomb_ss_direct.c + kernels/coulomb/coulomb_ss_pp.h + kernels/coulomb/coulomb_ss_pp.c kernels/coulomb/coulomb_ss_correction.h kernels/coulomb/coulomb_ss_correction.c 
kernels/coulomb/coulomb_ss_pc.h @@ -89,15 +89,15 @@ SET(SRCS_K_COULOMB SET(SRCS_K_YUKAWA # Singularity skipping kernels/yukawa/yukawa.h - kernels/yukawa/yukawa_direct.h - kernels/yukawa/yukawa_direct.c + kernels/yukawa/yukawa_pp.h + kernels/yukawa/yukawa_pp.c kernels/yukawa/yukawa_pc.h kernels/yukawa/yukawa_pc.c kernels/yukawa/yukawa_cp.h kernels/yukawa/yukawa_cp.c # Singularity subtraction - kernels/yukawa/yukawa_ss_direct.h - kernels/yukawa/yukawa_ss_direct.c + kernels/yukawa/yukawa_ss_pp.h + kernels/yukawa/yukawa_ss_pp.c kernels/yukawa/yukawa_ss_correction.h kernels/yukawa/yukawa_ss_correction.c kernels/yukawa/yukawa_ss_pc.h @@ -111,15 +111,15 @@ SET(SRCS_K_YUKAWA SET(SRCS_K_REGULARIZED_COULOMB # Singularity skipping kernels/regularized-coulomb/regularized-coulomb.h - kernels/regularized-coulomb/regularized-coulomb_direct.h - kernels/regularized-coulomb/regularized-coulomb_direct.c + kernels/regularized-coulomb/regularized-coulomb_pp.h + kernels/regularized-coulomb/regularized-coulomb_pp.c kernels/regularized-coulomb/regularized-coulomb_pc.h kernels/regularized-coulomb/regularized-coulomb_pc.c kernels/regularized-coulomb/regularized-coulomb_cp.h kernels/regularized-coulomb/regularized-coulomb_cp.c # Singularity subtraction - kernels/regularized-coulomb/regularized-coulomb_ss_direct.h - kernels/regularized-coulomb/regularized-coulomb_ss_direct.c + kernels/regularized-coulomb/regularized-coulomb_ss_pp.h + kernels/regularized-coulomb/regularized-coulomb_ss_pp.c kernels/regularized-coulomb/regularized-coulomb_ss_correction.h kernels/regularized-coulomb/regularized-coulomb_ss_correction.c kernels/regularized-coulomb/regularized-coulomb_ss_pc.h @@ -131,15 +131,15 @@ SET(SRCS_K_REGULARIZED_COULOMB SET(SRCS_K_REGULARIZED_YUKAWA #Singularity skipping kernels/regularized-yukawa/regularized-yukawa.h - kernels/regularized-yukawa/regularized-yukawa_direct.h - kernels/regularized-yukawa/regularized-yukawa_direct.c + kernels/regularized-yukawa/regularized-yukawa_pp.h + 
kernels/regularized-yukawa/regularized-yukawa_pp.c kernels/regularized-yukawa/regularized-yukawa_pc.h kernels/regularized-yukawa/regularized-yukawa_pc.c kernels/regularized-yukawa/regularized-yukawa_cp.h kernels/regularized-yukawa/regularized-yukawa_cp.c #Singularity subtraction - kernels/regularized-yukawa/regularized-yukawa_ss_direct.h - kernels/regularized-yukawa/regularized-yukawa_ss_direct.c + kernels/regularized-yukawa/regularized-yukawa_ss_pp.h + kernels/regularized-yukawa/regularized-yukawa_ss_pp.c kernels/regularized-yukawa/regularized-yukawa_ss_correction.h kernels/regularized-yukawa/regularized-yukawa_ss_correction.c kernels/regularized-yukawa/regularized-yukawa_ss_pc.h diff --git a/src/interaction_compute/interaction_compute_cc.c b/src/interaction_compute/interaction_compute_cc.c index 65b22437..b4983f47 100644 --- a/src/interaction_compute/interaction_compute_cc.c +++ b/src/interaction_compute/interaction_compute_cc.c @@ -883,7 +883,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T if (run_params->singularity == SKIPPING) { - K_Coulomb_Direct(num_targets_in_cluster, num_sources_in_cluster, + K_Coulomb_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -891,7 +891,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } else if (run_params->singularity == SUBTRACTION) { - K_Coulomb_SS_Direct(num_targets_in_cluster, num_sources_in_cluster, + K_Coulomb_SS_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, @@ -910,7 +910,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T if (run_params->singularity == SKIPPING) { - K_Yukawa_Direct(num_targets_in_cluster, num_sources_in_cluster, + K_Yukawa_PP(num_targets_in_cluster, 
num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -918,7 +918,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } else if (run_params->singularity == SUBTRACTION) { - K_Yukawa_SS_Direct(num_targets_in_cluster, num_sources_in_cluster, + K_Yukawa_SS_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, @@ -937,7 +937,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T if (run_params->singularity == SKIPPING) { - K_RegularizedCoulomb_Direct(num_targets_in_cluster, num_sources_in_cluster, + K_RegularizedCoulomb_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -945,7 +945,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } else if (run_params->singularity == SUBTRACTION) { - K_RegularizedCoulomb_SS_Direct(num_targets_in_cluster, num_sources_in_cluster, + K_RegularizedCoulomb_SS_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, @@ -964,7 +964,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T if (run_params->singularity == SKIPPING) { - K_RegularizedYukawa_Direct(num_targets_in_cluster, num_sources_in_cluster, + K_RegularizedYukawa_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -972,7 +972,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } else if (run_params->singularity == SUBTRACTION) { - K_RegularizedYukawa_SS_Direct(num_targets_in_cluster, 
num_sources_in_cluster, + K_RegularizedYukawa_SS_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, diff --git a/src/interaction_compute/interaction_compute_cp.c b/src/interaction_compute/interaction_compute_cp.c index 4dc030d6..42423554 100644 --- a/src/interaction_compute/interaction_compute_cp.c +++ b/src/interaction_compute/interaction_compute_cp.c @@ -366,7 +366,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba if (run_params->singularity == SKIPPING) { - K_Coulomb_Direct(num_targets_in_cluster, num_sources_in_batch, + K_Coulomb_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -374,7 +374,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->singularity == SUBTRACTION) { - K_Coulomb_SS_Direct(num_targets_in_cluster, num_sources_in_batch, + K_Coulomb_SS_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, @@ -393,7 +393,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba if (run_params->singularity == SKIPPING) { - K_Yukawa_Direct(num_targets_in_cluster, num_sources_in_batch, + K_Yukawa_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -401,7 +401,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->singularity == SUBTRACTION) { - K_Yukawa_SS_Direct(num_targets_in_cluster, num_sources_in_batch, + K_Yukawa_SS_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, target_q, source_x, source_y, 
source_z, source_q, source_w, @@ -420,7 +420,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba if (run_params->singularity == SKIPPING) { - K_RegularizedCoulomb_Direct(num_targets_in_cluster, num_sources_in_batch, + K_RegularizedCoulomb_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -428,7 +428,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->singularity == SUBTRACTION) { - K_RegularizedCoulomb_SS_Direct(num_targets_in_cluster, num_sources_in_batch, + K_RegularizedCoulomb_SS_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, @@ -447,7 +447,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba if (run_params->singularity == SKIPPING) { - K_RegularizedYukawa_Direct(num_targets_in_cluster, num_sources_in_batch, + K_RegularizedYukawa_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -455,7 +455,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->singularity == SUBTRACTION) { - K_RegularizedYukawa_SS_Direct(num_targets_in_cluster, num_sources_in_batch, + K_RegularizedYukawa_SS_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, diff --git a/src/interaction_compute/interaction_compute_direct.c b/src/interaction_compute/interaction_compute_direct.c index 284c3d70..29162811 100644 --- a/src/interaction_compute/interaction_compute_direct.c +++ b/src/interaction_compute/interaction_compute_direct.c @@ -59,14 +59,14 @@ void 
InteractionCompute_Direct(double *potential, if (run_params->singularity == SKIPPING) { - K_Coulomb_Direct(num_targets, num_sources, 0, 0, + K_Coulomb_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); } else if (run_params->singularity == SUBTRACTION) { - K_Coulomb_SS_Direct(num_targets, num_sources, 0, 0, + K_Coulomb_SS_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); @@ -84,14 +84,14 @@ void InteractionCompute_Direct(double *potential, if (run_params->singularity == SKIPPING) { - K_Yukawa_Direct(num_targets, num_sources, 0, 0, + K_Yukawa_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); } else if (run_params->singularity == SUBTRACTION) { - K_Yukawa_SS_Direct(num_targets, num_sources, 0, 0, + K_Yukawa_SS_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); @@ -109,14 +109,14 @@ void InteractionCompute_Direct(double *potential, if (run_params->singularity == SKIPPING) { - K_RegularizedCoulomb_Direct(num_targets, num_sources, 0, 0, + K_RegularizedCoulomb_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); } else if (run_params->singularity == SUBTRACTION) { - K_RegularizedCoulomb_SS_Direct(num_targets, num_sources, 0, 0, + K_RegularizedCoulomb_SS_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); @@ -132,14 +132,14 @@ void InteractionCompute_Direct(double *potential, if (run_params->singularity == SKIPPING) { - K_RegularizedYukawa_Direct(num_targets, num_sources, 0, 0, + 
K_RegularizedYukawa_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); } else if (run_params->singularity == SUBTRACTION) { - K_RegularizedYukawa_SS_Direct(num_targets, num_sources, 0, 0, + K_RegularizedYukawa_SS_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); diff --git a/src/interaction_compute/interaction_compute_pc.c b/src/interaction_compute/interaction_compute_pc.c index 7e02c8ab..3063763f 100644 --- a/src/interaction_compute/interaction_compute_pc.c +++ b/src/interaction_compute/interaction_compute_pc.c @@ -425,7 +425,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba if (run_params->singularity == SKIPPING) { - K_Coulomb_Direct(num_targets_in_batch, num_sources_in_cluster, + K_Coulomb_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -433,7 +433,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->singularity == SUBTRACTION) { - K_Coulomb_SS_Direct(num_targets_in_batch, num_sources_in_cluster, + K_Coulomb_SS_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, @@ -452,7 +452,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba if (run_params->singularity == SKIPPING) { - K_Yukawa_Direct(num_targets_in_batch, num_sources_in_cluster, + K_Yukawa_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -460,7 +460,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba } else if 
(run_params->singularity == SUBTRACTION) { - K_Yukawa_SS_Direct(num_targets_in_batch, num_sources_in_cluster, + K_Yukawa_SS_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, @@ -479,7 +479,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba if (run_params->singularity == SKIPPING) { - K_RegularizedCoulomb_Direct(num_targets_in_batch, num_sources_in_cluster, + K_RegularizedCoulomb_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -487,7 +487,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->singularity == SUBTRACTION) { - K_RegularizedCoulomb_SS_Direct(num_targets_in_batch, num_sources_in_cluster, + K_RegularizedCoulomb_SS_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, @@ -503,7 +503,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba if (run_params->singularity == SKIPPING) { - K_RegularizedYukawa_Direct(num_targets_in_batch, num_sources_in_cluster, + K_RegularizedYukawa_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -511,7 +511,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->singularity == SUBTRACTION) { - K_RegularizedYukawa_SS_Direct(num_targets_in_batch, num_sources_in_cluster, + K_RegularizedYukawa_SS_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, diff --git a/src/kernels/coulomb/coulomb.h 
b/src/kernels/coulomb/coulomb.h index 5a413dd3..186c2928 100644 --- a/src/kernels/coulomb/coulomb.h +++ b/src/kernels/coulomb/coulomb.h @@ -2,12 +2,12 @@ #ifndef H_K_COULOMB_H #define H_K_COULOMB_H -#include "coulomb_direct.h" +#include "coulomb_pp.h" #include "coulomb_pc.h" #include "coulomb_cp.h" #include "coulomb_cc_hermite.h" -#include "coulomb_ss_direct.h" +#include "coulomb_ss_pp.h" #include "coulomb_ss_correction.h" #include "coulomb_ss_pc.h" #include "coulomb_ss_cp.h" diff --git a/src/kernels/coulomb/coulomb_direct.c b/src/kernels/coulomb/coulomb_pp.c similarity index 95% rename from src/kernels/coulomb/coulomb_direct.c rename to src/kernels/coulomb/coulomb_pp.c index b46470e1..b971232d 100644 --- a/src/kernels/coulomb/coulomb_direct.c +++ b/src/kernels/coulomb/coulomb_pp.c @@ -3,9 +3,9 @@ #include #include "../../run_params/struct_run_params.h" -#include "coulomb_direct.h" +#include "coulomb_pp.h" -void K_Coulomb_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_Coulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, diff --git a/src/kernels/yukawa/yukawa_direct.h b/src/kernels/coulomb/coulomb_pp.h similarity index 67% rename from src/kernels/yukawa/yukawa_direct.h rename to src/kernels/coulomb/coulomb_pp.h index 9f6a1378..130421e5 100644 --- a/src/kernels/yukawa/yukawa_direct.h +++ b/src/kernels/coulomb/coulomb_pp.h @@ -1,14 +1,15 @@ /* Interaction Kernels */ -#ifndef H_K_YUKAWA_DIRECT_H -#define H_K_YUKAWA_DIRECT_H +#ifndef H_K_COULOMB_PP_H +#define H_K_COULOMB_PP_H #include "../../run_params/struct_run_params.h" -void K_Yukawa_Direct(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, + +void K_Coulomb_PP(int number_of_targets_in_batch, int 
number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_YUKAWA_DIRECT_H */ +#endif /* H_K_COULOMB_PP_H */ diff --git a/src/kernels/coulomb/coulomb_ss_direct.c b/src/kernels/coulomb/coulomb_ss_pp.c similarity index 96% rename from src/kernels/coulomb/coulomb_ss_direct.c rename to src/kernels/coulomb/coulomb_ss_pp.c index 77283812..a585c995 100644 --- a/src/kernels/coulomb/coulomb_ss_direct.c +++ b/src/kernels/coulomb/coulomb_ss_pp.c @@ -3,9 +3,9 @@ #include #include "../../run_params/struct_run_params.h" -#include "coulomb_ss_direct.h" +#include "coulomb_ss_pp.h" -void K_Coulomb_SS_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_Coulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *target_charge, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, diff --git a/src/kernels/coulomb/coulomb_ss_direct.h b/src/kernels/coulomb/coulomb_ss_pp.h similarity index 68% rename from src/kernels/coulomb/coulomb_ss_direct.h rename to src/kernels/coulomb/coulomb_ss_pp.h index d4228992..006ee112 100644 --- a/src/kernels/coulomb/coulomb_ss_direct.h +++ b/src/kernels/coulomb/coulomb_ss_pp.h @@ -1,15 +1,15 @@ /* Interaction Kernels */ -#ifndef H_K_COULOMB_SS_DIRECT_H -#define H_K_COULOMB_SS_DIRECT_H +#ifndef H_K_COULOMB_SS_PP_H +#define H_K_COULOMB_SS_PP_H #include "../../run_params/struct_run_params.h" -void K_Coulomb_SS_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_Coulomb_SS_PP(int number_of_targets_in_batch, int 
number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *target_charge, double *source_x, double *source_y, double *source_z, double *source_charge, double * source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_COULOMB_SS_DIRECT_H */ +#endif /* H_K_COULOMB_SS_PP_H */ diff --git a/src/kernels/regularized-coulomb/regularized-coulomb.h b/src/kernels/regularized-coulomb/regularized-coulomb.h index e0e76bf0..0bbd9b98 100644 --- a/src/kernels/regularized-coulomb/regularized-coulomb.h +++ b/src/kernels/regularized-coulomb/regularized-coulomb.h @@ -3,11 +3,11 @@ #define H_K_REGULARIZED_COULOMB_H -#include "regularized-coulomb_direct.h" +#include "regularized-coulomb_pp.h" #include "regularized-coulomb_pc.h" #include "regularized-coulomb_cp.h" -#include "regularized-coulomb_ss_direct.h" +#include "regularized-coulomb_ss_pp.h" #include "regularized-coulomb_ss_correction.h" #include "regularized-coulomb_ss_pc.h" //#include "regularized-coulomb_ss_cp.h" diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_direct.c b/src/kernels/regularized-coulomb/regularized-coulomb_PP.c similarity index 94% rename from src/kernels/regularized-coulomb/regularized-coulomb_direct.c rename to src/kernels/regularized-coulomb/regularized-coulomb_PP.c index fa50afe2..b330cb9e 100644 --- a/src/kernels/regularized-coulomb/regularized-coulomb_direct.c +++ b/src/kernels/regularized-coulomb/regularized-coulomb_PP.c @@ -3,10 +3,10 @@ #include #include "../../run_params/struct_run_params.h" -#include "regularized-coulomb_direct.h" +#include "regularized-coulomb_pp.h" -void K_RegularizedCoulomb_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_RegularizedCoulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double 
*target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_direct.h b/src/kernels/regularized-coulomb/regularized-coulomb_PP.h similarity index 63% rename from src/kernels/regularized-yukawa/regularized-yukawa_direct.h rename to src/kernels/regularized-coulomb/regularized-coulomb_PP.h index b88eeb80..b46994db 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_direct.h +++ b/src/kernels/regularized-coulomb/regularized-coulomb_PP.h @@ -1,15 +1,15 @@ /* Interaction Kernels */ -#ifndef H_K_REGULARIZED_YUKAWA_DIRECT_H -#define H_K_REGULARIZED_YUKAWA_DIRECT_H +#ifndef H_K_REGULARIZED_COULOMB_PP_H +#define H_K_REGULARIZED_COULOMB_PP_H #include "../../run_params/struct_run_params.h" -void K_RegularizedYukawa_Direct(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, +void K_RegularizedCoulomb_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_REGULARIZED_YUKAWA_DIRECT_H */ +#endif /* H_K_REGULARIZED_COULOMB_PP_H */ diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_ss_direct.c b/src/kernels/regularized-coulomb/regularized-coulomb_ss_pp.c similarity index 92% rename from src/kernels/regularized-coulomb/regularized-coulomb_ss_direct.c rename to src/kernels/regularized-coulomb/regularized-coulomb_ss_pp.c index 3dbd1414..bce7cd61 100644 --- a/src/kernels/regularized-coulomb/regularized-coulomb_ss_direct.c +++ b/src/kernels/regularized-coulomb/regularized-coulomb_ss_pp.c @@ -3,10 +3,10 @@ #include #include "../../run_params/struct_run_params.h" 
-#include "regularized-coulomb_ss_direct.h" +#include "regularized-coulomb_ss_pp.h" -void K_RegularizedCoulomb_SS_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_RegularizedCoulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *target_charge, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_ss_direct.h b/src/kernels/regularized-coulomb/regularized-coulomb_ss_pp.h similarity index 63% rename from src/kernels/regularized-coulomb/regularized-coulomb_ss_direct.h rename to src/kernels/regularized-coulomb/regularized-coulomb_ss_pp.h index f800f8ac..2c5f81f6 100644 --- a/src/kernels/regularized-coulomb/regularized-coulomb_ss_direct.h +++ b/src/kernels/regularized-coulomb/regularized-coulomb_ss_pp.h @@ -1,15 +1,15 @@ /* Interaction Kernels */ -#ifndef H_K_REGULARIZED_COULOMB_SS_DIRECT_H -#define H_K_REGULARIZED_COULOMB_SS_DIRECT_H +#ifndef H_K_REGULARIZED_COULOMB_SS_PP_H +#define H_K_REGULARIZED_COULOMB_SS_PP_H #include "../../run_params/struct_run_params.h" -void K_RegularizedCoulomb_SS_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_RegularizedCoulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *target_charge, double *source_x, double *source_y, double *source_z, double *source_charge, double * source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_REGULARIZED_COULOMB_SS_DIRECT_H */ +#endif /* H_K_REGULARIZED_COULOMB_SS_PP_H */ diff --git a/src/kernels/regularized-yukawa/regularized-yukawa.h 
b/src/kernels/regularized-yukawa/regularized-yukawa.h index 066c21ff..950c34f4 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa.h +++ b/src/kernels/regularized-yukawa/regularized-yukawa.h @@ -3,11 +3,11 @@ #define H_K_REGULARIZED_YUKAWA_H -#include "regularized-yukawa_direct.h" +#include "regularized-yukawa_pp.h" #include "regularized-yukawa_pc.h" #include "regularized-yukawa_cp.h" -#include "regularized-yukawa_ss_direct.h" +#include "regularized-yukawa_ss_pp.h" #include "regularized-yukawa_ss_correction.h" #include "regularized-yukawa_ss_pc.h" //#include "regularized-yukawa_ss_cp.h" diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_direct.c b/src/kernels/regularized-yukawa/regularized-yukawa_pp.c similarity index 93% rename from src/kernels/regularized-yukawa/regularized-yukawa_direct.c rename to src/kernels/regularized-yukawa/regularized-yukawa_pp.c index fe2261f0..0c9c5276 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_direct.c +++ b/src/kernels/regularized-yukawa/regularized-yukawa_pp.c @@ -3,10 +3,10 @@ #include #include "../../run_params/struct_run_params.h" -#include "regularized-yukawa_direct.h" +#include "regularized-yukawa_pp.h" -void K_RegularizedYukawa_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_RegularizedYukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_direct.h b/src/kernels/regularized-yukawa/regularized-yukawa_pp.h similarity index 62% rename from src/kernels/regularized-coulomb/regularized-coulomb_direct.h rename to src/kernels/regularized-yukawa/regularized-yukawa_pp.h index 8bfc76bd..294cfc4e 100644 --- 
a/src/kernels/regularized-coulomb/regularized-coulomb_direct.h +++ b/src/kernels/regularized-yukawa/regularized-yukawa_pp.h @@ -1,15 +1,15 @@ /* Interaction Kernels */ -#ifndef H_K_REGULARIZED_COULOMB_DIRECT_H -#define H_K_REGULARIZED_COULOMB_DIRECT_H +#ifndef H_K_REGULARIZED_YUKAWA_PP_H +#define H_K_REGULARIZED_YUKAWA_PP_H #include "../../run_params/struct_run_params.h" -void K_RegularizedCoulomb_Direct(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, +void K_RegularizedYukawa_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_REGULARIZED_COULOMB_DIRECT_H */ +#endif /* H_K_REGULARIZED_YUKAWA_PP_H */ diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_ss_direct.c b/src/kernels/regularized-yukawa/regularized-yukawa_ss_pp.c similarity index 92% rename from src/kernels/regularized-yukawa/regularized-yukawa_ss_direct.c rename to src/kernels/regularized-yukawa/regularized-yukawa_ss_pp.c index 3d9363cf..7d265ef1 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_ss_direct.c +++ b/src/kernels/regularized-yukawa/regularized-yukawa_ss_pp.c @@ -4,9 +4,9 @@ #include "../../run_params/struct_run_params.h" -#include "regularized-yukawa_ss_direct.h" +#include "regularized-yukawa_ss_pp.h" -void K_RegularizedYukawa_SS_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_RegularizedYukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *target_charge, double *source_x, double *source_y, double *source_z, double 
*source_charge, double * source_weight, diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_ss_direct.h b/src/kernels/regularized-yukawa/regularized-yukawa_ss_pp.h similarity index 63% rename from src/kernels/regularized-yukawa/regularized-yukawa_ss_direct.h rename to src/kernels/regularized-yukawa/regularized-yukawa_ss_pp.h index 468fd529..9189794e 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_ss_direct.h +++ b/src/kernels/regularized-yukawa/regularized-yukawa_ss_pp.h @@ -1,15 +1,15 @@ /* Interaction Kernels */ -#ifndef H_K_REGULARIZED_YUKAWA_SS_DIRECT_H -#define H_K_REGULARIZED_YUKAWA_SS_DIRECT_H +#ifndef H_K_REGULARIZED_YUKAWA_SS_PP_H +#define H_K_REGULARIZED_YUKAWA_SS_PP_H #include "../../run_params/struct_run_params.h" -void K_RegularizedYukawa_SS_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_RegularizedYukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *target_charge, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_REGULARIZED_YUKAWA_SS_DIRECT_H */ +#endif /* H_K_REGULARIZED_YUKAWA_SS_PP_H */ diff --git a/src/kernels/yukawa/yukawa.h b/src/kernels/yukawa/yukawa.h index ff272f10..850a1398 100644 --- a/src/kernels/yukawa/yukawa.h +++ b/src/kernels/yukawa/yukawa.h @@ -2,11 +2,11 @@ #ifndef H_K_YUKAWA_H #define H_K_YUKAWA_H -#include "yukawa_direct.h" +#include "yukawa_pp.h" #include "yukawa_pc.h" #include "yukawa_cp.h" -#include "yukawa_ss_direct.h" +#include "yukawa_ss_pp.h" #include "yukawa_ss_correction.h" #include "yukawa_ss_pc.h" #include "yukawa_ss_cp.h" diff --git a/src/kernels/yukawa/yukawa_direct.c b/src/kernels/yukawa/yukawa_pp.c similarity index 94% rename from 
src/kernels/yukawa/yukawa_direct.c rename to src/kernels/yukawa/yukawa_pp.c index b0b1b3ce..e043613d 100644 --- a/src/kernels/yukawa/yukawa_direct.c +++ b/src/kernels/yukawa/yukawa_pp.c @@ -3,10 +3,10 @@ #include #include "../../run_params/struct_run_params.h" -#include "yukawa_direct.h" +#include "yukawa_pp.h" -void K_Yukawa_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_Yukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, diff --git a/src/kernels/coulomb/coulomb_direct.h b/src/kernels/yukawa/yukawa_pp.h similarity index 67% rename from src/kernels/coulomb/coulomb_direct.h rename to src/kernels/yukawa/yukawa_pp.h index 5abba55d..929c295a 100644 --- a/src/kernels/coulomb/coulomb_direct.h +++ b/src/kernels/yukawa/yukawa_pp.h @@ -1,15 +1,14 @@ /* Interaction Kernels */ -#ifndef H_K_COULOMB_DIRECT_H -#define H_K_COULOMB_DIRECT_H +#ifndef H_K_YUKAWA_PP_H +#define H_K_YUKAWA_PP_H #include "../../run_params/struct_run_params.h" - -void K_Coulomb_Direct(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, +void K_Yukawa_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_COULOMB_DIRECT_H */ +#endif /* H_K_YUKAWA_PP_H */ diff --git a/src/kernels/yukawa/yukawa_ss_direct.c b/src/kernels/yukawa/yukawa_ss_pp.c similarity index 93% rename from src/kernels/yukawa/yukawa_ss_direct.c rename to src/kernels/yukawa/yukawa_ss_pp.c index 
c78508fc..b22156a6 100644 --- a/src/kernels/yukawa/yukawa_ss_direct.c +++ b/src/kernels/yukawa/yukawa_ss_pp.c @@ -3,10 +3,10 @@ #include #include "../../run_params/struct_run_params.h" -#include "yukawa_ss_direct.h" +#include "yukawa_ss_pp.h" -void K_Yukawa_SS_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_Yukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *target_charge, double *source_x, double *source_y, double *source_z, double *source_charge, double * source_weight, diff --git a/src/kernels/yukawa/yukawa_ss_direct.h b/src/kernels/yukawa/yukawa_ss_pp.h similarity index 68% rename from src/kernels/yukawa/yukawa_ss_direct.h rename to src/kernels/yukawa/yukawa_ss_pp.h index f114a5ea..e47afacb 100644 --- a/src/kernels/yukawa/yukawa_ss_direct.h +++ b/src/kernels/yukawa/yukawa_ss_pp.h @@ -1,15 +1,15 @@ /* Interaction Kernels */ -#ifndef H_K_YUKAWA_SS_DIRECT_H -#define H_K_YUKAWA_SS_DIRECT_H +#ifndef H_K_YUKAWA_SS_PP_H +#define H_K_YUKAWA_SS_PP_H #include "../../run_params/struct_run_params.h" -void K_Yukawa_SS_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_Yukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *target_charge, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_YUKAWA_SS_DIRECT_H */ +#endif /* H_K_YUKAWA_SS_PP_H */ From 80a814c60c3f17b1842c54a3a87a93dc9f6fe787 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 11:36:44 -0400 Subject: [PATCH 44/95] atan, sin-over-r, and mq kernels converted from Direct to PP. 
--- src/CMakeLists.txt | 14 +++++++------- src/interaction_compute/interaction_compute_cc.c | 2 +- src/interaction_compute/interaction_compute_cp.c | 2 +- .../interaction_compute_direct.c | 6 +++--- src/interaction_compute/interaction_compute_pc.c | 6 +++--- src/kernels/atan/atan.h | 2 +- src/kernels/atan/{atan_direct.c => atan_pp.c} | 4 ++-- src/kernels/{mq/mq_direct.h => atan/atan_pp.h} | 8 ++++---- src/kernels/mq/mq.h | 2 +- src/kernels/mq/{mq_direct.c => mq_pp.c} | 4 ++-- .../{sin-over-r/sin-over-r_direct.h => mq/mq_pp.h} | 8 ++++---- src/kernels/sin-over-r/sin-over-r.h | 2 +- .../{sin-over-r_direct.c => sin-over-r_pp.c} | 4 ++-- .../atan_direct.h => sin-over-r/sin-over-r_pp.h} | 8 ++++---- 14 files changed, 36 insertions(+), 36 deletions(-) rename src/kernels/atan/{atan_direct.c => atan_pp.c} (94%) rename src/kernels/{mq/mq_direct.h => atan/atan_pp.h} (69%) rename src/kernels/mq/{mq_direct.c => mq_pp.c} (94%) rename src/kernels/{sin-over-r/sin-over-r_direct.h => mq/mq_pp.h} (66%) rename src/kernels/sin-over-r/{sin-over-r_direct.c => sin-over-r_pp.c} (95%) rename src/kernels/{atan/atan_direct.h => sin-over-r/sin-over-r_pp.h} (74%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f6774008..4195a5be 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,7 +8,7 @@ SET(SRCS_TREEDRIVER utilities/xmalloc.c utilities/array.h utilities/enums.h -# Calling functions for tree and direct calculations +# Calling functions for tree and pp calculations drivers/directdriver.h drivers/directdriver.c drivers/treedriver.h @@ -150,8 +150,8 @@ SET(SRCS_K_REGULARIZED_YUKAWA SET(SRCS_K_ATAN kernels/atan/atan.h - kernels/atan/atan_direct.h - kernels/atan/atan_direct.c + kernels/atan/atan_pp.h + kernels/atan/atan_pp.c kernels/atan/atan_pc.h kernels/atan/atan_pc.c) # kernels/atan/atan_cp.h @@ -160,8 +160,8 @@ SET(SRCS_K_ATAN SET(SRCS_K_SIN_OVER_R kernels/sin-over-r/sin-over-r.h - kernels/sin-over-r/sin-over-r_direct.h - kernels/sin-over-r/sin-over-r_direct.c + 
kernels/sin-over-r/sin-over-r_pp.h + kernels/sin-over-r/sin-over-r_pp.c kernels/sin-over-r/sin-over-r_pc.h kernels/sin-over-r/sin-over-r_pc.c kernels/sin-over-r/sin-over-r_cp.h @@ -169,8 +169,8 @@ SET(SRCS_K_SIN_OVER_R SET(SRCS_K_MQ kernels/mq/mq.h - kernels/mq/mq_direct.h - kernels/mq/mq_direct.c + kernels/mq/mq_pp.h + kernels/mq/mq_pp.c kernels/mq/mq.h kernels/mq/mq_pc.c) # kernels/mq/mq_cp.h diff --git a/src/interaction_compute/interaction_compute_cc.c b/src/interaction_compute/interaction_compute_cc.c index b4983f47..94f1e038 100644 --- a/src/interaction_compute/interaction_compute_cc.c +++ b/src/interaction_compute/interaction_compute_cc.c @@ -991,7 +991,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T if (run_params->singularity == SKIPPING) { - K_SinOverR_Direct(num_targets_in_cluster, num_sources_in_cluster, + K_SinOverR_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, diff --git a/src/interaction_compute/interaction_compute_cp.c b/src/interaction_compute/interaction_compute_cp.c index 42423554..5caa5f77 100644 --- a/src/interaction_compute/interaction_compute_cp.c +++ b/src/interaction_compute/interaction_compute_cp.c @@ -475,7 +475,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba if (run_params->singularity == SKIPPING) { - K_SinOverR_Direct(num_targets_in_cluster, num_sources_in_batch, + K_SinOverR_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, diff --git a/src/interaction_compute/interaction_compute_direct.c b/src/interaction_compute/interaction_compute_direct.c index 29162811..23895758 100644 --- a/src/interaction_compute/interaction_compute_direct.c +++ b/src/interaction_compute/interaction_compute_direct.c @@ -152,7 +152,7 @@ void InteractionCompute_Direct(double 
*potential, } else if (run_params->kernel == ATAN) { - K_Atan_Direct(num_targets, num_sources, 0, 0, + K_Atan_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); @@ -164,7 +164,7 @@ void InteractionCompute_Direct(double *potential, } else if (run_params->kernel == SIN_OVER_R) { - K_SinOverR_Direct(num_targets, num_sources, 0, 0, + K_SinOverR_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); @@ -175,7 +175,7 @@ void InteractionCompute_Direct(double *potential, } else if (run_params->kernel == MQ) { - K_MQ_Direct(num_targets, num_sources, 0, 0, + K_MQ_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, run_params, potential, 0); diff --git a/src/interaction_compute/interaction_compute_pc.c b/src/interaction_compute/interaction_compute_pc.c index 3063763f..8e87e856 100644 --- a/src/interaction_compute/interaction_compute_pc.c +++ b/src/interaction_compute/interaction_compute_pc.c @@ -526,7 +526,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->kernel == ATAN) { - K_Atan_Direct(num_targets_in_batch, num_sources_in_cluster, + K_Atan_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -540,7 +540,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->kernel == MQ) { - K_MQ_Direct(num_targets_in_batch, num_sources_in_cluster, + K_MQ_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, @@ -553,7 +553,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba } else if (run_params->kernel 
== SIN_OVER_R) { - K_SinOverR_Direct(num_targets_in_batch, num_sources_in_cluster, + K_SinOverR_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, source_x, source_y, source_z, source_q, source_w, diff --git a/src/kernels/atan/atan.h b/src/kernels/atan/atan.h index a9bcd6f8..63f99b64 100644 --- a/src/kernels/atan/atan.h +++ b/src/kernels/atan/atan.h @@ -3,7 +3,7 @@ #define H_K_ATAN_H -#include "atan_direct.h" +#include "atan_pp.h" #include "atan_pc.h" #endif /* H_K_ATAN_PC_H */ diff --git a/src/kernels/atan/atan_direct.c b/src/kernels/atan/atan_pp.c similarity index 94% rename from src/kernels/atan/atan_direct.c rename to src/kernels/atan/atan_pp.c index aea631c3..75d896d0 100644 --- a/src/kernels/atan/atan_direct.c +++ b/src/kernels/atan/atan_pp.c @@ -7,10 +7,10 @@ #include #include "../../run_params/struct_run_params.h" -#include "atan_direct.h" +#include "atan_pp.h" -void K_Atan_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_Atan_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, diff --git a/src/kernels/mq/mq_direct.h b/src/kernels/atan/atan_pp.h similarity index 69% rename from src/kernels/mq/mq_direct.h rename to src/kernels/atan/atan_pp.h index 43b27e1b..be1ff988 100644 --- a/src/kernels/mq/mq_direct.h +++ b/src/kernels/atan/atan_pp.h @@ -1,15 +1,15 @@ /* Interaction Kernels */ -#ifndef H_K_MQ_DIRECT_H -#define H_K_MQ_DIRECT_H +#ifndef H_K_ATAN_PP_H +#define H_K_ATAN_PP_H #include "../../run_params/struct_run_params.h" -void K_MQ_Direct(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, +void K_Atan_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int 
starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_MQ_DIRECT_H */ +#endif /* H_K_ATAN_PP_H */ diff --git a/src/kernels/mq/mq.h b/src/kernels/mq/mq.h index 6fb88873..e3dc2bc5 100644 --- a/src/kernels/mq/mq.h +++ b/src/kernels/mq/mq.h @@ -3,7 +3,7 @@ #define H_K_MQ_H -#include "mq_direct.h" +#include "mq_pp.h" #include "mq_pc.h" #endif /* H_K_MQ_PC_H */ diff --git a/src/kernels/mq/mq_direct.c b/src/kernels/mq/mq_pp.c similarity index 94% rename from src/kernels/mq/mq_direct.c rename to src/kernels/mq/mq_pp.c index 2dff2b50..642fd517 100644 --- a/src/kernels/mq/mq_direct.c +++ b/src/kernels/mq/mq_pp.c @@ -7,10 +7,10 @@ #include #include "../../run_params/struct_run_params.h" -#include "mq_direct.h" +#include "mq_pp.h" -void K_MQ_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_MQ_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, diff --git a/src/kernels/sin-over-r/sin-over-r_direct.h b/src/kernels/mq/mq_pp.h similarity index 66% rename from src/kernels/sin-over-r/sin-over-r_direct.h rename to src/kernels/mq/mq_pp.h index 78a9aa7d..0ce73cb3 100644 --- a/src/kernels/sin-over-r/sin-over-r_direct.h +++ b/src/kernels/mq/mq_pp.h @@ -1,15 +1,15 @@ /* Interaction Kernels */ -#ifndef H_K_SIN_OVER_R_DIRECT_H -#define H_K_SIN_OVER_R_DIRECT_H +#ifndef H_K_MQ_PP_H +#define H_K_MQ_PP_H #include "../../run_params/struct_run_params.h" -void K_SinOverR_Direct(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, +void K_MQ_PP(int 
number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_SIN_OVER_R_DIRECT_H */ +#endif /* H_K_MQ_PP_H */ diff --git a/src/kernels/sin-over-r/sin-over-r.h b/src/kernels/sin-over-r/sin-over-r.h index c6b75013..6144e04a 100644 --- a/src/kernels/sin-over-r/sin-over-r.h +++ b/src/kernels/sin-over-r/sin-over-r.h @@ -2,7 +2,7 @@ #ifndef H_K_SIN_OVER_R_H #define H_K_SIN_OVER_R_H -#include "sin-over-r_direct.h" +#include "sin-over-r_pp.h" #include "sin-over-r_pc.h" #include "sin-over-r_cp.h" diff --git a/src/kernels/sin-over-r/sin-over-r_direct.c b/src/kernels/sin-over-r/sin-over-r_pp.c similarity index 95% rename from src/kernels/sin-over-r/sin-over-r_direct.c rename to src/kernels/sin-over-r/sin-over-r_pp.c index 4192fcee..2b1b2984 100644 --- a/src/kernels/sin-over-r/sin-over-r_direct.c +++ b/src/kernels/sin-over-r/sin-over-r_pp.c @@ -3,9 +3,9 @@ #include #include "../../run_params/struct_run_params.h" -#include "sin-over-r_direct.h" +#include "sin-over-r_pp.h" -void K_SinOverR_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster, +void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, diff --git a/src/kernels/atan/atan_direct.h b/src/kernels/sin-over-r/sin-over-r_pp.h similarity index 74% rename from src/kernels/atan/atan_direct.h rename to src/kernels/sin-over-r/sin-over-r_pp.h index b2e63408..a17efa50 100644 --- a/src/kernels/atan/atan_direct.h +++ 
b/src/kernels/sin-over-r/sin-over-r_pp.h @@ -1,15 +1,15 @@ /* Interaction Kernels */ -#ifndef H_K_ATAN_DIRECT_H -#define H_K_ATAN_DIRECT_H +#ifndef H_K_SIN_OVER_R_PP_H +#define H_K_SIN_OVER_R_PP_H #include "../../run_params/struct_run_params.h" -void K_Atan_Direct(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, +void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -#endif /* H_K_ATAN_DIRECT_H */ +#endif /* H_K_SIN_OVER_R_PP_H */ From 07367790907a5b35bff3c75b0487c72d6836dbf2 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 12:53:46 -0400 Subject: [PATCH 45/95] Removing singularity checks from well-separated approximations. 
--- src/kernels/coulomb/coulomb_cc_hermite.c | 19 ++- src/kernels/coulomb/coulomb_cp.c | 24 ++-- src/kernels/coulomb/coulomb_pc.c | 18 +-- .../regularized-coulomb_cp.c | 24 ++-- .../regularized-yukawa_cp.c | 133 ------------------ .../regularized-yukawa_cp.h | 8 +- .../regularized-yukawa_pc.c | 109 -------------- .../regularized-yukawa_ss_pc.c | 114 --------------- .../regularized-yukawa_ss_pc.h | 7 - src/kernels/sin-over-r/sin-over-r_cp.c | 58 ++++---- src/kernels/sin-over-r/sin-over-r_pc.c | 5 +- src/kernels/yukawa/yukawa_cp.c | 50 ++++--- src/kernels/yukawa/yukawa_pc.c | 28 ++-- src/kernels/yukawa/yukawa_ss_pc.c | 23 ++- 14 files changed, 116 insertions(+), 504 deletions(-) diff --git a/src/kernels/coulomb/coulomb_cc_hermite.c b/src/kernels/coulomb/coulomb_cc_hermite.c index 03a2b3e9..6ddc0168 100644 --- a/src/kernels/coulomb/coulomb_cc_hermite.c +++ b/src/kernels/coulomb/coulomb_cc_hermite.c @@ -93,16 +93,15 @@ void K_Coulomb_CC_Hermite(int number_of_sources_in_batch, int number_of_interpol r5inv *= 3.0; - if (r2 > DBL_MIN) { - temp_pot_ += source_cluster_q_[j] * rinv; - temp_pot_dx += source_cluster_q_dx[j] * r3inv * dx; - temp_pot_dy += source_cluster_q_dy[j] * r3inv * dy; - temp_pot_dz += source_cluster_q_dz[j] * r3inv * dz; - temp_pot_dxy += source_cluster_q_dxy[j] * r5inv * dx * dy; - temp_pot_dyz += source_cluster_q_dyz[j] * r5inv * dy * dz; - temp_pot_dxz += source_cluster_q_dxz[j] * r5inv * dx * dz; - temp_pot_dxyz += source_cluster_q_dxyz[j] * r7inv * dx * dy * dz * 15.0; - } + temp_pot_ += source_cluster_q_[j] * rinv; + temp_pot_dx += source_cluster_q_dx[j] * r3inv * dx; + temp_pot_dy += source_cluster_q_dy[j] * r3inv * dy; + temp_pot_dz += source_cluster_q_dz[j] * r3inv * dz; + temp_pot_dxy += source_cluster_q_dxy[j] * r5inv * dx * dy; + temp_pot_dyz += source_cluster_q_dyz[j] * r5inv * dy * dz; + temp_pot_dxz += source_cluster_q_dxz[j] * r5inv * dx * dz; + temp_pot_dxyz += source_cluster_q_dxyz[j] * r7inv * dx * dy * dz * 15.0; + } // end loop over 
interpolation points #ifdef OPENACC_ENABLED diff --git a/src/kernels/coulomb/coulomb_cp.c b/src/kernels/coulomb/coulomb_cp.c index 8c097f7b..4752b950 100644 --- a/src/kernels/coulomb/coulomb_cp.c +++ b/src/kernels/coulomb/coulomb_cp.c @@ -46,9 +46,8 @@ void K_Coulomb_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpo double dz = cz - source_z[jj]; double r2 = dx*dx + dy*dy + dz*dz; - if (r2 > DBL_MIN) { - temporary_potential += source_q[jj] * source_w[jj] / sqrt(r2); - } + temporary_potential += source_q[jj] * source_w[jj] / sqrt(r2); + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic @@ -138,16 +137,15 @@ void K_Coulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpol r5inv *= 3.0; - if (r2 > DBL_MIN) { - temp_pot_ += rinvq; - temp_pot_dx += r3inv * dx; - temp_pot_dy += r3inv * dy; - temp_pot_dz += r3inv * dz; - temp_pot_dxy += r5inv * dx * dy; - temp_pot_dyz += r5inv * dy * dz; - temp_pot_dxz += r5inv * dx * dz; - temp_pot_dxyz += r7inv * dx * dy * dz * 15.0; - } + temp_pot_ += rinvq; + temp_pot_dx += r3inv * dx; + temp_pot_dy += r3inv * dy; + temp_pot_dz += r3inv * dz; + temp_pot_dxy += r5inv * dx * dy; + temp_pot_dyz += r5inv * dy * dz; + temp_pot_dxz += r5inv * dx * dz; + temp_pot_dxyz += r7inv * dx * dy * dz * 15.0; + } // end loop over interpolation points #ifdef OPENACC_ENABLED diff --git a/src/kernels/coulomb/coulomb_pc.c b/src/kernels/coulomb/coulomb_pc.c index 00b15e5c..260a2ff3 100644 --- a/src/kernels/coulomb/coulomb_pc.c +++ b/src/kernels/coulomb/coulomb_pc.c @@ -46,9 +46,8 @@ void K_Coulomb_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpo double dz = tz - cluster_z[jj]; double r2 = dx*dx + dy*dy + dz*dz; - if (r2 > DBL_MIN) { temporary_potential += cluster_charge[starting_index_of_cluster + j] / sqrt(r2); - } + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic @@ -119,16 +118,13 @@ void K_Coulomb_PC_Hermite(int number_of_targets_in_batch, 
int number_of_interpol double r5inv = r3inv*rinv*rinv; double r7inv = r5inv*rinv*rinv; - if (r > DBL_MIN) { - - temporary_potential += rinv * (cluster_charge_[j]) - + r3inv * (cluster_charge_delta_x[j]*dx + cluster_charge_delta_y[j]*dy - + cluster_charge_delta_z[j]*dz) - + 3 * r5inv * (cluster_charge_delta_xy[j]*dx*dy + cluster_charge_delta_yz[j]*dy*dz - + cluster_charge_delta_xz[j]*dx*dz) - + 15 * r7inv * cluster_charge_delta_xyz[j]*dx*dy*dz; + temporary_potential += rinv * (cluster_charge_[j]) + + r3inv * (cluster_charge_delta_x[j]*dx + cluster_charge_delta_y[j]*dy + + cluster_charge_delta_z[j]*dz) + + 3 * r5inv * (cluster_charge_delta_xy[j]*dx*dy + cluster_charge_delta_yz[j]*dy*dz + + cluster_charge_delta_xz[j]*dx*dz) + + 15 * r7inv * cluster_charge_delta_xyz[j]*dx*dy*dz; - } } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_cp.c b/src/kernels/regularized-coulomb/regularized-coulomb_cp.c index 679d297b..ba69e31d 100644 --- a/src/kernels/regularized-coulomb/regularized-coulomb_cp.c +++ b/src/kernels/regularized-coulomb/regularized-coulomb_cp.c @@ -48,9 +48,8 @@ void K_RegularizedCoulomb_CP_Lagrange(int number_of_sources_in_batch, int number double dz = cz - source_z[jj]; double r2 = dx*dx + dy*dy + dz*dz + epsilon2; - if (r2 > DBL_MIN) { - temporary_potential += source_q[jj] * source_w[jj] / sqrt(r2); - } + temporary_potential += source_q[jj] * source_w[jj] / sqrt(r2); + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic @@ -142,16 +141,15 @@ void K_RegularizedCoulomb_CP_Hermite(int number_of_sources_in_batch, int number_ r5inv *= 3.0; - if (r2 > DBL_MIN) { - temp_pot_ += rinvq; - temp_pot_dx += r3inv * dx; - temp_pot_dy += r3inv * dy; - temp_pot_dz += r3inv * dz; - temp_pot_dxy += r5inv * dx * dy; - temp_pot_dyz += r5inv * dy * dz; - temp_pot_dxz += r5inv * dx * dz; - temp_pot_dxyz += r7inv * dx * dy * dz * 15.0; - } + temp_pot_ += 
rinvq; + temp_pot_dx += r3inv * dx; + temp_pot_dy += r3inv * dy; + temp_pot_dz += r3inv * dz; + temp_pot_dxy += r5inv * dx * dy; + temp_pot_dyz += r5inv * dy * dz; + temp_pot_dxz += r5inv * dx * dz; + temp_pot_dxyz += r7inv * dx * dy * dz * 15.0; + } // end loop over interpolation points #ifdef OPENACC_ENABLED diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_cp.c b/src/kernels/regularized-yukawa/regularized-yukawa_cp.c index 460d1709..73099ca7 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_cp.c +++ b/src/kernels/regularized-yukawa/regularized-yukawa_cp.c @@ -66,136 +66,3 @@ void K_RegularizedYukawa_CP_Lagrange(int number_of_sources_in_batch, int number_ } - -/* -void K_RegularizedYukawa_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, - int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, - double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, - struct RunParams *run_params, int gpu_async_stream_id) -{ - - double *cluster_q_ = &cluster_q[8*starting_index_of_cluster + 0*number_of_interpolation_points_in_cluster]; - double *cluster_q_dx = &cluster_q[8*starting_index_of_cluster + 1*number_of_interpolation_points_in_cluster]; - double *cluster_q_dy = &cluster_q[8*starting_index_of_cluster + 2*number_of_interpolation_points_in_cluster]; - double *cluster_q_dz = &cluster_q[8*starting_index_of_cluster + 3*number_of_interpolation_points_in_cluster]; - double *cluster_q_dxy = &cluster_q[8*starting_index_of_cluster + 4*number_of_interpolation_points_in_cluster]; - double *cluster_q_dyz = &cluster_q[8*starting_index_of_cluster + 5*number_of_interpolation_points_in_cluster]; - double *cluster_q_dxz = &cluster_q[8*starting_index_of_cluster + 6*number_of_interpolation_points_in_cluster]; - double *cluster_q_dxyz = &cluster_q[8*starting_index_of_cluster + 
7*number_of_interpolation_points_in_cluster]; - - double kappa = run_params->kernel_params[0]; - double epsilon2 = run_params->kernel_params[1] * run_params->kernel_params[1]; - double kappa2 = kappa * kappa; - double kappa3 = kappa * kappa2; - -#ifdef OPENACC_ENABLED - #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ - source_w, cluster_x, cluster_y, cluster_z, \ - cluster_q_, cluster_q_dx, cluster_q_dy, cluster_q_dz, \ - cluster_q_dxy, cluster_q_dyz, cluster_q_dxz, \ - cluster_q_dxyz) - { -#endif -#ifdef OPENACC_ENABLED - #pragma acc loop independent -#endif - for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { - - double temp_pot_ = 0.0; - double temp_pot_dx = 0.0; - double temp_pot_dy = 0.0; - double temp_pot_dz = 0.0; - double temp_pot_dxy = 0.0; - double temp_pot_dyz = 0.0; - double temp_pot_dxz = 0.0; - double temp_pot_dxyz = 0.0; - - int ii = starting_index_of_cluster + i; - double cx = cluster_x[ii]; - double cy = cluster_y[ii]; - double cz = cluster_z[ii]; - -#ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temp_pot_dx) reduction(+:temp_pot_dy) reduction(+:temp_pot_dz) \ - reduction(+:temp_pot_dxy) reduction(+:temp_pot_dyz) reduction(+:temp_pot_dxz) \ - reduction(+:temp_pot_) reduction(+:temp_pot_dxyz) - -#endif - for (int j = 0; j < number_of_sources_in_batch; j++) { -#ifdef OPENACC_ENABLED - #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) -#endif - - int jj = starting_index_of_sources + j; - double dx = source_x[jj] - cx; - double dy 
= source_y[jj] - cy; - double dz = source_z[jj] - cz; - double r2 = dx*dx + dy*dy + dz*dz; - - if (r2 > DBL_MIN) { - double r = sqrt(r2); - double r3 = r2 * r; - - double r2inv = 1 / r2; - double rinvq = source_q[jj] * source_w[jj] / r * exp(-kappa * r); - double r3inv = rinvq * r2inv; - double r5inv = r3inv * r2inv; - double r7inv = r5inv * r2inv; - - double term_d1 = r3inv * (1 + kappa * r); - double term_d2 = r5inv * (3 + 3 * kappa * r + kappa2 * r2); - double term_d3 = r7inv * (15 + 15 * kappa * r + 6 * kappa2 * r2 + kappa3 * r3); - - temp_pot_ += rinvq; - temp_pot_dx += term_d1 * dx; - temp_pot_dy += term_d1 * dy; - temp_pot_dz += term_d1 * dz; - temp_pot_dxy += term_d2 * dx * dy; - temp_pot_dyz += term_d2 * dy * dz; - temp_pot_dxz += term_d2 * dx * dz; - temp_pot_dxyz += term_d3 * dx * dy * dz; - } - - } // end loop over interpolation points - -#ifdef OPENACC_ENABLED - #pragma acc atomic - cluster_q_[i] += temp_pot_; - #pragma acc atomic - cluster_q_dx[i] += temp_pot_dx; - #pragma acc atomic - cluster_q_dy[i] += temp_pot_dy; - #pragma acc atomic - cluster_q_dz[i] += temp_pot_dz; - #pragma acc atomic - cluster_q_dxy[i] += temp_pot_dxy; - #pragma acc atomic - cluster_q_dyz[i] += temp_pot_dyz; - #pragma acc atomic - cluster_q_dxz[i] += temp_pot_dxz; - #pragma acc atomic - cluster_q_dxyz[i] += temp_pot_dxyz; -#else - cluster_q_[i] += temp_pot_; - cluster_q_dx[i] += temp_pot_dx; - cluster_q_dy[i] += temp_pot_dy; - cluster_q_dz[i] += temp_pot_dz; - cluster_q_dxy[i] += temp_pot_dxy; - cluster_q_dyz[i] += temp_pot_dyz; - cluster_q_dxz[i] += temp_pot_dxz; - cluster_q_dxyz[i] += temp_pot_dxyz; -#endif - - - } -#ifdef OPENACC_ENABLED - } // end kernel -#endif - return; -} -*/ diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_cp.h b/src/kernels/regularized-yukawa/regularized-yukawa_cp.h index 49f61b2e..295d96d5 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_cp.h +++ b/src/kernels/regularized-yukawa/regularized-yukawa_cp.h @@ -11,12 +11,6 @@ 
void K_RegularizedYukawa_CP_Lagrange(int number_of_sources_in_batch, int number_ double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, int gpu_async_stream_id); -/* -void K_RegularizedYukawa_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, - int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, - double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, - struct RunParams *run_params, int gpu_async_stream_id); -*/ + #endif /* H_K_REGULARIZED_YUKAWA_CP_H */ diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_pc.c b/src/kernels/regularized-yukawa/regularized-yukawa_pc.c index abdcfa2c..88aae893 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_pc.c +++ b/src/kernels/regularized-yukawa/regularized-yukawa_pc.c @@ -59,112 +59,3 @@ void K_RegularizedYukawa_PC_Lagrange(int number_of_targets_in_batch, int number_ -/* -void K_RegularizedYukawa_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, - int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, - double *target_x, double *target_y, double *target_z, - double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, - struct RunParams *run_params, double *potential, int gpu_async_stream_id) -{ - - double *cluster_charge_ = &cluster_charge[8*starting_index_of_cluster + 0*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_x = &cluster_charge[8*starting_index_of_cluster + 1*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_y = &cluster_charge[8*starting_index_of_cluster + 2*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_z = &cluster_charge[8*starting_index_of_cluster + 
3*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_xy = &cluster_charge[8*starting_index_of_cluster + 4*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_yz = &cluster_charge[8*starting_index_of_cluster + 5*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_xz = &cluster_charge[8*starting_index_of_cluster + 6*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_xyz = &cluster_charge[8*starting_index_of_cluster + 7*number_of_interpolation_points_in_cluster]; - - double kappa = run_params->kernel_params[0]; - double epsilon2 = run_params->kernel_params[1] * run_params->kernel_params[1]; - double kappa2 = kappa * kappa; - double kappa3 = kappa * kappa2; - -#ifdef OPENACC_ENABLED - #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ - cluster_x, cluster_y, cluster_z, cluster_charge, potential, \ - cluster_charge_, cluster_charge_delta_x, cluster_charge_delta_y, cluster_charge_delta_z, \ - cluster_charge_delta_xy, cluster_charge_delta_yz, cluster_charge_delta_xz, \ - cluster_charge_delta_xyz) - { -#endif -#ifdef OPENACC_ENABLED - #pragma acc loop independent -#endif - for (int i = 0; i < number_of_targets_in_batch; i++) { - - int ii = starting_index_of_target + i; - double temporary_potential = 0.0; - - double tx = target_x[ii]; - double ty = target_y[ii]; - double tz = target_z[ii]; - - #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temporary_potential) -#endif - for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { - - int jj = starting_index_of_cluster + j; - double dx = tx - cluster_x[jj]; - double dy = ty - cluster_y[jj]; - double dz = tz - cluster_z[jj]; - - double r = sqrt(dx*dx + dy*dy + dz*dz); - double q = sqrt(dx*dx + dy*dy * dz*dz + epsilon2); - - double rinv = 1 / r; - double r2inv = rinv * rinv; - double r3inv = rinv * r2inv; - double r4inv = rinv * r3inv; - double r5inv = rinv * r4inv; - - 
double q2 = q * q; - double q4 = q2 * q2; - double qinv = 1 / q; - double q2inv = qinv * qinv; - double q3inv = qinv * q2inv; - double q5inv = q2inv * q3inv; - - - - if (r > DBL_MIN) { - temporary_potential += exp(-kappa * r) - - * (qinv * (cluster_charge_[j]) - - + rinv * q3inv * (kappa * q2 + r) - * (cluster_charge_delta_x[j]*dx + cluster_charge_delta_y[j]*dy - + cluster_charge_delta_z[j]*dz) - - + q5inv * (3 + kappa2 * q4 * r2inv + kappa * q4 * r3inv + 2 * kappa * q2 * rinv) - * (cluster_charge_delta_xy[j]*dx*dy + cluster_charge_delta_yz[j]*dy*dz - + cluster_charge_delta_xz[j]*dx*dz) - - + (kappa3 * r3inv * qinv + 3 * kappa2 * r2inv * q3inv + 3 * kappa2 * r4inv * qinv - - 6 * kappa * rinv * q5inv - + 3 * kappa * r3inv * q3inv + 3 * kappa * r5inv * qinv - + 15 * kappa * rinv * qinv + 15 * q3inv) - * cluster_charge_delta_xyz[j]*dx*dy*dz); - - - } else { - - temporary_potential += exp(-kappa * r) * qinv * (cluster_charge_[j]); - - } - - - } // end loop over interpolation points -#ifdef OPENACC_ENABLED - #pragma acc atomic -#endif - potential[starting_index_of_target + i] += temporary_potential; - } -#ifdef OPENACC_ENABLED - } // end kernel -#endif - return; -} -*/ diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.c b/src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.c index e91fa560..a499298b 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.c +++ b/src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.c @@ -58,117 +58,3 @@ void K_RegularizedYukawa_SS_PC_Lagrange(int number_of_targets_in_batch, } -/* -void K_RegularizedYukawa_SS_PC_Hermite(int number_of_targets_in_batch, - int number_of_interpolation_points_in_cluster, int starting_index_of_target, - int starting_index_of_cluster, int total_number_interpolation_points, - double *target_x, double *target_y, double *target_z, double *target_charge, - double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, - struct RunParams 
*run_params, double *potential, int gpu_async_stream_id) -{ - - double epsilon=run_params->kernel_params[1]; - double kappa=run_params->kernel_params[0]; - double kappa2 = kappa * kappa; - double kappa3 = kappa * kappa2; - - // total_number_interpolation_points is the stride, separating clustersQ, clustersQx, clustersQy, etc. - double *cluster_charge_ = &cluster_charge[8*starting_index_of_cluster + 0*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_x = &cluster_charge[8*starting_index_of_cluster + 1*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_y = &cluster_charge[8*starting_index_of_cluster + 2*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_z = &cluster_charge[8*starting_index_of_cluster + 3*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_xy = &cluster_charge[8*starting_index_of_cluster + 4*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_yz = &cluster_charge[8*starting_index_of_cluster + 5*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_xz = &cluster_charge[8*starting_index_of_cluster + 6*number_of_interpolation_points_in_cluster]; - double *cluster_charge_delta_xyz = &cluster_charge[8*starting_index_of_cluster + 7*number_of_interpolation_points_in_cluster]; - - double *cluster_weight_ = &cluster_weight[8*starting_index_of_cluster + 0*number_of_interpolation_points_in_cluster]; - double *cluster_weight_delta_x = &cluster_weight[8*starting_index_of_cluster + 1*number_of_interpolation_points_in_cluster]; - double *cluster_weight_delta_y = &cluster_weight[8*starting_index_of_cluster + 2*number_of_interpolation_points_in_cluster]; - double *cluster_weight_delta_z = &cluster_weight[8*starting_index_of_cluster + 3*number_of_interpolation_points_in_cluster]; - double *cluster_weight_delta_xy = &cluster_weight[8*starting_index_of_cluster + 4*number_of_interpolation_points_in_cluster]; - double 
*cluster_weight_delta_yz = &cluster_weight[8*starting_index_of_cluster + 5*number_of_interpolation_points_in_cluster]; - double *cluster_weight_delta_xz = &cluster_weight[8*starting_index_of_cluster + 6*number_of_interpolation_points_in_cluster]; - double *cluster_weight_delta_xyz = &cluster_weight[8*starting_index_of_cluster + 7*number_of_interpolation_points_in_cluster]; - - -#ifdef OPENACC_ENABLED - #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ - cluster_x, cluster_y, cluster_z, cluster_charge, cluster_weight, potential, \ - cluster_charge_, cluster_charge_delta_x, cluster_charge_delta_y, cluster_charge_delta_z, \ - cluster_charge_delta_xy, cluster_charge_delta_yz, cluster_charge_delta_xz, \ - cluster_charge_delta_xyz, \ - cluster_weight_, cluster_weight_delta_x, cluster_weight_delta_y, cluster_weight_delta_z, \ - cluster_weight_delta_xy, cluster_weight_delta_yz, cluster_weight_delta_xz, \ - cluster_weight_delta_xyz) - { -#endif -#ifdef OPENACC_ENABLED - #pragma acc loop independent -#endif - for (int i = 0; i < number_of_targets_in_batch; i++) { - - int ii = starting_index_of_target + i; - double temporary_potential = 0.0; - - double tx = target_x[ii]; - double ty = target_y[ii]; - double tz = target_z[ii]; - double tq = target_charge[ii]; - -#ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temporary_potential) -#endif - for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { - - int jj = starting_index_of_cluster + j; - double dx = tx - cluster_x[jj]; - double dy = ty - cluster_y[jj]; - double dz = tz - cluster_z[jj]; - double r = sqrt( dx*dx + dy*dy + dz*dz); - - double r2 = r*r; - double r3 = r2*r; - double r4 = r2*r2; - double rinv = 1 / r; - double r3inv = rinv*rinv*rinv; - double r5inv = r3inv*rinv*rinv; - double r7inv = r5inv*rinv*rinv; - - double kr = kappa * r; - double k2r2 = kr * kr; - double k3r3 = k2r2 * kr; - - double charge_diff = cluster_charge_[j] - 
cluster_weight_[j] * tq; - double delta_x_diff = cluster_charge_delta_x[j] - cluster_weight_delta_x[j] * tq; - double delta_y_diff = cluster_charge_delta_y[j] - cluster_weight_delta_y[j] * tq; - double delta_z_diff = cluster_charge_delta_z[j] - cluster_weight_delta_z[j] * tq; - double delta_xy_diff = cluster_charge_delta_xy[j] - cluster_weight_delta_xy[j] * tq; - double delta_yz_diff = cluster_charge_delta_yz[j] - cluster_weight_delta_yz[j] * tq; - double delta_xz_diff = cluster_charge_delta_xz[j] - cluster_weight_delta_xz[j] * tq; - double delta_xyz_diff = cluster_charge_delta_xyz[j] - cluster_weight_delta_xyz[j] * tq; - - if (r > DBL_MIN) { - - temporary_potential += exp(-kappa*r) - * (rinv * (charge_diff) - + r3inv * (1 + kappa*r) - * (delta_x_diff*dx + delta_y_diff*dy + delta_z_diff*dz) - + r5inv * (3 + 3*kappa*r + kappa2*r2) - * (delta_xy_diff*dx*dy + delta_yz_diff*dy*dz + delta_xz_diff*dx*dz) - + r7inv * (15 + 15*kappa*r + 6*kappa2*r2 + kappa3*r3) - * delta_xyz_diff*dx*dy*dz); - - } - } // end loop over interpolation points -#ifdef OPENACC_ENABLED - #pragma acc atomic -#endif - potential[starting_index_of_target + i] += temporary_potential; - } -#ifdef OPENACC_ENABLED - } // end kernel -#endif - return; -} -*/ diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.h b/src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.h index 495c65c7..bab42cf1 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.h +++ b/src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.h @@ -11,13 +11,6 @@ void K_RegularizedYukawa_SS_PC_Lagrange(int number_of_targets_in_batch, int numb double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id); -/* -void K_RegularizedYukawa_SS_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, - int starting_index_of_target, int starting_index_of_cluster, int 
total_number_interpolation_points, - double *target_x, double *target_y, double *target_z, double *target_charge, - double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, - struct RunParams *run_params, double *potential, int gpu_async_stream_id); -*/ #endif /* H_K_REGULARIZED_YUKAWA_SS_PC_H */ diff --git a/src/kernels/sin-over-r/sin-over-r_cp.c b/src/kernels/sin-over-r/sin-over-r_cp.c index 14fe29cf..c3447c42 100644 --- a/src/kernels/sin-over-r/sin-over-r_cp.c +++ b/src/kernels/sin-over-r/sin-over-r_cp.c @@ -48,9 +48,8 @@ void K_SinOverR_CP_Lagrange(int number_of_sources_in_batch, int number_of_interp double dz = cz - source_z[jj]; double r = sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - temporary_potential += source_q[jj] * source_w[jj] * sin(kernel_parameter * r) / r; - } + temporary_potential += source_q[jj] * source_w[jj] * sin(kernel_parameter * r) / r; + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic @@ -134,33 +133,32 @@ void K_SinOverR_CP_Hermite(int number_of_sources_in_batch, int number_of_interpo double dz = source_z[jj] - cz; double r = sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - - double rinv = 1 / r; - double r2inv = rinv * rinv; - double r3inv = rinv * r2inv; - double r4inv = r2inv * r2inv; - double r5inv = r3inv * r2inv; - double r6inv = r3inv * r3inv; - double r7inv = r4inv * r3inv; - - double sinr = sin(k*r) * source_q[jj] * source_w[jj]; - double cosr = cos(k*r) * source_q[jj] * source_w[jj]; - - double term_d0 = sinr * rinv; - double term_d1 = sinr * r3inv - k * cosr * r2inv; - double term_d2 = sinr * (3 * r5inv - k2 * r3inv) - 3 * k * cosr * r4inv; - double term_d3 = sinr * (15 * r7inv - 6 * k2 * r5inv) + cosr * (k3 * r4inv - 15 * k * r6inv); - - temp_pot_ += term_d0; - temp_pot_dx += term_d1 * dx; - temp_pot_dy += term_d1 * dy; - temp_pot_dz += term_d1 * dz; - temp_pot_dxy += term_d2 * dx * dy; - temp_pot_dyz += term_d2 * dy * dz; - temp_pot_dxz 
+= term_d2 * dx * dz; - temp_pot_dxyz += term_d3 * dx * dy * dz; - } + + double rinv = 1 / r; + double r2inv = rinv * rinv; + double r3inv = rinv * r2inv; + double r4inv = r2inv * r2inv; + double r5inv = r3inv * r2inv; + double r6inv = r3inv * r3inv; + double r7inv = r4inv * r3inv; + + double sinr = sin(k*r) * source_q[jj] * source_w[jj]; + double cosr = cos(k*r) * source_q[jj] * source_w[jj]; + + double term_d0 = sinr * rinv; + double term_d1 = sinr * r3inv - k * cosr * r2inv; + double term_d2 = sinr * (3 * r5inv - k2 * r3inv) - 3 * k * cosr * r4inv; + double term_d3 = sinr * (15 * r7inv - 6 * k2 * r5inv) + cosr * (k3 * r4inv - 15 * k * r6inv); + + temp_pot_ += term_d0; + temp_pot_dx += term_d1 * dx; + temp_pot_dy += term_d1 * dy; + temp_pot_dz += term_d1 * dz; + temp_pot_dxy += term_d2 * dx * dy; + temp_pot_dyz += term_d2 * dy * dz; + temp_pot_dxz += term_d2 * dx * dz; + temp_pot_dxyz += term_d3 * dx * dy * dz; + } // end loop over interpolation points diff --git a/src/kernels/sin-over-r/sin-over-r_pc.c b/src/kernels/sin-over-r/sin-over-r_pc.c index bab670d8..1e9fcc42 100644 --- a/src/kernels/sin-over-r/sin-over-r_pc.c +++ b/src/kernels/sin-over-r/sin-over-r_pc.c @@ -48,9 +48,8 @@ void K_SinOverR_PC_Lagrange(int number_of_targets_in_batch, int number_of_interp double dz = tz - cluster_z[jj]; double r = sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - temporary_potential += cluster_charge[starting_index_of_cluster + j] * sin(kernel_parameter * r) / r; - } + temporary_potential += cluster_charge[starting_index_of_cluster + j] * sin(kernel_parameter * r) / r; + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic diff --git a/src/kernels/yukawa/yukawa_cp.c b/src/kernels/yukawa/yukawa_cp.c index 5c259193..4d0ae3a6 100644 --- a/src/kernels/yukawa/yukawa_cp.c +++ b/src/kernels/yukawa/yukawa_cp.c @@ -48,9 +48,8 @@ void K_Yukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpol double dz = cz - source_z[jj]; double r = 
sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - temporary_potential += source_q[jj] * source_w[jj] * exp(-kernel_parameter * r) / r; - } + temporary_potential += source_q[jj] * source_w[jj] * exp(-kernel_parameter * r) / r; + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic @@ -134,29 +133,28 @@ void K_Yukawa_CP_Hermite(int number_of_sources_in_batch, int number_of_interpola double dz = source_z[jj] - cz; double r2 = dx*dx + dy*dy + dz*dz; - if (r2 > DBL_MIN) { - double r = sqrt(r2); - double r3 = r2 * r; - - double r2inv = 1 / r2; - double rinvq = source_q[jj] * source_w[jj] / r * exp(-kernel_parameter * r); - double r3inv = rinvq * r2inv; - double r5inv = r3inv * r2inv; - double r7inv = r5inv * r2inv; - - double term_d1 = r3inv * (1 + kernel_parameter * r); - double term_d2 = r5inv * (3 + 3 * kernel_parameter * r + kernel_parameter2 * r2); - double term_d3 = r7inv * (15 + 15 * kernel_parameter * r + 6 * kernel_parameter2 * r2 + kernel_parameter3 * r3); - - temp_pot_ += rinvq; - temp_pot_dx += term_d1 * dx; - temp_pot_dy += term_d1 * dy; - temp_pot_dz += term_d1 * dz; - temp_pot_dxy += term_d2 * dx * dy; - temp_pot_dyz += term_d2 * dy * dz; - temp_pot_dxz += term_d2 * dx * dz; - temp_pot_dxyz += term_d3 * dx * dy * dz; - } + double r = sqrt(r2); + double r3 = r2 * r; + + double r2inv = 1 / r2; + double rinvq = source_q[jj] * source_w[jj] / r * exp(-kernel_parameter * r); + double r3inv = rinvq * r2inv; + double r5inv = r3inv * r2inv; + double r7inv = r5inv * r2inv; + + double term_d1 = r3inv * (1 + kernel_parameter * r); + double term_d2 = r5inv * (3 + 3 * kernel_parameter * r + kernel_parameter2 * r2); + double term_d3 = r7inv * (15 + 15 * kernel_parameter * r + 6 * kernel_parameter2 * r2 + kernel_parameter3 * r3); + + temp_pot_ += rinvq; + temp_pot_dx += term_d1 * dx; + temp_pot_dy += term_d1 * dy; + temp_pot_dz += term_d1 * dz; + temp_pot_dxy += term_d2 * dx * dy; + temp_pot_dyz += term_d2 * dy * dz; + temp_pot_dxz += 
term_d2 * dx * dz; + temp_pot_dxyz += term_d3 * dx * dy * dz; + } // end loop over interpolation points diff --git a/src/kernels/yukawa/yukawa_pc.c b/src/kernels/yukawa/yukawa_pc.c index c34e4901..f1da739a 100644 --- a/src/kernels/yukawa/yukawa_pc.c +++ b/src/kernels/yukawa/yukawa_pc.c @@ -41,9 +41,8 @@ void K_Yukawa_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpol double dz = tz - cluster_z[starting_index_of_cluster + j]; double r = sqrt(dx*dx + dy*dy + dz*dz); - if (r > DBL_MIN) { - temporary_potential += cluster_charge[starting_index_of_cluster + j] * exp(-kernel_parameter * r) / r; - } + temporary_potential += cluster_charge[starting_index_of_cluster + j] * exp(-kernel_parameter * r) / r; + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic @@ -119,21 +118,18 @@ void K_Yukawa_PC_Hermite(int number_of_targets_in_batch, int number_of_interpola double r5inv = r3inv*rinv*rinv; double r7inv = r5inv*rinv*rinv; - if (r > DBL_MIN) { - - temporary_potential += exp(-kernel_parameter * r) - * (rinv * (cluster_charge_[j]) - + r3inv * (1 + kernel_parameter * r) - * (cluster_charge_delta_x[j]*dx + cluster_charge_delta_y[j]*dy - + cluster_charge_delta_z[j]*dz) - + r5inv * (3 + 3 * kernel_parameter * r + kernel_parameter2 * r2) - * (cluster_charge_delta_xy[j]*dx*dy + cluster_charge_delta_yz[j]*dy*dz - + cluster_charge_delta_xz[j]*dx*dz) - + r7inv * (15 + 15 * kernel_parameter * r + 6 * kernel_parameter2 * r2 + kernel_parameter3 * r3) - * cluster_charge_delta_xyz[j]*dx*dy*dz); + temporary_potential += exp(-kernel_parameter * r) + * (rinv * (cluster_charge_[j]) + + r3inv * (1 + kernel_parameter * r) + * (cluster_charge_delta_x[j]*dx + cluster_charge_delta_y[j]*dy + + cluster_charge_delta_z[j]*dz) + + r5inv * (3 + 3 * kernel_parameter * r + kernel_parameter2 * r2) + * (cluster_charge_delta_xy[j]*dx*dy + cluster_charge_delta_yz[j]*dy*dz + + cluster_charge_delta_xz[j]*dx*dz) + + r7inv * (15 + 15 * kernel_parameter * r + 6 * 
kernel_parameter2 * r2 + kernel_parameter3 * r3) + * cluster_charge_delta_xyz[j]*dx*dy*dz); - } } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic diff --git a/src/kernels/yukawa/yukawa_ss_pc.c b/src/kernels/yukawa/yukawa_ss_pc.c index 9cd68972..c9db5f5e 100644 --- a/src/kernels/yukawa/yukawa_ss_pc.c +++ b/src/kernels/yukawa/yukawa_ss_pc.c @@ -146,18 +146,17 @@ void K_Yukawa_SS_PC_Hermite(int number_of_targets_in_batch, double delta_xz_diff = cluster_charge_delta_xz[j] - cluster_weight_delta_xz[j] * tq; double delta_xyz_diff = cluster_charge_delta_xyz[j] - cluster_weight_delta_xyz[j] * tq; - if (r > DBL_MIN) { - - temporary_potential += exp(-kernel_parameter*r) - * (rinv * (charge_diff) - + r3inv * (1 + kernel_parameter*r) - * (delta_x_diff*dx + delta_y_diff*dy + delta_z_diff*dz) - + r5inv * (3 + 3*kernel_parameter*r + kernel_parameter2*r2) - * (delta_xy_diff*dx*dy + delta_yz_diff*dy*dz + delta_xz_diff*dx*dz) - + r7inv * (15 + 15*kernel_parameter*r + 6*kernel_parameter2*r2 + kernel_parameter3*r3) - * delta_xyz_diff*dx*dy*dz); - - } + + temporary_potential += exp(-kernel_parameter*r) + * (rinv * (charge_diff) + + r3inv * (1 + kernel_parameter*r) + * (delta_x_diff*dx + delta_y_diff*dy + delta_z_diff*dz) + + r5inv * (3 + 3*kernel_parameter*r + kernel_parameter2*r2) + * (delta_xy_diff*dx*dy + delta_yz_diff*dy*dz + delta_xz_diff*dx*dz) + + r7inv * (15 + 15*kernel_parameter*r + 6*kernel_parameter2*r2 + kernel_parameter3*r3) + * delta_xyz_diff*dx*dy*dz); + + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic From 4ee70db89d220381afde25a4e5ed8a22f9e6e418 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 2 Jul 2020 12:54:10 -0400 Subject: [PATCH 46/95] forgot one kernel file in previous commit. 
--- src/kernels/coulomb/coulomb_ss_pc.c | 40 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/kernels/coulomb/coulomb_ss_pc.c b/src/kernels/coulomb/coulomb_ss_pc.c index d93b7762..2e0de018 100644 --- a/src/kernels/coulomb/coulomb_ss_pc.c +++ b/src/kernels/coulomb/coulomb_ss_pc.c @@ -134,27 +134,25 @@ void K_Coulomb_SS_PC_Hermite(int number_of_targets_in_batch, double r_over_k_4 = r_over_k_2*r_over_k_2; double r_over_k_6 = r_over_k_4*r_over_k_2; - if (r > DBL_MIN) { - - temporary_potential += rinv * (cluster_charge_[j]) - + r3inv * (cluster_charge_delta_x[j]*dx + cluster_charge_delta_y[j]*dy - + cluster_charge_delta_z[j]*dz) - + 3 * r5inv * (cluster_charge_delta_xy[j]*dx*dy + cluster_charge_delta_yz[j]*dy*dz - + cluster_charge_delta_xz[j]*dx*dz) - + 15 * r7inv * cluster_charge_delta_xyz[j]*dx*dy*dz - - - tcharge * exp(-r_over_k_2) - * (rinv * (cluster_weight_[j]) - + r3inv * (1 + 2*r_over_k_2) - * (cluster_weight_delta_x[j]*dx + cluster_weight_delta_y[j]*dy - + cluster_weight_delta_z[j]*dz) - + r5inv * (3 + 4*r_over_k_2 + 4*r_over_k_4) - * (cluster_weight_delta_xy[j]*dx*dy + cluster_weight_delta_yz[j]*dy*dz - + cluster_weight_delta_xz[j]*dx*dz) - + r7inv * (15 + 18*r_over_k_2 + 12*r_over_k_4 + 8*r_over_k_6) - * cluster_weight_delta_xyz[j]*dx*dy*dz); - - } + temporary_potential += rinv * (cluster_charge_[j]) + + r3inv * (cluster_charge_delta_x[j]*dx + cluster_charge_delta_y[j]*dy + + cluster_charge_delta_z[j]*dz) + + 3 * r5inv * (cluster_charge_delta_xy[j]*dx*dy + cluster_charge_delta_yz[j]*dy*dz + + cluster_charge_delta_xz[j]*dx*dz) + + 15 * r7inv * cluster_charge_delta_xyz[j]*dx*dy*dz + + - tcharge * exp(-r_over_k_2) + * (rinv * (cluster_weight_[j]) + + r3inv * (1 + 2*r_over_k_2) + * (cluster_weight_delta_x[j]*dx + cluster_weight_delta_y[j]*dy + + cluster_weight_delta_z[j]*dz) + + r5inv * (3 + 4*r_over_k_2 + 4*r_over_k_4) + * (cluster_weight_delta_xy[j]*dx*dy + cluster_weight_delta_yz[j]*dy*dz + + 
cluster_weight_delta_xz[j]*dx*dz) + + r7inv * (15 + 18*r_over_k_2 + 12*r_over_k_4 + 8*r_over_k_6) + * cluster_weight_delta_xyz[j]*dx*dy*dz); + + } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic From c9fa19d2f5b20dbe2e76b49754731c83b9952897 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Tue, 21 Jul 2020 19:12:52 -0400 Subject: [PATCH 47/95] tree routines updated to create leaf and levels lists --- src/tree/struct_tree.h | 9 +++ src/tree/struct_tree_linked_list_node.h | 1 + src/tree/tree.c | 78 +++++++++++++++++++++++++ src/tree/tree.h | 4 ++ 4 files changed, 92 insertions(+) diff --git a/src/tree/struct_tree.h b/src/tree/struct_tree.h index dd7c010e..d6aeccf5 100644 --- a/src/tree/struct_tree.h +++ b/src/tree/struct_tree.h @@ -8,6 +8,7 @@ struct Tree int min_leaf_size; int max_leaf_size; + int max_depth; int *ibeg; int *iend; @@ -31,6 +32,14 @@ struct Tree int *num_children; int *children; + int *parent; + + int **levels_list; + int *levels_list_num; + + int *leaves_list; + int leaves_list_num; + }; #endif /* H_STRUCT_TREE_H */ diff --git a/src/tree/struct_tree_linked_list_node.h b/src/tree/struct_tree_linked_list_node.h index dc560414..aeb979d4 100644 --- a/src/tree/struct_tree_linked_list_node.h +++ b/src/tree/struct_tree_linked_list_node.h @@ -13,6 +13,7 @@ struct TreeLinkedListNode int num_children; struct TreeLinkedListNode *child[8]; + struct TreeLinkedListNode *parent; int node_index; }; diff --git a/src/tree/tree.c b/src/tree/tree.c index 7080c890..ddc4cdda 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -40,6 +40,8 @@ void Tree_Sources_Construct(struct Tree **tree_addr, struct Particles *sources, TreeLinkedList_SetIndex(tree_linked_list, 0); + + Tree_Alloc(tree_addr, numnodes); Tree_Fill(*tree_addr, tree_linked_list); (*tree_addr)->numleaves = numleaves; @@ -47,6 +49,8 @@ void Tree_Sources_Construct(struct Tree **tree_addr, struct Particles *sources, (*tree_addr)->min_leaf_size = min_leaf_size; 
(*tree_addr)->max_leaf_size = max_leaf_size; + Tree_Set_Leaves_and_Levels(*tree_addr); + TreeLinkedList_Free(&tree_linked_list); return; @@ -91,6 +95,61 @@ void Tree_Targets_Construct(struct Tree **tree_addr, struct Particles *targets, } +void Tree_Set_Leaves_and_Levels(struct Tree *tree) +{ + + /* Creating levels list for the downpass */ + make_matrix(tree->levels_list, tree->max_depth, 20); + make_vector(tree->levels_list_num, tree->max_depth); + for (int i = 0; i < tree->max_depth; ++i) tree->levels_list_num[i]=0; + + make_vector(tree->leaves_list, 50); + tree->leaves_list_num = 0; + + int *sizeof_levels_list = NULL; + make_vector(sizeof_levels_list, tree->max_depth); + for (int i = 0; i < tree->max_depth; ++i) sizeof_levels_list[i]=20; + + int sizeof_leaves_list = 50; + + Tree_Fill_Levels(tree, 0, 0, sizeof_levels_list, &sizeof_leaves_list); + free_vector(sizeof_levels_list); + + return; +} + + +void Tree_Fill_Levels(struct Tree *tree, int idx, int level, int *sizeof_levels_list, int *sizeof_leaves_list) +{ + + if (tree->num_children[idx] == 0) { + if (tree->leaves_list_num >= *sizeof_leaves_list) { + *sizeof_leaves_list *= 1.5; + tree->leaves_list = realloc_vector(tree->leaves_list, *sizeof_leaves_list); + } + + tree->leaves_list[tree->leaves_list_num] = idx; + tree->leaves_list_num++; + + } else { + + if (tree->levels_list_num[level] >= sizeof_levels_list[level]) { + sizeof_levels_list[level] *= 1.5; + tree->levels_list[level] = realloc_vector(tree->levels_list[level], sizeof_levels_list[level]); + } + + tree->levels_list[level][tree->levels_list_num[level]] = idx; + tree->levels_list_num[level]++; + + for (int i = 0; i < tree->num_children[idx]; i++) + Tree_Fill_Levels(tree, tree->children[8*idx + i], level+1, sizeof_levels_list, sizeof_leaves_list); + } + + + return; +} + + void Tree_Alloc(struct Tree **tree_addr, int length) { @@ -112,8 +171,15 @@ void Tree_Alloc(struct Tree **tree_addr, int length) make_vector(tree->z_max, length); 
make_vector(tree->cluster_ind, length); make_vector(tree->radius, length); + make_vector(tree->num_children, length); make_vector(tree->children, 8*length); + make_vector(tree->parent, length); + + tree->levels_list = NULL; + tree->levels_list_num = NULL; + tree->leaves_list = NULL; + return; } /* END of function allocate_tree */ @@ -139,8 +205,15 @@ void Tree_Free(struct Tree **tree_addr) free_vector(tree->z_max); free_vector(tree->cluster_ind); free_vector(tree->radius); + free_vector(tree->num_children); free_vector(tree->children); + free_vector(tree->parent); + + if (tree->levels_list != NULL) free_matrix(tree->levels_list); + if (tree->levels_list_num != NULL) free_vector(tree->levels_list_num); + if (tree->leaves_list != NULL) free_vector(tree->leaves_list); + free(tree); } @@ -173,6 +246,11 @@ void Tree_Fill(struct Tree *tree, struct TreeLinkedListNode *p) tree->num_children[p->node_index] = p->num_children; + if (p->parent != NULL) + tree->parent[p->node_index] = (p->parent)->node_index; + else + tree->parent[p->node_index] = -1; + for (int i = 0; i < p->num_children; i++) { tree->children[8*p->node_index+i] = (p->child[i])->node_index; Tree_Fill(tree, p->child[i]); diff --git a/src/tree/tree.h b/src/tree/tree.h index 3bd6c2bc..20cd2ed9 100644 --- a/src/tree/tree.h +++ b/src/tree/tree.h @@ -12,6 +12,10 @@ void Tree_Sources_Construct(struct Tree **tree_addr, struct Particles *sources, void Tree_Targets_Construct(struct Tree **tree_addr, struct Particles *targets, struct RunParams *run_params); +void Tree_Set_Leaves_and_Levels(struct Tree *tree); + +void Tree_Fill_Levels(struct Tree *tree, int idx, int level, int *sizeof_levels_list, int *sizeof_leaves_list); + void Tree_Alloc(struct Tree **tree_addr, int length); void Tree_Free(struct Tree **tree_addr); From 9cb27553eb74f93f4ccdac4f9eb62606def8c83f Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Tue, 21 Jul 2020 19:33:15 -0400 Subject: [PATCH 48/95] Setting max depth correctly. 
--- examples/example.in | 1 + src/clusters/clusters.c | 214 +++++++++++++++++++++++- src/tree/struct_tree_linked_list_node.h | 2 + src/tree/tree.c | 6 +- src/tree/tree_linked_list.c | 18 +- src/tree/tree_linked_list.h | 8 +- 6 files changed, 239 insertions(+), 10 deletions(-) diff --git a/examples/example.in b/examples/example.in index cc807c99..5ec2badb 100644 --- a/examples/example.in +++ b/examples/example.in @@ -14,3 +14,4 @@ distribution uniform run_direct 1 slice 10 verbosity 1 +temp 0 \ No newline at end of file diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index a3df4c62..3a30da8c 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -21,6 +21,10 @@ static void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpola double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); +static void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int idx, int interpolationDegree, + double *xS, double *yS, double *zS, double *qS, double *wS, + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); + static void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); @@ -109,7 +113,8 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa if ((approximation == LAGRANGE) && (singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) - pc_comp_ms_modifiedF(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); + pc_comp_ms_modifiedF_child_to_parent(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); +// pc_comp_ms_modifiedF(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); } else if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { for (int i = 0; i < 
tree_numnodes; i++) @@ -296,6 +301,213 @@ void Clusters_Free_Win(struct Clusters **clusters_addr) /***** LOCAL FUNCTIONS **************/ /************************************/ +void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int idx, int interpolationDegree, + double *xS, double *yS, double *zS, double *qS, double *wS, + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ) +{ + + int interpDegreeLim = interpolationDegree + 1; + int interpolationPointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; + int sourcePointsInCluster = tree->iend[idx] - tree->ibeg[idx] + 1; + int startingIndexInClustersArray = idx * interpolationPointsPerCluster; + int startingIndexInSourcesArray = tree->ibeg[idx]-1; + + double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ, *modifiedF; + int *exactIndX, *exactIndY, *exactIndZ; + + make_vector(weights, interpDegreeLim); + make_vector(dj, interpDegreeLim); + make_vector(tt, interpDegreeLim); + make_vector(nodeX, interpDegreeLim); + make_vector(nodeY, interpDegreeLim); + make_vector(nodeZ, interpDegreeLim); + make_vector(modifiedF, sourcePointsInCluster); + make_vector(exactIndX, sourcePointsInCluster); + make_vector(exactIndY, sourcePointsInCluster); + make_vector(exactIndZ, sourcePointsInCluster); + + double x0 = tree->x_min[idx]; + double x1 = tree->x_max[idx]; + double y0 = tree->y_min[idx]; + double y1 = tree->y_max[idx]; + double z0 = tree->z_min[idx]; + double z1 = tree->z_max[idx]; + +#ifdef OPENACC_ENABLED + int streamID = rand() % 4; + #pragma acc kernels async(streamID) present(xS, yS, zS, qS, wS, clusterX, clusterY, clusterZ, clusterQ) \ + create(modifiedF[0:sourcePointsInCluster], exactIndX[0:sourcePointsInCluster], \ + exactIndY[0:sourcePointsInCluster], exactIndZ[0:sourcePointsInCluster], \ + nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], \ + nodeZ[0:interpDegreeLim], weights[0:interpDegreeLim], \ + dj[0:interpDegreeLim], tt[0:interpDegreeLim]) + { +#endif + +#ifdef 
OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < sourcePointsInCluster; j++) { + modifiedF[j] = qS[startingIndexInSourcesArray + j] * wS[startingIndexInSourcesArray + j]; + exactIndX[j] = -1; + exactIndY[j] = -1; + exactIndZ[j] = -1; + } + + // Fill in arrays of unique x, y, and z coordinates for the interpolation points. +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interpDegreeLim; i++) { + tt[i] = cos(i * M_PI / interpolationDegree); + nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); + nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); + nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); + } + + // Compute weights +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpDegreeLim; j++) { + dj[j] = 1.0; + if (j == 0) dj[j] = 0.5; + if (j == interpolationDegree) dj[j] = 0.5; + } + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpDegreeLim; j++) { + weights[j] = ((j % 2 == 0)? 
1 : -1) * dj[j]; + } + + // Compute modified f values +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < sourcePointsInCluster; i++) { // loop through the source points + + double sumX = 0.0; + double sumY = 0.0; + double sumZ = 0.0; + + double sx = xS[startingIndexInSourcesArray+i]; + double sy = yS[startingIndexInSourcesArray+i]; + double sz = zS[startingIndexInSourcesArray+i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) +#endif + for (int j = 0; j < (interpolationDegree+1); j++) { // loop through the degree + + double cx = sx - nodeX[j]; + double cy = sy - nodeY[j]; + double cz = sz - nodeZ[j]; + + if (fabs(cx) < DBL_MIN) exactIndX[i] = j; + if (fabs(cy) < DBL_MIN) exactIndY[i] = j; + if (fabs(cz) < DBL_MIN) exactIndZ[i] = j; + + // Increment the sums + double w = weights[j]; + sumX += w / cx; + sumY += w / cy; + sumZ += w / cz; + + } + + double denominator = 1.0; + if (exactIndX[i] == -1) denominator *= sumX; + if (exactIndY[i] == -1) denominator *= sumY; + if (exactIndZ[i] == -1) denominator *= sumZ; + + modifiedF[i] /= denominator; + } + + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpolationPointsPerCluster; j++) { + int k1 = j%(interpolationDegree+1); + int kk = (j-k1)/(interpolationDegree+1); + int k2 = kk%(interpolationDegree+1); + kk = kk - k2; + int k3 = kk / (interpolationDegree+1); + + double cz = nodeZ[k3]; + double w3 = weights[k3]; + + double cy = nodeY[k2]; + double w2 = weights[k2]; + + double cx = nodeX[k1]; + double w1 = weights[k1]; + + // Fill cluster X, Y, and Z arrays + clusterX[startingIndexInClustersArray + j] = cx; + clusterY[startingIndexInClustersArray + j] = cy; + clusterZ[startingIndexInClustersArray + j] = cz; + + // Increment cluster Q array + double temp = 0.0; +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:temp) +#endif + for (int i = 0; i < sourcePointsInCluster; 
i++) { // loop over source points + double sx = xS[startingIndexInSourcesArray + i]; + double sy = yS[startingIndexInSourcesArray + i]; + double sz = zS[startingIndexInSourcesArray + i]; + + double numerator = 1.0; + + // If exactInd[i] == -1, then no issues. + // If exactInd[i] != -1, then we want to zero out terms EXCEPT when exactInd=k1. + if (exactIndX[i] == -1) { + numerator *= w1 / (sx - cx); + } else { + if (exactIndX[i] != k1) numerator *= 0; + } + + if (exactIndY[i] == -1) { + numerator *= w2 / (sy - cy); + } else { + if (exactIndY[i] != k2) numerator *= 0; + } + + if (exactIndZ[i] == -1) { + numerator *= w3 / (sz - cz); + } else { + if (exactIndZ[i] != k3) numerator *= 0; + } + + temp += numerator * modifiedF[i]; + + } + + clusterQ[startingIndexInClustersArray + j] += temp; + + } +#ifdef OPENACC_ENABLED + } //end acc kernels region +#endif + + free_vector(weights); + free_vector(dj); + free_vector(tt); + free_vector(nodeX); + free_vector(nodeY); + free_vector(nodeZ); + free_vector(modifiedF); + free_vector(exactIndX); + free_vector(exactIndY); + free_vector(exactIndZ); + + return; +} + + void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ) diff --git a/src/tree/struct_tree_linked_list_node.h b/src/tree/struct_tree_linked_list_node.h index aeb979d4..e35ee924 100644 --- a/src/tree/struct_tree_linked_list_node.h +++ b/src/tree/struct_tree_linked_list_node.h @@ -16,6 +16,8 @@ struct TreeLinkedListNode struct TreeLinkedListNode *parent; int node_index; + + int level; }; #endif /* H_STRUCT_TREE_LINKED_LIST_NODE_H */ diff --git a/src/tree/tree.c b/src/tree/tree.c index ddc4cdda..97d1692f 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -23,6 +23,7 @@ void Tree_Sources_Construct(struct Tree **tree_addr, struct Particles *sources, double xyzminmax[6]; int numnodes = 0; int numleaves = 0; + int 
max_depth = 1; int min_leaf_size = INT_MAX; int max_leaf_size = 0; @@ -34,9 +35,9 @@ void Tree_Sources_Construct(struct Tree **tree_addr, struct Particles *sources, xyzminmax[4] = minval(sources->z, sources->num); xyzminmax[5] = maxval(sources->z, sources->num); - TreeLinkedList_Sources_Construct(&tree_linked_list, sources, 1, sources->num, + TreeLinkedList_Sources_Construct(&tree_linked_list, NULL, sources, 1, sources->num, run_params->max_per_source_leaf, xyzminmax, &numnodes, &numleaves, - &min_leaf_size, &max_leaf_size); + &min_leaf_size, &max_leaf_size, &max_depth, 0); TreeLinkedList_SetIndex(tree_linked_list, 0); @@ -48,6 +49,7 @@ void Tree_Sources_Construct(struct Tree **tree_addr, struct Particles *sources, (*tree_addr)->min_leaf_size = min_leaf_size; (*tree_addr)->max_leaf_size = max_leaf_size; + (*tree_addr)->max_depth = max_depth; Tree_Set_Leaves_and_Levels(*tree_addr); diff --git a/src/tree/tree_linked_list.c b/src/tree/tree_linked_list.c index 718f1f8b..809215ef 100644 --- a/src/tree/tree_linked_list.c +++ b/src/tree/tree_linked_list.c @@ -138,9 +138,11 @@ void TreeLinkedList_Targets_Construct(struct TreeLinkedListNode **p, struct Part -void TreeLinkedList_Sources_Construct(struct TreeLinkedListNode **p, struct Particles *sources, +void TreeLinkedList_Sources_Construct(struct TreeLinkedListNode **p, struct TreeLinkedListNode *parent, + struct Particles *sources, int ibeg, int iend, int maxparnode, double *xyzmm, - int *numnodes, int *numleaves, int *min_leaf_size, int *max_leaf_size) + int *numnodes, int *numleaves, int *min_leaf_size, int *max_leaf_size, + int *max_depth, int current_level) { int ind[8][2]; double xyzmms[6][8]; @@ -166,8 +168,15 @@ void TreeLinkedList_Sources_Construct(struct TreeLinkedListNode **p, struct Part (*p) = malloc(sizeof(struct TreeLinkedListNode)); (*numnodes)++; + (*p)->parent = parent; (*p)->numpar = iend - ibeg + 1; + if (current_level + 1 > *max_depth){ + printf("[TreeLinkedList_Sources_Construct] Increasing max depth 
to %i\n",current_level + 1); + *max_depth = current_level + 1; + } + (*p)->level = current_level; + (*p)->x_min = minval(sources->x + ibeg - 1, (*p)->numpar); (*p)->x_max = maxval(sources->x + ibeg - 1, (*p)->numpar); (*p)->y_min = minval(sources->y + ibeg - 1, (*p)->numpar); @@ -240,10 +249,11 @@ void TreeLinkedList_Sources_Construct(struct TreeLinkedListNode **p, struct Part struct TreeLinkedListNode **paddress = &((*p)->child[idx]); - TreeLinkedList_Sources_Construct(paddress, + TreeLinkedList_Sources_Construct(paddress, *p, sources, ind[i][0], ind[i][1], maxparnode, lxyzmm, numnodes, numleaves, - min_leaf_size, max_leaf_size); + min_leaf_size, max_leaf_size, + max_depth, current_level+1); } } diff --git a/src/tree/tree_linked_list.h b/src/tree/tree_linked_list.h index 8301be53..512e49f8 100644 --- a/src/tree/tree_linked_list.h +++ b/src/tree/tree_linked_list.h @@ -9,9 +9,11 @@ void TreeLinkedList_Targets_Construct(struct TreeLinkedListNode **p, struct Part int ibeg, int iend, int maxparnode, double *xyzmm, int *numnodes, int *numleaves, int *min_leaf_size, int *max_leaf_size); -void TreeLinkedList_Sources_Construct(struct TreeLinkedListNode **p, struct Particles *sources, - int ibeg, int iend, int maxparnode, double *xyzmm, int *numnodes, int *numleaves, - int *min_leaf_size, int *max_leaf_size); +void TreeLinkedList_Sources_Construct(struct TreeLinkedListNode **p, struct TreeLinkedListNode *parent, + struct Particles *sources, + int ibeg, int iend, int maxparnode, double *xyzmm, + int *numnodes, int *numleaves, int *min_leaf_size, int *max_leaf_size, + int *max_depth, int current_level); int TreeLinkedList_SetIndex(struct TreeLinkedListNode *p, int index); From d56f824ba1394a71f9bb407031ac556d3452ce58 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Wed, 22 Jul 2020 13:49:27 -0400 Subject: [PATCH 49/95] O(N) upward pass working on CPUs for Lagrange.
--- src/clusters/clusters.c | 94 ++++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 3a30da8c..5196fbfc 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -21,8 +21,7 @@ static void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpola double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); -static void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int idx, int interpolationDegree, - double *xS, double *yS, double *zS, double *qS, double *wS, +static void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); static void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationDegree, @@ -112,8 +111,34 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa #endif if ((approximation == LAGRANGE) && (singularity == SKIPPING)) { - for (int i = 0; i < tree_numnodes; i++) - pc_comp_ms_modifiedF_child_to_parent(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); + + // anterpolate from particles to leaf cluster interpolation points + printf("Computing modified charges for the %i leaves\n",tree->leaves_list_num); + for (int i = 0; i < tree->leaves_list_num; ++i) { + int leaf_index = tree->leaves_list[i]; + pc_comp_ms_modifiedF(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); + } + + // interpolate up clusters, level by level + for (int level = tree->max_depth-2; level >= 0; --level) { + printf("Computing modified charges for level %i which contains %i clusters\n",level,tree->levels_list_num[level]); + for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { + + int parent_index = 
tree->levels_list[level][cluster_index]; + + for (int child_counter=0; child_counter<tree->num_children[parent_index]; ++child_counter){ + + int child_index = tree->children[8*parent_index + child_counter]; + + pc_comp_ms_modifiedF_child_to_parent(tree, child_index, parent_index, interpolationDegree, xC, yC, zC, qC); + + } + } + } + + + + // pc_comp_ms_modifiedF(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); } else if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { @@ -301,16 +326,15 @@ void Clusters_Free_Win(struct Clusters **clusters_addr) /***** LOCAL FUNCTIONS **************/ /************************************/ -void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int idx, int interpolationDegree, - double *xS, double *yS, double *zS, double *qS, double *wS, +void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ) { int interpDegreeLim = interpolationDegree + 1; int interpolationPointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; - int sourcePointsInCluster = tree->iend[idx] - tree->ibeg[idx] + 1; - int startingIndexInClustersArray = idx * interpolationPointsPerCluster; - int startingIndexInSourcesArray = tree->ibeg[idx]-1; + + int child_startingIndexInClustersArray = child_index * interpolationPointsPerCluster; + int parent_startingIndexInClustersArray = parent_index * interpolationPointsPerCluster; double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ, *modifiedF; int *exactIndX, *exactIndY, *exactIndZ; @@ -321,21 +345,21 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int idx, int make_vector(nodeX, interpDegreeLim); make_vector(nodeY, interpDegreeLim); make_vector(nodeZ, interpDegreeLim); - make_vector(modifiedF, sourcePointsInCluster); - make_vector(exactIndX, sourcePointsInCluster); - make_vector(exactIndY, sourcePointsInCluster); -
make_vector(exactIndZ, sourcePointsInCluster); - - double x0 = tree->x_min[idx]; - double x1 = tree->x_max[idx]; - double y0 = tree->y_min[idx]; - double y1 = tree->y_max[idx]; - double z0 = tree->z_min[idx]; - double z1 = tree->z_max[idx]; + make_vector(modifiedF, interpolationPointsPerCluster); + make_vector(exactIndX, interpolationPointsPerCluster); + make_vector(exactIndY, interpolationPointsPerCluster); + make_vector(exactIndZ, interpolationPointsPerCluster); + + double x0 = tree->x_min[parent_index]; + double x1 = tree->x_max[parent_index]; + double y0 = tree->y_min[parent_index]; + double y1 = tree->y_max[parent_index]; + double z0 = tree->z_min[parent_index]; + double z1 = tree->z_max[parent_index]; #ifdef OPENACC_ENABLED int streamID = rand() % 4; - #pragma acc kernels async(streamID) present(xS, yS, zS, qS, wS, clusterX, clusterY, clusterZ, clusterQ) \ + #pragma acc kernels async(streamID) present(clusterX, clusterY, clusterZ, clusterQ) \ create(modifiedF[0:sourcePointsInCluster], exactIndX[0:sourcePointsInCluster], \ exactIndY[0:sourcePointsInCluster], exactIndZ[0:sourcePointsInCluster], \ nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], \ @@ -347,8 +371,8 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int idx, int #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int j = 0; j < sourcePointsInCluster; j++) { - modifiedF[j] = qS[startingIndexInSourcesArray + j] * wS[startingIndexInSourcesArray + j]; + for (int j = 0; j < interpolationPointsPerCluster; j++) { + modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j];// * wS[child_startingIndexInClustersArray + j]; exactIndX[j] = -1; exactIndY[j] = -1; exactIndZ[j] = -1; @@ -386,15 +410,15 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int idx, int #ifdef OPENACC_ENABLED #pragma acc loop independent #endif - for (int i = 0; i < sourcePointsInCluster; i++) { // loop through the source points + for (int i = 0; i < 
interpolationPointsPerCluster; i++) { // loop through the source points double sumX = 0.0; double sumY = 0.0; double sumZ = 0.0; - double sx = xS[startingIndexInSourcesArray+i]; - double sy = yS[startingIndexInSourcesArray+i]; - double sz = zS[startingIndexInSourcesArray+i]; + double sx = clusterX[child_startingIndexInClustersArray+i]; + double sy = clusterY[child_startingIndexInClustersArray+i]; + double sz = clusterZ[child_startingIndexInClustersArray+i]; #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) @@ -446,19 +470,19 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int idx, int double w1 = weights[k1]; // Fill cluster X, Y, and Z arrays - clusterX[startingIndexInClustersArray + j] = cx; - clusterY[startingIndexInClustersArray + j] = cy; - clusterZ[startingIndexInClustersArray + j] = cz; + clusterX[parent_startingIndexInClustersArray + j] = cx; + clusterY[parent_startingIndexInClustersArray + j] = cy; + clusterZ[parent_startingIndexInClustersArray + j] = cz; // Increment cluster Q array double temp = 0.0; #ifdef OPENACC_ENABLED #pragma acc loop independent reduction(+:temp) #endif - for (int i = 0; i < sourcePointsInCluster; i++) { // loop over source points - double sx = xS[startingIndexInSourcesArray + i]; - double sy = yS[startingIndexInSourcesArray + i]; - double sz = zS[startingIndexInSourcesArray + i]; + for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop over source points + double sx = clusterX[child_startingIndexInClustersArray + i]; + double sy = clusterY[child_startingIndexInClustersArray + i]; + double sz = clusterZ[child_startingIndexInClustersArray + i]; double numerator = 1.0; @@ -486,7 +510,7 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int idx, int } - clusterQ[startingIndexInClustersArray + j] += temp; + clusterQ[parent_startingIndexInClustersArray + j] += temp; } #ifdef OPENACC_ENABLED From 
d8632d95ed5e459f592edabe1b18c20c0de1b569 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Wed, 22 Jul 2020 14:24:20 -0400 Subject: [PATCH 50/95] fixing file names and openacc create clause --- src/clusters/clusters.c | 4 ++-- .../{regularized-coulomb_PP.c => regularized-coulomb_pp.c} | 0 .../{regularized-coulomb_PP.h => regularized-coulomb_pp.h} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename src/kernels/regularized-coulomb/{regularized-coulomb_PP.c => regularized-coulomb_pp.c} (100%) rename src/kernels/regularized-coulomb/{regularized-coulomb_PP.h => regularized-coulomb_pp.h} (100%) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 5196fbfc..d821ac33 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -360,8 +360,8 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_ind #ifdef OPENACC_ENABLED int streamID = rand() % 4; #pragma acc kernels async(streamID) present(clusterX, clusterY, clusterZ, clusterQ) \ - create(modifiedF[0:sourcePointsInCluster], exactIndX[0:sourcePointsInCluster], \ - exactIndY[0:sourcePointsInCluster], exactIndZ[0:sourcePointsInCluster], \ + create(modifiedF[0:interpolationPointsPerCluster], exactIndX[0:interpolationPointsPerCluster], \ + exactIndY[0:interpolationPointsPerCluster], exactIndZ[0:interpolationPointsPerCluster], \ nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], \ nodeZ[0:interpDegreeLim], weights[0:interpDegreeLim], \ dj[0:interpDegreeLim], tt[0:interpDegreeLim]) diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_PP.c b/src/kernels/regularized-coulomb/regularized-coulomb_pp.c similarity index 100% rename from src/kernels/regularized-coulomb/regularized-coulomb_PP.c rename to src/kernels/regularized-coulomb/regularized-coulomb_pp.c diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_PP.h b/src/kernels/regularized-coulomb/regularized-coulomb_pp.h similarity index 100% rename from 
src/kernels/regularized-coulomb/regularized-coulomb_PP.h rename to src/kernels/regularized-coulomb/regularized-coulomb_pp.h From b87985b7b67ba6547f78ad014d3077594a7860f6 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Wed, 22 Jul 2020 14:35:42 -0400 Subject: [PATCH 51/95] fixing file names from _PP to _pp --- .../mpi_subtraction_tests_9992266.gl1005.out | 56 +++++++++++++++++++ ...-coulomb_PP.c => regularized-coulomb_pp.c} | 0 ...-coulomb_PP.h => regularized-coulomb_pp.h} | 0 3 files changed, 56 insertions(+) create mode 100644 examples/dataFiles/mpi_subtraction_tests_9992266.gl1005.out rename src/kernels/regularized-coulomb/{regularized-coulomb_PP.c => regularized-coulomb_pp.c} (100%) rename src/kernels/regularized-coulomb/{regularized-coulomb_PP.h => regularized-coulomb_pp.h} (100%) diff --git a/examples/dataFiles/mpi_subtraction_tests_9992266.gl1005.out b/examples/dataFiles/mpi_subtraction_tests_9992266.gl1005.out new file mode 100644 index 00000000..d53c6e9f --- /dev/null +++ b/examples/dataFiles/mpi_subtraction_tests_9992266.gl1005.out @@ -0,0 +1,56 @@ +particle-cluster 0.8 7 -1.0 +[random cube example] Beginning random cube example with 1 ranks. +[random cube example] Zoltan load balancing has finished. +[random cube example] Running direct comparison... +[random cube example] Running treedriver... +[random cube example] +[random cube example] Treecode timing summary (all times in seconds)... +[random cube example] +[random cube example] Max Avg Max/Min +[random cube example] | Total time...................... 2.325e+00 s (100.00%) 2.325e+00 s (100.00%) 1.000 +[random cube example] | | +[random cube example] | |....Pre-process................ 2.645e-01 s ( 11.38%) 2.645e-01 s ( 11.38%) 1.000 +[random cube example] | |....Directdriver............... 4.650e-01 s ( 20.00%) 4.650e-01 s ( 20.00%) 1.000 +[random cube example] | |....Treedriver................. 
1.594e+00 s ( 68.59%) 1.594e+00 s ( 68.59%) 1.000 +[random cube example] +[random cube example] +[random cube example] | Directdriver.................... 4.650e-01 s (100.00%) 4.650e-01 s (100.00%) 1.000 +[random cube example] | | +[random cube example] | |....Compute local.............. 3.574e-01 s ( 76.86%) 3.574e-01 s ( 76.86%) 1.000 +[random cube example] +[random cube example] +[random cube example] | Treedriver...................... 1.594e+00 s (100.00%) 1.594e+00 s (100.00%) 1.000 +[random cube example] | | +[random cube example] | |....Build local tree........... 7.443e-02 s ( 4.67%) 7.443e-02 s ( 4.67%) 1.000 +[random cube example] | |....Build local batches........ 7.230e-02 s ( 4.53%) 7.230e-02 s ( 4.53%) 1.000 +[random cube example] | |....Build local clusters....... 2.528e-01 s ( 15.85%) 2.528e-01 s ( 15.85%) 1.000 +[random cube example] | |....Build local lists.......... 1.451e-03 s ( 0.09%) 1.451e-03 s ( 0.09%) 1.000 +[random cube example] | |....Compute local.............. 1.086e+00 s ( 68.09%) 1.086e+00 s ( 68.09%) 1.000 +[random cube example] | |....Correct potential.......... 1.025e-01 s ( 6.43%) 1.025e-01 s ( 6.43%) 1.000 +[random cube example] | |....Cleanup.................... 
6.916e-05 s ( 0.00%) 6.916e-05 s ( 0.00%) 1.000 +[random cube example] +[random cube example] Tree potential energy: -133380260.858546 +[random cube example] +[random cube example] Relative inf norm error in potential: 6.939956e-06 +[random cube example] Relative 2 norm error in potential: 2.781444e-06 +[random cube example] +[BaryTree] +[BaryTree] RunParams struct has been set to the following: +[BaryTree] +[BaryTree] kernel = 1 +[BaryTree] num_kernel_params = 1 +[BaryTree] kernel_params = 1.000000, +[BaryTree] approximation = 1 +[BaryTree] singularity = 1 +[BaryTree] compute_type = 1 +[BaryTree] theta = 0.800000 +[BaryTree] size_check_factor = 1.000000 +[BaryTree] interp_order = 7 +[BaryTree] interp_pts_per_cluster = 512 +[BaryTree] interp_weights_per_cluster = 512 +[BaryTree] interp_charges_per_cluster = 512 +[BaryTree] max_per_source_leaf = 3000 +[BaryTree] max_per_target_leaf = 3000 +[BaryTree] verbosity = 0 +[BaryTree] +/home/njvaughn/BaryTree/examples diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_PP.c b/src/kernels/regularized-coulomb/regularized-coulomb_pp.c similarity index 100% rename from src/kernels/regularized-coulomb/regularized-coulomb_PP.c rename to src/kernels/regularized-coulomb/regularized-coulomb_pp.c diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_PP.h b/src/kernels/regularized-coulomb/regularized-coulomb_pp.h similarity index 100% rename from src/kernels/regularized-coulomb/regularized-coulomb_PP.h rename to src/kernels/regularized-coulomb/regularized-coulomb_pp.h From da81ea68b7965039e375884a47bd94e086367dd4 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Wed, 22 Jul 2020 17:16:46 -0400 Subject: [PATCH 52/95] first pass at singularity subtraction --- src/clusters/clusters.c | 248 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 243 insertions(+), 5 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index d821ac33..0fd5f65b 100644 --- a/src/clusters/clusters.c +++ 
b/src/clusters/clusters.c @@ -24,6 +24,9 @@ static void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpola static void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); +static void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); + static void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); @@ -113,15 +116,13 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa if ((approximation == LAGRANGE) && (singularity == SKIPPING)) { // anterpolate from particles to leaf cluster interpolation points - printf("Computing modified charges for the %i leaves\n",tree->leaves_list_num); for (int i = 0; i < tree->leaves_list_num; ++i) { int leaf_index = tree->leaves_list[i]; pc_comp_ms_modifiedF(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); } // interpolate up clusters, level by level - for (int level = tree->max_depth-2; level >= 0; --level) { - printf("Computing modified charges for level %i which contains %i clusters\n",level,tree->levels_list_num[level]); + for (int level = tree->max_depth-1; level >= 0; --level) { for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { int parent_index = tree->levels_list[level][cluster_index]; @@ -142,8 +143,31 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa // pc_comp_ms_modifiedF(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); } else if ((approximation == LAGRANGE) && (singularity 
== SUBTRACTION)) { - for (int i = 0; i < tree_numnodes; i++) - pc_comp_ms_modifiedF_SS(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); +// for (int i = 0; i < tree_numnodes; i++) +// pc_comp_ms_modifiedF_SS(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); + + // anterpolate from particles to leaf cluster interpolation points + for (int i = 0; i < tree->leaves_list_num; ++i) { + int leaf_index = tree->leaves_list[i]; + pc_comp_ms_modifiedF_SS(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); + } + + // interpolate up clusters, level by level + for (int level = tree->max_depth-1; level >= 0; --level) { + for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { + + int parent_index = tree->levels_list[level][cluster_index]; + + for (int child_counter=0; child_counter<tree->num_children[parent_index]; ++child_counter){ + + int child_index = tree->children[8*parent_index + child_counter]; + + pc_comp_ms_modifiedF_SS_child_to_parent(tree, child_index, parent_index, interpolationDegree, xC, yC, zC, qC, wC); + + } + } + } + } else if ((approximation == HERMITE) && (singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) @@ -532,6 +556,220 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_ind } +void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) +{ + + int interpDegreeLim = interpolationDegree + 1; + int interpolationPointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; + + int child_startingIndexInClustersArray = child_index * interpolationPointsPerCluster; + int parent_startingIndexInClustersArray = parent_index * interpolationPointsPerCluster; + + double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ, *modifiedF, *modifiedF2; + int *exactIndX,
*exactIndY, *exactIndZ; + + make_vector(weights, interpDegreeLim); + make_vector(dj, interpDegreeLim); + make_vector(tt, interpDegreeLim); + make_vector(nodeX, interpDegreeLim); + make_vector(nodeY, interpDegreeLim); + make_vector(nodeZ, interpDegreeLim); + make_vector(modifiedF, interpolationPointsPerCluster); + make_vector(modifiedF2, interpolationPointsPerCluster); + make_vector(exactIndX, interpolationPointsPerCluster); + make_vector(exactIndY, interpolationPointsPerCluster); + make_vector(exactIndZ, interpolationPointsPerCluster); + + double x0 = tree->x_min[parent_index]; + double x1 = tree->x_max[parent_index]; + double y0 = tree->y_min[parent_index]; + double y1 = tree->y_max[parent_index]; + double z0 = tree->z_min[parent_index]; + double z1 = tree->z_max[parent_index]; + +#ifdef OPENACC_ENABLED + int streamID = rand() % 4; + #pragma acc kernels async(streamID) present(clusterX, clusterY, clusterZ, clusterQ) \ + create(modifiedF[0:interpolationPointsPerCluster], modifiedF2[0:interpolationPointsPerCluster], exactIndX[0:interpolationPointsPerCluster], \ + exactIndY[0:interpolationPointsPerCluster], exactIndZ[0:interpolationPointsPerCluster], \ + nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], \ + nodeZ[0:interpDegreeLim], weights[0:interpDegreeLim], \ + dj[0:interpDegreeLim], tt[0:interpDegreeLim]) + { +#endif + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpolationPointsPerCluster; j++) { + modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j] * clusterW[child_startingIndexInClustersArray + j]; + modifiedF2[j] = clusterW[child_startingIndexInClustersArray + j]; + exactIndX[j] = -1; + exactIndY[j] = -1; + exactIndZ[j] = -1; + } + + // Fill in arrays of unique x, y, and z coordinates for the interpolation points. 
+#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interpDegreeLim; i++) { + tt[i] = cos(i * M_PI / interpolationDegree); + nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); + nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); + nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); + } + + // Compute weights +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpDegreeLim; j++) { + dj[j] = 1.0; + if (j == 0) dj[j] = 0.5; + if (j == interpolationDegree) dj[j] = 0.5; + } + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpDegreeLim; j++) { + weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; + } + + // Compute modified f values +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop through the source points + + double sumX = 0.0; + double sumY = 0.0; + double sumZ = 0.0; + + double sx = clusterX[child_startingIndexInClustersArray+i]; + double sy = clusterY[child_startingIndexInClustersArray+i]; + double sz = clusterZ[child_startingIndexInClustersArray+i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) +#endif + for (int j = 0; j < (interpolationDegree+1); j++) { // loop through the degree + + double cx = sx - nodeX[j]; + double cy = sy - nodeY[j]; + double cz = sz - nodeZ[j]; + + if (fabs(cx) < DBL_MIN) exactIndX[i] = j; + if (fabs(cy) < DBL_MIN) exactIndY[i] = j; + if (fabs(cz) < DBL_MIN) exactIndZ[i] = j; + + // Increment the sums + double w = weights[j]; + sumX += w / cx; + sumY += w / cy; + sumZ += w / cz; + + } + + double denominator = 1.0; + if (exactIndX[i] == -1) denominator *= sumX; + if (exactIndY[i] == -1) denominator *= sumY; + if (exactIndZ[i] == -1) denominator *= sumZ; + + modifiedF[i] /= denominator; + modifiedF2[i] /= denominator; + } + + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < 
interpolationPointsPerCluster; j++) { + int k1 = j%(interpolationDegree+1); + int kk = (j-k1)/(interpolationDegree+1); + int k2 = kk%(interpolationDegree+1); + kk = kk - k2; + int k3 = kk / (interpolationDegree+1); + + double cz = nodeZ[k3]; + double w3 = weights[k3]; + + double cy = nodeY[k2]; + double w2 = weights[k2]; + + double cx = nodeX[k1]; + double w1 = weights[k1]; + + // Fill cluster X, Y, and Z arrays + clusterX[parent_startingIndexInClustersArray + j] = cx; + clusterY[parent_startingIndexInClustersArray + j] = cy; + clusterZ[parent_startingIndexInClustersArray + j] = cz; + + // Increment cluster Q array + double temp = 0.0; + double temp2 = 0.0; +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:temp) +#endif + for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop over source points + double sx = clusterX[child_startingIndexInClustersArray + i]; + double sy = clusterY[child_startingIndexInClustersArray + i]; + double sz = clusterZ[child_startingIndexInClustersArray + i]; + + double numerator = 1.0; + + // If exactInd[i] == -1, then no issues. + // If exactInd[i] != -1, then we want to zero out terms EXCEPT when exactInd=k1. 
+ if (exactIndX[i] == -1) { + numerator *= w1 / (sx - cx); + } else { + if (exactIndX[i] != k1) numerator *= 0; + } + + if (exactIndY[i] == -1) { + numerator *= w2 / (sy - cy); + } else { + if (exactIndY[i] != k2) numerator *= 0; + } + + if (exactIndZ[i] == -1) { + numerator *= w3 / (sz - cz); + } else { + if (exactIndZ[i] != k3) numerator *= 0; + } + + temp += numerator * modifiedF[i]; + temp2 += numerator * modifiedF2[i]; + + } + + clusterQ[parent_startingIndexInClustersArray + j] += temp; + clusterW[parent_startingIndexInClustersArray + j] += temp2; + + } +#ifdef OPENACC_ENABLED + } //end acc kernels region +#endif + + free_vector(weights); + free_vector(dj); + free_vector(tt); + free_vector(nodeX); + free_vector(nodeY); + free_vector(nodeZ); + free_vector(modifiedF); + free_vector(modifiedF2); + free_vector(exactIndX); + free_vector(exactIndY); + free_vector(exactIndZ); + + return; +} + + + void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ) From 2a8dfea78677068b56db0890fb6877a582e8e08f Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 23 Jul 2020 08:22:11 -0400 Subject: [PATCH 53/95] SS upward pass: split Q and W, still buggy though. 
--- src/clusters/clusters.c | 496 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 478 insertions(+), 18 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 0fd5f65b..3ee5c803 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -22,12 +22,19 @@ static void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpola double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); static void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, - double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); + +static void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); -static void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, - double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); +static void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); + +static void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolationDegree, + double *xS, double *yS, double *zS, double *qS, double *wS, + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); -static void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationDegree, +static void pc_comp_ms_modifiedF_SS_W(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double 
*clusterZ, double *clusterQ, double *clusterW); @@ -147,13 +154,39 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa // pc_comp_ms_modifiedF_SS(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); // anterpolate from particles to leaf cluster interpolation points + printf("anterpolating Q to leaves.\n"); + for (int i = 0; i < tree->leaves_list_num; ++i) { + int leaf_index = tree->leaves_list[i]; + pc_comp_ms_modifiedF_SS_Q(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); + } + + // interpolate up clusters, level by level + for (int level = tree->max_depth-1; level >= 0; --level) { + printf("anterpolating Q at level %i.\n", level); + for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { + + int parent_index = tree->levels_list[level][cluster_index]; + + for (int child_counter=0; child_counter<tree->num_children[parent_index]; ++child_counter){ + + int child_index = tree->children[8*parent_index + child_counter]; + + pc_comp_ms_modifiedF_SS_child_to_parent_Q(tree, child_index, parent_index, interpolationDegree, xC, yC, zC, qC, wC); + + } + } + } + + // anterpolate from particles to leaf cluster interpolation points + printf("anterpolating W to leaves.\n"); for (int i = 0; i < tree->leaves_list_num; ++i) { int leaf_index = tree->leaves_list[i]; - pc_comp_ms_modifiedF_SS(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); + pc_comp_ms_modifiedF_SS_W(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); } // interpolate up clusters, level by level for (int level = tree->max_depth-1; level >= 0; --level) { + printf("anterpolating W at level %i.\n", level); for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { int parent_index = tree->levels_list[level][cluster_index]; @@ -162,7 +195,7 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa
int child_index = tree->children[8*parent_index + child_counter]; - pc_comp_ms_modifiedF_SS_child_to_parent(tree, child_index, parent_index, interpolationDegree, xC, yC, zC, qC, wC); + pc_comp_ms_modifiedF_SS_child_to_parent_W(tree, child_index, parent_index, interpolationDegree, xC, yC, zC, qC, wC); } } @@ -556,7 +589,7 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_ind } -void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, +void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) { @@ -576,7 +609,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_ make_vector(nodeY, interpDegreeLim); make_vector(nodeZ, interpDegreeLim); make_vector(modifiedF, interpolationPointsPerCluster); - make_vector(modifiedF2, interpolationPointsPerCluster); +// make_vector(modifiedF2, interpolationPointsPerCluster); make_vector(exactIndX, interpolationPointsPerCluster); make_vector(exactIndY, interpolationPointsPerCluster); make_vector(exactIndZ, interpolationPointsPerCluster); @@ -604,7 +637,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_ #endif for (int j = 0; j < interpolationPointsPerCluster; j++) { modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j] * clusterW[child_startingIndexInClustersArray + j]; - modifiedF2[j] = clusterW[child_startingIndexInClustersArray + j]; +// modifiedF2[j] = clusterW[child_startingIndexInClustersArray + j]; exactIndX[j] = -1; exactIndY[j] = -1; exactIndZ[j] = -1; @@ -679,6 +712,219 @@ void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_ if (exactIndZ[i] == -1) denominator *= sumZ; modifiedF[i] /= denominator; +// modifiedF2[i] /= denominator; + } + + +#ifdef OPENACC_ENABLED + 
#pragma acc loop independent +#endif + for (int j = 0; j < interpolationPointsPerCluster; j++) { + int k1 = j%(interpolationDegree+1); + int kk = (j-k1)/(interpolationDegree+1); + int k2 = kk%(interpolationDegree+1); + kk = kk - k2; + int k3 = kk / (interpolationDegree+1); + + double cz = nodeZ[k3]; + double w3 = weights[k3]; + + double cy = nodeY[k2]; + double w2 = weights[k2]; + + double cx = nodeX[k1]; + double w1 = weights[k1]; + + // Fill cluster X, Y, and Z arrays + clusterX[parent_startingIndexInClustersArray + j] = cx; + clusterY[parent_startingIndexInClustersArray + j] = cy; + clusterZ[parent_startingIndexInClustersArray + j] = cz; + + // Increment cluster Q array + double temp = 0.0; +// double temp2 = 0.0; +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:temp) +#endif + for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop over source points + double sx = clusterX[child_startingIndexInClustersArray + i]; + double sy = clusterY[child_startingIndexInClustersArray + i]; + double sz = clusterZ[child_startingIndexInClustersArray + i]; + + double numerator = 1.0; + + // If exactInd[i] == -1, then no issues. + // If exactInd[i] != -1, then we want to zero out terms EXCEPT when exactInd=k1. 
+ if (exactIndX[i] == -1) { + numerator *= w1 / (sx - cx); + } else { + if (exactIndX[i] != k1) numerator *= 0; + } + + if (exactIndY[i] == -1) { + numerator *= w2 / (sy - cy); + } else { + if (exactIndY[i] != k2) numerator *= 0; + } + + if (exactIndZ[i] == -1) { + numerator *= w3 / (sz - cz); + } else { + if (exactIndZ[i] != k3) numerator *= 0; + } + + temp += numerator * modifiedF[i]; +// temp2 += numerator * modifiedF2[i]; + + } + + clusterQ[parent_startingIndexInClustersArray + j] += temp; +// clusterW[parent_startingIndexInClustersArray + j] += temp2; + + } +#ifdef OPENACC_ENABLED + } //end acc kernels region +#endif + + free_vector(weights); + free_vector(dj); + free_vector(tt); + free_vector(nodeX); + free_vector(nodeY); + free_vector(nodeZ); + free_vector(modifiedF); +// free_vector(modifiedF2); + free_vector(exactIndX); + free_vector(exactIndY); + free_vector(exactIndZ); + + return; +} + + +void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) +{ + + int interpDegreeLim = interpolationDegree + 1; + int interpolationPointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; + + int child_startingIndexInClustersArray = child_index * interpolationPointsPerCluster; + int parent_startingIndexInClustersArray = parent_index * interpolationPointsPerCluster; + + double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ, *modifiedF, *modifiedF2; + int *exactIndX, *exactIndY, *exactIndZ; + + make_vector(weights, interpDegreeLim); + make_vector(dj, interpDegreeLim); + make_vector(tt, interpDegreeLim); + make_vector(nodeX, interpDegreeLim); + make_vector(nodeY, interpDegreeLim); + make_vector(nodeZ, interpDegreeLim); +// make_vector(modifiedF, interpolationPointsPerCluster); + make_vector(modifiedF2, interpolationPointsPerCluster); + make_vector(exactIndX, interpolationPointsPerCluster); + 
make_vector(exactIndY, interpolationPointsPerCluster); + make_vector(exactIndZ, interpolationPointsPerCluster); + + double x0 = tree->x_min[parent_index]; + double x1 = tree->x_max[parent_index]; + double y0 = tree->y_min[parent_index]; + double y1 = tree->y_max[parent_index]; + double z0 = tree->z_min[parent_index]; + double z1 = tree->z_max[parent_index]; + +#ifdef OPENACC_ENABLED + int streamID = rand() % 4; + #pragma acc kernels async(streamID) present(clusterX, clusterY, clusterZ, clusterQ) \ + create(modifiedF[0:interpolationPointsPerCluster], modifiedF2[0:interpolationPointsPerCluster], exactIndX[0:interpolationPointsPerCluster], \ + exactIndY[0:interpolationPointsPerCluster], exactIndZ[0:interpolationPointsPerCluster], \ + nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], \ + nodeZ[0:interpDegreeLim], weights[0:interpDegreeLim], \ + dj[0:interpDegreeLim], tt[0:interpDegreeLim]) + { +#endif + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpolationPointsPerCluster; j++) { +// modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j] * clusterW[child_startingIndexInClustersArray + j]; + modifiedF2[j] = clusterW[child_startingIndexInClustersArray + j]; + exactIndX[j] = -1; + exactIndY[j] = -1; + exactIndZ[j] = -1; + } + + // Fill in arrays of unique x, y, and z coordinates for the interpolation points. 
+#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interpDegreeLim; i++) { + tt[i] = cos(i * M_PI / interpolationDegree); + nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); + nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); + nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); + } + + // Compute weights +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpDegreeLim; j++) { + dj[j] = 1.0; + if (j == 0) dj[j] = 0.5; + if (j == interpolationDegree) dj[j] = 0.5; + } + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpDegreeLim; j++) { + weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; + } + + // Compute modified f values +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop through the source points + + double sumX = 0.0; + double sumY = 0.0; + double sumZ = 0.0; + + double sx = clusterX[child_startingIndexInClustersArray+i]; + double sy = clusterY[child_startingIndexInClustersArray+i]; + double sz = clusterZ[child_startingIndexInClustersArray+i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) +#endif + for (int j = 0; j < (interpolationDegree+1); j++) { // loop through the degree + + double cx = sx - nodeX[j]; + double cy = sy - nodeY[j]; + double cz = sz - nodeZ[j]; + + if (fabs(cx) < DBL_MIN) exactIndX[i] = j; + if (fabs(cy) < DBL_MIN) exactIndY[i] = j; + if (fabs(cz) < DBL_MIN) exactIndZ[i] = j; + + // Increment the sums + double w = weights[j]; + sumX += w / cx; + sumY += w / cy; + sumZ += w / cz; + + } + + double denominator = 1.0; + if (exactIndX[i] == -1) denominator *= sumX; + if (exactIndY[i] == -1) denominator *= sumY; + if (exactIndZ[i] == -1) denominator *= sumZ; + +// modifiedF[i] /= denominator; modifiedF2[i] /= denominator; } @@ -740,12 +986,12 @@ void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, 
int child_ if (exactIndZ[i] != k3) numerator *= 0; } - temp += numerator * modifiedF[i]; +// temp += numerator * modifiedF[i]; temp2 += numerator * modifiedF2[i]; } - clusterQ[parent_startingIndexInClustersArray + j] += temp; +// clusterQ[parent_startingIndexInClustersArray + j] += temp; clusterW[parent_startingIndexInClustersArray + j] += temp2; } @@ -759,7 +1005,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_ free_vector(nodeX); free_vector(nodeY); free_vector(nodeZ); - free_vector(modifiedF); +// free_vector(modifiedF); free_vector(modifiedF2); free_vector(exactIndX); free_vector(exactIndY); @@ -978,7 +1224,7 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDeg -void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationDegree, +void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) { @@ -998,7 +1244,7 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation make_vector(nodeY, interpDegreeLim); make_vector(nodeZ, interpDegreeLim); make_vector(modifiedF, pointsInNode); - make_vector(modifiedF2, pointsInNode); +// make_vector(modifiedF2, pointsInNode); make_vector(exactIndX, pointsInNode); make_vector(exactIndY, pointsInNode); make_vector(exactIndZ, pointsInNode); @@ -1026,7 +1272,7 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation #endif for (int j = 0; j < pointsInNode; j++) { modifiedF[j] = qS[startingIndexInSources + j] * wS[startingIndexInSources + j]; - modifiedF2[j] = wS[startingIndexInSources + j]; +// modifiedF2[j] = wS[startingIndexInSources + j]; exactIndX[j] = -1; exactIndY[j] = -1; exactIndZ[j] = -1; @@ -1101,7 +1347,7 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation if (exactIndZ[i] == 
-1) denominator *= sumZ; modifiedF[i] /= denominator; - modifiedF2[i] /= denominator; +// modifiedF2[i] /= denominator; } @@ -1165,11 +1411,11 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation } temp += numerator * modifiedF[i]; - temp2 += numerator * modifiedF2[i]; +// temp2 += numerator * modifiedF2[i]; } clusterQ[startingIndexInClusters + j] += temp; - clusterW[startingIndexInClusters + j] += temp2; +// clusterW[startingIndexInClusters + j] += temp2; } #ifdef OPENACC_ENABLED @@ -1183,6 +1429,220 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation free_vector(nodeY); free_vector(nodeZ); free_vector(modifiedF); +// free_vector(modifiedF2); + free_vector(exactIndX); + free_vector(exactIndY); + free_vector(exactIndZ); + + return; +} + + +void pc_comp_ms_modifiedF_SS_W(const struct Tree *tree, int idx, int interpolationDegree, + double *xS, double *yS, double *zS, double *qS, double *wS, + double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) +{ + int interpDegreeLim = interpolationDegree + 1; + int pointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; + int pointsInNode = tree->iend[idx] - tree->ibeg[idx] + 1; + int startingIndexInClusters = idx * pointsPerCluster; + int startingIndexInSources = tree->ibeg[idx]-1; + + double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ, *modifiedF, *modifiedF2; + int *exactIndX, *exactIndY, *exactIndZ; + + make_vector(weights, interpDegreeLim); + make_vector(dj, interpDegreeLim); + make_vector(tt, interpDegreeLim); + make_vector(nodeX, interpDegreeLim); + make_vector(nodeY, interpDegreeLim); + make_vector(nodeZ, interpDegreeLim); +// make_vector(modifiedF, pointsInNode); + make_vector(modifiedF2, pointsInNode); + make_vector(exactIndX, pointsInNode); + make_vector(exactIndY, pointsInNode); + make_vector(exactIndZ, pointsInNode); + + double x0 = tree->x_min[idx]; // 1e-15 fails for large meshes, mysteriously. 
+ double x1 = tree->x_max[idx]; + double y0 = tree->y_min[idx]; + double y1 = tree->y_max[idx]; + double z0 = tree->z_min[idx]; + double z1 = tree->z_max[idx]; + +#ifdef OPENACC_ENABLED + int streamID = rand() % 3; + #pragma acc kernels async(streamID) present(xS, yS, zS, qS, wS, \ + clusterX, clusterY, clusterZ, clusterQ, clusterW) \ + create(modifiedF[0:pointsInNode], modifiedF2[0:pointsInNode], exactIndX[0:pointsInNode], \ + exactIndY[0:pointsInNode], exactIndZ[0:pointsInNode], \ + nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], nodeZ[0:interpDegreeLim], \ + weights[0:interpDegreeLim], dj[0:interpDegreeLim], tt[0:interpDegreeLim]) + { +#endif + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < pointsInNode; j++) { +// modifiedF[j] = qS[startingIndexInSources + j] * wS[startingIndexInSources + j]; + modifiedF2[j] = wS[startingIndexInSources + j]; + exactIndX[j] = -1; + exactIndY[j] = -1; + exactIndZ[j] = -1; + } + + // Fill in arrays of unique x, y, and z coordinates for the interpolation points. +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interpDegreeLim; i++) { + tt[i] = cos(i * M_PI / interpolationDegree); + nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); + nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); + nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); + } + + // Compute weights +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpDegreeLim; j++) { + dj[j] = 1.0; + if (j == 0) dj[j] = 0.5; + if (j == interpolationDegree) dj[j] = 0.5; + } + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interpDegreeLim; j++) { + weights[j] = ((j % 2 == 0)? 
1 : -1) * dj[j]; + } + + // Compute modified f values +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < pointsInNode; i++) { // loop through the source points + + double sumX = 0.0; + double sumY = 0.0; + double sumZ = 0.0; + + double sx = xS[startingIndexInSources+i]; + double sy = yS[startingIndexInSources+i]; + double sz = zS[startingIndexInSources+i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) +#endif + for (int j = 0; j < interpDegreeLim; j++) { // loop through the degree + + double cx = sx - nodeX[j]; + double cy = sy - nodeY[j]; + double cz = sz - nodeZ[j]; + + if (fabs(cx) < DBL_MIN) exactIndX[i] = j; + if (fabs(cy) < DBL_MIN) exactIndY[i] = j; + if (fabs(cz) < DBL_MIN) exactIndZ[i] = j; + + // Increment the sums + double w = weights[j]; + sumX += w / (cx); + sumY += w / (cy); + sumZ += w / (cz); + + } + + double denominator = 1.0; + if (exactIndX[i] == -1) denominator *= sumX; + if (exactIndY[i] == -1) denominator *= sumY; + if (exactIndZ[i] == -1) denominator *= sumZ; + +// modifiedF[i] /= denominator; + modifiedF2[i] /= denominator; + + } + + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < pointsPerCluster; j++) { // loop over interpolation points, set (cx,cy,cz) for this point + // compute k1, k2, k3 from j + int k1 = j % interpDegreeLim; + int kk = (j-k1) / interpDegreeLim; + int k2 = kk % interpDegreeLim; + kk = kk - k2; + int k3 = kk / interpDegreeLim; + + double cz = nodeZ[k3]; + double w3 = weights[k3]; + + double cy = nodeY[k2]; + double w2 = weights[k2]; + + double cx = nodeX[k1]; + double w1 = weights[k1]; + + // Fill cluster X, Y, and Z arrays + clusterX[startingIndexInClusters + j] = cx; + clusterY[startingIndexInClusters + j] = cy; + clusterZ[startingIndexInClusters + j] = cz; + + // Increment cluster Q array + double temp = 0.0, temp2 = 0.0; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent 
reduction(+:temp) reduction(+:temp2) +#endif + for (int i = 0; i < pointsInNode; i++) { // loop over source points + double sx = xS[startingIndexInSources + i]; + double sy = yS[startingIndexInSources + i]; + double sz = zS[startingIndexInSources + i]; + + double numerator = 1.0; + + // If exactInd[i] == -1, then no issues. + // If exactInd[i] != -1, then we want to zero out terms EXCEPT when exactInd=k1. + if (exactIndX[i] == -1) { + numerator *= w1 / (sx - cx); + } else { + if (exactIndX[i] != k1) numerator *= 0; + } + + if (exactIndY[i] == -1) { + numerator *= w2 / (sy - cy); + } else { + if (exactIndY[i] != k2) numerator *= 0; + } + + if (exactIndZ[i] == -1) { + numerator *= w3 / (sz - cz); + } else { + if (exactIndZ[i] != k3) numerator *= 0; + } + +// temp += numerator * modifiedF[i]; + temp2 += numerator * modifiedF2[i]; + } + +// clusterQ[startingIndexInClusters + j] += temp; + clusterW[startingIndexInClusters + j] += temp2; + } + +#ifdef OPENACC_ENABLED + } // end acc kernels region +#endif + + free_vector(weights); + free_vector(dj); + free_vector(tt); + free_vector(nodeX); + free_vector(nodeY); + free_vector(nodeZ); +// free_vector(modifiedF); free_vector(modifiedF2); free_vector(exactIndX); free_vector(exactIndY); From 286a6b59e5daeb94e22536f2995515fc6063371c Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 23 Jul 2020 08:56:28 -0400 Subject: [PATCH 54/95] target tree updated to track max depth and parent nodes. 
--- src/drivers/treedriver.c | 2 ++ src/tree/tree.c | 8 ++++++-- src/tree/tree_linked_list.c | 20 ++++++++++++++++---- src/tree/tree_linked_list.h | 5 +++-- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 93ad801c..35d494cc 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -480,10 +480,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[0]); Tree_Sources_Construct(&source_tree, sources, run_params); + printf("Tree_Sources_Construct complete.\n"); STOP_TIMER(&time_tree[0]); START_TIMER(&time_tree[1]); Tree_Targets_Construct(&target_tree, targets, run_params); + printf("Tree_Targets_Construct complete.\n"); STOP_TIMER(&time_tree[1]); START_TIMER(&time_tree[2]); diff --git a/src/tree/tree.c b/src/tree/tree.c index 97d1692f..69ffa128 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -67,6 +67,7 @@ void Tree_Targets_Construct(struct Tree **tree_addr, struct Particles *targets, double xyzminmax[6]; int numnodes = 0; int numleaves = 0; + int max_depth = 1; int min_leaf_size = INT_MAX; int max_leaf_size = 0; @@ -78,12 +79,15 @@ void Tree_Targets_Construct(struct Tree **tree_addr, struct Particles *targets, xyzminmax[4] = minval(targets->z, targets->num); xyzminmax[5] = maxval(targets->z, targets->num); - TreeLinkedList_Targets_Construct(&tree_linked_list, targets, 1, targets->num, + TreeLinkedList_Targets_Construct(&tree_linked_list, NULL, targets, 1, targets->num, run_params->max_per_target_leaf, xyzminmax, &numnodes, &numleaves, - &min_leaf_size, &max_leaf_size); + &min_leaf_size, &max_leaf_size, &max_depth, 0); + printf("TreeLinkedList_Targets_Construct complete.\n"); + TreeLinkedList_SetIndex(tree_linked_list, 0); + Tree_Alloc(tree_addr, numnodes); Tree_Fill(*tree_addr, tree_linked_list); (*tree_addr)->numleaves = numleaves; diff --git a/src/tree/tree_linked_list.c b/src/tree/tree_linked_list.c index 
809215ef..b83e0f3f 100644 --- a/src/tree/tree_linked_list.c +++ b/src/tree/tree_linked_list.c @@ -13,9 +13,11 @@ static void remove_node(struct TreeLinkedListNode *p); -void TreeLinkedList_Targets_Construct(struct TreeLinkedListNode **p, struct Particles *targets, +void TreeLinkedList_Targets_Construct(struct TreeLinkedListNode **p, struct TreeLinkedListNode *parent, + struct Particles *targets, int ibeg, int iend, int maxparnode, double *xyzmm, - int *numnodes, int *numleaves, int *min_leaf_size, int *max_leaf_size) + int *numnodes, int *numleaves, int *min_leaf_size, int *max_leaf_size, + int *max_depth, int current_level) { int ind[8][2]; double xyzmms[6][8]; @@ -40,8 +42,15 @@ void TreeLinkedList_Targets_Construct(struct TreeLinkedListNode **p, struct Part (*p) = malloc(sizeof(struct TreeLinkedListNode)); (*numnodes)++; + (*p)->parent = parent; (*p)->numpar = iend - ibeg + 1; + if (current_level + 1 > *max_depth){ + printf("[TreeLinkedList_Targets_Construct] Increasing max depth to %i\n",current_level + 1); + *max_depth = current_level + 1; + } + (*p)->level = current_level; + (*p)->x_min = minval(targets->x + ibeg - 1, (*p)->numpar); (*p)->x_max = maxval(targets->x + ibeg - 1, (*p)->numpar); (*p)->y_min = minval(targets->y + ibeg - 1, (*p)->numpar); @@ -115,10 +124,13 @@ void TreeLinkedList_Targets_Construct(struct TreeLinkedListNode **p, struct Part struct TreeLinkedListNode **paddress = &((*p)->child[idx]); - TreeLinkedList_Targets_Construct(paddress, + TreeLinkedList_Targets_Construct(paddress, *p, targets, ind[i][0], ind[i][1], maxparnode, lxyzmm, numnodes, numleaves, - min_leaf_size, max_leaf_size); + min_leaf_size, max_leaf_size, + max_depth, current_level+1); + + } } diff --git a/src/tree/tree_linked_list.h b/src/tree/tree_linked_list.h index 512e49f8..088793fd 100644 --- a/src/tree/tree_linked_list.h +++ b/src/tree/tree_linked_list.h @@ -5,9 +5,10 @@ #include "struct_tree_linked_list_node.h" -void TreeLinkedList_Targets_Construct(struct 
TreeLinkedListNode **p, struct Particles *targets, +void TreeLinkedList_Targets_Construct(struct TreeLinkedListNode **p, struct TreeLinkedListNode *parent, + struct Particles *targets, int ibeg, int iend, int maxparnode, double *xyzmm, int *numnodes, int *numleaves, - int *min_leaf_size, int *max_leaf_size); + int *min_leaf_size, int *max_leaf_size, int *max_depth, int current_level); void TreeLinkedList_Sources_Construct(struct TreeLinkedListNode **p, struct TreeLinkedListNode *parent, struct Particles *sources, From 3baaedda401572fe31f3338950f1704b217ad738 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Fri, 24 Jul 2020 09:24:49 -0400 Subject: [PATCH 55/95] O(N) downpass working for CPUs. Single routine, not split. --- src/clusters/clusters.c | 2 +- .../interaction_compute_downpass.c | 220 +++++++++++++++++- src/tree/tree.c | 3 + 3 files changed, 220 insertions(+), 5 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 3ee5c803..0cbc27a2 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -128,7 +128,7 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa pc_comp_ms_modifiedF(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); } - // interpolate up clusters, level by level + // anterpolate up clusters, level by level for (int level = tree->max_depth-1; level >= 0; --level) { for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index 6c6be795..12181492 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -16,6 +16,9 @@ static void cp_comp_pot(struct Tree *tree, int idx, double *potential, int inter double *xT, double *yT, double *zT, double *qT, double *clusterQ); +static void cp_comp_pot_parent_to_child(struct Tree *tree, 
int parent_index, int child_index, int interp_degree, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q); + static void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_degree, double *xT, double *yT, double *zT, double *qT, double *clusterQ, double *clusterW); @@ -40,10 +43,14 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, double *target_z = targets->z; double *target_q = targets->q; + double *cluster_x = clusters->x; + double *cluster_y = clusters->y; + double *cluster_z = clusters->z; + double *cluster_q = clusters->q; + double *cluster_w = clusters->w; + int total_num_interp_charges = clusters->num_charges; int total_num_interp_weights = clusters->num_weights; - double *cluster_q = clusters->q; - double *cluster_w = clusters->w; int tree_numnodes = tree->numnodes; int interp_degree = run_params->interp_degree; @@ -58,9 +65,39 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, #endif if ((run_params->approximation == LAGRANGE) && (run_params->singularity == SKIPPING)) { - for (int i = 0; i < tree_numnodes; i++) - cp_comp_pot(tree, i, potential, interp_degree, +// for (int i = 0; i < tree_numnodes; i++) +// cp_comp_pot(tree, i, potential, interp_degree, +// target_x, target_y, target_z, target_q, cluster_q); + + + // interpolate up clusters, level by level + for (int level = 0; level < tree->max_depth; ++level) { + printf("Interpolating for level %i\n", level); + for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { + + int parent_index = tree->levels_list[level][cluster_index]; + + for (int child_counter=0; child_counternum_children[parent_index]; ++child_counter){ + + int child_index = tree->children[8*parent_index + child_counter]; + +// cp_comp_pot(tree, child_index, parent_index, interpolationDegree, xC, yC, zC, qC); + + cp_comp_pot_parent_to_child(tree, parent_index, child_index, interp_degree, + cluster_x, cluster_y, 
cluster_z, cluster_q); + + } + } + } + + // interpolate from leaf cluster interpolation points to target particles + printf("Interpolating from leaf interpolation points to particles.\n"); + for (int i = 0; i < tree->leaves_list_num; ++i) { + int leaf_index = tree->leaves_list[i]; + cp_comp_pot(tree, leaf_index, potential, interp_degree, target_x, target_y, target_z, target_q, cluster_q); + } + } else if ((run_params->approximation == LAGRANGE) && (run_params->singularity == SUBTRACTION)) { for (int i = 0; i < tree_numnodes; i++){ @@ -99,6 +136,181 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, /***** LOCAL FUNCTIONS **************/ /************************************/ +void cp_comp_pot_parent_to_child(struct Tree *tree, int parent_index, int child_index, int interp_degree, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q) +{ + int interp_degree_lim = interp_degree + 1; + int interp_pts_per_cluster = interp_degree_lim * interp_degree_lim * interp_degree_lim; + +// int num_targets_in_cluster = tree->iend[parent_index] - tree->ibeg[parent_index] + 1; +// int target_start = tree->ibeg[idx] - 1; + + int parent_cluster_start = parent_index * interp_pts_per_cluster; + int child_cluster_start = child_index * interp_pts_per_cluster; + + double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ; + + make_vector(weights, interp_degree_lim); + make_vector(dj, interp_degree_lim); + make_vector(tt, interp_degree_lim); + make_vector(nodeX, interp_degree_lim); + make_vector(nodeY, interp_degree_lim); + make_vector(nodeZ, interp_degree_lim); + + double x0 = tree->x_min[parent_index]; + double x1 = tree->x_max[parent_index]; + double y0 = tree->y_min[parent_index]; + double y1 = tree->y_max[parent_index]; + double z0 = tree->z_min[parent_index]; + double z1 = tree->z_max[parent_index]; + +#ifdef OPENACC_ENABLED + int streamID = rand() % 4; + #pragma acc kernels async(streamID) present(cluster_x, cluster_y, cluster_z, cluster_q) \ + 
create(nodeX[0:interp_degree_lim], nodeY[0:interp_degree_lim], nodeZ[0:interp_degree_lim], \ + weights[0:interp_degree_lim], dj[0:interp_degree_lim], tt[0:interp_degree_lim]) + { +#endif + + + // Fill in arrays of unique x, y, and z coordinates for the interpolation points. +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interp_degree_lim; i++) { + tt[i] = cos(i * M_PI / interp_degree); + nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); + nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); + nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); + } + + // Compute weights +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interp_degree_lim; j++){ + dj[j] = 1.0; + if (j == 0) dj[j] = 0.5; + if (j == interp_degree) dj[j] = 0.5; + } + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interp_degree_lim; j++) { + weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; + } + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interp_pts_per_cluster; i++) { // loop through the child cluster points + + double sumX = 0.0; + double sumY = 0.0; + double sumZ = 0.0; + + double tx = cluster_x[child_cluster_start+i]; + double ty = cluster_y[child_cluster_start+i]; + double tz = cluster_z[child_cluster_start+i]; + + int eix = -1; + int eiy = -1; + int eiz = -1; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:sumX,sumY,sumZ) reduction(max:eix,eiy,eiz) +#endif + for (int j = 0; j < interp_degree_lim; j++) { // loop through the degree + + double cx = tx - nodeX[j]; + double cy = ty - nodeY[j]; + double cz = tz - nodeZ[j]; + + if (fabs(cx)min_leaf_size = min_leaf_size; (*tree_addr)->max_leaf_size = max_leaf_size; + (*tree_addr)->max_depth = max_depth; + Tree_Set_Leaves_and_Levels(*tree_addr); + TreeLinkedList_Free(&tree_linked_list); return; From 4e8f020aec95e7383a46dd126ff28abf93435ab8 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Fri, 24 Jul 2020 13:21:05 -0400 Subject: [PATCH 56/95] Downpass working on GPU --- src/interaction_compute/interaction_compute_downpass.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index 12181492..59ef90e1 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -49,6 +49,7 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, double *cluster_q = clusters->q; double *cluster_w = clusters->w; + int total_num_interp_pts = clusters->num; int total_num_interp_charges = clusters->num_charges; int total_num_interp_weights = clusters->num_weights; @@ -59,6 +60,9 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, #pragma acc data copyin(target_x[0:num_targets], target_y[0:num_targets], \ target_z[0:num_targets], target_q[0:num_targets], \ cluster_q[0:total_num_interp_charges], \ + cluster_x[0:total_num_interp_pts], \ + cluster_y[0:total_num_interp_pts], \ + cluster_z[0:total_num_interp_pts], \ cluster_w[0:total_num_interp_weights]) \ copy(potential[0:num_targets]) { From 55e82d920440c7441fe0e57398f9a8d4fe407284 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Fri, 24 Jul 2020 21:36:42 -0400 Subject: [PATCH 57/95] unstructured data regions --- src/clusters/clusters.c | 33 +++-------- src/drivers/treedriver.c | 55 ++++++++++++++++++- .../interaction_compute_cc.c | 3 +- .../interaction_compute_downpass.c | 22 ++++---- 4 files changed, 75 insertions(+), 38 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 0cbc27a2..85b36a42 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -111,13 +111,9 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa #ifdef OPENACC_ENABLED - #pragma acc data copyin(xS[0:totalNumberSourcePoints], yS[0:totalNumberSourcePoints], \ - zS[0:totalNumberSourcePoints], qS[0:totalNumberSourcePoints], \ - wS[0:totalNumberSourcePoints]) \ - copy(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ - zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges], \ - wC[0:totalNumberInterpolationWeights]) - { + #pragma acc enter data create(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ + zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges]) + #pragma acc enter data copyin(wC[0:totalNumberInterpolationWeights]) #endif if ((approximation == LAGRANGE) && (singularity == SKIPPING)) { @@ -216,18 +212,6 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa exit(1); } - int numZeros=0; - for (int i=0; ix; double *yC = clusters->y; double *zC = clusters->z; + double *qC = clusters->q; + double *wC = clusters->w; #ifdef OPENACC_ENABLED - #pragma acc data copyout(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ - zC[0:totalNumberInterpolationPoints]) - { + #pragma acc enter data create(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ + zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges]) + #pragma acc enter data 
copyin(wC[0:totalNumberInterpolationWeights]) #endif for (int i = 0; i < tree_numnodes; i++) { @@ -290,9 +276,8 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Pa #ifdef OPENACC_ENABLED #pragma acc wait - } // end ACC DATA REGION #endif -// } + return; } diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 35d494cc..79b8720b 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -480,12 +480,20 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[0]); Tree_Sources_Construct(&source_tree, sources, run_params); - printf("Tree_Sources_Construct complete.\n"); +#ifdef OPENACC_ENABLED + #pragma acc enter data copyin(sources->x[0:sources->num], sources->y[0:sources->num], \ + sources->z[0:sources->num], sources->q[0:sources->num], \ + sources->w[0:sources->num]) +#endif STOP_TIMER(&time_tree[0]); START_TIMER(&time_tree[1]); Tree_Targets_Construct(&target_tree, targets, run_params); - printf("Tree_Targets_Construct complete.\n"); +#ifdef OPENACC_ENABLED + #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ + targets->z[0:targets->num], targets->q[0:targets->num]) + #pragma acc enter data create(potential[0:targets->num]) +#endif STOP_TIMER(&time_tree[1]); START_TIMER(&time_tree[2]); @@ -589,6 +597,11 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[5]); InteractionCompute_CC(potential, source_tree, target_tree, local_interaction_list, sources, targets, source_clusters, target_clusters, run_params); +#ifdef OPENACC_ENABLED + #pragma acc exit data delete(sources->x, sources->y, sources->z, sources->q, sources->w, \ + source_clusters->x, source_clusters->y, source_clusters->z, \ + source_clusters->q, source_clusters->w) +#endif InteractionLists_Free(&local_interaction_list); STOP_TIMER(&time_tree[5]); @@ -598,6 +611,20 @@ void treedriver(struct Particles 
*sources, struct Particles *targets, struct Run time_tree[6] = 0; time_tree[7] = 0; + +#ifdef OPENACC_ENABLED + if (num_procs > 1) { + START_TIMER(&time1); + #pragma acc enter data copyin(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ + let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ + let_sources->w[0:let_sources->num], \ + let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ + let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges], \ + let_clusters->w[0:let_clusters->num_weights]) + STOP_TIMER(&time1); + time_tree[6] += time1; + } +#endif for (int proc_id = 1; proc_id < num_procs; ++proc_id) { @@ -649,6 +676,20 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run STOP_TIMER(&time1); time_tree[7] += time1; } + +#ifdef OPENACC_ENABLED + if (num_procs > 1) { + START_TIMER(&time1); + #pragma acc exit data delete(let_sources->x, let_sources->y, \ + let_sources->z, let_sources->q, \ + let_sources->w, \ + let_clusters->x, let_clusters->y, \ + let_clusters->z, let_clusters->q, \ + let_clusters->w) + STOP_TIMER(&time1); + time_tree[6] += time1; + } +#endif //------------------------------- @@ -669,6 +710,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //------------------------------- START_TIMER(&time_tree[9]); + #pragma acc exit data copyout(potential[0:targets->num]) InteractionCompute_SubtractionPotentialCorrection(potential, targets, run_params); Particles_Targets_Reorder(targets, potential); Particles_Sources_Reorder(sources); @@ -682,6 +724,15 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //------------------------------- START_TIMER(&time_tree[10]); + +#ifdef OPENACC_ENABLED + #pragma acc exit data delete(targets->x, targets->y, \ + targets->z, targets->q, \ + target_clusters->x, target_clusters->y, \ + target_clusters->z, target_clusters->q, \ + 
target_clusters->w) +#endif + Particles_FreeOrder(sources); Particles_FreeOrder(targets); Tree_Free(&source_tree); diff --git a/src/interaction_compute/interaction_compute_cc.c b/src/interaction_compute/interaction_compute_cc.c index 94f1e038..e6e3708e 100644 --- a/src/interaction_compute/interaction_compute_cc.c +++ b/src/interaction_compute/interaction_compute_cc.c @@ -83,6 +83,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T // Additionally, not setup for Hermite either at the moment. +/* #ifdef OPENACC_ENABLED #pragma acc data copyin(source_x[0:num_sources], source_y[0:num_sources], source_z[0:num_sources], \ source_q[0:num_sources], source_w[0:num_sources], \ @@ -100,6 +101,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T target_cluster_w[0:num_target_cluster_charges], \ potential[0:num_targets]) #endif +*/ { for (int i = 0; i < target_tree_numnodes; i++) { @@ -121,7 +123,6 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T /* * ************ POTENTIAL FROM APPROX *********************/ /* * ********************************************************/ -// printf("cluster %i, CC = %i, CP = %i, PC = %i, PP = %i\n",i,num_approx_in_cluster,num_target_approx_in_cluster,num_source_approx_in_cluster,num_direct_in_cluster); for (int j = 0; j < num_approx_in_cluster; j++) { int source_node_index = approx_inter_list[i][j]; int source_cluster_start = interp_pts_per_cluster * source_tree_cluster_ind[source_node_index]; diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index 59ef90e1..b825d7dd 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -56,17 +56,17 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, int tree_numnodes = tree->numnodes; int interp_degree = run_params->interp_degree; -#ifdef 
OPENACC_ENABLED - #pragma acc data copyin(target_x[0:num_targets], target_y[0:num_targets], \ - target_z[0:num_targets], target_q[0:num_targets], \ - cluster_q[0:total_num_interp_charges], \ - cluster_x[0:total_num_interp_pts], \ - cluster_y[0:total_num_interp_pts], \ - cluster_z[0:total_num_interp_pts], \ - cluster_w[0:total_num_interp_weights]) \ - copy(potential[0:num_targets]) +//#ifdef OPENACC_ENABLED +// #pragma acc data copyin(target_x[0:num_targets], target_y[0:num_targets], \ +// target_z[0:num_targets], target_q[0:num_targets], \ +// cluster_q[0:total_num_interp_charges], \ +// cluster_x[0:total_num_interp_pts], \ +// cluster_y[0:total_num_interp_pts], \ +// cluster_z[0:total_num_interp_pts], \ +// cluster_w[0:total_num_interp_weights]) \ +// copy(potential[0:num_targets]) +//#endif { -#endif if ((run_params->approximation == LAGRANGE) && (run_params->singularity == SKIPPING)) { // for (int i = 0; i < tree_numnodes; i++) @@ -127,8 +127,8 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, #ifdef OPENACC_ENABLED #pragma acc wait - } // end ACC DATA REGION #endif + } // end ACC DATA REGION return; } From 69e0d48e4d9a52c84abc79d4d9a8a52f4d579873 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Sat, 25 Jul 2020 16:32:04 -0400 Subject: [PATCH 58/95] Only using w arrays for subtraction --- src/clusters/clusters.c | 173 +++++++++--------- src/drivers/treedriver.c | 31 +++- .../interaction_compute_cc.c | 54 ++---- .../interaction_compute_cp.c | 42 ++--- .../interaction_compute_direct.c | 21 ++- .../interaction_compute_downpass.c | 12 -- .../interaction_compute_pc.c | 106 ++++------- src/kernels/atan/atan_pp.c | 6 +- src/kernels/atan/atan_pp.h | 2 +- src/kernels/coulomb/coulomb_cp.c | 13 +- src/kernels/coulomb/coulomb_cp.h | 4 +- src/kernels/coulomb/coulomb_pc.c | 2 +- src/kernels/coulomb/coulomb_pp.c | 9 +- src/kernels/coulomb/coulomb_pp.h | 2 +- src/kernels/mq/mq_pp.c | 6 +- src/kernels/mq/mq_pp.h | 2 +- .../regularized-coulomb_cp.c | 13 +- .../regularized-coulomb_cp.h | 4 +- .../regularized-coulomb_pp.c | 9 +- .../regularized-coulomb_pp.h | 2 +- .../regularized-yukawa_cp.c | 4 +- .../regularized-yukawa_cp.h | 2 +- .../regularized-yukawa_pp.c | 7 +- .../regularized-yukawa_pp.h | 2 +- src/kernels/sin-over-r/sin-over-r_cp.c | 15 +- src/kernels/sin-over-r/sin-over-r_cp.h | 4 +- src/kernels/sin-over-r/sin-over-r_pp.c | 9 +- src/kernels/sin-over-r/sin-over-r_pp.h | 2 +- src/kernels/yukawa/yukawa_cp.c | 13 +- src/kernels/yukawa/yukawa_cp.h | 4 +- src/kernels/yukawa/yukawa_pp.c | 7 +- src/kernels/yukawa/yukawa_pp.h | 2 +- 32 files changed, 252 insertions(+), 332 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 85b36a42..d3918900 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -18,7 +18,7 @@ static void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDegree, - double *xS, double *yS, double *zS, double *qS, double *wS, + double *xS, double *yS, double *zS, double *qS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); static void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, @@ 
-78,19 +78,15 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa MPI_Alloc_mem(totalNumberInterpolationPoints*sizeof(double), MPI_INFO_NULL, &(clusters->y)); MPI_Alloc_mem(totalNumberInterpolationPoints*sizeof(double), MPI_INFO_NULL, &(clusters->z)); MPI_Alloc_mem(totalNumberInterpolationCharges*sizeof(double), MPI_INFO_NULL, &(clusters->q)); - MPI_Alloc_mem(totalNumberInterpolationWeights*sizeof(double), MPI_INFO_NULL, &(clusters->w)); for (int i = 0; i < totalNumberInterpolationPoints; i++) clusters->x[i] = 0.0; for (int i = 0; i < totalNumberInterpolationPoints; i++) clusters->y[i] = 0.0; for (int i = 0; i < totalNumberInterpolationPoints; i++) clusters->z[i] = 0.0; for (int i = 0; i < totalNumberInterpolationCharges; i++) clusters->q[i] = 0.0; - - if (singularity == SKIPPING) { - for (int i = 0; i < totalNumberInterpolationWeights; i++) clusters->w[i] = 1.0; - } else if (singularity == SUBTRACTION) { + + if (singularity == SUBTRACTION) { + MPI_Alloc_mem(totalNumberInterpolationWeights*sizeof(double), MPI_INFO_NULL, &(clusters->w)); for (int i = 0; i < totalNumberInterpolationWeights; i++) clusters->w[i] = 0.0; - } else { - exit(1); } clusters->num = totalNumberInterpolationPoints; @@ -113,7 +109,9 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa #ifdef OPENACC_ENABLED #pragma acc enter data create(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges]) - #pragma acc enter data copyin(wC[0:totalNumberInterpolationWeights]) + if (singularity == SUBTRACTION) { + #pragma acc enter data create(wC[0:totalNumberInterpolationWeights]) + } #endif if ((approximation == LAGRANGE) && (singularity == SKIPPING)) { @@ -121,7 +119,7 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa // anterpolate from particles to leaf cluster interpolation points for (int i = 0; i < 
tree->leaves_list_num; ++i) { int leaf_index = tree->leaves_list[i]; - pc_comp_ms_modifiedF(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); + pc_comp_ms_modifiedF(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, xC, yC, zC, qC); } // anterpolate up clusters, level by level @@ -141,10 +139,6 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa } - - -// pc_comp_ms_modifiedF(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); - } else if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { // for (int i = 0; i < tree_numnodes; i++) // pc_comp_ms_modifiedF_SS(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); @@ -242,16 +236,9 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Pa make_vector(clusters->y, totalNumberInterpolationPoints); make_vector(clusters->z, totalNumberInterpolationPoints); make_vector(clusters->q, totalNumberInterpolationCharges); - make_vector(clusters->w, totalNumberInterpolationWeights); - - if (singularity == SKIPPING) { - for (int i = 0; i < totalNumberInterpolationWeights; i++) clusters->w[i] = 1.0; - } else if (singularity == SUBTRACTION) { - for (int i = 0; i < totalNumberInterpolationWeights; i++) clusters->w[i] = 0.0; - - } else { - exit(1); + if (singularity == SUBTRACTION) { + make_vector(clusters->w, totalNumberInterpolationWeights); } clusters->num = totalNumberInterpolationPoints; @@ -267,7 +254,9 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Pa #ifdef OPENACC_ENABLED #pragma acc enter data create(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges]) - #pragma acc enter data copyin(wC[0:totalNumberInterpolationWeights]) + if (singularity == SUBTRACTION) { + #pragma acc enter data create(wC[0:totalNumberInterpolationWeights]) + } #endif for (int i = 0; i < 
tree_numnodes; i++) { @@ -313,7 +302,9 @@ void Clusters_Alloc(struct Clusters **clusters_addr, int length, const struct Ru make_vector(clusters->y, clusters->num); make_vector(clusters->z, clusters->num); make_vector(clusters->q, clusters->num_charges); - make_vector(clusters->w, clusters->num_weights); + if (singularity == SUBTRACTION) { + make_vector(clusters->w, clusters->num_weights); + } } return; @@ -348,11 +339,11 @@ void Clusters_Free_Win(struct Clusters **clusters_addr) struct Clusters *clusters = *clusters_addr; if (clusters != NULL) { - MPI_Free_mem(clusters->x); - MPI_Free_mem(clusters->y); - MPI_Free_mem(clusters->z); - MPI_Free_mem(clusters->q); - MPI_Free_mem(clusters->w); + if (clusters->x != NULL) MPI_Free_mem(clusters->x); + if (clusters->y != NULL) MPI_Free_mem(clusters->y); + if (clusters->z != NULL) MPI_Free_mem(clusters->z); + if (clusters->q != NULL) MPI_Free_mem(clusters->q); + if (clusters->w != NULL) MPI_Free_mem(clusters->w); free(clusters); } @@ -411,10 +402,10 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_ind #endif #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpolationPointsPerCluster; j++) { - modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j];// * wS[child_startingIndexInClustersArray + j]; + modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j]; exactIndX[j] = -1; exactIndY[j] = -1; exactIndZ[j] = -1; @@ -422,7 +413,7 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_ind // Fill in arrays of unique x, y, and z coordinates for the interpolation points. 
#ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int i = 0; i < interpDegreeLim; i++) { tt[i] = cos(i * M_PI / interpolationDegree); @@ -433,7 +424,7 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_ind // Compute weights #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; @@ -442,7 +433,7 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_ind } #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; @@ -463,7 +454,7 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_ind double sz = clusterZ[child_startingIndexInClustersArray+i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) + #pragma acc loop vector(32) reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif for (int j = 0; j < (interpolationDegree+1); j++) { // loop through the degree @@ -519,7 +510,7 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_ind // Increment cluster Q array double temp = 0.0; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temp) + #pragma acc loop vector(32) reduction(+:temp) #endif for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop over source points double sx = clusterX[child_startingIndexInClustersArray + i]; @@ -618,7 +609,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int chil #endif #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpolationPointsPerCluster; j++) { modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j] * 
clusterW[child_startingIndexInClustersArray + j]; @@ -630,7 +621,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int chil // Fill in arrays of unique x, y, and z coordinates for the interpolation points. #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int i = 0; i < interpDegreeLim; i++) { tt[i] = cos(i * M_PI / interpolationDegree); @@ -641,7 +632,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int chil // Compute weights #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; @@ -650,7 +641,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int chil } #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; @@ -671,7 +662,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int chil double sz = clusterZ[child_startingIndexInClustersArray+i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) + #pragma acc loop vector(32) reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif for (int j = 0; j < (interpolationDegree+1); j++) { // loop through the degree @@ -729,7 +720,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int chil double temp = 0.0; // double temp2 = 0.0; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temp) + #pragma acc loop vector(32) reduction(+:temp) #endif for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop over source points double sx = clusterX[child_startingIndexInClustersArray + i]; @@ -831,7 +822,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil #endif #ifdef OPENACC_ENABLED - #pragma 
acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpolationPointsPerCluster; j++) { // modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j] * clusterW[child_startingIndexInClustersArray + j]; @@ -843,7 +834,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil // Fill in arrays of unique x, y, and z coordinates for the interpolation points. #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int i = 0; i < interpDegreeLim; i++) { tt[i] = cos(i * M_PI / interpolationDegree); @@ -854,7 +845,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil // Compute weights #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; @@ -863,7 +854,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil } #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { weights[j] = ((j % 2 == 0)? 
1 : -1) * dj[j]; @@ -884,7 +875,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil double sz = clusterZ[child_startingIndexInClustersArray+i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) + #pragma acc loop vector(32) reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif for (int j = 0; j < (interpolationDegree+1); j++) { // loop through the degree @@ -942,7 +933,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil double temp = 0.0; double temp2 = 0.0; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temp) + #pragma acc loop vector(32) reduction(+:temp) #endif for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop over source points double sx = clusterX[child_startingIndexInClustersArray + i]; @@ -1002,7 +993,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDegree, - double *xS, double *yS, double *zS, double *qS, double *wS, + double *xS, double *yS, double *zS, double *qS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ) { @@ -1035,7 +1026,7 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDeg #ifdef OPENACC_ENABLED int streamID = rand() % 4; - #pragma acc kernels async(streamID) present(xS, yS, zS, qS, wS, clusterX, clusterY, clusterZ, clusterQ) \ + #pragma acc kernels async(streamID) present(xS, yS, zS, qS, clusterX, clusterY, clusterZ, clusterQ) \ create(modifiedF[0:sourcePointsInCluster], exactIndX[0:sourcePointsInCluster], \ exactIndY[0:sourcePointsInCluster], exactIndZ[0:sourcePointsInCluster], \ nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], \ @@ -1045,10 +1036,10 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDeg #endif #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc 
loop vector(32) independent #endif for (int j = 0; j < sourcePointsInCluster; j++) { - modifiedF[j] = qS[startingIndexInSourcesArray + j] * wS[startingIndexInSourcesArray + j]; + modifiedF[j] = qS[startingIndexInSourcesArray + j]; exactIndX[j] = -1; exactIndY[j] = -1; exactIndZ[j] = -1; @@ -1056,7 +1047,7 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDeg // Fill in arrays of unique x, y, and z coordinates for the interpolation points. #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int i = 0; i < interpDegreeLim; i++) { tt[i] = cos(i * M_PI / interpolationDegree); @@ -1067,7 +1058,7 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDeg // Compute weights #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; @@ -1076,7 +1067,7 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDeg } #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { weights[j] = ((j % 2 == 0)? 
1 : -1) * dj[j]; @@ -1097,7 +1088,7 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDeg double sz = zS[startingIndexInSourcesArray+i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) + #pragma acc loop vector(32) reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif for (int j = 0; j < (interpolationDegree+1); j++) { // loop through the degree @@ -1153,7 +1144,7 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDeg // Increment cluster Q array double temp = 0.0; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temp) + #pragma acc loop vector(32) reduction(+:temp) #endif for (int i = 0; i < sourcePointsInCluster; i++) { // loop over source points double sx = xS[startingIndexInSourcesArray + i]; @@ -1253,7 +1244,7 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati #endif #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < pointsInNode; j++) { modifiedF[j] = qS[startingIndexInSources + j] * wS[startingIndexInSources + j]; @@ -1265,7 +1256,7 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati // Fill in arrays of unique x, y, and z coordinates for the interpolation points. 
#ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int i = 0; i < interpDegreeLim; i++) { tt[i] = cos(i * M_PI / interpolationDegree); @@ -1276,7 +1267,7 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati // Compute weights #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; @@ -1285,7 +1276,7 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati } #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; @@ -1306,7 +1297,7 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati double sz = zS[startingIndexInSources+i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) + #pragma acc loop vector(32) reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif for (int j = 0; j < interpDegreeLim; j++) { // loop through the degree @@ -1366,7 +1357,7 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati double temp = 0.0, temp2 = 0.0; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temp) reduction(+:temp2) + #pragma acc loop vector(32) reduction(+:temp) reduction(+:temp2) #endif for (int i = 0; i < pointsInNode; i++) { // loop over source points double sx = xS[startingIndexInSources + i]; @@ -1467,7 +1458,7 @@ void pc_comp_ms_modifiedF_SS_W(const struct Tree *tree, int idx, int interpolati #endif #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < pointsInNode; j++) { // modifiedF[j] = qS[startingIndexInSources + j] * wS[startingIndexInSources + j]; @@ -1479,7 +1470,7 @@ void pc_comp_ms_modifiedF_SS_W(const 
struct Tree *tree, int idx, int interpolati // Fill in arrays of unique x, y, and z coordinates for the interpolation points. #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int i = 0; i < interpDegreeLim; i++) { tt[i] = cos(i * M_PI / interpolationDegree); @@ -1490,7 +1481,7 @@ void pc_comp_ms_modifiedF_SS_W(const struct Tree *tree, int idx, int interpolati // Compute weights #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; @@ -1499,7 +1490,7 @@ void pc_comp_ms_modifiedF_SS_W(const struct Tree *tree, int idx, int interpolati } #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; @@ -1520,7 +1511,7 @@ void pc_comp_ms_modifiedF_SS_W(const struct Tree *tree, int idx, int interpolati double sz = zS[startingIndexInSources+i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) + #pragma acc loop vector(32) reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif for (int j = 0; j < interpDegreeLim; j++) { // loop through the degree @@ -1580,7 +1571,7 @@ void pc_comp_ms_modifiedF_SS_W(const struct Tree *tree, int idx, int interpolati double temp = 0.0, temp2 = 0.0; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temp) reduction(+:temp2) + #pragma acc loop vector(32) reduction(+:temp) reduction(+:temp2) #endif for (int i = 0; i < pointsInNode; i++) { // loop over source points double sx = xS[startingIndexInSources + i]; @@ -1679,7 +1670,7 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol #ifdef OPENACC_ENABLED int streamID = rand() % 3; - #pragma acc kernels async(streamID) present(xS, yS, zS, qS, wS, \ + #pragma acc kernels async(streamID) present(xS, yS, 
zS, qS, \ clusterX, clusterY, clusterZ, clusterQ) \ create(modifiedF[0:sourcePointsInCluster], exactIndX[0:sourcePointsInCluster], \ exactIndY[0:sourcePointsInCluster], exactIndZ[0:sourcePointsInCluster], \ @@ -1690,10 +1681,10 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol #endif #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < sourcePointsInCluster; j++) { - modifiedF[j] = qS[startingIndexInSourcesArray + j] * wS[startingIndexInSourcesArray + j]; + modifiedF[j] = qS[startingIndexInSourcesArray + j]; exactIndX[j] = -1; exactIndY[j] = -1; exactIndZ[j] = -1; @@ -1701,7 +1692,7 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol // Fill in arrays of unique x, y, and z coordinates for the interpolation points. #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int i = 0; i < interpDegreeLim; i++) { double xx = i * M_PI / interpolationDegree; @@ -1716,7 +1707,7 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol // Compute weights #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; @@ -1743,7 +1734,7 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol double sz = zS[startingIndexInSourcesArray + i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) + #pragma acc loop vector(32) reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif for (int j = 0; j < interpDegreeLim; j++) { // loop through the degree @@ -1798,9 +1789,9 @@ void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpol double temp4 = 0.0, temp5 = 0.0, temp6 = 0.0, temp7 = 0.0; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temp0) 
reduction(+:temp1) reduction(+:temp2) \ - reduction(+:temp3) reduction(+:temp4) reduction(+:temp5) \ - reduction(+:temp6) reduction(+:temp7) + #pragma acc loop vector(32) reduction(+:temp0) reduction(+:temp1) reduction(+:temp2) \ + reduction(+:temp3) reduction(+:temp4) reduction(+:temp5) \ + reduction(+:temp6) reduction(+:temp7) #endif for (int i = 0; i < sourcePointsInCluster; i++) { // loop over source points @@ -1987,7 +1978,7 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter #endif #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < sourcePointsInCluster; j++) { modifiedF[j] = qS[startingIndexInSourcesArray + j] * wS[startingIndexInSourcesArray + j]; @@ -1999,7 +1990,7 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter // Fill in arrays of unique x, y, and z coordinates for the interpolation points. #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int i = 0; i < interpDegreeLim; i++) { double xx = i * M_PI / interpolationDegree; @@ -2014,7 +2005,7 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter // Compute weights #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpDegreeLim; j++) { dj[j] = 1.0; @@ -2040,7 +2031,7 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter double sz = zS[startingIndexInSourcesArray + i]; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) + #pragma acc loop vector(32) reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) #endif for (int j = 0; j < interpDegreeLim; j++) { // loop through the degree @@ -2099,12 +2090,12 @@ void pc_comp_ms_modifiedF_hermite_SS(const struct Tree *tree, int idx, int inter double tempw4 = 0.0, tempw5 = 0.0, tempw6 = 
0.0, tempw7 = 0.0; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:tempq0) reduction(+:tempq1) reduction(+:tempq2) \ - reduction(+:tempq3) reduction(+:tempq4) reduction(+:tempq5) \ - reduction(+:tempq6) reduction(+:tempq7) \ - reduction(+:tempw0) reduction(+:tempw1) reduction(+:tempw2) \ - reduction(+:tempw3) reduction(+:tempw4) reduction(+:tempw5) \ - reduction(+:tempw6) reduction(+:tempw7) + #pragma acc loop vector(32) reduction(+:tempq0) reduction(+:tempq1) reduction(+:tempq2) \ + reduction(+:tempq3) reduction(+:tempq4) reduction(+:tempq5) \ + reduction(+:tempq6) reduction(+:tempq7) \ + reduction(+:tempw0) reduction(+:tempw1) reduction(+:tempw2) \ + reduction(+:tempw3) reduction(+:tempw4) reduction(+:tempw5) \ + reduction(+:tempw6) reduction(+:tempw7) #endif for (int i = 0; i < sourcePointsInCluster; i++) { // loop over source points @@ -2287,7 +2278,7 @@ void cp_comp_interp(const struct Tree *tree, int idx, int interpolationDegree, // Fill in arrays of unique x, y, and z coordinates for the interpolation points. 
#ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int i = 0; i < interpDegreeLim; i++) { tt[i] = cos(i * M_PI / interpolationDegree); @@ -2298,7 +2289,7 @@ void cp_comp_interp(const struct Tree *tree, int idx, int interpolationDegree, #ifdef OPENACC_ENABLED - #pragma acc loop independent + #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpolationPointsPerCluster; j++) { int k1 = j%(interpolationDegree+1); diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 79b8720b..02c2f809 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -482,8 +482,10 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run Tree_Sources_Construct(&source_tree, sources, run_params); #ifdef OPENACC_ENABLED #pragma acc enter data copyin(sources->x[0:sources->num], sources->y[0:sources->num], \ - sources->z[0:sources->num], sources->q[0:sources->num], \ - sources->w[0:sources->num]) + sources->z[0:sources->num], sources->q[0:sources->num]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data copyin(sources->w[0:sources->num]) + } #endif STOP_TIMER(&time_tree[0]); @@ -492,6 +494,9 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run #ifdef OPENACC_ENABLED #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ targets->z[0:targets->num], targets->q[0:targets->num]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data copyin(targets->q[0:targets->num]) + } #pragma acc enter data create(potential[0:targets->num]) #endif STOP_TIMER(&time_tree[1]); @@ -601,6 +606,9 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run #pragma acc exit data delete(sources->x, sources->y, sources->z, sources->q, sources->w, \ source_clusters->x, source_clusters->y, source_clusters->z, \ source_clusters->q, source_clusters->w) + if 
(run_params->singularity == SUBTRACTION) { + #pragma acc exit data delete(sources->w, source_clusters->w) + } #endif InteractionLists_Free(&local_interaction_list); STOP_TIMER(&time_tree[5]); @@ -617,10 +625,11 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time1); #pragma acc enter data copyin(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ - let_sources->w[0:let_sources->num], \ let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ - let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges], \ - let_clusters->w[0:let_clusters->num_weights]) + let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data create(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) + } STOP_TIMER(&time1); time_tree[6] += time1; } @@ -686,6 +695,9 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run let_clusters->x, let_clusters->y, \ let_clusters->z, let_clusters->q, \ let_clusters->w) + if (run_params->singularity == SUBTRACTION) { + #pragma acc exit data delete(let_sources->w, let_clusters->w) + } STOP_TIMER(&time1); time_tree[6] += time1; } @@ -726,11 +738,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[10]); #ifdef OPENACC_ENABLED - #pragma acc exit data delete(targets->x, targets->y, \ - targets->z, targets->q, \ + #pragma acc exit data delete(targets->x, targets->y, targets->z, \ target_clusters->x, target_clusters->y, \ - target_clusters->z, target_clusters->q, \ - target_clusters->w) + target_clusters->z, target_clusters->q) + if (run_params->singularity == SUBTRACTION) { + #pragma acc exit data delete(targets->q, target_clusters->w) + } #endif 
Particles_FreeOrder(sources); diff --git a/src/interaction_compute/interaction_compute_cc.c b/src/interaction_compute/interaction_compute_cc.c index e6e3708e..307c1621 100644 --- a/src/interaction_compute/interaction_compute_cc.c +++ b/src/interaction_compute/interaction_compute_cc.c @@ -83,27 +83,6 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T // Additionally, not setup for Hermite either at the moment. -/* -#ifdef OPENACC_ENABLED - #pragma acc data copyin(source_x[0:num_sources], source_y[0:num_sources], source_z[0:num_sources], \ - source_q[0:num_sources], source_w[0:num_sources], \ - target_x[0:num_targets], target_y[0:num_targets], target_z[0:num_targets], \ - target_q[0:num_targets], \ - source_cluster_x[0:num_source_cluster_points], \ - source_cluster_y[0:num_source_cluster_points], \ - source_cluster_z[0:num_source_cluster_points], \ - source_cluster_q[0:num_source_cluster_charges], \ - source_cluster_w[0:num_source_cluster_weights], \ - target_cluster_x[0:num_target_cluster_points], \ - target_cluster_y[0:num_target_cluster_points], \ - target_cluster_z[0:num_target_cluster_points]) \ - copy(target_cluster_q[0:num_target_cluster_charges], \ - target_cluster_w[0:num_target_cluster_charges], \ - potential[0:num_targets]) -#endif -*/ - { - for (int i = 0; i < target_tree_numnodes; i++) { int target_ibeg = target_tree_ibeg[i]; int target_iend = target_tree_iend[i]; @@ -141,7 +120,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_Coulomb_CP_Lagrange(interp_pts_per_cluster, interp_pts_per_cluster, source_cluster_start, target_cluster_start, source_cluster_x, source_cluster_y, source_cluster_z, - source_cluster_q, source_cluster_w, + source_cluster_q, target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, run_params, stream_id); @@ -196,7 +175,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_Yukawa_CP_Lagrange(interp_pts_per_cluster, 
interp_pts_per_cluster, source_cluster_start, target_cluster_start, source_cluster_x, source_cluster_y, source_cluster_z, - source_cluster_q, source_cluster_w, + source_cluster_q, target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, run_params, stream_id); @@ -251,7 +230,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_RegularizedCoulomb_CP_Lagrange(interp_pts_per_cluster, interp_pts_per_cluster, source_cluster_start, target_cluster_start, source_cluster_x, source_cluster_y, source_cluster_z, - source_cluster_q, source_cluster_w, + source_cluster_q, target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, run_params, stream_id); @@ -301,7 +280,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_RegularizedYukawa_CP_Lagrange(interp_pts_per_cluster, interp_pts_per_cluster, source_cluster_start, target_cluster_start, source_cluster_x, source_cluster_y, source_cluster_z, - source_cluster_q, source_cluster_w, + source_cluster_q, target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, run_params, stream_id); @@ -354,7 +333,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_SinOverR_CP_Lagrange(interp_pts_per_cluster, interp_pts_per_cluster, source_cluster_start, target_cluster_start, source_cluster_x, source_cluster_y, source_cluster_z, - source_cluster_q, source_cluster_w, + source_cluster_q, target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, run_params, stream_id); @@ -639,7 +618,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_Coulomb_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, source_start, target_cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, run_params, stream_id); @@ -692,7 +671,7 @@ void 
InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_Yukawa_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, source_start, target_cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, run_params, stream_id); @@ -745,7 +724,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_RegularizedCoulomb_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, source_start, target_cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, run_params, stream_id); @@ -794,7 +773,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_RegularizedYukawa_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, source_start, target_cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, run_params, stream_id); @@ -846,7 +825,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_SinOverR_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, source_start, target_cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, run_params, stream_id); @@ -887,7 +866,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_Coulomb_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -914,7 
+893,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_Yukawa_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -941,7 +920,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_RegularizedCoulomb_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -968,7 +947,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_RegularizedYukawa_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -995,7 +974,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T K_SinOverR_PP(num_targets_in_cluster, num_sources_in_cluster, target_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, stream_id); } @@ -1009,9 +988,8 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } // end loop over target nodes #ifdef OPENACC_ENABLED - #pragma acc wait + #pragma acc wait #endif - } // end acc data region return; diff --git a/src/interaction_compute/interaction_compute_cp.c b/src/interaction_compute/interaction_compute_cp.c index 5caa5f77..16c2f4d9 100644 --- 
a/src/interaction_compute/interaction_compute_cp.c +++ b/src/interaction_compute/interaction_compute_cp.c @@ -62,19 +62,6 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba int *tree_iend = tree->iend; int *cluster_ind = tree->cluster_ind; -#ifdef OPENACC_ENABLED - #pragma acc data copyin(source_x[0:num_sources], source_y[0:num_sources], source_z[0:num_sources], \ - source_q[0:num_sources], source_w[0:num_sources], \ - target_x[0:num_targets], target_y[0:num_targets], target_z[0:num_targets], \ - target_q[0:num_targets], \ - cluster_x[0:total_num_interp_pts], \ - cluster_y[0:total_num_interp_pts], \ - cluster_z[0:total_num_interp_pts]) \ - copy(cluster_q[0:total_num_interp_charges], \ - cluster_w[0:total_num_interp_weights], \ - potential[0:num_targets]) -#endif - { for (int i = 0; i < batches->numnodes; i++) { @@ -111,7 +98,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_Coulomb_CP_Lagrange(num_sources_in_batch, interp_pts_per_cluster, batch_start, cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, cluster_x, cluster_y, cluster_z, cluster_q, run_params, stream_id); @@ -134,7 +121,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_Coulomb_CP_Hermite(num_sources_in_batch, interp_pts_per_cluster, batch_start, cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, cluster_x, cluster_y, cluster_z, cluster_q, run_params, stream_id); @@ -165,7 +152,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_Yukawa_CP_Lagrange(num_sources_in_batch, interp_pts_per_cluster, batch_start, cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, cluster_x, cluster_y, cluster_z, cluster_q, run_params, stream_id); @@ -188,7 +175,7 @@ void InteractionCompute_CP(double *potential, 
struct Tree *tree, struct Tree *ba K_Yukawa_CP_Hermite(num_sources_in_batch, interp_pts_per_cluster, batch_start, cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, cluster_x, cluster_y, cluster_z, cluster_q, run_params, stream_id); @@ -219,7 +206,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_RegularizedCoulomb_CP_Lagrange(num_sources_in_batch, interp_pts_per_cluster, batch_start, cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, cluster_x, cluster_y, cluster_z, cluster_q, run_params, stream_id); @@ -239,7 +226,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_RegularizedCoulomb_CP_Hermite(num_sources_in_batch, interp_pts_per_cluster, batch_start, cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, cluster_x, cluster_y, cluster_z, cluster_q, run_params, stream_id); @@ -270,7 +257,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_RegularizedYukawa_CP_Lagrange(num_sources_in_batch, interp_pts_per_cluster, batch_start, cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, cluster_x, cluster_y, cluster_z, cluster_q, run_params, stream_id); @@ -319,7 +306,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_SinOverR_CP_Lagrange(num_sources_in_batch, interp_pts_per_cluster, batch_start, cluster_start, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, cluster_x, cluster_y, cluster_z, cluster_q, run_params, stream_id); } @@ -330,7 +317,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_SinOverR_CP_Hermite(num_sources_in_batch, interp_pts_per_cluster, batch_start, cluster_start, - source_x, source_y, source_z, source_q, 
source_w, + source_x, source_y, source_z, source_q, cluster_x, cluster_y, cluster_z, cluster_q, run_params, stream_id); } @@ -369,7 +356,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_Coulomb_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -396,7 +383,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_Yukawa_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -423,7 +410,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_RegularizedCoulomb_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -450,7 +437,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_RegularizedYukawa_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -478,7 +465,7 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba K_SinOverR_PP(num_targets_in_cluster, num_sources_in_batch, target_start, batch_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, 
source_w, + source_x, source_y, source_z, source_q, run_params, potential, stream_id); } @@ -493,7 +480,6 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba #ifdef OPENACC_ENABLED #pragma acc wait #endif - } // end acc data region return; diff --git a/src/interaction_compute/interaction_compute_direct.c b/src/interaction_compute/interaction_compute_direct.c index 23895758..55615bde 100644 --- a/src/interaction_compute/interaction_compute_direct.c +++ b/src/interaction_compute/interaction_compute_direct.c @@ -40,9 +40,10 @@ void InteractionCompute_Direct(double *potential, #ifdef OPENACC_ENABLED #pragma acc data copyin(source_x[0:num_sources], source_y[0:num_sources], source_z[0:num_sources], \ - source_q[0:num_sources], source_w[0:num_sources], \ - target_x[0:num_targets], target_y[0:num_targets], target_z[0:num_targets], \ - target_q[0:num_targets]), copy(potential[0:num_targets]) + source_q[0:num_sources], \ + target_x[0:num_targets], target_y[0:num_targets], target_z[0:num_targets]) \ + copy(potential[0:num_targets]) + #pragma acc data copyin(source_w[0:num_sources], target_q[0:num_targets]) #endif { @@ -61,7 +62,7 @@ void InteractionCompute_Direct(double *potential, K_Coulomb_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, 0); } else if (run_params->singularity == SUBTRACTION) { @@ -86,7 +87,7 @@ void InteractionCompute_Direct(double *potential, K_Yukawa_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, 0); } else if (run_params->singularity == SUBTRACTION) { @@ -111,7 +112,7 @@ void InteractionCompute_Direct(double *potential, K_RegularizedCoulomb_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + 
source_x, source_y, source_z, source_q, run_params, potential, 0); } else if (run_params->singularity == SUBTRACTION) { @@ -134,7 +135,7 @@ void InteractionCompute_Direct(double *potential, K_RegularizedYukawa_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, 0); } else if (run_params->singularity == SUBTRACTION) { @@ -154,7 +155,7 @@ void InteractionCompute_Direct(double *potential, K_Atan_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, 0); @@ -166,7 +167,7 @@ void InteractionCompute_Direct(double *potential, K_SinOverR_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, 0); /***************************************/ @@ -177,7 +178,7 @@ void InteractionCompute_Direct(double *potential, K_MQ_PP(num_targets, num_sources, 0, 0, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, + source_x, source_y, source_z, source_q, run_params, potential, 0); } else { diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index b825d7dd..6b821184 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -56,17 +56,6 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, int tree_numnodes = tree->numnodes; int interp_degree = run_params->interp_degree; -//#ifdef OPENACC_ENABLED -// #pragma acc data copyin(target_x[0:num_targets], target_y[0:num_targets], \ -// target_z[0:num_targets], target_q[0:num_targets], \ -// cluster_q[0:total_num_interp_charges], \ -// cluster_x[0:total_num_interp_pts], \ -// 
cluster_y[0:total_num_interp_pts], \ -// cluster_z[0:total_num_interp_pts], \ -// cluster_w[0:total_num_interp_weights]) \ -// copy(potential[0:num_targets]) -//#endif - { if ((run_params->approximation == LAGRANGE) && (run_params->singularity == SKIPPING)) { // for (int i = 0; i < tree_numnodes; i++) @@ -128,7 +117,6 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, #ifdef OPENACC_ENABLED #pragma acc wait #endif - } // end ACC DATA REGION return; } diff --git a/src/interaction_compute/interaction_compute_pc.c b/src/interaction_compute/interaction_compute_pc.c index 8e87e856..76d39a6b 100644 --- a/src/interaction_compute/interaction_compute_pc.c +++ b/src/interaction_compute/interaction_compute_pc.c @@ -61,28 +61,10 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba int tree_numnodes = tree->numnodes; int batch_numnodes = batches->numnodes; - double *potential_direct, *potential_approx; - make_vector(potential_direct, num_targets); - make_vector(potential_approx, num_targets); - - memset(potential_approx, 0, num_targets * sizeof(double)); - memset(potential_direct, 0, num_targets * sizeof(double)); - int *tree_ibeg = tree->ibeg; int *tree_iend = tree->iend; int *cluster_ind = tree->cluster_ind; -#ifdef OPENACC_ENABLED - #pragma acc data copyin(source_x[0:num_sources], source_y[0:num_sources], source_z[0:num_sources], \ - source_q[0:num_sources], source_w[0:num_sources], \ - target_x[0:num_targets], target_y[0:num_targets], target_z[0:num_targets], \ - target_q[0:num_targets], \ - cluster_x[0:total_num_interp_pts], cluster_y[0:total_num_interp_pts], \ - cluster_z[0:total_num_interp_pts], \ - cluster_q[0:total_num_interp_charges], cluster_w[0:total_num_interp_weights]) \ - copy(potential_approx[0:num_targets], potential_direct[0:num_targets]) -#endif - { for (int i = 0; i < batches->numnodes; i++) { int batch_ibeg = batches->ibeg[i]; @@ -118,7 +100,7 @@ void InteractionCompute_PC(double *potential, struct Tree 
*tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -126,7 +108,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, target_x, target_y, target_z, target_q, cluster_x, cluster_y, cluster_z, cluster_q, cluster_w, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); @@ -142,7 +124,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba cluster_start, total_num_interp_pts, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -151,7 +133,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba cluster_start, total_num_interp_pts, target_x, target_y, target_z, target_q, cluster_x, cluster_y, cluster_z, cluster_q, cluster_w, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. 
\n"); @@ -177,7 +159,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -185,7 +167,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, target_x, target_y, target_z, target_q, cluster_x, cluster_y, cluster_z, cluster_q, cluster_w, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); @@ -201,7 +183,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba cluster_start, total_num_interp_pts, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -210,7 +192,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba cluster_start, total_num_interp_pts, target_x, target_y, target_z, target_q, cluster_x, cluster_y, cluster_z, cluster_q, cluster_w, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. 
\n"); @@ -236,7 +218,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -244,7 +226,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, target_x, target_y, target_z, target_q, cluster_x, cluster_y, cluster_z, cluster_q, cluster_w, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } } else if (run_params->approximation == HERMITE) { @@ -255,7 +237,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, total_num_interp_pts, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -263,7 +245,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, total_num_interp_pts, target_x, target_y, target_z, target_q, cluster_x, cluster_y, cluster_z, cluster_q, cluster_w, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } } @@ -281,7 +263,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -289,7 +271,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, 
cluster_start, target_x, target_y, target_z, target_q, cluster_x, cluster_y, cluster_z, cluster_q, cluster_w, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } @@ -305,7 +287,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, total_num_interp_pts, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); */ } else if (run_params->singularity == SUBTRACTION) { @@ -319,7 +301,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, total_num_interp_pts, target_x, target_y, target_z, target_q, cluster_x, cluster_y, cluster_z, cluster_q, cluster_w, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); */ } } @@ -337,7 +319,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else if (run_params->approximation == HERMITE) { @@ -360,7 +342,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } } else if (run_params->approximation == HERMITE) { @@ -371,7 +353,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, total_num_interp_pts, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } } @@ 
-388,7 +370,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba interp_pts_per_cluster, batch_start, cluster_start, target_x, target_y, target_z, cluster_x, cluster_y, cluster_z, cluster_q, - run_params, potential_approx, stream_id); + run_params, potential, stream_id); } else if (run_params->approximation == HERMITE) { @@ -428,8 +410,8 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba K_Coulomb_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + source_x, source_y, source_z, source_q, + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -437,7 +419,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba batch_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + run_params, potential, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. \n"); @@ -455,8 +437,8 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba K_Yukawa_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + source_x, source_y, source_z, source_q, + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -464,7 +446,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba batch_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + run_params, potential, stream_id); } else { printf("**ERROR** INVALID CHOICE OF SINGULARITY. EXITING. 
\n"); @@ -482,8 +464,8 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba K_RegularizedCoulomb_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + source_x, source_y, source_z, source_q, + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -491,7 +473,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba batch_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + run_params, potential, stream_id); } @@ -506,8 +488,8 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba K_RegularizedYukawa_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + source_x, source_y, source_z, source_q, + run_params, potential, stream_id); } else if (run_params->singularity == SUBTRACTION) { @@ -515,7 +497,7 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba batch_start, source_start, target_x, target_y, target_z, target_q, source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + run_params, potential, stream_id); } @@ -529,8 +511,8 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba K_Atan_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + source_x, source_y, source_z, source_q, + run_params, potential, stream_id); @@ -543,8 +525,8 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct 
Tree *ba K_MQ_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + source_x, source_y, source_z, source_q, + run_params, potential, stream_id); /***************************************/ @@ -556,8 +538,8 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba K_SinOverR_PP(num_targets_in_batch, num_sources_in_cluster, batch_start, source_start, target_x, target_y, target_z, - source_x, source_y, source_z, source_q, source_w, - run_params, potential_direct, stream_id); + source_x, source_y, source_z, source_q, + run_params, potential, stream_id); } else { printf("**ERROR** INVALID KERNEL. EXITING.\n"); @@ -568,18 +550,8 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba } // end loop over target batches #ifdef OPENACC_ENABLED - #pragma acc wait + #pragma acc wait #endif - } // end acc data region - - for (int k = 0; k < num_targets; k++) - potential[k] += potential_direct[k]; - - for (int k = 0; k < num_targets; k++) - potential[k] += potential_approx[k]; - - free_vector(potential_direct); - free_vector(potential_approx); return; diff --git a/src/kernels/atan/atan_pp.c b/src/kernels/atan/atan_pp.c index 75d896d0..4a5afd5e 100644 --- a/src/kernels/atan/atan_pp.c +++ b/src/kernels/atan/atan_pp.c @@ -13,7 +13,7 @@ void K_Atan_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id) { @@ -24,7 +24,7 @@ void K_Atan_PP(int number_of_targets_in_batch, int number_of_source_points_in_cl 
#ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ - source_x, source_y, source_z, source_charge, source_weight, potential) + source_x, source_y, source_z, source_charge, potential) { #endif #ifdef OPENACC_ENABLED @@ -49,7 +49,7 @@ void K_Atan_PP(int number_of_targets_in_batch, int number_of_source_points_in_cl if (dz > 0.5) { dz -= 1.0; } - temporary_potential += source_charge[jj] * source_weight[jj] + temporary_potential += source_charge[jj] * (1.0 / M_PI * atan(delta_factor * tan(M_PI * dz)) - dz); } // end loop over interpolation points #ifdef OPENACC_ENABLED diff --git a/src/kernels/atan/atan_pp.h b/src/kernels/atan/atan_pp.h index be1ff988..b8e49bbb 100644 --- a/src/kernels/atan/atan_pp.h +++ b/src/kernels/atan/atan_pp.h @@ -8,7 +8,7 @@ void K_Atan_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id); diff --git a/src/kernels/coulomb/coulomb_cp.c b/src/kernels/coulomb/coulomb_cp.c index 4752b950..bd85aefd 100644 --- a/src/kernels/coulomb/coulomb_cp.c +++ b/src/kernels/coulomb/coulomb_cp.c @@ -8,7 +8,7 @@ void K_Coulomb_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, struct RunParams *run_params, int gpu_async_stream_id) { @@ -46,7 +46,7 @@ void K_Coulomb_CP_Lagrange(int 
number_of_sources_in_batch, int number_of_interpo double dz = cz - source_z[jj]; double r2 = dx*dx + dy*dy + dz*dz; - temporary_potential += source_q[jj] * source_w[jj] / sqrt(r2); + temporary_potential += source_q[jj] / sqrt(r2); } // end loop over interpolation points #ifdef OPENACC_ENABLED @@ -65,7 +65,7 @@ void K_Coulomb_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpo void K_Coulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, struct RunParams *run_params, int gpu_async_stream_id) { @@ -83,7 +83,7 @@ void K_Coulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpol #ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ - source_w, cluster_x, cluster_y, cluster_z, \ + cluster_x, cluster_y, cluster_z, \ cluster_q_, cluster_q_dx, cluster_q_dy, cluster_q_dz, \ cluster_q_dxy, cluster_q_dyz, cluster_q_dxz, \ cluster_q_dxyz) @@ -119,8 +119,7 @@ void K_Coulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpol #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) + source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) #endif int jj = 
starting_index_of_sources + j; @@ -130,7 +129,7 @@ void K_Coulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpol double r2 = dx*dx + dy*dy + dz*dz; double r2inv = 1 / r2; - double rinvq = source_q[jj] * source_w[jj] / sqrt(r2); + double rinvq = source_q[jj] / sqrt(r2); double r3inv = rinvq * r2inv; double r5inv = r3inv * r2inv; double r7inv = r5inv * r2inv; diff --git a/src/kernels/coulomb/coulomb_cp.h b/src/kernels/coulomb/coulomb_cp.h index 65b5cbbf..49b3da45 100644 --- a/src/kernels/coulomb/coulomb_cp.h +++ b/src/kernels/coulomb/coulomb_cp.h @@ -7,13 +7,13 @@ void K_Coulomb_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, int gpu_async_stream_id); void K_Coulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, int gpu_async_stream_id); diff --git a/src/kernels/coulomb/coulomb_pc.c b/src/kernels/coulomb/coulomb_pc.c index 260a2ff3..54ea4c26 100644 --- a/src/kernels/coulomb/coulomb_pc.c +++ b/src/kernels/coulomb/coulomb_pc.c @@ -46,7 +46,7 @@ void K_Coulomb_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpo double dz = tz - cluster_z[jj]; double r2 = dx*dx + dy*dy + dz*dz; - temporary_potential += cluster_charge[starting_index_of_cluster + j] / sqrt(r2); + temporary_potential += 
cluster_charge[starting_index_of_cluster + j] / sqrt(r2); } // end loop over interpolation points #ifdef OPENACC_ENABLED diff --git a/src/kernels/coulomb/coulomb_pp.c b/src/kernels/coulomb/coulomb_pp.c index b971232d..10dfe8ab 100644 --- a/src/kernels/coulomb/coulomb_pp.c +++ b/src/kernels/coulomb/coulomb_pp.c @@ -8,13 +8,13 @@ void K_Coulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id) { #ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ - source_x, source_y, source_z, source_charge, source_weight, potential) + source_x, source_y, source_z, source_charge, potential) { #pragma acc loop independent #endif @@ -35,8 +35,7 @@ void K_Coulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ source_y[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ source_z[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ - source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ - source_weight[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster]) + source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster]) #endif @@ -47,7 +46,7 @@ void K_Coulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in double r2 = dx*dx + dy*dy + dz*dz; if (r2 > DBL_MIN) { - temporary_potential += 
source_charge[jj] * source_weight[jj] / sqrt(r2); + temporary_potential += source_charge[jj] / sqrt(r2); } } // end loop over interpolation points #ifdef OPENACC_ENABLED diff --git a/src/kernels/coulomb/coulomb_pp.h b/src/kernels/coulomb/coulomb_pp.h index 130421e5..d44ceffc 100644 --- a/src/kernels/coulomb/coulomb_pp.h +++ b/src/kernels/coulomb/coulomb_pp.h @@ -8,7 +8,7 @@ void K_Coulomb_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id); diff --git a/src/kernels/mq/mq_pp.c b/src/kernels/mq/mq_pp.c index 642fd517..fa99c8f9 100644 --- a/src/kernels/mq/mq_pp.c +++ b/src/kernels/mq/mq_pp.c @@ -13,7 +13,7 @@ void K_MQ_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id) { @@ -25,7 +25,7 @@ void K_MQ_PP(int number_of_targets_in_batch, int number_of_source_points_in_clus #ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ - source_x, source_y, source_z, source_charge, source_weight, potential) + source_x, source_y, source_z, source_charge, potential) { #endif #ifdef OPENACC_ENABLED @@ -51,7 +51,7 @@ void K_MQ_PP(int number_of_targets_in_batch, int number_of_source_points_in_clus if (dz > 0.5) { dz -= 1.0; } - temporary_potential += 
source_charge[jj] * source_weight[jj] + temporary_potential += source_charge[jj] * (.5 * dz * norm_delta_L / sqrt(dz * dz + deltaLsq) - dz); } // end loop over interpolation points #ifdef OPENACC_ENABLED diff --git a/src/kernels/mq/mq_pp.h b/src/kernels/mq/mq_pp.h index 0ce73cb3..91ec814a 100644 --- a/src/kernels/mq/mq_pp.h +++ b/src/kernels/mq/mq_pp.h @@ -8,7 +8,7 @@ void K_MQ_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id); diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_cp.c b/src/kernels/regularized-coulomb/regularized-coulomb_cp.c index ba69e31d..aa65a918 100644 --- a/src/kernels/regularized-coulomb/regularized-coulomb_cp.c +++ b/src/kernels/regularized-coulomb/regularized-coulomb_cp.c @@ -8,7 +8,7 @@ void K_RegularizedCoulomb_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, struct RunParams *run_params, int gpu_async_stream_id) { @@ -48,7 +48,7 @@ void K_RegularizedCoulomb_CP_Lagrange(int number_of_sources_in_batch, int number double dz = cz - source_z[jj]; double r2 = dx*dx + dy*dy + dz*dz + epsilon2; - temporary_potential += source_q[jj] * source_w[jj] / sqrt(r2); + temporary_potential += source_q[jj] / sqrt(r2); } // end loop over interpolation points #ifdef OPENACC_ENABLED @@ -67,7 +67,7 @@ void 
K_RegularizedCoulomb_CP_Lagrange(int number_of_sources_in_batch, int number void K_RegularizedCoulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, struct RunParams *run_params, int gpu_async_stream_id) { @@ -87,7 +87,7 @@ void K_RegularizedCoulomb_CP_Hermite(int number_of_sources_in_batch, int number_ #ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ - source_w, cluster_x, cluster_y, cluster_z, \ + cluster_x, cluster_y, cluster_z, \ cluster_q_, cluster_q_dx, cluster_q_dy, cluster_q_dz, \ cluster_q_dxy, cluster_q_dyz, cluster_q_dxz, \ cluster_q_dxyz) @@ -123,8 +123,7 @@ void K_RegularizedCoulomb_CP_Hermite(int number_of_sources_in_batch, int number_ #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) + source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) #endif int jj = starting_index_of_sources + j; @@ -134,7 +133,7 @@ void K_RegularizedCoulomb_CP_Hermite(int number_of_sources_in_batch, int number_ double r2 = dx*dx + dy*dy + dz*dz + epsilon2; double r2inv = 1 / r2; - double rinvq = source_q[jj] * source_w[jj] / sqrt(r2); + double rinvq = source_q[jj] / sqrt(r2); double r3inv = rinvq * 
r2inv; double r5inv = r3inv * r2inv; double r7inv = r5inv * r2inv; diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_cp.h b/src/kernels/regularized-coulomb/regularized-coulomb_cp.h index ff6a2183..2a72b6ba 100644 --- a/src/kernels/regularized-coulomb/regularized-coulomb_cp.h +++ b/src/kernels/regularized-coulomb/regularized-coulomb_cp.h @@ -7,13 +7,13 @@ void K_RegularizedCoulomb_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, int gpu_async_stream_id); void K_RegularizedCoulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, int gpu_async_stream_id); diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_pp.c b/src/kernels/regularized-coulomb/regularized-coulomb_pp.c index b330cb9e..9345b225 100644 --- a/src/kernels/regularized-coulomb/regularized-coulomb_pp.c +++ b/src/kernels/regularized-coulomb/regularized-coulomb_pp.c @@ -9,14 +9,14 @@ void K_RegularizedCoulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, 
double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id) { double epsilon=run_params->kernel_params[0]; #ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ - source_x, source_y, source_z, source_charge, source_weight, potential) + source_x, source_y, source_z, source_charge, potential) { #pragma acc loop independent #endif @@ -37,8 +37,7 @@ void K_RegularizedCoulomb_PP(int number_of_targets_in_batch, int number_of_sourc #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1], \ source_y[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1], \ source_z[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1], \ - source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1], \ - source_weight[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1]) + source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1]) #endif @@ -48,7 +47,7 @@ void K_RegularizedCoulomb_PP(int number_of_targets_in_batch, int number_of_sourc double dz = tz - source_z[jj]; double r2 = dx*dx + dy*dy + dz*dz + epsilon*epsilon; - temporary_potential += source_charge[jj] * source_weight[jj] / sqrt(r2); + temporary_potential += source_charge[jj] / sqrt(r2); } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic diff --git a/src/kernels/regularized-coulomb/regularized-coulomb_pp.h b/src/kernels/regularized-coulomb/regularized-coulomb_pp.h index b46994db..24e9f90f 100644 --- a/src/kernels/regularized-coulomb/regularized-coulomb_pp.h +++ b/src/kernels/regularized-coulomb/regularized-coulomb_pp.h @@ -8,7 +8,7 @@ void K_RegularizedCoulomb_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 
int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id); diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_cp.c b/src/kernels/regularized-yukawa/regularized-yukawa_cp.c index 73099ca7..6d807f3d 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_cp.c +++ b/src/kernels/regularized-yukawa/regularized-yukawa_cp.c @@ -8,7 +8,7 @@ void K_RegularizedYukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, struct RunParams *run_params, int gpu_async_stream_id) { @@ -50,7 +50,7 @@ void K_RegularizedYukawa_CP_Lagrange(int number_of_sources_in_batch, int number_ double r2 = dx*dx + dy*dy + dz*dz; double r = sqrt(r2); - temporary_potential += source_q[jj] * source_w[jj] * exp(-kappa * r) / sqrt(r2 + epsilon2); + temporary_potential += source_q[jj] * exp(-kappa * r) / sqrt(r2 + epsilon2); } // end loop over interpolation points #ifdef OPENACC_ENABLED diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_cp.h b/src/kernels/regularized-yukawa/regularized-yukawa_cp.h index 295d96d5..84ebff9e 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_cp.h +++ b/src/kernels/regularized-yukawa/regularized-yukawa_cp.h @@ -7,7 +7,7 @@ void K_RegularizedYukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int 
starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, int gpu_async_stream_id); diff --git a/src/kernels/regularized-yukawa/regularized-yukawa_pp.c b/src/kernels/regularized-yukawa/regularized-yukawa_pp.c index 0c9c5276..4d83e88b 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_pp.c +++ b/src/kernels/regularized-yukawa/regularized-yukawa_pp.c @@ -9,7 +9,7 @@ void K_RegularizedYukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id) { @@ -19,7 +19,7 @@ void K_RegularizedYukawa_PP(int number_of_targets_in_batch, int number_of_source #ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ - source_x, source_y, source_z, source_charge, source_weight, potential) + source_x, source_y, source_z, source_charge, potential) { #endif #ifdef OPENACC_ENABLED @@ -44,8 +44,7 @@ void K_RegularizedYukawa_PP(int number_of_targets_in_batch, int number_of_source double r = sqrt(dx*dx + dy*dy + dz*dz); - temporary_potential += source_charge[starting_index_of_source + j] - * source_weight[starting_index_of_source + j] * exp(-kappa*r) / sqrt(r*r + epsilon2); + temporary_potential += source_charge[starting_index_of_source + j] * exp(-kappa*r) / sqrt(r*r + epsilon2); } // end loop over interpolation points #ifdef OPENACC_ENABLED #pragma acc atomic diff --git 
a/src/kernels/regularized-yukawa/regularized-yukawa_pp.h b/src/kernels/regularized-yukawa/regularized-yukawa_pp.h index 294cfc4e..66c6805a 100644 --- a/src/kernels/regularized-yukawa/regularized-yukawa_pp.h +++ b/src/kernels/regularized-yukawa/regularized-yukawa_pp.h @@ -8,7 +8,7 @@ void K_RegularizedYukawa_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id); diff --git a/src/kernels/sin-over-r/sin-over-r_cp.c b/src/kernels/sin-over-r/sin-over-r_cp.c index c3447c42..33d5af6d 100644 --- a/src/kernels/sin-over-r/sin-over-r_cp.c +++ b/src/kernels/sin-over-r/sin-over-r_cp.c @@ -8,7 +8,7 @@ void K_SinOverR_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, struct RunParams *run_params, int gpu_async_stream_id) { @@ -48,7 +48,7 @@ void K_SinOverR_CP_Lagrange(int number_of_sources_in_batch, int number_of_interp double dz = cz - source_z[jj]; double r = sqrt(dx*dx + dy*dy + dz*dz); - temporary_potential += source_q[jj] * source_w[jj] * sin(kernel_parameter * r) / r; + temporary_potential += source_q[jj] * sin(kernel_parameter * r) / r; } // end loop over interpolation points #ifdef OPENACC_ENABLED @@ -67,7 +67,7 @@ void K_SinOverR_CP_Lagrange(int number_of_sources_in_batch, int number_of_interp void K_SinOverR_CP_Hermite(int 
number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, struct RunParams *run_params, int gpu_async_stream_id) { @@ -87,7 +87,7 @@ void K_SinOverR_CP_Hermite(int number_of_sources_in_batch, int number_of_interpo #ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ - source_w, cluster_x, cluster_y, cluster_z, \ + cluster_x, cluster_y, cluster_z, \ cluster_q_, cluster_q_dx, cluster_q_dy, cluster_q_dz, \ cluster_q_dxy, cluster_q_dyz, cluster_q_dxz, \ cluster_q_dxyz) @@ -123,8 +123,7 @@ void K_SinOverR_CP_Hermite(int number_of_sources_in_batch, int number_of_interpo #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) + source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) #endif int jj = starting_index_of_sources + j; @@ -142,8 +141,8 @@ void K_SinOverR_CP_Hermite(int number_of_sources_in_batch, int number_of_interpo double r6inv = r3inv * r3inv; double r7inv = r4inv * r3inv; - double sinr = sin(k*r) * source_q[jj] * source_w[jj]; - double cosr = cos(k*r) * source_q[jj] * source_w[jj]; + double sinr = sin(k*r) * source_q[jj]; + double cosr = cos(k*r) * source_q[jj]; double term_d0 = sinr * rinv; double term_d1 = sinr * 
r3inv - k * cosr * r2inv; diff --git a/src/kernels/sin-over-r/sin-over-r_cp.h b/src/kernels/sin-over-r/sin-over-r_cp.h index bc1526a1..9b0a7920 100644 --- a/src/kernels/sin-over-r/sin-over-r_cp.h +++ b/src/kernels/sin-over-r/sin-over-r_cp.h @@ -7,13 +7,13 @@ void K_SinOverR_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, int gpu_async_stream_id); void K_SinOverR_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, int gpu_async_stream_id); diff --git a/src/kernels/sin-over-r/sin-over-r_pp.c b/src/kernels/sin-over-r/sin-over-r_pp.c index 2b1b2984..f95051d2 100644 --- a/src/kernels/sin-over-r/sin-over-r_pp.c +++ b/src/kernels/sin-over-r/sin-over-r_pp.c @@ -8,7 +8,7 @@ void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id) { @@ -16,7 +16,7 @@ void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_source_points_i 
#ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ - source_x, source_y, source_z, source_charge, source_weight, potential) + source_x, source_y, source_z, source_charge, potential) { #pragma acc loop independent #endif @@ -37,8 +37,7 @@ void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_source_points_i #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ source_y[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ source_z[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ - source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ - source_weight[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster]) + source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster]) #endif @@ -49,7 +48,7 @@ void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_source_points_i double r = sqrt(dx*dx + dy*dy + dz*dz); if (r > DBL_MIN) { - temporary_potential += source_charge[jj] * source_weight[jj] * sin(kernel_parameter * r) / r; + temporary_potential += source_charge[jj] * sin(kernel_parameter * r) / r; } } // end loop over interpolation points #ifdef OPENACC_ENABLED diff --git a/src/kernels/sin-over-r/sin-over-r_pp.h b/src/kernels/sin-over-r/sin-over-r_pp.h index a17efa50..72af215f 100644 --- a/src/kernels/sin-over-r/sin-over-r_pp.h +++ b/src/kernels/sin-over-r/sin-over-r_pp.h @@ -8,7 +8,7 @@ void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, 
double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id); diff --git a/src/kernels/yukawa/yukawa_cp.c b/src/kernels/yukawa/yukawa_cp.c index 4d0ae3a6..dc8bca31 100644 --- a/src/kernels/yukawa/yukawa_cp.c +++ b/src/kernels/yukawa/yukawa_cp.c @@ -8,7 +8,7 @@ void K_Yukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, struct RunParams *run_params, int gpu_async_stream_id) { @@ -48,7 +48,7 @@ void K_Yukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpol double dz = cz - source_z[jj]; double r = sqrt(dx*dx + dy*dy + dz*dz); - temporary_potential += source_q[jj] * source_w[jj] * exp(-kernel_parameter * r) / r; + temporary_potential += source_q[jj] * exp(-kernel_parameter * r) / r; } // end loop over interpolation points #ifdef OPENACC_ENABLED @@ -67,7 +67,7 @@ void K_Yukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpol void K_Yukawa_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, struct RunParams *run_params, int gpu_async_stream_id) { @@ -87,7 +87,7 @@ void K_Yukawa_CP_Hermite(int number_of_sources_in_batch, int number_of_interpola #ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ - source_w, cluster_x, cluster_y, 
cluster_z, \ + cluster_x, cluster_y, cluster_z, \ cluster_q_, cluster_q_dx, cluster_q_dy, cluster_q_dz, \ cluster_q_dxy, cluster_q_dyz, cluster_q_dxz, \ cluster_q_dxyz) @@ -123,8 +123,7 @@ void K_Yukawa_CP_Hermite(int number_of_sources_in_batch, int number_of_interpola #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ - source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) + source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) #endif int jj = starting_index_of_sources + j; @@ -137,7 +136,7 @@ void K_Yukawa_CP_Hermite(int number_of_sources_in_batch, int number_of_interpola double r3 = r2 * r; double r2inv = 1 / r2; - double rinvq = source_q[jj] * source_w[jj] / r * exp(-kernel_parameter * r); + double rinvq = source_q[jj] / r * exp(-kernel_parameter * r); double r3inv = rinvq * r2inv; double r5inv = r3inv * r2inv; double r7inv = r5inv * r2inv; diff --git a/src/kernels/yukawa/yukawa_cp.h b/src/kernels/yukawa/yukawa_cp.h index 92eb0b99..acd78cba 100644 --- a/src/kernels/yukawa/yukawa_cp.h +++ b/src/kernels/yukawa/yukawa_cp.h @@ -7,13 +7,13 @@ void K_Yukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, int gpu_async_stream_id); void K_Yukawa_CP_Hermite(int 
number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_sources, int starting_index_of_cluster, - double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, + double *source_x, double *source_y, double *source_z, double *source_q, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, struct RunParams *run_params, int gpu_async_stream_id); diff --git a/src/kernels/yukawa/yukawa_pp.c b/src/kernels/yukawa/yukawa_pp.c index e043613d..583993e0 100644 --- a/src/kernels/yukawa/yukawa_pp.c +++ b/src/kernels/yukawa/yukawa_pp.c @@ -9,7 +9,7 @@ void K_Yukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, int starting_index_of_target, int starting_index_of_source, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id) { @@ -17,7 +17,7 @@ void K_Yukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_ #ifdef OPENACC_ENABLED #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ - source_x, source_y, source_z, source_charge, source_weight, potential) + source_x, source_y, source_z, source_charge, potential) { #endif #ifdef OPENACC_ENABLED @@ -42,8 +42,7 @@ void K_Yukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_ double r = sqrt(dx*dx + dy*dy + dz*dz); if (r > DBL_MIN) { - temporary_potential += source_charge[starting_index_of_source + j] - * source_weight[starting_index_of_source + j] * exp(-kernel_parameter*r) / r; + temporary_potential += source_charge[starting_index_of_source + j] * exp(-kernel_parameter*r) / r; } } // end loop over interpolation points #ifdef OPENACC_ENABLED diff --git a/src/kernels/yukawa/yukawa_pp.h 
b/src/kernels/yukawa/yukawa_pp.h index 929c295a..06aaaf02 100644 --- a/src/kernels/yukawa/yukawa_pp.h +++ b/src/kernels/yukawa/yukawa_pp.h @@ -7,7 +7,7 @@ void K_Yukawa_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, double *target_x, double *target_y, double *target_z, - double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, + double *source_x, double *source_y, double *source_z, double *source_charge, struct RunParams *run_params, double *potential, int gpu_async_stream_id); From e70b8d36dd5984219367d870fb065efe47389e78 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Sat, 25 Jul 2020 22:33:45 -0400 Subject: [PATCH 59/95] Fixing downpass GPU arrays --- .../interaction_compute_downpass.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index 6b821184..23fe97b8 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -13,7 +13,7 @@ static void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_degree, - double *xT, double *yT, double *zT, double *qT, + double *xT, double *yT, double *zT, double *clusterQ); static void cp_comp_pot_parent_to_child(struct Tree *tree, int parent_index, int child_index, int interp_degree, @@ -24,7 +24,7 @@ static void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int in double *clusterQ, double *clusterW); static void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int interp_degree, - double *xT, double *yT, double *zT, double *qT, + double *xT, double *yT, double *zT, double *clusterQ, double *clusterW); //static void cp_comp_pot_hermite_SS(struct Tree *tree, int idx, int interp_degree, @@ -88,7 +88,7 @@ void 
InteractionCompute_Downpass(double *potential, struct Tree *tree, for (int i = 0; i < tree->leaves_list_num; ++i) { int leaf_index = tree->leaves_list[i]; cp_comp_pot(tree, leaf_index, potential, interp_degree, - target_x, target_y, target_z, target_q, cluster_q); + target_x, target_y, target_z, cluster_q); } @@ -101,7 +101,7 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, } else if ((run_params->approximation == HERMITE) && (run_params->singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) cp_comp_pot_hermite(tree, i, potential, interp_degree, - target_x, target_y, target_z, target_q, cluster_q, cluster_w); + target_x, target_y, target_z, cluster_q, cluster_w); } else if ((run_params->approximation == HERMITE) && (run_params->singularity == SUBTRACTION)) { printf("Not set up to do Hermite SS downpass.\n"); @@ -304,7 +304,7 @@ void cp_comp_pot_parent_to_child(struct Tree *tree, int parent_index, int child_ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_degree, - double *target_x, double *target_y, double *target_z, double *target_q, + double *target_x, double *target_y, double *target_z, double *cluster_q) { int interp_degree_lim = interp_degree + 1; @@ -332,7 +332,7 @@ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_degre #ifdef OPENACC_ENABLED int streamID = rand() % 4; - #pragma acc kernels async(streamID) present(target_x, target_y, target_z, target_q, cluster_q) \ + #pragma acc kernels async(streamID) present(target_x, target_y, target_z, cluster_q) \ create(nodeX[0:interp_degree_lim], nodeY[0:interp_degree_lim], nodeZ[0:interp_degree_lim], \ weights[0:interp_degree_lim], dj[0:interp_degree_lim], tt[0:interp_degree_lim]) { @@ -655,7 +655,7 @@ void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int interp_de void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int interp_degree, - double *target_x, double *target_y, double *target_z, 
double *target_q, double *cluster_q, double *cluster_w) + double *target_x, double *target_y, double *target_z, double *cluster_q, double *cluster_w) { int interp_degree_lim = interp_degree + 1; int interp_pts_per_cluster = interp_degree_lim * interp_degree_lim * interp_degree_lim; @@ -694,7 +694,7 @@ void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int inte #ifdef OPENACC_ENABLED int streamID = rand() % 4; - #pragma acc kernels async(streamID) present(target_x, target_y, target_z, target_q, \ + #pragma acc kernels async(streamID) present(target_x, target_y, target_z, \ cluster_q_, cluster_q_dx, cluster_q_dy, cluster_q_dz, \ cluster_q_dxy, cluster_q_dyz, cluster_q_dxz, \ cluster_q_dxyz) \ From 0634828615b4bbfcdcf05118bc625f1d3cf4a33e Mon Sep 17 00:00:00 2001 From: Leighton Wilson Date: Mon, 27 Jul 2020 12:04:48 -0700 Subject: [PATCH 60/95] Unstructured data for PC and CP --- src/clusters/clusters.c | 12 ++++ src/drivers/treedriver.c | 119 +++++++++++++++++++++++++++++++++++---- 2 files changed, 119 insertions(+), 12 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index d3918900..57696903 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -73,6 +73,12 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa int totalNumberInterpolationPoints = tree_numnodes * interpolationPointsPerCluster; int totalNumberInterpolationCharges = tree_numnodes * run_params->interp_charges_per_cluster; int totalNumberInterpolationWeights = tree_numnodes * run_params->interp_weights_per_cluster; + + clusters->x = NULL; + clusters->y = NULL; + clusters->z = NULL; + clusters->q = NULL; + clusters->w = NULL; MPI_Alloc_mem(totalNumberInterpolationPoints*sizeof(double), MPI_INFO_NULL, &(clusters->x)); MPI_Alloc_mem(totalNumberInterpolationPoints*sizeof(double), MPI_INFO_NULL, &(clusters->y)); @@ -232,6 +238,12 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Pa int 
totalNumberInterpolationCharges = tree_numnodes * run_params->interp_charges_per_cluster; int totalNumberInterpolationWeights = tree_numnodes * run_params->interp_weights_per_cluster; + clusters->x = NULL; + clusters->y = NULL; + clusters->z = NULL; + clusters->q = NULL; + clusters->w = NULL; + make_vector(clusters->x, totalNumberInterpolationPoints); make_vector(clusters->y, totalNumberInterpolationPoints); make_vector(clusters->z, totalNumberInterpolationPoints); diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 02c2f809..55c65051 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -94,10 +94,25 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[0]); Tree_Targets_Construct(&tree, targets, run_params); +#ifdef OPENACC_ENABLED + #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ + targets->z[0:targets->num], targets->q[0:targets->num]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data copyin(targets->q[0:targets->num]) + } + #pragma acc enter data create(potential[0:targets->num]) +#endif STOP_TIMER(&time_tree[0]); START_TIMER(&time_tree[1]); Batches_Sources_Construct(&batches, sources, run_params); +#ifdef OPENACC_ENABLED + #pragma acc enter data copyin(sources->x[0:sources->num], sources->y[0:sources->num], \ + sources->z[0:sources->num], sources->q[0:sources->num]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data copyin(sources->w[0:sources->num]) + } +#endif STOP_TIMER(&time_tree[1]); START_TIMER(&time_tree[2]); @@ -150,6 +165,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run InteractionCompute_CP(potential, tree, batches, local_interaction_list, sources, targets, clusters, run_params); InteractionLists_Free(&local_interaction_list); +#ifdef OPENACC_ENABLED + #pragma acc exit data delete(sources->x, sources->y, sources->z, sources->q) + if 
(run_params->singularity == SUBTRACTION) { + #pragma acc exit data delete(sources->w) + } +#endif STOP_TIMER(&time_tree[5]); //~~~~~~~~~~~~~~~~~~~~ @@ -191,9 +212,23 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ START_TIMER(&time_tree[7]); +#ifdef OPENACC_ENABLED + #pragma acc enter data copyin(remote_sources->x[0:remote_sources->num], remote_sources->y[0:remote_sources->num], \ + remote_sources->z[0:remote_sources->num], remote_sources->q[0:remote_sources->num]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data create(remote_sources->w[0:remote_sources->num]) + } +#endif InteractionCompute_CP(potential, tree, remote_batches, let_interaction_list, remote_sources, targets, clusters, run_params); InteractionLists_Free(&let_interaction_list); +#ifdef OPENACC_ENABLED + #pragma acc exit data delete(remote_sources->x, remote_sources->y, \ + remote_sources->z, remote_sources->q) + if (run_params->singularity == SUBTRACTION) { + #pragma acc exit data delete(remote_sources->w) + } +#endif Particles_Free(&remote_sources); Batches_Free(&remote_batches); STOP_TIMER(&time_tree[7]); @@ -218,6 +253,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //------------------------------- START_TIMER(&time_tree[9]); + #pragma acc exit data copyout(potential[0:targets->num]) InteractionCompute_SubtractionPotentialCorrection(potential, targets, run_params); Particles_Targets_Reorder(targets, potential); Particles_Sources_Reorder(sources); @@ -231,6 +267,14 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //------------------------------- START_TIMER(&time_tree[10]); +#ifdef OPENACC_ENABLED + #pragma acc exit data delete(targets->x, targets->y, targets->z, \ + clusters->x, clusters->y, \ + clusters->z, clusters->q) + if (run_params->singularity == SUBTRACTION) { + #pragma acc exit data 
delete(targets->q, clusters->w) + } +#endif Particles_FreeOrder(sources); Particles_FreeOrder(targets); Tree_Free(&tree); @@ -270,10 +314,25 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[0]); Tree_Sources_Construct(&tree, sources, run_params); +#ifdef OPENACC_ENABLED + #pragma acc enter data copyin(sources->x[0:sources->num], sources->y[0:sources->num], \ + sources->z[0:sources->num], sources->q[0:sources->num]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data copyin(sources->w[0:sources->num]) + } +#endif STOP_TIMER(&time_tree[0]); START_TIMER(&time_tree[1]); Batches_Targets_Construct(&batches, targets, run_params); +#ifdef OPENACC_ENABLED + #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ + targets->z[0:targets->num], targets->q[0:targets->num]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data copyin(targets->q[0:targets->num]) + } + #pragma acc enter data create(potential[0:targets->num]) +#endif STOP_TIMER(&time_tree[1]); START_TIMER(&time_tree[2]); @@ -363,6 +422,13 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run InteractionCompute_PC(potential, tree, batches, local_interaction_list, sources, targets, clusters, run_params); InteractionLists_Free(&local_interaction_list); +#ifdef OPENACC_ENABLED + #pragma acc exit data delete(sources->x, sources->y, sources->z, sources->q, \ + clusters->x, clusters->y, clusters->z, clusters->q) + if (run_params->singularity == SUBTRACTION) { + #pragma acc exit data delete(sources->w, clusters->w) + } +#endif STOP_TIMER(&time_tree[5]); //~~~~~~~~~~~~~~~~~~~~ @@ -372,6 +438,21 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run time_tree[6] = 0; time_tree[7] = 0; +#ifdef OPENACC_ENABLED + if (num_procs > 1) { + START_TIMER(&time1); + #pragma acc enter data copyin(let_sources->x[0:let_sources->num], 
let_sources->y[0:let_sources->num], \ + let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ + let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ + let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data create(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) + } + STOP_TIMER(&time1); + time_tree[6] += time1; + } +#endif + for (int proc_id = 1; proc_id < num_procs; ++proc_id) { int get_from = (num_procs+rank-proc_id) % num_procs; @@ -408,6 +489,21 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run time_tree[7] += time1; } + +#ifdef OPENACC_ENABLED + if (num_procs > 1) { + START_TIMER(&time1); + #pragma acc exit data delete(let_sources->x, let_sources->y, \ + let_sources->z, let_sources->q, \ + let_clusters->x, let_clusters->y, \ + let_clusters->z, let_clusters->q) + if (run_params->singularity == SUBTRACTION) { + #pragma acc exit data delete(let_sources->w, let_clusters->w) + } + STOP_TIMER(&time1); + time_tree[6] += time1; + } +#endif //------------------------------- @@ -419,6 +515,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run time_tree[8] = 0.0; START_TIMER(&time_tree[9]); + #pragma acc exit data copyout(potential[0:targets->num]) InteractionCompute_SubtractionPotentialCorrection(potential, targets, run_params); Particles_Targets_Reorder(targets, potential); Particles_Sources_Reorder(sources); @@ -432,6 +529,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //------------------------------- START_TIMER(&time_tree[10]); +#ifdef OPENACC_ENABLED + #pragma acc exit data delete(targets->x, targets->y, targets->z) + if (run_params->singularity == SUBTRACTION) { + #pragma acc exit data delete(targets->q) + } +#endif Particles_FreeOrder(sources); Particles_FreeOrder(targets); 
Tree_Free(&tree); @@ -440,9 +543,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run // remote pieces Clusters_Free(&let_clusters); - Particles_Free(&let_sources); - CommTypesAndTrees_Free(&comm_types, &let_trees); STOP_TIMER(&time_tree[10]); @@ -602,15 +703,14 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[5]); InteractionCompute_CC(potential, source_tree, target_tree, local_interaction_list, sources, targets, source_clusters, target_clusters, run_params); + InteractionLists_Free(&local_interaction_list); #ifdef OPENACC_ENABLED - #pragma acc exit data delete(sources->x, sources->y, sources->z, sources->q, sources->w, \ - source_clusters->x, source_clusters->y, source_clusters->z, \ - source_clusters->q, source_clusters->w) + #pragma acc exit data delete(sources->x, sources->y, sources->z, sources->q, \ + source_clusters->x, source_clusters->y, source_clusters->z, source_clusters->q) if (run_params->singularity == SUBTRACTION) { #pragma acc exit data delete(sources->w, source_clusters->w) } #endif - InteractionLists_Free(&local_interaction_list); STOP_TIMER(&time_tree[5]); //~~~~~~~~~~~~~~~~~~~~ @@ -691,10 +791,8 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time1); #pragma acc exit data delete(let_sources->x, let_sources->y, \ let_sources->z, let_sources->q, \ - let_sources->w, \ let_clusters->x, let_clusters->y, \ - let_clusters->z, let_clusters->q, \ - let_clusters->w) + let_clusters->z, let_clusters->q) if (run_params->singularity == SUBTRACTION) { #pragma acc exit data delete(let_sources->w, let_clusters->w) } @@ -713,7 +811,6 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[8]); InteractionCompute_Downpass(potential, target_tree, targets, target_clusters, run_params); STOP_TIMER(&time_tree[8]); - //------------------------------- 
//------------------------------- @@ -736,7 +833,6 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //------------------------------- START_TIMER(&time_tree[10]); - #ifdef OPENACC_ENABLED #pragma acc exit data delete(targets->x, targets->y, targets->z, \ target_clusters->x, target_clusters->y, \ @@ -745,7 +841,6 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run #pragma acc exit data delete(targets->q, target_clusters->w) } #endif - Particles_FreeOrder(sources); Particles_FreeOrder(targets); Tree_Free(&source_tree); From e4fb8e298ed10a6cba62bd90bba18556a5c5336c Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Mon, 27 Jul 2020 18:21:07 -0400 Subject: [PATCH 61/95] O(NlogN) for SS upward pass --- src/clusters/clusters.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 57696903..ef6e31e9 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -30,7 +30,7 @@ static void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, i static void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); -static void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolationDegree, +static void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); @@ -146,9 +146,10 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa } else if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { -// for (int i = 0; i < tree_numnodes; i++) -// pc_comp_ms_modifiedF_SS(tree, i, interpolationDegree, xS, yS, 
zS, qS, wS, xC, yC, zC, qC, wC); + for (int i = 0; i < tree_numnodes; i++) + pc_comp_ms_modifiedF_SS(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); +/* // anterpolate from particles to leaf cluster interpolation points printf("anterpolating Q to leaves.\n"); for (int i = 0; i < tree->leaves_list_num; ++i) { @@ -196,7 +197,7 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa } } } - +*/ } else if ((approximation == HERMITE) && (singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) @@ -1212,7 +1213,7 @@ void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDeg -void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolationDegree, +void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) { @@ -1232,7 +1233,7 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati make_vector(nodeY, interpDegreeLim); make_vector(nodeZ, interpDegreeLim); make_vector(modifiedF, pointsInNode); -// make_vector(modifiedF2, pointsInNode); + make_vector(modifiedF2, pointsInNode); make_vector(exactIndX, pointsInNode); make_vector(exactIndY, pointsInNode); make_vector(exactIndZ, pointsInNode); @@ -1260,7 +1261,7 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati #endif for (int j = 0; j < pointsInNode; j++) { modifiedF[j] = qS[startingIndexInSources + j] * wS[startingIndexInSources + j]; -// modifiedF2[j] = wS[startingIndexInSources + j]; + modifiedF2[j] = wS[startingIndexInSources + j]; exactIndX[j] = -1; exactIndY[j] = -1; exactIndZ[j] = -1; @@ -1335,7 +1336,7 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati if (exactIndZ[i] == -1) denominator *= sumZ; modifiedF[i] /= denominator; -// modifiedF2[i] /= 
denominator; + modifiedF2[i] /= denominator; } @@ -1399,11 +1400,11 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati } temp += numerator * modifiedF[i]; -// temp2 += numerator * modifiedF2[i]; + temp2 += numerator * modifiedF2[i]; } clusterQ[startingIndexInClusters + j] += temp; -// clusterW[startingIndexInClusters + j] += temp2; + clusterW[startingIndexInClusters + j] += temp2; } #ifdef OPENACC_ENABLED @@ -1417,7 +1418,7 @@ void pc_comp_ms_modifiedF_SS_Q(const struct Tree *tree, int idx, int interpolati free_vector(nodeY); free_vector(nodeZ); free_vector(modifiedF); -// free_vector(modifiedF2); + free_vector(modifiedF2); free_vector(exactIndX); free_vector(exactIndY); free_vector(exactIndZ); From b0736a6467b32807de6953435aaef9f0c8c5db8c Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Mon, 27 Jul 2020 22:33:04 -0400 Subject: [PATCH 62/95] Removing unneeded functions --- src/clusters/clusters.c | 482 +--------------------------------------- 1 file changed, 10 insertions(+), 472 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index ef6e31e9..44032849 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -24,20 +24,13 @@ static void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpola static void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ); -static void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, - double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); - -static void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, +static void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_index, int 
parent_index, int interpolationDegree, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); static void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *wS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); -static void pc_comp_ms_modifiedF_SS_W(const struct Tree *tree, int idx, int interpolationDegree, - double *xS, double *yS, double *zS, double *qS, double *wS, - double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW); - static void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpolationDegree, int totalNumberInterpolationPoints, double *xS, double *yS, double *zS, double *qS, double *wS, @@ -146,44 +139,15 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa } else if ((approximation == LAGRANGE) && (singularity == SUBTRACTION)) { - for (int i = 0; i < tree_numnodes; i++) - pc_comp_ms_modifiedF_SS(tree, i, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); - -/* - // anterpolate from particles to leaf cluster interpolation points - printf("anterpolating Q to leaves.\n"); - for (int i = 0; i < tree->leaves_list_num; ++i) { - int leaf_index = tree->leaves_list[i]; - pc_comp_ms_modifiedF_SS_Q(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); - } - - // interpolate up clusters, level by level - for (int level = tree->max_depth-1; level >= 0; --level) { - printf("anterpolating Q at level %i.\n", level); - for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { - - int parent_index = tree->levels_list[level][cluster_index]; - - for (int child_counter=0; child_counter<tree->num_children[parent_index]; ++child_counter){ - - int child_index = tree->children[8*parent_index + child_counter]; - - pc_comp_ms_modifiedF_SS_child_to_parent_Q(tree,
child_index, parent_index, interpolationDegree, xC, yC, zC, qC, wC); - - } - } - } // anterpolate from particles to leaf cluster interpolation points - printf("anterpolating W to leaves.\n"); for (int i = 0; i < tree->leaves_list_num; ++i) { int leaf_index = tree->leaves_list[i]; - pc_comp_ms_modifiedF_SS_W(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); + pc_comp_ms_modifiedF_SS(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); } // interpolate up clusters, level by level for (int level = tree->max_depth-1; level >= 0; --level) { - printf("anterpolating W at level %i.\n", level); for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { int parent_index = tree->levels_list[level][cluster_index]; @@ -192,12 +156,11 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa int child_index = tree->children[8*parent_index + child_counter]; - pc_comp_ms_modifiedF_SS_child_to_parent_W(tree, child_index, parent_index, interpolationDegree, xC, yC, zC, qC, wC); + pc_comp_ms_modifiedF_SS_child_to_parent(tree, child_index, parent_index, interpolationDegree, xC, yC, zC, qC, wC); } } } -*/ } else if ((approximation == HERMITE) && (singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) @@ -578,7 +541,7 @@ void pc_comp_ms_modifiedF_child_to_parent(const struct Tree *tree, int child_ind } -void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, +void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) { @@ -598,219 +561,6 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_Q(const struct Tree *tree, int chil make_vector(nodeY, interpDegreeLim); make_vector(nodeZ, interpDegreeLim); make_vector(modifiedF, 
interpolationPointsPerCluster); -// make_vector(modifiedF2, interpolationPointsPerCluster); - make_vector(exactIndX, interpolationPointsPerCluster); - make_vector(exactIndY, interpolationPointsPerCluster); - make_vector(exactIndZ, interpolationPointsPerCluster); - - double x0 = tree->x_min[parent_index]; - double x1 = tree->x_max[parent_index]; - double y0 = tree->y_min[parent_index]; - double y1 = tree->y_max[parent_index]; - double z0 = tree->z_min[parent_index]; - double z1 = tree->z_max[parent_index]; - -#ifdef OPENACC_ENABLED - int streamID = rand() % 4; - #pragma acc kernels async(streamID) present(clusterX, clusterY, clusterZ, clusterQ) \ - create(modifiedF[0:interpolationPointsPerCluster], modifiedF2[0:interpolationPointsPerCluster], exactIndX[0:interpolationPointsPerCluster], \ - exactIndY[0:interpolationPointsPerCluster], exactIndZ[0:interpolationPointsPerCluster], \ - nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], \ - nodeZ[0:interpDegreeLim], weights[0:interpDegreeLim], \ - dj[0:interpDegreeLim], tt[0:interpDegreeLim]) - { -#endif - -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) independent -#endif - for (int j = 0; j < interpolationPointsPerCluster; j++) { - modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j] * clusterW[child_startingIndexInClustersArray + j]; -// modifiedF2[j] = clusterW[child_startingIndexInClustersArray + j]; - exactIndX[j] = -1; - exactIndY[j] = -1; - exactIndZ[j] = -1; - } - - // Fill in arrays of unique x, y, and z coordinates for the interpolation points. 
-#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) independent -#endif - for (int i = 0; i < interpDegreeLim; i++) { - tt[i] = cos(i * M_PI / interpolationDegree); - nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); - nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); - nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); - } - - // Compute weights -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) independent -#endif - for (int j = 0; j < interpDegreeLim; j++) { - dj[j] = 1.0; - if (j == 0) dj[j] = 0.5; - if (j == interpolationDegree) dj[j] = 0.5; - } - -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) independent -#endif - for (int j = 0; j < interpDegreeLim; j++) { - weights[j] = ((j % 2 == 0)? 1 : -1) * dj[j]; - } - - // Compute modified f values -#ifdef OPENACC_ENABLED - #pragma acc loop independent -#endif - for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop through the source points - - double sumX = 0.0; - double sumY = 0.0; - double sumZ = 0.0; - - double sx = clusterX[child_startingIndexInClustersArray+i]; - double sy = clusterY[child_startingIndexInClustersArray+i]; - double sz = clusterZ[child_startingIndexInClustersArray+i]; - -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) -#endif - for (int j = 0; j < (interpolationDegree+1); j++) { // loop through the degree - - double cx = sx - nodeX[j]; - double cy = sy - nodeY[j]; - double cz = sz - nodeZ[j]; - - if (fabs(cx) < DBL_MIN) exactIndX[i] = j; - if (fabs(cy) < DBL_MIN) exactIndY[i] = j; - if (fabs(cz) < DBL_MIN) exactIndZ[i] = j; - - // Increment the sums - double w = weights[j]; - sumX += w / cx; - sumY += w / cy; - sumZ += w / cz; - - } - - double denominator = 1.0; - if (exactIndX[i] == -1) denominator *= sumX; - if (exactIndY[i] == -1) denominator *= sumY; - if (exactIndZ[i] == -1) denominator *= sumZ; - - modifiedF[i] /= denominator; -// modifiedF2[i] /= denominator; - } - - -#ifdef OPENACC_ENABLED - #pragma acc loop 
independent -#endif - for (int j = 0; j < interpolationPointsPerCluster; j++) { - int k1 = j%(interpolationDegree+1); - int kk = (j-k1)/(interpolationDegree+1); - int k2 = kk%(interpolationDegree+1); - kk = kk - k2; - int k3 = kk / (interpolationDegree+1); - - double cz = nodeZ[k3]; - double w3 = weights[k3]; - - double cy = nodeY[k2]; - double w2 = weights[k2]; - - double cx = nodeX[k1]; - double w1 = weights[k1]; - - // Fill cluster X, Y, and Z arrays - clusterX[parent_startingIndexInClustersArray + j] = cx; - clusterY[parent_startingIndexInClustersArray + j] = cy; - clusterZ[parent_startingIndexInClustersArray + j] = cz; - - // Increment cluster Q array - double temp = 0.0; -// double temp2 = 0.0; -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) reduction(+:temp) -#endif - for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop over source points - double sx = clusterX[child_startingIndexInClustersArray + i]; - double sy = clusterY[child_startingIndexInClustersArray + i]; - double sz = clusterZ[child_startingIndexInClustersArray + i]; - - double numerator = 1.0; - - // If exactInd[i] == -1, then no issues. - // If exactInd[i] != -1, then we want to zero out terms EXCEPT when exactInd=k1. 
- if (exactIndX[i] == -1) { - numerator *= w1 / (sx - cx); - } else { - if (exactIndX[i] != k1) numerator *= 0; - } - - if (exactIndY[i] == -1) { - numerator *= w2 / (sy - cy); - } else { - if (exactIndY[i] != k2) numerator *= 0; - } - - if (exactIndZ[i] == -1) { - numerator *= w3 / (sz - cz); - } else { - if (exactIndZ[i] != k3) numerator *= 0; - } - - temp += numerator * modifiedF[i]; -// temp2 += numerator * modifiedF2[i]; - - } - - clusterQ[parent_startingIndexInClustersArray + j] += temp; -// clusterW[parent_startingIndexInClustersArray + j] += temp2; - - } -#ifdef OPENACC_ENABLED - } //end acc kernels region -#endif - - free_vector(weights); - free_vector(dj); - free_vector(tt); - free_vector(nodeX); - free_vector(nodeY); - free_vector(nodeZ); - free_vector(modifiedF); -// free_vector(modifiedF2); - free_vector(exactIndX); - free_vector(exactIndY); - free_vector(exactIndZ); - - return; -} - - -void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int child_index, int parent_index, int interpolationDegree, - double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) -{ - - int interpDegreeLim = interpolationDegree + 1; - int interpolationPointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; - - int child_startingIndexInClustersArray = child_index * interpolationPointsPerCluster; - int parent_startingIndexInClustersArray = parent_index * interpolationPointsPerCluster; - - double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ, *modifiedF, *modifiedF2; - int *exactIndX, *exactIndY, *exactIndZ; - - make_vector(weights, interpDegreeLim); - make_vector(dj, interpDegreeLim); - make_vector(tt, interpDegreeLim); - make_vector(nodeX, interpDegreeLim); - make_vector(nodeY, interpDegreeLim); - make_vector(nodeZ, interpDegreeLim); -// make_vector(modifiedF, interpolationPointsPerCluster); make_vector(modifiedF2, interpolationPointsPerCluster); make_vector(exactIndX, interpolationPointsPerCluster); 
make_vector(exactIndY, interpolationPointsPerCluster); @@ -838,7 +588,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil #pragma acc loop vector(32) independent #endif for (int j = 0; j < interpolationPointsPerCluster; j++) { -// modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j] * clusterW[child_startingIndexInClustersArray + j]; + modifiedF[j] = clusterQ[child_startingIndexInClustersArray + j]; modifiedF2[j] = clusterW[child_startingIndexInClustersArray + j]; exactIndX[j] = -1; exactIndY[j] = -1; @@ -913,7 +663,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil if (exactIndY[i] == -1) denominator *= sumY; if (exactIndZ[i] == -1) denominator *= sumZ; -// modifiedF[i] /= denominator; + modifiedF[i] /= denominator; modifiedF2[i] /= denominator; } @@ -975,12 +725,12 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil if (exactIndZ[i] != k3) numerator *= 0; } -// temp += numerator * modifiedF[i]; + temp += numerator * modifiedF[i]; temp2 += numerator * modifiedF2[i]; } -// clusterQ[parent_startingIndexInClustersArray + j] += temp; + clusterQ[parent_startingIndexInClustersArray + j] += temp; clusterW[parent_startingIndexInClustersArray + j] += temp2; } @@ -994,7 +744,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil free_vector(nodeX); free_vector(nodeY); free_vector(nodeZ); -// free_vector(modifiedF); + free_vector(modifiedF); free_vector(modifiedF2); free_vector(exactIndX); free_vector(exactIndY); @@ -1005,6 +755,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent_W(const struct Tree *tree, int chil + void pc_comp_ms_modifiedF(const struct Tree *tree, int idx, int interpolationDegree, double *xS, double *yS, double *zS, double *qS, double *clusterX, double *clusterY, double *clusterZ, double *clusterQ) @@ -1427,219 +1178,6 @@ void pc_comp_ms_modifiedF_SS(const struct Tree *tree, int idx, int interpolation } -void 
pc_comp_ms_modifiedF_SS_W(const struct Tree *tree, int idx, int interpolationDegree, - double *xS, double *yS, double *zS, double *qS, double *wS, - double *clusterX, double *clusterY, double *clusterZ, double *clusterQ, double *clusterW) -{ - int interpDegreeLim = interpolationDegree + 1; - int pointsPerCluster = interpDegreeLim * interpDegreeLim * interpDegreeLim; - int pointsInNode = tree->iend[idx] - tree->ibeg[idx] + 1; - int startingIndexInClusters = idx * pointsPerCluster; - int startingIndexInSources = tree->ibeg[idx]-1; - - double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ, *modifiedF, *modifiedF2; - int *exactIndX, *exactIndY, *exactIndZ; - - make_vector(weights, interpDegreeLim); - make_vector(dj, interpDegreeLim); - make_vector(tt, interpDegreeLim); - make_vector(nodeX, interpDegreeLim); - make_vector(nodeY, interpDegreeLim); - make_vector(nodeZ, interpDegreeLim); -// make_vector(modifiedF, pointsInNode); - make_vector(modifiedF2, pointsInNode); - make_vector(exactIndX, pointsInNode); - make_vector(exactIndY, pointsInNode); - make_vector(exactIndZ, pointsInNode); - - double x0 = tree->x_min[idx]; // 1e-15 fails for large meshes, mysteriously. 
- double x1 = tree->x_max[idx]; - double y0 = tree->y_min[idx]; - double y1 = tree->y_max[idx]; - double z0 = tree->z_min[idx]; - double z1 = tree->z_max[idx]; - -#ifdef OPENACC_ENABLED - int streamID = rand() % 3; - #pragma acc kernels async(streamID) present(xS, yS, zS, qS, wS, \ - clusterX, clusterY, clusterZ, clusterQ, clusterW) \ - create(modifiedF[0:pointsInNode], modifiedF2[0:pointsInNode], exactIndX[0:pointsInNode], \ - exactIndY[0:pointsInNode], exactIndZ[0:pointsInNode], \ - nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], nodeZ[0:interpDegreeLim], \ - weights[0:interpDegreeLim], dj[0:interpDegreeLim], tt[0:interpDegreeLim]) - { -#endif - -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) independent -#endif - for (int j = 0; j < pointsInNode; j++) { -// modifiedF[j] = qS[startingIndexInSources + j] * wS[startingIndexInSources + j]; - modifiedF2[j] = wS[startingIndexInSources + j]; - exactIndX[j] = -1; - exactIndY[j] = -1; - exactIndZ[j] = -1; - } - - // Fill in arrays of unique x, y, and z coordinates for the interpolation points. -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) independent -#endif - for (int i = 0; i < interpDegreeLim; i++) { - tt[i] = cos(i * M_PI / interpolationDegree); - nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); - nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); - nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); - } - - // Compute weights -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) independent -#endif - for (int j = 0; j < interpDegreeLim; j++) { - dj[j] = 1.0; - if (j == 0) dj[j] = 0.5; - if (j == interpolationDegree) dj[j] = 0.5; - } - -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) independent -#endif - for (int j = 0; j < interpDegreeLim; j++) { - weights[j] = ((j % 2 == 0)? 
1 : -1) * dj[j]; - } - - // Compute modified f values -#ifdef OPENACC_ENABLED - #pragma acc loop independent -#endif - for (int i = 0; i < pointsInNode; i++) { // loop through the source points - - double sumX = 0.0; - double sumY = 0.0; - double sumZ = 0.0; - - double sx = xS[startingIndexInSources+i]; - double sy = yS[startingIndexInSources+i]; - double sz = zS[startingIndexInSources+i]; - -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) reduction(+:sumX) reduction(+:sumY) reduction(+:sumZ) -#endif - for (int j = 0; j < interpDegreeLim; j++) { // loop through the degree - - double cx = sx - nodeX[j]; - double cy = sy - nodeY[j]; - double cz = sz - nodeZ[j]; - - if (fabs(cx) < DBL_MIN) exactIndX[i] = j; - if (fabs(cy) < DBL_MIN) exactIndY[i] = j; - if (fabs(cz) < DBL_MIN) exactIndZ[i] = j; - - // Increment the sums - double w = weights[j]; - sumX += w / (cx); - sumY += w / (cy); - sumZ += w / (cz); - - } - - double denominator = 1.0; - if (exactIndX[i] == -1) denominator *= sumX; - if (exactIndY[i] == -1) denominator *= sumY; - if (exactIndZ[i] == -1) denominator *= sumZ; - -// modifiedF[i] /= denominator; - modifiedF2[i] /= denominator; - - } - - -#ifdef OPENACC_ENABLED - #pragma acc loop independent -#endif - for (int j = 0; j < pointsPerCluster; j++) { // loop over interpolation points, set (cx,cy,cz) for this point - // compute k1, k2, k3 from j - int k1 = j % interpDegreeLim; - int kk = (j-k1) / interpDegreeLim; - int k2 = kk % interpDegreeLim; - kk = kk - k2; - int k3 = kk / interpDegreeLim; - - double cz = nodeZ[k3]; - double w3 = weights[k3]; - - double cy = nodeY[k2]; - double w2 = weights[k2]; - - double cx = nodeX[k1]; - double w1 = weights[k1]; - - // Fill cluster X, Y, and Z arrays - clusterX[startingIndexInClusters + j] = cx; - clusterY[startingIndexInClusters + j] = cy; - clusterZ[startingIndexInClusters + j] = cz; - - // Increment cluster Q array - double temp = 0.0, temp2 = 0.0; - -#ifdef OPENACC_ENABLED - #pragma acc loop vector(32) 
reduction(+:temp) reduction(+:temp2) -#endif - for (int i = 0; i < pointsInNode; i++) { // loop over source points - double sx = xS[startingIndexInSources + i]; - double sy = yS[startingIndexInSources + i]; - double sz = zS[startingIndexInSources + i]; - - double numerator = 1.0; - - // If exactInd[i] == -1, then no issues. - // If exactInd[i] != -1, then we want to zero out terms EXCEPT when exactInd=k1. - if (exactIndX[i] == -1) { - numerator *= w1 / (sx - cx); - } else { - if (exactIndX[i] != k1) numerator *= 0; - } - - if (exactIndY[i] == -1) { - numerator *= w2 / (sy - cy); - } else { - if (exactIndY[i] != k2) numerator *= 0; - } - - if (exactIndZ[i] == -1) { - numerator *= w3 / (sz - cz); - } else { - if (exactIndZ[i] != k3) numerator *= 0; - } - -// temp += numerator * modifiedF[i]; - temp2 += numerator * modifiedF2[i]; - } - -// clusterQ[startingIndexInClusters + j] += temp; - clusterW[startingIndexInClusters + j] += temp2; - } - -#ifdef OPENACC_ENABLED - } // end acc kernels region -#endif - - free_vector(weights); - free_vector(dj); - free_vector(tt); - free_vector(nodeX); - free_vector(nodeY); - free_vector(nodeZ); -// free_vector(modifiedF); - free_vector(modifiedF2); - free_vector(exactIndX); - free_vector(exactIndY); - free_vector(exactIndZ); - - return; -} - void pc_comp_ms_modifiedF_hermite(const struct Tree *tree, int idx, int interpolationDegree, From e7805459d745e08cdbd86f95a7355335bc2a2eae Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Mon, 27 Jul 2020 23:09:22 -0400 Subject: [PATCH 63/95] O(N) downpass working for SS now --- src/clusters/clusters.c | 10 +- .../interaction_compute_downpass.c | 247 ++++++++++++++++-- 2 files changed, 229 insertions(+), 28 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 44032849..3037e19d 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -117,8 +117,8 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa // anterpolate from particles to leaf cluster interpolation points for (int i = 0; i < tree->leaves_list_num; ++i) { - int leaf_index = tree->leaves_list[i]; - pc_comp_ms_modifiedF(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, xC, yC, zC, qC); + int leaf_index = tree->leaves_list[i]; + pc_comp_ms_modifiedF(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, xC, yC, zC, qC); } // anterpolate up clusters, level by level @@ -132,7 +132,6 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa int child_index = tree->children[8*parent_index + child_counter]; pc_comp_ms_modifiedF_child_to_parent(tree, child_index, parent_index, interpolationDegree, xC, yC, zC, qC); - } } } @@ -142,8 +141,8 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa // anterpolate from particles to leaf cluster interpolation points for (int i = 0; i < tree->leaves_list_num; ++i) { - int leaf_index = tree->leaves_list[i]; - pc_comp_ms_modifiedF_SS(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); + int leaf_index = tree->leaves_list[i]; + pc_comp_ms_modifiedF_SS(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC, wC); } // interpolate up clusters, level by level @@ -157,7 +156,6 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa int child_index = tree->children[8*parent_index + child_counter]; pc_comp_ms_modifiedF_SS_child_to_parent(tree, 
child_index, parent_index, interpolationDegree, xC, yC, zC, qC, wC); - } } } diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index 23fe97b8..fc374635 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -13,8 +13,7 @@ static void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_degree, - double *xT, double *yT, double *zT, - double *clusterQ); + double *xT, double *yT, double *zT, double *clusterQ); static void cp_comp_pot_parent_to_child(struct Tree *tree, int parent_index, int child_index, int interp_degree, double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q); @@ -23,9 +22,11 @@ static void cp_comp_pot_SS(struct Tree *tree, int idx, double *potential, int in double *xT, double *yT, double *zT, double *qT, double *clusterQ, double *clusterW); +static void cp_comp_pot_SS_parent_to_child(struct Tree *tree, int parent_index, int child_index, int interp_degree, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, double *cluster_w); + static void cp_comp_pot_hermite(struct Tree *tree, int idx, double *potential, int interp_degree, - double *xT, double *yT, double *zT, - double *clusterQ, double *clusterW); + double *xT, double *yT, double *zT, double *clusterQ, double *clusterW); //static void cp_comp_pot_hermite_SS(struct Tree *tree, int idx, int interp_degree, // int totalNumberInterpolationPoints, @@ -43,10 +44,10 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, double *target_z = targets->z; double *target_q = targets->q; - double *cluster_x = clusters->x; - double *cluster_y = clusters->y; - double *cluster_z = clusters->z; - double *cluster_q = clusters->q; + double *cluster_x = clusters->x; + double *cluster_y = clusters->y; + double *cluster_z = clusters->z; + double *cluster_q = clusters->q; double *cluster_w = 
clusters->w; int total_num_interp_pts = clusters->num; @@ -58,10 +59,6 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, if ((run_params->approximation == LAGRANGE) && (run_params->singularity == SKIPPING)) { -// for (int i = 0; i < tree_numnodes; i++) -// cp_comp_pot(tree, i, potential, interp_degree, -// target_x, target_y, target_z, target_q, cluster_q); - // interpolate up clusters, level by level for (int level = 0; level < tree->max_depth; ++level) { @@ -70,15 +67,12 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, int parent_index = tree->levels_list[level][cluster_index]; - for (int child_counter=0; child_counternum_children[parent_index]; ++child_counter){ + for (int child_counter=0; child_counter < tree->num_children[parent_index]; ++child_counter) { int child_index = tree->children[8*parent_index + child_counter]; -// cp_comp_pot(tree, child_index, parent_index, interpolationDegree, xC, yC, zC, qC); - cp_comp_pot_parent_to_child(tree, parent_index, child_index, interp_degree, cluster_x, cluster_y, cluster_z, cluster_q); - } } } @@ -86,18 +80,44 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, // interpolate from leaf cluster interpolation points to target particles printf("Interpolating from leaf interpolation points to particles.\n"); for (int i = 0; i < tree->leaves_list_num; ++i) { - int leaf_index = tree->leaves_list[i]; - cp_comp_pot(tree, leaf_index, potential, interp_degree, + int leaf_index = tree->leaves_list[i]; + cp_comp_pot(tree, leaf_index, potential, interp_degree, target_x, target_y, target_z, cluster_q); } } else if ((run_params->approximation == LAGRANGE) && (run_params->singularity == SUBTRACTION)) { - for (int i = 0; i < tree_numnodes; i++){ - cp_comp_pot_SS(tree, i, potential, interp_degree, - target_x, target_y, target_z, target_q, cluster_q, cluster_w); + + // interpolate up clusters, level by level + for (int level = 0; level < tree->max_depth; ++level) { + 
printf("Interpolating for level %i\n", level); + for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { + + int parent_index = tree->levels_list[level][cluster_index]; + + for (int child_counter=0; child_counter < tree->num_children[parent_index]; ++child_counter) { + + int child_index = tree->children[8*parent_index + child_counter]; + + cp_comp_pot_SS_parent_to_child(tree, parent_index, child_index, interp_degree, + cluster_x, cluster_y, cluster_z, cluster_q, cluster_w); + } + } } + // interpolate from leaf cluster interpolation points to target particles + printf("Interpolating from leaf interpolation points to particles.\n"); + for (int i = 0; i < tree->leaves_list_num; ++i) { + int leaf_index = tree->leaves_list[i]; + cp_comp_pot_SS(tree, leaf_index, potential, interp_degree, + target_x, target_y, target_z, target_q, cluster_q, cluster_w); + } + +// for (int i = 0; i < tree_numnodes; i++){ +// cp_comp_pot_SS(tree, i, potential, interp_degree, +// target_x, target_y, target_z, target_q, cluster_q, cluster_w); +// } + } else if ((run_params->approximation == HERMITE) && (run_params->singularity == SKIPPING)) { for (int i = 0; i < tree_numnodes; i++) cp_comp_pot_hermite(tree, i, potential, interp_degree, @@ -477,6 +497,189 @@ void cp_comp_pot(struct Tree *tree, int idx, double *potential, int interp_degre } + + +void cp_comp_pot_SS_parent_to_child(struct Tree *tree, int parent_index, int child_index, int interp_degree, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, double *cluster_w) +{ + int interp_degree_lim = interp_degree + 1; + int interp_pts_per_cluster = interp_degree_lim * interp_degree_lim * interp_degree_lim; + + int parent_cluster_start = parent_index * interp_pts_per_cluster; + int child_cluster_start = child_index * interp_pts_per_cluster; + + double *weights, *dj, *tt, *nodeX, *nodeY, *nodeZ; + + make_vector(weights, interp_degree_lim); + make_vector(dj, interp_degree_lim); + 
make_vector(tt, interp_degree_lim); + make_vector(nodeX, interp_degree_lim); + make_vector(nodeY, interp_degree_lim); + make_vector(nodeZ, interp_degree_lim); + + double x0 = tree->x_min[parent_index]; + double x1 = tree->x_max[parent_index]; + double y0 = tree->y_min[parent_index]; + double y1 = tree->y_max[parent_index]; + double z0 = tree->z_min[parent_index]; + double z1 = tree->z_max[parent_index]; + +#ifdef OPENACC_ENABLED + int streamID = rand() % 4; + #pragma acc kernels async(streamID) present(cluster_x, cluster_y, cluster_z, cluster_q, cluster_w) \ + create(nodeX[0:interp_degree_lim], nodeY[0:interp_degree_lim], nodeZ[0:interp_degree_lim], \ + weights[0:interp_degree_lim], dj[0:interp_degree_lim], tt[0:interp_degree_lim]) + { +#endif + + + // Fill in arrays of unique x, y, and z coordinates for the interpolation points. +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interp_degree_lim; i++) { + tt[i] = cos(i * M_PI / interp_degree); + nodeX[i] = x0 + (tt[i] + 1.0)/2.0 * (x1 - x0); + nodeY[i] = y0 + (tt[i] + 1.0)/2.0 * (y1 - y0); + nodeZ[i] = z0 + (tt[i] + 1.0)/2.0 * (z1 - z0); + } + + // Compute weights +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interp_degree_lim; j++){ + dj[j] = 1.0; + if (j == 0) dj[j] = 0.5; + if (j == interp_degree) dj[j] = 0.5; + } + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int j = 0; j < interp_degree_lim; j++) { + weights[j] = ((j % 2 == 0)? 
1 : -1) * dj[j]; + } + +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < interp_pts_per_cluster; i++) { // loop through the target points + + double sumX = 0.0; + double sumY = 0.0; + double sumZ = 0.0; + + double tx = cluster_x[child_cluster_start+i]; + double ty = cluster_y[child_cluster_start+i]; + double tz = cluster_z[child_cluster_start+i]; + + int eix = -1; + int eiy = -1; + int eiz = -1; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:sumX,sumY,sumZ) reduction(max:eix,eiy,eiz) +#endif + for (int j = 0; j < interp_degree_lim; j++) { // loop through the degree + + double cx = tx - nodeX[j]; + double cy = ty - nodeY[j]; + double cz = tz - nodeZ[j]; + + if (fabs(cx) Date: Wed, 29 Jul 2020 10:18:09 -0400 Subject: [PATCH 64/95] Now also testing beta option in python interface. --- interfaces/python/testBaryTreeInterface.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/interfaces/python/testBaryTreeInterface.py b/interfaces/python/testBaryTreeInterface.py index 6dab5be7..bb27fe8d 100644 --- a/interfaces/python/testBaryTreeInterface.py +++ b/interfaces/python/testBaryTreeInterface.py @@ -60,7 +60,25 @@ beta, GPUpresent, verbosity, sizeCheck=1.0) assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output." - print("If no errors printed, then the call to the treecode wrapper worked!") + + + + + + + beta = 0.1 + expectedOutput = 588.7445889051367 # this is expected value of first element of output array for beta = 0.1 + output = BT.callTreedriver( N, N, + X, Y, Z, RHO, + np.copy(X), np.copy(Y), np.copy(Z), np.copy(RHO), np.copy(W), + kernel, numberOfKernelParameters, kernelParameters, + singularity, approximation, computeType, + theta, treecodeDegree, maxPerSourceLeaf, maxPerTargetLeaf, + beta, GPUpresent, verbosity, sizeCheck=1.0) + assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output." 
+ + + print("If no errors printed, then the calls to the treecode wrapper worked (one using explicit theta/degree, one use beta)") From 79116c8e01438fe8dc8c4ebcdf29f9c3ae817358 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Wed, 29 Jul 2020 10:31:28 -0400 Subject: [PATCH 65/95] beta, theta, degree, N0, M0 are all optional now. --- interfaces/python/BaryTreeInterface.py | 23 +++++++++++++++++++--- interfaces/python/testBaryTreeInterface.py | 12 +++++------ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/interfaces/python/BaryTreeInterface.py b/interfaces/python/BaryTreeInterface.py index e5105c67..80825ed2 100644 --- a/interfaces/python/BaryTreeInterface.py +++ b/interfaces/python/BaryTreeInterface.py @@ -83,12 +83,29 @@ def callTreedriver(numTargets, numSources, targetX, targetY, targetZ, targetValue, sourceX, sourceY, sourceZ, sourceValue, sourceWeight, kernelName, numberOfKernelParameters, kernelParameters, singularityHandling, - approximationName, computeType, theta, degree, maxParNode, batchSize, beta, GPUpresent, verbosity, sizeCheck=None): + approximationName, computeType, GPUpresent, verbosity, + beta=None, theta=None, degree=None, sourceLeafSize=None, targetLeafSize=None,sizeCheck=None): ''' python function which creates pointers to the arrays and calls treedriverWrapper. returns the results array. ''' +# Handle optional parameters. Note, either beta must be supplied, or theta, degree, sourceLeafSize, AND targetLeafSize must be supplied. + if beta: + # Using beta to set other paraemters. Set them to dummy values for the purpose of calling treedriver. + theta=-1.0 + degree=-1 + sourceLeafSize=1 + targetLeafSize=1 + else: + # Not using beta. Set it to -1 so it is ignored. Using the provided values of other parameters + assert theta is not None, "If beta isn't provided, theta must be." + assert degree is not None, "If beta isn't provided, degree must be." + assert sourceLeafSize is not None, "If beta isn't provided, sourceLeafSize must be." 
+ assert targetLeafSize is not None, "If beta isn't provided, targetLeafSize must be." + beta=-1 + + c_double_p = ctypes.POINTER(ctypes.c_double) targetX_p = targetX.ctypes.data_as(c_double_p) @@ -119,14 +136,14 @@ def callTreedriver(numTargets, numSources, sourceX_p, sourceY_p, sourceZ_p, sourceValue_p, sourceWeight_p, resultArray_p, kernelName, ctypes.c_int(numberOfKernelParameters), kernelParameters_p, singularityHandling, approximationName, computeType, - ctypes.c_double(theta), ctypes.c_int(degree), ctypes.c_int(maxParNode), ctypes.c_int(batchSize), ctypes.c_double(sizeCheck), ctypes.c_double(beta), ctypes.c_int(verbosity) ) + ctypes.c_double(theta), ctypes.c_int(degree), ctypes.c_int(sourceLeafSize), ctypes.c_int(targetLeafSize), ctypes.c_double(sizeCheck), ctypes.c_double(beta), ctypes.c_int(verbosity) ) elif GPUpresent==False: # No gpu present _cpu_treecodeRoutines.BaryTreeInterface(ctypes.c_int(numTargets), ctypes.c_int(numSources), targetX_p, targetY_p, targetZ_p, targetValue_p, sourceX_p, sourceY_p, sourceZ_p, sourceValue_p, sourceWeight_p, resultArray_p, kernelName, ctypes.c_int(numberOfKernelParameters), kernelParameters_p, singularityHandling, approximationName, computeType, - ctypes.c_double(theta), ctypes.c_int(degree), ctypes.c_int(maxParNode), ctypes.c_int(batchSize), ctypes.c_double(sizeCheck), ctypes.c_double(beta), ctypes.c_int(verbosity) ) + ctypes.c_double(theta), ctypes.c_int(degree), ctypes.c_int(sourceLeafSize), ctypes.c_int(targetLeafSize), ctypes.c_double(sizeCheck), ctypes.c_double(beta), ctypes.c_int(verbosity) ) else: print("What should GPUpresent be set to in the wrapper?") exit(-1) diff --git a/interfaces/python/testBaryTreeInterface.py b/interfaces/python/testBaryTreeInterface.py index bb27fe8d..e88ba8b4 100644 --- a/interfaces/python/testBaryTreeInterface.py +++ b/interfaces/python/testBaryTreeInterface.py @@ -25,7 +25,6 @@ GPUpresent = False theta = 0.8 treecodeDegree = 4 - beta = -1 gaussianAlpha = 1.0 verbosity = 0 @@ -56,10 
+55,10 @@ np.copy(X), np.copy(Y), np.copy(Z), np.copy(RHO), np.copy(W), kernel, numberOfKernelParameters, kernelParameters, singularity, approximation, computeType, - theta, treecodeDegree, maxPerSourceLeaf, maxPerTargetLeaf, - beta, GPUpresent, verbosity, sizeCheck=1.0) + GPUpresent, verbosity, + theta=theta, degree=treecodeDegree, sourceLeafSize=maxPerSourceLeaf, targetLeafSize=maxPerTargetLeaf, sizeCheck=1.0) - assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output." + assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output using explicit theta/degree." @@ -73,9 +72,8 @@ np.copy(X), np.copy(Y), np.copy(Z), np.copy(RHO), np.copy(W), kernel, numberOfKernelParameters, kernelParameters, singularity, approximation, computeType, - theta, treecodeDegree, maxPerSourceLeaf, maxPerTargetLeaf, - beta, GPUpresent, verbosity, sizeCheck=1.0) - assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output." + GPUpresent, verbosity, beta=beta, sizeCheck=1.0) + assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output using beta." print("If no errors printed, then the calls to the treecode wrapper worked (one using explicit theta/degree, one use beta)") From 708201bdd841d94e48b896b7e9b5059087272cdb Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Wed, 29 Jul 2020 23:24:52 -0400 Subject: [PATCH 66/95] Removing log files --- .../mpi_subtraction_tests_9992266.gl1005.out | 56 ------------------- 1 file changed, 56 deletions(-) delete mode 100644 examples/dataFiles/mpi_subtraction_tests_9992266.gl1005.out diff --git a/examples/dataFiles/mpi_subtraction_tests_9992266.gl1005.out b/examples/dataFiles/mpi_subtraction_tests_9992266.gl1005.out deleted file mode 100644 index d53c6e9f..00000000 --- a/examples/dataFiles/mpi_subtraction_tests_9992266.gl1005.out +++ /dev/null @@ -1,56 +0,0 @@ -particle-cluster 0.8 7 -1.0 -[random cube example] Beginning random cube example with 1 ranks. -[random cube example] Zoltan load balancing has finished. -[random cube example] Running direct comparison... -[random cube example] Running treedriver... -[random cube example] -[random cube example] Treecode timing summary (all times in seconds)... -[random cube example] -[random cube example] Max Avg Max/Min -[random cube example] | Total time...................... 2.325e+00 s (100.00%) 2.325e+00 s (100.00%) 1.000 -[random cube example] | | -[random cube example] | |....Pre-process................ 2.645e-01 s ( 11.38%) 2.645e-01 s ( 11.38%) 1.000 -[random cube example] | |....Directdriver............... 4.650e-01 s ( 20.00%) 4.650e-01 s ( 20.00%) 1.000 -[random cube example] | |....Treedriver................. 1.594e+00 s ( 68.59%) 1.594e+00 s ( 68.59%) 1.000 -[random cube example] -[random cube example] -[random cube example] | Directdriver.................... 4.650e-01 s (100.00%) 4.650e-01 s (100.00%) 1.000 -[random cube example] | | -[random cube example] | |....Compute local.............. 3.574e-01 s ( 76.86%) 3.574e-01 s ( 76.86%) 1.000 -[random cube example] -[random cube example] -[random cube example] | Treedriver...................... 1.594e+00 s (100.00%) 1.594e+00 s (100.00%) 1.000 -[random cube example] | | -[random cube example] | |....Build local tree........... 
7.443e-02 s ( 4.67%) 7.443e-02 s ( 4.67%) 1.000 -[random cube example] | |....Build local batches........ 7.230e-02 s ( 4.53%) 7.230e-02 s ( 4.53%) 1.000 -[random cube example] | |....Build local clusters....... 2.528e-01 s ( 15.85%) 2.528e-01 s ( 15.85%) 1.000 -[random cube example] | |....Build local lists.......... 1.451e-03 s ( 0.09%) 1.451e-03 s ( 0.09%) 1.000 -[random cube example] | |....Compute local.............. 1.086e+00 s ( 68.09%) 1.086e+00 s ( 68.09%) 1.000 -[random cube example] | |....Correct potential.......... 1.025e-01 s ( 6.43%) 1.025e-01 s ( 6.43%) 1.000 -[random cube example] | |....Cleanup.................... 6.916e-05 s ( 0.00%) 6.916e-05 s ( 0.00%) 1.000 -[random cube example] -[random cube example] Tree potential energy: -133380260.858546 -[random cube example] -[random cube example] Relative inf norm error in potential: 6.939956e-06 -[random cube example] Relative 2 norm error in potential: 2.781444e-06 -[random cube example] -[BaryTree] -[BaryTree] RunParams struct has been set to the following: -[BaryTree] -[BaryTree] kernel = 1 -[BaryTree] num_kernel_params = 1 -[BaryTree] kernel_params = 1.000000, -[BaryTree] approximation = 1 -[BaryTree] singularity = 1 -[BaryTree] compute_type = 1 -[BaryTree] theta = 0.800000 -[BaryTree] size_check_factor = 1.000000 -[BaryTree] interp_order = 7 -[BaryTree] interp_pts_per_cluster = 512 -[BaryTree] interp_weights_per_cluster = 512 -[BaryTree] interp_charges_per_cluster = 512 -[BaryTree] max_per_source_leaf = 3000 -[BaryTree] max_per_target_leaf = 3000 -[BaryTree] verbosity = 0 -[BaryTree] -/home/njvaughn/BaryTree/examples From e897388485b9e2911afc4ffd2df4962413b7bdd9 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Wed, 29 Jul 2020 23:34:18 -0400 Subject: [PATCH 67/95] Fixing format specifier --- examples/support_fns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/support_fns.c b/examples/support_fns.c index 216af515..81fa1503 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -740,7 +740,7 @@ void CSV_Print(int N, int M, struct RunParams *run_params, if (rank == 0) { RunParams_Print(run_params); FILE *fp = fopen("out.csv", "a"); - fprintf(fp, "%d,%d,%d,%d,%d,%d,%d,%f,%d,%d,%d,%d,%f," + fprintf(fp, "%d,%d,%d,%d,%d,%d,%d,%f,%d,%d,%d,%f,%f," "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," From bfc154fde84a0020c8944ed4677ecc5c44941768 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Mon, 3 Aug 2020 21:29:03 -0400 Subject: [PATCH 68/95] removing debug print statements. --- src/clusters/clusters.c | 2 -- src/tree/tree_linked_list.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index d821ac33..7a727595 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -113,7 +113,6 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa if ((approximation == LAGRANGE) && (singularity == SKIPPING)) { // anterpolate from particles to leaf cluster interpolation points - printf("Computing modified charges for the %i leaves\n",tree->leaves_list_num); for (int i = 0; i < tree->leaves_list_num; ++i) { int leaf_index = tree->leaves_list[i]; pc_comp_ms_modifiedF(tree, leaf_index, interpolationDegree, xS, yS, zS, qS, wS, xC, yC, zC, qC); @@ -121,7 +120,6 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa // interpolate up clusters, level by level for (int level = tree->max_depth-2; level >= 0; --level) { - printf("Computing modified charges for level 
%i which contains %i clusters\n",level,tree->levels_list_num[level]); for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { int parent_index = tree->levels_list[level][cluster_index]; diff --git a/src/tree/tree_linked_list.c b/src/tree/tree_linked_list.c index 809215ef..e48c5443 100644 --- a/src/tree/tree_linked_list.c +++ b/src/tree/tree_linked_list.c @@ -172,7 +172,7 @@ void TreeLinkedList_Sources_Construct(struct TreeLinkedListNode **p, struct Tree (*p)->numpar = iend - ibeg + 1; if (current_level + 1 > *max_depth){ - printf("[TreeLinkedList_Sources_Construct] Increasing max depth to %i\n",current_level + 1); +// printf("[TreeLinkedList_Sources_Construct] Increasing max depth to %i\n",current_level + 1); *max_depth = current_level + 1; } (*p)->level = current_level; From 485a49b5a48849f8ed6d764b97a9ce8ab59d61b2 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Sun, 9 Aug 2020 22:12:12 -0400 Subject: [PATCH 69/95] No longer locking windows for unused weight arrays; still some issues --- src/comm_windows/comm_windows.c | 59 ++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/src/comm_windows/comm_windows.c b/src/comm_windows/comm_windows.c index e4db44a8..cf32f366 100644 --- a/src/comm_windows/comm_windows.c +++ b/src/comm_windows/comm_windows.c @@ -8,9 +8,8 @@ #include "struct_comm_windows.h" - void CommWindows_Create(struct CommWindows **comm_windows_addr, - struct Clusters *clusters, struct Particles *sources) + struct Clusters *clusters, struct Particles *sources, struct RunParams *run_params) { *comm_windows_addr = malloc(sizeof(struct CommWindows)); struct CommWindows *comm_windows = *comm_windows_addr; @@ -27,9 +26,6 @@ void CommWindows_Create(struct CommWindows **comm_windows_addr, MPI_Win_create(clusters->q, clusters->num_charges * sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &(comm_windows->win_clusters_q)); - MPI_Win_create(clusters->w, 
clusters->num_weights * sizeof(double), sizeof(double), - MPI_INFO_NULL, MPI_COMM_WORLD, &(comm_windows->win_clusters_w)); - MPI_Win_create(sources->x, sources->num * sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &(comm_windows->win_sources_x)); @@ -42,15 +38,20 @@ void CommWindows_Create(struct CommWindows **comm_windows_addr, MPI_Win_create(sources->q, sources->num * sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &(comm_windows->win_sources_q)); - MPI_Win_create(sources->w, sources->num * sizeof(double), sizeof(double), - MPI_INFO_NULL, MPI_COMM_WORLD, &(comm_windows->win_sources_w)); + if (run_params->singularity == SUBTRACTION) { + MPI_Win_create(clusters->w, clusters->num_weights * sizeof(double), sizeof(double), + MPI_INFO_NULL, MPI_COMM_WORLD, &(comm_windows->win_clusters_w)); + + MPI_Win_create(sources->w, sources->num * sizeof(double), sizeof(double), + MPI_INFO_NULL, MPI_COMM_WORLD, &(comm_windows->win_sources_w)); + } return; } -void CommWindows_Free(struct CommWindows **comm_windows_addr) +void CommWindows_Free(struct CommWindows **comm_windows_addr, struct RunParams *run_params) { MPI_Barrier(MPI_COMM_WORLD); struct CommWindows *comm_windows = *comm_windows_addr; @@ -59,13 +60,16 @@ void CommWindows_Free(struct CommWindows **comm_windows_addr) MPI_Win_free(&(comm_windows->win_clusters_y)); MPI_Win_free(&(comm_windows->win_clusters_z)); MPI_Win_free(&(comm_windows->win_clusters_q)); - MPI_Win_free(&(comm_windows->win_clusters_w)); MPI_Win_free(&(comm_windows->win_sources_x)); MPI_Win_free(&(comm_windows->win_sources_y)); MPI_Win_free(&(comm_windows->win_sources_z)); MPI_Win_free(&(comm_windows->win_sources_q)); - MPI_Win_free(&(comm_windows->win_sources_w)); + + if (run_params->singularity == SUBTRACTION) { + MPI_Win_free(&(comm_windows->win_clusters_w)); + MPI_Win_free(&(comm_windows->win_sources_w)); + } free(comm_windows); comm_windows = NULL; @@ -75,38 +79,44 @@ void CommWindows_Free(struct CommWindows 
**comm_windows_addr) -void CommWindows_Lock(struct CommWindows *comm_windows, int get_from) +void CommWindows_Lock(struct CommWindows *comm_windows, int get_from, struct RunParams *run_params) { MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_clusters_x); MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_clusters_y); MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_clusters_z); - MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_clusters_w); MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_clusters_q); MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_sources_x); MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_sources_y); MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_sources_z); MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_sources_q); - MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_sources_w); + + if (run_params->singularity == SUBTRACTION) { + MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_clusters_w); + MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, comm_windows->win_sources_w); + } return; } -void CommWindows_Unlock(struct CommWindows *comm_windows, int get_from) +void CommWindows_Unlock(struct CommWindows *comm_windows, int get_from, struct RunParams *run_params) { MPI_Win_unlock(get_from, comm_windows->win_clusters_x); MPI_Win_unlock(get_from, comm_windows->win_clusters_y); MPI_Win_unlock(get_from, comm_windows->win_clusters_z); - MPI_Win_unlock(get_from, comm_windows->win_clusters_w); MPI_Win_unlock(get_from, comm_windows->win_clusters_q); MPI_Win_unlock(get_from, comm_windows->win_sources_x); MPI_Win_unlock(get_from, comm_windows->win_sources_y); MPI_Win_unlock(get_from, comm_windows->win_sources_z); MPI_Win_unlock(get_from, comm_windows->win_sources_q); - MPI_Win_unlock(get_from, comm_windows->win_sources_w); + + if (run_params->singularity == SUBTRACTION) { + MPI_Win_unlock(get_from, comm_windows->win_clusters_w); + 
MPI_Win_unlock(get_from, comm_windows->win_sources_w); + } return; } @@ -141,10 +151,6 @@ void CommWindows_GetData(struct Clusters *let_clusters, struct Particles *let_so comm_types->num_remote_approx_array[get_from] * interp_charges_per_cluster, MPI_DOUBLE, get_from, 0, 1, comm_types->MPI_approx_charges_type[get_from], comm_windows->win_clusters_q); - MPI_Get(&(let_clusters->w[comm_types->previous_let_clusters_length_array[get_from] * weights_per_point]), - comm_types->num_remote_approx_array[get_from] * interp_weights_per_cluster, MPI_DOUBLE, - get_from, 0, 1, comm_types->MPI_approx_weights_type[get_from], comm_windows->win_clusters_w); - MPI_Get(&(let_sources->x[comm_types->previous_let_sources_length_array[get_from]]), comm_types->new_sources_length_array[get_from], MPI_DOUBLE, @@ -162,9 +168,16 @@ void CommWindows_GetData(struct Clusters *let_clusters, struct Particles *let_so comm_types->new_sources_length_array[get_from], MPI_DOUBLE, get_from, 0, 1, comm_types->MPI_direct_type[get_from], comm_windows->win_sources_q); - MPI_Get(&(let_sources->w[comm_types->previous_let_sources_length_array[get_from]]), - comm_types->new_sources_length_array[get_from], MPI_DOUBLE, - get_from, 0, 1, comm_types->MPI_direct_type[get_from], comm_windows->win_sources_w); + + if (run_params->singularity == SUBTRACTION) { + MPI_Get(&(let_clusters->w[comm_types->previous_let_clusters_length_array[get_from] * weights_per_point]), + comm_types->num_remote_approx_array[get_from] * interp_weights_per_cluster, MPI_DOUBLE, + get_from, 0, 1, comm_types->MPI_approx_weights_type[get_from], comm_windows->win_clusters_w); + + MPI_Get(&(let_sources->w[comm_types->previous_let_sources_length_array[get_from]]), + comm_types->new_sources_length_array[get_from], MPI_DOUBLE, + get_from, 0, 1, comm_types->MPI_direct_type[get_from], comm_windows->win_sources_w); + } return; } From 8355abd18eab6f6a8bfa32403b22861919f24159 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Sun, 9 Aug 2020 22:53:10 -0400 Subject: [PATCH 70/95] Copying clusters back before communicating, but still failing --- src/drivers/treedriver.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 55c65051..dc68cdd6 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -369,6 +369,13 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run MPI_Barrier(MPI_COMM_WORLD); START_TIMER(&time_tree[3]); +#ifdef OPENACC_ENABLED + #pragma acc update self(clusters->x[0:clusters->num], clusters->y[0:clusters->num], \ + clusters->z[0:clusters->num], clusters->q[0:clusters->num_charges]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc update self(clusters->w[0:clusters->num_weights]) + } +#endif CommTypesAndTrees_Construct(&comm_types, &let_trees, tree, batches, run_params); Particles_Alloc(&let_sources, comm_types->let_sources_length); @@ -446,7 +453,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) if (run_params->singularity == SUBTRACTION) { - #pragma acc enter data create(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) + #pragma acc enter data copyin(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) } STOP_TIMER(&time1); time_tree[6] += time1; @@ -728,7 +735,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) if (run_params->singularity == SUBTRACTION) { - #pragma acc enter data create(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) + 
#pragma acc enter data copyin(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) } STOP_TIMER(&time1); time_tree[6] += time1; From 8fd85f43604f7545226465ec382c873836e39f2a Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Mon, 10 Aug 2020 11:15:15 -0400 Subject: [PATCH 71/95] fixed bug where run_params wasn't being passed. --- src/comm_windows/comm_windows.h | 8 ++++---- src/drivers/treedriver.c | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/comm_windows/comm_windows.h b/src/comm_windows/comm_windows.h index c0f3e798..6f30571e 100644 --- a/src/comm_windows/comm_windows.h +++ b/src/comm_windows/comm_windows.h @@ -10,13 +10,13 @@ void CommWindows_Create(struct CommWindows **comm_windows_addr, - struct Clusters *clusters, struct Particles *sources); + struct Clusters *clusters, struct Particles *sources, struct RunParams *run_params); -void CommWindows_Free(struct CommWindows **comm_windows_addr); +void CommWindows_Free(struct CommWindows **comm_windows_addr, struct RunParams *run_params); -void CommWindows_Lock(struct CommWindows *comm_windows, int get_from); +void CommWindows_Lock(struct CommWindows *comm_windows, int get_from, struct RunParams *run_params); -void CommWindows_Unlock(struct CommWindows *comm_windows, int get_from); +void CommWindows_Unlock(struct CommWindows *comm_windows, int get_from, struct RunParams *run_params); void CommWindows_GetData(struct Clusters *let_clusters, struct Particles *let_sources, struct CommTypes *comm_types, struct CommWindows *comm_windows, diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index dc68cdd6..7c9c3b6f 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -377,26 +377,26 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run } #endif CommTypesAndTrees_Construct(&comm_types, &let_trees, tree, batches, run_params); - Particles_Alloc(&let_sources, comm_types->let_sources_length); 
Clusters_Alloc(&let_clusters, comm_types->let_clusters_length, run_params); - CommWindows_Create(&comm_windows, clusters, sources); + CommWindows_Create(&comm_windows, clusters, sources, run_params); for (int proc_id = 1; proc_id < num_procs; ++proc_id) { int get_from = (num_procs + rank - proc_id) % num_procs; - CommWindows_Lock(comm_windows, get_from); + CommWindows_Lock(comm_windows, get_from, run_params); // This is a non-blocking call! CommWindows_GetData(let_clusters, let_sources, comm_types, comm_windows, get_from, run_params); - CommWindows_Unlock(comm_windows, get_from); + CommWindows_Unlock(comm_windows, get_from, run_params); } - CommWindows_Free(&comm_windows); + CommWindows_Free(&comm_windows, run_params); STOP_TIMER(&time_tree[3]); } + //~~~~~~~~~~~~~~~~~~~~ // Local compute //~~~~~~~~~~~~~~~~~~~~ @@ -650,19 +650,19 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run Particles_Alloc(&let_sources, comm_types->let_sources_length); Clusters_Alloc(&let_clusters, comm_types->let_clusters_length, run_params); - CommWindows_Create(&comm_windows, source_clusters, sources); + CommWindows_Create(&comm_windows, source_clusters, sources, run_params); for (int proc_id = 1; proc_id < num_procs; ++proc_id) { int get_from = (num_procs + rank - proc_id) % num_procs; - CommWindows_Lock(comm_windows, get_from); + CommWindows_Lock(comm_windows, get_from, run_params); //This is a non-blocking call! CommWindows_GetData(let_clusters, let_sources, comm_types, comm_windows, get_from, run_params); - CommWindows_Unlock(comm_windows, get_from); + CommWindows_Unlock(comm_windows, get_from, run_params); } - CommWindows_Free(&comm_windows); + CommWindows_Free(&comm_windows, run_params); STOP_TIMER(&time_tree[3]); } From 2bd3b00a54d634d06f3c77958c747cfa99786d43 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Mon, 10 Aug 2020 11:16:28 -0400 Subject: [PATCH 72/95] removing some downpass related print statements. 
--- src/interaction_compute/interaction_compute_downpass.c | 8 ++++---- src/tree/tree.c | 1 - src/tree/tree_linked_list.c | 1 - 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index fc374635..554c025c 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -62,7 +62,7 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, // interpolate up clusters, level by level for (int level = 0; level < tree->max_depth; ++level) { - printf("Interpolating for level %i\n", level); + for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { int parent_index = tree->levels_list[level][cluster_index]; @@ -78,7 +78,7 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, } // interpolate from leaf cluster interpolation points to target particles - printf("Interpolating from leaf interpolation points to particles.\n"); + for (int i = 0; i < tree->leaves_list_num; ++i) { int leaf_index = tree->leaves_list[i]; cp_comp_pot(tree, leaf_index, potential, interp_degree, @@ -90,7 +90,7 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, // interpolate up clusters, level by level for (int level = 0; level < tree->max_depth; ++level) { - printf("Interpolating for level %i\n", level); + for (int cluster_index = 0; cluster_index < tree->levels_list_num[level]; ++cluster_index) { int parent_index = tree->levels_list[level][cluster_index]; @@ -106,7 +106,7 @@ void InteractionCompute_Downpass(double *potential, struct Tree *tree, } // interpolate from leaf cluster interpolation points to target particles - printf("Interpolating from leaf interpolation points to particles.\n"); + for (int i = 0; i < tree->leaves_list_num; ++i) { int leaf_index = tree->leaves_list[i]; cp_comp_pot_SS(tree, leaf_index, 
potential, interp_degree, diff --git a/src/tree/tree.c b/src/tree/tree.c index 9930333c..42d363d9 100644 --- a/src/tree/tree.c +++ b/src/tree/tree.c @@ -83,7 +83,6 @@ void Tree_Targets_Construct(struct Tree **tree_addr, struct Particles *targets, run_params->max_per_target_leaf, xyzminmax, &numnodes, &numleaves, &min_leaf_size, &max_leaf_size, &max_depth, 0); - printf("TreeLinkedList_Targets_Construct complete.\n"); TreeLinkedList_SetIndex(tree_linked_list, 0); diff --git a/src/tree/tree_linked_list.c b/src/tree/tree_linked_list.c index 2619eac5..f4bacc65 100644 --- a/src/tree/tree_linked_list.c +++ b/src/tree/tree_linked_list.c @@ -46,7 +46,6 @@ void TreeLinkedList_Targets_Construct(struct TreeLinkedListNode **p, struct Tree (*p)->numpar = iend - ibeg + 1; if (current_level + 1 > *max_depth){ - printf("[TreeLinkedList_Targets_Construct] Increasing max depth to %i\n",current_level + 1); *max_depth = current_level + 1; } (*p)->level = current_level; From 490f203a883e98e2109c7ec9cbad82eca4359df3 Mon Sep 17 00:00:00 2001 From: Leighton Wilson Date: Fri, 14 Aug 2020 21:03:58 -0700 Subject: [PATCH 73/95] CC now correctly copies source clusters back to host if more than one GPU --- src/drivers/treedriver.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 7c9c3b6f..d53acf28 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -644,6 +644,13 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run MPI_Barrier(MPI_COMM_WORLD); START_TIMER(&time_tree[3]); +#ifdef OPENACC_ENABLED + #pragma acc update self(source_clusters->x[0:source_clusters->num], source_clusters->y[0:source_clusters->num], \ + source_clusters->z[0:source_clusters->num], source_clusters->q[0:source_clusters->num_charges]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc update self(source_clusters->w[0:source_clusters->num_weights]) + } +#endif CommTypesAndTrees_Construct(&comm_types, 
&let_trees, source_tree, target_tree, run_params); From e66d89ff4bd2b640a0a40d4139d8e2ae275f8be9 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Mon, 24 Aug 2020 15:54:41 -0400 Subject: [PATCH 74/95] removing header dependency --- src/interface/BaryTreeInterface.h | 46 +++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/src/interface/BaryTreeInterface.h b/src/interface/BaryTreeInterface.h index 160c9103..8d1fbdc6 100644 --- a/src/interface/BaryTreeInterface.h +++ b/src/interface/BaryTreeInterface.h @@ -1,8 +1,50 @@ #ifndef H_BARYTREE_INTERFACE_H #define H_BARYTREE_INTERFACE_H -#include "../utilities/enums.h" - + #ifndef H_BARYTREE_TYPES_H + #define H_BARYTREE_TYPES_H + + typedef enum KERNEL + { + NO_KERNEL, + COULOMB, + YUKAWA, + REGULARIZED_COULOMB, + REGULARIZED_YUKAWA, + ATAN, + TCF, + DCF, + SIN_OVER_R, + MQ + } KERNEL; + + + typedef enum SINGULARITY + { + NO_SINGULARITY, + SKIPPING, + SUBTRACTION + } SINGULARITY; + + + typedef enum APPROXIMATION + { + NO_APPROX, + LAGRANGE, + HERMITE + } APPROXIMATION; + + + typedef enum COMPUTE_TYPE + { + NO_COMPUTE_TYPE, + PARTICLE_CLUSTER, + CLUSTER_PARTICLE, + CLUSTER_CLUSTER, + } COMPUTE_TYPE; + + + #endif /* H_BARYTREE_TYPES_H */ void BaryTreeInterface(int numTargets, int numSources, double *targetX, double *targetY, double *targetZ, double *targetValue, From 78821501e92e801bb2d2d6a7ea423181c8d9194a Mon Sep 17 00:00:00 2001 From: Leighton Wilson Date: Mon, 24 Aug 2020 20:30:52 -0700 Subject: [PATCH 75/95] Adding custom plummer 4 rank partition --- examples/random_cube_reproducible.c | 118 +++++++++++++++++++++++++++- examples/support_fns.c | 1 - 2 files changed, 116 insertions(+), 3 deletions(-) diff --git a/examples/random_cube_reproducible.c b/examples/random_cube_reproducible.c index 9c7bf973..52576925 100644 --- a/examples/random_cube_reproducible.c +++ b/examples/random_cube_reproducible.c @@ -18,6 +18,7 @@ #include "zoltan_fns.h" #include "support_fns.h" +void 
Particles_Fix_Plummer(MESH_DATA *mySources); int main(int argc, char **argv) { @@ -264,6 +265,8 @@ int main(int argc, char **argv) Zoltan_LB_Free_Part(&exportGlobalGids, &exportLocalGids, &exportProcs, &exportToPart); Zoltan_Destroy(&zz); + if (distribution == PLUMMER && numProcs == 4) Particles_Fix_Plummer(&mySources); + /* Setting up sources with MPI-allocated source arrays for RMA use */ sources = malloc(sizeof(struct Particles)); @@ -282,7 +285,7 @@ int main(int argc, char **argv) /* Output load balanced points */ - /* + char points_file[256]; sprintf(points_file, "points_rank_%d.csv", rank); FILE *points_fp = fopen(points_file, "w"); @@ -290,7 +293,6 @@ int main(int argc, char **argv) fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]); } fclose(points_fp); - */ /* Setting up targets */ @@ -437,3 +439,115 @@ int main(int argc, char **argv) return 0; } + + +/*----------------------------------------------------------------------------*/ +void Particles_Fix_Plummer(MESH_DATA *mySources) +{ + //This is only for four ranks + int rank, numProcs; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &numProcs); + + int *sendto = malloc(mySources->numMyPoints * sizeof(int)); + + int num_sendto[4] = {0}; + int num_global[4] = {0}; + int increment_sendto[4] = {0}; + int num_sendto_all[16] = {0}; + + MESH_DATA mesh_sendto[4]; + + for (int i = 0; i < mySources->numMyPoints; ++i) { + if (mySources->x[i] > 0 && mySources->y[i] > 0) {sendto[i] = 0; num_sendto[0]++;} + if (mySources->x[i] > 0 && mySources->y[i] < 0) {sendto[i] = 1; num_sendto[1]++;} + if (mySources->x[i] < 0 && mySources->y[i] < 0) {sendto[i] = 2; num_sendto[2]++;} + if (mySources->x[i] < 0 && mySources->y[i] > 0) {sendto[i] = 3; num_sendto[3]++;} + } + + for (int i = 0; i < numProcs; ++i) { + mesh_sendto[i].numMyPoints = num_sendto[i]; + mesh_sendto[i].x = malloc(num_sendto[i]*sizeof(double)); + mesh_sendto[i].y = malloc(num_sendto[i]*sizeof(double)); + 
mesh_sendto[i].z = malloc(num_sendto[i]*sizeof(double)); + mesh_sendto[i].q = malloc(num_sendto[i]*sizeof(double)); + mesh_sendto[i].w = malloc(num_sendto[i]*sizeof(double)); + } + + for (int pt_idx = 0; pt_idx < mySources->numMyPoints; ++pt_idx) { + for (int proc_idx = 0; proc_idx < numProcs; ++proc_idx) { + if (sendto[pt_idx] == proc_idx) { + mesh_sendto[proc_idx].x[increment_sendto[proc_idx]] = mySources->x[pt_idx]; + mesh_sendto[proc_idx].y[increment_sendto[proc_idx]] = mySources->y[pt_idx]; + mesh_sendto[proc_idx].z[increment_sendto[proc_idx]] = mySources->z[pt_idx]; + mesh_sendto[proc_idx].q[increment_sendto[proc_idx]] = mySources->q[pt_idx]; + mesh_sendto[proc_idx].w[increment_sendto[proc_idx]] = mySources->w[pt_idx]; + increment_sendto[proc_idx]++; + break; + } + } + } + + MPI_Allreduce(num_sendto, num_global, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allgather(num_sendto, 4, MPI_INT, num_sendto_all, 4, MPI_INT, MPI_COMM_WORLD); + + free(mySources->x); + free(mySources->y); + free(mySources->z); + free(mySources->q); + free(mySources->w); + + mySources->numMyPoints = num_global[rank]; + mySources->x = malloc(num_global[rank]*sizeof(double)); + mySources->y = malloc(num_global[rank]*sizeof(double)); + mySources->z = malloc(num_global[rank]*sizeof(double)); + mySources->q = malloc(num_global[rank]*sizeof(double)); + mySources->w = malloc(num_global[rank]*sizeof(double)); + + memcpy(mySources->x, mesh_sendto[rank].x, num_sendto[rank] * sizeof(double)); + memcpy(mySources->y, mesh_sendto[rank].y, num_sendto[rank] * sizeof(double)); + memcpy(mySources->z, mesh_sendto[rank].z, num_sendto[rank] * sizeof(double)); + memcpy(mySources->q, mesh_sendto[rank].q, num_sendto[rank] * sizeof(double)); + memcpy(mySources->w, mesh_sendto[rank].w, num_sendto[rank] * sizeof(double)); + int offset = num_sendto[rank]; + + MPI_Request send_request[5], recv_request[5]; + + for (int proc_idx = 0; proc_idx < numProcs-1; ++proc_idx) { + + int proc_num = (rank + proc_idx + 1) % 
numProcs; + + MPI_Isend(mesh_sendto[proc_num].x, num_sendto[proc_num], MPI_DOUBLE, proc_num, + 0, MPI_COMM_WORLD, &send_request[0]); + MPI_Irecv(&(mySources->x[offset]), num_sendto_all[4*proc_num+rank], MPI_DOUBLE, proc_num, + 0, MPI_COMM_WORLD, &recv_request[0]); + + MPI_Isend(mesh_sendto[proc_num].y, num_sendto[proc_num], MPI_DOUBLE, proc_num, + 1, MPI_COMM_WORLD, &send_request[1]); + MPI_Irecv(&(mySources->y[offset]), num_sendto_all[4*proc_num+rank], MPI_DOUBLE, proc_num, + 1, MPI_COMM_WORLD, &recv_request[1]); + + MPI_Isend(mesh_sendto[proc_num].z, num_sendto[proc_num], MPI_DOUBLE, proc_num, + 2, MPI_COMM_WORLD, &send_request[2]); + MPI_Irecv(&(mySources->z[offset]), num_sendto_all[4*proc_num+rank], MPI_DOUBLE, proc_num, + 2, MPI_COMM_WORLD, &recv_request[2]); + + MPI_Isend(mesh_sendto[proc_num].q, num_sendto[proc_num], MPI_DOUBLE, proc_num, + 3, MPI_COMM_WORLD, &send_request[3]); + MPI_Irecv(&(mySources->q[offset]), num_sendto_all[4*proc_num+rank], MPI_DOUBLE, proc_num, + 3, MPI_COMM_WORLD, &recv_request[3]); + + MPI_Isend(mesh_sendto[proc_num].w, num_sendto[proc_num], MPI_DOUBLE, proc_num, + 4, MPI_COMM_WORLD, &send_request[4]); + MPI_Irecv(&(mySources->w[offset]), num_sendto_all[4*proc_num+rank], MPI_DOUBLE, proc_num, + 4, MPI_COMM_WORLD, &recv_request[4]); + + offset += num_sendto_all[4*proc_num+rank]; + } + + MPI_Waitall(5, send_request, MPI_STATUSES_IGNORE); + MPI_Waitall(5, recv_request, MPI_STATUSES_IGNORE); + MPI_Barrier(MPI_COMM_WORLD); + + return; +} diff --git a/examples/support_fns.c b/examples/support_fns.c index 81fa1503..1e2daeae 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -738,7 +738,6 @@ void CSV_Print(int N, int M, struct RunParams *run_params, MPI_Comm_size(MPI_COMM_WORLD, &numProcs); if (rank == 0) { - RunParams_Print(run_params); FILE *fp = fopen("out.csv", "a"); fprintf(fp, "%d,%d,%d,%d,%d,%d,%d,%f,%d,%d,%d,%f,%f," "%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e,%e," From 
3d13f5a46d1ebf8b41cac03f51564b4d03e50c4d Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Tue, 8 Sep 2020 10:26:51 -0500 Subject: [PATCH 76/95] target charges not copied to GPU for skipping runs --- src/drivers/treedriver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index d53acf28..0890aa12 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -96,7 +96,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run Tree_Targets_Construct(&tree, targets, run_params); #ifdef OPENACC_ENABLED #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ - targets->z[0:targets->num], targets->q[0:targets->num]) + targets->z[0:targets->num]) if (run_params->singularity == SUBTRACTION) { #pragma acc enter data copyin(targets->q[0:targets->num]) } @@ -327,7 +327,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run Batches_Targets_Construct(&batches, targets, run_params); #ifdef OPENACC_ENABLED #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ - targets->z[0:targets->num], targets->q[0:targets->num]) + targets->z[0:targets->num]) if (run_params->singularity == SUBTRACTION) { #pragma acc enter data copyin(targets->q[0:targets->num]) } @@ -601,7 +601,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run Tree_Targets_Construct(&target_tree, targets, run_params); #ifdef OPENACC_ENABLED #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ - targets->z[0:targets->num], targets->q[0:targets->num]) + targets->z[0:targets->num]) if (run_params->singularity == SUBTRACTION) { #pragma acc enter data copyin(targets->q[0:targets->num]) } From 5a4f5a26d28d76c381337d12c1dfcc1239bf7999 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Wed, 23 Sep 2020 10:34:14 -0400 Subject: [PATCH 77/95] Fixed SS on 
GPUs, was missing reductions --- src/clusters/clusters.c | 4 ++-- src/interaction_compute/interaction_compute_downpass.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 3037e19d..4a224548 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -573,7 +573,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_ #ifdef OPENACC_ENABLED int streamID = rand() % 4; - #pragma acc kernels async(streamID) present(clusterX, clusterY, clusterZ, clusterQ) \ + #pragma acc kernels async(streamID) present(clusterX, clusterY, clusterZ, clusterQ, clusterW) \ create(modifiedF[0:interpolationPointsPerCluster], modifiedF2[0:interpolationPointsPerCluster], exactIndX[0:interpolationPointsPerCluster], \ exactIndY[0:interpolationPointsPerCluster], exactIndZ[0:interpolationPointsPerCluster], \ nodeX[0:interpDegreeLim], nodeY[0:interpDegreeLim], \ @@ -694,7 +694,7 @@ void pc_comp_ms_modifiedF_SS_child_to_parent(const struct Tree *tree, int child_ double temp = 0.0; double temp2 = 0.0; #ifdef OPENACC_ENABLED - #pragma acc loop vector(32) reduction(+:temp) + #pragma acc loop vector(32) reduction(+:temp) reduction(+:temp2) #endif for (int i = 0; i < interpolationPointsPerCluster; i++) { // loop over source points double sx = clusterX[child_startingIndexInClustersArray + i]; diff --git a/src/interaction_compute/interaction_compute_downpass.c b/src/interaction_compute/interaction_compute_downpass.c index 554c025c..b3dc33ed 100644 --- a/src/interaction_compute/interaction_compute_downpass.c +++ b/src/interaction_compute/interaction_compute_downpass.c @@ -608,7 +608,7 @@ void cp_comp_pot_SS_parent_to_child(struct Tree *tree, int parent_index, int chi double temp2 = 0.0; #ifdef OPENACC_ENABLED - #pragma acc loop independent reduction(+:temp) + #pragma acc loop independent reduction(+:temp) reduction(+:temp2) #endif for (int j = 0; j < interp_pts_per_cluster; j++) { // loop over 
interpolation points, set (cx,cy,cz) for this point From cf4308b13d7307db07ee189220b79f45d1f5768d Mon Sep 17 00:00:00 2001 From: Leighton Wilson Date: Fri, 2 Oct 2020 19:40:19 -0700 Subject: [PATCH 78/95] +-100 on plummer, checking for null source w, target q array --- examples/support_fns.c | 22 ++++++++++++---------- src/drivers/treedriver.c | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/examples/support_fns.c b/examples/support_fns.c index 1e2daeae..1246d500 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -369,18 +369,20 @@ double Point_Set(DISTRIBUTION distribution, double xmin, double xmax) /*----------------------------------------------------------------------------*/ void Point_Plummer(double R, double *x, double *y, double *z) { - double u = (double)random()/(1.+ (double)(RAND_MAX)); - double radius = R / sqrt(pow(u, (-2.0/3.0)) - 1.0); + do { + double u = (double)random()/(1.+ (double)(RAND_MAX)); + double radius = R / sqrt(pow(u, (-2.0/3.0)) - 1.0); - u = (double)random()/(1.+ (double)(RAND_MAX)); - double theta = acos(-1 + u * 2.0); - - u = (double)random()/(1.+ (double)(RAND_MAX)); - double phi = u * 2.0 * M_PI; + u = (double)random()/(1.+ (double)(RAND_MAX)); + double theta = acos(-1 + u * 2.0); + + u = (double)random()/(1.+ (double)(RAND_MAX)); + double phi = u * 2.0 * M_PI; - *x = radius * sin(theta) * cos(phi); - *y = radius * sin(theta) * sin(phi); - *z = radius * cos(theta); + *x = radius * sin(theta) * cos(phi); + *y = radius * sin(theta) * sin(phi); + *z = radius * cos(theta); + } while (fabs(*x) > 100 || fabs(*y) > 100 || fabs(*z) > 100); return; } diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 0890aa12..b488c0bf 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -46,6 +46,18 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run Particles_Validate(sources, targets); Particles_ConstructOrder(sources); 
Particles_ConstructOrder(targets); + + int sources_w_dummy = 0; + int targets_q_dummy = 0; + + if (sources->w == NULL) { + sources_w_dummy = 1; + make_vector(sources->w, sources->num); + } + if (targets->q == NULL) { + targets_q_dummy = 1; + make_vector(targets->q, targets->num); + } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0 && rank == 0) { @@ -878,6 +890,8 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run } + if (sources_w_dummy) free_vector(sources->w); + if (targets_q_dummy) free_vector(targets->q); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ From 81b2fb601f89354ee49bd183133b5f98c08fb09b Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Mon, 5 Oct 2020 11:41:35 -0400 Subject: [PATCH 79/95] adding more explicit delete statements --- src/drivers/treedriver.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index b488c0bf..e1a42507 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -235,10 +235,10 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run remote_sources, targets, clusters, run_params); InteractionLists_Free(&let_interaction_list); #ifdef OPENACC_ENABLED - #pragma acc exit data delete(remote_sources->x, remote_sources->y, \ - remote_sources->z, remote_sources->q) + #pragma acc exit data delete(remote_sources->x[0:remote_sources->num], remote_sources->y[0:remote_sources->num], \ + remote_sources->z[0:remote_sources->num], remote_sources->q[0:remote_sources->num]) if (run_params->singularity == SUBTRACTION) { - #pragma acc exit data delete(remote_sources->w) + #pragma acc exit data delete(remote_sources->w[0:remote_sources->num]) } #endif Particles_Free(&remote_sources); @@ -280,11 +280,11 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run 
START_TIMER(&time_tree[10]); #ifdef OPENACC_ENABLED - #pragma acc exit data delete(targets->x, targets->y, targets->z, \ - clusters->x, clusters->y, \ - clusters->z, clusters->q) + #pragma acc exit data delete(targets->x[0:targets->num], targets->y[0:targets->num], targets->z[0:targets->num], \ + clusters->x[0:clusters->num], clusters->y[0:clusters->num], \ + clusters->z[0:clusters->num], clusters->q[0:clusters->num_charges]) if (run_params->singularity == SUBTRACTION) { - #pragma acc exit data delete(targets->q, clusters->w) + #pragma acc exit data delete(targets->q[0:targets->num], clusters->w[0:clusters->num_weights]) } #endif Particles_FreeOrder(sources); @@ -442,10 +442,11 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run sources, targets, clusters, run_params); InteractionLists_Free(&local_interaction_list); #ifdef OPENACC_ENABLED - #pragma acc exit data delete(sources->x, sources->y, sources->z, sources->q, \ - clusters->x, clusters->y, clusters->z, clusters->q) + #pragma acc exit data delete(sources->x[0:sources->num], sources->y[0:sources->num], sources->z[0:sources->num], sources->q[0:sources->num], \ + clusters->x[0:clusters->num], clusters->y[0:clusters->num], clusters->z[0:clusters->num], \ + clusters->q[0:clusters->num_charges]) if (run_params->singularity == SUBTRACTION) { - #pragma acc exit data delete(sources->w, clusters->w) + #pragma acc exit data delete(sources->w[0:sources->num], clusters->w[0:clusters->num_weights]) } #endif STOP_TIMER(&time_tree[5]); @@ -512,12 +513,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run #ifdef OPENACC_ENABLED if (num_procs > 1) { START_TIMER(&time1); - #pragma acc exit data delete(let_sources->x, let_sources->y, \ - let_sources->z, let_sources->q, \ - let_clusters->x, let_clusters->y, \ - let_clusters->z, let_clusters->q) + #pragma acc exit data delete(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ + 
let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ + let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ + let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) if (run_params->singularity == SUBTRACTION) { - #pragma acc exit data delete(let_sources->w, let_clusters->w) + #pragma acc exit data delete(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) } STOP_TIMER(&time1); time_tree[6] += time1; @@ -549,9 +550,9 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[10]); #ifdef OPENACC_ENABLED - #pragma acc exit data delete(targets->x, targets->y, targets->z) + #pragma acc exit data delete(targets->x[0:targets->num], targets->y[0:targets->num], targets->z[0:targets->num]) if (run_params->singularity == SUBTRACTION) { - #pragma acc exit data delete(targets->q) + #pragma acc exit data delete(targets->q[0:targets->num]) } #endif Particles_FreeOrder(sources); From e1cc2b200186b9a62d159d8691e1923184f45bca Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Mon, 5 Oct 2020 12:19:28 -0400 Subject: [PATCH 80/95] structured data for PC --- src/clusters/clusters.c | 30 +++- src/drivers/treedriver.c | 130 +++++++++--------- .../interaction_compute_pc.c | 17 +++ 3 files changed, 106 insertions(+), 71 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 4a224548..6fed2222 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -105,14 +105,27 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa double *wC = clusters->w; -#ifdef OPENACC_ENABLED - #pragma acc enter data create(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ - zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges]) - if (singularity == SUBTRACTION) { - #pragma acc enter data create(wC[0:totalNumberInterpolationWeights]) - } +//#ifdef OPENACC_ENABLED +// #pragma acc enter data create(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ +// zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges]) +// if (singularity == SUBTRACTION) { +// #pragma acc enter data create(wC[0:totalNumberInterpolationWeights]) +// } +//#endif + + +#ifdef OPENACC_ENABLED + #pragma acc data copyin(xS[0:totalNumberSourcePoints], yS[0:totalNumberSourcePoints], \ + zS[0:totalNumberSourcePoints], qS[0:totalNumberSourcePoints], \ + wS[0:totalNumberSourcePoints]) \ + copy(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ + zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges], \ + wC[0:totalNumberInterpolationWeights]) + { #endif + + if ((approximation == LAGRANGE) && (singularity == SKIPPING)) { // anterpolate from particles to leaf cluster interpolation points @@ -174,6 +187,11 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa exit(1); } +#ifdef OPENACC_ENABLED + #pragma acc wait + } // end ACC DATA REGION +#endif + return; } 
diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index e1a42507..775e25d9 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -326,25 +326,25 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[0]); Tree_Sources_Construct(&tree, sources, run_params); -#ifdef OPENACC_ENABLED - #pragma acc enter data copyin(sources->x[0:sources->num], sources->y[0:sources->num], \ - sources->z[0:sources->num], sources->q[0:sources->num]) - if (run_params->singularity == SUBTRACTION) { - #pragma acc enter data copyin(sources->w[0:sources->num]) - } -#endif +//#ifdef OPENACC_ENABLED +// #pragma acc enter data copyin(sources->x[0:sources->num], sources->y[0:sources->num], \ +// sources->z[0:sources->num], sources->q[0:sources->num]) +// if (run_params->singularity == SUBTRACTION) { +// #pragma acc enter data copyin(sources->w[0:sources->num]) +// } +//#endif STOP_TIMER(&time_tree[0]); START_TIMER(&time_tree[1]); Batches_Targets_Construct(&batches, targets, run_params); -#ifdef OPENACC_ENABLED - #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ - targets->z[0:targets->num]) - if (run_params->singularity == SUBTRACTION) { - #pragma acc enter data copyin(targets->q[0:targets->num]) - } - #pragma acc enter data create(potential[0:targets->num]) -#endif +//#ifdef OPENACC_ENABLED +// #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ +// targets->z[0:targets->num]) +// if (run_params->singularity == SUBTRACTION) { +// #pragma acc enter data copyin(targets->q[0:targets->num]) +// } +// #pragma acc enter data create(potential[0:targets->num]) +//#endif STOP_TIMER(&time_tree[1]); START_TIMER(&time_tree[2]); @@ -381,13 +381,13 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run MPI_Barrier(MPI_COMM_WORLD); START_TIMER(&time_tree[3]); -#ifdef OPENACC_ENABLED - #pragma acc update 
self(clusters->x[0:clusters->num], clusters->y[0:clusters->num], \ - clusters->z[0:clusters->num], clusters->q[0:clusters->num_charges]) - if (run_params->singularity == SUBTRACTION) { - #pragma acc update self(clusters->w[0:clusters->num_weights]) - } -#endif +//#ifdef OPENACC_ENABLED +// #pragma acc update self(clusters->x[0:clusters->num], clusters->y[0:clusters->num], \ +// clusters->z[0:clusters->num], clusters->q[0:clusters->num_charges]) +// if (run_params->singularity == SUBTRACTION) { +// #pragma acc update self(clusters->w[0:clusters->num_weights]) +// } +//#endif CommTypesAndTrees_Construct(&comm_types, &let_trees, tree, batches, run_params); Particles_Alloc(&let_sources, comm_types->let_sources_length); Clusters_Alloc(&let_clusters, comm_types->let_clusters_length, run_params); @@ -441,14 +441,14 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run InteractionCompute_PC(potential, tree, batches, local_interaction_list, sources, targets, clusters, run_params); InteractionLists_Free(&local_interaction_list); -#ifdef OPENACC_ENABLED - #pragma acc exit data delete(sources->x[0:sources->num], sources->y[0:sources->num], sources->z[0:sources->num], sources->q[0:sources->num], \ - clusters->x[0:clusters->num], clusters->y[0:clusters->num], clusters->z[0:clusters->num], \ - clusters->q[0:clusters->num_charges]) - if (run_params->singularity == SUBTRACTION) { - #pragma acc exit data delete(sources->w[0:sources->num], clusters->w[0:clusters->num_weights]) - } -#endif +//#ifdef OPENACC_ENABLED +// #pragma acc exit data delete(sources->x[0:sources->num], sources->y[0:sources->num], sources->z[0:sources->num], sources->q[0:sources->num], \ +// clusters->x[0:clusters->num], clusters->y[0:clusters->num], clusters->z[0:clusters->num], \ +// clusters->q[0:clusters->num_charges]) +// if (run_params->singularity == SUBTRACTION) { +// #pragma acc exit data delete(sources->w[0:sources->num], clusters->w[0:clusters->num_weights]) +// } 
+//#endif STOP_TIMER(&time_tree[5]); //~~~~~~~~~~~~~~~~~~~~ @@ -458,20 +458,20 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run time_tree[6] = 0; time_tree[7] = 0; -#ifdef OPENACC_ENABLED - if (num_procs > 1) { - START_TIMER(&time1); - #pragma acc enter data copyin(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ - let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ - let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ - let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) - if (run_params->singularity == SUBTRACTION) { - #pragma acc enter data copyin(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) - } - STOP_TIMER(&time1); - time_tree[6] += time1; - } -#endif +//#ifdef OPENACC_ENABLED +// if (num_procs > 1) { +// START_TIMER(&time1); +// #pragma acc enter data copyin(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ +// let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ +// let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ +// let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) +// if (run_params->singularity == SUBTRACTION) { +// #pragma acc enter data copyin(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) +// } +// STOP_TIMER(&time1); +// time_tree[6] += time1; +// } +//#endif for (int proc_id = 1; proc_id < num_procs; ++proc_id) { @@ -510,20 +510,20 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run } -#ifdef OPENACC_ENABLED - if (num_procs > 1) { - START_TIMER(&time1); - #pragma acc exit data delete(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ - let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ - let_clusters->x[0:let_clusters->num], 
let_clusters->y[0:let_clusters->num], \ - let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) - if (run_params->singularity == SUBTRACTION) { - #pragma acc exit data delete(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) - } - STOP_TIMER(&time1); - time_tree[6] += time1; - } -#endif +//#ifdef OPENACC_ENABLED +// if (num_procs > 1) { +// START_TIMER(&time1); +// #pragma acc exit data delete(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ +// let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ +// let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ +// let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) +// if (run_params->singularity == SUBTRACTION) { +// #pragma acc exit data delete(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) +// } +// STOP_TIMER(&time1); +// time_tree[6] += time1; +// } +//#endif //------------------------------- @@ -535,7 +535,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run time_tree[8] = 0.0; START_TIMER(&time_tree[9]); - #pragma acc exit data copyout(potential[0:targets->num]) +// #pragma acc exit data copyout(potential[0:targets->num]) InteractionCompute_SubtractionPotentialCorrection(potential, targets, run_params); Particles_Targets_Reorder(targets, potential); Particles_Sources_Reorder(sources); @@ -549,12 +549,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //------------------------------- START_TIMER(&time_tree[10]); -#ifdef OPENACC_ENABLED - #pragma acc exit data delete(targets->x[0:targets->num], targets->y[0:targets->num], targets->z[0:targets->num]) - if (run_params->singularity == SUBTRACTION) { - #pragma acc exit data delete(targets->q[0:targets->num]) - } -#endif +//#ifdef OPENACC_ENABLED +// #pragma acc exit data delete(targets->x[0:targets->num], 
targets->y[0:targets->num], targets->z[0:targets->num]) +// if (run_params->singularity == SUBTRACTION) { +// #pragma acc exit data delete(targets->q[0:targets->num]) +// } +//#endif Particles_FreeOrder(sources); Particles_FreeOrder(targets); Tree_Free(&tree); diff --git a/src/interaction_compute/interaction_compute_pc.c b/src/interaction_compute/interaction_compute_pc.c index 76d39a6b..cd681cca 100644 --- a/src/interaction_compute/interaction_compute_pc.c +++ b/src/interaction_compute/interaction_compute_pc.c @@ -64,6 +64,21 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba int *tree_ibeg = tree->ibeg; int *tree_iend = tree->iend; int *cluster_ind = tree->cluster_ind; + + + +#ifdef OPENACC_ENABLED + #pragma acc data copyin(source_x[0:num_sources], source_y[0:num_sources], source_z[0:num_sources], \ + source_q[0:num_sources], source_w[0:num_sources], \ + target_x[0:num_targets], target_y[0:num_targets], target_z[0:num_targets], \ + target_q[0:num_targets], \ + cluster_x[0:total_num_interp_pts], cluster_y[0:total_num_interp_pts], \ + cluster_z[0:total_num_interp_pts], \ + cluster_q[0:total_num_interp_charges], cluster_w[0:total_num_interp_weights]) \ + copy(potential[0:num_targets]) +#endif + { + for (int i = 0; i < batches->numnodes; i++) { @@ -553,6 +568,8 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba #pragma acc wait #endif + } // end acc data region + return; } /* END of Interaction_PC_Compute */ From 9f35f1882c1a93563817fd8e95fe5eb0706313f5 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Mon, 5 Oct 2020 12:22:02 -0400 Subject: [PATCH 81/95] Revert "structured data for PC" This reverts commit e1cc2b200186b9a62d159d8691e1923184f45bca. 
--- src/clusters/clusters.c | 30 +--- src/drivers/treedriver.c | 130 +++++++++--------- .../interaction_compute_pc.c | 17 --- 3 files changed, 71 insertions(+), 106 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 6fed2222..4a224548 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -105,27 +105,14 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa double *wC = clusters->w; -//#ifdef OPENACC_ENABLED -// #pragma acc enter data create(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ -// zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges]) -// if (singularity == SUBTRACTION) { -// #pragma acc enter data create(wC[0:totalNumberInterpolationWeights]) -// } -//#endif - - -#ifdef OPENACC_ENABLED - #pragma acc data copyin(xS[0:totalNumberSourcePoints], yS[0:totalNumberSourcePoints], \ - zS[0:totalNumberSourcePoints], qS[0:totalNumberSourcePoints], \ - wS[0:totalNumberSourcePoints]) \ - copy(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ - zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges], \ - wC[0:totalNumberInterpolationWeights]) - { +#ifdef OPENACC_ENABLED + #pragma acc enter data create(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ + zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges]) + if (singularity == SUBTRACTION) { + #pragma acc enter data create(wC[0:totalNumberInterpolationWeights]) + } #endif - - if ((approximation == LAGRANGE) && (singularity == SKIPPING)) { // anterpolate from particles to leaf cluster interpolation points @@ -187,11 +174,6 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa exit(1); } -#ifdef OPENACC_ENABLED - #pragma acc wait - } // end ACC DATA REGION -#endif - return; } diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 775e25d9..e1a42507 
100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -326,25 +326,25 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run START_TIMER(&time_tree[0]); Tree_Sources_Construct(&tree, sources, run_params); -//#ifdef OPENACC_ENABLED -// #pragma acc enter data copyin(sources->x[0:sources->num], sources->y[0:sources->num], \ -// sources->z[0:sources->num], sources->q[0:sources->num]) -// if (run_params->singularity == SUBTRACTION) { -// #pragma acc enter data copyin(sources->w[0:sources->num]) -// } -//#endif +#ifdef OPENACC_ENABLED + #pragma acc enter data copyin(sources->x[0:sources->num], sources->y[0:sources->num], \ + sources->z[0:sources->num], sources->q[0:sources->num]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data copyin(sources->w[0:sources->num]) + } +#endif STOP_TIMER(&time_tree[0]); START_TIMER(&time_tree[1]); Batches_Targets_Construct(&batches, targets, run_params); -//#ifdef OPENACC_ENABLED -// #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ -// targets->z[0:targets->num]) -// if (run_params->singularity == SUBTRACTION) { -// #pragma acc enter data copyin(targets->q[0:targets->num]) -// } -// #pragma acc enter data create(potential[0:targets->num]) -//#endif +#ifdef OPENACC_ENABLED + #pragma acc enter data copyin(targets->x[0:targets->num], targets->y[0:targets->num], \ + targets->z[0:targets->num]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data copyin(targets->q[0:targets->num]) + } + #pragma acc enter data create(potential[0:targets->num]) +#endif STOP_TIMER(&time_tree[1]); START_TIMER(&time_tree[2]); @@ -381,13 +381,13 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run MPI_Barrier(MPI_COMM_WORLD); START_TIMER(&time_tree[3]); -//#ifdef OPENACC_ENABLED -// #pragma acc update self(clusters->x[0:clusters->num], clusters->y[0:clusters->num], \ -// clusters->z[0:clusters->num], 
clusters->q[0:clusters->num_charges]) -// if (run_params->singularity == SUBTRACTION) { -// #pragma acc update self(clusters->w[0:clusters->num_weights]) -// } -//#endif +#ifdef OPENACC_ENABLED + #pragma acc update self(clusters->x[0:clusters->num], clusters->y[0:clusters->num], \ + clusters->z[0:clusters->num], clusters->q[0:clusters->num_charges]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc update self(clusters->w[0:clusters->num_weights]) + } +#endif CommTypesAndTrees_Construct(&comm_types, &let_trees, tree, batches, run_params); Particles_Alloc(&let_sources, comm_types->let_sources_length); Clusters_Alloc(&let_clusters, comm_types->let_clusters_length, run_params); @@ -441,14 +441,14 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run InteractionCompute_PC(potential, tree, batches, local_interaction_list, sources, targets, clusters, run_params); InteractionLists_Free(&local_interaction_list); -//#ifdef OPENACC_ENABLED -// #pragma acc exit data delete(sources->x[0:sources->num], sources->y[0:sources->num], sources->z[0:sources->num], sources->q[0:sources->num], \ -// clusters->x[0:clusters->num], clusters->y[0:clusters->num], clusters->z[0:clusters->num], \ -// clusters->q[0:clusters->num_charges]) -// if (run_params->singularity == SUBTRACTION) { -// #pragma acc exit data delete(sources->w[0:sources->num], clusters->w[0:clusters->num_weights]) -// } -//#endif +#ifdef OPENACC_ENABLED + #pragma acc exit data delete(sources->x[0:sources->num], sources->y[0:sources->num], sources->z[0:sources->num], sources->q[0:sources->num], \ + clusters->x[0:clusters->num], clusters->y[0:clusters->num], clusters->z[0:clusters->num], \ + clusters->q[0:clusters->num_charges]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc exit data delete(sources->w[0:sources->num], clusters->w[0:clusters->num_weights]) + } +#endif STOP_TIMER(&time_tree[5]); //~~~~~~~~~~~~~~~~~~~~ @@ -458,20 +458,20 @@ void treedriver(struct 
Particles *sources, struct Particles *targets, struct Run time_tree[6] = 0; time_tree[7] = 0; -//#ifdef OPENACC_ENABLED -// if (num_procs > 1) { -// START_TIMER(&time1); -// #pragma acc enter data copyin(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ -// let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ -// let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ -// let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) -// if (run_params->singularity == SUBTRACTION) { -// #pragma acc enter data copyin(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) -// } -// STOP_TIMER(&time1); -// time_tree[6] += time1; -// } -//#endif +#ifdef OPENACC_ENABLED + if (num_procs > 1) { + START_TIMER(&time1); + #pragma acc enter data copyin(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ + let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ + let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ + let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc enter data copyin(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) + } + STOP_TIMER(&time1); + time_tree[6] += time1; + } +#endif for (int proc_id = 1; proc_id < num_procs; ++proc_id) { @@ -510,20 +510,20 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run } -//#ifdef OPENACC_ENABLED -// if (num_procs > 1) { -// START_TIMER(&time1); -// #pragma acc exit data delete(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ -// let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ -// let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ -// let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) 
-// if (run_params->singularity == SUBTRACTION) { -// #pragma acc exit data delete(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) -// } -// STOP_TIMER(&time1); -// time_tree[6] += time1; -// } -//#endif +#ifdef OPENACC_ENABLED + if (num_procs > 1) { + START_TIMER(&time1); + #pragma acc exit data delete(let_sources->x[0:let_sources->num], let_sources->y[0:let_sources->num], \ + let_sources->z[0:let_sources->num], let_sources->q[0:let_sources->num], \ + let_clusters->x[0:let_clusters->num], let_clusters->y[0:let_clusters->num], \ + let_clusters->z[0:let_clusters->num], let_clusters->q[0:let_clusters->num_charges]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc exit data delete(let_sources->w[0:let_sources->num], let_clusters->w[0:let_clusters->num_weights]) + } + STOP_TIMER(&time1); + time_tree[6] += time1; + } +#endif //------------------------------- @@ -535,7 +535,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run time_tree[8] = 0.0; START_TIMER(&time_tree[9]); -// #pragma acc exit data copyout(potential[0:targets->num]) + #pragma acc exit data copyout(potential[0:targets->num]) InteractionCompute_SubtractionPotentialCorrection(potential, targets, run_params); Particles_Targets_Reorder(targets, potential); Particles_Sources_Reorder(sources); @@ -549,12 +549,12 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //------------------------------- START_TIMER(&time_tree[10]); -//#ifdef OPENACC_ENABLED -// #pragma acc exit data delete(targets->x[0:targets->num], targets->y[0:targets->num], targets->z[0:targets->num]) -// if (run_params->singularity == SUBTRACTION) { -// #pragma acc exit data delete(targets->q[0:targets->num]) -// } -//#endif +#ifdef OPENACC_ENABLED + #pragma acc exit data delete(targets->x[0:targets->num], targets->y[0:targets->num], targets->z[0:targets->num]) + if (run_params->singularity == SUBTRACTION) { + #pragma acc exit data 
delete(targets->q[0:targets->num]) + } +#endif Particles_FreeOrder(sources); Particles_FreeOrder(targets); Tree_Free(&tree); diff --git a/src/interaction_compute/interaction_compute_pc.c b/src/interaction_compute/interaction_compute_pc.c index cd681cca..76d39a6b 100644 --- a/src/interaction_compute/interaction_compute_pc.c +++ b/src/interaction_compute/interaction_compute_pc.c @@ -64,21 +64,6 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba int *tree_ibeg = tree->ibeg; int *tree_iend = tree->iend; int *cluster_ind = tree->cluster_ind; - - - -#ifdef OPENACC_ENABLED - #pragma acc data copyin(source_x[0:num_sources], source_y[0:num_sources], source_z[0:num_sources], \ - source_q[0:num_sources], source_w[0:num_sources], \ - target_x[0:num_targets], target_y[0:num_targets], target_z[0:num_targets], \ - target_q[0:num_targets], \ - cluster_x[0:total_num_interp_pts], cluster_y[0:total_num_interp_pts], \ - cluster_z[0:total_num_interp_pts], \ - cluster_q[0:total_num_interp_charges], cluster_w[0:total_num_interp_weights]) \ - copy(potential[0:num_targets]) -#endif - { - for (int i = 0; i < batches->numnodes; i++) { @@ -568,8 +553,6 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba #pragma acc wait #endif - } // end acc data region - return; } /* END of Interaction_PC_Compute */ From cdc3183ba94036ac9e885784336c39dd43d148b3 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Mon, 5 Oct 2020 15:30:08 -0400 Subject: [PATCH 82/95] patched particle-cluster unstructured data --- src/clusters/clusters.c | 26 ++++++++++++++++++++++++-- src/drivers/treedriver.c | 7 +++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 4a224548..3a0c9a35 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -108,8 +108,30 @@ void Clusters_Sources_Construct(struct Clusters **clusters_addr, const struct Pa #ifdef OPENACC_ENABLED #pragma acc enter 
data create(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges]) - if (singularity == SUBTRACTION) { - #pragma acc enter data create(wC[0:totalNumberInterpolationWeights]) + + #pragma acc kernels present(xC,yC,zC,qC) + { + for (int i=0;iq[0:targets->num]) } #pragma acc enter data create(potential[0:targets->num]) + +#pragma acc kernels present(potential) + { + for (int i=0;inum;i++){ + potential[i]=0.0; + } + } #endif STOP_TIMER(&time_tree[1]); From c3d3b8072df9e20fd4575a6af58d8d40764741d0 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Mon, 5 Oct 2020 15:46:38 -0400 Subject: [PATCH 83/95] Unstructured data fixed for CP and CC as well. --- src/clusters/clusters.c | 21 +++++++++++++++++++++ src/drivers/treedriver.c | 14 +++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/clusters/clusters.c b/src/clusters/clusters.c index 3a0c9a35..0f6ec6dd 100644 --- a/src/clusters/clusters.c +++ b/src/clusters/clusters.c @@ -250,8 +250,29 @@ void Clusters_Targets_Construct(struct Clusters **clusters_addr, const struct Pa #ifdef OPENACC_ENABLED #pragma acc enter data create(xC[0:totalNumberInterpolationPoints], yC[0:totalNumberInterpolationPoints], \ zC[0:totalNumberInterpolationPoints], qC[0:totalNumberInterpolationCharges]) + + #pragma acc kernels present(xC,yC,zC,qC) + { + for (int i=0;iq[0:targets->num]) } #pragma acc enter data create(potential[0:targets->num]) + #pragma acc kernels present(potential) + { + for (int i=0;inum;i++){ + potential[i]=0.0; + } + } #endif STOP_TIMER(&time_tree[0]); @@ -345,7 +351,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run } #pragma acc enter data create(potential[0:targets->num]) -#pragma acc kernels present(potential) + #pragma acc kernels present(potential) { for (int i=0;inum;i++){ potential[i]=0.0; @@ -626,6 +632,12 @@ void treedriver(struct Particles *sources, struct 
Particles *targets, struct Run #pragma acc enter data copyin(targets->q[0:targets->num]) } #pragma acc enter data create(potential[0:targets->num]) + #pragma acc kernels present(potential) + { + for (int i=0;i<targets->num;i++){ + potential[i]=0.0; + } + } #endif STOP_TIMER(&time_tree[1]); From 944f5e49dfed815e52331367bbd560f2a6f42b51 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Mon, 5 Oct 2020 18:02:38 -0400 Subject: [PATCH 84/95] Added a test that compares BLDTT to direct. --- tests/CMakeLists.txt | 1 + tests/serial_tests.c | 145 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 138 insertions(+), 8 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5e345f29..30096fcf 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,3 +6,4 @@ add_test(NAME treecode_on_100_particles COMMAND tests_cpu 1) add_test(NAME treecode_on_1_target_10000_sources COMMAND tests_cpu 2) add_test(NAME treecode_parameters_on_1_target_10000_sources COMMAND tests_cpu 3) add_test(NAME test_treecode_wrapper COMMAND tests_cpu 4) +add_test(NAME test_BLDTT COMMAND tests_cpu 5) \ No newline at end of file diff --git a/tests/serial_tests.c b/tests/serial_tests.c index 1fa0d2de..1f87755b 100644 --- a/tests/serial_tests.c +++ b/tests/serial_tests.c @@ -962,13 +962,13 @@ static char *test_treecode_wrapper() } -static char *test_treecode_parameters_on_1_target_10000_sources() +static char *test_treecode_parameters_on_1_target_5000_sources() { struct RunParams *run_params = NULL; double time_tree[9]; int verbosity = 1; - int N = 10000; + int N = 5000; double beta = -1.0; struct Particles *sources = NULL; @@ -1018,7 +1018,7 @@ static char *test_treecode_parameters_on_1_target_10000_sources() } - int max_per_source_leaf = 5; + int max_per_source_leaf = 50; int max_per_target_leaf = 5; double size_check = 0.0; @@ -1412,6 +1412,132 @@ static char *test_treecode_parameters_on_1_target_10000_sources() + + +static char *test_BLDTT() +{ + struct RunParams *run_params = 
NULL; + double time_tree[13]; + + int verbosity = 1; + int N = 5000; + + struct Particles *sources = NULL; + struct Particles *targets = NULL; + double *potential = NULL, *potential_direct = NULL; + double potential_engy = 0; + double potential_engy_direct = 0; + + sources = malloc(sizeof(struct Particles)); + targets = malloc(sizeof(struct Particles)); + potential = malloc(sizeof(double) * N); + potential_direct = malloc(sizeof(double) * N); + + targets->num = N; + targets->x = malloc(targets->num*sizeof(double)); + targets->y = malloc(targets->num*sizeof(double)); + targets->z = malloc(targets->num*sizeof(double)); + targets->q = malloc(targets->num*sizeof(double)); + + sources->num = N; + sources->x = malloc(sources->num*sizeof(double)); + sources->y = malloc(sources->num*sizeof(double)); + sources->z = malloc(sources->num*sizeof(double)); + sources->q = malloc(sources->num*sizeof(double)); + sources->w = malloc(sources->num*sizeof(double)); + + + srand(1); + for (int i=0; i<targets->num; i++){ + // 10,000 randomly distributed sources in the [-1,1] box + targets->x[i]=((double)rand()/(double)(RAND_MAX)) * 2. - 1.; + targets->y[i]=((double)rand()/(double)(RAND_MAX)) * 2. - 1.; + targets->z[i]=((double)rand()/(double)(RAND_MAX)) * 2. - 1.; + targets->q[i]=((double)rand()/(double)(RAND_MAX)) * 2. - 1.; + + sources->x[i]=((double)rand()/(double)(RAND_MAX)) * 2. - 1.; + sources->y[i]=((double)rand()/(double)(RAND_MAX)) * 2. - 1.; + sources->z[i]=((double)rand()/(double)(RAND_MAX)) * 2. - 1.; + sources->q[i]=((double)rand()/(double)(RAND_MAX)) * 2. 
- 1.; + sources->w[i]=((double)rand()/(double)(RAND_MAX)); + } + + + int max_per_source_leaf = 20; + int max_per_target_leaf = 20; + + int degree = 3; + double theta = 0.9; + double beta = -1.0; + double size_check = 1.0; + + int num_kernel_params = 1; + double kernel_params[1] = {0.5}; + + RunParams_Setup(&run_params, + NO_KERNEL, num_kernel_params, kernel_params, NO_APPROX, NO_SINGULARITY, CLUSTER_CLUSTER, + theta, degree, max_per_source_leaf, max_per_target_leaf, size_check, beta, verbosity); + + + /***********************************************/ + /******************* Test **********************/ + /***********************************************/ + /***********************************************/ + memset(potential, 0, targets->num * sizeof(double)); + memset(potential_direct, 0, targets->num * sizeof(double)); + + run_params->kernel = COULOMB; + run_params->singularity = SKIPPING; + run_params->approximation = LAGRANGE; + + + directdriver(sources, targets, run_params, potential_direct, time_tree); + + treedriver(sources, targets, run_params, potential, time_tree); + + double cumulative_potential_bldtt=0.0; + double cumulative_potential=0.0; + double error; + + for (int i=0; i<targets->num; i++){ + + cumulative_potential_bldtt += potential[i]; + cumulative_potential += potential_direct[i]; + + } + + error=fabs(cumulative_potential_bldtt - cumulative_potential)/fabs(cumulative_potential); + + if (verbosity>-1) printf("direct: %1.8e\n", cumulative_potential); + if (verbosity>-1) printf("approx: %1.8e\n", cumulative_potential_bldtt); + if (verbosity>-1) printf("rel. 
err.: %1.8e\n", error); + + mu_assert("TEST FAILED: Cluster-cluster didn't give same results as direct", \ + error < 6e-3); + + free(sources->x); + free(sources->y); + free(sources->z); + free(sources->q); + free(sources->w); + free(sources); + + free(targets->x); + free(targets->y); + free(targets->z); + free(targets->q); + free(targets); + + free(potential); + free(potential_direct); + + RunParams_Free(&run_params); + + return 0; +} + + + // Run all the tests static char *all_tests() { @@ -1421,8 +1547,8 @@ static char *all_tests() printf("Completed test_treecode_on_100_particles().\n"); mu_run_test(test_treecode_on_1_target_10000_sources); printf("Completed test_treecode_on_1_target_10000_sources().\n"); - mu_run_test(test_treecode_parameters_on_1_target_10000_sources); - printf("Completed test_treecode_parameters_on_1_target_10000_sources().\n"); + mu_run_test(test_treecode_parameters_on_1_target_5000_sources); + printf("Completed test_treecode_parameters_on_1_target_5000_sources().\n"); return 0; } @@ -1439,13 +1565,16 @@ static char *run_one_test(int i) mu_run_test(test_treecode_on_1_target_10000_sources); printf("Completed test_treecode_on_1_target_10000_sources().\n"); }else if(i==3){ - mu_run_test(test_treecode_parameters_on_1_target_10000_sources); - printf("Completed test_treecode_parameters_on_1_target_10000_sources().\n"); + mu_run_test(test_treecode_parameters_on_1_target_5000_sources); + printf("Completed test_treecode_parameters_on_1_target_5000_sources().\n"); }else if (i==4){ mu_run_test(test_treecode_wrapper); printf("Completed test_treecode_wrapper().\n"); + }else if (i==5){ + mu_run_test(test_BLDTT); + printf("Completed test_BLDTT().\n"); }else{ - printf("Incorrect test number. Exiting."); + printf("Incorrect test number. 
Exiting.\n"); exit(1); } return 0; From b84cdcc53b36408bfe0806489ad4de5b88b04a4b Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Tue, 6 Oct 2020 10:49:31 -0400 Subject: [PATCH 85/95] sources->w create converted to copyin for CP --- src/drivers/treedriver.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index c05c49cb..52bc2c3c 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -234,7 +234,9 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run #pragma acc enter data copyin(remote_sources->x[0:remote_sources->num], remote_sources->y[0:remote_sources->num], \ remote_sources->z[0:remote_sources->num], remote_sources->q[0:remote_sources->num]) if (run_params->singularity == SUBTRACTION) { - #pragma acc enter data create(remote_sources->w[0:remote_sources->num]) + #pragma acc enter data copyin(remote_sources->w[0:remote_sources->num]) + + } #endif InteractionCompute_CP(potential, tree, remote_batches, let_interaction_list, From 5ae7c55bedb4ba070df7af2259450945e0d0fac3 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Tue, 6 Oct 2020 15:35:02 -0400 Subject: [PATCH 86/95] Added a user defined kernel that can be copied or modified --- examples/support_fns.c | 3 + src/CMakeLists.txt | 16 +++- .../interaction_compute_cc.c | 75 ++++++++++++++++++- .../interaction_compute_correction.c | 1 + .../interaction_compute_cp.c | 19 +++++ .../interaction_compute_direct.c | 12 +++ .../interaction_compute_pc.c | 29 +++++++ src/utilities/enums.h | 3 +- 8 files changed, 152 insertions(+), 6 deletions(-) diff --git a/examples/support_fns.c b/examples/support_fns.c index 1246d500..bb9d486e 100644 --- a/examples/support_fns.c +++ b/examples/support_fns.c @@ -160,6 +160,9 @@ void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int * } else if (strcasecmp(kernel_string, "TCF") == 0) { kernel = TCF; + } else if (strcasecmp(kernel_string, "USER") 
== 0) { + kernel = USER; + } else if (strcasecmp(kernel_string, "DCF") == 0) { kernel = DCF; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4195a5be..f4a7336c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -173,8 +173,17 @@ SET(SRCS_K_MQ kernels/mq/mq_pp.c kernels/mq/mq.h kernels/mq/mq_pc.c) -# kernels/mq/mq_cp.h -# kernels/mq/mq_cp.c) + + +SET(SRCS_K_USER + kernels/user_kernel/user_kernel.h + kernels/user_kernel/user_kernel_pp.h + kernels/user_kernel/user_kernel_pp.c + kernels/user_kernel/user_kernel_pc.h + kernels/user_kernel/user_kernel_pc.c + kernels/user_kernel/user_kernel_cp.h + kernels/user_kernel/user_kernel_cp.c) + SET(SRCS_KERNELS ${SRCS_K_COULOMB} @@ -183,7 +192,8 @@ SET(SRCS_KERNELS ${SRCS_K_COULOMB} ${SRCS_K_REGULARIZED_YUKAWA} ${SRCS_K_ATAN} ${SRCS_K_SIN_OVER_R} - ${SRCS_K_MQ}) + ${SRCS_K_MQ} + ${SRCS_K_USER}) set(TRGT BaryTree_cpu) diff --git a/src/interaction_compute/interaction_compute_cc.c b/src/interaction_compute/interaction_compute_cc.c index 307c1621..4ba5779d 100644 --- a/src/interaction_compute/interaction_compute_cc.c +++ b/src/interaction_compute/interaction_compute_cc.c @@ -15,6 +15,7 @@ #include "../kernels/regularized-coulomb/regularized-coulomb.h" #include "../kernels/regularized-yukawa/regularized-yukawa.h" #include "../kernels/sin-over-r/sin-over-r.h" +#include "../kernels/user_kernel/user_kernel.h" #include "interaction_compute.h" @@ -340,6 +341,25 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } } + /* * *********************************************/ + /* * ******* USER DEFINED KERNEL *****************/ + /* * *********************************************/ + + } else if (run_params->kernel == USER) { + + if (run_params->approximation == LAGRANGE) { + + + K_User_Kernel_CP_Lagrange(interp_pts_per_cluster, interp_pts_per_cluster, + source_cluster_start, target_cluster_start, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, + target_cluster_x, 
target_cluster_y, target_cluster_z, + target_cluster_q, + run_params, stream_id); + + } + } else { printf("**ERROR** INVALID KERNEL. EXITING.\n"); exit(1); @@ -584,8 +604,25 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } } + /* * *********************************************/ + /* * ******* USER DEFINED KERNEL *****************/ + /* * *********************************************/ + + } else if (run_params->kernel == USER) { + + if (run_params->approximation == LAGRANGE) { + + K_User_Kernel_PC_Lagrange(num_targets_in_cluster, interp_pts_per_cluster, + target_start, source_cluster_start, + target_x, target_y, target_z, + source_cluster_x, source_cluster_y, source_cluster_z, + source_cluster_q, + run_params, potential, stream_id); + + } + } else { - printf("**ERROR** INVALID KERNEL. EXITING.\n"); + printf("[Interaction_Compute_CC] **ERROR** INVALID KERNEL. EXITING.\n"); exit(1); } @@ -594,7 +631,7 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T /* * ********************************************************/ -/* * ************ POTENTIAL FROM TARGET APPROX (PC) *********/ +/* * ************ POTENTIAL FROM TARGET APPROX (CP) *********/ /* * ********************************************************/ for (int j = 0; j < num_target_approx_in_cluster; j++) { @@ -832,6 +869,25 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T } } + /* * *********************************************/ + /* * ******* USER DEFINED KERNEL *****************/ + /* * *********************************************/ + + } else if (run_params->kernel == USER) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_User_Kernel_CP_Lagrange(num_sources_in_cluster, interp_pts_per_cluster, + source_start, target_cluster_start, + source_x, source_y, source_z, source_q, + target_cluster_x, target_cluster_y, target_cluster_z, + target_cluster_q, + 
run_params, stream_id); + } + } + } else { printf("**ERROR** INVALID KERNEL. EXITING.\n"); exit(1); @@ -978,6 +1034,21 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T run_params, potential, stream_id); } + /* * *********************************************/ + /* * ********** USER DEFINED KERNEL **************/ + /* * *********************************************/ + + } else if (run_params->kernel == USER) { + + if (run_params->singularity == SKIPPING) { + + K_User_Kernel_PP(num_targets_in_cluster, num_sources_in_cluster, + target_start, source_start, + target_x, target_y, target_z, + source_x, source_y, source_z, source_q, + run_params, potential, stream_id); + } + } else { printf("**ERROR** INVALID KERNEL. EXITING.\n"); exit(1); diff --git a/src/interaction_compute/interaction_compute_correction.c b/src/interaction_compute/interaction_compute_correction.c index 6b732167..cf8f0774 100644 --- a/src/interaction_compute/interaction_compute_correction.c +++ b/src/interaction_compute/interaction_compute_correction.c @@ -14,6 +14,7 @@ #include "../kernels/atan/atan.h" #include "../kernels/sin-over-r/sin-over-r.h" #include "../kernels/mq/mq.h" +#include "../kernels/user_kernel/user_kernel.h" #include "interaction_compute.h" diff --git a/src/interaction_compute/interaction_compute_cp.c b/src/interaction_compute/interaction_compute_cp.c index 16c2f4d9..09b34272 100644 --- a/src/interaction_compute/interaction_compute_cp.c +++ b/src/interaction_compute/interaction_compute_cp.c @@ -16,6 +16,7 @@ #include "../kernels/regularized-coulomb/regularized-coulomb.h" #include "../kernels/regularized-yukawa/regularized-yukawa.h" #include "../kernels/sin-over-r/sin-over-r.h" +#include "../kernels/user_kernel/user_kernel.h" #include "interaction_compute.h" @@ -323,6 +324,24 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba } } + /***************************************/ + /****** USER DEFINED KERNEL ************/ + 
/***************************************/ + + } else if (run_params->kernel == USER) { + + if (run_params->approximation == LAGRANGE) { + + if (run_params->singularity == SKIPPING) { + + K_User_Kernel_CP_Lagrange(num_sources_in_batch, + interp_pts_per_cluster, batch_start, cluster_start, + source_x, source_y, source_z, source_q, + cluster_x, cluster_y, cluster_z, cluster_q, + run_params, stream_id); + } + } + } else { printf("**ERROR** INVALID KERNEL. EXITING.\n"); exit(1); diff --git a/src/interaction_compute/interaction_compute_direct.c b/src/interaction_compute/interaction_compute_direct.c index 55615bde..b0f9c867 100644 --- a/src/interaction_compute/interaction_compute_direct.c +++ b/src/interaction_compute/interaction_compute_direct.c @@ -16,6 +16,7 @@ #include "../kernels/atan/atan.h" #include "../kernels/sin-over-r/sin-over-r.h" #include "../kernels/mq/mq.h" +#include "../kernels/user_kernel/user_kernel.h" #include "interaction_compute.h" @@ -181,6 +182,17 @@ void InteractionCompute_Direct(double *potential, source_x, source_y, source_z, source_q, run_params, potential, 0); + /***************************************/ + /******** USER DEFINED KERNEL **********/ + /***************************************/ + + } else if (run_params->kernel == USER) { + + K_User_Kernel_PP(num_targets, num_sources, 0, 0, + target_x, target_y, target_z, + source_x, source_y, source_z, source_q, + run_params, potential, 0); + } else { printf("**ERROR** INVALID KERNEL. 
EXITING.\n"); exit(1); diff --git a/src/interaction_compute/interaction_compute_pc.c b/src/interaction_compute/interaction_compute_pc.c index 76d39a6b..8a38b32c 100644 --- a/src/interaction_compute/interaction_compute_pc.c +++ b/src/interaction_compute/interaction_compute_pc.c @@ -19,6 +19,7 @@ #include "../kernels/atan/atan.h" #include "../kernels/sin-over-r/sin-over-r.h" #include "../kernels/mq/mq.h" +#include "../kernels/user_kernel/user_kernel.h" #include "interaction_compute.h" @@ -378,6 +379,22 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba exit(1); } + /***************************************/ + /********* USER DEFINED KERNEL *********/ + /***************************************/ + + } else if (run_params->kernel == USER) { + + if (run_params->approximation == LAGRANGE) { + + K_User_Kernel_PC_Lagrange(num_targets_in_batch, + interp_pts_per_cluster, batch_start, cluster_start, + target_x, target_y, target_z, + cluster_x, cluster_y, cluster_z, cluster_q, + run_params, potential, stream_id); + + } + } else { printf("**ERROR** INVALID KERNEL. EXITING.\n"); exit(1); @@ -541,6 +558,18 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba source_x, source_y, source_z, source_q, run_params, potential, stream_id); + /***************************************/ + /******** USER DEFINED KERNEL **********/ + /***************************************/ + + } else if (run_params->kernel == USER) { + + K_User_Kernel_PP(num_targets_in_batch, num_sources_in_cluster, + batch_start, source_start, + target_x, target_y, target_z, + source_x, source_y, source_z, source_q, + run_params, potential, stream_id); + } else { printf("**ERROR** INVALID KERNEL. 
EXITING.\n"); exit(1); diff --git a/src/utilities/enums.h b/src/utilities/enums.h index a7818bee..1a4835f4 100644 --- a/src/utilities/enums.h +++ b/src/utilities/enums.h @@ -13,7 +13,8 @@ typedef enum KERNEL TCF, DCF, SIN_OVER_R, - MQ + MQ, + USER } KERNEL; From d40e634de768d38efea0c43d71de9f11977cdd5a Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Wed, 7 Oct 2020 14:09:04 -0400 Subject: [PATCH 87/95] Update example.in --- examples/example.in | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/example.in b/examples/example.in index 5ec2badb..316c0402 100644 --- a/examples/example.in +++ b/examples/example.in @@ -1,6 +1,6 @@ num_sources 20000 num_targets 20000 -order 2 +degree 2 theta 0.9 beta -1.0 size_check 0.0 @@ -14,4 +14,3 @@ distribution uniform run_direct 1 slice 10 verbosity 1 -temp 0 \ No newline at end of file From 3fabb95e3eed9845ccafa1a974422e4808d46dab Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Wed, 7 Oct 2020 14:13:49 -0400 Subject: [PATCH 88/95] Adding user_kernels directory --- src/kernels/user_kernel/user_kernel.h | 9 ++++ src/kernels/user_kernel/user_kernel_cp.c | 63 ++++++++++++++++++++++++ src/kernels/user_kernel/user_kernel_cp.h | 15 ++++++ src/kernels/user_kernel/user_kernel_pc.c | 57 +++++++++++++++++++++ src/kernels/user_kernel/user_kernel_pc.h | 15 ++++++ src/kernels/user_kernel/user_kernel_pp.c | 57 +++++++++++++++++++++ src/kernels/user_kernel/user_kernel_pp.h | 14 ++++++ 7 files changed, 230 insertions(+) create mode 100644 src/kernels/user_kernel/user_kernel.h create mode 100644 src/kernels/user_kernel/user_kernel_cp.c create mode 100644 src/kernels/user_kernel/user_kernel_cp.h create mode 100644 src/kernels/user_kernel/user_kernel_pc.c create mode 100644 src/kernels/user_kernel/user_kernel_pc.h create mode 100644 src/kernels/user_kernel/user_kernel_pp.c create mode 100644 src/kernels/user_kernel/user_kernel_pp.h diff --git a/src/kernels/user_kernel/user_kernel.h 
b/src/kernels/user_kernel/user_kernel.h new file mode 100644 index 00000000..6c53bfe0 --- /dev/null +++ b/src/kernels/user_kernel/user_kernel.h @@ -0,0 +1,9 @@ +/* Interaction Kernels */ +#ifndef H_K_USER_KERNEL_H +#define H_K_USER_KERNEL_H + +#include "user_kernel_pp.h" +#include "user_kernel_pc.h" +#include "user_kernel_cp.h" + +#endif /* H_K_USER_KERNEL_H */ diff --git a/src/kernels/user_kernel/user_kernel_cp.c b/src/kernels/user_kernel/user_kernel_cp.c new file mode 100644 index 00000000..d65c3084 --- /dev/null +++ b/src/kernels/user_kernel/user_kernel_cp.c @@ -0,0 +1,63 @@ +#include +#include +#include + +#include "../../run_params/struct_run_params.h" +#include "user_kernel_cp.h" + + +void K_User_Kernel_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_sources, int starting_index_of_cluster, + double *source_x, double *source_y, double *source_z, double *source_q, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, + struct RunParams *run_params, int gpu_async_stream_id) +{ + + double kernel_parameter = run_params->kernel_params[0]; + +#ifdef OPENACC_ENABLED + #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ + cluster_x, cluster_y, cluster_z, cluster_q) + { +#endif +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { + + double temporary_potential = 0.0; + + double cx = cluster_x[starting_index_of_cluster + i]; + double cy = cluster_y[starting_index_of_cluster + i]; + double cz = cluster_z[starting_index_of_cluster + i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:temporary_potential) +#endif + for (int j = 0; j < number_of_sources_in_batch; j++) { +#ifdef OPENACC_ENABLED + #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + 
source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ + source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) +#endif + + int jj = starting_index_of_sources + j; + double dx = cx - source_x[jj]; + double dy = cy - source_y[jj]; + double dz = cz - source_z[jj]; + double r = sqrt(dx*dx + dy*dy + dz*dz); + + temporary_potential += source_q[jj] * exp(-kernel_parameter * r) / r; + + } // end loop over interpolation points +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif + cluster_q[starting_index_of_cluster + i] += temporary_potential; + } +#ifdef OPENACC_ENABLED + } // end kernel +#endif + return; +} diff --git a/src/kernels/user_kernel/user_kernel_cp.h b/src/kernels/user_kernel/user_kernel_cp.h new file mode 100644 index 00000000..353f1849 --- /dev/null +++ b/src/kernels/user_kernel/user_kernel_cp.h @@ -0,0 +1,15 @@ +/* Interaction Kernels */ +#ifndef H_K_USER_KERNEL_CP_H +#define H_K_USER_KERNEL_CP_H + +#include "../../run_params/struct_run_params.h" + + +void K_User_Kernel_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_sources, int starting_index_of_cluster, + double *source_x, double *source_y, double *source_z, double *source_q, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, + struct RunParams *run_params, int gpu_async_stream_id); + + +#endif /* H_K_USER_KERNEL_CP_H */ diff --git a/src/kernels/user_kernel/user_kernel_pc.c b/src/kernels/user_kernel/user_kernel_pc.c new file mode 100644 index 00000000..d9f55dc1 --- /dev/null +++ b/src/kernels/user_kernel/user_kernel_pc.c @@ -0,0 +1,57 @@ +#include +#include +#include + +#include "../../run_params/struct_run_params.h" +#include "user_kernel_pc.h" + + +void K_User_Kernel_PC_Lagrange(int number_of_targets_in_batch, int 
number_of_interpolation_points_in_cluster, + int starting_index_of_target, int starting_index_of_cluster, + double *target_x, double *target_y, double *target_z, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, + struct RunParams *run_params, double *potential, int gpu_async_stream_id) +{ + + double kernel_parameter = run_params->kernel_params[0]; + +#ifdef OPENACC_ENABLED + #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ + cluster_x, cluster_y, cluster_z, cluster_charge, potential) + { +#endif +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < number_of_targets_in_batch; i++) { + + double temporary_potential = 0.0; + + double tx = target_x[starting_index_of_target + i]; + double ty = target_y[starting_index_of_target + i]; + double tz = target_z[starting_index_of_target + i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:temporary_potential) +#endif + for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { + + double dx = tx - cluster_x[starting_index_of_cluster + j]; + double dy = ty - cluster_y[starting_index_of_cluster + j]; + double dz = tz - cluster_z[starting_index_of_cluster + j]; + double r = sqrt(dx*dx + dy*dy + dz*dz); + + temporary_potential += cluster_charge[starting_index_of_cluster + j] * exp(-kernel_parameter * r) / r; + + } // end loop over interpolation points +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif + potential[starting_index_of_target + i] += temporary_potential; + } +#ifdef OPENACC_ENABLED + } // end kernel +#endif + return; +} + diff --git a/src/kernels/user_kernel/user_kernel_pc.h b/src/kernels/user_kernel/user_kernel_pc.h new file mode 100644 index 00000000..58bce657 --- /dev/null +++ b/src/kernels/user_kernel/user_kernel_pc.h @@ -0,0 +1,15 @@ +/* Interaction Kernels */ +#ifndef H_K_USER_KERNEL_PC_H +#define H_K_USER_KERNEL_PC_H + +#include "../../run_params/struct_run_params.h" + + +void 
K_User_Kernel_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_target, int starting_index_of_cluster, + double *target_x, double *target_y, double *target_z, + double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, + struct RunParams *run_params, double *potential, int gpu_async_stream_id); + + +#endif /* H_K_USER_KERNEL_PC_H */ diff --git a/src/kernels/user_kernel/user_kernel_pp.c b/src/kernels/user_kernel/user_kernel_pp.c new file mode 100644 index 00000000..7a85fa7d --- /dev/null +++ b/src/kernels/user_kernel/user_kernel_pp.c @@ -0,0 +1,57 @@ +#include +#include +#include + +#include "../../run_params/struct_run_params.h" +#include "user_kernel_pp.h" + + +void K_User_Kernel_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, + int starting_index_of_target, int starting_index_of_source, + double *target_x, double *target_y, double *target_z, + double *source_x, double *source_y, double *source_z, double *source_charge, + struct RunParams *run_params, double *potential, int gpu_async_stream_id) +{ + + double kernel_parameter=run_params->kernel_params[0]; + +#ifdef OPENACC_ENABLED + #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ + source_x, source_y, source_z, source_charge, potential) + { +#endif +#ifdef OPENACC_ENABLED + #pragma acc loop independent +#endif + for (int i = 0; i < number_of_targets_in_batch; i++) { + + double temporary_potential = 0.0; + + double tx = target_x[starting_index_of_target + i]; + double ty = target_y[starting_index_of_target + i]; + double tz = target_z[starting_index_of_target + i]; + +#ifdef OPENACC_ENABLED + #pragma acc loop independent reduction(+:temporary_potential) +#endif + for (int j = 0; j < number_of_source_points_in_cluster; j++) { + + double dx = tx - source_x[starting_index_of_source + j]; + double dy = ty - source_y[starting_index_of_source + j]; + double dz = tz - 
source_z[starting_index_of_source + j]; + double r = sqrt(dx*dx + dy*dy + dz*dz); + + if (r > DBL_MIN) { + temporary_potential += source_charge[starting_index_of_source + j] * exp(-kernel_parameter*r) / r; + } + } // end loop over interpolation points +#ifdef OPENACC_ENABLED + #pragma acc atomic +#endif + potential[starting_index_of_target + i] += temporary_potential; + } +#ifdef OPENACC_ENABLED + } // end kernel +#endif + return; +} diff --git a/src/kernels/user_kernel/user_kernel_pp.h b/src/kernels/user_kernel/user_kernel_pp.h new file mode 100644 index 00000000..b94144d9 --- /dev/null +++ b/src/kernels/user_kernel/user_kernel_pp.h @@ -0,0 +1,14 @@ +/* Interaction Kernels */ +#ifndef H_K_USER_KERNEL_PP_H +#define H_K_USER_KERNEL_PP_H + +#include "../../run_params/struct_run_params.h" + +void K_User_Kernel_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, + int starting_index_of_target, int starting_index_of_cluster, + double *target_x, double *target_y, double *target_z, + double *source_x, double *source_y, double *source_z, double *source_charge, + struct RunParams *run_params, double *potential, int gpu_async_stream_id); + + +#endif /* H_K_USER_KERNEL_PP_H */ From 9c8fd3870bb1f408ca4ddf30bded11d6b894c43a Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Wed, 7 Oct 2020 14:44:53 -0400 Subject: [PATCH 89/95] If targets and sources are identical in memory, duplicate them --- src/particles/particles.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/particles/particles.c b/src/particles/particles.c index 3f880aa7..cdc11894 100644 --- a/src/particles/particles.c +++ b/src/particles/particles.c @@ -160,16 +160,27 @@ void Particles_Validate(struct Particles *sources, struct Particles *targets) MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (sources->x == targets->x) { - if (rank == 0) { - printf("[BaryTree]\n"); - printf("[BaryTree] ERROR! 
Sources and targets cannot be the same location in memory.\n"); - printf("[BaryTree] If you are trying to run with identical sources and targets,\n"); - printf("[BaryTree] you must duplicate the arrays.\n"); - printf("[BaryTree]\n"); - printf("[BaryTree] Exiting.\n"); - } - - exit(1); + printf("[BaryTree]\n"); + printf("[BaryTree] Sources and targets cannot be the same location in memory.\n"); + printf("[BaryTree] Making duplicate arrays for targets.\n"); + printf("[BaryTree]\n"); + make_vector(targets->x, targets->num); + memcpy(targets->x, sources->x, targets->num * sizeof(double)); + } + + if (sources->y == targets->y) { + make_vector(targets->y, targets->num); + memcpy(targets->y, sources->y, targets->num * sizeof(double)); + } + + if (sources->z == targets->z) { + make_vector(targets->z, targets->num); + memcpy(targets->z, sources->z, targets->num * sizeof(double)); + } + + if (sources->q == targets->q) { + make_vector(targets->q, targets->num); + memcpy(targets->q, sources->q, targets->num * sizeof(double)); } return; From 6af8f3c429bfb65fed599aac4dcddafa7ba9d2b8 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Wed, 7 Oct 2020 21:04:05 -0400 Subject: [PATCH 90/95] Printing out more interaction information --- src/drivers/treedriver.c | 432 ++++++++++-------- .../interaction_compute_cc.c | 20 +- .../interaction_compute_cp.c | 8 +- .../interaction_compute_pc.c | 8 +- src/interaction_lists/interaction_lists.c | 69 +-- .../struct_interaction_lists.h | 18 +- src/particles/struct_particles.h | 9 +- src/run_params/run_params.c | 2 +- 8 files changed, 307 insertions(+), 259 deletions(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 52bc2c3c..d03c937c 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -67,19 +67,20 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run } double time1; - long long int total_num_direct = 0; - long long int total_num_approx = 0; - long long int total_num_inter = 0; - - long long int total_num_direct_interact = 0; - long long int total_num_approx_interact = 0; - long long int total_num_interact = 0; - - // These types of interactions only occur for CC - long long int total_num_source_approx = 0; - long long int total_num_target_approx = 0; - long long int total_num_source_approx_interact = 0; - long long int total_num_target_approx_interact = 0; + long long num_pp = 0; + long long num_cc = 0; + long long num_pc = 0; + long long num_cp = 0; + + long long num_pp_ptwise = 0; + long long num_cc_ptwise = 0; + long long num_pc_ptwise = 0; + long long num_cp_ptwise = 0; + + long long num_cc_replaced = 0; + long long num_pc_replaced = 0; + long long num_cp_replaced = 0; + //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ @@ -163,17 +164,20 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0) { - total_num_approx += sum_int(local_interaction_list->num_approx, batches->numnodes); - total_num_direct += 
sum_int(local_interaction_list->num_direct, batches->numnodes); + num_cp += sum_int(local_interaction_list->num_cp, batches->numnodes); + num_pp += sum_int(local_interaction_list->num_pp, batches->numnodes); for (int i = 0; i < batches->numnodes; ++i) { - for (int j = 0; j < local_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += (long long int) batches->numpar[i] - * (long long int) tree->numpar[local_interaction_list->direct_interactions[i][j]]; + for (int j = 0; j < local_interaction_list->num_pp[i]; ++j) { + num_pp_ptwise += (long long) batches->numpar[i] + * (long long) tree->numpar[local_interaction_list->pp_interactions[i][j]]; } - for (int j = 0; j < local_interaction_list->num_approx[i]; ++j) { - total_num_approx_interact += (long long int) batches->numpar[i] - * (long long int) run_params->interp_pts_per_cluster; + for (int j = 0; j < local_interaction_list->num_cp[i]; ++j) { + num_cp_ptwise += (long long) batches->numpar[i] + * (long long) run_params->interp_pts_per_cluster; + + num_cp_replaced += (long long) batches->numpar[i] + * (long long) tree->numpar[local_interaction_list->cp_interactions[i][j]]; } } } @@ -213,17 +217,20 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0) { - total_num_approx += sum_int(let_interaction_list->num_approx, remote_batches->numnodes); - total_num_direct += sum_int(let_interaction_list->num_direct, remote_batches->numnodes); + num_cp += sum_int(let_interaction_list->num_cp, remote_batches->numnodes); + num_pp += sum_int(let_interaction_list->num_pp, remote_batches->numnodes); for (int i = 0; i < remote_batches->numnodes; ++i) { - for (int j = 0; j < let_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += (long long int) remote_batches->numpar[i] - * (long long int) tree->numpar[let_interaction_list->direct_interactions[i][j]]; + for (int j = 0; j < 
let_interaction_list->num_pp[i]; ++j) { + num_pp_ptwise += (long long) remote_batches->numpar[i] + * (long long) tree->numpar[let_interaction_list->pp_interactions[i][j]]; } - for (int j = 0; j < let_interaction_list->num_approx[i]; ++j) { - total_num_approx_interact += (long long int) remote_batches->numpar[i] - * (long long int) run_params->interp_pts_per_cluster; + for (int j = 0; j < let_interaction_list->num_cp[i]; ++j) { + num_cp_ptwise += (long long) remote_batches->numpar[i] + * (long long) run_params->interp_pts_per_cluster; + + num_cp_replaced += (long long) remote_batches->numpar[i] + * (long long) tree->numpar[let_interaction_list->cp_interactions[i][j]]; } } } @@ -436,17 +443,21 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0) { - total_num_approx += sum_int(local_interaction_list->num_approx, batches->numnodes); - total_num_direct += sum_int(local_interaction_list->num_direct, batches->numnodes); + num_pc += sum_int(local_interaction_list->num_pc, batches->numnodes); + num_pp += sum_int(local_interaction_list->num_pp, batches->numnodes); for (int i = 0; i < batches->numnodes; ++i) { - for (int j = 0; j < local_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += (long long int) batches->numpar[i] - * (long long int) tree->numpar[local_interaction_list->direct_interactions[i][j]]; + for (int j = 0; j < local_interaction_list->num_pp[i]; ++j) { + num_pp_ptwise += (long long) batches->numpar[i] + * (long long) tree->numpar[local_interaction_list->pp_interactions[i][j]]; } - for (int j = 0; j < local_interaction_list->num_approx[i]; ++j) { - total_num_approx_interact += (long long int) batches->numpar[i] - * (long long int) run_params->interp_pts_per_cluster; + + for (int j = 0; j < local_interaction_list->num_pc[i]; ++j) { + num_pc_ptwise += (long long) batches->numpar[i] + * (long long) 
run_params->interp_pts_per_cluster; + + num_pc_replaced += (long long) batches->numpar[i] + * (long long) tree->numpar[local_interaction_list->pc_interactions[i][j]]; } } } @@ -500,17 +511,21 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0) { - total_num_approx += sum_int(let_interaction_list->num_approx, batches->numnodes); - total_num_direct += sum_int(let_interaction_list->num_direct, batches->numnodes); + num_pc += sum_int(let_interaction_list->num_pc, batches->numnodes); + num_pp += sum_int(let_interaction_list->num_pp, batches->numnodes); for (int i = 0; i < batches->numnodes; ++i) { - for (int j = 0; j < let_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += (long long int) batches->numpar[i] - * (long long int) let_trees[get_from]->numpar[let_interaction_list->direct_interactions[i][j]]; + for (int j = 0; j < let_interaction_list->num_pp[i]; ++j) { + num_pp_ptwise += (long long) batches->numpar[i] + * (long long) let_trees[get_from]->numpar[let_interaction_list->pp_interactions[i][j]]; } - for (int j = 0; j < let_interaction_list->num_approx[i]; ++j) { - total_num_approx_interact += (long long int) batches->numpar[i] - * (long long int) run_params->interp_pts_per_cluster; + + for (int j = 0; j < let_interaction_list->num_pc[i]; ++j) { + num_pc_ptwise += (long long) batches->numpar[i] + * (long long) run_params->interp_pts_per_cluster; + + num_pc_replaced += (long long) batches->numpar[i] + * (long long) let_trees[get_from]->numpar[let_interaction_list->pc_interactions[i][j]]; } } } @@ -719,30 +734,42 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0) { - total_num_approx += sum_int(local_interaction_list->num_approx, target_tree->numnodes); - total_num_direct += 
sum_int(local_interaction_list->num_direct, target_tree->numnodes); + num_cc += sum_int(local_interaction_list->num_cc, target_tree->numnodes); + num_pp += sum_int(local_interaction_list->num_pp, target_tree->numnodes); - total_num_source_approx += sum_int(local_interaction_list->num_cc_source_approx, - target_tree->numnodes); - total_num_target_approx += sum_int(local_interaction_list->num_cc_target_approx, - target_tree->numnodes); + num_pc += sum_int(local_interaction_list->num_pc, target_tree->numnodes); + num_cp += sum_int(local_interaction_list->num_cp, target_tree->numnodes); - total_num_approx_interact += (long long int) sum_int(local_interaction_list->num_approx, target_tree->numnodes) - * (long long int) run_params->interp_pts_per_cluster * run_params->interp_pts_per_cluster; + num_cc_ptwise += (long long) sum_int(local_interaction_list->num_cc, target_tree->numnodes) + * (long long) run_params->interp_pts_per_cluster + * (long long) run_params->interp_pts_per_cluster; for (int i = 0; i < target_tree->numnodes; ++i) { - for (int j = 0; j < local_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += (long long int) target_tree->numpar[i] - * (long long int) source_tree->numpar[local_interaction_list->direct_interactions[i][j]]; + for (int j = 0; j < local_interaction_list->num_pp[i]; ++j) { + num_pp_ptwise += (long long) target_tree->numpar[i] + * (long long) source_tree->numpar[local_interaction_list->pp_interactions[i][j]]; } - for (int j = 0; j < local_interaction_list->num_cc_source_approx[i]; ++j) { - total_num_source_approx_interact += (long long int) target_tree->numpar[i] - * (long long int) run_params->interp_pts_per_cluster; + + for (int j = 0; j < local_interaction_list->num_cc[i]; ++j) { + num_cc_replaced += (long long) target_tree->numpar[i] + * (long long) source_tree->numpar[local_interaction_list->cc_interactions[i][j]]; } - for (int j = 0; j < local_interaction_list->num_cc_target_approx[i]; ++j) { - 
total_num_target_approx_interact += (long long int) run_params->interp_pts_per_cluster - * (long long int) source_tree->numpar[local_interaction_list->cc_target_approx_interactions[i][j]]; + + for (int j = 0; j < local_interaction_list->num_pc[i]; ++j) { + num_pc_ptwise += (long long) target_tree->numpar[i] + * (long long) run_params->interp_pts_per_cluster; + + num_pc_replaced += (long long) target_tree->numpar[i] + * (long long) source_tree->numpar[local_interaction_list->pc_interactions[i][j]]; + } + + for (int j = 0; j < local_interaction_list->num_cp[i]; ++j) { + num_cp_ptwise += (long long) run_params->interp_pts_per_cluster + * (long long) source_tree->numpar[local_interaction_list->cp_interactions[i][j]]; + + num_cp_replaced += (long long) target_tree->numpar[i] + * (long long) source_tree->numpar[local_interaction_list->cp_interactions[i][j]]; } } } @@ -795,31 +822,40 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0) { - total_num_approx += sum_int(let_interaction_list->num_approx, target_tree->numnodes); - total_num_direct += sum_int(let_interaction_list->num_direct, target_tree->numnodes); - - total_num_source_approx += sum_int(let_interaction_list->num_cc_source_approx, - target_tree->numnodes); - total_num_target_approx += sum_int(let_interaction_list->num_cc_target_approx, - target_tree->numnodes); - + num_pp += sum_int(let_interaction_list->num_pp, target_tree->numnodes); + num_cc += sum_int(let_interaction_list->num_cc, target_tree->numnodes); + num_pc += sum_int(let_interaction_list->num_pc, target_tree->numnodes); + num_cp += sum_int(let_interaction_list->num_cp, target_tree->numnodes); - total_num_approx_interact += (long long int) sum_int(let_interaction_list->num_approx, target_tree->numnodes) - * (long long int) run_params->interp_pts_per_cluster - * (long long int) run_params->interp_pts_per_cluster; + 
num_cc_ptwise += (long long) sum_int(let_interaction_list->num_cc, target_tree->numnodes) + * (long long) run_params->interp_pts_per_cluster + * (long long) run_params->interp_pts_per_cluster; for (int i = 0; i < target_tree->numnodes; ++i) { - for (int j = 0; j < let_interaction_list->num_direct[i]; ++j) { - total_num_direct_interact += (long long int) target_tree->numpar[i] - * (long long int) let_trees[get_from]->numpar[let_interaction_list->direct_interactions[i][j]]; + for (int j = 0; j < let_interaction_list->num_pp[i]; ++j) { + num_pp_ptwise += (long long) target_tree->numpar[i] + * (long long) let_trees[get_from]->numpar[let_interaction_list->pp_interactions[i][j]]; + } + + for (int j = 0; j < let_interaction_list->num_cc[i]; ++j) { + num_cc_replaced += (long long) target_tree->numpar[i] + * (long long) let_trees[get_from]->numpar[let_interaction_list->cc_interactions[i][j]]; } - for (int j = 0; j < let_interaction_list->num_cc_source_approx[i]; ++j) { - total_num_source_approx_interact += (long long int) target_tree->numpar[i] - * (long long int) run_params->interp_pts_per_cluster; + + for (int j = 0; j < let_interaction_list->num_pc[i]; ++j) { + num_pc_ptwise += (long long) target_tree->numpar[i] + * (long long) run_params->interp_pts_per_cluster; + + num_pc_replaced += (long long) target_tree->numpar[i] + * (long long) let_trees[get_from]->numpar[let_interaction_list->pc_interactions[i][j]]; } - for (int j = 0; j < let_interaction_list->num_cc_target_approx[i]; ++j) { - total_num_target_approx_interact += (long long int) run_params->interp_pts_per_cluster - * (long long int) let_trees[get_from]->numpar[let_interaction_list->cc_target_approx_interactions[i][j]]; + + for (int j = 0; j < let_interaction_list->num_cp[i]; ++j) { + num_cp_ptwise += (long long) run_params->interp_pts_per_cluster + * (long long) let_trees[get_from]->numpar[let_interaction_list->cp_interactions[i][j]]; + + num_cp_replaced += (long long) target_tree->numpar[i] + * (long long) 
let_trees[get_from]->numpar[let_interaction_list->cp_interactions[i][j]]; } } } @@ -919,145 +955,147 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0) { - int global_num_inter, max_num_inter, min_num_inter; - int global_num_direct, max_num_direct, min_num_direct; - int global_num_approx, max_num_approx, min_num_approx; - - int global_num_source_approx, max_num_source_approx, min_num_source_approx; - int global_num_target_approx, max_num_target_approx, min_num_target_approx; + int global_num_all = 0, max_num_all = 0, min_num_all = 0; + int global_num_pp = 0, max_num_pp = 0, min_num_pp = 0; + int global_num_cc = 0, max_num_cc = 0, min_num_cc = 0; + int global_num_pc = 0, max_num_pc = 0, min_num_pc = 0; + int global_num_cp = 0, max_num_cp = 0, min_num_cp = 0; - total_num_inter = total_num_direct + total_num_approx - + total_num_source_approx + total_num_target_approx; + long long num_all = num_pp + num_cc + num_pc + num_cp; - MPI_Reduce(&total_num_inter, &global_num_inter, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_inter, &max_num_inter, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_inter, &min_num_inter, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); - - MPI_Reduce(&total_num_direct, &global_num_direct, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_direct, &max_num_direct, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_direct, &min_num_direct, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_all, &global_num_all, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_all, &max_num_all, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_all, &min_num_all, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_approx, &global_num_approx, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_approx, &max_num_approx, 1, 
MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_approx, &min_num_approx, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pp, &global_num_pp, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pp, &max_num_pp, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pp, &min_num_pp, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); - // These types of interactions only occur for CC - if (run_params->compute_type == CLUSTER_CLUSTER) { - MPI_Reduce(&total_num_source_approx, &global_num_source_approx, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_source_approx, &max_num_source_approx, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_source_approx, &min_num_source_approx, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cc, &global_num_cc, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cc, &max_num_cc, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cc, &min_num_cc, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_target_approx, &global_num_target_approx, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_target_approx, &max_num_target_approx, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_target_approx, &min_num_target_approx, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); - } + MPI_Reduce(&num_pc, &global_num_pc, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pc, &max_num_pc, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pc, &min_num_pc, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + + MPI_Reduce(&num_cp, &global_num_cp, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cp, &max_num_cp, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cp, &min_num_cp, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); if (rank == 0) { printf("[BaryTree]\n"); - printf("[BaryTree] Interaction information: \n"); - printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative interactions across all ranks: %d\n", global_num_inter); 
- printf("[BaryTree] Maximum interactions across all ranks: %d\n", max_num_inter); - printf("[BaryTree] Minimum interactions across all ranks: %d\n", min_num_inter); - printf("[BaryTree] Ratio: %f\n", - (double)max_num_inter / (double)min_num_inter); + printf("[BaryTree] Clusterwise interaction information:\n"); printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative direct interactions across all ranks: %d\n", global_num_direct); - printf("[BaryTree] Maximum direct interactions across all ranks: %d\n", max_num_direct); - printf("[BaryTree] Minimum direct interactions across all ranks: %d\n", min_num_direct); - printf("[BaryTree] Ratio: %f\n", - (double)max_num_direct / (double)min_num_direct); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | | Cumulative Minimum per rank Maximum per rank |\n"); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | Total | %22d %22d %22d |\n", global_num_all, min_num_all, max_num_all); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | PP | %22d %22d %22d |\n", global_num_pp, min_num_pp, max_num_pp); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | PC | %22d %22d %22d |\n", global_num_pc, min_num_pc, max_num_pc); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | CP | %22d %22d %22d |\n", global_num_cp, min_num_cp, max_num_cp); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | CC | %22d %22d %22d |\n", global_num_cc, min_num_cc, max_num_cc); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); 
printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative approx interactions across all ranks: %d\n", global_num_approx); - printf("[BaryTree] Maximum approx interactions across all ranks: %d\n", max_num_approx); - printf("[BaryTree] Minimum approx interactions across all ranks: %d\n", min_num_approx); - printf("[BaryTree] Ratio: %f\n", - (double)max_num_approx / (double)min_num_approx); - printf("[BaryTree]\n"); - - // These types of interactions only occur for CC - if (run_params->compute_type == CLUSTER_CLUSTER) { - printf("[BaryTree] Cumulative source approx inter across all ranks: %d\n", global_num_source_approx); - printf("[BaryTree] Maximum source approx inter across all ranks: %d\n", max_num_source_approx); - printf("[BaryTree] Minimum source approx inter across all ranks: %d\n", min_num_source_approx); - printf("[BaryTree] Ratio: %f\n", - (double)max_num_source_approx / (double)min_num_source_approx); - printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative target approx inter across all ranks: %d\n", global_num_target_approx); - printf("[BaryTree] Maximum target approx inter across all ranks: %d\n", max_num_target_approx); - printf("[BaryTree] Minimum target approx inter across all ranks: %d\n", min_num_target_approx); - printf("[BaryTree] Ratio: %f\n", - (double)max_num_target_approx / (double)min_num_target_approx); - printf("[BaryTree]\n"); - } } - /* For the pointwise interactions */ - long long int global_num_interact, max_num_interact, min_num_interact; - long long int global_num_direct_interact, max_num_direct_interact, min_num_direct_interact; - long long int global_num_approx_interact, max_num_approx_interact, min_num_approx_interact; - - long long int global_num_source_approx_interact, max_num_source_approx_interact, min_num_source_approx_interact; - long long int global_num_target_approx_interact, max_num_target_approx_interact, min_num_target_approx_interact; + long long global_num_all_ptwise = 0, max_num_all_ptwise = 0, min_num_all_ptwise = 
0; + long long global_num_pp_ptwise = 0, max_num_pp_ptwise = 0, min_num_pp_ptwise = 0; + long long global_num_cc_ptwise = 0, max_num_cc_ptwise = 0, min_num_cc_ptwise = 0; + long long global_num_pc_ptwise = 0, max_num_pc_ptwise = 0, min_num_pc_ptwise = 0; + long long global_num_cp_ptwise = 0, max_num_cp_ptwise = 0, min_num_cp_ptwise = 0; - total_num_interact = total_num_direct_interact + total_num_approx_interact - + total_num_source_approx_interact + total_num_target_approx_interact; + long long num_all_ptwise = num_pp_ptwise + num_cc_ptwise + num_pc_ptwise + num_cp_ptwise; - MPI_Reduce(&total_num_interact, &global_num_interact, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_interact, &max_num_interact, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_interact, &min_num_interact, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_all_ptwise, &global_num_all_ptwise, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_all_ptwise, &max_num_all_ptwise, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_all_ptwise, &min_num_all_ptwise, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_direct_interact, &global_num_direct_interact, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_direct_interact, &max_num_direct_interact, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_direct_interact, &min_num_direct_interact, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pp_ptwise, &global_num_pp_ptwise, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pp_ptwise, &max_num_pp_ptwise, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pp_ptwise, &min_num_pp_ptwise, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_approx_interact, &global_num_approx_interact, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); - 
MPI_Reduce(&total_num_approx_interact, &max_num_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_approx_interact, &min_num_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cc_ptwise, &global_num_cc_ptwise, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cc_ptwise, &max_num_cc_ptwise, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cc_ptwise, &min_num_cc_ptwise, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); - // These types of interactions only occur for CC - if (run_params->compute_type == CLUSTER_CLUSTER) { - MPI_Reduce(&total_num_source_approx_interact, &global_num_source_approx_interact, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_source_approx_interact, &max_num_source_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_source_approx_interact, &min_num_source_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); - - MPI_Reduce(&total_num_target_approx_interact, &global_num_target_approx_interact, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_target_approx_interact, &max_num_target_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); - MPI_Reduce(&total_num_target_approx_interact, &min_num_target_approx_interact, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); - } + MPI_Reduce(&num_pc_ptwise, &global_num_pc_ptwise, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pc_ptwise, &max_num_pc_ptwise, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pc_ptwise, &min_num_pc_ptwise, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); + + MPI_Reduce(&num_cp_ptwise, &global_num_cp_ptwise, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cp_ptwise, &max_num_cp_ptwise, 1, MPI_LONG_LONG_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cp_ptwise, 
&min_num_cp_ptwise, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, MPI_COMM_WORLD); if (rank == 0) { printf("[BaryTree]\n"); - printf("[BaryTree] Cumulative pointwise interactions across all ranks: %lld\n", global_num_interact); - printf("[BaryTree] Maximum pointwise interactions across all ranks: %lld\n", max_num_interact); - printf("[BaryTree] Minimum pointwise interactions across all ranks: %lld\n", min_num_interact); - printf("[BaryTree]\n"); - - printf("[BaryTree] Cumulative direct pointwise interactions across all ranks: %lld\n", global_num_direct_interact); - printf("[BaryTree] Maximum direct pointwise interactions across all ranks: %lld\n", max_num_direct_interact); - printf("[BaryTree] Minimum direct pointwise interactions across all ranks: %lld\n", min_num_direct_interact); + printf("[BaryTree] Pointwise interaction information:\n"); + printf("[BaryTree]\n"); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | | Cumulative Minimum per rank Maximum per rank |\n"); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | Total | %22lld %22lld %22lld |\n", + global_num_all_ptwise, min_num_all_ptwise, max_num_all_ptwise); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | PP | %22lld %22lld %22lld |\n", + global_num_pp_ptwise, min_num_pp_ptwise, max_num_pp_ptwise); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | PC | %22lld %22lld %22lld |\n", + global_num_pc_ptwise, min_num_pc_ptwise, max_num_pc_ptwise); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | CP | %22lld %22lld %22lld |\n", + global_num_cp_ptwise, min_num_cp_ptwise, max_num_cp_ptwise); + printf("[BaryTree] 
|------------------------------------------------------------------------------|\n"); + printf("[BaryTree] | CC | %22lld %22lld %22lld |\n", + global_num_cc_ptwise, min_num_cc_ptwise, max_num_cc_ptwise); + printf("[BaryTree] |------------------------------------------------------------------------------|\n"); + printf("[BaryTree]\n"); + } + + long long global_num_cc_replaced = 0; + long long global_num_pc_replaced = 0; + long long global_num_cp_replaced = 0; + + MPI_Reduce(&num_cc_replaced, &global_num_cc_replaced, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_pc_replaced, &global_num_pc_replaced, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&num_cp_replaced, &global_num_cp_replaced, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD); + + long long global_num_pure_pp = global_num_pp_ptwise + global_num_cc_replaced + + global_num_pc_replaced + global_num_cp_replaced; + + if (rank == 0) { + printf("[BaryTree]\n"); + printf("[BaryTree] Pointwise interactions accounted for:\n"); + printf("[BaryTree]\n"); + printf("[BaryTree] |-------------------------------------------------------|\n"); + printf("[BaryTree] | | Number Percentage |\n"); + printf("[BaryTree] |-------------------------------------------------------|\n"); + printf("[BaryTree] | Total | %22lld 100.000000%% |\n", + global_num_pure_pp); + printf("[BaryTree] |-------------------------------------------------------|\n"); + printf("[BaryTree] | PP | %22lld %21.6f%% |\n", + global_num_pp_ptwise, 100. * (double)global_num_pp_ptwise / (double)global_num_pure_pp); + printf("[BaryTree] |-------------------------------------------------------|\n"); + printf("[BaryTree] | PC | %22lld %21.6f%% |\n", + global_num_pc_replaced, 100. * (double)global_num_pc_replaced / (double)global_num_pure_pp); + printf("[BaryTree] |-------------------------------------------------------|\n"); + printf("[BaryTree] | CP | %22lld %21.6f%% |\n", + global_num_cp_replaced, 100. 
* (double)global_num_cp_replaced / (double)global_num_pure_pp); + printf("[BaryTree] |-------------------------------------------------------|\n"); + printf("[BaryTree] | CC | %22lld %21.6f%% |\n", + global_num_cc_replaced, 100. * (double)global_num_cc_replaced / (double)global_num_pure_pp); + printf("[BaryTree] |-------------------------------------------------------|\n"); printf("[BaryTree]\n"); - - printf("[BaryTree] Cumulative approx pointwise interactions across all ranks: %lld\n", global_num_approx_interact); - printf("[BaryTree] Maximum approx pointwise interactions across all ranks: %lld\n", max_num_approx_interact); - printf("[BaryTree] Minimum approx pointwise interactions across all ranks: %lld\n", min_num_approx_interact); - printf("[BaryTree]\n"); - - // These types of interactions only occur for CC - if (run_params->compute_type == CLUSTER_CLUSTER) { - printf("[BaryTree] Cumulative source approx pointwise interactions across all ranks: %lld\n", global_num_source_approx_interact); - printf("[BaryTree] Maximum source approx pointwise interactions across all ranks: %lld\n", max_num_source_approx_interact); - printf("[BaryTree] Minimum source approx pointwise interactions across all ranks: %lld\n", min_num_source_approx_interact); - printf("[BaryTree]\n"); - - printf("[BaryTree] Cumulative target approx pointwise interactions across all ranks: %lld\n", global_num_target_approx_interact); - printf("[BaryTree] Maximum source approx pointwise interactions across all ranks: %lld\n", max_num_target_approx_interact); - printf("[BaryTree] Minimum source approx pointwise interactions across all ranks: %lld\n", min_num_target_approx_interact); - printf("[BaryTree]\n"); - } + } + + if (rank == 0) { printf("[BaryTree] BaryTree has finished.\n"); printf("[BaryTree]\n"); } diff --git a/src/interaction_compute/interaction_compute_cc.c b/src/interaction_compute/interaction_compute_cc.c index 4ba5779d..0ba1c095 100644 --- 
a/src/interaction_compute/interaction_compute_cc.c +++ b/src/interaction_compute/interaction_compute_cc.c @@ -28,17 +28,15 @@ void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct T { int interp_pts_per_cluster = run_params->interp_pts_per_cluster; - int **approx_inter_list = interaction_list->approx_interactions; - int **direct_inter_list = interaction_list->direct_interactions; - - int *num_approx = interaction_list->num_approx; - int *num_direct = interaction_list->num_direct; - - int **source_approx_inter_list = interaction_list->cc_source_approx_interactions; - int **target_approx_inter_list = interaction_list->cc_target_approx_interactions; - - int *num_source_approx = interaction_list->num_cc_source_approx; - int *num_target_approx = interaction_list->num_cc_target_approx; + int **direct_inter_list = interaction_list->pp_interactions; + int **approx_inter_list = interaction_list->cc_interactions; + int **source_approx_inter_list = interaction_list->pc_interactions; + int **target_approx_inter_list = interaction_list->cp_interactions; + + int *num_direct = interaction_list->num_pp; + int *num_approx = interaction_list->num_cc; + int *num_source_approx = interaction_list->num_pc; + int *num_target_approx = interaction_list->num_cp; int source_tree_numnodes = source_tree->numnodes; int target_tree_numnodes = target_tree->numnodes; diff --git a/src/interaction_compute/interaction_compute_cp.c b/src/interaction_compute/interaction_compute_cp.c index 09b34272..622acf31 100644 --- a/src/interaction_compute/interaction_compute_cp.c +++ b/src/interaction_compute/interaction_compute_cp.c @@ -50,11 +50,11 @@ void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *ba double *cluster_q = clusters->q; double *cluster_w = clusters->w; - int **approx_inter_list = interaction_list->approx_interactions; - int **direct_inter_list = interaction_list->direct_interactions; + int **approx_inter_list = 
interaction_list->cp_interactions; + int **direct_inter_list = interaction_list->pp_interactions; - int *num_approx = interaction_list->num_approx; - int *num_direct = interaction_list->num_direct; + int *num_approx = interaction_list->num_cp; + int *num_direct = interaction_list->num_pp; int tree_numnodes = tree->numnodes; int batch_numnodes = batches->numnodes; diff --git a/src/interaction_compute/interaction_compute_pc.c b/src/interaction_compute/interaction_compute_pc.c index 8a38b32c..b36b5437 100644 --- a/src/interaction_compute/interaction_compute_pc.c +++ b/src/interaction_compute/interaction_compute_pc.c @@ -53,11 +53,11 @@ void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *ba double *cluster_q = clusters->q; double *cluster_w = clusters->w; - int **approx_inter_list = interaction_list->approx_interactions; - int **direct_inter_list = interaction_list->direct_interactions; + int **approx_inter_list = interaction_list->pc_interactions; + int **direct_inter_list = interaction_list->pp_interactions; - int *num_approx = interaction_list->num_approx; - int *num_direct = interaction_list->num_direct; + int *num_approx = interaction_list->num_pc; + int *num_direct = interaction_list->num_pp; int tree_numnodes = tree->numnodes; int batch_numnodes = batches->numnodes; diff --git a/src/interaction_lists/interaction_lists.c b/src/interaction_lists/interaction_lists.c index eb4ae017..0c2696fc 100644 --- a/src/interaction_lists/interaction_lists.c +++ b/src/interaction_lists/interaction_lists.c @@ -56,28 +56,37 @@ void InteractionLists_Make(struct InteractionLists **interaction_list_addr, /* Nullify unallocated arrays in interaction_list struct */ + + interaction_list->num_pp = NULL; + interaction_list->num_cc = NULL; + interaction_list->num_pc = NULL; + interaction_list->num_cp = NULL; - interaction_list->approx_interactions = NULL; - interaction_list->direct_interactions = NULL; - - interaction_list->num_approx = NULL; - 
interaction_list->num_direct = NULL; - - interaction_list->cc_source_approx_interactions = NULL; - interaction_list->cc_target_approx_interactions = NULL; - - interaction_list->num_cc_source_approx = NULL; - interaction_list->num_cc_target_approx = NULL; + interaction_list->pp_interactions = NULL; + interaction_list->cc_interactions = NULL; + interaction_list->pc_interactions = NULL; + interaction_list->cp_interactions = NULL; /* Set addresses for interaction lists common to PC, CP, and CC */ - int ***approx_inter_list_addr = &(interaction_list->approx_interactions); - int ***direct_inter_list_addr = &(interaction_list->direct_interactions); - - int **num_approx_addr = &(interaction_list->num_approx); - int **num_direct_addr = &(interaction_list->num_direct); + int **num_direct_addr = &(interaction_list->num_pp); + int ***direct_inter_list_addr = &(interaction_list->pp_interactions); + int **num_approx_addr; + int ***approx_inter_list_addr; + if (run_params->compute_type == PARTICLE_CLUSTER) { + num_approx_addr = &(interaction_list->num_pc); + approx_inter_list_addr = &(interaction_list->pc_interactions); + + } else if (run_params->compute_type == CLUSTER_PARTICLE) { + num_approx_addr = &(interaction_list->num_cp); + approx_inter_list_addr = &(interaction_list->cp_interactions); + + } else if (run_params->compute_type == CLUSTER_CLUSTER) { + num_approx_addr = &(interaction_list->num_cc); + approx_inter_list_addr = &(interaction_list->cc_interactions); + } /* Set addresses for variables pointing to source and target tree struct members */ @@ -158,11 +167,11 @@ void InteractionLists_Make(struct InteractionLists **interaction_list_addr, /* Allocate interaction lists exclusive to CC */ - int ***cc_source_approx_inter_list_addr = &(interaction_list->cc_source_approx_interactions); - int ***cc_target_approx_inter_list_addr = &(interaction_list->cc_target_approx_interactions); + int ***cc_source_approx_inter_list_addr = &(interaction_list->pc_interactions); + int 
***cc_target_approx_inter_list_addr = &(interaction_list->cp_interactions); - int **num_cc_source_approx_addr = &(interaction_list->num_cc_source_approx); - int **num_cc_target_approx_addr = &(interaction_list->num_cc_target_approx); + int **num_cc_source_approx_addr = &(interaction_list->num_pc); + int **num_cc_target_approx_addr = &(interaction_list->num_cp); make_matrix(*cc_source_approx_inter_list_addr, target_tree_numnodes, 50); make_matrix(*cc_target_approx_inter_list_addr, target_tree_numnodes, 50); @@ -230,17 +239,15 @@ void InteractionLists_Free(struct InteractionLists **interaction_list_addr) { struct InteractionLists *interaction_list = *interaction_list_addr; - free_matrix(interaction_list->approx_interactions); - free_matrix(interaction_list->direct_interactions); - - free_vector(interaction_list->num_approx); - free_vector(interaction_list->num_direct); - - free_matrix(interaction_list->cc_source_approx_interactions); - free_matrix(interaction_list->cc_target_approx_interactions); - - free_vector(interaction_list->num_cc_source_approx); - free_vector(interaction_list->num_cc_target_approx); + free_matrix(interaction_list->pp_interactions); + free_matrix(interaction_list->cc_interactions); + free_matrix(interaction_list->pc_interactions); + free_matrix(interaction_list->cp_interactions); + + free_vector(interaction_list->num_pp); + free_vector(interaction_list->num_cc); + free_vector(interaction_list->num_pc); + free_vector(interaction_list->num_cp); free(interaction_list); diff --git a/src/interaction_lists/struct_interaction_lists.h b/src/interaction_lists/struct_interaction_lists.h index 59397c73..334e78f6 100644 --- a/src/interaction_lists/struct_interaction_lists.h +++ b/src/interaction_lists/struct_interaction_lists.h @@ -4,17 +4,15 @@ struct InteractionLists { - int **approx_interactions; - int **direct_interactions; - - int *num_approx; - int *num_direct; - - int **cc_source_approx_interactions; - int **cc_target_approx_interactions; + int 
*num_pp; + int *num_cc; + int *num_pc; + int *num_cp; - int *num_cc_source_approx; - int *num_cc_target_approx; + int **pp_interactions; + int **cc_interactions; + int **pc_interactions; + int **cp_interactions; }; diff --git a/src/particles/struct_particles.h b/src/particles/struct_particles.h index 57a368b3..b7b32a63 100644 --- a/src/particles/struct_particles.h +++ b/src/particles/struct_particles.h @@ -16,7 +16,14 @@ struct Particles int *iend; int *order; - + + int sources_w_dummy; + int targets_q_dummy; + + int targets_x_duplicate; + int targets_y_duplicate; + int targets_z_duplicate; + int targets_q_duplicate; }; #endif /* H_PARTICLES_H */ diff --git a/src/run_params/run_params.c b/src/run_params/run_params.c index 8c9bdfdc..2f335e10 100644 --- a/src/run_params/run_params.c +++ b/src/run_params/run_params.c @@ -182,7 +182,7 @@ void RunParams_Print(struct RunParams *run_params) printf("[BaryTree] singularity = %d\n", run_params->singularity); printf("[BaryTree] compute_type = %d\n", run_params->compute_type); printf("[BaryTree] theta = %f\n", run_params->theta); - printf("[BaryTree] interp_degree = %d\n", run_params->interp_degree); + printf("[BaryTree] interp_degree = %d\n", run_params->interp_degree); printf("[BaryTree] interp_pts_per_cluster = %d\n", run_params->interp_pts_per_cluster); printf("[BaryTree] interp_weights_per_cluster = %d\n", run_params->interp_weights_per_cluster); printf("[BaryTree] interp_charges_per_cluster = %d\n", run_params->interp_charges_per_cluster); From 05d1cbb404100fcb84ff5657f75901302846a6f8 Mon Sep 17 00:00:00 2001 From: Nathan Vaughn Date: Thu, 8 Oct 2020 16:58:59 -0400 Subject: [PATCH 91/95] Performing timings inside Treedriver. 
V>0 gives total time, V>2 gives breakdown --- src/CMakeLists.txt | 2 + src/drivers/treedriver.c | 75 ++++++++++++++-- src/utilities/advanced_timings.c | 144 +++++++++++++++++++++++++++++++ src/utilities/advanced_timings.h | 14 +++ 4 files changed, 226 insertions(+), 9 deletions(-) create mode 100644 src/utilities/advanced_timings.c create mode 100644 src/utilities/advanced_timings.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f4a7336c..622d423d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,6 +8,8 @@ SET(SRCS_TREEDRIVER utilities/xmalloc.c utilities/array.h utilities/enums.h + utilities/advanced_timings.c + utilities/advanced_timings.h # Calling functions for tree and pp calculations drivers/directdriver.h drivers/directdriver.c diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 52bc2c3c..7c9648d7 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -7,6 +7,7 @@ #include "../utilities/tools.h" #include "../utilities/timers.h" #include "../utilities/enums.h" +#include "../utilities/advanced_timings.h" #include "../tree/struct_tree.h" #include "../tree/tree.h" @@ -62,9 +63,38 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0 && rank == 0) { printf("[BaryTree]\n"); - printf("[BaryTree] Running BaryTree with %d ranks.\n", num_procs); + printf("[BaryTree] BaryTree has started.\n"); RunParams_Print(run_params); } + if (run_params->verbosity > 1 ) { + + + int M_min_g, M_max_g, M_avg_g, N_min_g, N_max_g, N_avg_g; + + int M_max = targets->num; + int M_min = targets->num; + int M_avg = targets->num; + + int N_max = sources->num; + int N_min = sources->num; + int N_avg = sources->num; + + MPI_Reduce(&M_max, &M_max_g, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&M_min, &M_min_g, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&M_avg, &M_avg_g, 1, MPI_INT, 
MPI_SUM, 0, MPI_COMM_WORLD); + M_avg_g = M_avg_g / num_procs; + + MPI_Reduce(&N_max, &N_max_g, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&N_min, &N_min_g, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(&N_avg, &N_avg_g, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + N_avg_g = N_avg_g / num_procs; + + if ( rank == 0) { + printf("[BaryTree] %d target and %d source particles distributed over %d MPI ranks.\n", M_avg_g*num_procs, N_avg_g*num_procs, num_procs); + printf("[BaryTree] min, max, avg targets per rank: (%d,%d,%d).\n", M_min_g,M_max_g,M_avg_g); + printf("[BaryTree] min, max, avg sources per rank: (%d,%d,%d).\n", N_min_g,N_max_g,N_avg_g); + } + } double time1; long long int total_num_direct = 0; @@ -162,7 +192,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run STOP_TIMER(&time_tree[4]); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - if (run_params->verbosity > 0) { + if (run_params->verbosity > 2) { total_num_approx += sum_int(local_interaction_list->num_approx, batches->numnodes); total_num_direct += sum_int(local_interaction_list->num_direct, batches->numnodes); @@ -212,7 +242,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run STOP_TIMER(&time_tree[6]); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - if (run_params->verbosity > 0) { + if (run_params->verbosity > 2) { total_num_approx += sum_int(let_interaction_list->num_approx, remote_batches->numnodes); total_num_direct += sum_int(let_interaction_list->num_direct, remote_batches->numnodes); @@ -307,6 +337,8 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run // Total compute time time_tree[12] = time_tree[5] + time_tree[7] + time_tree[8]; + + MPI_Barrier(MPI_COMM_WORLD); @@ -367,7 +399,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run STOP_TIMER(&time_tree[2]); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ 
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - if (run_params->verbosity > 0) { + if (run_params->verbosity > 1) { Tree_Print(tree); Batches_Print(batches); } @@ -435,7 +467,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run STOP_TIMER(&time_tree[4]); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - if (run_params->verbosity > 0) { + if (run_params->verbosity > 2) { total_num_approx += sum_int(local_interaction_list->num_approx, batches->numnodes); total_num_direct += sum_int(local_interaction_list->num_direct, batches->numnodes); @@ -499,7 +531,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run time_tree[6] += time1; //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - if (run_params->verbosity > 0) { + if (run_params->verbosity > 2) { total_num_approx += sum_int(let_interaction_list->num_approx, batches->numnodes); total_num_direct += sum_int(let_interaction_list->num_direct, batches->numnodes); @@ -649,7 +681,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run STOP_TIMER(&time_tree[2]); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - if (run_params->verbosity > 0) { + if (run_params->verbosity > 1) { Tree_Print(source_tree); Tree_Print(target_tree); } @@ -718,7 +750,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run STOP_TIMER(&time_tree[4]); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - if (run_params->verbosity > 0) { + if (run_params->verbosity > 2) { total_num_approx += sum_int(local_interaction_list->num_approx, target_tree->numnodes); total_num_direct += sum_int(local_interaction_list->num_direct, target_tree->numnodes); @@ -794,7 +826,7 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run time_tree[6] += time1; //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ - if (run_params->verbosity > 0) { + if 
(run_params->verbosity > 2) { total_num_approx += sum_int(let_interaction_list->num_approx, target_tree->numnodes); total_num_direct += sum_int(let_interaction_list->num_direct, target_tree->numnodes); @@ -917,7 +949,21 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ + + double total_time[1], total_time_glob[1]; if (run_params->verbosity > 0) { + + /* Total treedriver time */ + total_time[0] = time_tree[0] + time_tree[1] + time_tree[2] + time_tree[3] + time_tree[4] + + time_tree[5] + time_tree[6] + time_tree[7] + time_tree[8] + time_tree[9] + + time_tree[10]; + + if (rank==0) { + printf("[BaryTree] Total BaryTree time: %1.3f seconds.\n", total_time[0]); + } + } + + if (run_params->verbosity > 2) { int global_num_inter, max_num_inter, min_num_inter; int global_num_direct, max_num_direct, min_num_direct; @@ -1061,6 +1107,17 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run printf("[BaryTree] BaryTree has finished.\n"); printf("[BaryTree]\n"); } + + + /* variables for date-time calculation */ + double time_tree_glob[3][13]; + + + + Timing_Calculate(time_tree_glob, time_tree, total_time_glob, total_time); + Timing_Print(time_tree_glob, total_time_glob, run_params); + + } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ E N D ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ diff --git a/src/utilities/advanced_timings.c b/src/utilities/advanced_timings.c new file mode 100644 index 00000000..856e7094 --- /dev/null +++ b/src/utilities/advanced_timings.c @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include + +#include "tools.h" +#include "../run_params/run_params.h" +#include "../run_params/struct_run_params.h" + +#include "advanced_timings.h" + + +/*----------------------------------------------------------------------------*/ +void Timing_Calculate(double time_tree_glob[3][13], double time_tree[13], double total_time_glob[1], double total_time[1]) +{ + 
+ MPI_Reduce(time_tree, &time_tree_glob[0], 13, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); + MPI_Reduce(time_tree, &time_tree_glob[1], 13, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(time_tree, &time_tree_glob[2], 13, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + + MPI_Reduce(total_time, total_time_glob, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + + return; +} + + +/*----------------------------------------------------------------------------*/ +void Timing_Print(double time_tree_glob[3][13], double total_time_glob[1], struct RunParams *run_params) +{ + int rank, numProcs; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &numProcs); + + if (rank == 0) { + + +// double avg_percent_tree=1.0; +// double max_percent_tree=1.0; + + /* Printing direct and treecode time calculations: */ + printf("[BaryTree]\n"); + printf("[BaryTree] "); + printf("Treecode timing summary (all times in seconds)...\n"); + printf("[BaryTree] "); + printf(" Max Avg Max/Min\n"); + printf("[BaryTree] "); + printf("| Treedriver...................... %9.3e s (100.00%%) \n", + total_time_glob[0]); + printf("[BaryTree] "); + printf("| |\n"); + printf("[BaryTree] "); + printf("| |....Build local tree........... %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][0], time_tree_glob[1][0] / total_time_glob[0]*100.0, + time_tree_glob[2][0]/numProcs, time_tree_glob[2][0]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][0]/time_tree_glob[0][0]); + printf("[BaryTree] "); + printf("| |....Build local batches........ %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][1], time_tree_glob[1][1] / total_time_glob[0]*100.0, + time_tree_glob[2][1]/numProcs, time_tree_glob[2][1]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][1]/time_tree_glob[0][1]); + printf("[BaryTree] "); + printf("| |....Build local clusters....... 
%9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][2], time_tree_glob[1][2] / total_time_glob[0]*100.0, + time_tree_glob[2][2]/numProcs, time_tree_glob[2][2]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][2]/time_tree_glob[0][2]); + + if (numProcs > 1) { + printf("[BaryTree] "); + printf("| |....Build LET.................. %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][3], time_tree_glob[1][3] / total_time_glob[0]*100.0, + time_tree_glob[2][3]/numProcs, time_tree_glob[2][3]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][3]/time_tree_glob[0][3]); + } + + printf("[BaryTree] "); + printf("| |....Build local lists.......... %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][4], time_tree_glob[1][4] / total_time_glob[0]*100.0, + time_tree_glob[2][4]/numProcs, time_tree_glob[2][4]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][4]/time_tree_glob[0][4]); + printf("[BaryTree] "); + printf("| |....Compute local.............. %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][5], time_tree_glob[1][5] / total_time_glob[0]*100.0, + time_tree_glob[2][5]/numProcs, time_tree_glob[2][5]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][5]/time_tree_glob[0][5]); + + if (numProcs > 1) { + printf("[BaryTree] "); + printf("| |....Build remote lists......... %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][6], time_tree_glob[1][6] / total_time_glob[0]*100.0, + time_tree_glob[2][6]/numProcs, time_tree_glob[2][6]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][6]/time_tree_glob[0][6]); + printf("[BaryTree] "); + printf("| |....Compute remote............. 
%9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][7], time_tree_glob[1][7] / total_time_glob[0]*100.0, + time_tree_glob[2][7]/numProcs, time_tree_glob[2][7]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][7]/time_tree_glob[0][7]); + } + + if (run_params->compute_type == CLUSTER_PARTICLE || run_params->compute_type == CLUSTER_CLUSTER) { + printf("[BaryTree] "); + printf("| |....Compute cp2................ %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][8], time_tree_glob[1][8] / total_time_glob[0]*100.0, + time_tree_glob[2][8]/numProcs, time_tree_glob[2][8]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][8]/time_tree_glob[0][8]); + } + + printf("[BaryTree] "); + printf("| |....Correct potential.......... %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][9], time_tree_glob[1][9] / total_time_glob[0]*100.0, + time_tree_glob[2][9]/numProcs, time_tree_glob[2][9]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][9]/time_tree_glob[0][9]); + printf("[BaryTree] "); + printf("| |....Cleanup.................... %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f \n", + time_tree_glob[1][10], time_tree_glob[1][10] / total_time_glob[0]*100.0, + time_tree_glob[2][10]/numProcs, time_tree_glob[2][10]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][10]/time_tree_glob[0][10]); + printf("[BaryTree]\n"); + + if (numProcs > 1) { + printf("[BaryTree] "); + printf("(( |....Total setup................ %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f ))\n", + time_tree_glob[1][11], time_tree_glob[1][11] / total_time_glob[0]*100.0, + time_tree_glob[2][11]/numProcs, time_tree_glob[2][11]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][11]/time_tree_glob[0][11]); + printf("[BaryTree] "); + printf("(( |....Build local clusters....... 
%9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f ))\n", + time_tree_glob[1][02], time_tree_glob[1][02] / total_time_glob[0]*100.0, + time_tree_glob[2][02]/numProcs, time_tree_glob[2][02]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][02]/time_tree_glob[0][02]); + printf("[BaryTree] "); + printf("(( |....Total compute.............. %9.3e s (%6.2f%%) %9.3e s (%6.2f%%) %8.3f ))\n", + time_tree_glob[1][12], time_tree_glob[1][12] / total_time_glob[0]*100.0, + time_tree_glob[2][12]/numProcs, time_tree_glob[2][12]/numProcs / total_time_glob[0]*100.0, + time_tree_glob[1][12]/time_tree_glob[0][12]); + printf("[BaryTree]\n"); + printf("[BaryTree]\n"); + } + } + + return; +} + + diff --git a/src/utilities/advanced_timings.h b/src/utilities/advanced_timings.h new file mode 100644 index 00000000..a361bcee --- /dev/null +++ b/src/utilities/advanced_timings.h @@ -0,0 +1,14 @@ +#ifndef H_ADVANCED_TIMINGS_H +#define H_ADVANCED_TIMINGS_H + +#include + +#include "../run_params/struct_run_params.h" + + +void Timing_Calculate(double time_tree_glob[3][13], double time_tree[13], double total_time_glob[1], double total_time[1]); + +void Timing_Print(double time_tree_glob[3][13], double total_time_glob[1], struct RunParams *run_params); + + +#endif /* H_ADVANCED_TIMINGS_H */ From 7c3ca31a0fb49353c083df148acf18928e42a410 Mon Sep 17 00:00:00 2001 From: "Leighton W. 
Wilson" Date: Thu, 8 Oct 2020 20:52:37 -0400 Subject: [PATCH 92/95] Making sure particle arrays are properly accounted for --- src/drivers/treedriver.c | 17 +++----------- src/particles/particles.c | 49 +++++++++++++++++++++++++++++++++++---- src/particles/particles.h | 5 ++-- 3 files changed, 51 insertions(+), 20 deletions(-) diff --git a/src/drivers/treedriver.c b/src/drivers/treedriver.c index 594667f8..babc5384 100644 --- a/src/drivers/treedriver.c +++ b/src/drivers/treedriver.c @@ -44,21 +44,10 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run MPI_Comm_size(MPI_COMM_WORLD, &num_procs); RunParams_Validate(run_params); - Particles_Validate(sources, targets); + Particles_Validate(sources, targets, run_params); Particles_ConstructOrder(sources); Particles_ConstructOrder(targets); - int sources_w_dummy = 0; - int targets_q_dummy = 0; - - if (sources->w == NULL) { - sources_w_dummy = 1; - make_vector(sources->w, sources->num); - } - if (targets->q == NULL) { - targets_q_dummy = 1; - make_vector(targets->q, targets->num); - } //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if (run_params->verbosity > 0 && rank == 0) { @@ -978,8 +967,8 @@ void treedriver(struct Particles *sources, struct Particles *targets, struct Run } - if (sources_w_dummy) free_vector(sources->w); - if (targets_q_dummy) free_vector(targets->q); + if (sources->sources_w_dummy) free_vector(sources->w); + if (targets->targets_q_dummy) free_vector(targets->q); //~ ~ ~ D I A G N O S T I C S ~ ~ ~ S T A R T ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ diff --git a/src/particles/particles.c b/src/particles/particles.c index cdc11894..bd38bdd1 100644 --- a/src/particles/particles.c +++ b/src/particles/particles.c @@ -154,33 +154,74 @@ void Particles_FreeOrder(struct Particles *particles) -void Particles_Validate(struct Particles *sources, struct Particles *targets) +void Particles_Validate(struct Particles *sources, struct Particles *targets, struct RunParams *run_params) 
{ int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - if (sources->x == targets->x) { + sources->sources_w_dummy = 0; + targets->targets_q_dummy = 0; + + targets->targets_x_duplicate = 0; + targets->targets_y_duplicate = 0; + targets->targets_z_duplicate = 0; + targets->targets_q_duplicate = 0; + + if (sources->x == NULL || sources->y == NULL || sources->z == NULL || sources->q == NULL + || targets->x == NULL || targets->y == NULL || targets->z == NULL) { printf("[BaryTree]\n"); - printf("[BaryTree] Sources and targets cannot be the same location in memory.\n"); - printf("[BaryTree] Making duplicate arrays for targets.\n"); + printf("[BaryTree] ERROR! One or more required particle arrays is unset. Exiting.\n"); + printf("[BaryTree]\n"); + exit(1); + } + + if (sources->w == NULL && run_params->singularity == SUBTRACTION) { printf("[BaryTree]\n"); + printf("[BaryTree] ERROR! Singularity subtraction requires weights for the sources. Exiting.\n"); + printf("[BaryTree]\n"); + exit(1); + } + + if (sources->w == NULL) { + make_vector(sources->w, sources->num); + sources->sources_w_dummy = 1; + } + + if (targets->q == NULL) { + make_vector(targets->q, targets->num); + targets->targets_q_dummy = 1; + } + + if (sources->x == targets->x) { make_vector(targets->x, targets->num); memcpy(targets->x, sources->x, targets->num * sizeof(double)); + targets->targets_x_duplicate = 1; } if (sources->y == targets->y) { make_vector(targets->y, targets->num); memcpy(targets->y, sources->y, targets->num * sizeof(double)); + targets->targets_y_duplicate = 1; } if (sources->z == targets->z) { make_vector(targets->z, targets->num); memcpy(targets->z, sources->z, targets->num * sizeof(double)); + targets->targets_z_duplicate = 1; } if (sources->q == targets->q) { make_vector(targets->q, targets->num); memcpy(targets->q, sources->q, targets->num * sizeof(double)); + targets->targets_q_duplicate = 1; + } + + if (targets->targets_x_duplicate || targets->targets_y_duplicate || 
targets->targets_z_duplicate + || targets->targets_q_duplicate) { + printf("[BaryTree]\n"); + printf("[BaryTree] Sources and targets cannot be the same location in memory.\n"); + printf("[BaryTree] Making duplicate arrays for targets as necessary.\n"); + printf("[BaryTree]\n"); } return; diff --git a/src/particles/particles.h b/src/particles/particles.h index 00ca73ad..e2cf224e 100644 --- a/src/particles/particles.h +++ b/src/particles/particles.h @@ -1,9 +1,9 @@ #ifndef H_PARTICLE_FUNCTIONS_H #define H_PARTICLE_FUNCTIONS_H +#include "../run_params/struct_run_params.h" #include "struct_particles.h" - void Particles_Alloc(struct Particles **particles_addr, int length); void Particles_Free(struct Particles **particles_addr); @@ -16,7 +16,8 @@ void Particles_ConstructOrder(struct Particles *particles); void Particles_FreeOrder(struct Particles *particles); -void Particles_Validate(struct Particles *sources, struct Particles *targets); +void Particles_Validate(struct Particles *sources, struct Particles *targets, + struct RunParams *run_params); #endif /* H_PARTICLE_FUNCTIONS */ From e848cb587690adbb1861f315edb7485426ca0677 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Wed, 21 Oct 2020 12:36:19 -0400 Subject: [PATCH 93/95] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8a586b0f..60ac94ab 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,9 @@ BaryTree ======== A work-in-progress library for fast computation of N-body interactions on multiple GPUs, - BaryTree implements barycentric Lagrange and Hermite polynomial interpolation treecodes. - The current code employs an OpenACC GPU implementation. + BaryTree implements barycentric Lagrange and Hermite polynomial interpolation fast + summation methods. The current code employs an OpenACC GPU implementation with MPI + for distributed memory parallelization. 
Authors: From ee3f5cd41de0202c198661f0da897c3d88d33259 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Wed, 21 Oct 2020 12:55:35 -0400 Subject: [PATCH 94/95] Update README.md --- examples/README.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/examples/README.md b/examples/README.md index 86045a42..537f7c1c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -30,17 +30,20 @@ The parameters that can be specified in the infile are as follows: | `num_particles` | Number of sources and targets. Its use is exclusive with the `num_sources` and `num_targets` parameters. | `num_sources` | Number of sources. | `num_targets` | Number of targets. -| `order` | Order of polynomial interpolation. +| `distribution` | Underlying particle distribution: `UNIFORM`, `GAUSSIAN`, `EXPONENTIAL`, `PLUMMER`, or `PLUMMER_SYMMETRIC`. +| `degree` | Degree of polynomial interpolation. | `theta` | Multipole acceptance criterion (MAC). -| `max_per_leaf` | Maximum number of particles per tree leaf. -| `max_per_batch` | Maximum number of particles per batch. -| `kernel_name` | Name of interaction kernel: `yukawa` or `coulomb`. -| `approximation` | Type of polynomial: `lagrange` and `hermite`. -| `size_check` | If the product of this parameter and the number of interpolation points in a cluster is greater than the number of particles in the cluster, then the interaction will be performed directly even if the MAC is accepted. +| `max_per_source_leaf` | Maximum number of particles per source tree leaf (or source batch, for `CLUSTER_PARTICLE`). +| `max_per_target_leaf` | Maximum number of particles per target tree leaf (or target batch, for `PARTICLE_CLUSTER`). +| `beta` | Automatic tuning accuracy parameter. Number in [0,1], higher is more accurate. +| `compute_type` | Type of treecode method. `CLUSTER_PARTICLE`, `PARTICLE_CLUSTER` (i.e. BLTC), `CLUSTER_CLUSTER` (i.e. BLDTT). +| `approximation` | Type of polynomial: `LAGRANGE` and `HERMITE`. 
`HERMITE` is incompatible with cluster-cluster. +| `kernel_name` | Name of interaction kernel: `COULOMB`, `YUKAWA`, `REGULARIZED_COULOMB`, `REGULARIZED_YUKAWA`, `SIN_OVER_R`, `USER`. +| `kernel_params` | Comma separated list of parameters for given kernel. | `run_direct` | Run direct calculation for error comparison: `on` or `off`. -| `verbosity` | Determines verbosity level of output. `0` is quiet, `1` is verbose. +| `verbosity` | Determines verbosity level of output. Integer `0`, `1`, `2`, `3`. Higher means more output. | `slice` | Determines the proportion of target sites at which the direct calculation is performed for error comparison. -| `kernel_params` | Comma separated list of parameters for given kernel. + Note the difference between these executables: From 6b8e963acfb9fe5bb50fab21263dece421b57d60 Mon Sep 17 00:00:00 2001 From: "Leighton W. Wilson" Date: Wed, 21 Oct 2020 12:57:06 -0400 Subject: [PATCH 95/95] Update README.md --- examples/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/README.md b/examples/README.md index 537f7c1c..82520a79 100644 --- a/examples/README.md +++ b/examples/README.md @@ -40,9 +40,9 @@ The parameters that can be specified in the infile are as follows: | `approximation` | Type of polynomial: `LAGRANGE` and `HERMITE`. `HERMITE` is incompatible with cluster-cluster. | `kernel_name` | Name of interaction kernel: `COULOMB`, `YUKAWA`, `REGULARIZED_COULOMB`, `REGULARIZED_YUKAWA`, `SIN_OVER_R`, `USER`. | `kernel_params` | Comma separated list of parameters for given kernel. -| `run_direct` | Run direct calculation for error comparison: `on` or `off`. +| `run_direct` | Run direct calculation for error comparison: `ON` or `OFF`. | `verbosity` | Determines verbosity level of output. Integer `0`, `1`, `2`, `3`. Higher means more output. -| `slice` | Determines the proportion of target sites at which the direct calculation is performed for error comparison. 
+| `slice` | Determines the proportion of target sites at which the direct calculation is performed for error comparison. 10 would mean every 10th target is sampled. Note the difference between these executables: