Skip to content

Commit

Permalink
Merge pull request #72 from Treecodes/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
lwwilson1 authored Oct 21, 2020
2 parents 83bba1b + 6b8e963 commit 86cc0e5
Show file tree
Hide file tree
Showing 112 changed files with 5,905 additions and 1,794 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ BaryTree
========

A work-in-progress library for fast computation of N-body interactions on multiple GPUs,
BaryTree implements barycentric Lagrange and Hermite polynomial interpolation treecodes.
The current code employs an OpenACC GPU implementation.
BaryTree implements barycentric Lagrange and Hermite polynomial interpolation fast
summation methods. The current code employs an OpenACC GPU implementation with MPI
for distributed memory parallelization.


Authors:
Expand Down
8 changes: 8 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ if(BUILD_EXAMPLES)
target_link_libraries(random_cube_reproducible_cpu PRIVATE BaryTree_cpu Zoltan_Interface)
install(TARGETS random_cube_reproducible_cpu DESTINATION bin)

add_executable(run_readin_cpu run_readin.c ${AUX_SRCS})
target_link_libraries(run_readin_cpu PRIVATE BaryTree_cpu Zoltan_Interface)
install(TARGETS run_readin_cpu DESTINATION bin)

add_executable(test_BaryTreeInterface_cpu test_BaryTreeInterface.c)
target_link_libraries(test_BaryTreeInterface_cpu PRIVATE BaryTree_cpu)
install(TARGETS test_BaryTreeInterface_cpu DESTINATION bin)
Expand All @@ -27,6 +31,10 @@ if(BUILD_EXAMPLES)
target_link_libraries(random_cube_reproducible_gpu PRIVATE BaryTree_gpu Zoltan_Interface)
install(TARGETS random_cube_reproducible_gpu DESTINATION bin)

add_executable(run_readin_gpu run_readin.c ${AUX_SRCS})
target_link_libraries(run_readin_gpu PRIVATE BaryTree_gpu Zoltan_Interface)
install(TARGETS run_readin_gpu DESTINATION bin)

add_executable(test_BaryTreeInterface_gpu test_BaryTreeInterface.c)
target_link_libraries(test_BaryTreeInterface_gpu PRIVATE BaryTree_gpu)
install(TARGETS test_BaryTreeInterface_gpu DESTINATION bin)
Expand Down
21 changes: 12 additions & 9 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,20 @@ The parameters that can be specified in the infile are as follows:
| `num_particles` | Number of sources and targets. Its use is exclusive with the `num_sources` and `num_targets` parameters.
| `num_sources` | Number of sources.
| `num_targets` | Number of targets.
| `order` | Order of polynomial interpolation.
| `distribution` | Underlying particle distribution: `UNIFORM`, `GAUSSIAN`, `EXPONENTIAL`, `PLUMMER`, or `PLUMMER_SYMMETRIC`.
| `degree` | Degree of polynomial interpolation.
| `theta` | Multipole acceptance criterion (MAC).
| `max_per_leaf` | Maximum number of particles per tree leaf.
| `max_per_batch` | Maximum number of particles per batch.
| `kernel_name` | Name of interaction kernel: `yukawa` or `coulomb`.
| `approximation` | Type of polynomial: `lagrange` and `hermite`.
| `size_check` | If the product of this parameter and the number of interpolation points in a cluster is greater than the number of particles in the cluster, then the interaction will be performed directly even if the MAC is accepted.
| `run_direct` | Run direct calculation for error comparison: `on` or `off`.
| `verbosity` | Determines verbosity level of output. `0` is quiet, `1` is verbose.
| `slice` | Determines the proportion of target sites at which the direct calculation is performed for error comparison.
| `max_per_source_leaf` | Maximum number of particles per source tree leaf (or source batch, for `CLUSTER_PARTICLE`).
| `max_per_target_leaf` | Maximum number of particles per target tree leaf (or target batch, for `PARTICLE_CLUSTER`).
| `beta` | Automatic tuning accuracy parameter: a number in [0,1], where higher is more accurate.
| `compute_type` | Type of treecode method: `CLUSTER_PARTICLE`, `PARTICLE_CLUSTER` (i.e. BLTC), or `CLUSTER_CLUSTER` (i.e. BLDTT).
| `approximation` | Type of polynomial: `LAGRANGE` or `HERMITE`. `HERMITE` is incompatible with `CLUSTER_CLUSTER`.
| `kernel_name` | Name of interaction kernel: `COULOMB`, `YUKAWA`, `REGULARIZED_COULOMB`, `REGULARIZED_YUKAWA`, `SIN_OVER_R`, `USER`.
| `kernel_params` | Comma separated list of parameters for given kernel.
| `run_direct` | Run direct calculation for error comparison: `ON` or `OFF`.
| `verbosity` | Determines verbosity level of output: an integer `0`, `1`, `2`, or `3`. Higher means more output.
| `slice` | Determines the proportion of target sites at which the direct calculation is performed for error comparison. For example, `10` means every 10th target is sampled.


Note the difference between these executables:

Expand Down
4 changes: 3 additions & 1 deletion examples/example.in
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
num_sources 20000
num_targets 20000
order 2
degree 2
theta 0.9
beta -1.0
size_check 0.0
max_per_source_leaf 100
max_per_target_leaf 100
kernel_name coulomb
kernel_params 1.0
approximation lagrange
compute_type particle-cluster
distribution uniform
run_direct 1
slice 10
verbosity 1
68 changes: 42 additions & 26 deletions examples/random_cube.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,19 @@ int main(int argc, char **argv)

/* run parameters */
int N, M, run_direct, slice;
double xyz_limits[6];
DISTRIBUTION distribution;
PARTITION partition;
int sample_size = 1000000;

struct RunParams *run_params = NULL;
int sample_size = 10000;

FILE *fp = fopen(argv[1], "r");
Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice);
Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution, &partition);

double xmin = xyz_limits[0], xmax = xyz_limits[1];
double ymin = xyz_limits[2], ymax = xyz_limits[3];
double zmin = xyz_limits[4], zmax = xyz_limits[5];

/* Zoltan variables */
int rc;
Expand Down Expand Up @@ -94,15 +103,13 @@ int main(int argc, char **argv)
time_t t = time(NULL);
unsigned t_hashed = (unsigned) t;
t_hashed = mrand * t_hashed + crand;
srand(t_hashed ^ rank);
srand(1);
srandom(t_hashed ^ rank);
//srandom(1);

for (int i = 0; i < sample_size; ++i) {
mySources.x[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
mySources.y[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
mySources.z[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
mySources.q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
mySources.w[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
mySources.x[i] = Point_Set_Init(distribution);
mySources.y[i] = Point_Set_Init(distribution);
mySources.z[i] = Point_Set_Init(distribution);
mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i);

mySources.b[i] = 1.0; // dummy weighting scheme
Expand Down Expand Up @@ -158,8 +165,6 @@ int main(int argc, char **argv)
mySources.x[i] = mySources.x[mySources.numMyPoints-1];
mySources.y[i] = mySources.y[mySources.numMyPoints-1];
mySources.z[i] = mySources.z[mySources.numMyPoints-1];
mySources.q[i] = mySources.q[mySources.numMyPoints-1];
mySources.w[i] = mySources.w[mySources.numMyPoints-1];
mySources.myGlobalIDs[i] = mySources.myGlobalIDs[mySources.numMyPoints-1];
mySources.numMyPoints--;
} else {
Expand All @@ -174,12 +179,12 @@ int main(int argc, char **argv)
exit(0);
}

double xmin = minval(mySources.x, mySources.numMyPoints);
double ymin = minval(mySources.y, mySources.numMyPoints);
double zmin = minval(mySources.z, mySources.numMyPoints);
double xmax = maxval(mySources.x, mySources.numMyPoints);
double ymax = maxval(mySources.y, mySources.numMyPoints);
double zmax = maxval(mySources.z, mySources.numMyPoints);
double zz_bound_x_min = minval(mySources.x, mySources.numMyPoints);
double zz_bound_y_min = minval(mySources.y, mySources.numMyPoints);
double zz_bound_z_min = minval(mySources.z, mySources.numMyPoints);
double zz_bound_x_max = maxval(mySources.x, mySources.numMyPoints);
double zz_bound_y_max = maxval(mySources.y, mySources.numMyPoints);
double zz_bound_z_max = maxval(mySources.z, mySources.numMyPoints);


Zoltan_LB_Free_Part(&importGlobalGids, &importLocalGids,
Expand Down Expand Up @@ -224,12 +229,23 @@ int main(int argc, char **argv)
/* Generating sources and targets based on Zoltan bounding box */

for (int i = 0; i < sources->num; ++i) {
sources->x[i] = ((double)rand()/(double)(RAND_MAX)) * (xmax-xmin) + xmin;
sources->y[i] = ((double)rand()/(double)(RAND_MAX)) * (ymax-ymin) + ymin;
sources->z[i] = ((double)rand()/(double)(RAND_MAX)) * (zmax-zmin) + zmin;
sources->q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
sources->w[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
sources->x[i] = Point_Set(distribution, zz_bound_x_min, zz_bound_x_max) * (xmax-xmin) + xmin;
sources->y[i] = Point_Set(distribution, zz_bound_y_min, zz_bound_y_max) * (ymax-ymin) + ymin;
sources->z[i] = Point_Set(distribution, zz_bound_z_min, zz_bound_z_max) * (zmax-zmin) + zmin;

sources->q[i] = Point_Set(UNIFORM, -1., 1.);
sources->w[i] = Point_Set(UNIFORM, -1., 1.);
}

/*
char points_file[256];
sprintf(points_file, "points_rank_%d.csv", rank);
FILE *points_fp = fopen(points_file, "w");
for (int i = 0; i < sources->num; ++i) {
fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]);
}
fclose(points_fp);
*/

/* MPI-allocated target arrays for RMA use */

Expand All @@ -241,10 +257,10 @@ int main(int argc, char **argv)
/* Generating targets based on Zoltan bounding box */

for (int i = 0; i < targets->num; ++i) {
targets->x[i] = ((double)rand()/(double)(RAND_MAX)) * (xmax-xmin) + xmin;
targets->y[i] = ((double)rand()/(double)(RAND_MAX)) * (ymax-ymin) + ymin;
targets->z[i] = ((double)rand()/(double)(RAND_MAX)) * (zmax-zmin) + zmin;
targets->q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
targets->x[i] = Point_Set(distribution, zz_bound_x_min, zz_bound_x_max) * (xmax-xmin) + xmin;
targets->y[i] = Point_Set(distribution, zz_bound_y_min, zz_bound_y_max) * (ymax-ymin) + ymin;
targets->z[i] = Point_Set(distribution, zz_bound_z_min, zz_bound_z_max) * (zmax-zmin) + zmin;
targets->q[i] = Point_Set(UNIFORM, -1., 1.);
}

#ifdef OPENACC_ENABLED
Expand Down
Loading

0 comments on commit 86cc0e5

Please sign in to comment.