diff --git a/simtbx/diffBragg/device.py b/simtbx/diffBragg/device.py index 1dade37c57..fcd4bc952a 100644 --- a/simtbx/diffBragg/device.py +++ b/simtbx/diffBragg/device.py @@ -21,6 +21,7 @@ def __init__(self, gpu_id, force=False): """ self.gpu_id = gpu_id self.force_kokkos_init = force + self.is_using_kokkos_gpu = USE_KOKKOS_GPU def __enter__(self): if USE_KOKKOS_GPU or self.force_kokkos_init: diff --git a/simtbx/diffBragg/phil.py b/simtbx/diffBragg/phil.py index b6558f77fb..2414d298d4 100644 --- a/simtbx/diffBragg/phil.py +++ b/simtbx/diffBragg/phil.py @@ -1319,6 +1319,9 @@ .help = diffbragg offers CUDA support via the DIFFBRAGG_USE_CUDA=1 environment variable specification .help = or openmp support using the OMP_NUM_THREADS flag .help = The exascale only uses CUDA (will raise error if CUDA is not confugured) + mosaic_samples_override = None + .type = int + .help = Specify the number of mosaic spread samples } """ diff --git a/simtbx/diffBragg/src/diffBragg.cpp b/simtbx/diffBragg/src/diffBragg.cpp index a42cc5b960..03fa68afee 100644 --- a/simtbx/diffBragg/src/diffBragg.cpp +++ b/simtbx/diffBragg/src/diffBragg.cpp @@ -1967,7 +1967,7 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows db_cryst.Nd = Nd; db_cryst.Ne = Ne; db_cryst.Nf = Nf; - + db_cryst.xtal_shape = xtal_shape; db_beam.number_of_sources = sources; db_beam.source_X = source_X; diff --git a/simtbx/diffBragg/src/diffBraggCUDA.cu b/simtbx/diffBragg/src/diffBraggCUDA.cu index 9c58687f40..1e5731e8c3 100644 --- a/simtbx/diffBragg/src/diffBraggCUDA.cu +++ b/simtbx/diffBragg/src/diffBraggCUDA.cu @@ -39,13 +39,6 @@ void diffBragg_sum_over_steps_cuda( diffBragg_cudaPointers& cp, timer_variables& TIMERS){ - - if (db_cryst.phisteps > 1){ - printf("PHI (goniometer rotations) not supported in GPU code: phi0=%f phisteps=%d, phistep=%f\n", db_cryst.phi0, db_cryst.phisteps, db_cryst.phistep); - printf("SPINDLE VEC: %f %f %f\n",db_cryst.spindle_vec[0], db_cryst.spindle_vec[1], db_cryst.spindle_vec[2]); - exit(-1); - } - int numblocks; int blocksize; char* diffBragg_blocks = getenv("DIFFBRAGG_NUM_BLOCKS"); @@ -513,7 +506,9 @@ void diffBragg_sum_over_steps_cuda( cp.data_freq, cp.data_trusted, cp.FhklLinear_ASUid, cp.Fhkl_channels, - cp.Fhkl_scale, cp.Fhkl_scale_deriv + cp.Fhkl_scale, cp.Fhkl_scale_deriv, + db_cryst.xtal_shape==GAUSS_STAR, + db_cryst.xtal_shape==SQUARE ); error_msg(cudaGetLastError(), "after kernel call"); diff --git a/simtbx/diffBragg/src/diffBraggKOKKOS.cpp b/simtbx/diffBragg/src/diffBraggKOKKOS.cpp index 273088f765..bca80ba988 100644 --- a/simtbx/diffBragg/src/diffBraggKOKKOS.cpp +++ b/simtbx/diffBragg/src/diffBraggKOKKOS.cpp @@ -57,12 +57,6 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( cuda_flags& db_cu_flags, // diffBragg_kokkosPointers& kp, timer_variables& TIMERS) { - if (db_cryst.phi0 != 0 || db_cryst.phisteps > 1) { - printf( - "PHI (goniometer position) not supported in GPU code: phi0=%f phisteps=%d, phistep=%f\n", - db_cryst.phi0, db_cryst.phisteps, db_cryst.phistep); - exit(-1); - } Kokkos::Tools::pushRegion("diffBragg_sum_over_steps_kokkos"); Kokkos::Tools::pushRegion("local detector, beam and crystal"); @@ -529,7 +523,10 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( m_data_freq, m_data_trusted, m_FhklLinear_ASUid, m_Fhkl_channels, - m_Fhkl_scale, m_Fhkl_scale_deriv); + m_Fhkl_scale, m_Fhkl_scale_deriv, + db_cryst.xtal_shape==GAUSS_STAR, + db_cryst.xtal_shape==SQUARE + ); } else { kokkos_sum_over_steps( Npix_to_model, m_panels_fasts_slows, m_floatimage, m_wavelenimage, m_d_Umat_images, @@ -568,7 +565,9 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( m_data_freq, m_data_trusted, m_FhklLinear_ASUid, m_Fhkl_channels, - m_Fhkl_scale, m_Fhkl_scale_deriv + m_Fhkl_scale, m_Fhkl_scale_deriv, + db_cryst.xtal_shape==GAUSS_STAR, + db_cryst.xtal_shape==SQUARE ); } diff --git a/simtbx/diffBragg/src/diffBraggKOKKOS.h b/simtbx/diffBragg/src/diffBraggKOKKOS.h index 3aa07ae27b..7df7346b46 100644 --- a/simtbx/diffBragg/src/diffBraggKOKKOS.h +++ b/simtbx/diffBragg/src/diffBraggKOKKOS.h @@ -10,6 +10,7 @@ #include "simtbx/diffBragg/src/util.h" #include "simtbx/diffBragg/src/util_kokkos.h" #include "simtbx/diffBragg/src/diffBragg_refine_flag.h" +#include "simtbx/nanoBragg/nanotypes.h" using vector_vec3_t = view_1d_t; using vector_mat3_t = view_1d_t; diff --git a/simtbx/diffBragg/src/diffBragg_cpu_kernel.cpp b/simtbx/diffBragg/src/diffBragg_cpu_kernel.cpp index 94cd9fa501..d3458b7ac2 100644 --- a/simtbx/diffBragg/src/diffBragg_cpu_kernel.cpp +++ b/simtbx/diffBragg/src/diffBragg_cpu_kernel.cpp @@ -401,6 +401,13 @@ void diffBragg_sum_over_steps( double fp_fdp_manager_dI[2] = {0,0}; double dI_latt_diffuse[6] = {0,0,0,0,0,0}; + // compute unit cell volume + Eigen::Vector3d ap_vec(1e10*db_cryst.eig_B(0,0), 1e10*db_cryst.eig_B(1,0), 1e10*db_cryst.eig_B(2,0)); + Eigen::Vector3d bp_vec(1e10*db_cryst.eig_B(0,1), 1e10*db_cryst.eig_B(1,1), 1e10*db_cryst.eig_B(2,1)); + Eigen::Vector3d cp_vec(1e10*db_cryst.eig_B(0,2), 1e10*db_cryst.eig_B(1,2), 1e10*db_cryst.eig_B(2,2)); + double cell_vol = ap_vec.dot(bp_vec.cross(cp_vec)); // cubic Angstrom + double xtal_size = pow(db_cryst.Na*db_cryst.Nb*db_cryst.Nc*cell_vol, (double)1/double(3)); // Angstrom + for (int i_step=0; i_step < db_steps.Nsteps; i_step++){ int subS = db_steps.subS_pos[i_step]; @@ -474,25 +481,21 @@ void diffBragg_sum_over_steps( } } - double phi = db_cryst.phi0 + db_cryst.phistep*phi_tic; - Eigen::Matrix3d Bmat_realspace = db_cryst.eig_B; - if( phi != 0.0 ) - { - double cosphi = cos(phi); - double sinphi = sin(phi); - Eigen::Vector3d ap_vec(db_cryst.eig_B(0,0), db_cryst.eig_B(1,0), db_cryst.eig_B(2,0)); - Eigen::Vector3d bp_vec(db_cryst.eig_B(0,1), db_cryst.eig_B(1,1), db_cryst.eig_B(2,1)); - Eigen::Vector3d cp_vec(db_cryst.eig_B(0,2), db_cryst.eig_B(1,2), db_cryst.eig_B(2,2)); - - ap_vec = ap_vec*cosphi + db_cryst.spindle_vec.cross(ap_vec)*sinphi + db_cryst.spindle_vec*(db_cryst.spindle_vec.dot(ap_vec))*(1-cosphi); - bp_vec = bp_vec*cosphi + db_cryst.spindle_vec.cross(bp_vec)*sinphi + db_cryst.spindle_vec*(db_cryst.spindle_vec.dot(bp_vec))*(1-cosphi); - cp_vec = cp_vec*cosphi + db_cryst.spindle_vec.cross(cp_vec)*sinphi + db_cryst.spindle_vec*(db_cryst.spindle_vec.dot(cp_vec))*(1-cosphi); - - Bmat_realspace << ap_vec[0], bp_vec[0], cp_vec[0], - ap_vec[1], bp_vec[1], cp_vec[1], - ap_vec[2], bp_vec[2], cp_vec[2]; - } - Bmat_realspace *= 1e10; + Eigen::Matrix3d Bmat_realspace = 1e10*db_cryst.eig_B; + //if( phi != 0.0 ) + //{ + // double cosphi = cos(phi); + // double sinphi = sin(phi); + + // ap_vec = ap_vec*cosphi + db_cryst.spindle_vec.cross(ap_vec)*sinphi + db_cryst.spindle_vec*(db_cryst.spindle_vec.dot(ap_vec))*(1-cosphi); + // bp_vec = bp_vec*cosphi + db_cryst.spindle_vec.cross(bp_vec)*sinphi + db_cryst.spindle_vec*(db_cryst.spindle_vec.dot(bp_vec))*(1-cosphi); + // cp_vec = cp_vec*cosphi + db_cryst.spindle_vec.cross(cp_vec)*sinphi + db_cryst.spindle_vec*(db_cryst.spindle_vec.dot(cp_vec))*(1-cosphi); + + // Bmat_realspace << ap_vec[0], bp_vec[0], cp_vec[0], + // ap_vec[1], bp_vec[1], cp_vec[1], + // ap_vec[2], bp_vec[2], cp_vec[2]; + //} + //Bmat_realspace *= 1e10; if (db_flags.use_diffuse && db_flags.gamma_miller_units){ anisoG_local = anisoG_local * Bmat_realspace; for (int i_gam=0; i_gam<3; i_gam++){ @@ -501,9 +504,28 @@ void diffBragg_sum_over_steps( } Eigen::Matrix3d U = db_cryst.eig_U; - Eigen::Matrix3d UBO = (db_cryst.UMATS_RXYZ[mos_tic] * U*Bmat_realspace*(db_cryst.eig_O.transpose())).transpose(); - Eigen::Matrix3d Ainv = U*(Bmat_realspace.transpose().inverse())* (db_cryst.eig_O.inverse()); + Eigen::Matrix3d UBOt=U*Bmat_realspace*(db_cryst.eig_O.transpose()); + double phi = db_cryst.phi0 + db_cryst.phistep*phi_tic; + if (phi != 0){ + if (i_pix==0){ + printf("phistep=%f, phi0=%f, phi=%f\n", db_cryst.phistep, db_cryst.phi0, phi); + } + + double c = cos(phi); + double omc = 1-c; + double s = sin(phi); + Eigen::Matrix3d Rphi; + double gx = db_cryst.spindle_vec[0]; + double gy = db_cryst.spindle_vec[1]; + double gz = db_cryst.spindle_vec[2]; + Rphi << c + gx*gx*omc, gx*gy*omc-gz*s, gx*gz*omc+gy*s, + gy*gx*omc + gz*s, c + gy*gy*omc, gy*gz*omc - gx*s, + gz*gx*omc - gy*s, gz*gy*omc + gx*s, c + gz*gz*omc; + UBOt = Rphi*UBOt; + } + Eigen::Matrix3d UBO = (db_cryst.UMATS_RXYZ[mos_tic]*UBOt).transpose(); + Eigen::Matrix3d Ainv = UBO.inverse(); Eigen::Vector3d q_vec(scattering[0], scattering[1], scattering[2]); q_vec *= 1e-10; Eigen::Vector3d H_vec = UBO*q_vec; @@ -528,12 +550,30 @@ void diffBragg_sum_over_steps( Eigen::Vector3d V = NABC*delta_H; double hrad_sqr = V.dot(V); double NABC_determ = NABC.determinant(); - double F_latt; - if (db_flags.no_Nabc_scale) - F_latt = exp(-( hrad_sqr / 0.63 * db_cryst.fudge )); - else - //F_latt = db_cryst.Na*db_cryst.Nb*db_cryst.Nc*exp(-( hrad_sqr / 0.63 * db_cryst.fudge )); - F_latt = NABC_determ*exp(-( hrad_sqr / 0.63 * db_cryst.fudge )); + double F_latt = 0; + if (db_cryst.xtal_shape==SQUARE){ + F_latt = 1; + if (db_cryst.Na > 1) + F_latt *= sincg(M_PI * h, db_cryst.Na); + if (db_cryst.Nb > 1) + F_latt *= sincg(M_PI * k, db_cryst.Nb); + if (db_cryst.Nc > 1) + F_latt *= sincg(M_PI * l, db_cryst.Nc); + } + else { // gaussian relp model + double exparg; + if (db_cryst.xtal_shape==GAUSS_STAR){ + Eigen::Vector3d delta_Q = UBO.inverse()*delta_H; + double rad_star_sqr = delta_Q.dot(delta_Q)*xtal_size*xtal_size; + exparg = rad_star_sqr *1.9* db_cryst.fudge ; + } + else + exparg = hrad_sqr / 0.63 * db_cryst.fudge; + + F_latt = exp(-exparg); + if (! db_flags.no_Nabc_scale) + F_latt *= NABC_determ; + } /* convert amplitudes into intensity (photons per steradian) */ if (!db_flags.oversample_omega && !db_flags.Fhkl_gradient_mode) @@ -749,7 +789,6 @@ void diffBragg_sum_over_steps( // printf("hkl= %f %f %f hkl1= %d %d %d Fcell=%f\n", h,k,l,h0,k0,l0, F_cell); double two_C = 2*C; - Eigen::Matrix3d UBOt = U*Bmat_realspace*(db_cryst.eig_O.transpose()); if (db_flags.refine_Umat[0]){ Eigen::Matrix3d RyRzUBOt = db_cryst.RotMats[1]*db_cryst.RotMats[2]*UBOt; Eigen::Vector3d delta_H_prime = (db_cryst.UMATS[mos_tic]*db_cryst.dRotMats[0]*RyRzUBOt).transpose()*q_vec; diff --git a/simtbx/diffBragg/src/diffBragg_gpu_kernel.cu b/simtbx/diffBragg/src/diffBragg_gpu_kernel.cu index f74319f810..babbe0b344 100644 --- a/simtbx/diffBragg/src/diffBragg_gpu_kernel.cu +++ b/simtbx/diffBragg/src/diffBragg_gpu_kernel.cu @@ -2,6 +2,13 @@ #include #include +/* Fourier transform of a grating */ +__device__ CUDAREAL sincg(CUDAREAL x, CUDAREAL N) { + if (x != 0.0) + return sin(x * N) / sin(x); + return N; +} + __global__ void gpu_sum_over_steps( int Npix_to_model, unsigned int* panels_fasts_slows, @@ -66,11 +73,16 @@ void gpu_sum_over_steps( const int* __restrict__ data_freq, const bool* __restrict__ data_trusted, const int* __restrict__ FhklLinear_ASUid, const CUDAREAL* __restrict__ Fhkl_channels, - const CUDAREAL* __restrict__ Fhkl_scale, CUDAREAL* Fhkl_scale_deriv) + const CUDAREAL* __restrict__ Fhkl_scale, CUDAREAL* Fhkl_scale_deriv, + bool gaussian_star_shape, bool square_shape) { // BEGIN GPU kernel int tid = blockIdx.x * blockDim.x + threadIdx.x; int thread_stride = blockDim.x * gridDim.x; + __shared__ CUDAREAL s_phi0, s_phistep, gx,gy,gz; + __shared__ int s_phisteps; + __shared__ bool s_gaussian_star_shape; + __shared__ bool s_square_shape; __shared__ bool s_Fhkl_channels_empty; __shared__ bool s_Fhkl_have_scale_factors; __shared__ bool s_Fhkl_gradient_mode; @@ -95,9 +107,9 @@ void gpu_sum_over_steps( __shared__ CUDAREAL two_C; __shared__ MAT3 Bmat_realspace; __shared__ MAT3 Amat_init; - //__shared__ CUDAREAL s_Na; - //__shared__ CUDAREAL s_Nb; - //__shared__ CUDAREAL s_Nc; + __shared__ CUDAREAL s_Na; + __shared__ CUDAREAL s_Nb; + __shared__ CUDAREAL s_Nc; __shared__ CUDAREAL s_NaNbNc_squared; __shared__ int s_h_max, s_k_max, s_l_max, s_h_min, s_k_min, s_l_min, s_h_range, s_k_range, s_l_range; __shared__ int s_oversample, s_detector_thicksteps, s_sources, s_mosaic_domains, s_printout_fpixel, @@ -126,8 +138,17 @@ void gpu_sum_over_steps( __shared__ VEC3 Hmin, Hmax, dHH, Hrange; //extern __shared__ CUDAREAL det_vecs[]; //__shared__ int det_stride; + __shared__ CUDAREAL s_xtal_size_sq; if (threadIdx.x==0){ // TODO can we get speed gains by dividing up the following definitions over more threads ? + s_phisteps = phisteps; + s_phi0 = phi0; + s_phistep = phistep; + gx = spindle_vec[0]; + gy = spindle_vec[1]; + gz = spindle_vec[2]; + s_gaussian_star_shape = gaussian_star_shape; + s_square_shape = square_shape; for (int i=0; i<3; i++){ s_refine_Ncells[i] = refine_Ncells[i]; s_refine_Umat[i] = refine_Umat[i]; @@ -170,9 +191,9 @@ void gpu_sum_over_steps( s_NABC_det_sq = s_NABC_det*s_NABC_det; C = 2 / 0.63 * fudge; two_C = 2*C; - //s_Na = Na; - //s_Nb = Nb; - //s_Nc = Nc; + s_Na = Na; + s_Nb = Nb; + s_Nc = Nc; s_NaNbNc_squared = (Na*Nb*Nc); s_NaNbNc_squared *= s_NaNbNc_squared; s_h_max = h_max; @@ -238,6 +259,12 @@ void gpu_sum_over_steps( // det_vecs[i] = fdet_vectors[i]; // det_vecs[i+det_stride] = sdet_vectors[i]; //} + MAT3 Amat0 = Amatrices[0]; + VEC3 A(Amat0(0,0),Amat0(0,1), Amat0(0,2)); + VEC3 B(Amat0(1,0),Amat0(1,1), Amat0(1,2)); + VEC3 C(Amat0(2,0),Amat0(2,1), Amat0(2,2)); + CUDAREAL cell_vol = A.dot(B.cross(C)); + s_xtal_size_sq = pow(s_NABC_det*cell_vol, (CUDAREAL)2/(CUDAREAL)3); } //extern __shared__ CUDAREAL source_data[]; @@ -432,10 +459,26 @@ void gpu_sum_over_steps( texture_scale *= cap_frac_times_omega; texture_scale *= sI; - for(int _mos_tic=0;_mos_tic1) + I0 *= sincg(M_PI*_h,s_Na); + if(s_Nb>1) + I0 *= sincg(M_PI*_k,s_Nb); + if(s_Nc>1) + I0 *= sincg(M_PI*_l,s_Nc); + I0 *= I0; + } + else{ + CUDAREAL exparg; + if (s_gaussian_star_shape){ + MAT3 Ainv = UBO.inverse(); + VEC3 delta_Q = Ainv*delta_H; + CUDAREAL rad_star_sqr = delta_Q.dot(delta_Q)*s_xtal_size_sq; + exparg = rad_star_sqr*1.9*fudge; + } else - I0 = (s_NABC_det_sq)*exp(-2*exparg); + exparg = _hrad_sqr*C/2; + if (exparg< 35) + if (s_no_Nabc_scale) + I0 = exp(-2*exparg); + else + I0 = (s_NABC_det_sq)*exp(-2*exparg); + } // are we doing diffuse scattering CUDAREAL step_diffuse_param[6] = {0,0,0,0,0,0}; @@ -606,6 +668,8 @@ void gpu_sum_over_steps( MAT3 UBOt; if (s_refine_Umat[0] || s_refine_Umat[1] ||s_refine_Umat[2] || s_refine_eta){ UBOt = Amat_init; + if (phi != 0) + UBOt = Rphi*UBOt; } if (s_refine_Umat[0]){ MAT3 RyRzUBOt = RotMats[1]*RotMats[2]*UBOt; @@ -910,6 +974,7 @@ void gpu_sum_over_steps( } } // end of mos_tic loop + } // end of phi_tic loop } // end of source loop } // end of thick step loop } // end of fpos loop diff --git a/simtbx/diffBragg/src/diffBragg_gpu_kernel.h b/simtbx/diffBragg/src/diffBragg_gpu_kernel.h index fb0f2b218c..686e933ed7 100644 --- a/simtbx/diffBragg/src/diffBragg_gpu_kernel.h +++ b/simtbx/diffBragg/src/diffBragg_gpu_kernel.h @@ -75,6 +75,6 @@ __global__ void gpu_sum_over_steps( const int* __restrict__ data_freq, const bool* __restrict__ data_trusted, const int* __restrict__ FhklLinear_ASUid, const CUDAREAL* __restrict__ Fhkl_channels, - const CUDAREAL* __restrict__ Fhkl_scale, CUDAREAL* Fhkl_scale_deriv - + const CUDAREAL* __restrict__ Fhkl_scale, CUDAREAL* Fhkl_scale_deriv, + bool gaussian_star_shape, bool square_shape ); diff --git a/simtbx/diffBragg/src/diffBragg_kokkos_kernel.cpp b/simtbx/diffBragg/src/diffBragg_kokkos_kernel.cpp index 5705914642..cd49361ba0 100644 --- a/simtbx/diffBragg/src/diffBragg_kokkos_kernel.cpp +++ b/simtbx/diffBragg/src/diffBragg_kokkos_kernel.cpp @@ -1,5 +1,6 @@ #include #include "diffBraggKOKKOS.h" +#include #include void kokkos_sum_over_steps( @@ -150,7 +151,9 @@ void kokkos_sum_over_steps( const vector_int_t FhklLinear_ASUid, const vector_int_t Fhkl_channels, const vector_cudareal_t Fhkl_scale, - vector_cudareal_t Fhkl_scale_deriv) { // BEGIN GPU kernel + vector_cudareal_t Fhkl_scale_deriv, + bool gaussian_star_shape, bool square_shape + ) { // BEGIN GPU kernel const KOKKOS_MAT3 Bmat_realspace = eig_B * 1e10; const KOKKOS_MAT3 eig_Otranspose = eig_O.transpose(); @@ -174,6 +177,9 @@ void kokkos_sum_over_steps( vector_cudareal_t dG_trace = vector_cudareal_t("dG_trace", 3); int num_laue_mats = 0; int dhh = 0, dkk = 0, dll = 0; + CUDAREAL gx = spindle_vec[0]; + CUDAREAL gy = spindle_vec[1]; + CUDAREAL gz = spindle_vec[2]; Kokkos::View UMATS_prime("UMATS_prime", mosaic_domains); Kokkos::View UMATS_dbl_prime("UMATS_dbl_prime", mosaic_domains); @@ -418,9 +424,24 @@ void kokkos_sum_over_steps( // TODO rename CUDAREAL texture_scale = _capture_fraction * _omega_pixel * sI; + for (int _phi_tic=0; _phi_tic1) + F_latt *= sincg(M_PI*_h,Na); + if(Nb>1) + F_latt *= sincg(M_PI*_k,Nb); + if(Nc>1) + F_latt *= sincg(M_PI*_l,Nc); + I0 = F_latt*F_latt; + } + else { + CUDAREAL exparg; + if (gaussian_star_shape){ + // TODO can precompute xtal_size_sq to save time + KOKKOS_VEC3 A {UBO[0], UBO[1], UBO[2]}; + KOKKOS_VEC3 B {UBO[3], UBO[4], UBO[5]}; + KOKKOS_VEC3 C {UBO[6], UBO[7], UBO[8]}; + CUDAREAL cell_vol = A.dot(B.cross(C)); + CUDAREAL xtal_size_sq = pow(NABC_det*cell_vol, CUDAREAL(2)/CUDAREAL(3)); + KOKKOS_MAT3 Ainv = UBO.inverse(); + KOKKOS_VEC3 delta_Q = Ainv*delta_H; + CUDAREAL rad_star_sqr = delta_Q.dot(delta_Q)*xtal_size_sq; + exparg = rad_star_sqr*1.9*fudge ; + } else - I0 = (NABC_det_sq) * - ::Kokkos::exp(-2 * exparg); + exparg = _hrad_sqr * C / 2; + I0 = 0; + if (exparg < 35) + if (no_Nabc_scale) + I0 = ::Kokkos::exp(-2 * exparg); + else + I0 = (NABC_det_sq) * + ::Kokkos::exp(-2 * exparg); + } // are we doing diffuse scattering CUDAREAL step_diffuse_param[6] = {0, 0, 0, 0, 0, 0}; @@ -613,6 +660,8 @@ void kokkos_sum_over_steps( KOKKOS_MAT3 UBOt; if (refine_flag & (REFINE_UMAT | REFINE_ETA)) { UBOt = Amat_init; + if (phi != 0) + UBOt = Rphi*UBOt; } if (refine_flag & REFINE_UMAT1) { const KOKKOS_VEC3 dV = UMATS_prime(_mos_tic, 0) * q_vec; @@ -935,6 +984,7 @@ void kokkos_sum_over_steps( } // end of printout if } // end of mos_tic loop + } // end of phi_tic loop } // end of source loop } // end of thick step loop } // end of fpos loop @@ -1334,7 +1384,8 @@ void kokkos_sum_over_steps( const vector_int_t FhklLinear_ASUid, const vector_int_t Fhkl_channels, const vector_cudareal_t Fhkl_scale, - vector_cudareal_t Fhkl_scale_deriv) { // BEGIN GPU kernel + vector_cudareal_t Fhkl_scale_deriv, + bool gaussian_star_shape, bool square_shape) { // BEGIN GPU kernel const KOKKOS_MAT3 Bmat_realspace = eig_B * 1e10; const KOKKOS_MAT3 eig_Otranspose = eig_O.transpose(); @@ -1358,6 +1409,9 @@ void kokkos_sum_over_steps( vector_cudareal_t dG_trace = vector_cudareal_t("dG_trace", 3); int num_laue_mats = 0; int dhh = 0, dkk = 0, dll = 0; + CUDAREAL gx = spindle_vec[0]; + CUDAREAL gy = spindle_vec[1]; + CUDAREAL gz = spindle_vec[2]; Kokkos::View UMATS_prime("UMATS_prime", mosaic_domains); Kokkos::View UMATS_dbl_prime("UMATS_dbl_prime", mosaic_domains); @@ -1599,9 +1653,24 @@ void kokkos_sum_over_steps( // TODO rename CUDAREAL texture_scale = _capture_fraction * _omega_pixel * sI; + for (int _phi_tic=0; _phi_tic1) + F_latt *= sincg(M_PI*_h,Na); + if(Nb>1) + F_latt *= sincg(M_PI*_k,Nb); + if(Nc>1) + F_latt *= sincg(M_PI*_l,Nc); + I0 = F_latt*F_latt; + } + else{ // using a gaussian model + CUDAREAL exparg; + if (gaussian_star_shape){ + // TODO: can precompute xtal_vol to save time ... but prob not necessary as gaussian star model only used for forward calc + KOKKOS_VEC3 A {UBO[0], UBO[1], UBO[2]}; + KOKKOS_VEC3 B {UBO[3], UBO[4], UBO[5]}; + KOKKOS_VEC3 C {UBO[6], UBO[7], UBO[8]}; + CUDAREAL cell_vol = A.dot(B.cross(C)); + CUDAREAL xtal_size_sq = pow(NABC_det*cell_vol, CUDAREAL(2)/CUDAREAL(3)); + KOKKOS_MAT3 Ainv = UBO.inverse(); + KOKKOS_VEC3 delta_Q = Ainv*delta_H; + CUDAREAL rad_star_sqr = delta_Q.dot(delta_Q)*xtal_size_sq; + exparg = rad_star_sqr*1.9*fudge ; + } else - I0 = (NABC_det_sq) * - ::Kokkos::exp(-2 * exparg); + exparg = _hrad_sqr * C / 2; + + if (exparg < 35) + if (no_Nabc_scale) + I0 = ::Kokkos::exp(-2 * exparg); + else + I0 = (NABC_det_sq) * + ::Kokkos::exp(-2 * exparg); + } // are we doing diffuse scattering CUDAREAL step_diffuse_param[6] = {0, 0, 0, 0, 0, 0}; @@ -1790,6 +1886,8 @@ void kokkos_sum_over_steps( KOKKOS_MAT3 UBOt; if (refine_flag & (REFINE_UMAT | REFINE_ETA)) { UBOt = Amat_init; + if (phi != 0) + UBOt = Rphi*UBOt; } if (refine_flag & REFINE_UMAT1) { const KOKKOS_VEC3 dV = UMATS_prime(_mos_tic, 0) * q_vec; @@ -2112,6 +2210,7 @@ void kokkos_sum_over_steps( } // end of printout if } // end of mos_tic loop + } // end of phi_tic loop } // end of source loop } // end of thick step loop } // end of fpos loop @@ -2506,4 +2605,5 @@ kokkos_sum_over_steps< const vector_int_t FhklLinear_ASUid, const vector_int_t Fhkl_channels, const vector_cudareal_t Fhkl_scale, - vector_cudareal_t Fhkl_scale_deriv); + vector_cudareal_t Fhkl_scale_deriv, + bool gaussian_star_shape, bool square_shape); diff --git a/simtbx/diffBragg/src/diffBragg_kokkos_kernel.h b/simtbx/diffBragg/src/diffBragg_kokkos_kernel.h index 3586183087..36d8c7939f 100644 --- a/simtbx/diffBragg/src/diffBragg_kokkos_kernel.h +++ b/simtbx/diffBragg/src/diffBragg_kokkos_kernel.h @@ -151,7 +151,8 @@ void kokkos_sum_over_steps( const vector_int_t FhklLinear_ASUid, const vector_int_t Fhkl_channels, const vector_cudareal_t Fhkl_scale, - vector_cudareal_t Fhkl_scale_deriv + vector_cudareal_t Fhkl_scale_deriv, + bool gaussian_star_shape, bool square_shape ); template < @@ -313,6 +314,7 @@ void kokkos_sum_over_steps( const vector_int_t FhklLinear_ASUid, const vector_int_t Fhkl_channels, const vector_cudareal_t Fhkl_scale, - vector_cudareal_t Fhkl_scale_deriv + vector_cudareal_t Fhkl_scale_deriv, + bool gaussian_star_shape, bool square_shape ); #endif diff --git a/simtbx/diffBragg/src/util.h b/simtbx/diffBragg/src/util.h index ec197c1dc9..ebe1244ea7 100644 --- a/simtbx/diffBragg/src/util.h +++ b/simtbx/diffBragg/src/util.h @@ -14,6 +14,12 @@ #define CUDAREAL double #endif +#include "simtbx/nanoBragg/nanotypes.h" +using simtbx::nanoBragg::shapetype; +using simtbx::nanoBragg::SQUARE; +using simtbx::nanoBragg::GAUSS; +using simtbx::nanoBragg::GAUSS_STAR; + using image_type = std::vector; typedef Eigen::Matrix VEC3; typedef Eigen::Matrix MAT3; @@ -152,6 +158,7 @@ struct flags{ }; struct crystal{ + shapetype xtal_shape = GAUSS; double Friedel_beta = 1e10; // restraint factor for Friedel pairs double Finit_beta = 1e10; // restraint factor for Friedel pairs std::vector pos_inds; // indices of the positive Friedel mate diff --git a/simtbx/diffBragg/tests/tst_diffBragg_nanoBragg_congruency.py b/simtbx/diffBragg/tests/tst_diffBragg_nanoBragg_congruency.py deleted file mode 100644 index 194607a9c6..0000000000 --- a/simtbx/diffBragg/tests/tst_diffBragg_nanoBragg_congruency.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import division -##from simtbx.kokkos import gpu_instance -#kokkos_run = gpu_instance(deviceId = 0) -from simtbx.diffBragg.utils import find_diffBragg_instances -import numpy as np - - -def main(): - from simtbx.nanoBragg.sim_data import SimData - - S = SimData(use_default_crystal=True) - S.instantiate_diffBragg() - S.D.nopolar = True - S.D.oversample = 3 - - S.D.add_diffBragg_spots() - S._add_background() - - diff_img = S.D.raw_pixels.as_numpy_array() - - S.D.raw_pixels *= 0 - S.D.add_nanoBragg_spots() - S._add_background() - - nano_img = S.D.raw_pixels.as_numpy_array() - - assert np.allclose(diff_img, nano_img, atol=1e-9) - for name in find_diffBragg_instances(globals()): del globals()[name] - - -if __name__ == "__main__": - import sys - if "--kokkos" in sys.argv: - import os - os.environ["DIFFBRAGG_USE_KOKKOS"]="1" - from simtbx.diffBragg.device import DeviceWrapper - with DeviceWrapper(0) as _: - main() - print ("OK!") diff --git a/simtbx/diffBragg/tests/tst_diffBragg_rotation.py b/simtbx/diffBragg/tests/tst_diffBragg_rotation.py index 339e3ffcfc..bf444e7ea3 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_rotation.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_rotation.py @@ -1,7 +1,6 @@ from __future__ import absolute_import, division, print_function -from simtbx.kokkos import gpu_instance -kokkos_run = gpu_instance(deviceId = 0) from simtbx.diffBragg import utils, hopper_utils +from simtbx.nanoBragg import nanoBragg from simtbx.nanoBragg.tst_nanoBragg_multipanel import beam as dxtbx_beam from simtbx.nanoBragg.tst_nanoBragg_multipanel import whole_det as dxtbx_det from simtbx.nanoBragg import nanoBragg_crystal, nanoBragg_beam, sim_data @@ -9,8 +8,7 @@ from dxtbx_model_ext import Crystal from scitbx import matrix import numpy as np -# from simtbx.nanoBragg import shapetype -# from simtbx.nanoBragg import nanoBragg +import os import libtbx.load_env # possibly implicit pdb_lines = """HEADER TEST @@ -32,12 +30,20 @@ def write_test_pdb(fileout="test.pdb"): pdb_inp.write_pdb_file(fileout) return fileout -def run_sim(testpdb, version="nanoBragg", spindle_axis=(1,0,0), phi_start=0, phistep_deg=-1, phisteps=-1, osc_deg=-1): +def run_sim(testpdb, version="nanoBragg", spindle_axis=(1,0,0), phi_start=0, osc_deg=-1, phisteps=-1, verbose=False, + manual_mode=False, manual_angle=0.): # crystal symmetry=extract_from(testpdb) sg = str(symmetry.space_group_info()) fmat = matrix.sqr(symmetry.unit_cell().fractionalization_matrix()) dxtbx_cryst = Crystal(fmat, sg) + if manual_mode: + sa = matrix.col(spindle_axis) + U = matrix.sqr(dxtbx_cryst.get_U()) + Rphi = sa.axis_and_angle_as_r3_rotation_matrix(manual_angle, deg=True) + U = Rphi*U + dxtbx_cryst.set_U(U.elems) + crystal = nanoBragg_crystal.NBcrystal(init_defaults=True) crystal.isotropic_ncells = False crystal.dxtbx_crystal = dxtbx_cryst @@ -63,69 +69,112 @@ def run_sim(testpdb, version="nanoBragg", spindle_axis=(1,0,0), phi_start=0, phi SIM.detector = utils.strip_thickness_from_detector(dxtbx_det) SIM.detector = dxtbx_det SIM.crystal = crystal + SIM.beam = beam SIM.panel_id = 0 def setup_rotation(SIM): - SIM.D.spindle_axis=spindle_axis - SIM.D.phi_deg = phi_start - SIM.D.phistep_deg = phistep_deg - SIM.D.phisteps = phisteps + if not manual_mode: + SIM.D.spindle_axis = spindle_axis + SIM.D.phi_deg = phi_start + SIM.D.osc_deg = osc_deg + SIM.D.phisteps = phisteps if version == "nanoBragg": - SIM.instantiate_nanoBragg(oversample=1, device_Id=0, default_F=0) - SIM.D.printout_pixel_fastslow = FAST,SLOW + SIM.instantiate_nanoBragg(oversample=1, device_Id=0, default_F=0, interpolate=0) setup_rotation(SIM) - SIM.D.show_params() + if verbose: + SIM.D.printout_pixel_fastslow = FAST,SLOW + SIM.D.show_params() SIM.D.add_nanoBragg_spots() pix = SIM.D.raw_pixels.as_numpy_array() SIM.D.free_all() return pix else: - SIM.instantiate_diffBragg(oversample=1, device_Id=0, default_F=0) + SIM.instantiate_diffBragg(oversample=1, device_Id=0, default_F=0, interpolate=0) SIM.D.xray_beams = SIM.beam.xray_beams ucell_man = utils.manager_from_params(ucell_p) Bmatrix = ucell_man.B_recipspace SIM.D.Bmatrix = Bmatrix npix = int(len(pfs)/3) - SIM.D.printout_pixel_fastslow = FAST,SLOW setup_rotation(SIM) - SIM.D.show_params() + if verbose: + SIM.D.printout_pixel_fastslow = FAST,SLOW + SIM.D.show_params() SIM.D.add_diffBragg_spots_full() pix = SIM.D.raw_pixels_roi.as_numpy_array() - SIM.D.free_all() + hopper_utils.free_SIM_mem(SIM) return pix def assert_identical_pixels(pix1, pix2): - assert np.allclose(pix1,pix2) + assert np.allclose(pix1,pix2, atol=0.1) + +def test_manual_rotation(): + testpdb = write_test_pdb(fileout="test.pdb") + SIM = nanoBragg() + SIM.phi_deg = 0 + SIM.osc_deg = 0.5 + SIM.phisteps = 50 + phistep_deg = SIM.osc_deg / SIM.phisteps + assert phistep_deg == SIM.phistep_deg # Note, this was broken by a bug recently + + pix_sums = [] + for version in ("nanoBragg", "diffBragg"): + pix = None + for phi_tic in range(SIM.phisteps): + manual_angle = SIM.phi_deg + phi_tic*phistep_deg + print("phiTic=%d, deltaPhi=%f deg." % (phi_tic, manual_angle)) + temp = run_sim(testpdb, version=version, spindle_axis=(1, 0, 0), phi_start=0, osc_deg=-1, phisteps=-1, + verbose=False, manual_mode=True, manual_angle=manual_angle) + if pix is None: + pix = temp + else: + pix += temp + pix /= SIM.phisteps + pix_sums.append(pix) + nb_pix, db_pix = pix_sums + assert np.allclose(nb_pix.ravel(), db_pix) + print("tst manual diffBragg vs manual nanoBragg: OK!") + + temp_db = run_sim(testpdb, version="diffBragg", spindle_axis=(1, 0, 0), phi_start=SIM.phi_deg, osc_deg=SIM.osc_deg, + phisteps=SIM.phisteps, verbose=False, manual_mode=False) + assert np.allclose(db_pix, temp_db) + print("tst manual vs internal diffBragg: OK!") + + temp_nb = run_sim(testpdb, version="nanoBragg", spindle_axis=(1, 0, 0), phi_start=SIM.phi_deg, osc_deg=SIM.osc_deg, + phisteps=SIM.phisteps, verbose=False, manual_mode=False) + # for some reason, nanoBragg phi rotation seems to propagate rounding error maybe, so we must use a larger atol... + assert np.allclose(nb_pix, temp_nb, atol=0.1) + print("tst manual vs internal nanoBragg: OK!") + SIM.free_all() def test_range_of_rotation_steps(): - # automatic param setting for phistep_deg, phisteps and osc_deg when passed values are <0 + # automatic param setting for phisteps and osc_deg when passed values are <0 # conflicting params are tolerated and should be handled with the same decision tree testpdb = write_test_pdb(fileout="test.pdb") for phi_start in (0, 30): - for phistep_deg in (-1, 0.01, 0.1): - for phisteps in (-1, 1, 10): - for osc_deg in (-1, 0.1, 1): - pix1 = run_sim( - testpdb, - version="nanoBragg", - spindle_axis=(1,0,0), - phi_start=phi_start, - phistep_deg=phistep_deg, - phisteps=phisteps, - osc_deg=osc_deg)#, fileout="rotimg_nano_%03d.h5") - pix2 = run_sim( - testpdb, - version="diffBragg", - spindle_axis=(1,0,0), - phi_start=phi_start, - phistep_deg=phistep_deg, - phisteps=phisteps, - osc_deg=osc_deg)#, fileout="rotimg_diff_%03d.h5") - pix2 = pix2.reshape(pix1.shape) - assert_identical_pixels(pix1, pix2) + for osc_deg in (0., 0.01, 0.1): + for phisteps in (1, 10, 15): + pix1 = run_sim( + testpdb, + version="nanoBragg", + spindle_axis=(1,0,0), + phi_start=phi_start, + osc_deg=osc_deg, + phisteps=phisteps + )#, fileout="rotimg_nano_%03d.h5") + pix2 = run_sim( + testpdb, + version="diffBragg", + spindle_axis=(1,0,0), + phi_start=phi_start, + osc_deg=osc_deg, + phisteps=phisteps + )#, fileout="rotimg_diff_%03d.h5") + pix2 = pix2.reshape(pix1.shape) + assert_identical_pixels(pix1, pix2) + print("tst range: phisteps=%d, osc_deg=%f deg., phistart=%f deg. ... OK!" %(phisteps, osc_deg, phi_start)) def test_sweep(): testpdb = write_test_pdb(fileout="test.pdb") @@ -135,29 +184,34 @@ def test_sweep(): version="nanoBragg", spindle_axis=(1,0,0), phi_start=phi_start, - phistep_deg=0.05, - phisteps=20, - osc_deg=1)#, fileout="rotimg_nano_%03d.h5") + osc_deg=0.05, + phisteps=20 + )#, fileout="rotimg_nano_%03d.h5") pix2 = run_sim( testpdb, version="diffBragg", spindle_axis=(1,0,0), phi_start=phi_start, - phistep_deg=0.05, - phisteps=20, - osc_deg=1)#, fileout="rotimg_diff_%03d.h5") + osc_deg=0.05, + phisteps=20 + )#, fileout="rotimg_diff_%03d.h5") pix2 = pix2.reshape(pix1.shape) assert_identical_pixels(pix1, pix2) + print("tst sweep: phisteps=20, osc_deg=0.05 deg., phistart=%f deg. ... OK!" % (phi_start, )) def tst_all(): - import os - # currently only tests CPU implementation - if "DIFFBRAGG_USE_CUDA" in os.environ: - os.environ.unsetenv("DIFFBRAGG_USE_CUDA") - #os.environ["DIFFBRAGG_USE_CUDA"]=1 + test_manual_rotation() test_range_of_rotation_steps() test_sweep() if __name__=="__main__": - tst_all() - print("OK") + import sys + if "--kokkos" in sys.argv: + os.environ["DIFFBRAGG_USE_KOKKOS"] = "1" + from simtbx.diffBragg.utils import find_diffBragg_instances + from simtbx.diffBragg.device import DeviceWrapper + + with DeviceWrapper(0) as _: + tst_all() + for name in find_diffBragg_instances(globals()): del globals()[name] + print("OK") diff --git a/simtbx/diffBragg/utils.py b/simtbx/diffBragg/utils.py index d8c439e26d..340bf9360e 100644 --- a/simtbx/diffBragg/utils.py +++ b/simtbx/diffBragg/utils.py @@ -837,6 +837,7 @@ def simulator_from_expt_and_params(expt, params=None): # create nanoBragg crystal crystal = NBcrystal(init_defaults=False) + crystal.xtal_shape = "gauss" crystal.isotropic_ncells = has_isotropic_ncells if params.simulator.crystal.rotXYZ_ucell is not None: rotXYZ = params.simulator.crystal.rotXYZ_ucell[:3] @@ -902,7 +903,7 @@ def simulator_from_expt_and_params(expt, params=None): return SIM -def update_SIM_with_gonio(SIM, params=None, delta_phi=None, num_phi_steps=5): +def update_SIM_with_gonio(SIM, params=None, delta_phi=None, num_phi_steps=5, spindle_axis=(1,0,0)): """ :param SIM: sim_data instance @@ -918,9 +919,10 @@ def update_SIM_with_gonio(SIM, params=None, delta_phi=None, num_phi_steps=5): num_phi_steps = params.simulator.gonio.phi_steps if delta_phi is not None: + SIM.D.spindle_axis = spindle_axis SIM.D.phi_deg = 0 SIM.D.osc_deg = delta_phi - SIM.D.phisteps = num_phi_steps + SIM.D.phisteps = int(num_phi_steps) def get_complex_fcalc_from_pdb( diff --git a/simtbx/kokkos/simulation_kernels.h b/simtbx/kokkos/simulation_kernels.h index 0a91f33e59..31e5c00c75 100644 --- a/simtbx/kokkos/simulation_kernels.h +++ b/simtbx/kokkos/simulation_kernels.h @@ -15,6 +15,7 @@ using simtbx::nanoBragg::SQUARE; using simtbx::nanoBragg::ROUND; using simtbx::nanoBragg::GAUSS; using simtbx::nanoBragg::GAUSS_ARGCHK; +using simtbx::nanoBragg::GAUSS_STAR; using simtbx::nanoBragg::TOPHAT; void calc_CrystalOrientations(CUDAREAL phi0, @@ -297,6 +298,18 @@ void kokkosSpotsKernel(int spixels, int fpixels, int roi_xmin, int roi_xmax, if (my_arg<35.){ F_latt = Na * Nb * Nc * exp(-(my_arg)); } else { F_latt = 0.; } // warps coalesce when blocks of 32 pixels have no Bragg signal } + if (xtal_shape == GAUSS_STAR){ + mat3 At (a[0],a[1],a[2], + b[0],b[1],b[2], + c[0],c[1],c[2]); + vec3 delta_H {h-h0, k-k0, l-l0}; + vec3 delta_Q = (1e10*At).inverse()*delta_H; + CUDAREAL Nvol=Na*Nb*Nc; + CUDAREAL xtal_size_sq = pow(Nvol*V_cell, double(2)/double(3)); + CUDAREAL rad_star_sqr = delta_Q.dot(delta_Q)*xtal_size_sq; + F_latt = Nvol*exp(-( rad_star_sqr *1.9 * fudge )); + } + if (xtal_shape == TOPHAT) { // make a flat-top spot of same height and volume as square_xtal spots F_latt = Na * Nb * Nc * (hrad_sqr * fudge < 0.3969); diff --git a/simtbx/modeling/forward_models.py b/simtbx/modeling/forward_models.py index 090290e8fc..b1bc63e751 100644 --- a/simtbx/modeling/forward_models.py +++ b/simtbx/modeling/forward_models.py @@ -70,7 +70,7 @@ def model_spots_from_pandas(pandas_frame, rois_per_panel=None, symbol_override=None, quiet=False, reset_Bmatrix=False, nopolar=False, force_no_detector_thickness=False, printout_pix=None, norm_by_nsource=False, use_exascale_api=False, use_db=False, show_timings=False, perpixel_wavelen=False, - det_thicksteps=None, from_pdb=None): + det_thicksteps=None, from_pdb=None, mosaic_samples_override=None): if perpixel_wavelen and not use_db: raise NotImplementedError("to get perpixel wavelengths set use_db=True to use the diffBragg backend") if use_exascale_api: @@ -196,6 +196,8 @@ def model_spots_from_pandas(pandas_frame, rois_per_panel=None, mos_dom = 1 if "num_mosaicity_samples" in list(df): mos_dom = int(df.num_mosaicity_samples.values[0]) + if mosaic_samples_override is not None: + mos_dom = mosaic_samples_override eta_abc = df.eta_abc.values[0] LOGGER.debug("Num mos samples=%d, eta_abc=%f %f %f" % ((mos_dom,)+ eta_abc ) ) LOGGER.debug("Num energy channels=%d" % len(energies)) @@ -244,7 +246,8 @@ def diffBragg_forward(CRYSTAL, DETECTOR, BEAM, Famp, energies, fluxes, nopolar=False, diffuse_params=None, cuda=False, show_timings=False,perpixel_wavelen=False, det_thicksteps=None, eta_abc=None, Ncells_def=None, - num_phi_steps=1, delta_phi=None, div_mrad=0, divsteps=0): + num_phi_steps=1, delta_phi=None, div_mrad=0, divsteps=0, + spindle_axis=(1,0,0)): if cuda: os.environ["DIFFBRAGG_USE_CUDA"] = "1" @@ -308,7 +311,7 @@ def diffBragg_forward(CRYSTAL, DETECTOR, BEAM, Famp, energies, fluxes, S.D.diffuse_sigma = diffuse_params["sigma"] if delta_phi is not None: - utils.update_SIM_with_gonio(S, delta_phi=delta_phi, num_phi_steps=num_phi_steps ) + utils.update_SIM_with_gonio(S, delta_phi=delta_phi, num_phi_steps=num_phi_steps, spindle_axis=spindle_axis) S.D.add_diffBragg_spots_full() if show_timings or LOGGER.level <= 10: S.D.show_timings() diff --git a/simtbx/nanoBragg/nanoBragg.cpp b/simtbx/nanoBragg/nanoBragg.cpp index 14ba8b26b8..2ff5ba28ce 100644 --- a/simtbx/nanoBragg/nanoBragg.cpp +++ b/simtbx/nanoBragg/nanoBragg.cpp @@ -2380,6 +2380,7 @@ nanoBragg::show_params() if(xtal_shape == SQUARE) printf("parallelpiped"); if(xtal_shape == GAUSS ) printf("gaussian"); if(xtal_shape == GAUSS_ARGCHK ) printf("gaussian_argchk"); + if(xtal_shape == GAUSS_STAR ) printf("gaussian_star"); if(xtal_shape == TOPHAT) printf("tophat-spot"); printf(" xtal: %.1fx%.1fx%.1f cells\n",Na,Nb,Nc); printf("Unit Cell: %g %g %g %g %g %g\n", a_A[0],b_A[0],c_A[0],alpha*RTD,beta*RTD,gamma*RTD); @@ -2673,6 +2674,15 @@ nanoBragg::add_nanoBragg_spots() if (my_arg<35.){ F_latt = Na * Nb * Nc * exp(-(my_arg));} else { F_latt = 0.; } // not expected to give performance gain on optimized C++, only on GPU } + if (xtal_shape == GAUSS_STAR){ + double xtal_size_sq = pow(Na*Nb*Nc*V_cell, double(2)/double(3)); + vec3 delta_H {h-h0, k-k0, l-l0}; + mat3 At(a[1], a[2], a[3], b[1], b[2], b[3], c[1], c[2], c[3]); + mat3 Ainverse = (1e10*At).inverse(); + vec3 delta_Q = Ainverse*delta_H; + double rad_star_sqr = (delta_Q*delta_Q)*xtal_size_sq; // dot product + F_latt = Na*Nb*Nc*exp(-( rad_star_sqr*1.9*fudge )); + } if(xtal_shape == TOPHAT) { diff --git a/simtbx/nanoBragg/nanoBraggCUDA.cu b/simtbx/nanoBragg/nanoBraggCUDA.cu index fe153bbf20..025af9180c 100644 --- a/simtbx/nanoBragg/nanoBraggCUDA.cu +++ b/simtbx/nanoBragg/nanoBraggCUDA.cu @@ -21,6 +21,7 @@ using simtbx::nanoBragg::SQUARE; using simtbx::nanoBragg::ROUND; using simtbx::nanoBragg::GAUSS; using simtbx::nanoBragg::GAUSS_ARGCHK; +using simtbx::nanoBragg::GAUSS_STAR; using simtbx::nanoBragg::TOPHAT; static void CheckCudaErrorAux(const char *, unsigned, const char *, cudaError_t); @@ -740,12 +741,39 @@ CUDAREAL pixel_size, CUDAREAL subpixel_size, int steps, CUDAREAL detector_thicks /* fudge the radius so that volume and FWHM are similar to square_xtal spots */ F_latt = Na * Nb * Nc * exp(-(hrad_sqr / 0.63 * fudge)); } - if (s_xtal_shape == GAUSS_ARGCHK) { - /* fudge the radius so that volume and FWHM are similar to square_xtal spots */ - double my_arg = hrad_sqr / 0.63 * fudge; - if (my_arg<35.){ F_latt = Na * Nb * Nc * exp(-(my_arg)); - } else { F_latt = 0.; } // warps coalesce when blocks of 32 pixels have no Bragg signal - } + if (s_xtal_shape == GAUSS_ARGCHK) { + /* fudge the radius so that volume and FWHM are similar to square_xtal spots */ + double my_arg = hrad_sqr / 0.63 * fudge; + if (my_arg<35.){ F_latt = Na * Nb * Nc * exp(-(my_arg)); + } else { F_latt = 0.; } // warps coalesce when blocks of 32 pixels have no Bragg signal + } + if (s_xtal_shape == GAUSS_STAR){ + CUDAREAL a_cross_b[] = { 0.0, 0.0, 0.0, 0.0 }; + CUDAREAL b_cross_c[] = { 0.0, 0.0, 0.0, 0.0 }; + CUDAREAL c_cross_a[] = { 0.0, 0.0, 0.0, 0.0 }; + cross_product(a,b,a_cross_b); + cross_product(b,c,b_cross_c); + cross_product(c,a,c_cross_a); + /* new reciprocal-space cell vectors */ + CUDAREAL a_star_tic[] = {0,0,0,0}; + CUDAREAL b_star_tic[] = {0,0,0,0}; + CUDAREAL c_star_tic[] = {0,0,0,0}; + vector_scale(b_cross_c,a_star_tic,1e20/V_cell); + vector_scale(c_cross_a,b_star_tic,1e20/V_cell); + vector_scale(a_cross_b,c_star_tic,1e20/V_cell); + //} + CUDAREAL dh=h-h0; + CUDAREAL dk=k-k0; + CUDAREAL dl=l-l0; + CUDAREAL dx_star = dh*a_star_tic[1] + dk*b_star_tic[1] + dl*c_star_tic[1]; + CUDAREAL dy_star = dh*a_star_tic[2] + dk*b_star_tic[2] + dl*c_star_tic[2]; + CUDAREAL dz_star = dh*a_star_tic[3] + dk*b_star_tic[3] + dl*c_star_tic[3]; + CUDAREAL Nvol=Na*Nb*Nc; + CUDAREAL xtal_size_sq = pow(Nvol*V_cell, CUDAREAL(2)/CUDAREAL(3)); + CUDAREAL rad_star_sqr = ( dx_star*dx_star + dy_star*dy_star + dz_star*dz_star ) + *xtal_size_sq; + F_latt = Nvol*exp(-( rad_star_sqr *1.9 * fudge )); + } if (s_xtal_shape == TOPHAT) { /* make a flat-top spot of same height and volume as square_xtal spots */ F_latt = Na * Nb * Nc * (hrad_sqr * fudge < 0.3969); diff --git a/simtbx/nanoBragg/nanoBragg_crystal.py b/simtbx/nanoBragg/nanoBragg_crystal.py index 397c0fcb2e..3c6408a7b9 100644 --- a/simtbx/nanoBragg/nanoBragg_crystal.py +++ b/simtbx/nanoBragg/nanoBragg_crystal.py @@ -162,6 +162,8 @@ def n_mos_domains(self, val): def xtal_shape(self): if self._xtal_shape == "gauss": return shapetype.Gauss + if self._xtal_shape == "gauss_star": + return shapetype.Gauss_star elif self._xtal_shape == "gauss_argchk": return shapetype.Gauss_argchk elif self._xtal_shape == "round": diff --git a/simtbx/nanoBragg/nanoBragg_ext.cpp b/simtbx/nanoBragg/nanoBragg_ext.cpp index 269c11634f..ea6b0eaa87 100644 --- a/simtbx/nanoBragg/nanoBragg_ext.cpp +++ b/simtbx/nanoBragg/nanoBragg_ext.cpp @@ -1154,7 +1154,7 @@ printf("DEBUG: pythony_stolFbg[1]=(%g,%g)\n",nanoBragg.pythony_stolFbg[1][0],nan /* spindle angle phi step, in deg */ static double get_phistep_deg(nanoBragg const& nanoBragg) { - return nanoBragg.phi0*RTD;; + return nanoBragg.phistep*RTD;; } static void set_phistep_deg(nanoBragg& nanoBragg, double const& value) { nanoBragg.phistep = value/RTD; @@ -1337,6 +1337,7 @@ printf("DEBUG: pythony_stolFbg[1]=(%g,%g)\n",nanoBragg.pythony_stolFbg[1][0],nan .value("Round",ROUND) .value("Gauss",GAUSS) .value("Gauss_argchk",GAUSS_ARGCHK) + .value("Gauss_star",GAUSS_STAR) .value("Tophat",TOPHAT) .value("Fiber",FIBER) .value("Unknown",UNKNOWN) @@ -1862,6 +1863,10 @@ printf("DEBUG: pythony_stolFbg[1]=(%g,%g)\n",nanoBragg.pythony_stolFbg[1][0],nan "random number seed for detector calibration error, keep the same for a given detector") /* noise generation parameters */ + .add_property("fudge", + make_getter(&nanoBragg::fudge,rbv()), + make_setter(&nanoBragg::fudge,dcp()), + "fudge factor on F_latt falloff (default=1)") .add_property("spot_scale", make_getter(&nanoBragg::spot_scale,rbv()), make_setter(&nanoBragg::spot_scale,dcp()), diff --git a/simtbx/nanoBragg/nanotypes.h b/simtbx/nanoBragg/nanotypes.h index 524bd2371c..fbc73bb32f 100644 --- a/simtbx/nanoBragg/nanotypes.h +++ b/simtbx/nanoBragg/nanotypes.h @@ -25,7 +25,7 @@ struct hklParams { }; typedef enum { SAMPLE, BEAM } pivot; -typedef enum { UNKNOWN, SQUARE, ROUND, GAUSS, GAUSS_ARGCHK, TOPHAT, FIBER } shapetype; +typedef enum { UNKNOWN, SQUARE, ROUND, GAUSS, GAUSS_ARGCHK, TOPHAT, FIBER, GAUSS_STAR } shapetype; // GAUSS_ARGCHK provides a lightweight backdoor for efficient implementation of // the GAUSS shapetype in CUDA. Precaluates the argument of the exponential. typedef enum { CUSTOM, ADXV, MOSFLM, XDS, DIALS, DENZO } convention; diff --git a/simtbx/nanoBragg/sim_data.py b/simtbx/nanoBragg/sim_data.py index f40c2581da..604c442adb 100644 --- a/simtbx/nanoBragg/sim_data.py +++ b/simtbx/nanoBragg/sim_data.py @@ -371,9 +371,11 @@ def update_Fhkl_tuple(self): Fimag = flex.double(Fimag) self.D.Fhkl_tuple = ma_on_detector.indices(), Freal, Fimag elif self.using_diffBragg_spots: - self.D.Fhkl_tuple = ma_on_detector.indices(), ma_on_detector.data(), None + inds = ma_on_detector.indices() + dat = ma_on_detector.data() + self.D.Fhkl_tuple = inds, dat, None else: - self.D.Fhkl_tuple = self.crystal.miller_array.indices(), self.crystal.miller_array.data(), None + self.D.Fhkl_tuple = ma_on_detector.indices(), ma_on_detector.data(), None def set_dspace_binning(self, nbins, verbose=False): dsp = self.D.Fhkl.d_spacings().data().as_numpy_array() @@ -406,10 +408,13 @@ def _crystal_properties(self): self.D.Omatrix = self.crystal.Omatrix self.D.Bmatrix = self.crystal.dxtbx_crystal.get_B() # self.D.Umatrix = self.crystal.dxtbx_crystal.get_U() - self.update_Fhkl_tuple() - self.D.unit_cell_tuple = self.crystal.dxtbx_crystal.get_unit_cell().parameters() if self.using_diffBragg_spots: + shape = str(self.crystal.xtal_shape) + if shape not in ["Gauss", "Gauss_star", "Square"]: + raise ValueError("nanoBragg_crystal xtal_shape should be square, gauss or gauss_star") + self.update_Fhkl_tuple() + self.D.unit_cell_tuple = self.crystal.dxtbx_crystal.get_unit_cell().parameters() # init mosaic domain size: if self.crystal.isotropic_ncells: self.D.Ncells_abc = self.crystal.Ncells_abc[0] diff --git a/simtbx/nanoBragg/tst_nanoBragg_Flatt_sym.py b/simtbx/nanoBragg/tst_nanoBragg_Flatt_sym.py new file mode 100644 index 0000000000..f8d9554952 --- /dev/null +++ b/simtbx/nanoBragg/tst_nanoBragg_Flatt_sym.py @@ -0,0 +1,242 @@ +from __future__ import division +""" +The script attempts to simulate diffraction patterns +for symmetrically equivalent crystal orientations, and tests if the intensities change. +If changes in intensity are detected, the stdout will indicate Failure. +""" +from argparse import ArgumentParser +parser = ArgumentParser() +parser.add_argument("sym", type=str, choices=["C","I","F","P1","P2","P3","P4","P6"], help="crystal system") +parser.add_argument("--cuda", action="store_true", help="Uses Giles nanoBragg CUDA kernel") +parser.add_argument("--fix", action="store_true", help="fix using symmetric mosaic blocks (exaFEL fix)") +parser.add_argument("--mos", action="store_true", help="add mosaic spread") +parser.add_argument("--plot", action="store_true", help="display plots at the end") +parser.add_argument("--gauss", action="store_true", help="Use the original Gaussian RELP shape") +parser.add_argument("--square", action="store_true", help="Use the Square RELP shape") +args = parser.parse_args() +from scipy.spatial.transform import Rotation +import numpy as np +from simtbx.diffBragg import utils as db_utils + +from simtbx.nanoBragg import nanoBragg, shapetype +from scitbx.matrix import sqr +from dxtbx.model import Crystal +from dxtbx.model import DetectorFactory, BeamFactory +from cctbx import sgtbx, miller, crystal, uctbx +from cctbx.array_family import flex +from cctbx.sgtbx.literal_description import literal_description + + +# =================== +# prepare the miller array for nanoBragg +def prep_miller_array(mill_arr): + """prep for nanoBragg""" + # TODO: is this correct order of things? + cb_op = mill_arr.space_group_info().change_of_basis_op_to_primitive_setting() + mill_arr = mill_arr.expand_to_p1() + mill_arr = mill_arr.generate_bijvoet_mates() + dtrm = cb_op.c().r().determinant() + if not dtrm == 1: + mill_arr = mill_arr.change_basis(cb_op) + return mill_arr + + +# simple method to check whether the structure factors in nanoBragg obey the symmetry +def sanity_check_Fs(SIM, sg, ucell_p1): + """ + SIM: nanoBragg instance + sg: space group operator + ucell_p1: p1 unit cell params + """ + inds, amps = SIM.Fhkl_tuple + print(ucell_p1) + + sym = crystal.symmetry(ucell_p1, "P1") + mset = miller.set(sym, flex.miller_index(inds), True) + ma = miller.array(mset, flex.double(amps)).set_observation_type_xray_amplitude().resolution_filter(-1, + 2) + print(sg.info()) + ops = [o for o in sg.build_derived_laue_group().all_ops() if o.r().determinant() == 1] + for o in ops: + print("Op=", o.as_xyz()) + ma2 = ma.change_basis(sgtbx.change_of_basis_op(o)) + r = ma.r1_factor(ma2, assume_index_matching=True) + print("R=", r, "\n") + assert r == 0 +# =================== + +add_spots_func="add_nanoBragg_spots" +if args.cuda: + add_spots_func = "add_nanoBragg_spots_cuda" + +syms = { + 'C': # 1hk5 + [(190.14, 39.04, 96.23, 90, 105.07, 90), + "C 1 2 1"], + 'I': # 3dll + [(169.7, 410, 695, 90, 90, 90), + "I 2 2 2"], + 'F': # 1r03 + [(181.41, 181.41, 181.41, 90, 90, 90), + "F 4 3 2"], + 'P6': # 4pgu + [(62.207, 62.207, 293.474, 90, 90, 120), + "P 65 2 2"], + 'P3': # 3dxj + [(235.09, 235.09, 250.88, 90, 90, 120), + "P 32"], + 'P4': # 1z35 + [(136.8, 136.8, 136.8, 90, 90, 90), + "P 41 3 2"], + 'P2': # 3nxs + [(78.63, 91.46, 57.47, 90, 90, 90), + "P 21 21 2"], + 'P1': # 3l89 + [(94.53, 107.71, 154.1, 90.01, 90.1, 104.7), + "P 1"]} + +ucell_p, symbol = syms[args.sym] +ucell = uctbx.unit_cell(ucell_p) +F = db_utils.make_miller_array(unit_cell=ucell_p, symbol=symbol) + +sg = F.space_group() +sgi = sg.info() +print("unit cell, space group:\n", F, "\n") + +O = ucell.orthogonalization_matrix() +# real space vectors +a = O[0], O[3], O[6] +b = O[1], O[4], O[7] +c = O[2], O[5], O[8] +C_sg = Crystal(a,b,c,sg) +# this should have the identity as a Umat +assert np.allclose(C_sg.get_U(), (1,0,0,0,1,0,0,0,1)) + +# convert crystal to P1 +to_p1_op = sgi.change_of_basis_op_to_primitive_setting() +C_p1 = C_sg.change_basis(to_p1_op) + +# random orientation +Misset = sqr(Rotation.random(random_state=1).as_matrix().flatten()) +# reorient the P1 crystal +U_p1 = sqr(C_p1.get_U()) +C_p1.set_U(Misset*U_p1) + +# make a detector and a beam +beam = BeamFactory.simple(wavelength=1) +detector = DetectorFactory.simple( + sensor='PAD', + distance=200, + beam_centre=(51.25,51.25), + fast_direction='+x', + slow_direction='-y', + pixel_size=(.1,.1), + image_size=(1024,1024)) + +SIM = nanoBragg(detector=detector, beam=beam) +#SIM.printout_pixel_fastslow = 504, 422 +SIM.xtal_shape = shapetype.Gauss_star +if args.gauss and args.square: + raise RuntimeError("Can only have gauss, square or neither (default, which is Gauss_star)") +if args.gauss: + SIM.xtal_shape = shapetype.Gauss +if args.square: + SIM.xtal_shape = shapetype.Square + +SIM.oversample = 1 +SIM.interpolate = 0 +# convert to P1 (and change basis depending on space group) +F_p1 = prep_miller_array(F) +SIM.Fhkl_tuple = F_p1.indices(), F_p1.data() +SIM.Amatrix = sqr(C_p1.get_A()).transpose() +if args.mos: + SIM.mosaic_spread_deg = 1 + SIM.mosaic_domains = 10 +assert np.allclose(SIM.unit_cell_tuple, C_p1.get_unit_cell().parameters()) +SIM.Ncells_abc = 12, 12, 12 +if args.fix: + SIM.set_mosaic_blocks_sym(C_p1, reference_symbol=sgi.type().lookup_symbol(), orig_mos_domains=1) +getattr(SIM, add_spots_func)() +reference = SIM.raw_pixels.as_numpy_array() +print(SIM.fudge) +#exit() + +#sanity_check_Fs(SIM, sg, C_p1.get_unit_cell().parameters()) +print("USINGSHAPE:", SIM.xtal_shape) + +ops = sg.build_derived_laue_group().all_ops() +ops = [o for o in ops if o.r().determinant() == 1] + +results = [] +for o in ops: + sg_op = sgtbx.change_of_basis_op(o) + C_o = C_sg.change_basis(sg_op).change_basis(to_p1_op) + U = sqr(C_o.get_U()) + C_o.set_U(Misset*U) + Amat = sqr(C_o.get_A()) + if args.fix: + SIM.set_mosaic_blocks_sym(C_o, sgi.type().lookup_symbol(), orig_mos_domains=1) + SIM.raw_pixels *= 0 + SIM.Amatrix = Amat.transpose() + + print("operator:", o.as_xyz()) + forms = literal_description(o) + print(forms.long_form()) + getattr(SIM, add_spots_func)() + img = SIM.raw_pixels.as_numpy_array() + if not np.allclose(img, reference, atol=1e-7): + passed=False + print("FAILED\n") + SIM.raw_pixels *= 0 + getattr(SIM, add_spots_func)() + else: + passed=True + print("PASSED\n") + results.append((passed, img, o)) + +SIM.free_all() +nfail = sum([not passed for passed,_,_ in results ]) +nops = len(results) +if nfail > 0: + print("Test failed for %d / %d ops." %(nfail, len(results))) +else: + print("All tests pass (%d ops) ! " %(len(results))) + print("Ok!") + +if args.plot: + try: + from pylab import * + from itertools import cycle + imgs = cycle(results) + m = reference[reference >0].mean() + s = reference[reference>0].std() + vmax=m+0.5*s + fig, (ax1,ax2) = subplots(nrows=1, ncols=2, layout='constrained') + fig.set_size_inches((8,4)) + ax1.imshow(reference, vmin=0, vmax=vmax) + ax2.imshow(results[0][1], vmin=0, vmax=vmax) + ax1.set_title("+x,+y,+z", fontsize=16) + suptitle("%s" % str(sgi), fontsize=16) + for ax in (ax1, ax2): + ax.set_xlim(300, 700) + ax.set_ylim(700, 300) + while 1: + if not fignum_exists(fig.number): + break + passed, img, op = next(imgs) + ax2.images[0].set_data(img) + pass_s = "passed" if passed else "failed" + def check_sign(x): + if not x.startswith("-"): + x = "+"+x + return x + x,y,z = map(check_sign, op.as_xyz().split(',')) + xyz = ",".join([x,y,z]) + + ax2.set_title("%s (%s)" % (xyz, pass_s), fontsize=16) + plt.draw() + plt.pause(2) + + except KeyboardInterrupt: + close() + exit() diff --git a/simtbx/run_tests.py b/simtbx/run_tests.py index 2a677dc246..6b8dbfa977 100644 --- a/simtbx/run_tests.py +++ b/simtbx/run_tests.py @@ -24,7 +24,6 @@ "$D/diffBragg/tests/tst_diffBragg_update_dxtbx_geoms.py", "$D/diffBragg/tests/tst_diffBragg_deriv_rois.py", "$D/diffBragg/tests/tst_diffBragg_detdist_derivatives.py", - "$D/diffBragg/tests/tst_diffBragg_nanoBragg_congruency.py", "$D/diffBragg/tests/tst_diffBragg_ncells_property.py", "$D/diffBragg/tests/tst_diffBragg_ncells_offdiag_property.py", ["$D/diffBragg/tests/tst_diffBragg_ncells_offdiag_property.py", "--idx 1"], @@ -75,6 +74,7 @@ tst_list = nb_tst_list if OPT.enable_cxx11 and sys.platform != 'win32': tst_list += db_tst_list+db_tst_list_nonCuda + tst_list += ["$D/tests/tst_api_congruency.py"] if OPT.enable_cuda: tst_list_parallel += [ @@ -109,6 +109,7 @@ tst_list_parallel.append(par_tst) tst_list_parallel += db_tst_list_onlyCuda + tst_list_parallel += ["$D/tests/tst_api_congruency.py", "--kokkos"] def run(): build_dir = libtbx.env.under_build("simtbx") diff --git a/simtbx/tests/tst_api_congruency.py b/simtbx/tests/tst_api_congruency.py new file mode 100644 index 0000000000..fb0f61a02f --- /dev/null +++ b/simtbx/tests/tst_api_congruency.py @@ -0,0 +1,115 @@ +from __future__ import division +from simtbx.diffBragg.utils import find_diffBragg_instances +import numpy as np + +"""" +CHECK NANOBRAGG, DIFFBRAGG, and EXAFEL+KOKKOS PRODUCE EQUIVALENT RESULTS +NOTE: Cannot --kokkos flag will skip the exafel API +""" + +import sys +import libtbx.load_env +HAS_KOKKOS_EXAFEL = False +if libtbx.env.build_options.enable_kokkos and sys.platform.startswith('linux'): + # the Exafel KOKKOS API: + from simtbx.kokkos import gpu_energy_channels + from simtbx.kokkos import exascale_api + from simtbx.kokkos import gpu_detector as kokkosd + HAS_KOKKOS_EXAFEL = True + + +def _test_exafel(S, reference, add_background=False): + """ + # ONLY RUN THIS IF KOKKOS INSTANCE EXISTS + :param S: SimData instance (nanoBragg/sim_data.py) + :param reference: reference 2D image from different API to test against + :param add_background: whether or not to include background + """ + kokkos_channels_singleton = gpu_energy_channels() + inds, amps = S.D.Fhkl_tuple + kokkos_channels_singleton.structure_factors_to_GPU_direct(0, inds, amps) + kokkos_simulation = exascale_api(nanoBragg=S.D) + kokkos_simulation.allocate() + + kokkos_detector = kokkosd(detector=S.detector, beam=S.D.xray_beams[0]) + kokkos_detector.each_image_allocate() + + # loop over energies + kokkos_simulation.add_energy_channel_from_gpu_amplitudes( + 0, kokkos_channels_singleton, kokkos_detector) + if add_background: + kokkos_simulation.add_background(kokkos_detector) + kokkos_detector.write_raw_pixels(S.D) + kokkos_img = S.D.raw_pixels.as_numpy_array() + if add_background: + # note, why is there a difference in the images when adding a background? + assert np.allclose(kokkos_img, reference, atol=1e-3, rtol=0) + else: + assert np.allclose(kokkos_img, reference) + + +def main(shape="gauss_star", background=True, test_exafel=False, nb_cuda=False): + # TODO: test non-P space groups + # TODO: test divergence model + # TODO: test dispersion model + + from simtbx.nanoBragg.sim_data import SimData + S = SimData(use_default_crystal=True) + # Important, only add water OR air, but not both, because the exafel API will only look for one background model + S.add_water = True + S.add_air = False + S.crystal.xtal_shape = shape + S.crystal.mos_spread_deg = 1 + S.crystal.n_mos_domains = 20 + S.instantiate_diffBragg() + S.D.nopolar = True + S.D.oversample = 3 + S.D.spot_scale = 10 + + S.D.add_diffBragg_spots() + if background: + S._add_background() + + diff_img = S.D.raw_pixels.as_numpy_array() + + S.D.raw_pixels *= 0 + if nb_cuda: + S.D.add_nanoBragg_spots_cuda() + else: + S.D.add_nanoBragg_spots() + if background: + S._add_background() + + nano_img = S.D.raw_pixels.as_numpy_array() + + assert np.allclose(diff_img, nano_img, atol=1e-9) + a = nano_img.mean() + b = nano_img.max() + c = nano_img.min() + d = nano_img.std() + e = np.median(nano_img) + vals = a,b,c,d,e + print("Reference image has pixel values Mean,Max,Min,Median,Std=%f,%f,%f,%f,%f" %vals) + if HAS_KOKKOS_EXAFEL and test_exafel: + S.D.raw_pixels *= 0 + _test_exafel(S, nano_img, background) + else: + print("No Kokkos GPU instance, skipping _test_exafel ... ") + for name in find_diffBragg_instances(globals()): del globals()[name] + + +if __name__ == "__main__": + import sys + import os + if "--kokkos" in sys.argv: + # If we use this flag, then the deviceWrapper will instantiate KOKKOS instance + os.environ["DIFFBRAGG_USE_KOKKOS"]="1" + nb_cuda = "--cuda" in sys.argv + + from simtbx.diffBragg.device import DeviceWrapper + with DeviceWrapper(0) as wrapper: + for background in [False,True]: + test_exafel = wrapper.is_using_kokkos_gpu + main("gauss", background=background, test_exafel=test_exafel, nb_cuda=nb_cuda) + main("gauss_star", background=background, test_exafel=test_exafel, nb_cuda=nb_cuda) + print ("OK!")