tjlane · apeck12 · Nov 28, 2017 · Nov 28, 2017 · Nov 28, 2017 · Nov 29, 2017
diff --git a/src/python/parse.py b/src/python/parse.py
@@ -894,7 +894,7 @@ def find_center(image2d, mask=None, initial_guess=None, pix_res=0.1, window=15,
     x_size = image2d.shape[0]
     y_size = image2d.shape[1]
 
-    if mask != None:
+    if mask is not None:
         if not mask.shape == image2d.shape:
             raise ValueError('Mask and image must have same shape! Got %s, %s'
                              ' respectively.' % ( str(mask.shape), str(image2d.shape) ))

diff --git a/src/python/scatter.py b/src/python/scatter.py
@@ -69,7 +69,7 @@ def rand(self, *args):
 def simulate_atomic(traj, num_molecules, detector, traj_weights=None,
                     finite_photon=False, ignore_hydrogens=False, 
                     dont_rotate=False, procs_per_node=1, 
-                    nodes=[], devices=[], random_state=None):
+                    nodes=[], devices=[], random_state=None, U=None):
     """
     Simulate a scattering 'shot', i.e. one exposure of x-rays to a sample.
 
@@ -117,6 +117,10 @@ def simulate_atomic(traj, num_molecules, detector, traj_weights=None,
     dont_rotate : bool
         Don't apply a random rotation to the molecule before computing the
         scattering. Good for debugging and the like.
+
+    U : ndarray, float
+        Atomic displacement parameters, either a 1d n_atoms array [isotropic case]
+        or 3d (n_atoms, 3, 3,) tensor.
 
     Returns
     -------
@@ -180,6 +184,21 @@ def simulate_atomic(traj, num_molecules, detector, traj_weights=None,
         atoms_to_keep = np.ones(n_atoms, dtype=np.bool)
 
 
+    # if a 1d U matrix is passed, expand to 3d; if no U matrix is input,
+    # set all elements to zero
+    n_atoms = len(atoms_to_keep)
+    if U is None:
+        U = np.zeros((n_atoms, 3, 3))
+    elif U.shape == (n_atoms,):
+        e = np.eye(3)
+        U = U[:,None,None] * e[None,None,:]
+    elif U.shape == (n_atoms, 3, 3):
+        # correctly sized for lower level interface
+        pass
+    else:
+        raise TypeError("U matrix has invalide shape.")
+
+
     qxyz = _qxyz_from_detector(detector)
     amplitudes = np.zeros(qxyz.shape[0], dtype=np.complex128)
 
@@ -196,7 +215,8 @@ def simulate_atomic(traj, num_molecules, detector, traj_weights=None,
                                                        procs_per_node=procs_per_node,
                                                        nodes=nodes,
                                                        devices=devices,
-                                                       random_state=random_state)
+                                                       random_state=random_state,
+                                                       U=U)
 
     if finite_photon is not False:
         intensities = np.square(np.abs(amplitudes))
@@ -254,7 +274,8 @@ def simulate_density(grid, grid_spacing, num_molecules, detector,
                                                   procs_per_node=procs_per_node,
                                                   nodes=nodes,
                                                   devices=devices,
-                                                  random_state=random_state)
+                                                  random_state=random_state,
+                                                  U=None)
 
     # put the output back in sq grid form if requested
     if reshape_output:

diff --git a/src/scatter/cpp_scatter.cpp b/src/scatter/cpp_scatter.cpp
@@ -142,6 +142,35 @@ void qVq_product(float const * const V,
 
 }
 
+#ifdef __CUDACC__
+    __host__ __device__
+#endif
+void qUq_product(float const * const U,
+		 int i,
+		 float qx,
+		 float qy,
+		 float qz,
+
+		 float &o_qUq
+
+		 ){
+
+      // compute < q | U_ii | q >
+
+      float qUq;
+
+      qUq  =     qx * qx * U[9*i + 0];
+      qUq +=     qy * qy * U[9*i + 4];
+      qUq +=     qz * qz * U[9*i + 8];
+      qUq += 2 * qx * qy * U[9*i + 1];
+      qUq += 2 * qx * qz * U[9*i + 2];
+      qUq += 2 * qy * qz * U[9*i + 5];
+
+      o_qUq = qUq;
+
+}
+
+
 /******************************************************************************
  * Hybrid CPU/GPU Code
  * decides whether to try and call GPU code or raise an exception, depending
@@ -168,6 +197,9 @@ void gpuscatter (int device_id_,
                  int   * h_atom_types,
                  float * h_cromermann,
 
+		 // atomic displacement parameters
+		 float * h_U,
+
                  // random numbers for rotations
                  int     n_rotations,
                  float * rand1,
@@ -183,7 +215,7 @@ void gpuscatter (int device_id_,
         _gpuscatter( device_id_,
                      n_q, h_qx, h_qy, h_qz,
                      n_atoms, h_rx, h_ry, h_rz,
-                     n_atom_types, h_atom_types, h_cromermann,
+                     n_atom_types, h_atom_types, h_cromermann, h_U,
                      n_rotations, rand1, rand2, rand3,
                      h_q_out_real, h_q_out_imag);
     #else
@@ -252,6 +284,8 @@ void cpu_kernel( int   const n_q,
                  int   const * const __restrict__ atom_types,
                  float const * const __restrict__ cromermann,
 
+                 float const * const __restrict__ U,
+
                  int   const n_rotations,
                  float const * const __restrict__ randN1, 
                  float const * const __restrict__ randN2, 
@@ -273,6 +307,7 @@ void cpu_kernel( int   const n_q,
      * n_atom_types        : the number of unique atom types (formfactors)
      * atom_types          : the atom "type", which is an arbitrary index
      * cromermann          : 9 params specifying formfactor for each atom type 
+     * U                   : 3d array of the atomic displacement parameters
      * n_rotations/        : The number of molecules to independently rotate and
      *  randN{1,2,3}         the random numbers used to perform those rotations
      *
@@ -290,8 +325,9 @@ void cpu_kernel( int   const n_q,
     float q_sum_real, q_sum_imag; // partial sum of real and imaginary amplitude
     float qr;                     // dot product of q and r
 
-    // we will use a small array to store form factors
+    // we will use a small array to store form factors and Debye-Waller factors
     float * formfactors = (float *) malloc(n_atom_types * sizeof(float));
+    float * qUq = (float *) malloc(n_atoms * sizeof(float));
 
     // pre-compute rotation quaternions    
     float * q0 = (float *) malloc(n_rotations * sizeof(float));
@@ -336,6 +372,11 @@ void cpu_kernel( int   const n_q,
 
         }
 
+	// precompute Debye-Waller factors for each atom
+	for( int a = 0; a < n_atoms; a++ ) {
+	  qUq_product(U, a, qx, qy, qz, qUq[a]);
+	}
+
         // for each molecule (2nd nested loop)
         for( int im = 0; im < n_rotations; im++ ) {
 
@@ -352,10 +393,10 @@ void cpu_kernel( int   const n_q,
                        ax, ay, az);
 
                 qr = ax*qx + ay*qy + az*qz;
-                
+
                 // FIXME :: swap cos and sin???? e^i*t = cos(t) + i sin(t)
-                q_sum_real += fi * sinf(qr);
-                q_sum_imag += fi * cosf(qr);
+                q_sum_real += fi * sinf(qr) * exp(- 0.5 * qUq[a]);
+                q_sum_imag += fi * cosf(qr) * exp(- 0.5 * qUq[a]);
 
             } // finished one atom (3rd loop)
         } // finished one molecule (2nd loop)
@@ -371,6 +412,7 @@ void cpu_kernel( int   const n_q,
     free(q1);
     free(q2);
     free(q3);
+    free(qUq);
 
 }
 
@@ -388,6 +430,8 @@ void cpuscatter(  int  n_q,
                   int   * atom_types,
                   float * cromermann,
 
+		          float * U,
+
                   int   n_rotations,
                   float * randN1, 
                   float * randN2, 
@@ -399,7 +443,7 @@ void cpuscatter(  int  n_q,
 
     cpu_kernel( n_q, q_x, q_y, q_z, 
                 n_atoms, r_x, r_y, r_z,
-                n_atom_types, atom_types, cromermann,
+                n_atom_types, atom_types, cromermann, U,
                 n_rotations, randN1, randN2, randN3,
                 q_out_real, q_out_imag );
 }
@@ -541,77 +585,13 @@ void cpudiffuse( int   n_q,
 }
 
 // This is a meaningless test, for speed only...
-// #ifndef __CUDACC__
-// int main() {
-//
-//     int nQ_ = 1000;
-//     int nAtoms_ = 1000;
-//     int n_atom_types_ = 10;
-//     int nRot_ = 1000;
-//
-//     float * h_qx_ = new float[nQ_];
-//     float * h_qy_ = new float[nQ_];
-//     float * h_qz_ = new float[nQ_];
-//
-//     float * h_rx_ = new float[nAtoms_];
-//     float * h_ry_ = new float[nAtoms_];
-//     float * h_rz_ = new float[nAtoms_];
-//
-//     int   * atom_types_ = new int[nAtoms_];
-//     float * cromermann_ = new float[n_atom_types_ * 9];
-//
-//     float * h_rand1_ = new float[nRot_];
-//     float * h_rand2_ = new float[nRot_];
-//     float * h_rand3_ = new float[nRot_];
-//
-//     float * h_outQ_R = new float[nQ_];
-//     float * h_outQ_I = new float[nQ_];
-//
-//     cpuscatter    ( // q vectors
-//                     nQ_,
-//                     h_qx_,
-//                     h_qy_,
-//                     h_qz_,
-//
-//                     // atomic positions, ids
-//                     nAtoms_,
-//                     h_rx_,
-//                     h_ry_,
-//                     h_rz_,
-//
-//                     // formfactor info
-//                     n_atom_types_,
-//                     atom_types_,
-//                     cromermann_,
-//
-//                     // random numbers for rotations
-//                     nRot_,
-//                     h_rand1_,
-//                     h_rand2_,
-//                     h_rand3_,
-//
-//                     // output
-//                     h_outQ_R,
-//                     h_outQ_I );
-//
-//     printf("CPP OUTPUT:\n");
-//     printf("%f\n", h_outQ_R[0]);
-//     printf("%f\n", h_outQ_I[0]);
-//
-//     return 0;
-// }
-// #endif
-
 #ifndef __CUDACC__
 int main() {
 
-    int nQ_ = 100;
-    int nAtoms_ = 1500;
-
-    std::cout << nQ_ << " q-vectors :: " << nAtoms_ << " atoms\n";
-    std::cout << "remember: linear in q-vectors, quadratic in atoms\n";
-
+    int nQ_ = 1000;
+    int nAtoms_ = 1000;
     int n_atom_types_ = 10;
+    int nRot_ = 1000;
 
     float * h_qx_ = new float[nQ_];
     float * h_qy_ = new float[nQ_];
@@ -623,13 +603,17 @@ int main() {
 
     int   * atom_types_ = new int[nAtoms_];
     float * cromermann_ = new float[n_atom_types_ * 9];
+    float * U_ = new float[nAtoms_ * 3 * 3];
+
 
-    float * V = new float[nAtoms_ * nAtoms_ * 3 * 3];
+    float * h_rand1_ = new float[nRot_];
+    float * h_rand2_ = new float[nRot_];
+    float * h_rand3_ = new float[nRot_];
 
     float * h_outQ_R = new float[nQ_];
     float * h_outQ_I = new float[nQ_];
 
-    cpudiffuse   ( // q vectors
+    cpuscatter    ( // q vectors
                     nQ_,
                     h_qx_,
                     h_qy_,
@@ -645,9 +629,15 @@ int main() {
                     n_atom_types_,
                     atom_types_,
                     cromermann_,
-
-                    // correlation matrix
-                    V,
+
+					          // ADP info
+                    U_,
+
+                    // random numbers for rotations
+                    nRot_,
+                    h_rand1_,
+                    h_rand2_,
+                    h_rand3_,
 
                     // output
                     h_outQ_R,
@@ -660,3 +650,64 @@ int main() {
     return 0;
 }
 #endif
+
+// Testing speed of diffuse calculation
+// #ifndef __CUDACC__
+// int main() {
+//
+//     int nQ_ = 100;
+//     int nAtoms_ = 1500;
+//
+//     std::cout << nQ_ << " q-vectors :: " << nAtoms_ << " atoms\n";
+//     std::cout << "remember: linear in q-vectors, quadratic in atoms\n";
+//
+//     int n_atom_types_ = 10;
+//
+//     float * h_qx_ = new float[nQ_];
+//     float * h_qy_ = new float[nQ_];
+//     float * h_qz_ = new float[nQ_];
+//
+//     float * h_rx_ = new float[nAtoms_];
+//     float * h_ry_ = new float[nAtoms_];
+//     float * h_rz_ = new float[nAtoms_];
+//
+//     int   * atom_types_ = new int[nAtoms_];
+//     float * cromermann_ = new float[n_atom_types_ * 9];
+//
+//     float * V = new float[nAtoms_ * nAtoms_ * 3 * 3];
+//
+//     float * h_outQ_R = new float[nQ_];
+//     float * h_outQ_I = new float[nQ_];
+//
+//     cpudiffuse   ( // q vectors
+//                     nQ_,
+//                     h_qx_,
+//                     h_qy_,
+//                     h_qz_,
+//
+//                     // atomic positions, ids
+//                     nAtoms_,
+//                     h_rx_,
+//                     h_ry_,
+//                     h_rz_,
+// 
+//                     // formfactor info
+//                     n_atom_types_,
+//                     atom_types_,
+//                     cromermann_,
+//
+//
+//                     // correlation matrix
+//                     V,
+//
+//                     // output
+//                     h_outQ_R,
+//                     h_outQ_I );
+//
+//     printf("CPP OUTPUT:\n");
+//     printf("%f\n", h_outQ_R[0]);
+//     printf("%f\n", h_outQ_I[0]);
+//
+//     return 0;
+// }
+// #endif