Remove backwards-compatible atomicAdd for doubles.

cvangysel · Apr 8, 2018 · cff67f9
1 parent 92ddabe
commit cff67f9
Show file tree

Hide file tree

Showing 3 changed files with 1 addition and 27 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -3,7 +3,7 @@ project (device_matrix)
 
 list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/third_party)
 
-find_package(CUDA 7.0 REQUIRED)
+find_package(CUDA 8.0 REQUIRED)
 find_package(ExtraCUDA REQUIRED) # nvToolsExt
 find_package(Glog 0.3.4 REQUIRED)
 

diff --git a/cpp/device_matrix.cu b/cpp/device_matrix.cu
@@ -1,23 +1,5 @@
 #include "device_matrix/device_matrix.h"
 
-#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
-#else
-// From http://stackoverflow.com/questions/16077464/atomicadd-for-double-on-gpu.
-//
-// This is a hack that allows the tests to run in double precision.
-// atomicAdd for doubles is available in CUDA 8 and onwards.
-__device__ double atomicAdd(double* address, double val) {
-    unsigned long long int* address_as_ull = (unsigned long long int*) address;
-    unsigned long long int old = *address_as_ull, assumed;
-    do {
-        assumed = old;
-        old = atomicCAS(address_as_ull, assumed, 
-                        __double_as_longlong(val + __longlong_as_double(assumed)));
-    } while (assumed != old);
-    return __longlong_as_double(old);
-}
-#endif
-
 namespace cuda {
 
 cudaStream_t merge_streams(const cudaStream_t first,

diff --git a/include/device_matrix/device_matrix.h b/include/device_matrix/device_matrix.h
@@ -38,14 +38,6 @@
 #include "streams.h"
 #include "runtime.h"
 
-// Slow implementation of atomicAdd for double-precision; only available in debug mode.
-#ifndef NDEBUG
-#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
-#else
-__device__ double atomicAdd(double* address, double val);
-#endif
-#endif
-
 namespace cuda {
 
 // Forward declarations.