#if defined(__CUDA_ARCH__)

#if __COMPUTE_CAPABILITY__ < 600
/**
 * @brief Double-precision atomic addition built on a 64-bit compare-and-swap loop.
 *
 * Only compiled when __COMPUTE_CAPABILITY__ < 600.
 * NOTE(review): __COMPUTE_CAPABILITY__ is not a compiler built-in; it is
 * presumably supplied by the build system -- confirm it is always defined here.
 *
 * @param addr Address of the double to be updated.
 * @param val  Value to add to *addr.
 * @return The value observed at addr by the compare-and-swap that succeeded,
 *         i.e. the value immediately before this thread's addition.
 */
static inline __device__ double atomicAdd(double *addr, double val)
{
  double old = *addr, assumed;
  do {
    // Snapshot the value the sum is based on; the swap only succeeds if
    // *addr still holds exactly this bit pattern.
    assumed = old;
    old = __longlong_as_double(atomicCAS((unsigned long long int *)addr,
                                         __double_as_longlong(assumed),
                                         __double_as_longlong(val + assumed)));
    // Compare as integers rather than doubles: a floating-point comparison
    // would never report equality for NaN and the loop would not terminate.
  } while (__double_as_longlong(assumed) != __double_as_longlong(old));

  return old;
}
#endif // __COMPUTE_CAPABILITY__ < 600

/**
 * @brief double2 addition performed as two independent double-precision atomic adds.
 *
 * The x and y components are updated by separate atomic operations, so the
 * pair is not updated as a single indivisible unit.
 *
 * @param addr Address of the double2 to be updated.
 * @param val  Per-component values to add.
 * @return The per-component values returned by the two underlying atomicAdd calls.
 */
static inline __device__ double2 atomicAdd(double2 *addr, double2 val)
{
  double2 old;
  // Address the components as consecutive doubles starting at addr.
  old.x = atomicAdd((double *)addr, val.x);
  old.y = atomicAdd((double *)addr + 1, val.y);
  return old;
}

/**
 * @brief float2 addition performed as two independent single-precision atomic adds.
 *
 * The x and y components are updated by separate atomic operations, so the
 * pair is not updated as a single indivisible unit.
 *
 * @param addr Address of the float2 to be updated.
 * @param val  Per-component values to add.
 * @return The per-component values returned by the two underlying atomicAdd calls.
 */
static inline __device__ float2 atomicAdd(float2 *addr, float2 val)
{
  float2 old;
  // Address the components as consecutive floats starting at addr.
  old.x = atomicAdd((float *)addr, val.x);
  old.y = atomicAdd((float *)addr + 1, val.y);
  return old;
}

#endif // defined(__CUDA_ARCH__)