QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
shared_memory_cache_helper.cuh
Go to the documentation of this file.
1 #pragma once
2 
10 namespace quda
11 {
12 
template <typename real, typename Vector> class VectorCache
{

  /**
     @brief Handle to the shared memory that backs the cache.
     @return The dynamically-sized extern __shared__ array cache_,
     reinterpreted as a pointer to real
   */
  __device__ inline real *cache()
  {
    extern __shared__ int cache_[];
    return reinterpret_cast<real *>(cache_);
  }

public:
  /**
     @brief Save a vector into the shared memory cache at the slot
     belonging to the calling thread (derived from threadIdx).

     The vector is copied as 2 * a.size values of type real (presumably
     the real and imaginary parts of a.size complex entries - confirm
     against the Vector type in use).  Component c of the thread with
     flat index t lands at offset c * (block volume) + t, so the cache
     indices written lie in [0, 2 * a.size * blockDim.x * blockDim.y *
     blockDim.z).  No barrier is issued here; see sync().

     @param[in] a Vector to store in the cache
   */
  __device__ inline void save(const Vector &a)
  {
    // number of threads in the block = distance between consecutive components
    const unsigned int block_volume = blockDim.x * blockDim.y * blockDim.z;
    const real *src = reinterpret_cast<const real *>(a.data);
    real *smem = cache();

    // flat index of this thread within the 3-d thread block (x runs fastest)
    int slot = threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);
#pragma unroll
    for (int c = 0; c < 2 * a.size; c++) {
      smem[slot] = src[c];
      slot += block_volume;
    }
  }

  /**
     @brief Load a vector from the shared memory cache, reading the
     slot addressed by the given thread-block coordinates.  Uses the
     same layout as save(): component c is read from offset
     c * (block volume) + (z * blockDim.y + y) * blockDim.x + x.

     @param[in] x x-coordinate of the slot to read
     @param[in] y y-coordinate of the slot to read
     @param[in] z z-coordinate of the slot to read
     @return The Vector assembled from the cached components
   */
  __device__ inline Vector load(int x, int y, int z)
  {
    // number of threads in the block = distance between consecutive components
    const unsigned int block_volume = blockDim.x * blockDim.y * blockDim.z;
    Vector result;
    real *dst = reinterpret_cast<real *>(result.data);
    real *smem = cache();

    // flat index of the requested slot within the 3-d thread block
    int slot = x + blockDim.x * (y + blockDim.y * z);
#pragma unroll
    for (int c = 0; c < 2 * result.size; c++) {
      dst[c] = smem[slot];
      slot += block_volume;
    }
    return result;
  }

  /**
     @brief Synchronize the cache: a thin wrapper around
     __syncthreads().
   */
  __device__ inline void sync() { __syncthreads(); }
};
74 
75 } // namespace quda
__device__ real * cache()
This is the handle to the shared memory.
__device__ Vector load(int x, int y, int z)
Load a vector from the shared memory cache.
__device__ void sync()
Synchronize the cache.
Class which wraps around a shared memory cache for a Vector type, where each thread in the thread block stores its own Vector, which any thread in the block can then load by 3-d thread coordinates.
VectorXcd Vector
__device__ void save(const Vector &a)
Save the vector into the 3-d shared memory cache. Implicitly store the vector at coordinates given by the calling thread's threadIdx.