quda-ref/v1.0.0/shared__memory__cache__helper_8cuh_source.html

 #pragma once

 namespace quda
 {

   /**
      @brief Thin wrapper that treats the dynamic shared memory of a
      thread block as a cache holding one Vector per thread.

      Each Vector is stored as 2 * Vector::size values of type real
      (presumably the real and imaginary parts of complex elements -
      confirm against the Vector type in use).  Component k of the
      thread with linear block index t is stored at
      t + k * (blockDim.x * blockDim.y * blockDim.z), so the kernel
      must be launched with at least
      2 * size * blockDim.x * blockDim.y * blockDim.z * sizeof(real)
      bytes of dynamic shared memory.
    */
   template <typename real, typename Vector> class VectorCache
   {

     /**
        @brief Handle to the dynamic shared memory, viewed as an array
        of real.
        @return Pointer to the first element of the shared memory array
      */
     __device__ inline real *cache()
     {
       extern __shared__ int cache_[];
       return reinterpret_cast<real *>(cache_);
     }

 public:
     /**
        @brief Store this thread's vector in the shared memory cache.
        The slot is derived from threadIdx, i.e., the vector is saved
        at coordinates (threadIdx.x, threadIdx.y, threadIdx.z).  No
        barrier is issued here; call sync() before another thread
        loads the saved data.
        @param[in] a The vector to store
      */
     __device__ inline void save(const Vector &a)
     {
       real *const smem = cache();
       const real *const src = reinterpret_cast<const real *>(a.data);
       // distance between consecutive components of the same vector
       const unsigned int stride = blockDim.z * blockDim.y * blockDim.x;
       // linear index of this thread within the block
       int slot = (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;
 #pragma unroll
       for (int k = 0; k < 2 * a.size; k++, slot += stride) smem[slot] = src[k];
     }

     /**
        @brief Read back the vector stored by the thread at block
        coordinates (x, y, z).
        @param[in] x Coordinate in the x thread dimension
        @param[in] y Coordinate in the y thread dimension
        @param[in] z Coordinate in the z thread dimension
        @return The vector held in the cache at those coordinates
      */
     __device__ inline Vector load(int x, int y, int z)
     {
       Vector out;
       real *const dst = reinterpret_cast<real *>(out.data);
       const real *const smem = cache();
       // distance between consecutive components of the same vector
       const unsigned int stride = blockDim.z * blockDim.y * blockDim.x;
       // linear index of the requested thread within the block
       int slot = (z * blockDim.y + y) * blockDim.x + x;
 #pragma unroll
       for (int k = 0; k < 2 * out.size; k++, slot += stride) dst[k] = smem[slot];
       return out;
     }

     /**
        @brief Block-wide barrier (__syncthreads()); must be reached by
        every thread of the block.
      */
     __device__ inline void sync() { __syncthreads(); }
   };

 } // namespace quda
quda
Definition: blas_cublas.h:5

quda::VectorCache::cache
__device__ real * cache()
This is the handle to the shared memory.
Definition: shared_memory_cache_helper.cuh:28

quda::VectorCache::load
__device__ Vector load(int x, int y, int z)
Load a vector from the shared memory cache.
Definition: shared_memory_cache_helper.cuh:57

quda::VectorCache::sync
__device__ void sync()
Synchronize the cache.
Definition: shared_memory_cache_helper.cuh:72

quda::VectorCache
Class which wraps around a shared memory cache for a Vector type, where each thread in the thread block stores its own Vector in the cache, from where it can be loaded by thread coordinates.
Definition: shared_memory_cache_helper.cuh:21

quda::Vector
VectorXcd Vector
Definition: inv_eigcg_quda.cpp:38

quda::VectorCache::save
__device__ void save(const Vector &a)
Save the vector into the 3-d shared memory cache. Implicitly store the vector at coordinates given by threadIdx.
Definition: shared_memory_cache_helper.cuh:40