QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
extend_compress.cu
Go to the documentation of this file.
1 #include <blas_quda.h>
2 #include <tune_quda.h>
3 #include <float_vector.h>
4 
5 namespace quda {
8  cudaStream_t* getBlasStream();
9 
10  namespace quda {
11 #include <texture.h>
12 
// File-local (internal linkage) instance of an anonymous struct holding one
// integer member, `stride`. Nothing in the visible code reads or writes it.
// NOTE(review): presumably the array stride shared by the kernels in this
// file -- confirm against the code that sets it.
static struct { int stride; } blasConstants;
17 
/**
 * Element-wise copy kernel: for every index i in [0, length) the element is
 * loaded from X into a per-thread staging array and then saved to Y.
 *
 * Uses a grid-stride loop, so any 1D launch configuration covers the whole
 * range; only the x dimension of the grid and block is used for indexing.
 *
 * @tparam FloatN  element type of the per-thread staging array
 * @tparam N       number of FloatN values staged per index
 * @tparam Output  accessor type of Y; must provide save(FloatN[N], index)
 * @tparam Input   accessor type of X; must provide load(FloatN[N], index)
 *
 * Output was used in the signature but missing from the template header; it
 * is declared before Input so that callers that spell out <FloatN, N>
 * explicitly still deduce Output and Input from the arguments.
 *
 * @param Y       destination accessor
 * @param X       source accessor
 * @param length  number of indices to copy; values <= 0 copy nothing
 */
template <typename FloatN, int N, typename Output, typename Input>
__global__ void copyKernel(Output Y, Input X, int length) {
  // Guard before the signed -> unsigned conversion below: a negative length
  // would otherwise turn into a huge bound and run far past the data.
  if (length <= 0) return;
  const unsigned int n = static_cast<unsigned int>(length);

  // Total number of threads in the grid = distance between the indices
  // handled by one thread.
  const unsigned int gridSize = gridDim.x * blockDim.x;

  for (unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += gridSize) {
    FloatN x[N];
    X.load(x, i);
    Y.save(x, i);
  }
}
30 
/**
 * Cut kernel: per the original note, meant to take a larger input and cut
 * off a border region.
 *
 * NOTE(review): the body is not implemented yet -- the loop below visits
 * every index in [0, length) but performs no work, and Y, X and dim are
 * currently unused.
 *
 * Output and Input were used in the signature without being declared; they
 * are now template parameters deduced from the call arguments.
 *
 * @tparam Output  type of the destination argument
 * @tparam Input   type of the source argument
 *
 * @param Y       destination
 * @param X       source
 * @param length  number of indices to visit; values <= 0 visit nothing
 * @param dim     array parameter, which decays to int*; it must therefore
 *                point to memory the device can read once the body starts
 *                using it. NOTE(review): presumably the four lattice
 *                dimensions -- confirm with the caller.
 */
template <typename Output, typename Input>
__global__ void cutKernel(Output Y, Input X, int length, int dim[4]) {
  // Guard before the signed -> unsigned conversion below.
  if (length <= 0) return;
  const unsigned int n = static_cast<unsigned int>(length);

  const unsigned int gridSize = gridDim.x * blockDim.x;

  // The original loop never advanced i, so any thread with i < length could
  // never leave it; stepping by the grid size makes the loop terminate.
  for (unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += gridSize) {
    // TODO: copy the interior element for index i from X to Y, skipping the
    // border region.
  }
}
41 
42 
43  }
44 
45 } // namespace quda