// checkSpinorLength(a, b): compares two fields and calls errorQuda when their
// Length() values differ, and again when their Stride() values differ.
// Each argument is evaluated more than once, so pass side-effect-free
// expressions.
// NOTE(review): the final line below still ends in a continuation backslash,
// so the macro's closing line is not visible in this excerpt; confirm the
// body is wrapped in braces (or do { } while (0)) before relying on it inside
// an unbraced if/else.
// NOTE(review): both messages format their values with %d; confirm that
// Length() and Stride() return int.
6 #define checkSpinorLength(a, b) \
8 if (a.Length() != b.Length()) \
9 errorQuda("lengths do not match: %d %d", a.Length(), b.Length()); \
10 if (a.Stride() != b.Stride()) \
11 errorQuda("strides do not match: %d %d", a.Stride(), b.Stride()); \
27 template <
typename FloatN,
int N,
typename Output,
typename Input>
29 unsigned int i = blockIdx.x*(blockDim.x) + threadIdx.x;
30 unsigned int gridSize = gridDim.x*blockDim.x;
40 template <
typename FloatN,
int N,
typename Output,
typename Input>
// Shared-memory bytes requested per thread: always 0.
// advanceSharedBytes multiplies this by the thread count when it sets
// param.shared_bytes.
48 unsigned int sharedBytesPerThread()
const {
return 0; }
// Shared-memory bytes requested per block: always 0; `param` is not read.
// advanceSharedBytes compares this against the per-thread total when it sets
// param.shared_bytes.
49 unsigned int sharedBytesPerBlock(
const TuneParam &
param)
const {
return 0; }
51 virtual bool advanceSharedBytes(
TuneParam ¶m)
const
56 param.
shared_bytes = sharedBytesPerThread()*nthreads > sharedBytesPerBlock(param) ?
57 sharedBytesPerThread()*nthreads : sharedBytesPerBlock(param);
// Constructor: initializes the members X, Y and length from the same-named
// arguments; the body itself does nothing.
// NOTE(review): the initializer list names X before Y while the parameters
// are (Y, X); actual initialization follows member declaration order, which
// is not visible in this excerpt.
62 CopyCuda(Output &Y, Input &X,
int length) : X(X), Y(Y), length(length) { }
66 return TuneKey(blasStrings.vol_str,
"copyKernel", blasStrings.aux_str);
// Floating-point operation count reported for this operation: always 0.
77 long long flops()
const {
return 0; }
79 const int Ninternal = (
sizeof(FloatN)/
sizeof(((FloatN*)0)->x))*N;
80 size_t bytes = (X.Precision() + Y.Precision())*Ninternal;
89 if (&src == &dst)
return;
94 errorQuda(
"Spinor fields do not have matching subsets dst=%d src=%d\n",
107 strcat(tmp,
",src=");
109 blasStrings.aux_str =
tmp;
121 cudaMemcpy(dst.
V(), src.
V(), dst.
Bytes(), cudaMemcpyDeviceToDevice);
127 if (src.
Nspin() == 4){
143 if (src.
Nspin() == 4){
160 if (src.
Nspin() == 4){
177 if (src.
Nspin() == 4){
194 if (src.
Nspin() == 4){
211 if (src.
Nspin() == 4){
CopyCuda(Output &Y, Input &X, int length)
QudaVerbosity getVerbosity()
unsigned long long blas_bytes
__host__ __device__ void copy(T1 &a, const T2 &b)
cudaColorSpinorField & Odd() const
cudaColorSpinorField * tmp
const char * AuxString() const
virtual bool advanceBlockDim(TuneParam &param) const
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
void copyCuda(cudaColorSpinorField &dst, const cudaColorSpinorField &src)
const char * VolString() const
#define checkSpinorLength(a, b)
cudaStream_t * getBlasStream()
void copyCuda(cudaColorSpinorField &dst, const cudaColorSpinorField &src)
QudaPrecision Precision() const
void apply(const cudaStream_t &stream)
QudaSiteSubset SiteSubset() const
__global__ void copyKernel(Output Y, Input X, int length)
cudaColorSpinorField & Even() const