// checkSpinorLength(a, b): compares the two arguments' Length() and then their
// Stride(); on a mismatch it calls errorQuda() with both values in the message.
// NOTE(review): the "%d" conversions assume Length()/Stride() return int — confirm.
// NOTE(review): the macro arguments are used unparenthesized, so pass plain
// object expressions only.
// NOTE(review): the leading integers on these lines look like line numbers left
// over from a listing extraction, and the macro's enclosing braces are not
// visible here (the last visible line still ends in a continuation backslash).
6 #define checkSpinorLength(a, b) \
8 if (a.Length() != b.Length()) \
9 errorQuda("lengths do not match: %d %d", a.Length(), b.Length()); \
10 if (a.Stride() != b.Stride()) \
11 errorQuda("strides do not match: %d %d", a.Stride(), b.Stride()); \
// Template header followed by the first statements of a device routine; the
// routine's signature and the rest of its body are not visible in this listing.
// NOTE(review): presumably this is the copy kernel that the "copyKernel" tune
// key further down refers to — confirm.
// NOTE(review): FloatN/N presumably describe the vector type and how many such
// vectors make up one element (compare the Ninternal computation below);
// Output/Input presumably are the destination and source accessor types.
29 template <
typename FloatN,
int N,
typename Output,
typename Input>
// Flat index of this thread along x, counted across all blocks of the grid.
31 unsigned int i = blockIdx.x*(blockDim.x) + threadIdx.x;
// Total number of threads along x in the whole grid.
// NOTE(review): presumably used as the step of a loop over elements starting
// at i; that loop is not visible here.
32 unsigned int gridSize = gridDim.x*blockDim.x;
// Template header with the same four parameters as the one above.
// NOTE(review): presumably this introduces the CopyCuda class whose constructor
// appears below; the class declaration line itself is not visible here.
42 template <
typename FloatN,
int N,
typename Output,
typename Input>
// Always returns 0. The value is multiplied by the thread count in
// advanceSharedBytes() when param.shared_bytes is computed.
50 int sharedBytesPerThread()
const {
return 0; }
// Always returns 0; the TuneParam argument is not read. The value is the
// per-block term compared against the per-thread total in advanceSharedBytes().
51 int sharedBytesPerBlock(
const TuneParam &
param)
const {
return 0; }
// Tuning hook that rewrites param.shared_bytes in place (param is taken by
// non-const reference).
// NOTE(review): the lines between the signature and the assignment are not
// visible in this listing; `nthreads` is presumably defined there, and the
// bool return value is produced by code that is not shown — confirm.
53 virtual bool advanceSharedBytes(
TuneParam &param)
const
// shared_bytes = max(sharedBytesPerThread()*nthreads, sharedBytesPerBlock(param))
58 param.
shared_bytes = sharedBytesPerThread()*nthreads > sharedBytesPerBlock(param) ?
59 sharedBytesPerThread()*nthreads : sharedBytesPerBlock(param);
// Constructor: initializes the members X, Y and length from the arguments of
// the same name; the body is empty.
//   Y      - destination accessor (taken by non-const reference)
//   X      - source accessor (taken by non-const reference)
//   length - NOTE(review): presumably the number of elements to copy — confirm
// NOTE(review): member declarations are not visible; C++ initializes members in
// declaration order, not in the order written in this initializer list.
64 CopyCuda(Output &Y, Input &X,
int length) : X(X), Y(Y), length(length) { ; }
// Builds and returns the TuneKey for this operation (the enclosing method's
// signature is not visible in this listing).
//   vol - blasConstants.x[0..3] joined with "x", e.g. "<x0>x<x1>x<x2>x<x3>"
//   aux - "stride=<blasConstants.stride>,out_prec=<Y.Precision()>,in_prec=<X.Precision()>"
// The middle field of the key is the fixed string "copyKernel".
68 std::stringstream vol, aux;
69 vol << blasConstants.x[0] <<
"x";
70 vol << blasConstants.x[1] <<
"x";
71 vol << blasConstants.x[2] <<
"x";
72 vol << blasConstants.x[3];
73 aux <<
"stride=" << blasConstants.stride <<
",out_prec=" << Y.Precision() <<
",in_prec=" << X.Precision();
74 return TuneKey(vol.str(),
"copyKernel", aux.str());
// Always returns 0 as the floating-point operation count for this operation.
85 long long flops()
const {
return 0; }
// Fragment of a byte-count computation; the enclosing signature and the
// statements that follow are not visible in this listing.
// Ninternal = (number of .x-sized components in one FloatN) * N. The null
// pointer cast appears only inside sizeof, so it is never dereferenced.
87 const int Ninternal = (
sizeof(
FloatN)/
sizeof(((
FloatN*)0)->x))*N;
// Sum of the input and output Precision() values, scaled by Ninternal.
// NOTE(review): assumes Precision() gives the size in bytes of one scalar, and
// presumably this per-element figure is multiplied by the element count in a
// line that is not shown — confirm.
88 size_t bytes = (X.Precision() + Y.Precision())*Ninternal;
// Fragments of the host-side copy routine taking `src` and `dst`; its signature
// and several intermediate lines are not visible in this listing.
// Copying an object onto itself is a no-op.
96 if (&src == &dst)
return;
// Error report for mismatched subsets; the condition that guards this call and
// the two arguments formatted by %d are not visible here.
101 errorQuda(
"Spinor fields do not have matching subsets dst=%d src=%d\n",
// Record the source's dimensions (QUDA_MAX_DIM entries of src.X()) and its
// stride in blasConstants; the tune-key code above reads these values.
110 for (
int d=0; d<
QUDA_MAX_DIM; d++) blasConstants.x[d] = src.
X()[d];
111 blasConstants.stride = src.
Stride();
// Device-to-device copy of dst.Bytes() bytes from src.V() to dst.V().
// NOTE(review): presumably taken only when both fields share one precision; the
// guarding condition is not visible. The cudaMemcpy return code is not checked
// on this line — confirm it is checked elsewhere.
120 cudaMemcpy(dst.
V(), src.
V(), dst.
Bytes(), cudaMemcpyDeviceToDevice);
// Six structurally identical sections follow. Each tests src.Nspin() == 4 and
// contains two copy(dst_spinor, src_tex, src.Volume()) calls.
// NOTE(review): the second call of each section presumably sits in the else
// branch, and the six sections presumably cover the combinations of differing
// source/destination precision. The selecting conditions and the declarations
// of dst_spinor/src_tex/copy are not visible in this listing — confirm.
// Section 1
126 if (src.
Nspin() == 4){
131 copy(dst_spinor, src_tex, src.
Volume());
138 copy(dst_spinor, src_tex, src.
Volume());
// Section 2
142 if (src.
Nspin() == 4){
147 copy(dst_spinor, src_tex, src.
Volume());
154 copy(dst_spinor, src_tex, src.
Volume());
// Section 3
159 if (src.
Nspin() == 4){
164 copy(dst_spinor, src_tex, src.
Volume());
171 copy(dst_spinor, src_tex, src.
Volume());
// Section 4
176 if (src.
Nspin() == 4){
181 copy(dst_spinor, src_tex, src.
Volume());
188 copy(dst_spinor, src_tex, src.
Volume());
// Section 5
193 if (src.
Nspin() == 4){
198 copy(dst_spinor, src_tex, src.
Volume());
205 copy(dst_spinor, src_tex, src.
Volume());
// Section 6
210 if (src.
Nspin() == 4){
215 copy(dst_spinor, src_tex, src.
Volume());
222 copy(dst_spinor, src_tex, src.
Volume());