7 #define checkSpinorLength(a, b) \ 9 if (a.Length() != b.Length()) \ 10 errorQuda("lengths do not match: %lu %lu", a.Length(), b.Length()); \ 11 if (a.Stride() != b.Stride()) \ 12 errorQuda("strides do not match: %d %d", a.Stride(), b.Stride()); \ 13 if (a.GammaBasis() != b.GammaBasis()) \ 14 errorQuda("gamma basis does not match: %d %d", a.GammaBasis(), b.GammaBasis()); \ 31 template <
typename FloatN,
int N,
typename Output,
typename Input>
33 unsigned int i = blockIdx.x*(blockDim.x) + threadIdx.x;
34 unsigned int parity = blockIdx.y;
35 unsigned int gridSize = gridDim.x*blockDim.x;
45 template <
typename FloatN,
int N,
typename Output,
typename Input>
68 CopyCuda(Output &Y, Input &X,
int length,
int nParity)
69 : X(X), Y(Y), length(length/nParity), nParity(nParity) { }
94 long long flops()
const {
return 0; }
96 const int Ninternal = (
sizeof(FloatN)/
sizeof(((FloatN*)0)->x))*N;
97 size_t bytes = (X.Precision() + Y.Precision())*Ninternal;
106 if (&src == &dst)
return;
117 strcat(tmp,
",src=");
139 if (src.
Nspin() == 4){
145 }
else if (src.
Nspin() == 2) {
147 errorQuda(
"Non-zero stride not supported");
153 }
else if (src.
Nspin() == 1) {
163 if (src.
Nspin() == 4){
169 }
else if (src.
Nspin() == 2) {
171 errorQuda(
"Non-zero stride not supported");
177 }
else if (src.
Nspin() == 1) {
188 if (src.
Nspin() == 4){
194 }
else if (src.
Nspin() == 1) {
205 if (src.
Nspin() == 4){
211 }
else if (src.
Nspin() == 1) {
222 if (src.
Nspin() == 4){
228 }
else if (src.
Nspin() == 1) {
239 if (src.
Nspin() == 4){
245 }
else if (src.
Nspin() == 1) {
258 if (src.
Nspin() == 4){
263 }
else if (src.
Nspin() == 1) {
273 if (src.
Nspin() == 4){
278 }
else if (src.
Nspin() == 1) {
288 if (src.
Nspin() == 4){
293 }
else if (src.
Nspin() == 1) {
303 if (src.
Nspin() == 4){
308 }
else if (src.
Nspin() == 1) {
318 if (src.
Nspin() == 4){
323 }
else if (src.
Nspin() == 1) {
333 if (src.
Nspin() == 4){
338 }
else if (src.
Nspin() == 1) {
359 static_cast<const cudaColorSpinorField&>(src));
void defaultTuneParam(TuneParam ¶m) const
const char * AuxString() const
QudaVerbosity getVerbosity()
void initTuneParam(TuneParam ¶m) const
cudaColorSpinorField * tmp
virtual bool advanceSharedBytes(TuneParam ¶m) const
unsigned int sharedBytesPerBlock(const TuneParam ¶m) const
const char * VolString() const
size_t RealLength() const
cudaStream_t * getStream()
QudaSiteSubset SiteSubset() const
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Provides precision abstractions and defines the register precision given the storage precision using ...
#define checkSpinorLength(a, b)
QudaFieldLocation Location() const
void apply(const cudaStream_t &stream)
__global__ void copyKernel(Output Y, Input X, int length)
#define qudaMemcpyAsync(dst, src, count, kind, stream)
static struct quda::blas::copy_ns::@4 blasStrings
virtual void initTuneParam(TuneParam ¶m) const
unsigned int sharedBytesPerThread() const
virtual bool advanceBlockDim(TuneParam ¶m) const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const
void copy(cudaColorSpinorField &dst, const cudaColorSpinorField &src)
CopyCuda(Output &Y, Input &X, int length, int nParity)
virtual void defaultTuneParam(TuneParam ¶m) const