QUDA  0.9.0
clover_field_order.h
Go to the documentation of this file.
1 #ifndef _CLOVER_ORDER_H
2 #define _CLOVER_ORDER_H
3 
10 // trove requires the warp shuffle instructions introduced with Kepler
11 #if __COMPUTE_CAPABILITY__ >= 300
12 #include <trove/ptr.h>
13 #else
14 #define DISABLE_TROVE
15 #endif
16 #include <register_traits.h>
17 #include <clover_field.h>
18 #include <complex_quda.h>
19 #include <quda_matrix.h>
20 #include <color_spinor.h>
21 
22 namespace quda {
23 
 36  template <typename Float, typename T>
 // clover_wrapper: lightweight proxy binding one (site, parity, chirality)
 // chiral block of a clover accessor T, so that assignment to/from an
 // HMatrix round-trips through T::save() / T::load() (see the HMatrix
 // operators defined below this struct).
 37  struct clover_wrapper {
 38  T &field;            // the accessor being wrapped (owns the data pointers)
 39  const int x_cb;      // checkerboard site index
 40  const int parity;    // site parity
 41  const int chirality; // chiral block index
 42 
 // NOTE(review): the constructor's initializer list and body (original
 // lines 51-56) are missing from this extraction — confirm against the
 // upstream header before relying on this declaration.
 50  __device__ __host__ inline clover_wrapper<Float,T>(T &field, int x_cb, int parity, int chirality)
 52 
 // Assignment from a matrix-like object: stores a.data into the wrapped
 // field at this wrapper's (x_cb, parity, chirality) coordinates.
 57  template<typename C>
 58  __device__ __host__ inline void operator=(const C &a) {
 59  field.save((Float*)a.data, x_cb, parity, chirality);
 60  }
 61  };
62 
 63  template <typename T, int N>
 64  template <typename S>
 // Assign a clover chiral block to an HMatrix: pulls the data through the
 // wrapped accessor's load() directly into this matrix's backing store.
 65  __device__ __host__ inline void HMatrix<T,N>::operator=(const clover_wrapper<T,S> &a) {
 66  a.field.load((T*)data, a.x_cb, a.parity, a.chirality);
 67  }
68 
 69  template <typename T, int N>
 70  template <typename S>
 // Construct an HMatrix directly from a clover chiral block; same load
 // path as the assignment operator above.
 71  __device__ __host__ inline HMatrix<T,N>::HMatrix(const clover_wrapper<T,S> &a) {
 72  a.field.load((T*)data, a.x_cb, a.parity, a.chirality);
 73  }
74 
75  namespace clover {
76 
161  template<typename Float, int nColor, int nSpin, QudaCloverFieldOrder order> struct Accessor {
162  mutable complex<Float> dummy;
163  Accessor(const CloverField &A, bool inverse=false) {
164  errorQuda("Not implemented for order %d", order);
165  }
166 
167  __device__ __host__ inline complex<Float>& operator()(int parity, int x, int s_row, int s_col,
168  int c_row, int c_col) const {
169  return dummy;
170  }
171  };
172 
173  template<int N>
174  __device__ __host__ inline int indexFloatN(int k, int stride, int x) {
175  int j = k / N;
176  int i = k % N;
177  return (j*stride+x)*N + i;
178  };
179 
 180  template<typename Float, int nColor, int nSpin>
 // NOTE(review): the struct declaration line (original line 181) is
 // missing from this extraction; from the indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>
 // usage below this is presumably the specialization
 //   struct Accessor<Float,nColor,nSpin,QUDA_FLOAT4_CLOVER_ORDER> {
 // — confirm against the upstream header.
 182  Float *a;       // field data pointer
 183  int stride;     // site stride of the field
 184  size_t offset;  // parity offset in Float units
 185  static constexpr int N = nSpin * nColor / 2;
 186  Accessor(const CloverField &A, bool inverse=false)
 187  : a(static_cast<Float*>(const_cast<void*>(A.V(inverse)))), stride(A.Stride()), offset(A.Bytes()/(2*sizeof(Float))) { }
 188 
 // Read-only element accessor: reconstructs the (s_row,c_row)x(s_col,c_col)
 // element of the Hermitian clover matrix from the packed FloatN storage
 // (N real diagonal entries followed by 2*(N*(N-1)/2) packed off-diagonal
 // reals per chiral block).  The factor of 2 matches the 0.5 basis-change
 // scaling used by the load/save routines elsewhere in this file.
 189  __device__ __host__ inline complex<Float> operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const {
 190  // if not in the diagonal chiral block then return 0.0
 191  if (s_col / 2 != s_row / 2) { return complex<Float>(0.0); }
 192 
 193  const int chirality = s_col / 2;
 194 
 // row/col within the N x N chiral block
 195  int row = s_row%2 * nColor + c_row;
 196  int col = s_col%2 * nColor + c_col;
 // base of this parity/chirality block
 197  Float *a_ = a+parity*offset+stride*chirality*N*N;
 198 
 199  if (row == col) {
 // diagonal entries are stored as the first N reals of the block
 200  return 2*a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(row, stride, x) ];
 201  } else if (col < row) {
 202  // switch coordinates to count from bottom right instead of top left of matrix
 203  int k = N*(N-1)/2 - (N-col)*(N-col-1)/2 + row - col - 1;
 204  int idx = N + 2*k; // real/imag pair after the N diagonal entries
 205 
 206  return 2*complex<Float>(a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(idx+0,stride,x) ],
 207  a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(idx+1,stride,x) ]);
 208  } else {
 209  // requesting upper triangular so return conjugate transpose
 210  // switch coordinates to count from bottom right instead of top left of matrix
 211  int k = N*(N-1)/2 - (N-row)*(N-row-1)/2 + col - row - 1;
 212  int idx = N + 2*k;
 213 
 214  return 2*complex<Float>( a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(idx+0,stride,x) ],
 215  -a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(idx+1,stride,x) ]);
 216  }
 217 
 218  }
 219 
 220  };
221 
222  template<typename Float, int nColor, int nSpin>
223  struct Accessor<Float,nColor,nSpin,QUDA_PACKED_CLOVER_ORDER> {
224  Float *a[2];
225  const int N = nSpin * nColor / 2;
226  complex<Float> zero;
227  Accessor(const CloverField &A, bool inverse=false) {
228  // even
229  a[0] = static_cast<Float*>(const_cast<void*>(A.V(inverse)));
230  // odd
231  a[1] = static_cast<Float*>(const_cast<void*>(A.V(inverse))) + A.Bytes()/(2*sizeof(Float));
232  zero = complex<Float>(0.0,0.0);
233  }
234 
235  __device__ __host__ inline complex<Float> operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const {
236  // if not in the diagonal chiral block then return 0.0
237  if (s_col / 2 != s_row / 2) { return zero; }
238 
239  const int chirality = s_col / 2;
240 
241  unsigned int row = s_row%2 * nColor + c_row;
242  unsigned int col = s_col%2 * nColor + c_col;
243 
244  if (row == col) {
245  complex<Float> tmp = a[parity][(x*2 + chirality)*N*N + row];
246  return tmp;
247  } else if (col < row) {
248  // switch coordinates to count from bottom right instead of top left of matrix
249  int k = N*(N-1)/2 - (N-col)*(N-col-1)/2 + row - col - 1;
250  int idx = (x*2 + chirality)*N*N + N + 2*k;
251  return complex<Float>(a[parity][idx], a[parity][idx+1]);
252  } else {
253  // switch coordinates to count from bottom right instead of top left of matrix
254  int k = N*(N-1)/2 - (N-row)*(N-row-1)/2 + col - row - 1;
255  int idx = (x*2 + chirality)*N*N + N + 2*k;
256  return complex<Float>(a[parity][idx], -a[parity][idx+1]);
257  }
258  }
259 
260  };
261 
 267  template <typename Float, int nColor, int nSpin, QudaCloverFieldOrder order>
 // Order-agnostic clover field wrapper: forwards element access to the
 // Accessor specialization selected by the order template parameter.
 268  struct FieldOrder {
 269 
 270  protected:
 // NOTE(review): the member declarations at original lines 271-272 and
 // 274 are missing from this extraction — from the constructor's
 // initializer list and the doc index they are the CloverField
 // reference `A` and the `const Accessor<Float,nColor,nSpin,order>
 // accessor` member.  Confirm against the upstream header.
 273  const int volumeCB; // checkerboard volume of the field
 275 
 276  public:
 // Construct from a clover field; inverse selects the inverse copy.
 281  FieldOrder(CloverField &A, bool inverse=false) : A(A), volumeCB(A.VolumeCB()), accessor(A,inverse)
 282  { }
 283 
 284  CloverField& Field() { return A; }
 285 
 286  virtual ~FieldOrder() { ; }
 287 
 // Read-only complex-element accessor.
 298  __device__ __host__ inline const complex<Float> operator()(int parity, int x, int s_row,
 299  int s_col, int c_row, int c_col) const {
 300  return accessor(parity, x, s_row, s_col, c_row, c_col);
 301  }
 302 
 // Variant with an extra (ignored) leading argument, kept for interface
 // compatibility with other field-order accessors.
 317  __device__ __host__ inline complex<Float> operator()(int dummy, int parity, int x, int s_row,
 318  int s_col, int c_row, int c_col) const {
 319  return accessor(parity,x,s_row,s_col,c_row,c_col);
 320  }
 321 
 332  /*
 333  __device__ __host__ inline complex<Float>& operator()(int parity, int x, int s_row,
 334  int s_col, int c_row, int c_col) {
 335  //errorQuda("Clover accessor not implemented as a lvalue");
 336  return accessor(parity, x, s_row, s_col, c_row, c_col);
 337  }
 338  */
 339 
 // Number of colors in the field.
 341  __device__ __host__ inline int Ncolor() const { return nColor; }
 342 
 // Full (both-parity) volume.
 344  __device__ __host__ inline int Volume() const { return 2*volumeCB; }
 345 
 // Checkerboard (single-parity) volume.
 347  __device__ __host__ inline int VolumeCB() const { return volumeCB; }
 348 
 // Total byte size implied by the packed chiral-block layout.
 350  size_t Bytes() const {
 351  constexpr int n = (nSpin * nColor) / 2;
 352  constexpr int chiral_block = n * n / 2;
 353  return static_cast<size_t>(volumeCB) * chiral_block * 2ll * 2ll * sizeof(Float); // 2 from complex, 2 from chirality
 354  }
 355  };
356 
 366  template <typename Float, int length, int N, bool huge_alloc=false>
 // FloatNOrder: accessor for clover fields in QUDA's native FloatN layout.
 // Float = storage precision, length = reals per site (both chiral blocks),
 // N = short-vector width, huge_alloc selects 64-bit offsets (AllocInt) and
 // disables texture reads.
 // NOTE(review): this is a Doxygen extraction with lines collapsed — the
 // Vector typedef (orig. 369-370), the norm_offset member (orig. 376), the
 // backup_norm_h member (orig. 392), part of the constructor initializer
 // list (orig. 400: presumably twisted/mu2/bytes), the return statement of
 // the const operator() (orig. 452), the safe_malloc calls in save()
 // (orig. 557, 560), and the host_free calls in load() (orig. 571, 575)
 // are missing here.  Confirm against the upstream header.
 367  struct FloatNOrder {
 368  typedef typename mapper<Float>::type RegType; // register (compute) precision
 371  static const int M=length/(N*2); // number of short vectors per chiral block
 372  static const int block=length/2; // chiral block size
 373  Float *clover; // field data pointer
 374  float *norm;   // norm array (used when Float is a short, i.e. half precision)
 375  const AllocInt offset; // offset can be 32-bit or 64-bit
 377 #ifdef USE_TEXTURE_OBJECTS
 378  typedef typename TexVectorType<RegType,N>::type TexVector;
 379  cudaTextureObject_t tex;     // texture object for the field data
 380  cudaTextureObject_t normTex; // texture object for the norm array
 381  const int tex_offset;        // parity offset in TexVector units
 382 #endif
 383  const int volumeCB; // checkerboard volume
 384  const int stride;   // site stride
 385 
 386  const bool twisted; // reported via Twisted() below
 387  const Float mu2;    // reported via Mu2() below
 388 
 389  size_t bytes;      // byte size of the clover data (used by the tuning backup below)
 390  size_t norm_bytes; // byte size of the norm data
 391  void *backup_h;    // host memory for backing up the field when tuning
 393 
 // Construct from a clover field; optional clover_/norm_ pointers override
 // the field's own data pointers (override suppresses the texture check).
 394  FloatNOrder(const CloverField &clover, bool is_inverse, Float *clover_=0, float *norm_=0, bool override=false)
 395  : offset(clover.Bytes()/(2*sizeof(Float))), norm_offset(clover.NormBytes()/(2*sizeof(float))),
 396 #ifdef USE_TEXTURE_OBJECTS
 397  tex(0), normTex(0), tex_offset(offset/N),
 398 #endif
 399  volumeCB(clover.VolumeCB()), stride(clover.Stride()),
 401  norm_bytes(clover.NormBytes()), backup_h(nullptr), backup_norm_h(nullptr)
 402  {
 403  this->clover = clover_ ? clover_ : (Float*)(clover.V(is_inverse));
 404  this->norm = norm_ ? norm_ : (float*)(clover.Norm(is_inverse));
 405 #ifdef USE_TEXTURE_OBJECTS
 406  if (clover.Location() == QUDA_CUDA_FIELD_LOCATION) {
 407  if (is_inverse) {
 408  tex = static_cast<const cudaCloverField&>(clover).InvTex();
 409  normTex = static_cast<const cudaCloverField&>(clover).InvNormTex();
 410  } else {
 411  tex = static_cast<const cudaCloverField&>(clover).Tex();
 412  normTex = static_cast<const cudaCloverField&>(clover).NormTex();
 413  }
 // texture reads require the pointers to be the field's own allocations
 414  if (!huge_alloc && (this->clover != clover.V(is_inverse) ||
 415  (clover.Precision() == QUDA_HALF_PRECISION && this->norm != clover.Norm(is_inverse)) ) && !override) {
 416  errorQuda("Cannot use texture read since data pointer does not equal field pointer - use with huge_alloc=true instead");
 417  }
 418  }
 419 #endif
 420  }
 421 
 422  bool Twisted() const {return twisted;}
 423  Float Mu2() const {return mu2;}
 424 
 // Mutable wrapper accessor: enables matrix = field(x,parity,chirality)
 // style round-trips through the clover_wrapper defined above.
 435  __device__ __host__ inline clover_wrapper<RegType,FloatNOrder<Float,length,N> >
 436  operator()(int x_cb, int parity, int chirality) {
 437  return clover_wrapper<RegType,FloatNOrder<Float,length,N> >(*this, x_cb, parity, chirality);
 438  }
 439 
 // Const wrapper accessor.  NOTE(review): the `return clover_wrapper<...>`
 // line (orig. 452) is missing from this extraction; only its argument
 // list survives below.
 450  __device__ __host__ inline const clover_wrapper<RegType,FloatNOrder<Float,length,N> >
 451  operator()(int x_cb, int parity, int chirality) const {
 453  (const_cast<FloatNOrder<Float,length,N>&>(*this), x_cb, parity, chirality);
 454  }
 455 
 // Load a single chiral block of a site into registers, converting from
 // storage to register precision and (for half precision) rescaling by
 // the stored per-block norm.
 463  __device__ __host__ inline void load(RegType v[block], int x, int parity, int chirality) const {
 464 #pragma unroll
 465  for (int i=0; i<M; i++) {
 466  // first do vectorized copy from memory
 467 #if defined(USE_TEXTURE_OBJECTS) && defined(__CUDA_ARCH__)
 468  if (!huge_alloc) { // use textures unless we have a huge alloc
 469  TexVector vecTmp = tex1Dfetch<TexVector>(tex, parity*tex_offset + stride*(chirality*M+i) + x);
 470  // second do vectorized copy converting into register type
 471 #pragma unroll
 472  for (int j=0; j<N; j++) copy(v[i*N+j], reinterpret_cast<RegType*>(&vecTmp)[j]);
 473  } else
 474 #endif
 475  {
 476  Vector vecTmp = vector_load<Vector>(clover + parity*offset, x + stride*(chirality*M+i));
 477  // second do scalar copy converting into register type
 478 #pragma unroll
 479  for (int j=0; j<N; j++) copy(v[i*N+j], reinterpret_cast<Float*>(&vecTmp)[j]);
 480  }
 481  }
 482 
 // half precision: undo the per-block normalization
 483  if (sizeof(Float)==sizeof(short)) {
 484 #if defined(USE_TEXTURE_OBJECTS) && defined(__CUDA_ARCH__)
 485  RegType nrm = !huge_alloc ? tex1Dfetch<float>(normTex, parity*norm_offset + chirality*stride + x) :
 486  norm[parity*norm_offset + chirality*stride + x];
 487 #else
 488  RegType nrm = norm[parity*norm_offset + chirality*stride + x];
 489 #endif
 490 #pragma unroll
 491  for (int i=0; i<block; i++) v[i] *= nrm;
 492  }
 493  }
 494 
 // Store a single chiral block of a site, computing and storing the
 // per-block norm first when the storage precision is half.
 502  __device__ __host__ inline void save(const RegType v[block], int x, int parity, int chirality) {
 503 
 504  // find the norm of each chiral block
 505  RegType scale = 0.0;
 506  if (sizeof(Float)==sizeof(short)) {
 507 #pragma unroll
 508  for (int i=0; i<block; i++) scale = fabs(v[i]) > scale ? fabs(v[i]) : scale;
 509  norm[parity*norm_offset + chirality*stride + x] = scale;
 510  }
 511 
 512 #pragma unroll
 513  for (int i=0; i<M; i++) {
 514  Vector vecTmp;
 515  // first do scalar copy converting into storage type and rescaling if necessary
 516  if (sizeof(Float)==sizeof(short))
 517 #pragma unroll
 518  for (int j=0; j<N; j++) copy(reinterpret_cast<Float*>(&vecTmp)[j], v[i*N+j] / scale);
 519  else
 520 #pragma unroll
 521  for (int j=0; j<N; j++) copy(reinterpret_cast<Float*>(&vecTmp)[j], v[i*N+j]);
 522 
 523  // second do vectorized copy into memory
 524  vector_store(clover + parity*offset, x + stride*(chirality*M+i), vecTmp);
 525  }
 526  }
 527 
 // Load the full site (both chiral blocks).  NOTE(review): the 36 here
 // assumes block == 36 (length == 72) — verify for other lengths.
 535  __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
 536 #pragma unroll
 537  for (int chirality=0; chirality<2; chirality++) load(&v[chirality*36], x, parity, chirality);
 538  }
 539 
 // Store the full site (both chiral blocks); same length assumption as load.
 547  __device__ __host__ inline void save(const RegType v[length], int x, int parity) {
 548 #pragma unroll
 549  for (int chirality=0; chirality<2; chirality++) save(&v[chirality*36], x, parity, chirality);
 550  }
 551 
 // Backup the field to the host when tuning.  NOTE(review): the
 // safe_malloc calls that allocate backup_h / backup_norm_h (orig. 557,
 // 560) are missing from this extraction.
 555  void save() {
 556  if (backup_h) errorQuda("Already allocated host backup");
 558  cudaMemcpy(backup_h, clover, bytes, cudaMemcpyDeviceToHost);
 559  if (norm_bytes) {
 561  cudaMemcpy(backup_norm_h, norm, norm_bytes, cudaMemcpyDeviceToHost);
 562  }
 563  checkCudaError();
 564  }
 565 
 // Restore the field from the host after tuning.  NOTE(review): the
 // host_free calls that release backup_h / backup_norm_h (orig. 571,
 // 575) are missing from this extraction.
 569  void load() {
 570  cudaMemcpy(clover, backup_h, bytes, cudaMemcpyHostToDevice);
 572  backup_h = nullptr;
 573  if (norm_bytes) {
 574  cudaMemcpy(norm, backup_norm_h, norm_bytes, cudaMemcpyHostToDevice);
 576  backup_norm_h = nullptr;
 577  }
 578  checkCudaError();
 579  }
 580 
 // Per-site byte count, including the two norm floats for half precision.
 581  size_t Bytes() const {
 582  size_t bytes = length*sizeof(Float);
 583  if (sizeof(Float)==sizeof(short)) bytes += 2*sizeof(float);
 584  return bytes;
 585  }
 586  };
587 
594  template <typename real, int length> struct S { real v[length]; };
595 
 599  template <typename Float, int length>
 // QDPOrder: accessor for QDP-ordered clover fields — all `length` reals
 // of a site stored contiguously, parity-blocked.  The 0.5 / 2.0 factors
 // on load / save come from a basis change relative to QUDA's internal
 // convention.
 600  struct QDPOrder {
 601  typedef typename mapper<Float>::type RegType; // register (compute) precision
 602  Float *clover;      // field data pointer
 603  const int volumeCB; // checkerboard volume
 604  const int stride;   // equals volumeCB for this order
 605  const int offset;   // parity offset in Float units
 606 
 607  const bool twisted;
 608  const Float mu2;
 609 
 // NOTE(review): the tail of the initializer list and the opening brace
 // (orig. 612 — presumably `twisted(clover.Twisted()), mu2(clover.Mu2()) {`)
 // are missing from this extraction; confirm against the upstream header.
 610  QDPOrder(const CloverField &clover, bool inverse, Float *clover_=0)
 611  : volumeCB(clover.VolumeCB()), stride(volumeCB), offset(clover.Bytes()/(2*sizeof(Float))),
 613  this->clover = clover_ ? clover_ : (Float*)(clover.V(inverse));
 614  }
 615 
 616  bool Twisted() const {return twisted;}
 617  Float Mu2() const {return mu2;}
 618 
 // Load a full site; on the device trove provides a coalesced
 // whole-structure load, otherwise a plain scalar loop is used.
 619  __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
 620  // factor of 0.5 comes from basis change
 621 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE)
 622  typedef S<Float,length> structure;
 623  trove::coalesced_ptr<structure> clover_((structure*)clover);
 624  structure v_ = clover_[parity*volumeCB + x];
 625  for (int i=0; i<length; i++) v[i] = 0.5*(RegType)v_.v[i];
 626 #else
 627  for (int i=0; i<length; i++) v[i] = 0.5*clover[parity*offset + x*length+i];
 628 #endif
 629  }
 630 
 // Store a full site; mirrors load() with the inverse 2.0 scaling.
 631  __device__ __host__ inline void save(const RegType v[length], int x, int parity) {
 632 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE)
 633  typedef S<Float,length> structure;
 634  trove::coalesced_ptr<structure> clover_((structure*)clover);
 635  structure v_;
 636  for (int i=0; i<length; i++) v_.v[i] = 2.0*(Float)v[i];
 637  clover_[parity*volumeCB + x] = v_;
 638 #else
 639  for (int i=0; i<length; i++) clover[parity*offset + x*length+i] = 2.0*v[i];
 640 #endif
 641  }
 642 
 643  size_t Bytes() const { return length*sizeof(Float); }
 644  };
645 
649  template <typename Float, int length>
650  struct QDPJITOrder {
651  typedef typename mapper<Float>::type RegType;
652  Float *diag;
653  Float *offdiag;
654  const int volumeCB;
655  const int stride;
656 
657  const bool twisted;
658  const Float mu2;
659 
660  QDPJITOrder(const CloverField &clover, bool inverse, Float *clover_=0)
661  : volumeCB(clover.VolumeCB()), stride(volumeCB), twisted(clover.Twisted()), mu2(clover.Mu2()) {
662  offdiag = clover_ ? ((Float**)clover_)[0] : ((Float**)clover.V(inverse))[0];
663  diag = clover_ ? ((Float**)clover_)[1] : ((Float**)clover.V(inverse))[1];
664  }
665 
666  bool Twisted() const {return twisted;}
667  Float Mu2() const {return mu2;}
668 
669  __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
670  // the factor of 0.5 comes from a basis change
671  for (int chirality=0; chirality<2; chirality++) {
672  // set diagonal elements
673  for (int i=0; i<6; i++) {
674  v[chirality*36 + i] = 0.5*diag[((i*2 + chirality)*2 + parity)*volumeCB + x];
675  }
676 
677  // the off diagonal elements
678  for (int i=0; i<30; i++) {
679  int z = i%2;
680  int off = i/2;
681  const int idtab[15]={0,1,3,6,10,2,4,7,11,5,8,12,9,13,14};
682  v[chirality*36 + 6 + i] = 0.5*offdiag[(((z*15 + idtab[off])*2 + chirality)*2 + parity)*volumeCB + x];
683  }
684 
685  }
686  }
687 
688  __device__ __host__ inline void save(const RegType v[length], int x, int parity) {
689  // the factor of 2.0 comes from undoing the basis change
690  for (int chirality=0; chirality<2; chirality++) {
691  // set diagonal elements
692  for (int i=0; i<6; i++) {
693  diag[((i*2 + chirality)*2 + parity)*volumeCB + x] = 2.0*v[chirality*36 + i];
694  }
695 
696  // the off diagonal elements
697  for (int i=0; i<30; i++) {
698  int z = i%2;
699  int off = i/2;
700  const int idtab[15]={0,1,3,6,10,2,4,7,11,5,8,12,9,13,14};
701  offdiag[(((z*15 + idtab[off])*2 + chirality)*2 + parity)*volumeCB + x] = 2.0*v[chirality*36 + 6 + i];
702  }
703  }
704  }
705 
706  size_t Bytes() const { return length*sizeof(Float); }
707  };
708 
709 
716  template <typename Float, int length>
717  struct BQCDOrder {
718  typedef typename mapper<Float>::type RegType;
719  Float *clover[2];
720  const int volumeCB;
721  const int stride;
722 
723  const bool twisted;
724  const Float mu2;
725 
726  BQCDOrder(const CloverField &clover, bool inverse, Float *clover_=0)
727  : volumeCB(clover.Stride()), stride(volumeCB), twisted(clover.Twisted()), mu2(clover.Mu2()) {
728  this->clover[0] = clover_ ? clover_ : (Float*)(clover.V(inverse));
729  this->clover[1] = (Float*)((char*)this->clover[0] + clover.Bytes()/2);
730  }
731 
732 
733  bool Twisted() const {return twisted;}
734  Float Mu2() const {return mu2;}
735 
741  __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
742  int bq[36] = { 21, 32, 33, 0, 1, 20, // diagonal
743  28, 29, 30, 31, 6, 7, 14, 15, 22, 23, // column 1 6
744  34, 35, 8, 9, 16, 17, 24, 25, // column 2 16
745  10, 11, 18, 19, 26, 27, // column 3 24
746  2, 3, 4, 5, // column 4 30
747  12, 13};
748 
749  // flip the sign of the imaginary components
750  int sign[36];
751  for (int i=0; i<6; i++) sign[i] = 1;
752  for (int i=6; i<36; i+=2) {
753  if ( (i >= 10 && i<= 15) || (i >= 18 && i <= 29) ) { sign[i] = -1; sign[i+1] = -1; }
754  else { sign[i] = 1; sign[i+1] = -1; }
755  }
756 
757  const int M=length/2;
758  for (int chirality=0; chirality<2; chirality++)
759  for (int i=0; i<M; i++)
760  v[chirality*M+i] = sign[i] * clover[parity][x*length+chirality*M+bq[i]];
761 
762  }
763 
764  // FIXME implement the save routine for BQCD ordered fields
765  __device__ __host__ inline void save(RegType v[length], int x, int parity) {
766 
767  };
768 
769  size_t Bytes() const { return length*sizeof(Float); }
770  };
771 
772  } // namespace clover
773 
774  // Use traits to reduce the template explosion
775  template<typename Float,int N=72> struct clover_mapper { };
776 
777  // double precision uses Float2
778  template<int N> struct clover_mapper<double,N> { typedef clover::FloatNOrder<double, N, 2> type; };
779 
780  // single precision uses Float4
781  template<int N> struct clover_mapper<float,N> { typedef clover::FloatNOrder<float, N, 4> type; };
782 
783  // half precision uses Float4
784  template<int N> struct clover_mapper<short,N> { typedef clover::FloatNOrder<short, N, 4> type; };
785 
786 } // namespace quda
787 
788 #endif //_CLOVER_ORDER_H
789 
790 
AllocType< huge_alloc >::type AllocInt
void * V(bool inverse=false)
Definition: clover_field.h:73
__device__ __host__ const complex< Float > operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Read-only complex-member accessor function.
#define errorQuda(...)
Definition: util_quda.h:90
__device__ __host__ int Ncolor() const
Complex-member accessor function.
size_t Bytes() const
Definition: clover_field.h:97
#define host_free(ptr)
Definition: malloc_quda.h:59
mapper< Float >::type RegType
mapper< Float >::type RegType
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
__device__ __host__ complex< Float > operator()(int dummy, int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Read-only complex-member accessor function. This is a special variant that is compatible with the equ...
void load()
Restore the field from the host after tuning.
mapper< Float >::type RegType
const Accessor< Float, nColor, nSpin, order > accessor
clover::FloatNOrder< float, N, 4 > type
This is just a dummy structure we use for trove to define the required structure size.
__device__ __host__ void save(RegType v[length], int x, int parity)
size_t size_t offset
FieldOrder(CloverField &A, bool inverse=false)
VectorType< Float, N >::type Vector
clover::FloatNOrder< double, N, 2 > type
__device__ __host__ complex< Float > operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
__device__ __host__ clover_wrapper< RegType, FloatNOrder< Float, length, N > > operator()(int x_cb, int parity, int chirality)
This accessor routine returns a clover_wrapper to this object, allowing us to overload various operat...
clover::FloatNOrder< short, N, 4 > type
__device__ __host__ int VolumeCB() const
__device__ __host__ void load(RegType v[block], int x, int parity, int chirality) const
Load accessor for a single chiral block.
void * backup_norm_h
host memory for backing up the field when tuning
const int nColor
Definition: covdev_test.cpp:77
__device__ __host__ HMatrix()
Definition: quda_matrix.h:210
__device__ __host__ void vector_store(void *ptr, int idx, const VectorType &value)
int V
Definition: test_util.cpp:28
clover_wrapper is an internal class that is used to wrap instances of colorspinor accessors...
__device__ __host__ void save(const RegType v[length], int x, int parity)
__device__ __host__ int Volume() const
__device__ __host__ void load(RegType v[length], int x, int parity) const
Provides precision abstractions and defines the register precision given the storage precision using ...
__device__ __host__ void save(const RegType v[block], int x, int parity, int chirality)
Store accessor for a single chiral block.
Accessor(const CloverField &A, bool inverse=false)
#define safe_malloc(size)
Definition: malloc_quda.h:54
__device__ __host__ void load(RegType v[length], int x, int parity) const
Load accessor for the clover matrix.
QDPOrder(const CloverField &clover, bool inverse, Float *clover_=0)
mapper< Float >::type RegType
__device__ __host__ void operator=(const C &a)
Assignment operator with H matrix instance as input.
void save()
Backup the field to the host when tuning.
__device__ __host__ void operator=(const HMatrix< U, N > &b)
Definition: quda_matrix.h:241
__device__ __host__ void save(const RegType v[length], int x, int parity)
double fabs(double)
static __inline__ dim3 dim3 void size_t cudaStream_t int enum cudaTextureReadMode readMode static __inline__ const struct texture< T, dim, readMode > & tex
void size_t length
Accessor routine for CloverFields in native field order.
QDPJITOrder(const CloverField &clover, bool inverse, Float *clover_=0)
__device__ __host__ complex< Float > & operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
#define checkCudaError()
Definition: util_quda.h:129
__device__ __host__ const clover_wrapper< RegType, FloatNOrder< Float, length, N > > operator()(int x_cb, int parity, int chirality) const
This accessor routine returns a const colorspinor_wrapper to this object, allowing us to overload var...
BQCDOrder(const CloverField &clover, bool inverse, Float *clover_=0)
__device__ __host__ void load(RegType v[length], int x, int parity) const
QudaParity parity
Definition: covdev_test.cpp:53
#define a
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:82
__device__ __host__ int indexFloatN(int k, int stride, int x)
__device__ __host__ void load(RegType v[length], int x, int parity) const
FloatNOrder(const CloverField &clover, bool is_inverse, Float *clover_=0, float *norm_=0, bool override=false)
host memory for backing up norm when tuning
__device__ __host__ complex< Float > operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
__device__ __host__ void save(const RegType v[length], int x, int parity)
Store accessor for the clover matrix.