quda-ref/v1.0.0/clover__field__order_8h_source.html

 #ifndef _CLOVER_ORDER_H
 #define _CLOVER_ORDER_H

 #include <register_traits.h>
 #include <clover_field.h>
 #include <complex_quda.h>
 #include <thrust_helper.cuh>
 #include <quda_matrix.h>
 #include <color_spinor.h>
 #include <trove_helper.cuh>
 #include <texture_helper.cuh>

 namespace quda {

   template <typename Float, typename T>
     struct clover_wrapper {
       T &field;
       const int x_cb;
       const int parity;
       const int chirality;

       __device__ __host__ inline clover_wrapper<Float,T>(T &field, int x_cb, int parity, int chirality)
   : field(field), x_cb(x_cb), parity(parity), chirality(chirality) { }

       template<typename C>
       __device__ __host__ inline void operator=(const C &a) {
         field.save(a.data, x_cb, parity, chirality);
       }
     };

   template <typename T, int N>
     template <typename S>
     __device__ __host__ inline void HMatrix<T,N>::operator=(const clover_wrapper<T,S> &a) {
     a.field.load(data, a.x_cb, a.parity, a.chirality);
   }

   template <typename T, int N>
     template <typename S>
     __device__ __host__ inline HMatrix<T,N>::HMatrix(const clover_wrapper<T,S> &a) {
     a.field.load(data, a.x_cb, a.parity, a.chirality);
   }

   namespace clover {

     template<typename ReduceType, typename Float> struct square_ {
       __host__ __device__ inline ReduceType operator()(const quda::complex<Float> &x)
       { return static_cast<ReduceType>(norm(x)); }
     };

     template<typename ReduceType, typename Float> struct abs_ {
       __host__ __device__ inline ReduceType operator()(const quda::complex<Float> &x)
       { return static_cast<ReduceType>(abs(x)); }
     };

     template<typename Float, int nColor, int nSpin, QudaCloverFieldOrder order> struct Accessor {
       mutable complex<Float> dummy;
       Accessor(const CloverField &A, bool inverse=false) {
   errorQuda("Not implemented for order %d", order);
       }

       __device__ __host__ inline complex<Float>& operator()(int parity, int x, int s_row, int s_col,
                   int c_row, int c_col) const {
   return dummy;
       }

       template<typename helper, typename reducer>
         __host__ double transform_reduce(QudaFieldLocation location, helper h, reducer r, double i) const
       {
         return 0.0;
       }
     };

     template<typename Float, int nColor, int nSpin>
       struct Accessor<Float,nColor,nSpin,QUDA_FLOAT2_CLOVER_ORDER> {
       Float *a;
       int stride;
       size_t offset_cb;
       static constexpr int N = nSpin * nColor / 2;
     Accessor(const CloverField &A, bool inverse=false)
       : a(static_cast<Float*>(const_cast<void*>(A.V(inverse)))), stride(A.Stride()),
   offset_cb(A.Bytes()/(2*sizeof(Float))) { }

       __device__ __host__ inline complex<Float> operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const {
   // if not in the diagonal chiral block then return 0.0
   if (s_col / 2 != s_row / 2) { return complex<Float>(0.0); }

   const int chirality = s_col / 2;

   int row = s_row%2 * nColor + c_row;
   int col = s_col%2 * nColor + c_col;
   Float *a_ = a+parity*offset_cb+stride*chirality*N*N;

   if (row == col) {
     return 2*a_[ row*stride+x ];
   } else if (col < row) {
     // switch coordinates to count from bottom right instead of top left of matrix
     int k = N*(N-1)/2 - (N-col)*(N-col-1)/2 + row - col - 1;
           complex<Float> *off = reinterpret_cast<complex<Float>*>(a_ + N);

           return 2*off[k*stride + x];
   } else {
     // requesting upper triangular so return conjugate transpose
     // switch coordinates to count from bottom right instead of top left of matrix
     int k = N*(N-1)/2 - (N-row)*(N-row-1)/2 + col - row - 1;
           complex<Float> *off = reinterpret_cast<complex<Float>*>(a_ + N);
           return 2*conj(off[k*stride + x]);
   }

       }

       template<typename helper, typename reducer>
         __host__ double transform_reduce(QudaFieldLocation location, helper h, reducer r, double init) const {
         double result = init;
         if (location == QUDA_CUDA_FIELD_LOCATION) {
           thrust_allocator alloc;
           thrust::device_ptr<complex<Float> > ptr(reinterpret_cast<complex<Float>*>(a));
           result = thrust::transform_reduce(thrust::cuda::par(alloc), ptr, ptr+offset_cb, h, result, r);
         } else {
           // just use offset_cb, since factor of two from parity is equivalent to complexity
           complex<Float> *ptr = reinterpret_cast<complex<Float>*>(a);
           result = thrust::transform_reduce(thrust::seq, ptr, ptr+offset_cb, h, result, r);
         }
         return 2.0 * result; // factor of two is normalization
       }

     };

     template<int N>
       __device__ __host__ inline int indexFloatN(int k, int stride, int x) {
       int j = k / N;
       int i = k % N;
       return (j*stride+x)*N + i;
     };

     template<typename Float, int nColor, int nSpin>
       struct Accessor<Float,nColor,nSpin,QUDA_FLOAT4_CLOVER_ORDER> {
       Float *a;
       int stride;
       size_t offset_cb;
       static constexpr int N = nSpin * nColor / 2;
     Accessor(const CloverField &A, bool inverse=false)
       : a(static_cast<Float*>(const_cast<void*>(A.V(inverse)))), stride(A.Stride()),
   offset_cb(A.Bytes()/(2*sizeof(Float))) { }

       __device__ __host__ inline complex<Float> operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const {
   // if not in the diagonal chiral block then return 0.0
   if (s_col / 2 != s_row / 2) { return complex<Float>(0.0); }

   const int chirality = s_col / 2;

   int row = s_row%2 * nColor + c_row;
   int col = s_col%2 * nColor + c_col;
   Float *a_ = a+parity*offset_cb+stride*chirality*N*N;

   if (row == col) {
     return 2*a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(row, stride, x) ];
   } else if (col < row) {
     // switch coordinates to count from bottom right instead of top left of matrix
     int k = N*(N-1)/2 - (N-col)*(N-col-1)/2 + row - col - 1;
           int idx = N + 2*k;

           return 2*complex<Float>(a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(idx+0,stride,x) ],
           a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(idx+1,stride,x) ]);
   } else {
     // requesting upper triangular so return conjugate transpose
     // switch coordinates to count from bottom right instead of top left of matrix
     int k = N*(N-1)/2 - (N-row)*(N-row-1)/2 + col - row - 1;
           int idx = N + 2*k;

           return 2*complex<Float>( a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(idx+0,stride,x) ],
           -a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(idx+1,stride,x) ]);
   }

       }

       template<typename helper, typename reducer>
         __host__ double transform_reduce(QudaFieldLocation location, helper h, reducer r, double init) const {
         double result = init;
         if (location == QUDA_CUDA_FIELD_LOCATION) {
           thrust_allocator alloc;
           thrust::device_ptr<complex<Float> > ptr(reinterpret_cast<complex<Float>*>(a));
           result = thrust::transform_reduce(thrust::cuda::par(alloc), ptr, ptr+offset_cb, h, result, r);
         } else {
           // just use offset_cb, since factor of two from parity is equivalent to complexity
           complex<Float> *ptr = reinterpret_cast<complex<Float>*>(a);
           result = thrust::transform_reduce(thrust::seq, ptr, ptr+offset_cb, h, result, r);
         }
         return 2.0 * result; // factor of two is normalization
       }

     };

     template<typename Float, int nColor, int nSpin>
       struct Accessor<Float,nColor,nSpin,QUDA_PACKED_CLOVER_ORDER> {
       Float *a[2];
       const int N = nSpin * nColor / 2;
       complex<Float> zero;
       Accessor(const CloverField &A, bool inverse=false) {
   // even
   a[0] = static_cast<Float*>(const_cast<void*>(A.V(inverse)));
   // odd
   a[1] = static_cast<Float*>(const_cast<void*>(A.V(inverse))) + A.Bytes()/(2*sizeof(Float));
   zero = complex<Float>(0.0,0.0);
       }

       __device__ __host__ inline complex<Float> operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const {
   // if not in the diagonal chiral block then return 0.0
   if (s_col / 2 != s_row / 2) { return zero; }

   const int chirality = s_col / 2;

   unsigned int row = s_row%2 * nColor + c_row;
   unsigned int col = s_col%2 * nColor + c_col;

   if (row == col) {
     complex<Float> tmp = a[parity][(x*2 + chirality)*N*N + row];
     return tmp;
   } else if (col < row) {
     // switch coordinates to count from bottom right instead of top left of matrix
     int k = N*(N-1)/2 - (N-col)*(N-col-1)/2 + row - col - 1;
           int idx = (x*2 + chirality)*N*N + N + 2*k;
           return complex<Float>(a[parity][idx], a[parity][idx+1]);
   } else {
     // switch coordinates to count from bottom right instead of top left of matrix
     int k = N*(N-1)/2 - (N-row)*(N-row-1)/2 + col - row - 1;
           int idx = (x*2 + chirality)*N*N + N + 2*k;
           return complex<Float>(a[parity][idx], -a[parity][idx+1]);
   }
       }

       template <typename helper, typename reducer>
       __host__ double transform_reduce(QudaFieldLocation location, helper h, reducer r, double init) const
       {
         errorQuda("Not implemented");
   return 0.0;
       }
     };

     /*
       FIXME the below is the old optimization used for reading the
       clover field, making use of the symmetry to reduce the number of
       reads.

 #define READ_CLOVER2_DOUBLE_STR(clover_, chi)                           \
     double2 C0, C1, C2, C3, C4, C5, C6, C7, C8, C9;                       \
     double2 C10, C11, C12, C13, C14, C15, C16, C17;                       \
     double2* clover = (double2*)clover_;                                  \
     load_streaming_double2(C0, &clover[sid + (18*chi+0)*param.cl_stride]); \
     load_streaming_double2(C1, &clover[sid + (18*chi+1)*param.cl_stride]); \
     double diag = 0.5*(C0.x + C1.y);                                      \
     double diag_inv = 1.0/diag;                                           \
     C2 = make_double2(diag*(2-C0.y*diag_inv), diag*(2-C1.x*diag_inv));    \
     load_streaming_double2(C3, &clover[sid + (18*chi+3)*param.cl_stride]);        \
     load_streaming_double2(C4, &clover[sid + (18*chi+4)*param.cl_stride]);        \
     load_streaming_double2(C5, &clover[sid + (18*chi+5)*param.cl_stride]);        \
     load_streaming_double2(C6, &clover[sid + (18*chi+6)*param.cl_stride]);        \
     load_streaming_double2(C7, &clover[sid + (18*chi+7)*param.cl_stride]);        \
     load_streaming_double2(C8, &clover[sid + (18*chi+8)*param.cl_stride]);        \
     load_streaming_double2(C9, &clover[sid + (18*chi+9)*param.cl_stride]);        \
     load_streaming_double2(C10, &clover[sid + (18*chi+10)*param.cl_stride]);      \
     load_streaming_double2(C11, &clover[sid + (18*chi+11)*param.cl_stride]);      \
     load_streaming_double2(C12, &clover[sid + (18*chi+12)*param.cl_stride]);      \
     load_streaming_double2(C13, &clover[sid + (18*chi+13)*param.cl_stride]);      \
     load_streaming_double2(C14, &clover[sid + (18*chi+14)*param.cl_stride]); \
     C15 = make_double2(-C3.x,-C3.y);                                      \
     C16 = make_double2(-C4.x,-C4.y);                                      \
     C17 = make_double2(-C8.x,-C8.y);                                      \
     */

     template <typename Float, int nColor, int nSpin, QudaCloverFieldOrder order>
       struct FieldOrder {

       protected:
   CloverField &A;
   const int volumeCB;
   const Accessor<Float,nColor,nSpin,order> accessor;
   bool inverse;
   const QudaFieldLocation location;

       public:
       FieldOrder(CloverField &A, bool inverse=false)
       : A(A), volumeCB(A.VolumeCB()), accessor(A,inverse), inverse(inverse), location(A.Location())
   { }

   CloverField& Field() { return A; }

   __device__ __host__ inline const complex<Float> operator()(int parity, int x, int s_row,
                    int s_col, int c_row, int c_col) const {
     return accessor(parity, x, s_row, s_col, c_row, c_col);
   }

   __device__ __host__ inline complex<Float> operator()(int dummy, int parity, int x, int s_row,
                    int s_col, int c_row, int c_col) const {
     return accessor(parity,x,s_row,s_col,c_row,c_col);
   }

   /*
   __device__ __host__ inline complex<Float>& operator()(int parity, int x, int s_row,
                    int s_col, int c_row, int c_col) {
     //errorQuda("Clover accessor not implemented as a lvalue");
     return accessor(parity, x, s_row, s_col, c_row, c_col);
     }
   */

   __device__ __host__ inline int Ncolor() const { return nColor; }

   __device__ __host__ inline int Volume() const { return 2*volumeCB; }

   __device__ __host__ inline int VolumeCB() const { return volumeCB; }

   size_t Bytes() const {
     constexpr int n = (nSpin * nColor) / 2;
     constexpr int chiral_block = n * n / 2;
     return static_cast<size_t>(volumeCB) * chiral_block * 2ll * 2ll * sizeof(Float); // 2 from complex, 2 from chirality
   }

   __host__ double norm1(int dim=-1, bool global=true) const {
           double nrm1 = accessor.transform_reduce(location, abs_<double,Float>(),
                                                   thrust::plus<double>(), 0.0);
     if (global) comm_allreduce(&nrm1);
     return nrm1;
   }

   __host__ double norm2(int dim=-1, bool global=true) const {
           double nrm2 = accessor.transform_reduce(location, square_<double,Float>(),
                                                   thrust::plus<double>(), 0.0);
     if (global) comm_allreduce(&nrm2);
     return nrm2;
   }

   __host__ double abs_max(int dim=-1, bool global=true) const {
     double absmax = accessor.transform_reduce(location, abs_<Float,Float>(),
                                                     thrust::maximum<Float>(), 0.0);
     if (global) comm_allreduce_max(&absmax);
     return absmax;
   }

   __host__ double abs_min(int dim=-1, bool global=true) const {
     double absmax = accessor.transform_reduce(location, abs_<Float,Float>(),
                                                     thrust::minimum<Float>(), std::numeric_limits<double>::max());
     if (global) comm_allreduce_min(&absmax);
     return absmax;
   }

       };

     template <typename Float, int length, int N, bool add_rho=false, bool huge_alloc=false>
     struct FloatNOrder {
       using Accessor = FloatNOrder<Float, length, N, add_rho, huge_alloc>;
       using real = typename mapper<Float>::type;
       typedef typename VectorType<Float, N>::type Vector;
       typedef typename AllocType<huge_alloc>::type AllocInt;
       typedef float norm_type;
       static const int M = length / (N * 2); // number of short vectors per chiral block
       static const int block = length / 2;   // chiral block size
       Float *clover;
       norm_type *norm;
       const AllocInt offset; // offset can be 32-bit or 64-bit
       const AllocInt norm_offset;
 #ifdef USE_TEXTURE_OBJECTS
   typedef typename TexVectorType<real, N>::type TexVector;
   cudaTextureObject_t tex;
   cudaTextureObject_t normTex;
   const int tex_offset;
 #endif
   const int volumeCB;
   const int stride;

   const bool twisted;
   const real mu2;
         const real rho;

         size_t bytes;
   size_t norm_bytes;
   void *backup_h;
   void *backup_norm_h;

         FloatNOrder(const CloverField &clover, bool is_inverse, Float *clover_ = 0, norm_type *norm_ = 0,
                     bool override = false) :
             offset(clover.Bytes() / (2 * sizeof(Float))),
             norm_offset(clover.NormBytes() / (2 * sizeof(norm_type))),
 #ifdef USE_TEXTURE_OBJECTS
             tex(0),
             normTex(0),
             tex_offset(offset / N),
 #endif
             volumeCB(clover.VolumeCB()),
             stride(clover.Stride()),
             twisted(clover.Twisted()),
             mu2(clover.Mu2()),
             rho(clover.Rho()),
             bytes(clover.Bytes()),
             norm_bytes(clover.NormBytes()),
             backup_h(nullptr),
             backup_norm_h(nullptr)
   {
     this->clover = clover_ ? clover_ : (Float*)(clover.V(is_inverse));
           this->norm = norm_ ? norm_ : (norm_type *)(clover.Norm(is_inverse));
 #ifdef USE_TEXTURE_OBJECTS
     if (clover.Location() == QUDA_CUDA_FIELD_LOCATION) {
       if (is_inverse) {
         tex = static_cast<const cudaCloverField&>(clover).InvTex();
         normTex = static_cast<const cudaCloverField&>(clover).InvNormTex();
       } else {
         tex = static_cast<const cudaCloverField&>(clover).Tex();
         normTex = static_cast<const cudaCloverField&>(clover).NormTex();
       }
       if (!huge_alloc && (this->clover != clover.V(is_inverse) ||
         ((clover.Precision() == QUDA_HALF_PRECISION || clover.Precision() == QUDA_QUARTER_PRECISION) && this->norm != clover.Norm(is_inverse)) ) && !override) {
         errorQuda("Cannot use texture read since data pointer does not equal field pointer - use with huge_alloc=true instead");
       }
     }
 #endif
   }

   bool Twisted() const { return twisted; }
   real Mu2() const { return mu2; }

         __device__ __host__ inline clover_wrapper<real, Accessor> operator()(int x_cb, int parity, int chirality)
         {
           return clover_wrapper<real, Accessor>(*this, x_cb, parity, chirality);
         }

         __device__ __host__ inline const clover_wrapper<real, Accessor> operator()(
             int x_cb, int parity, int chirality) const
         {
           return clover_wrapper<real, Accessor>(const_cast<Accessor &>(*this), x_cb, parity, chirality);
         }

   __device__ __host__ inline void load(real v[block], int x, int parity, int chirality) const
         {
           norm_type nrm;
           if (isFixed<Float>::value) {
 #if defined(USE_TEXTURE_OBJECTS) && defined(__CUDA_ARCH__)
             nrm = !huge_alloc ? tex1Dfetch_<float>(normTex, parity * norm_offset + chirality * stride + x) :
                                 norm[parity * norm_offset + chirality * stride + x];
 #else
             nrm = norm[parity * norm_offset + chirality * stride + x];
 #endif
           }

 #pragma unroll
     for (int i=0; i<M; i++) {
 #if defined(USE_TEXTURE_OBJECTS) && defined(__CUDA_ARCH__)
       if (!huge_alloc) { // use textures unless we have a huge alloc
                                // first do texture load from memory
               TexVector vecTmp = tex1Dfetch_<TexVector>(tex, parity*tex_offset + stride*(chirality*M+i) + x);
               // now insert into output array
 #pragma unroll
               for (int j = 0; j < N; j++) {
                 copy(v[i * N + j], reinterpret_cast<real *>(&vecTmp)[j]);
                 if (isFixed<Float>::value) v[i * N + j] *= nrm;
               }
             } else
 #endif
       {
               // first load from memory
               Vector vecTmp = vector_load<Vector>(clover + parity*offset, x + stride*(chirality*M+i));
         // second do scalar copy converting into register type
 #pragma unroll
               for (int j = 0; j < N; j++) { copy_and_scale(v[i * N + j], reinterpret_cast<Float *>(&vecTmp)[j], nrm); }
             }
     }

           if (add_rho) for (int i=0; i<6; i++) v[i] += rho;
         }

   __device__ __host__ inline void save(const real v[block], int x, int parity, int chirality)
         {
           real tmp[block];

           // find the norm of each chiral block
           if (isFixed<Float>::value) {
             norm_type scale = 0.0;
 #pragma unroll
             for (int i = 0; i < block; i++) scale = fabsf((norm_type)v[i]) > scale ? fabsf((norm_type)v[i]) : scale;
             norm[parity*norm_offset + chirality*stride + x] = scale;

 #ifdef __CUDA_ARCH__
             real scale_inv = __fdividef(fixedMaxValue<Float>::value, scale);
 #else
             real scale_inv = fixedMaxValue<Float>::value / scale;
 #endif
 #pragma unroll
             for (int i = 0; i < block; i++) tmp[i] = v[i] * scale_inv;
           } else {
 #pragma unroll
             for (int i = 0; i < block; i++) tmp[i] = v[i];
           }

 #pragma unroll
           for (int i = 0; i < M; i++) {
             Vector vecTmp;
             // first do scalar copy converting into storage type
             for (int j = 0; j < N; j++) copy_scaled(reinterpret_cast<Float *>(&vecTmp)[j], tmp[i * N + j]);
             // second do vectorized copy into memory
       vector_store(clover + parity*offset, x + stride*(chirality*M+i), vecTmp);
           }
         }

   __device__ __host__ inline void load(real v[length], int x, int parity) const {
 #pragma unroll
           for (int chirality = 0; chirality < 2; chirality++) load(&v[chirality * block], x, parity, chirality);
         }

   __device__ __host__ inline void save(const real v[length], int x, int parity) {
 #pragma unroll
           for (int chirality = 0; chirality < 2; chirality++) save(&v[chirality * block], x, parity, chirality);
         }

   void save() {
     if (backup_h) errorQuda("Already allocated host backup");
     backup_h = safe_malloc(bytes);
     cudaMemcpy(backup_h, clover, bytes, cudaMemcpyDeviceToHost);
     if (norm_bytes) {
       backup_norm_h = safe_malloc(norm_bytes);
       cudaMemcpy(backup_norm_h, norm, norm_bytes, cudaMemcpyDeviceToHost);
     }
     checkCudaError();
   }

   void load() {
     cudaMemcpy(clover, backup_h, bytes, cudaMemcpyHostToDevice);
     host_free(backup_h);
     backup_h = nullptr;
     if (norm_bytes) {
       cudaMemcpy(norm, backup_norm_h, norm_bytes, cudaMemcpyHostToDevice);
       host_free(backup_norm_h);
       backup_norm_h = nullptr;
     }
     checkCudaError();
   }

   size_t Bytes() const {
     size_t bytes = length*sizeof(Float);
           if (isFixed<Float>::value) bytes += 2 * sizeof(norm_type);
           return bytes;
   }
       };

     template <typename real, int length> struct S { real v[length]; };

     template <typename Float, int length>
       struct QDPOrder {
   typedef typename mapper<Float>::type RegType;
   Float *clover;
   const int volumeCB;
   const int stride;
   const int offset;

   const bool twisted;
   const Float mu2;

       QDPOrder(const CloverField &clover, bool inverse, Float *clover_=0)
       : volumeCB(clover.VolumeCB()), stride(volumeCB), offset(clover.Bytes()/(2*sizeof(Float))),
   twisted(clover.Twisted()), mu2(clover.Mu2()) {
   this->clover = clover_ ? clover_ : (Float*)(clover.V(inverse));
       }

   bool  Twisted() const {return twisted;}
   Float Mu2() const {return mu2;}

   __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
     // factor of 0.5 comes from basis change
 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE)
     typedef S<Float,length> structure;
     trove::coalesced_ptr<structure> clover_((structure*)clover);
     structure v_ = clover_[parity*volumeCB + x];
     for (int i=0; i<length; i++) v[i] = 0.5*(RegType)v_.v[i];
 #else
     for (int i=0; i<length; i++) v[i] = 0.5*clover[parity*offset + x*length+i];
 #endif
   }

   __device__ __host__ inline void save(const RegType v[length], int x, int parity) {
 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE)
     typedef S<Float,length> structure;
     trove::coalesced_ptr<structure> clover_((structure*)clover);
     structure v_;
     for (int i=0; i<length; i++) v_.v[i] = 2.0*(Float)v[i];
     clover_[parity*volumeCB + x] = v_;
 #else
     for (int i=0; i<length; i++) clover[parity*offset + x*length+i] = 2.0*v[i];
 #endif
   }

   size_t Bytes() const { return length*sizeof(Float); }
       };

     template <typename Float, int length>
       struct QDPJITOrder {
   typedef typename mapper<Float>::type RegType;
   Float *diag;
   Float *offdiag;
   const int volumeCB;
   const int stride;

   const bool twisted;
   const Float mu2;

       QDPJITOrder(const CloverField &clover, bool inverse, Float *clover_=0)
       : volumeCB(clover.VolumeCB()), stride(volumeCB), twisted(clover.Twisted()), mu2(clover.Mu2()) {
   offdiag = clover_ ? ((Float**)clover_)[0] : ((Float**)clover.V(inverse))[0];
   diag = clover_ ? ((Float**)clover_)[1] : ((Float**)clover.V(inverse))[1];
       }

       bool  Twisted() const {return twisted;}
       Float Mu2() const {return mu2;}

   __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
     // the factor of 0.5 comes from a basis change
     for (int chirality=0; chirality<2; chirality++) {
       // set diagonal elements
       for (int i=0; i<6; i++) {
         v[chirality*36 + i] = 0.5*diag[((i*2 + chirality)*2 + parity)*volumeCB + x];
       }

       // the off diagonal elements
       for (int i=0; i<30; i++) {
         int z = i%2;
         int off = i/2;
         const int idtab[15]={0,1,3,6,10,2,4,7,11,5,8,12,9,13,14};
         v[chirality*36 + 6 + i] = 0.5*offdiag[(((z*15 + idtab[off])*2 + chirality)*2 + parity)*volumeCB + x];
       }

     }
   }

   __device__ __host__ inline void save(const RegType v[length], int x, int parity) {
     // the factor of 2.0 comes from undoing the basis change
     for (int chirality=0; chirality<2; chirality++) {
       // set diagonal elements
       for (int i=0; i<6; i++) {
         diag[((i*2 + chirality)*2 + parity)*volumeCB + x] = 2.0*v[chirality*36 + i];
       }

       // the off diagonal elements
       for (int i=0; i<30; i++) {
         int z = i%2;
         int off = i/2;
         const int idtab[15]={0,1,3,6,10,2,4,7,11,5,8,12,9,13,14};
         offdiag[(((z*15 + idtab[off])*2 + chirality)*2 + parity)*volumeCB + x] = 2.0*v[chirality*36 + 6 + i];
       }
     }
   }

   size_t Bytes() const { return length*sizeof(Float); }
       };


     template <typename Float, int length>
       struct BQCDOrder {
   typedef typename mapper<Float>::type RegType;
   Float *clover[2];
   const int volumeCB;
   const int stride;

   const bool twisted;
   const Float mu2;

       BQCDOrder(const CloverField &clover, bool inverse, Float *clover_=0)
       : volumeCB(clover.Stride()), stride(volumeCB), twisted(clover.Twisted()), mu2(clover.Mu2()) {
   this->clover[0] = clover_ ? clover_ : (Float*)(clover.V(inverse));
   this->clover[1] = (Float*)((char*)this->clover[0] + clover.Bytes()/2);
       }


   bool  Twisted() const {return twisted;}
   Float Mu2() const {return mu2;}

   __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
     int bq[36] = { 21, 32, 33, 0,  1, 20,                   // diagonal
        28, 29, 30, 31, 6, 7,  14, 15, 22, 23,   // column 1  6
        34, 35, 8, 9, 16, 17, 24, 25,            // column 2  16
        10, 11, 18, 19, 26, 27,                  // column 3  24
        2,  3,  4,  5,                           // column 4  30
        12, 13};

     // flip the sign of the imaginary components
     int sign[36];
     for (int i=0; i<6; i++) sign[i] = 1;
     for (int i=6; i<36; i+=2) {
       if ( (i >= 10 && i<= 15) || (i >= 18 && i <= 29) )  { sign[i] = -1; sign[i+1] = -1; }
       else { sign[i] = 1; sign[i+1] = -1; }
     }

     const int M=length/2;
     for (int chirality=0; chirality<2; chirality++)
       for (int i=0; i<M; i++)
         v[chirality*M+i] = sign[i] * clover[parity][x*length+chirality*M+bq[i]];

   }

   // FIXME implement the save routine for BQCD ordered fields
   __device__ __host__ inline void save(RegType v[length], int x, int parity) {

   };

   size_t Bytes() const { return length*sizeof(Float); }
       };

   } // namespace clover

   // Use traits to reduce the template explosion
   template<typename Float,int N=72, bool add_rho=false> struct clover_mapper { };

   // double precision uses Float2
   template<int N, bool add_rho> struct clover_mapper<double,N,add_rho> { typedef clover::FloatNOrder<double, N, 2, add_rho> type; };

   // single precision uses Float4
   template<int N, bool add_rho> struct clover_mapper<float,N,add_rho> { typedef clover::FloatNOrder<float, N, 4, add_rho> type; };

   // half precision uses Float4
   template<int N, bool add_rho> struct clover_mapper<short,N,add_rho> { typedef clover::FloatNOrder<short, N, 4, add_rho> type; };

   // quarter precision uses Float4
   template<int N, bool add_rho> struct clover_mapper<char,N,add_rho> { typedef clover::FloatNOrder<char, N, 4, add_rho> type; };

 } // namespace quda

 #endif //_CLOVER_ORDER_H


quda::clover::FloatNOrder::load
__device__ __host__ void load(real v[block], int x, int parity, int chirality) const
Load accessor for a single chiral block.
Definition: clover_field_order.h:650

quda::clover::FieldOrder::volumeCB
const int volumeCB
Definition: clover_field_order.h:394

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::Accessor
Accessor(const CloverField &A, bool inverse=false)
Definition: clover_field_order.h:254

quda::clover::QDPOrder::Bytes
size_t Bytes() const
Definition: clover_field_order.h:843

quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::zero
complex< Float > zero
Definition: clover_field_order.h:310

quda::clover::FloatNOrder::norm_offset
const AllocInt norm_offset
Definition: clover_field_order.h:552

QUDA_PACKED_CLOVER_ORDER
Definition: enum_quda.h:256

quda::clover::BQCDOrder::Twisted
bool Twisted() const
Definition: clover_field_order.h:933

quda::CloverField::V
void * V(bool inverse=false)
Definition: clover_field.h:74

quda::norm
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:1092

quda::clover::Accessor::transform_reduce
__host__ double transform_reduce(QudaFieldLocation location, helper h, reducer r, double i) const
Definition: clover_field_order.h:180

quda::clover::QDPJITOrder
Definition: clover_field_order.h:850

quda::clover::FieldOrder::Bytes
size_t Bytes() const
Definition: clover_field_order.h:472

quda::clover::FieldOrder::operator()
__device__ __host__ const complex< Float > operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Read-only complex-member accessor function.
Definition: clover_field_order.h:420

errorQuda
#define errorQuda(...)
Definition: util_quda.h:121

quda::clover::FieldOrder::Ncolor
__device__ __host__ int Ncolor() const
Complex-member accessor function.
Definition: clover_field_order.h:463

QUDA_CUDA_FIELD_LOCATION
Definition: enum_quda.h:326

quda::CloverField::Bytes
size_t Bytes() const
Definition: clover_field.h:98

host_free
#define host_free(ptr)
Definition: malloc_quda.h:71

quda::clover::QDPJITOrder::diag
Float * diag
Definition: clover_field_order.h:852

QUDA_QUARTER_PRECISION
Definition: enum_quda.h:59

quda::clover::BQCDOrder::twisted
const bool twisted
Definition: clover_field_order.h:923

QUDA_HALF_PRECISION
Definition: enum_quda.h:60

quda::CloverField
Definition: clover_field.h:45

quda::clover::FloatNOrder::real
typename mapper< Float >::type real
Definition: clover_field_order.h:543

quda::clover::QDPOrder::mu2
const Float mu2
Definition: clover_field_order.h:808

quda::clover::QDPJITOrder::RegType
mapper< Float >::type RegType
Definition: clover_field_order.h:851

quda::alloc
static std::map< void *, MemAlloc > alloc[N_ALLOC_TYPE]
Definition: malloc.cpp:53

quda::clover::BQCDOrder::RegType
mapper< Float >::type RegType
Definition: clover_field_order.h:918

quda::clover::BQCDOrder::mu2
const Float mu2
Definition: clover_field_order.h:924

quda::clover::FloatNOrder::operator()
__device__ __host__ clover_wrapper< real, Accessor > operator()(int x_cb, int parity, int chirality)
This accessor routine returns a clover_wrapper to this object, allowing us to overload various operat...
Definition: clover_field_order.h:622

tmp
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44

quda::clover::FloatNOrder::operator()
__device__ __host__ const clover_wrapper< real, Accessor > operator()(int x_cb, int parity, int chirality) const
This accessor routine returns a const colorspinor_wrapper to this object, allowing us to overload var...
Definition: clover_field_order.h:637

quda::clover::FloatNOrder::clover
Float * clover
Definition: clover_field_order.h:549

quda::clover::QDPOrder::Mu2
Float Mu2() const
Definition: clover_field_order.h:817

quda::clover::QDPOrder::stride
const int stride
Definition: clover_field_order.h:804

quda::clover::FieldOrder::operator()
__device__ __host__ complex< Float > operator()(int dummy, int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Read-only complex-member accessor function. This is a special variant that is compatible with the equ...
Definition: clover_field_order.h:439

quda::copy
__host__ __device__ void copy(T1 &a, const T2 &b)
Definition: register_traits.h:152

quda::clover_wrapper::parity
const int parity
Definition: clover_field_order.h:37

quda::clover::QDPOrder::offset
const int offset
Definition: clover_field_order.h:805

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::stride
int stride
Definition: clover_field_order.h:189

quda::clover::QDPJITOrder::twisted
const bool twisted
Definition: clover_field_order.h:857

length
int length[]
Definition: gauge_force_test.cpp:34

texture_helper.cuh

quda::clover::QDPJITOrder::Bytes
size_t Bytes() const
Definition: clover_field_order.h:906

quda::clover::FloatNOrder::backup_norm_h
void * backup_norm_h
host memory for backing up the field when tuning
Definition: clover_field_order.h:569

quda::clover::FieldOrder::accessor
const Accessor< Float, nColor, nSpin, order > accessor
Definition: clover_field_order.h:395

quda
Definition: blas_cublas.h:5

quda::clover::S
This is just a dummy structure we use for trove to define the required structure size.
Definition: clover_field_order.h:794

thrust_helper.cuh

quda::clover::FloatNOrder::load
__device__ __host__ void load(real v[length], int x, int parity) const
Load accessor for the clover matrix.
Definition: clover_field_order.h:735

quda::clover::BQCDOrder::stride
const int stride
Definition: clover_field_order.h:921

quda::clover::BQCDOrder::save
__device__ __host__ void save(RegType v[length], int x, int parity)
Definition: clover_field_order.h:965

quda::clover::FieldOrder::FieldOrder
FieldOrder(CloverField &A, bool inverse=false)
Definition: clover_field_order.h:404

quda::TexVectorType
Definition: register_traits.h:390

quda::clover::square_
Definition: clover_field_order.h:74

quda::clover::QDPJITOrder::volumeCB
const int volumeCB
Definition: clover_field_order.h:854

quda::clover::BQCDOrder::volumeCB
const int volumeCB
Definition: clover_field_order.h:920

quda::clover_wrapper::field
T & field
Definition: clover_field_order.h:35

QUDA_FLOAT2_CLOVER_ORDER
Definition: enum_quda.h:254

quda::clover::FloatNOrder::Twisted
bool Twisted() const
Definition: clover_field_order.h:609

quda::cudaCloverField
Definition: clover_field.h:168

quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::operator()
__device__ __host__ complex< Float > operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Definition: clover_field_order.h:319

comm_allreduce_min
void comm_allreduce_min(double *data)
Definition: comm_mpi.cpp:265

quda::clover::abs_::operator()
__host__ __device__ ReduceType operator()(const quda::complex< Float > &x)
Definition: clover_field_order.h:80

quda::clover::FieldOrder::VolumeCB
__device__ __host__ int VolumeCB() const
Definition: clover_field_order.h:469

quda::VectorType
Definition: register_traits.h:367

quda::clover::abs_
Definition: clover_field_order.h:79

quda::clover::FieldOrder::location
const QudaFieldLocation location
Definition: clover_field_order.h:397

nColor
const int nColor
Definition: covdev_test.cpp:75

quda::clover::Accessor::dummy
complex< Float > dummy
Definition: clover_field_order.h:169

quda::clover::FloatNOrder::norm
norm_type * norm
Definition: clover_field_order.h:550

quda::clover::FloatNOrder::Bytes
size_t Bytes() const
Definition: clover_field_order.h:781

quda::HMatrix::HMatrix
__device__ __host__ HMatrix()
Definition: quda_matrix.h:307

quda::vector_store
__device__ __host__ void vector_store(void *ptr, int idx, const VectorType &value)
Definition: register_traits.h:422

quda::clover::FloatNOrder::volumeCB
const int volumeCB
Definition: clover_field_order.h:559

quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::Accessor
Accessor(const CloverField &A, bool inverse=false)
Definition: clover_field_order.h:311

quda::clover::FloatNOrder::offset
const AllocInt offset
Definition: clover_field_order.h:551

quda::clover::Accessor
Definition: clover_field_order.h:168

quda::clover::FloatNOrder::AllocInt
AllocType< huge_alloc >::type AllocInt
Definition: clover_field_order.h:545

quda::clover_mapper< char, N, add_rho >::type
clover::FloatNOrder< char, N, 4, add_rho > type
Definition: clover_field_order.h:987

quda::clover::FieldOrder::abs_max
__host__ double abs_max(int dim=-1, bool global=true) const
Returns the Linfinity norm of the field.
Definition: clover_field_order.h:507

quda::clover_mapper< short, N, add_rho >::type
clover::FloatNOrder< short, N, 4, add_rho > type
Definition: clover_field_order.h:984

quda::clover_wrapper
clover_wrapper is an internal class that is used to wrap instances of colorspinor accessors...
Definition: clover_field_order.h:34

quda::clover::FloatNOrder::load
void load()
Restore the field from the host after tuning.
Definition: clover_field_order.h:769

quda::clover::FieldOrder::inverse
bool inverse
Definition: clover_field_order.h:396

quda::clover::QDPOrder::save
__device__ __host__ void save(const RegType v[length], int x, int parity)
Definition: clover_field_order.h:831

quda::clover::FloatNOrder::backup_h
void * backup_h
Definition: clover_field_order.h:568

quda::clover::FieldOrder::Volume
__device__ __host__ int Volume() const
Definition: clover_field_order.h:466

quda::clover::QDPJITOrder::load
__device__ __host__ void load(RegType v[length], int x, int parity) const
Definition: clover_field_order.h:869

quda::clover::FloatNOrder::norm_type
float norm_type
Definition: clover_field_order.h:546

register_traits.h
Provides precision abstractions and defines the register precision given the storage precision using ...

QUDA_FLOAT4_CLOVER_ORDER
Definition: enum_quda.h:255

quda::clover::FloatNOrder::mu2
const real mu2
Definition: clover_field_order.h:563

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::transform_reduce
__host__ double transform_reduce(QudaFieldLocation location, helper h, reducer r, double init) const
Definition: clover_field_order.h:290

quda::clover::FieldOrder::norm2
__host__ double norm2(int dim=-1, bool global=true) const
Returns the L2 norm suared of the field.
Definition: clover_field_order.h:495

quda::clover::Accessor::Accessor
Accessor(const CloverField &A, bool inverse=false)
Definition: clover_field_order.h:170

quda::cublas::init
void init()
Create the CUBLAS context.
Definition: blas_cublas.cu:31

clover_field.h

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::offset_cb
size_t offset_cb
Definition: clover_field_order.h:252

quda::clover::FloatNOrder::twisted
const bool twisted
Definition: clover_field_order.h:562

trove_helper.cuh

safe_malloc
#define safe_malloc(size)
Definition: malloc_quda.h:66

quda::clover::QDPOrder
Definition: clover_field_order.h:800

quda::clover_mapper< double, N, add_rho >::type
clover::FloatNOrder< double, N, 2, add_rho > type
Definition: clover_field_order.h:978

V
int V
Definition: test_util.cpp:27

quda::clover::FloatNOrder::Mu2
real Mu2() const
Definition: clover_field_order.h:610

quda::clover::QDPOrder::QDPOrder
QDPOrder(const CloverField &clover, bool inverse, Float *clover_=0)
Definition: clover_field_order.h:810

quda::clover::FloatNOrder::save
__device__ __host__ void save(const real v[length], int x, int parity)
Store accessor for the clover matrix.
Definition: clover_field_order.h:747

quda::LatticeField::Location
QudaFieldLocation Location() const
Definition: lattice_field.cpp:660

quda::clover::FloatNOrder::save
__device__ __host__ void save(const real v[block], int x, int parity, int chirality)
Store accessor for a single chiral block.
Definition: clover_field_order.h:695

quda::clover::FieldOrder::norm1
__host__ double norm1(int dim=-1, bool global=true) const
Returns the L1 norm of the field.
Definition: clover_field_order.h:483

quda::clover::FieldOrder
Definition: clover_field_order.h:389

quda::clover::FloatNOrder::rho
const real rho
Definition: clover_field_order.h:564

quda_matrix.h

QudaFieldLocation
enum QudaFieldLocation_s QudaFieldLocation

quda::clover::BQCDOrder::Bytes
size_t Bytes() const
Definition: clover_field_order.h:969

quda::clover::QDPOrder::RegType
mapper< Float >::type RegType
Definition: clover_field_order.h:801

quda::isFixed
Definition: register_traits.h:144

quda::clover_wrapper::operator=
__device__ __host__ void operator=(const C &a)
Assignment operator with H matrix instance as input.
Definition: clover_field_order.h:55

quda::inverse
__device__ __host__ Matrix< T, 3 > inverse(const Matrix< T, 3 > &u)
Definition: quda_matrix.h:611

quda::clover::QDPJITOrder::stride
const int stride
Definition: clover_field_order.h:855

quda::clover_wrapper::x_cb
const int x_cb
Definition: clover_field_order.h:36

quda::clover_mapper< float, N, add_rho >::type
clover::FloatNOrder< float, N, 4, add_rho > type
Definition: clover_field_order.h:981

quda::mapper
Definition: register_traits.h:43

quda::CloverField::Norm
void * Norm(bool inverse=false)
Definition: clover_field.h:75

quda::clover::FloatNOrder::Vector
VectorType< Float, N >::type Vector
Definition: clover_field_order.h:544

quda::clover::QDPJITOrder::Mu2
Float Mu2() const
Definition: clover_field_order.h:867

quda::clover::FloatNOrder::norm_bytes
size_t norm_bytes
Definition: clover_field_order.h:567

quda::clover::FloatNOrder::FloatNOrder
FloatNOrder(const CloverField &clover, bool is_inverse, Float *clover_=0, norm_type *norm_=0, bool override=false)
host memory for backing up norm when tuning
Definition: clover_field_order.h:571

quda::clover_wrapper::chirality
const int chirality
Definition: clover_field_order.h:38

quda::HMatrix::operator=
__device__ __host__ void operator=(const HMatrix< U, N > &b)
Definition: quda_matrix.h:338

quda::clover::QDPJITOrder::save
__device__ __host__ void save(const RegType v[length], int x, int parity)
Definition: clover_field_order.h:888

quda::clover::QDPJITOrder::Twisted
bool Twisted() const
Definition: clover_field_order.h:866

quda::clover_mapper
Definition: clover_field_order.h:975

quda::clover::QDPOrder::clover
Float * clover
Definition: clover_field_order.h:802

quda::clover::QDPOrder::Twisted
bool Twisted() const
Definition: clover_field_order.h:816

quda::clover::FloatNOrder
Accessor routine for CloverFields in native field order.
Definition: clover_field_order.h:541

complex_quda.h

quda::clover::QDPJITOrder::QDPJITOrder
QDPJITOrder(const CloverField &clover, bool inverse, Float *clover_=0)
Definition: clover_field_order.h:860

quda::clover::FieldOrder::Field
CloverField & Field()
Definition: clover_field_order.h:408

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::transform_reduce
__host__ double transform_reduce(QudaFieldLocation location, helper h, reducer r, double init) const
Definition: clover_field_order.h:225

quda::clover::Accessor::operator()
__device__ __host__ complex< Float > & operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Definition: clover_field_order.h:174

quda::abs
__host__ __device__ ValueType abs(ValueType x)
Definition: complex_quda.h:125

quda::clover::QDPOrder::twisted
const bool twisted
Definition: clover_field_order.h:807

quda::clover::BQCDOrder::Mu2
Float Mu2() const
Definition: clover_field_order.h:934

volumeCB
static int volumeCB
Definition: face_gauge.cpp:43

checkCudaError
#define checkCudaError()
Definition: util_quda.h:161

quda::clover::QDPJITOrder::mu2
const Float mu2
Definition: clover_field_order.h:858

quda::clover::BQCDOrder
Definition: clover_field_order.h:917

comm_allreduce
void comm_allreduce(double *data)
Definition: comm_mpi.cpp:242

quda::conj
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130

comm_allreduce_max
void comm_allreduce_max(double *data)
Definition: comm_mpi.cpp:258

quda::clover::BQCDOrder::BQCDOrder
BQCDOrder(const CloverField &clover, bool inverse, Float *clover_=0)
Definition: clover_field_order.h:926

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::Accessor
Accessor(const CloverField &A, bool inverse=false)
Definition: clover_field_order.h:192

quda::clover::square_::operator()
__host__ __device__ ReduceType operator()(const quda::complex< Float > &x)
Definition: clover_field_order.h:75

quda::AllocType
Definition: register_traits.h:493

thrust_allocator
Definition: thrust_helper.cuh:27

quda::clover::FloatNOrder::bytes
size_t bytes
Definition: clover_field_order.h:566

quda::LatticeField::Precision
QudaPrecision Precision() const
Definition: lattice_field.h:546

quda::clover::BQCDOrder::load
__device__ __host__ void load(RegType v[length], int x, int parity) const
Definition: clover_field_order.h:941

quda::clover::FloatNOrder::stride
const int stride
Definition: clover_field_order.h:560

quda::clover::FieldOrder::A
CloverField & A
Definition: clover_field_order.h:393

quda::zero
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:54

quda::clover::indexFloatN
__device__ __host__ int indexFloatN(int k, int stride, int x)
Definition: clover_field_order.h:242

quda::clover::QDPJITOrder::offdiag
Float * offdiag
Definition: clover_field_order.h:853

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::offset_cb
size_t offset_cb
Definition: clover_field_order.h:190

quda::clover::QDPOrder::load
__device__ __host__ void load(RegType v[length], int x, int parity) const
Definition: clover_field_order.h:819

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::a
Float * a
Definition: clover_field_order.h:250

quda::clover::FieldOrder::abs_min
__host__ double abs_min(int dim=-1, bool global=true) const
Returns the minimum absolute value of the field.
Definition: clover_field_order.h:519

quda::blas::bytes
unsigned long long bytes
Definition: blas_quda.cu:23

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::operator()
__device__ __host__ complex< Float > operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Definition: clover_field_order.h:196

quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::transform_reduce
__host__ double transform_reduce(QudaFieldLocation location, helper h, reducer r, double init) const
Definition: clover_field_order.h:345

quda::fixedMaxValue
Definition: quda_internal.h:53

quda::clover::FloatNOrder::save
void save()
Backup the field to the host when tuning.
Definition: clover_field_order.h:755

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::operator()
__device__ __host__ complex< Float > operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Definition: clover_field_order.h:258

quda::clover::QDPOrder::volumeCB
const int volumeCB
Definition: clover_field_order.h:803

color_spinor.h

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::stride
int stride
Definition: clover_field_order.h:251

quda::copy_and_scale
__host__ __device__ void copy_and_scale(T1 &a, const T2 &b, const T3 &c)
Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector.
Definition: register_traits.h:249

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::a
Float * a
Definition: clover_field_order.h:188