v0.9.0/doc/clover__field__order_8h_source.html

 #ifndef _CLOVER_ORDER_H
 #define _CLOVER_ORDER_H

 // trove requires the warp shuffle instructions introduced with Kepler
 #if __COMPUTE_CAPABILITY__ >= 300
 #include <trove/ptr.h>
 #else
 #define DISABLE_TROVE
 #endif
 #include <register_traits.h>
 #include <clover_field.h>
 #include <complex_quda.h>
 #include <quda_matrix.h>
 #include <color_spinor.h>

 namespace quda {

   /**
      @brief clover_wrapper is a lightweight proxy that pairs a
      reference to a clover accessor with the coordinates of one
      chiral block (checkerboard site, parity, chirality).  Assigning
      an object with a `data` member to the wrapper stores it through
      the accessor's `save`; HMatrix can be constructed / assigned
      from a wrapper, which reads through the accessor's `load`.

      @tparam Float Element type handed to the accessor's save routine
      @tparam T Accessor type; must provide save(Float*, int, int, int)
   */
   template <typename Float, typename T>
     struct clover_wrapper {
       T &field;            // accessor that performs the actual load / save
       const int x_cb;      // checkerboard site index
       const int parity;    // parity of the site
       const int chirality; // which chiral block is addressed

       /**
          @brief Bind the wrapper to an accessor and a chiral block.
          The constructor is spelled with the injected class name
          (no template argument list), which is valid in every C++
          standard; the template-id form is ill-formed from C++20 on.
          @param[in] field Accessor to forward to
          @param[in] x_cb Checkerboard site index
          @param[in] parity Site parity
          @param[in] chirality Chiral block index
       */
       __device__ __host__ inline clover_wrapper(T &field, int x_cb, int parity, int chirality)
         : field(field), x_cb(x_cb), parity(parity), chirality(chirality) { }

       /**
          @brief Store `a` into the wrapped field at the bound
          coordinates.
          @param[in] a Any object exposing a `data` member; its
          storage is reinterpreted as Float* and passed to field.save
       */
       template<typename C>
       __device__ __host__ inline void operator=(const C &a) {
         field.save((Float*)a.data, x_cb, parity, chirality);
       }
     };

   /**
      @brief Assign to this HMatrix from a clover_wrapper: the
      wrapped accessor's load routine fills this matrix's `data`
      for the chiral block the wrapper refers to.
      @param[in] a Wrapper holding the accessor and block coordinates
   */
   template <typename T, int N>
     template <typename S>
     __device__ __host__ inline void HMatrix<T,N>::operator=(const clover_wrapper<T,S> &a) {
     S &accessor = a.field;
     accessor.load((T*)data, a.x_cb, a.parity, a.chirality);
   }

   /**
      @brief Construct an HMatrix from a clover_wrapper: the wrapped
      accessor's load routine fills this matrix's `data` for the
      chiral block the wrapper refers to.
      @param[in] a Wrapper holding the accessor and block coordinates
   */
   template <typename T, int N>
     template <typename S>
     __device__ __host__ inline HMatrix<T,N>::HMatrix(const clover_wrapper<T,S> &a) {
     S &accessor = a.field;
     accessor.load((T*)data, a.x_cb, a.parity, a.chirality);
   }

   namespace clover {

     /**
        @brief Primary (fallback) element accessor.  Field orders
        without a dedicated specialization end up here: construction
        reports an error, and element access only hands back a
        reference to a placeholder member.
     */
     template<typename Float, int nColor, int nSpin, QudaCloverFieldOrder order>
       struct Accessor {
       // placeholder returned by operator(); mutable so a const accessor can expose it
       mutable complex<Float> dummy;

       /** Always raises an error naming the unsupported order. */
       Accessor(const CloverField &A, bool inverse=false)
       {
         errorQuda("Not implemented for order %d", order);
       }

       /** All arguments are ignored; returns the placeholder element. */
       __device__ __host__ inline complex<Float>& operator()(int parity, int x, int s_row, int s_col,
                                                             int c_row, int c_col) const
       {
         return dummy;
       }
     };

     /**
        @brief Translate a per-site element index into an array index
        for storage made of length-N short vectors, where consecutive
        short vectors of one site are `stride` vectors apart.
        @tparam N Length of the short vector
        @param[in] k Element index within the site
        @param[in] stride Distance (in short vectors) between successive vectors of a site
        @param[in] x Site index
        @return (k/N * stride + x) * N + k%N
     */
     template<int N>
       __device__ __host__ inline int indexFloatN(int k, int stride, int x) {
       const int vec  = k / N; // which short vector holds element k
       const int lane = k % N; // position of element k inside that vector
       return (vec*stride + x)*N + lane;
     }

     /**
        @brief Element accessor for QUDA_FLOAT4_CLOVER_ORDER fields.
        Each chiral block is stored as N real diagonal entries followed
        by the (re, im) pairs of one triangle; the other triangle is
        produced by complex conjugation.  Every value read is returned
        multiplied by 2.  Array positions are obtained from
        indexFloatN, using the enum value QUDA_FLOAT4_CLOVER_ORDER as
        the short-vector length.
     */
     template<typename Float, int nColor, int nSpin>
       struct Accessor<Float,nColor,nSpin,QUDA_FLOAT4_CLOVER_ORDER> {
       Float *a;      // base pointer of the field (parity 0)
       int stride;    // field stride, taken from CloverField::Stride()
       size_t offset; // distance in Floats between the two parities
       static constexpr int N = nSpin * nColor / 2; // dimension of one chiral block

       Accessor(const CloverField &A, bool inverse=false)
         : a(static_cast<Float*>(const_cast<void*>(A.V(inverse)))),
           stride(A.Stride()),
           offset(A.Bytes()/(2*sizeof(Float)))
       { }

       /**
          @brief Read one matrix element.
          @return The element, or 0 when the two spin indices lie in
          different chiral blocks
       */
       __device__ __host__ inline complex<Float> operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const {
         // only the chirally diagonal blocks are stored
         const int chirality = s_col / 2;
         if (chirality != s_row / 2) return complex<Float>(0.0);

         const int row = s_row%2 * nColor + c_row;
         const int col = s_col%2 * nColor + c_col;
         Float *a_ = a+parity*offset+stride*chirality*N*N;

         // diagonal entries are real and come first
         if (row == col) return 2*a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(row, stride, x) ];

         // off-diagonal: locate the stored element (col < row); a request
         // with row < col is served by the conjugate of its transpose
         const bool stored = col < row;
         const int minor = stored ? col : row;
         const int major = stored ? row : col;

         // switch coordinates to count from bottom right instead of top left of matrix
         const int k = N*(N-1)/2 - (N-minor)*(N-minor-1)/2 + major - minor - 1;
         const int idx = N + 2*k;

         const Float re = a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(idx+0,stride,x) ];
         const Float im = a_[ indexFloatN<QUDA_FLOAT4_CLOVER_ORDER>(idx+1,stride,x) ];
         return 2*complex<Float>(re, stored ? im : -im);
       }

     };

     /**
        @brief Element accessor for QUDA_PACKED_CLOVER_ORDER fields.
        For every site the two chiral blocks are stored back to back
        (N*N reals each): first N real diagonal entries, then the
        (re, im) pairs of one triangle.  The other triangle is
        produced by complex conjugation.
     */
     template<typename Float, int nColor, int nSpin>
       struct Accessor<Float,nColor,nSpin,QUDA_PACKED_CLOVER_ORDER> {
       Float *a[2]; // base pointers for the even (0) and odd (1) parity halves
       // Dimension of one chiral block.  A compile-time constant, so it is
       // declared static constexpr (as in the FLOAT4 specialization) rather
       // than carried as a per-object const member.
       static constexpr int N = nSpin * nColor / 2;
       complex<Float> zero; // value returned for chirally off-diagonal requests

       Accessor(const CloverField &A, bool inverse=false) {
         // even
         a[0] = static_cast<Float*>(const_cast<void*>(A.V(inverse)));
         // odd
         a[1] = static_cast<Float*>(const_cast<void*>(A.V(inverse))) + A.Bytes()/(2*sizeof(Float));
         zero = complex<Float>(0.0,0.0);
       }

       /**
          @brief Read one matrix element.
          @return The element, or zero when the two spin indices lie
          in different chiral blocks
       */
       __device__ __host__ inline complex<Float> operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const {
         // if not in the diagonal chiral block then return 0.0
         if (s_col / 2 != s_row / 2) { return zero; }

         const int chirality = s_col / 2;

         unsigned int row = s_row%2 * nColor + c_row;
         unsigned int col = s_col%2 * nColor + c_col;

         if (row == col) {
           // diagonal entries are stored as single reals
           complex<Float> tmp = a[parity][(x*2 + chirality)*N*N + row];
           return tmp;
         } else if (col < row) {
           // switch coordinates to count from bottom right instead of top left of matrix
           int k = N*(N-1)/2 - (N-col)*(N-col-1)/2 + row - col - 1;
           int idx = (x*2 + chirality)*N*N + N + 2*k;
           return complex<Float>(a[parity][idx], a[parity][idx+1]);
         } else {
           // other triangle requested: return the conjugate of the stored transpose
           // switch coordinates to count from bottom right instead of top left of matrix
           int k = N*(N-1)/2 - (N-row)*(N-row-1)/2 + col - row - 1;
           int idx = (x*2 + chirality)*N*N + N + 2*k;
           return complex<Float>(a[parity][idx], -a[parity][idx+1]);
         }
       }

     };

     /**
        @brief Order-generic, read-only view of a clover field that
        exposes individual complex matrix elements.  The element
        lookup itself is delegated to the Accessor specialization
        selected by `order`.
     */
     template <typename Float, int nColor, int nSpin, QudaCloverFieldOrder order>
       struct FieldOrder {

       protected:
         CloverField &A;                                    // the wrapped field
         const int volumeCB;                                // checkerboard volume of A
         const Accessor<Float,nColor,nSpin,order> accessor; // order-specific element lookup

       public:
         /**
            @param[in] A Field to wrap
            @param[in] inverse Forwarded to the accessor's constructor
         */
         FieldOrder(CloverField &A, bool inverse=false)
           : A(A), volumeCB(A.VolumeCB()), accessor(A,inverse)
         { }

         /** @return Reference to the wrapped field */
         CloverField& Field() { return A; }

         virtual ~FieldOrder() { }

         /**
            @brief Read-only complex-member accessor function.
            @return accessor(parity, x, s_row, s_col, c_row, c_col)
         */
         __device__ __host__ inline const complex<Float> operator()(int parity, int x, int s_row,
                                                                    int s_col, int c_row, int c_col) const
         {
           return accessor(parity, x, s_row, s_col, c_row, c_col);
         }

         /**
            @brief Read-only complex-member accessor function taking an
            extra leading argument.  The `dummy` argument is ignored;
            the remaining arguments are forwarded unchanged.
         */
         __device__ __host__ inline complex<Float> operator()(int dummy, int parity, int x, int s_row,
                                                              int s_col, int c_row, int c_col) const
         {
           return accessor(parity,x,s_row,s_col,c_row,c_col);
         }

         /*
         __device__ __host__ inline complex<Float>& operator()(int parity, int x, int s_row,
                          int s_col, int c_row, int c_col) {
           //errorQuda("Clover accessor not implemented as a lvalue");
           return accessor(parity, x, s_row, s_col, c_row, c_col);
           }
         */

         /** @return The number of colors (template parameter nColor) */
         __device__ __host__ inline int Ncolor() const { return nColor; }

         /** @return Twice the checkerboard volume */
         __device__ __host__ inline int Volume() const { return 2*volumeCB; }

         /** @return The checkerboard volume */
         __device__ __host__ inline int VolumeCB() const { return volumeCB; }

         /** @return Size in bytes computed from volumeCB and the chiral block dimensions */
         size_t Bytes() const {
           constexpr int dim = (nSpin * nColor) / 2;
           constexpr int per_block = dim * dim / 2;
           // 2 from complex, 2 from chirality
           return static_cast<size_t>(volumeCB) * per_block * 2ll * 2ll * sizeof(Float);
         }
       };

     /**
        @brief Accessor routine for CloverFields in native field order.
        A site holds two chiral blocks of `block = length/2` reals,
        each stored as `M` short vectors of `N` elements; successive
        short vectors of one site are `stride` vectors apart.  When
        Float is a 2-byte type, every chiral block additionally has a
        float norm by which the stored values are rescaled.

        @tparam Float Storage type
        @tparam length Number of reals per site (both chiral blocks)
        @tparam N Length of the short vector used for memory access
        @tparam huge_alloc Selects the integer type used for the parity
        offsets (via AllocType) and disables the texture read path
     */
     template <typename Float, int length, int N, bool huge_alloc=false>
       struct FloatNOrder {
         typedef typename mapper<Float>::type RegType;
         typedef typename VectorType<Float,N>::type Vector;
         typedef typename AllocType<huge_alloc>::type AllocInt;
         static const int M=length/(N*2); // number of short vectors per chiral block
         static const int block=length/2; // chiral block size
         Float *clover;
         float *norm;
         const AllocInt offset; // offset can be 32-bit or 64-bit
         const AllocInt norm_offset;
 #ifdef USE_TEXTURE_OBJECTS
         typedef typename TexVectorType<RegType,N>::type TexVector;
         cudaTextureObject_t tex;
         cudaTextureObject_t normTex;
         const int tex_offset;
 #endif
         const int volumeCB;
         const int stride;

         const bool twisted;
         const Float mu2;

         size_t bytes;        // size of the field in bytes, from CloverField::Bytes()
         size_t norm_bytes;   // size of the norm in bytes, from CloverField::NormBytes()
         void *backup_h;      // host memory for backing up the field when tuning
         void *backup_norm_h; // host memory for backing up the norm when tuning

         /**
            @param[in] clover Field whose geometry (and, by default, data) is used
            @param[in] is_inverse Selects which of the field's buffers / textures to use
            @param[in] clover_ Optional data pointer overriding clover.V(is_inverse)
            @param[in] norm_ Optional norm pointer overriding clover.Norm(is_inverse)
            @param[in] override Suppresses the pointer-mismatch error of the texture path
         */
         FloatNOrder(const CloverField &clover, bool is_inverse, Float *clover_=0, float *norm_=0, bool override=false)
           : offset(clover.Bytes()/(2*sizeof(Float))), norm_offset(clover.NormBytes()/(2*sizeof(float))),
 #ifdef USE_TEXTURE_OBJECTS
             tex(0), normTex(0), tex_offset(offset/N),
 #endif
             volumeCB(clover.VolumeCB()), stride(clover.Stride()),
             twisted(clover.Twisted()), mu2(clover.Mu2()), bytes(clover.Bytes()),
             norm_bytes(clover.NormBytes()), backup_h(nullptr), backup_norm_h(nullptr)
         {
           this->clover = clover_ ? clover_ : (Float*)(clover.V(is_inverse));
           this->norm = norm_ ? norm_ : (float*)(clover.Norm(is_inverse));
 #ifdef USE_TEXTURE_OBJECTS
           if (clover.Location() == QUDA_CUDA_FIELD_LOCATION) {
             if (is_inverse) {
               tex = static_cast<const cudaCloverField&>(clover).InvTex();
               normTex = static_cast<const cudaCloverField&>(clover).InvNormTex();
             } else {
               tex = static_cast<const cudaCloverField&>(clover).Tex();
               normTex = static_cast<const cudaCloverField&>(clover).NormTex();
             }
             // the textures are bound to the field's own buffers, so reading
             // through them is only valid if we point at those same buffers
             if (!huge_alloc && (this->clover != clover.V(is_inverse) ||
                                 (clover.Precision() == QUDA_HALF_PRECISION && this->norm != clover.Norm(is_inverse)) ) && !override) {
               errorQuda("Cannot use texture read since data pointer does not equal field pointer - use with huge_alloc=true instead");
             }
           }
 #endif
         }

         bool  Twisted() const {return twisted;}
         Float Mu2() const {return mu2;}

         /**
            @brief This accessor routine returns a clover_wrapper to this
            object, allowing assignment to / construction from the
            addressed chiral block.  The wrapper is instantiated on this
            exact accessor type (including huge_alloc) so that it can
            bind to *this for every instantiation.
            @param[in] x_cb Checkerboarded space-time index we are requesting
            @param[in] parity Parity we are requesting
            @param[in] chirality Chirality we are requesting
            @return Instance of a clover_wrapper that curries in access to this field
         */
         __device__ __host__ inline clover_wrapper<RegType,FloatNOrder<Float,length,N,huge_alloc> >
           operator()(int x_cb, int parity, int chirality) {
           return clover_wrapper<RegType,FloatNOrder<Float,length,N,huge_alloc> >(*this, x_cb, parity, chirality);
         }

         /**
            @brief Const variant of the accessor above; returns a const
            clover_wrapper to this object.  Constness is cast away
            because the wrapper stores a non-const reference.
            @param[in] x_cb Checkerboarded space-time index we are requesting
            @param[in] parity Parity we are requesting
            @param[in] chirality Chirality we are requesting
            @return Instance of a clover_wrapper that curries in access to this field
         */
         __device__ __host__ inline const clover_wrapper<RegType,FloatNOrder<Float,length,N,huge_alloc> >
           operator()(int x_cb, int parity, int chirality) const {
           return clover_wrapper<RegType,FloatNOrder<Float,length,N,huge_alloc> >
             (const_cast<FloatNOrder<Float,length,N,huge_alloc>&>(*this), x_cb, parity, chirality);
         }

         /**
            @brief Load accessor for a single chiral block
            @param[out] v Vector of loaded elements
            @param[in] x Checkerboarded site index
            @param[in] parity Field parity
            @param[in] chirality Chiral block index
         */
         __device__ __host__ inline void load(RegType v[block], int x, int parity, int chirality) const {
 #pragma unroll
           for (int i=0; i<M; i++) {
             // first do vectorized copy from memory
 #if defined(USE_TEXTURE_OBJECTS) && defined(__CUDA_ARCH__)
             if (!huge_alloc) { // use textures unless we have a huge alloc
               TexVector vecTmp = tex1Dfetch<TexVector>(tex, parity*tex_offset + stride*(chirality*M+i) + x);
               // second do vectorized copy converting into register type
 #pragma unroll
               for (int j=0; j<N; j++) copy(v[i*N+j], reinterpret_cast<RegType*>(&vecTmp)[j]);
             } else
 #endif
             {
               Vector vecTmp = vector_load<Vector>(clover + parity*offset, x + stride*(chirality*M+i));
               // second do scalar copy converting into register type
 #pragma unroll
               for (int j=0; j<N; j++) copy(v[i*N+j], reinterpret_cast<Float*>(&vecTmp)[j]);
             }
           }

           // 2-byte storage: rescale by the per-block norm
           if (sizeof(Float)==sizeof(short)) {
 #if defined(USE_TEXTURE_OBJECTS) && defined(__CUDA_ARCH__)
             RegType nrm = !huge_alloc ? tex1Dfetch<float>(normTex, parity*norm_offset + chirality*stride + x) :
               norm[parity*norm_offset + chirality*stride + x];
 #else
             RegType nrm = norm[parity*norm_offset + chirality*stride + x];
 #endif
 #pragma unroll
             for (int i=0; i<block; i++) v[i] *= nrm;
           }
         }

         /**
            @brief Store accessor for a single chiral block
            @param[in] v Vector of elements to be stored
            @param[in] x Checkerboarded site index
            @param[in] parity Field parity
            @param[in] chirality Chiral block index
         */
         __device__ __host__ inline void save(const RegType v[block], int x, int parity, int chirality) {

           // find the norm of each chiral block
           RegType scale = 0.0;
           if (sizeof(Float)==sizeof(short)) {
 #pragma unroll
             for (int i=0; i<block; i++) scale = fabs(v[i]) > scale ? fabs(v[i]) : scale;
             norm[parity*norm_offset + chirality*stride + x] = scale;
           }

 #pragma unroll
           for (int i=0; i<M; i++) {
             Vector vecTmp;
             // first do scalar copy converting into storage type and rescaling if necessary
             if (sizeof(Float)==sizeof(short))
 #pragma unroll
               for (int j=0; j<N; j++) copy(reinterpret_cast<Float*>(&vecTmp)[j], v[i*N+j] / scale);
             else
 #pragma unroll
               for (int j=0; j<N; j++) copy(reinterpret_cast<Float*>(&vecTmp)[j], v[i*N+j]);

             // second do vectorized copy into memory
             vector_store(clover + parity*offset, x + stride*(chirality*M+i), vecTmp);
           }
         }

         /**
            @brief Load accessor for the clover matrix (both chiral
            blocks of a site).  The second block starts `block`
            elements into v.
            @param[out] v Vector of loaded elements
            @param[in] x Checkerboarded site index
            @param[in] parity Field parity
         */
         __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
 #pragma unroll
           for (int chirality=0; chirality<2; chirality++) load(&v[chirality*block], x, parity, chirality);
         }

         /**
            @brief Store accessor for the clover matrix (both chiral
            blocks of a site).  The second block starts `block`
            elements into v.
            @param[in] v Vector of elements to be stored
            @param[in] x Checkerboarded site index
            @param[in] parity Field parity
         */
         __device__ __host__ inline void save(const RegType v[length], int x, int parity) {
 #pragma unroll
           for (int chirality=0; chirality<2; chirality++) save(&v[chirality*block], x, parity, chirality);
         }

         /**
            @brief Backup the field to the host when tuning.  Raises an
            error if a backup already exists.
         */
         void save() {
           if (backup_h) errorQuda("Already allocated host backup");
           backup_h = safe_malloc(bytes);
           cudaMemcpy(backup_h, clover, bytes, cudaMemcpyDeviceToHost);
           if (norm_bytes) {
             backup_norm_h = safe_malloc(norm_bytes);
             cudaMemcpy(backup_norm_h, norm, norm_bytes, cudaMemcpyDeviceToHost);
           }
           checkCudaError();
         }

         /**
            @brief Restore the field from the host after tuning and
            release the host backup.
         */
         void load() {
           cudaMemcpy(clover, backup_h, bytes, cudaMemcpyHostToDevice);
           host_free(backup_h);
           backup_h = nullptr;
           if (norm_bytes) {
             cudaMemcpy(norm, backup_norm_h, norm_bytes, cudaMemcpyHostToDevice);
             host_free(backup_norm_h);
             backup_norm_h = nullptr;
           }
           checkCudaError();
         }

         /**
            @return Bytes occupied by one site: `length` Floats, plus
            two float norms when Float is a 2-byte type
         */
         size_t Bytes() const {
           size_t site_bytes = length*sizeof(Float);
           if (sizeof(Float)==sizeof(short)) site_bytes += 2*sizeof(float);
           return site_bytes;
         }
       };

     /**
        @brief Dummy aggregate of `length` reals; used with trove to
        define the size of the structure that is moved per site.
     */
     template <typename real, int length>
       struct S {
         real v[length];
       };

     /**
        @brief Accessor for clover fields in QDP order, where the
        `length` reals of a site are stored contiguously.  Values are
        multiplied by 0.5 on load and by 2.0 on save.
     */
     template <typename Float, int length>
       struct QDPOrder {
         typedef typename mapper<Float>::type RegType;
         Float *clover;
         const int volumeCB;
         const int stride;
         const int offset; // distance in Floats between the two parities

         const bool twisted;
         const Float mu2;

         /**
            @param[in] clover Field supplying geometry and, by default, data
            @param[in] inverse Selects which buffer of the field to use
            @param[in] clover_ Optional data pointer overriding clover.V(inverse)
         */
         QDPOrder(const CloverField &clover, bool inverse, Float *clover_=0)
           : volumeCB(clover.VolumeCB()), stride(volumeCB), offset(clover.Bytes()/(2*sizeof(Float))),
             twisted(clover.Twisted()), mu2(clover.Mu2())
         {
           if (clover_) this->clover = clover_;
           else this->clover = (Float*)(clover.V(inverse));
         }

         bool  Twisted() const {return twisted;}
         Float Mu2() const {return mu2;}

         /**
            @brief Read all `length` elements of one site into v.
            @param[out] v Destination array
            @param[in] x Checkerboarded site index
            @param[in] parity Field parity
         */
         __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
           // factor of 0.5 comes from basis change
 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE)
           typedef S<Float,length> structure;
           trove::coalesced_ptr<structure> sites((structure*)clover);
           structure site = sites[parity*volumeCB + x];
           for (int i=0; i<length; i++) v[i] = 0.5*(RegType)site.v[i];
 #else
           const int base = parity*offset + x*length;
           for (int i=0; i<length; i++) v[i] = 0.5*clover[base+i];
 #endif
         }

         /**
            @brief Write all `length` elements of v to one site.
            @param[in] v Source array
            @param[in] x Checkerboarded site index
            @param[in] parity Field parity
         */
         __device__ __host__ inline void save(const RegType v[length], int x, int parity) {
 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE)
           typedef S<Float,length> structure;
           trove::coalesced_ptr<structure> sites((structure*)clover);
           structure site;
           for (int i=0; i<length; i++) site.v[i] = 2.0*(Float)v[i];
           sites[parity*volumeCB + x] = site;
 #else
           const int base = parity*offset + x*length;
           for (int i=0; i<length; i++) clover[base+i] = 2.0*v[i];
 #endif
         }

         /** @return Bytes occupied by one site */
         size_t Bytes() const { return length*sizeof(Float); }
       };

     /**
        @brief Accessor for clover fields in QDPJIT order, where the
        diagonal and off-diagonal elements live in two separate
        arrays whose fastest-running index is the site.  Values are
        multiplied by 0.5 on load and by 2.0 on save.
     */
     template <typename Float, int length>
       struct QDPJITOrder {
         typedef typename mapper<Float>::type RegType;
         Float *diag;    // diagonal elements (second pointer of the pair)
         Float *offdiag; // off-diagonal elements (first pointer of the pair)
         const int volumeCB;
         const int stride;

         const bool twisted;
         const Float mu2;

         /**
            @param[in] clover Field supplying geometry and, by default, data
            @param[in] inverse Selects which buffer of the field to use
            @param[in] clover_ Optional override; like the field's own
            buffer it is interpreted as an array of two Float pointers
         */
         QDPJITOrder(const CloverField &clover, bool inverse, Float *clover_=0)
           : volumeCB(clover.VolumeCB()), stride(volumeCB), twisted(clover.Twisted()), mu2(clover.Mu2())
         {
           if (clover_) {
             offdiag = ((Float**)clover_)[0];
             diag = ((Float**)clover_)[1];
           } else {
             offdiag = ((Float**)clover.V(inverse))[0];
             diag = ((Float**)clover.V(inverse))[1];
           }
         }

         bool  Twisted() const {return twisted;}
         Float Mu2() const {return mu2;}

         /**
            @brief Gather one site into v (36 reals per chirality: 6
            diagonal entries followed by 30 off-diagonal reals).
            @param[out] v Destination array
            @param[in] x Checkerboarded site index
            @param[in] parity Field parity
         */
         __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
           // position of each off-diagonal element within the offdiag array
           const int idtab[15]={0,1,3,6,10,2,4,7,11,5,8,12,9,13,14};

           // the factor of 0.5 comes from a basis change
           for (int chirality=0; chirality<2; chirality++) {
             RegType *dst = v + chirality*36;

             // set diagonal elements
             for (int d=0; d<6; d++) {
               dst[d] = 0.5*diag[((d*2 + chirality)*2 + parity)*volumeCB + x];
             }

             // the off diagonal elements
             for (int n=0; n<30; n++) {
               const int reim = n%2;
               const int elem = n/2;
               dst[6 + n] = 0.5*offdiag[(((reim*15 + idtab[elem])*2 + chirality)*2 + parity)*volumeCB + x];
             }
           }
         }

         /**
            @brief Scatter one site from v; the inverse of load.
            @param[in] v Source array
            @param[in] x Checkerboarded site index
            @param[in] parity Field parity
         */
         __device__ __host__ inline void save(const RegType v[length], int x, int parity) {
           // position of each off-diagonal element within the offdiag array
           const int idtab[15]={0,1,3,6,10,2,4,7,11,5,8,12,9,13,14};

           // the factor of 2.0 comes from undoing the basis change
           for (int chirality=0; chirality<2; chirality++) {
             const RegType *src = v + chirality*36;

             // set diagonal elements
             for (int d=0; d<6; d++) {
               diag[((d*2 + chirality)*2 + parity)*volumeCB + x] = 2.0*src[d];
             }

             // the off diagonal elements
             for (int n=0; n<30; n++) {
               const int reim = n%2;
               const int elem = n/2;
               offdiag[(((reim*15 + idtab[elem])*2 + chirality)*2 + parity)*volumeCB + x] = 2.0*src[6 + n];
             }
           }
         }

         /** @return Bytes occupied by one site */
         size_t Bytes() const { return length*sizeof(Float); }
       };


     /**
        @brief Accessor for clover fields in BQCD order.  Loading
        permutes the elements of each chiral block through a fixed
        table and applies a fixed sign pattern; saving is not
        implemented.
     */
     template <typename Float, int length>
       struct BQCDOrder {
         typedef typename mapper<Float>::type RegType;
         Float *clover[2]; // one base pointer per parity
         const int volumeCB;
         const int stride;

         const bool twisted;
         const Float mu2;

         /**
            @param[in] clover Field supplying geometry and, by default, data
            @param[in] inverse Selects which buffer of the field to use
            @param[in] clover_ Optional data pointer overriding clover.V(inverse)
         */
         BQCDOrder(const CloverField &clover, bool inverse, Float *clover_=0)
           : volumeCB(clover.Stride()), stride(volumeCB), twisted(clover.Twisted()), mu2(clover.Mu2())
         {
           if (clover_) this->clover[0] = clover_;
           else this->clover[0] = (Float*)(clover.V(inverse));
           // the second parity starts half-way (in bytes) through the field
           this->clover[1] = (Float*)((char*)this->clover[0] + clover.Bytes()/2);
         }


         bool  Twisted() const {return twisted;}
         Float Mu2() const {return mu2;}

         /**
            @brief Read one site into v.
            @param[out] v Destination array
            @param[in] x Site index
            @param[in] parity Field parity
         */
         __device__ __host__ inline void load(RegType v[length], int x, int parity) const {
           // source position within a chiral block for each destination element
           const int bq[36] = { 21, 32, 33, 0,  1, 20,                   // diagonal
                                28, 29, 30, 31, 6, 7,  14, 15, 22, 23,   // column 1  6
                                34, 35, 8, 9, 16, 17, 24, 25,            // column 2  16
                                10, 11, 18, 19, 26, 27,                  // column 3  24
                                2,  3,  4,  5,                           // column 4  30
                                12, 13};

           // flip the sign of the imaginary components
           int sign[36];
           for (int d=0; d<6; d++) sign[d] = 1;
           for (int p=6; p<36; p+=2) {
             // for these pairs the real component changes sign as well
             const bool negate_real = (p >= 10 && p <= 15) || (p >= 18 && p <= 29);
             sign[p]   = negate_real ? -1 : 1;
             sign[p+1] = -1;
           }

           const int M=length/2;
           for (int chirality=0; chirality<2; chirality++) {
             const int base = x*length+chirality*M;
             for (int i=0; i<M; i++)
               v[chirality*M+i] = sign[i] * clover[parity][base+bq[i]];
           }
         }

         // FIXME implement the save routine for BQCD ordered fields
         __device__ __host__ inline void save(RegType v[length], int x, int parity) {

         }

         /** @return Bytes occupied by one site */
         size_t Bytes() const { return length*sizeof(Float); }
       };

   } // namespace clover

   /**
      Traits class that selects the native clover accessor for a given
      storage type, reducing the template explosion at the call sites.
      The primary template is intentionally empty; only the
      specializations below define `type`.
   */
   template<typename Float,int N=72> struct clover_mapper { };

   /** double precision uses Float2 */
   template<int N> struct clover_mapper<double,N> {
     using type = clover::FloatNOrder<double, N, 2>;
   };

   /** single precision uses Float4 */
   template<int N> struct clover_mapper<float,N> {
     using type = clover::FloatNOrder<float, N, 4>;
   };

   /** half precision uses Float4 */
   template<int N> struct clover_mapper<short,N> {
     using type = clover::FloatNOrder<short, N, 4>;
   };

 } // namespace quda

 #endif //_CLOVER_ORDER_H


quda::clover::S::v
real v[length]
Definition: clover_field_order.h:594

quda::clover::FieldOrder::volumeCB
const int volumeCB
Definition: clover_field_order.h:273

quda::clover::FloatNOrder::bytes
size_t bytes
Definition: clover_field_order.h:389

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::Accessor
Accessor(const CloverField &A, bool inverse=false)
Definition: clover_field_order.h:186

quda::clover::QDPOrder::Bytes
size_t Bytes() const
Definition: clover_field_order.h:643

quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::zero
complex< Float > zero
Definition: clover_field_order.h:226

quda::clover::BQCDOrder::clover
Float * clover[2]
Definition: clover_field_order.h:719

QUDA_PACKED_CLOVER_ORDER
Definition: enum_quda.h:228

quda::clover::FloatNOrder::AllocInt
AllocType< huge_alloc >::type AllocInt
Definition: clover_field_order.h:370

quda::clover::BQCDOrder::Twisted
bool Twisted() const
Definition: clover_field_order.h:733

quda::CloverField::V
void * V(bool inverse=false)
Definition: clover_field.h:73

quda::clover::FloatNOrder::Mu2
Float Mu2() const
Definition: clover_field_order.h:423

quda::clover::QDPJITOrder
Definition: clover_field_order.h:650

quda::clover::FieldOrder::Bytes
size_t Bytes() const
Definition: clover_field_order.h:350

quda::clover::FieldOrder::operator()
__device__ __host__ const complex< Float > operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Read-only complex-member accessor function.
Definition: clover_field_order.h:298

quda::clover::FloatNOrder::offset
const AllocInt offset
Definition: clover_field_order.h:375

errorQuda
#define errorQuda(...)
Definition: util_quda.h:90

quda::clover::FieldOrder::Ncolor
__device__ __host__ int Ncolor() const
Returns the number of colors (nColor).
Definition: clover_field_order.h:341

QUDA_CUDA_FIELD_LOCATION
Definition: enum_quda.h:297

quda::CloverField::Bytes
size_t Bytes() const
Definition: clover_field.h:97

host_free
#define host_free(ptr)
Definition: malloc_quda.h:59

quda::clover::QDPJITOrder::diag
Float * diag
Definition: clover_field_order.h:652

quda::clover::BQCDOrder::twisted
const bool twisted
Definition: clover_field_order.h:723

QUDA_HALF_PRECISION
Definition: enum_quda.h:59

quda::clover::FloatNOrder::backup_h
void * backup_h
Definition: clover_field_order.h:391

quda::CloverField
Definition: clover_field.h:44

quda::clover::QDPOrder::mu2
const Float mu2
Definition: clover_field_order.h:608

quda::clover::QDPJITOrder::RegType
mapper< Float >::type RegType
Definition: clover_field_order.h:651

quda::clover::FloatNOrder::twisted
const bool twisted
Definition: clover_field_order.h:386

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::offset
size_t offset
Definition: clover_field_order.h:184

quda::clover::BQCDOrder::RegType
mapper< Float >::type RegType
Definition: clover_field_order.h:718

quda::clover::BQCDOrder::mu2
const Float mu2
Definition: clover_field_order.h:724

quda::clover::FloatNOrder::mu2
const Float mu2
Definition: clover_field_order.h:387

tmp
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44

quda::clover::QDPOrder::Mu2
Float Mu2() const
Definition: clover_field_order.h:617

quda::clover::QDPOrder::stride
const int stride
Definition: clover_field_order.h:604

z
int int z
Definition: CMakeCUDACompilerId.cpp1.ii:2637

quda::clover::FieldOrder::operator()
__device__ __host__ complex< Float > operator()(int dummy, int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Read-only complex-member accessor function. This is a special variant that is compatible with the equ...
Definition: clover_field_order.h:317

quda::clover::FloatNOrder::Twisted
bool Twisted() const
Definition: clover_field_order.h:422

quda::clover_wrapper::parity
const int parity
Definition: clover_field_order.h:40

quda::clover::QDPOrder::offset
const int offset
Definition: clover_field_order.h:605

quda::clover::QDPJITOrder::twisted
const bool twisted
Definition: clover_field_order.h:657

quda::clover::FloatNOrder::load
void load()
Restore the field from the host after tuning.
Definition: clover_field_order.h:569

quda::clover::FloatNOrder::RegType
mapper< Float >::type RegType
Definition: clover_field_order.h:368

quda::clover::QDPJITOrder::Bytes
size_t Bytes() const
Definition: clover_field_order.h:706

quda::clover::FieldOrder::accessor
const Accessor< Float, nColor, nSpin, order > accessor
Definition: clover_field_order.h:274

quda::clover_mapper< float, N >::type
clover::FloatNOrder< float, N, 4 > type
Definition: clover_field_order.h:781

quda
Definition: blas_cublas.h:6

quda::clover::S
This is just a dummy structure we use for trove to define the required structure size.
Definition: clover_field_order.h:594

quda::clover::BQCDOrder::stride
const int stride
Definition: clover_field_order.h:721

quda::clover::BQCDOrder::save
__device__ __host__ void save(RegType v[length], int x, int parity)
Definition: clover_field_order.h:765

offset
size_t size_t offset
Definition: CMakeCUDACompilerId.cpp1.ii:2497

quda::clover::FieldOrder::FieldOrder
FieldOrder(CloverField &A, bool inverse=false)
Definition: clover_field_order.h:281

quda::TexVectorType
Definition: register_traits.h:258

x
p x
Definition: CMakeCUDACompilerId.cpp1.ii:3011

quda::clover::QDPJITOrder::volumeCB
const int volumeCB
Definition: clover_field_order.h:654

quda::clover::FloatNOrder::Vector
VectorType< Float, N >::type Vector
Definition: clover_field_order.h:369

quda::clover::BQCDOrder::volumeCB
const int volumeCB
Definition: clover_field_order.h:720

quda::clover_wrapper::field
T & field
Definition: clover_field_order.h:38

dslash_cuda_gen.clover
bool clover
Definition: dslash_cuda_gen.py:1224

quda::cudaCloverField
Definition: clover_field.h:132

quda::clover_mapper< double, N >::type
clover::FloatNOrder< double, N, 2 > type
Definition: clover_field_order.h:778

quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::operator()
__device__ __host__ complex< Float > operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Definition: clover_field_order.h:235

quda::clover::FloatNOrder::operator()
__device__ __host__ clover_wrapper< RegType, FloatNOrder< Float, length, N > > operator()(int x_cb, int parity, int chirality)
This accessor routine returns a clover_wrapper to this object, allowing us to overload various operat...
Definition: clover_field_order.h:436

quda::clover_mapper< short, N >::type
clover::FloatNOrder< short, N, 4 > type
Definition: clover_field_order.h:784

quda::clover::FieldOrder::VolumeCB
__device__ __host__ int VolumeCB() const
Definition: clover_field_order.h:347

quda::VectorType
Definition: register_traits.h:240

quda::clover::FloatNOrder::load
__device__ __host__ void load(RegType v[block], int x, int parity, int chirality) const
Load accessor for a single chiral block.
Definition: clover_field_order.h:463

quda::clover::FloatNOrder::backup_norm_h
void * backup_norm_h
host memory for backing up the norm when tuning
Definition: clover_field_order.h:392

quda::clover::FieldOrder::~FieldOrder
virtual ~FieldOrder()
Definition: clover_field_order.h:286

nColor
const int nColor
Definition: covdev_test.cpp:77

quda::clover::Accessor::dummy
complex< Float > dummy
Definition: clover_field_order.h:162

quda::HMatrix::HMatrix
__device__ __host__ HMatrix()
Definition: quda_matrix.h:210

quda::vector_store
__device__ __host__ void vector_store(void *ptr, int idx, const VectorType &value)
Definition: register_traits.h:285

quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::Accessor
Accessor(const CloverField &A, bool inverse=false)
Definition: clover_field_order.h:227

quda::clover::Accessor
Definition: clover_field_order.h:161

quda::clover::FloatNOrder::volumeCB
const int volumeCB
Definition: clover_field_order.h:383

V
int V
Definition: test_util.cpp:28

fused_exterior_ndeg_tm_dslash_cuda_gen.i
int i
start here
Definition: fused_exterior_ndeg_tm_dslash_cuda_gen.py:816

double
double
Definition: CMakeCUDACompilerId.cpp1.ii:8010

quda::clover_wrapper
clover_wrapper is an internal class that is used to wrap instances of clover field accessors...
Definition: clover_field_order.h:37

quda::clover::FloatNOrder::M
static const int M
Definition: clover_field_order.h:371

quda::clover::QDPOrder::save
__device__ __host__ void save(const RegType v[length], int x, int parity)
Definition: clover_field_order.h:631

quda::clover::FieldOrder::Volume
__device__ __host__ int Volume() const
Definition: clover_field_order.h:344

quda::clover::QDPJITOrder::load
__device__ __host__ void load(RegType v[length], int x, int parity) const
Definition: clover_field_order.h:669

quda::clover::FloatNOrder::clover
Float * clover
Definition: clover_field_order.h:373

register_traits.h
Provides precision abstractions and defines the register precision given the storage precision using ...

QUDA_FLOAT4_CLOVER_ORDER
Definition: enum_quda.h:227

quda::clover::FloatNOrder::save
__device__ __host__ void save(const RegType v[block], int x, int parity, int chirality)
Store accessor for a single chiral block.
Definition: clover_field_order.h:502

quda::clover::Accessor::Accessor
Accessor(const CloverField &A, bool inverse=false)
Definition: clover_field_order.h:163

quda::clover::FloatNOrder::norm_bytes
size_t norm_bytes
Definition: clover_field_order.h:390

clover_field.h

safe_malloc
#define safe_malloc(size)
Definition: malloc_quda.h:54

quda::clover::QDPOrder
Definition: clover_field_order.h:600

quda::clover::FloatNOrder::load
__device__ __host__ void load(RegType v[length], int x, int parity) const
Load accessor for the clover matrix.
Definition: clover_field_order.h:535

quda::clover::QDPOrder::QDPOrder
QDPOrder(const CloverField &clover, bool inverse, Float *clover_=0)
Definition: clover_field_order.h:610

quda::clover::FloatNOrder::norm
float * norm
Definition: clover_field_order.h:374

quda::clover::FieldOrder
Definition: clover_field_order.h:268

quda_matrix.h

quda::clover::BQCDOrder::Bytes
size_t Bytes() const
Definition: clover_field_order.h:769

quda::clover::QDPOrder::RegType
mapper< Float >::type RegType
Definition: clover_field_order.h:601

quda::clover_wrapper::operator=
__device__ __host__ void operator=(const C &a)
Assignment operator with H matrix instance as input.
Definition: clover_field_order.h:58

quda::clover::QDPJITOrder::stride
const int stride
Definition: clover_field_order.h:655

idx
int idx
Definition: staggered_fused_exterior_dslash_core.h:355

quda::clover_wrapper::x_cb
const int x_cb
Definition: clover_field_order.h:39

quda::mapper
Definition: register_traits.h:41

quda::clover::QDPJITOrder::Mu2
Float Mu2() const
Definition: clover_field_order.h:667

quda::clover::FloatNOrder::block
static const int block
Definition: clover_field_order.h:372

quda::clover::FloatNOrder::save
void save()
Backup the field to the host when tuning.
Definition: clover_field_order.h:555

n
int n
Definition: CMakeCUDACompilerId.cpp1.ii:8086

quda::clover_wrapper::chirality
const int chirality
Definition: clover_field_order.h:41

quda::HMatrix::operator=
__device__ __host__ void operator=(const HMatrix< U, N > &b)
Definition: quda_matrix.h:241

quda::clover::QDPJITOrder::save
__device__ __host__ void save(const RegType v[length], int x, int parity)
Definition: clover_field_order.h:688

quda::clover::QDPJITOrder::Twisted
bool Twisted() const
Definition: clover_field_order.h:666

fabs
double fabs(double)

deg_tm_dslash_cuda_gen.sign
def sign(x)
Definition: deg_tm_dslash_cuda_gen.py:104

tex
static __inline__ dim3 dim3 void size_t cudaStream_t int enum cudaTextureReadMode readMode static __inline__ const struct texture< T, dim, readMode > & tex
Definition: CMakeCUDACompilerId.cpp1.ii:15874

quda::clover_mapper
Definition: clover_field_order.h:775

quda::clover::FloatNOrder::norm_offset
const AllocInt norm_offset
Definition: clover_field_order.h:376

quda::clover::QDPOrder::clover
Float * clover
Definition: clover_field_order.h:602

quda::clover::QDPOrder::Twisted
bool Twisted() const
Definition: clover_field_order.h:616

length
void size_t length
Definition: CMakeCUDACompilerId.cpp1.ii:2433

quda::clover::FloatNOrder
Accessor routine for CloverFields in native field order.
Definition: clover_field_order.h:367

complex_quda.h

quda::clover::QDPJITOrder::QDPJITOrder
QDPJITOrder(const CloverField &clover, bool inverse, Float *clover_=0)
Definition: clover_field_order.h:660

quda::clover::FieldOrder::Field
CloverField & Field()
Definition: clover_field_order.h:284

quda::clover::Accessor::operator()
__device__ __host__ complex< Float > & operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Definition: clover_field_order.h:167

quda::clover::QDPOrder::twisted
const bool twisted
Definition: clover_field_order.h:607

quda::clover::BQCDOrder::Mu2
Float Mu2() const
Definition: clover_field_order.h:734

checkCudaError
#define checkCudaError()
Definition: util_quda.h:129

quda::clover::QDPJITOrder::mu2
const Float mu2
Definition: clover_field_order.h:658

quda::clover::FloatNOrder::operator()
__device__ __host__ const clover_wrapper< RegType, FloatNOrder< Float, length, N > > operator()(int x_cb, int parity, int chirality) const
This accessor routine returns a const clover_wrapper to this object, allowing us to overload var...
Definition: clover_field_order.h:451

quda::clover::FloatNOrder::Bytes
size_t Bytes() const
Definition: clover_field_order.h:581

float
float
Definition: CMakeCUDACompilerId.cpp1.ii:12791

quda::clover::BQCDOrder
Definition: clover_field_order.h:717

quda::clover::FloatNOrder::stride
const int stride
Definition: clover_field_order.h:384

quda::clover::BQCDOrder::BQCDOrder
BQCDOrder(const CloverField &clover, bool inverse, Float *clover_=0)
Definition: clover_field_order.h:726

quda::AllocType
Definition: register_traits.h:334

quda::clover::BQCDOrder::load
__device__ __host__ void load(RegType v[length], int x, int parity) const
Definition: clover_field_order.h:741

parity
QudaParity parity
Definition: covdev_test.cpp:53

a
#define a
Definition: dw_dslash4_core.h:82

quda::clover::FieldOrder::A
CloverField & A
Definition: clover_field_order.h:272

quda::zero
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:82

quda::clover::indexFloatN
__device__ __host__ int indexFloatN(int k, int stride, int x)
Definition: clover_field_order.h:174

quda::clover::QDPJITOrder::offdiag
Float * offdiag
Definition: clover_field_order.h:653

quda::clover::QDPOrder::load
__device__ __host__ void load(RegType v[length], int x, int parity) const
Definition: clover_field_order.h:619

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::a
Float * a
Definition: clover_field_order.h:182

quda::clover::FloatNOrder::FloatNOrder
FloatNOrder(const CloverField &clover, bool is_inverse, Float *clover_=0, float *norm_=0, bool override=false)
host memory for backing up norm when tuning
Definition: clover_field_order.h:394

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::operator()
__device__ __host__ complex< Float > operator()(int parity, int x, int s_row, int s_col, int c_row, int c_col) const
Definition: clover_field_order.h:189

quda::clover::QDPOrder::volumeCB
const int volumeCB
Definition: clover_field_order.h:603

color_spinor.h

quda::clover::FloatNOrder::save
__device__ __host__ void save(const RegType v[length], int x, int parity)
Store accessor for the clover matrix.
Definition: clover_field_order.h:547

quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::stride
int stride
Definition: clover_field_order.h:183