quda-ref/v1.0.0/color__spinor_8h_source.html

 #pragma once

 #include <complex_quda.h>
 #include <quda_matrix.h>

 namespace quda {

   // colorspinor_wrapper is an internal class that is used to wrap
   // instances of colorspinor accessors.  It is only forward-declared
   // here so that ColorSpinor can declare constructors and assignment
   // operators taking it; the definition is not part of this header section.
   template<typename Float, typename T> struct colorspinor_wrapper;
   // colorspinor_ghost_wrapper is an internal class that is used to wrap
   // instances of colorspinor accessors (ghost variant; presumably the
   // halo/ghost-zone counterpart of colorspinor_wrapper -- confirm
   // against its definition).  Forward declaration only.
   template<typename Float, typename T> struct colorspinor_ghost_wrapper;

   /**
      @brief Generic color-spinor: a fixed-size array of Ns x Nc complex
      numbers.  Element (s,c) is stored at data[s*Nc + c], i.e. the
      color index runs fastest.
      @tparam Float Underlying real type of the complex elements
      @tparam Nc Number of colors
      @tparam Ns Number of spin components
   */
   template <typename Float, int Nc, int Ns>
   struct ColorSpinor {

     /** Total number of complex elements held */
     static constexpr int size = Nc * Ns;
     complex<Float> data[size];

     /** Default constructor: every element is set to zero. */
     __device__ __host__ inline ColorSpinor<Float, Nc, Ns>()
     {
#pragma unroll
       for (int i = 0; i < size; i++) { data[i] = 0; }
     }

     /** Copy constructor: element-wise copy of a. */
     __device__ __host__ inline ColorSpinor<Float, Nc, Ns>(const ColorSpinor<Float, Nc, Ns> &a)
     {
#pragma unroll
       for (int i = 0; i < size; i++) { data[i] = a.data[i]; }
     }

     /** Copy assignment: element-wise copy of a (self-assignment is a no-op). */
     __device__ __host__ inline ColorSpinor<Float, Nc, Ns> &operator=(const ColorSpinor<Float, Nc, Ns> &a)
     {
       if (this != &a) {
#pragma unroll
         for (int i = 0; i < size; i++) { data[i] = a.data[i]; }
       }
       return *this;
     }

     /** Unary minus: returns a new spinor with every element negated. */
     __device__ __host__ inline ColorSpinor<Float, Nc, Ns> operator-() const
     {
       ColorSpinor<Float, Nc, Ns> a;
#pragma unroll
       for (int i = 0; i < size; i++) { a.data[i] = -data[i]; }
       return a;
     }

     /** Element-wise in-place addition of a. */
     __device__ __host__ inline ColorSpinor<Float, Nc, Ns> &operator+=(const ColorSpinor<Float, Nc, Ns> &a)
     {
#pragma unroll
       for (int i = 0; i < size; i++) { data[i] += a.data[i]; }
       return *this;
     }

     /**
        @brief In-place multiplication of every element by a.
        @tparam T Any type for which complex<Float> *= T is defined
     */
     template <typename T> __device__ __host__ inline ColorSpinor<Float, Nc, Ns> &operator*=(const T &a)
     {
#pragma unroll
       for (int i = 0; i < size; i++) { data[i] *= a; }
       return *this;
     }

     /**
        @brief Element-wise in-place subtraction of a.
        There is deliberately no self-aliasing guard here: x -= x has
        to produce the zero spinor, so the subtraction must also run
        when &a == this (each element is read before it is written).
     */
     __device__ __host__ inline ColorSpinor<Float, Nc, Ns> &operator-=(const ColorSpinor<Float, Nc, Ns> &a)
     {
#pragma unroll
       for (int i = 0; i < size; i++) { data[i] -= a.data[i]; }
       return *this;
     }

     /** Construct from an accessor wrapper (declared here, defined elsewhere). */
     template<typename S>
     __device__ __host__ inline ColorSpinor<Float, Nc, Ns>(const colorspinor_wrapper<Float, S> &s);

     /** Assign from an accessor wrapper (declared here, defined elsewhere). */
     template<typename S>
     __device__ __host__ inline void operator=(const colorspinor_wrapper<Float, S> &s);

     /** Construct from a ghost accessor wrapper (declared here, defined elsewhere). */
     template<typename S>
     __device__ __host__ inline ColorSpinor<Float, Nc, Ns>(const colorspinor_ghost_wrapper<Float, S> &s);

     /** Assign from a ghost accessor wrapper (declared here, defined elsewhere). */
     template<typename S>
     __device__ __host__ inline void operator=(const colorspinor_ghost_wrapper<Float, S> &s);

     /**
        @brief 2-d accessor functor
        @param s Spin index
        @param c Color index
        @return Reference to element data[s*Nc + c] (no bounds checking)
     */
     __device__ __host__ inline complex<Float>& operator()(int s, int c) { return data[s*Nc + c]; }

     /** 2-d accessor functor (read-only overload). */
     __device__ __host__ inline const complex<Float>& operator()(int s, int c) const { return data[s*Nc + c]; }

     /**
        @brief 1-d accessor functor
        @param idx Flat index into data (no bounds checking)
     */
     __device__ __host__ inline complex<Float>& operator()(int idx) { return data[idx]; }

     /** 1-d accessor functor (read-only overload). */
     __device__ __host__ inline const complex<Float>& operator()(int idx) const { return data[idx]; }

     /** Prints the NsxNc complex elements of the color spinor, one per line. */
     __device__ __host__ void print() const
     {
       for (int s=0; s<Ns; s++) {
         for (int c=0; c<Nc; c++) {
           printf("s=%d c=%d %e %e\n", s, c, data[s*Nc+c].real(), data[s*Nc+c].imag());
         }
       }
     }
   };

     /**
        @brief Specialization of ColorSpinor for four spin components.
        Adds the spin-algebra helpers (gamma, igamma, sigma), the
        projection to two-spinors and the toRel / toNonRel basis
        changes.  Element (s,c) is stored at data[s*Nc + c].
     */
     template <typename Float, int Nc> struct ColorSpinor<Float, Nc, 4> {
       static constexpr int Ns = 4;
       /** Total number of complex elements held */
       static constexpr int size = Nc * Ns;
       complex<Float> data[size];

       /** Default constructor: every element is set to zero. */
       __device__ __host__ inline ColorSpinor<Float, Nc, 4>()
       {
#pragma unroll
         for (int i = 0; i < size; i++) { data[i] = 0; }
       }

       /** Copy constructor: element-wise copy of a. */
       __device__ __host__ inline ColorSpinor<Float, Nc, 4>(const ColorSpinor<Float, Nc, 4> &a)
       {
#pragma unroll
         for (int i = 0; i < size; i++) { data[i] = a.data[i]; }
       }

       /** Copy assignment: element-wise copy of a (self-assignment is a no-op). */
       __device__ __host__ inline ColorSpinor<Float, Nc, 4> &operator=(const ColorSpinor<Float, Nc, 4> &a)
       {
         if (this != &a) {
#pragma unroll
           for (int i = 0; i < size; i++) { data[i] = a.data[i]; }
         }
         return *this;
       }

       /**
          Unary minus: returns a new spinor with every element negated.
          Provided so that the specialization offers the same operator
          as the generic ColorSpinor template.
       */
       __device__ __host__ inline ColorSpinor<Float, Nc, 4> operator-() const
       {
         ColorSpinor<Float, Nc, 4> a;
#pragma unroll
         for (int i = 0; i < size; i++) { a.data[i] = -data[i]; }
         return a;
       }

       /** Element-wise in-place addition of a. */
       __device__ __host__ inline ColorSpinor<Float, Nc, 4> &operator+=(const ColorSpinor<Float, Nc, 4> &a)
       {
#pragma unroll
         for (int i = 0; i < size; i++) { data[i] += a.data[i]; }
         return *this;
       }

       /**
          Element-wise in-place subtraction of a.  No self-aliasing
          guard: x -= x yields the zero spinor.
       */
       __device__ __host__ inline ColorSpinor<Float, Nc, 4> &operator-=(const ColorSpinor<Float, Nc, 4> &a)
       {
#pragma unroll
         for (int i = 0; i < size; i++) { data[i] -= a.data[i]; }
         return *this;
       }

       /**
          @brief In-place multiplication of every element by a.
          @tparam T Any type for which complex<Float> *= T is defined
       */
       template <typename T> __device__ __host__ inline ColorSpinor<Float, Nc, 4> &operator*=(const T &a)
       {
#pragma unroll
         for (int i = 0; i < size; i++) { data[i] *= a; }
         return *this;
       }

       /**
          @brief Return the spinor obtained by applying the gamma matrix
          selected by dim to this spinor (this spinor is not modified).
          @param dim 0 = x, 1 = y, 2 = z, 3 = t, 4 = gamma_5
          @return The transformed spinor; for any other value of dim the
          zero spinor is returned
       */
       __device__ __host__ inline ColorSpinor<Float,Nc,4> gamma(int dim) const {
         ColorSpinor<Float,Nc,4> a;
         complex<Float> j(0.0,1.0); // imaginary unit

         switch (dim) {
         case 0: // x dimension
#pragma unroll
           for (int i=0; i<Nc; i++) {
             a(0,i) =  j*(*this)(3,i);
             a(1,i) =  j*(*this)(2,i);
             a(2,i) = -j*(*this)(1,i);
             a(3,i) = -j*(*this)(0,i);
           }
           break;
         case 1: // y dimension
#pragma unroll
           for (int i=0; i<Nc; i++) {
             a(0,i) =  (*this)(3,i);
             a(1,i) = -(*this)(2,i);
             a(2,i) = -(*this)(1,i);
             a(3,i) =  (*this)(0,i);
           }
           break;
         case 2: // z dimension
#pragma unroll
           for (int i=0; i<Nc; i++) {
             a(0,i) =  j*(*this)(2,i);
             a(1,i) = -j*(*this)(3,i);
             a(2,i) = -j*(*this)(0,i);
             a(3,i) =  j*(*this)(1,i);
           }
           break;
         case 3: // t dimension
#pragma unroll
           for (int i=0; i<Nc; i++) {
             a(0,i) =  (*this)(0,i);
             a(1,i) =  (*this)(1,i);
             a(2,i) = -(*this)(2,i);
             a(3,i) = -(*this)(3,i);
           }
           break;
         case 4: // gamma_5
#pragma unroll
           for (int i=0; i<Nc; i++) {
             a(0,i) = (*this)(2,i);
             a(1,i) = (*this)(3,i);
             a(2,i) = (*this)(0,i);
             a(3,i) = (*this)(1,i);
           }
           break;
         }

         return a;
       }

       /**
          @brief Return i times the gamma matrix selected by dim applied
          to this spinor (this spinor is not modified).  The factor i is
          folded into the table, so most cases need no complex multiply.
          @param dim 0 = x, 1 = y, 2 = z, 3 = t, 4 = gamma_5
          @return The transformed spinor; for any other value of dim the
          zero spinor is returned
       */
       __device__ __host__ inline ColorSpinor<Float,Nc,4> igamma(int dim) const {
         ColorSpinor<Float,Nc,4> a;
         complex<Float> j(0.0,1.0); // imaginary unit

         switch (dim) {
         case 0: // x dimension
#pragma unroll
           for (int i=0; i<Nc; i++) {
             a(0,i) = -(*this)(3,i);
             a(1,i) = -(*this)(2,i);
             a(2,i) =  (*this)(1,i);
             a(3,i) =  (*this)(0,i);
           }
           break;
         case 1: // y dimension
#pragma unroll
           for (int i=0; i<Nc; i++) {
             a(0,i) =  j*(*this)(3,i);
             a(1,i) = -j*(*this)(2,i);
             a(2,i) = -j*(*this)(1,i);
             a(3,i) =  j*(*this)(0,i);
           }
           break;
         case 2: // z dimension
#pragma unroll
           for (int i=0; i<Nc; i++) {
             a(0,i) = -(*this)(2,i);
             a(1,i) =  (*this)(3,i);
             a(2,i) =  (*this)(0,i);
             a(3,i) = -(*this)(1,i);
           }
           break;
         case 3: // t dimension
#pragma unroll
           for (int i=0; i<Nc; i++) {
             a(0,i) =  j*(*this)(0,i);
             a(1,i) =  j*(*this)(1,i);
             a(2,i) = -j*(*this)(2,i);
             a(3,i) = -j*(*this)(3,i);
           }
           break;
         case 4: // gamma_5
#pragma unroll
           for (int i=0; i<Nc; i++) {
             // multiplication by i written out as (re, im) -> (-im, re)
             a(0,i) = complex<Float>(-(*this)(2,i).imag(), (*this)(2,i).real());
             a(1,i) = complex<Float>(-(*this)(3,i).imag(), (*this)(3,i).real());
             a(2,i) = complex<Float>(-(*this)(0,i).imag(), (*this)(0,i).real());
             a(3,i) = complex<Float>(-(*this)(1,i).imag(), (*this)(1,i).real());
           }
           break;
         }

         return a;
       }

       /**
          @brief Project four-component spinor to either chirality.
          Copies spin components chirality*2 and chirality*2 + 1.
          @param chirality Selects the spin pair; presumably 0 or 1
          (no range check is performed)
          @return The two-component spinor
       */
       __device__ __host__ inline ColorSpinor<Float,Nc,2> chiral_project(int chirality) const {
         ColorSpinor<Float,Nc,2> proj;
#pragma unroll
         for (int s=0; s<Ns/2; s++) {
#pragma unroll
           for (int c=0; c<Nc; c++) {
             proj(s,c) = (*this)(chirality*Ns/2+s,c);
           }
         }
         return proj;
       }

       /**
          @brief Spin-project this spinor onto a two-component spinor.
          @param dim 0 = x, 1 = y, 2 = z, 3 = t, 4 = the gamma_5-like
          combination of upper and lower spin pairs
          @param sign +1 for the positive projector, -1 for the negative one
          @return The projected two-spinor; the zero spinor if dim or
          sign is not one of the handled values
       */
       __device__ __host__ inline ColorSpinor<Float, Nc, 2> project(int dim, int sign) const
       {
         ColorSpinor<Float,Nc,2> proj;
         complex<Float> j(0.0,1.0); // imaginary unit

         switch (dim) {
         case 0: // x dimension
           switch (sign) {
           case 1: // positive projector
#pragma unroll
             for (int i=0; i<Nc; i++) {
               proj(0,i) = (*this)(0,i) + j * (*this)(3,i);
               proj(1,i) = (*this)(1,i) + j * (*this)(2,i);
             }
             break;
           case -1: // negative projector
#pragma unroll
             for (int i=0; i<Nc; i++) {
               proj(0,i) = (*this)(0,i) - j * (*this)(3,i);
               proj(1,i) = (*this)(1,i) - j * (*this)(2,i);
             }
             break;
           }
           break;
         case 1: // y dimension
           switch (sign) {
           case 1: // positive projector
#pragma unroll
             for (int i=0; i<Nc; i++) {
               proj(0,i) = (*this)(0,i) + (*this)(3,i);
               proj(1,i) = (*this)(1,i) - (*this)(2,i);
             }
             break;
           case -1: // negative projector
#pragma unroll
             for (int i=0; i<Nc; i++) {
               proj(0,i) = (*this)(0,i) - (*this)(3,i);
               proj(1,i) = (*this)(1,i) + (*this)(2,i);
             }
             break;
           }
           break;
         case 2: // z dimension
           switch (sign) {
           case 1: // positive projector
#pragma unroll
             for (int i=0; i<Nc; i++) {
               proj(0,i) = (*this)(0,i) + j * (*this)(2,i);
               proj(1,i) = (*this)(1,i) - j * (*this)(3,i);
             }
             break;
           case -1: // negative projector
#pragma unroll
             for (int i=0; i<Nc; i++) {
               proj(0,i) = (*this)(0,i) - j * (*this)(2,i);
               proj(1,i) = (*this)(1,i) + j * (*this)(3,i);
             }
             break;
           }
           break;
         case 3: // t dimension
           switch (sign) {
           case 1: // positive projector
#pragma unroll
             for (int i=0; i<Nc; i++) {
               proj(0,i) = 2*(*this)(0,i);
               proj(1,i) = 2*(*this)(1,i);
             }
             break;
           case -1: // negative projector
#pragma unroll
             for (int i=0; i<Nc; i++) {
               proj(0,i) = 2*(*this)(2,i);
               proj(1,i) = 2*(*this)(3,i);
             }
             break;
           }
           break;
         case 4:
           switch (sign) {
           case 1: // positive projector
#pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = (*this)(0, i) + (*this)(2, i);
               proj(1, i) = (*this)(1, i) + (*this)(3, i);
             }
             break;
           case -1: // negative projector
#pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = (*this)(0, i) - (*this)(2, i);
               proj(1, i) = (*this)(1, i) - (*this)(3, i);
             }
             break;
           }
           break;
         }

         return proj;
       }

       /**
          @brief Return the spinor obtained by applying the (mu,nu) entry
          of the sigma table to this spinor (presumably sigma_{mu nu} of
          the spin algebra -- confirm the normalization against callers).
          This spinor is not modified.
          @param mu First index, 0..3
          @param nu Second index, 0..3, different from mu
          @return The transformed spinor; the zero spinor for any
          (mu,nu) pair not listed, including mu == nu
       */
       __device__ __host__ inline ColorSpinor<Float,Nc,4> sigma(int mu, int nu) const {
         ColorSpinor<Float,Nc,4> a;
         const ColorSpinor<Float,Nc,4> &b = *this; // read-only alias for brevity
         complex<Float> j(0.0,1.0); // imaginary unit

         switch(mu) {
         case 0:
           switch(nu) {
           case 1:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) =  j*b(0,i);
               a(1,i) = -j*b(1,i);
               a(2,i) =  j*b(2,i);
               a(3,i) = -j*b(3,i);
             }
             break;
           case 2:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) = -b(1,i);
               a(1,i) =  b(0,i);
               a(2,i) = -b(3,i);
               a(3,i) =  b(2,i);
             }
             break;
           case 3:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) = -j*b(3,i);
               a(1,i) = -j*b(2,i);
               a(2,i) = -j*b(1,i);
               a(3,i) = -j*b(0,i);
             }
             break;
           }
           break;
         case 1:
           switch(nu) {
           case 0:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) = -j*b(0,i);
               a(1,i) =  j*b(1,i);
               a(2,i) = -j*b(2,i);
               a(3,i) =  j*b(3,i);
             }
             break;
           case 2:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) = j*b(1,i);
               a(1,i) = j*b(0,i);
               a(2,i) = j*b(3,i);
               a(3,i) = j*b(2,i);
             }
             break;
           case 3:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) = -b(3,i);
               a(1,i) =  b(2,i);
               a(2,i) = -b(1,i);
               a(3,i) =  b(0,i);
             }
             break;
           }
           break;
         case 2:
           switch(nu) {
           case 0:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) =  b(1,i);
               a(1,i) = -b(0,i);
               a(2,i) =  b(3,i);
               a(3,i) = -b(2,i);
             }
             break;
           case 1:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) = -j*b(1,i);
               a(1,i) = -j*b(0,i);
               a(2,i) = -j*b(3,i);
               a(3,i) = -j*b(2,i);
             }
             break;
           case 3:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) = -j*b(2,i);
               a(1,i) =  j*b(3,i);
               a(2,i) = -j*b(0,i);
               a(3,i) =  j*b(1,i);
             }
             break;
           }
           break;
         case 3:
           switch(nu) {
           case 0:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) = j*b(3,i);
               a(1,i) = j*b(2,i);
               a(2,i) = j*b(1,i);
               a(3,i) = j*b(0,i);
             }
             break;
           case 1:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) =  b(3,i);
               a(1,i) = -b(2,i);
               a(2,i) =  b(1,i);
               a(3,i) = -b(0,i);
             }
             break;
           case 2:
#pragma unroll
             for (int i=0; i<Nc; i++) {
               a(0,i) =  j*b(2,i);
               a(1,i) = -j*b(3,i);
               a(2,i) =  j*b(0,i);
               a(3,i) = -j*b(1,i);
             }
             break;
           }
           break;
         }
         return a;
       }

       /**
          @brief 2-d accessor functor
          @param s Spin index
          @param c Color index
          @return Reference to element data[s*Nc + c] (no bounds checking)
       */
       __device__ __host__ inline complex<Float>& operator()(int s, int c) { return data[s*Nc + c]; }

       /** 2-d accessor functor (read-only overload). */
       __device__ __host__ inline const complex<Float>& operator()(int s, int c) const { return data[s*Nc + c]; }

       /**
          @brief 1-d accessor functor
          @param idx Flat index into data (no bounds checking)
       */
       __device__ __host__ inline complex<Float>& operator()(int idx) { return data[idx]; }

       /** 1-d accessor functor (read-only overload). */
       __device__ __host__ inline const complex<Float>& operator()(int idx) const { return data[idx]; }

       /** Construct from an accessor wrapper (declared here, defined elsewhere). */
       template<typename S>
       __device__ __host__ inline ColorSpinor<Float, Nc, Ns>(const colorspinor_wrapper<Float, S> &s);

       /** Assign from an accessor wrapper (declared here, defined elsewhere). */
       template<typename S>
       __device__ __host__ inline void operator=(const colorspinor_wrapper<Float, S> &s);

       /** Construct from a ghost accessor wrapper (declared here, defined elsewhere). */
       template<typename S>
       __device__ __host__ inline ColorSpinor<Float, Nc, Ns>(const colorspinor_ghost_wrapper<Float, S> &s);

       /** Assign from a ghost accessor wrapper (declared here, defined elsewhere). */
       template<typename S>
       __device__ __host__ inline void operator=(const colorspinor_ghost_wrapper<Float, S> &s);

       /**
          @brief Transform from relativistic into non-relativistic basis,
          in place.  No normalization factor is applied by this routine.
       */
       __device__ __host__ inline void toNonRel() {
         ColorSpinor<Float,Nc,Ns> a; // temporary: every output mixes two inputs
#pragma unroll
         for (int c=0; c<Nc; c++) {
           a(0,c) =  (*this)(1,c)+(*this)(3,c);
           a(1,c) = -(*this)(2,c)-(*this)(0,c);
           a(2,c) = -(*this)(3,c)+(*this)(1,c);
           a(3,c) = -(*this)(0,c)+(*this)(2,c);
         }
         *this = a;
       }

       /**
          @brief Transform from non-relativistic into relativistic basis,
          in place.  No normalization factor is applied by this routine.
       */
       __device__ __host__ inline void toRel() {
         ColorSpinor<Float,Nc,Ns> a; // temporary: every output mixes two inputs
#pragma unroll
         for (int c=0; c<Nc; c++) {
           a(0,c) = -(*this)(1,c)-(*this)(3,c);
           a(1,c) =  (*this)(2,c)+(*this)(0,c);
           a(2,c) =  (*this)(3,c)-(*this)(1,c);
           a(3,c) =  (*this)(0,c)-(*this)(2,c);
         }
         *this = a;
       }

       /** Prints the NsxNc complex elements of the color spinor, one per line. */
       __device__ __host__ void print() const
       {
         for (int s=0; s<Ns; s++) {
           for (int c=0; c<Nc; c++) {
             printf("s=%d c=%d %e %e\n", s, c, data[s*Nc+c].real(), data[s*Nc+c].imag());
           }
         }
       }
     };

   /**
      @brief Specialization of ColorSpinor for two spin components.
      Adds the reconstruction of a four-component spinor from a
      projected or chiral two-spinor.  Element (s,c) is stored at
      data[s*Nc + c].
   */
   template <typename Float, int Nc>
   struct ColorSpinor<Float, Nc, 2> {
     static constexpr int Ns = 2;
     /** Total number of complex elements held */
     static constexpr int size = Ns * Nc;
     complex<Float> data[size];

     /** Default constructor: every element is set to zero. */
     __device__ __host__ inline ColorSpinor<Float, Nc, 2>() {
#pragma unroll
       for (int i = 0; i < size; i++) { data[i] = 0; }
     }

     /** Copy constructor: element-wise copy of a. */
     __device__ __host__ inline ColorSpinor<Float, Nc, 2>(const ColorSpinor<Float, Nc, 2> &a) {
#pragma unroll
       for (int i = 0; i < size; i++) { data[i] = a.data[i]; }
     }

     /** Copy assignment: element-wise copy of a (self-assignment is a no-op). */
     __device__ __host__ inline ColorSpinor<Float, Nc, 2>& operator=(const ColorSpinor<Float, Nc, 2> &a) {
       if (this != &a) {
#pragma unroll
         for (int i = 0; i < size; i++) { data[i] = a.data[i]; }
       }
       return *this;
     }

     /**
        Unary minus: returns a new spinor with every element negated.
        Provided so that the specialization offers the same operator
        as the generic ColorSpinor template.
     */
     __device__ __host__ inline ColorSpinor<Float, Nc, 2> operator-() const
     {
       ColorSpinor<Float, Nc, 2> a;
#pragma unroll
       for (int i = 0; i < size; i++) { a.data[i] = -data[i]; }
       return a;
     }

     /** Element-wise in-place addition of a. */
     __device__ __host__ inline ColorSpinor<Float, Nc, 2>& operator+=(const ColorSpinor<Float, Nc, 2> &a) {
#pragma unroll
       for (int i = 0; i < size; i++) { data[i] += a.data[i]; }
       return *this;
     }

     /**
        Element-wise in-place subtraction of a.  No self-aliasing
        guard: x -= x yields the zero spinor.
     */
     __device__ __host__ inline ColorSpinor<Float, Nc, 2> &operator-=(const ColorSpinor<Float, Nc, 2> &a)
     {
#pragma unroll
       for (int i = 0; i < size; i++) { data[i] -= a.data[i]; }
       return *this;
     }

     /**
        @brief In-place multiplication of every element by a.
        @tparam T Any type for which complex<Float> *= T is defined
     */
     template <typename T> __device__ __host__ inline ColorSpinor<Float, Nc, 2> &operator*=(const T &a)
     {
#pragma unroll
       for (int i = 0; i < size; i++) { data[i] *= a; }
       return *this;
     }

     /**
        @brief Reconstruct two-component spinor to a four-component spinor.
        The two spin components are copied to spin rows chirality*2 and
        chirality*2 + 1 of the result; the other two rows stay zero.
        @param chirality Selects the destination spin pair; presumably
        0 or 1 (no range check is performed)
        @return The four-component spinor
     */
     __device__ __host__ inline ColorSpinor<Float,Nc,4> chiral_reconstruct(int chirality) const {
       ColorSpinor<Float,Nc,4> recon;
#pragma unroll
       for (int s=0; s<Ns; s++) {
#pragma unroll
         for (int c=0; c<Nc; c++) {
           recon(chirality*Ns+s,c) = (*this)(s,c);
         }
       }
       return recon;
     }

     /**
        @brief Spin reconstruct the full Spinor from the projected spinor.
        @param dim 0 = x, 1 = y, 2 = z, 3 = t, 4 = the gamma_5-like
        combination of upper and lower spin pairs
        @param sign +1 for the positive projector, -1 for the negative one
        @return The four-component spinor; the zero spinor if dim or
        sign is not one of the handled values
     */
     __device__ __host__ inline ColorSpinor<Float, Nc, 4> reconstruct(int dim, int sign) const
     {
       ColorSpinor<Float, Nc, 4> recon;
       complex<Float> j(0.0,1.0); // imaginary unit

       switch (dim) {
       case 0: // x dimension
         switch (sign) {
         case 1: // positive projector
#pragma unroll
           for (int i=0; i<Nc; i++) {
             recon(0,i) = (*this)(0,i);
             recon(1,i) = (*this)(1,i);
             recon(2,i) = -j*(*this)(1,i);
             recon(3,i) = -j*(*this)(0,i);
           }
           break;
         case -1: // negative projector
#pragma unroll
           for (int i=0; i<Nc; i++) {
             recon(0,i) = (*this)(0,i);
             recon(1,i) = (*this)(1,i);
             recon(2,i) = j*(*this)(1,i);
             recon(3,i) = j*(*this)(0,i);
           }
           break;
         }
         break;
       case 1: // y dimension
         switch (sign) {
         case 1: // positive projector
#pragma unroll
           for (int i=0; i<Nc; i++) {
             recon(0,i) = (*this)(0,i);
             recon(1,i) = (*this)(1,i);
             recon(2,i) = -(*this)(1,i);
             recon(3,i) = (*this)(0,i);
           }
           break;
         case -1: // negative projector
#pragma unroll
           for (int i=0; i<Nc; i++) {
             recon(0,i) = (*this)(0,i);
             recon(1,i) = (*this)(1,i);
             recon(2,i) = (*this)(1,i);
             recon(3,i) = -(*this)(0,i);
           }
           break;
         }
         break;
       case 2: // z dimension
         switch (sign) {
         case 1: // positive projector
#pragma unroll
           for (int i=0; i<Nc; i++) {
             recon(0,i) = (*this)(0,i);
             recon(1,i) = (*this)(1,i);
             recon(2,i) = -j*(*this)(0,i);
             recon(3,i) = j*(*this)(1,i);
           }
           break;
         case -1: // negative projector
#pragma unroll
           for (int i=0; i<Nc; i++) {
             recon(0,i) = (*this)(0,i);
             recon(1,i) = (*this)(1,i);
             recon(2,i) = j*(*this)(0,i);
             recon(3,i) = -j*(*this)(1,i);
           }
           break;
         }
         break;
       case 3: // t dimension
         switch (sign) {
         case 1: // positive projector
#pragma unroll
           for (int i=0; i<Nc; i++) {
             recon(0,i) = (*this)(0,i);
             recon(1,i) = (*this)(1,i);
             recon(2,i) = 0;
             recon(3,i) = 0;
           }
           break;
         case -1: // negative projector
#pragma unroll
           for (int i=0; i<Nc; i++) {
             recon(0,i) = 0;
             recon(1,i) = 0;
             recon(2,i) = (*this)(0,i);
             recon(3,i) = (*this)(1,i);
           }
           break;
         }
         break;
       case 4:
         switch (sign) {
         case 1: // positive projector
#pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = (*this)(0, i);
             recon(1, i) = (*this)(1, i);
             recon(2, i) = (*this)(0, i);
             recon(3, i) = (*this)(1, i);
           }
           break;
         case -1: // negative projector
#pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = (*this)(0, i);
             recon(1, i) = (*this)(1, i);
             recon(2, i) = -(*this)(0, i);
             recon(3, i) = -(*this)(1, i);
           }
           break;
         }
         break;
       }
       return recon;
     }

     /**
        @brief 2-d accessor functor
        @param s Spin index
        @param c Color index
        @return Reference to element data[s*Nc + c] (no bounds checking)
     */
     __device__ __host__ inline complex<Float>& operator()(int s, int c) { return data[s*Nc + c]; }

     /** 2-d accessor functor (read-only overload). */
     __device__ __host__ inline const complex<Float>& operator()(int s, int c) const { return data[s*Nc + c]; }

     /**
        @brief 1-d accessor functor
        @param idx Flat index into data (no bounds checking)
     */
     __device__ __host__ inline complex<Float>& operator()(int idx) { return data[idx]; }

     /** 1-d accessor functor (read-only overload). */
     __device__ __host__ inline const complex<Float>& operator()(int idx) const { return data[idx]; }

     /** Construct from an accessor wrapper (declared here, defined elsewhere). */
     template<typename S>
     __device__ __host__ inline ColorSpinor<Float, Nc, Ns>(const colorspinor_wrapper<Float, S> &s);

     /** Assign from an accessor wrapper (declared here, defined elsewhere). */
     template<typename S>
     __device__ __host__ inline void operator=(const colorspinor_wrapper<Float, S> &s);

     /** Construct from a ghost accessor wrapper (declared here, defined elsewhere). */
     template<typename S>
     __device__ __host__ inline ColorSpinor<Float, Nc, Ns>(const colorspinor_ghost_wrapper<Float, S> &s);

     /** Assign from a ghost accessor wrapper (declared here, defined elsewhere). */
     template<typename S>
     __device__ __host__ inline void operator=(const colorspinor_ghost_wrapper<Float, S> &s);

     /** Prints the NsxNc complex elements of the color spinor, one per line. */
     __device__ __host__ void print() const
     {
       for (int s=0; s<Ns; s++) {
         for (int c=0; c<Nc; c++) {
           printf("s=%d c=%d %e %e\n", s, c, data[s*Nc+c].real(), data[s*Nc+c].imag());
         }
       }
     }
   };

   /**
      @brief Compute the inner product over color and spin
      dot = \sum_{s,c} conj(a(s,c)) * b(s,c)
      @param a Left-hand side ColorSpinor (the conjugated one)
      @param b Right-hand side ColorSpinor
      @return The inner product
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline complex<Float> innerProduct(const ColorSpinor<Float, Nc, Ns> &a,
                                                          const ColorSpinor<Float, Nc, Ns> &b)
   {
     complex<Float> total = 0;
     // sum the per-spin color contractions, pairing equal spin indices
#pragma unroll
     for (int spin = 0; spin < Ns; ++spin) {
       total += innerProduct(a, b, spin, spin);
     }
     return total;
   }

   /**
      @brief Compute the inner product over color at spin s
      dot = \sum_c conj(a(s,c)) * b(s,c)
      @param a Left-hand side ColorSpinor (the conjugated one)
      @param b Right-hand side ColorSpinor
      @param s Spin index used for both a and b
      @return The inner product
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline complex<Float> innerProduct(const ColorSpinor<Float, Nc, Ns> &a,
                                                          const ColorSpinor<Float, Nc, Ns> &b, int s)
   {
     // same spin index on both sides: forward to the two-index overload
     const int spin_a = s;
     const int spin_b = s;
     return innerProduct(a, b, spin_a, spin_b);
   }

   /**
      @brief Compute the inner product over color at spin sa of a and
      spin sb of b
      dot = \sum_c conj(a(sa,c)) * b(sb,c)

      The two spinors may have different numbers of spin components;
      when they are the same the deduction is exactly as before.  This
      is needed by callers that contract a single-spin spinor with a
      multi-spin one, for which a single shared spin-count parameter
      cannot be deduced.

      @tparam Nsa Number of spin components of a
      @tparam Nsb Number of spin components of b
      @param a Left-hand side ColorSpinor (the conjugated one)
      @param b Right-hand side ColorSpinor
      @param sa Spin index into a (no bounds checking)
      @param sb Spin index into b (no bounds checking)
      @return The inner product
   */
   template <typename Float, int Nc, int Nsa, int Nsb>
   __device__ __host__ inline complex<Float> innerProduct(const ColorSpinor<Float, Nc, Nsa> &a,
                                                          const ColorSpinor<Float, Nc, Nsb> &b, int sa, int sb)
   {
     complex<Float> dot = 0;
#pragma unroll
     for (int c = 0; c < Nc; c++) {
       // conj(a) * b expanded into real arithmetic
       dot.x += a(sa, c).real() * b(sb, c).real();
       dot.x += a(sa, c).imag() * b(sb, c).imag();
       dot.y += a(sa, c).real() * b(sb, c).imag();
       dot.y -= a(sa, c).imag() * b(sb, c).real();
     }
     return dot;
   }

   /**
      @brief Compute the inner product over color between a single-spin
      spinor a and spin component s of b
      dot = \sum_c conj(a(0,c)) * b(s,c)

      The color sum is evaluated here directly rather than by
      forwarding to the (a, b, sa, sb) overload: that overload takes
      both arguments with one shared spin-count parameter, which cannot
      be deduced when a has one spin component and b has Ns != 1.

      @param a Left-hand side ColorSpinor with one spin component
      (the conjugated one)
      @param b Right-hand side ColorSpinor
      @param s Spin index into b (no bounds checking)
      @return The inner product
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline complex<Float> innerProduct(const ColorSpinor<Float, Nc, 1> &a,
                                                          const ColorSpinor<Float, Nc, Ns> &b, int s)
   {
     complex<Float> dot = 0;
#pragma unroll
     for (int c = 0; c < Nc; c++) {
       // conj(a) * b expanded into real arithmetic
       dot.x += a(0, c).real() * b(s, c).real();
       dot.x += a(0, c).imag() * b(s, c).imag();
       dot.y += a(0, c).real() * b(s, c).imag();
       dot.y -= a(0, c).imag() * b(s, c).real();
     }
     return dot;
   }

   /**
      @brief Color outer product of two spinors with the spin index
      traced out: out(j,i) = \sum_s a(s,j) * conj(b(s,i))
      @param a Left-hand side ColorSpinor
      @param b Right-hand side ColorSpinor (the conjugated one)
      @return The Nc x Nc color matrix
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline Matrix<complex<Float>, Nc> outerProdSpinTrace(const ColorSpinor<Float, Nc, Ns> &a,
                                                                            const ColorSpinor<Float, Nc, Ns> &b)
   {
     Matrix<complex<Float>, Nc> result;

#pragma unroll
     for (int col = 0; col < Nc; col++) {
#pragma unroll
       for (int row = 0; row < Nc; row++) {
         // spin 0 initialises the accumulators: a(0,row) * conj(b(0,col))
         Float re = a(0, row).real() * b(0, col).real();
         re = re + a(0, row).imag() * b(0, col).imag();
         Float im = a(0, row).imag() * b(0, col).real();
         im = im - a(0, row).real() * b(0, col).imag();

         // remaining spins accumulate a(spin,row) * conj(b(spin,col))
#pragma unroll
         for (int spin = 1; spin < Ns; spin++) {
           re = re + a(spin, row).real() * b(spin, col).real();
           re = re + a(spin, row).imag() * b(spin, col).imag();
           im = im + a(spin, row).imag() * b(spin, col).real();
           im = im - a(spin, row).real() * b(spin, col).imag();
         }

         result(row, col).real(re);
         result(row, col).imag(im);
       }
     }
     return result;
   }

   /**
      @brief ColorSpinor addition operator
      @param x Input vector
      @param y Input vector
      @return The element-wise sum x + y
   */
   template<typename Float, int Nc, int Ns> __device__ __host__ inline
     ColorSpinor<Float,Nc,Ns> operator+(const ColorSpinor<Float,Nc,Ns> &x, const ColorSpinor<Float,Nc,Ns> &y) {

     ColorSpinor<Float,Nc,Ns> sum;

     // element-wise, so one pass over the flat storage visits every (s,c)
#pragma unroll
     for (int k = 0; k < Nc * Ns; k++) { sum.data[k] = x.data[k] + y.data[k]; }

     return sum;
   }

   /**
      @brief ColorSpinor subtraction operator
      @param x Input vector
      @param y Input vector
      @return The element-wise difference x - y
   */
   template<typename Float, int Nc, int Ns> __device__ __host__ inline
     ColorSpinor<Float,Nc,Ns> operator-(const ColorSpinor<Float,Nc,Ns> &x, const ColorSpinor<Float,Nc,Ns> &y) {

     ColorSpinor<Float,Nc,Ns> diff;

     // element-wise, so one pass over the flat storage visits every (s,c)
#pragma unroll
     for (int k = 0; k < Nc * Ns; k++) { diff.data[k] = x.data[k] - y.data[k]; }

     return diff;
   }

   /**
      @brief Compute the scalar-vector product y = a * x
      @param a Input scalar (any type S for which S * complex<Float> is defined)
      @param x Input vector
      @return The vector a * x
   */
   template<typename Float, int Nc, int Ns, typename S> __device__ __host__ inline
     ColorSpinor<Float,Nc,Ns> operator*(const S &a, const ColorSpinor<Float,Nc,Ns> &x) {

     ColorSpinor<Float,Nc,Ns> scaled;

     // element-wise, so one pass over the flat storage visits every (s,c)
#pragma unroll
     for (int k = 0; k < Nc * Ns; k++) { scaled.data[k] = a * x.data[k]; }

     return scaled;
   }

   /**
      @brief Compute the matrix-vector product y = A * x, where the
      Nc x Nc matrix A acts on the color index of every spin component:
      y(s,i) = \sum_j A(i,j) * x(s,j)
      @param A Input color matrix
      @param x Input vector
      @return The vector A * x
   */
   template<typename Float, int Nc, int Ns> __device__ __host__ inline
     ColorSpinor<Float,Nc,Ns> operator*(const Matrix<complex<Float>,Nc> &A, const ColorSpinor<Float,Nc,Ns> &x) {

     ColorSpinor<Float,Nc,Ns> result;

#pragma unroll
     for (int row = 0; row < Nc; row++) {
#pragma unroll
       for (int spin = 0; spin < Ns; spin++) {
         const int base = spin * Nc;          // first color of this spin component
         const int out_idx = base + row;

         // column 0 initialises the result element ...
         result.data[out_idx].x  = A(row,0).real() * x.data[base + 0].real();
         result.data[out_idx].x -= A(row,0).imag() * x.data[base + 0].imag();
         result.data[out_idx].y  = A(row,0).real() * x.data[base + 0].imag();
         result.data[out_idx].y += A(row,0).imag() * x.data[base + 0].real();

         // ... and the remaining columns accumulate into it, in increasing order
#pragma unroll
         for (int col = 1; col < Nc; col++) {
           result.data[out_idx].x += A(row,col).real() * x.data[base + col].real();
           result.data[out_idx].x -= A(row,col).imag() * x.data[base + col].imag();
           result.data[out_idx].y += A(row,col).real() * x.data[base + col].imag();
           result.data[out_idx].y += A(row,col).imag() * x.data[base + col].real();
         }
       }
     }

     return result;
   }

   /**
      @brief Compute the matrix-vector product y = A * x for a
      Hermitian matrix A acting on the combined (spin, color) index.
      Only the real part of the diagonal entries A(i,i) is used; their
      imaginary part is never read.
      @param A Input Hermitian matrix of dimension Nc*Ns
      @param x Input vector
      @return The vector A * x
   */
   template<typename Float, int Nc, int Ns> __device__ __host__ inline
     ColorSpinor<Float,Nc,Ns> operator*(const HMatrix<Float,Nc*Ns> &A, const ColorSpinor<Float,Nc,Ns> &x) {

     ColorSpinor<Float,Nc,Ns> result;
     constexpr int dim = Nc * Ns;

#pragma unroll
     for (int row = 0; row < dim; row++) {
       // column 0 initialises the element; the imaginary-part terms
       // are dropped when it is the diagonal entry
       const bool first_is_diag = (row == 0);
       result.data[row].x = A(row,0).real() * x.data[0].real();
       if (!first_is_diag) { result.data[row].x -= A(row,0).imag() * x.data[0].imag(); }
       result.data[row].y = A(row,0).real() * x.data[0].imag();
       if (!first_is_diag) { result.data[row].y += A(row,0).imag() * x.data[0].real(); }

       // remaining columns accumulate, again skipping the imaginary
       // part on the diagonal
#pragma unroll
       for (int col = 1; col < dim; col++) {
         const bool on_diag = (row == col);
         result.data[row].x += A(row,col).real() * x.data[col].real();
         if (!on_diag) { result.data[row].x -= A(row,col).imag() * x.data[col].imag(); }
         result.data[row].y += A(row,col).real() * x.data[col].imag();
         if (!on_diag) { result.data[row].y += A(row,col).imag() * x.data[col].real(); }
       }
     }

     return result;
   }

 } // namespace quda
quda::ColorSpinor< Float, Nc, 2 >::operator=
__device__ __host__ ColorSpinor< Float, Nc, 2 > & operator=(const ColorSpinor< Float, Nc, 2 > &a)
Definition: color_spinor.h:691

quda::ColorSpinor::operator+=
__device__ __host__ ColorSpinor< Float, Nc, Ns > & operator+=(const ColorSpinor< Float, Nc, Ns > &a)
Definition: color_spinor.h:56

quda::ColorSpinor< Float, Nc, 4 >::chiral_project
__device__ __host__ ColorSpinor< Float, Nc, 2 > chiral_project(int chirality) const
Project four-component spinor to either chirality.
Definition: color_spinor.h:298

mu
double mu
Definition: test_util.cpp:1648

quda::ColorSpinor< Float, Nc, 4 >::toNonRel
__device__ __host__ void toNonRel()
Transform from relativistic into non-relativistic basis. Required normalization factor of 1/2 included ...
Definition: color_spinor.h:633

quda::ColorSpinor< Float, Nc, 2 >::operator+=
__device__ __host__ ColorSpinor< Float, Nc, 2 > & operator+=(const ColorSpinor< Float, Nc, 2 > &a)
Definition: color_spinor.h:699

quda::ColorSpinor< Float, Nc, 4 >::gamma
__device__ __host__ ColorSpinor< Float, Nc, 4 > gamma(int dim)
Definition: color_spinor.h:179

quda::ColorSpinor< Float, Nc, 2 >::data
complex< Float > data[size]
Definition: color_spinor.h:678

quda::ColorSpinor::data
complex< Float > data[size]
Definition: color_spinor.h:27

quda::ColorSpinor
Definition: color_spinor.h:24

quda::colorspinor_wrapper
colorspinor_wrapper is an internal class that is used to wrap instances of colorspinor accessors...
Definition: color_spinor.h:17

quda::ColorSpinor::print
__device__ __host__ void print() const
Prints the NsxNc complex elements of the color spinor.
Definition: color_spinor.h:123

quda::ColorSpinor::operator-=
__device__ __host__ ColorSpinor< Float, Nc, Ns > & operator-=(const ColorSpinor< Float, Nc, Ns > &a)
Definition: color_spinor.h:69

quda::ColorSpinor< Float, Nc, 4 >::operator()
__device__ __host__ complex< Float > & operator()(int idx)
1-d accessor functor
Definition: color_spinor.h:608

quda::ColorSpinor::operator()
__device__ __host__ complex< Float > & operator()(int idx)
1-d accessor functor
Definition: color_spinor.h:111

quda::outerProdSpinTrace
__device__ __host__ Matrix< complex< Float >, Nc > outerProdSpinTrace(const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)
Definition: color_spinor.h:985

quda::ColorSpinor< Float, Nc, 2 >::operator()
__device__ __host__ const complex< Float > & operator()(int s, int c) const
2-d accessor functor
Definition: color_spinor.h:868

quda::ColorSpinor< Float, Nc, 4 >::operator*=
__device__ __host__ ColorSpinor< Float, Nc, 4 > & operator*=(const T &a)
Definition: color_spinor.h:167

quda::ColorSpinor< Float, Nc, 4 >::operator()
__device__ __host__ const complex< Float > & operator()(int idx) const
1-d accessor functor
Definition: color_spinor.h:615

quda
Definition: blas_cublas.h:5

quda::clover::S
This is just a dummy structure we use for trove to define the required structure size.
Definition: clover_field_order.h:794

quda::ColorSpinor< Float, Nc, 4 >::operator=
__device__ __host__ ColorSpinor< Float, Nc, 4 > & operator=(const ColorSpinor< Float, Nc, 4 > &a)
Definition: color_spinor.h:153

quda::ColorSpinor< Float, Nc, 4 >
Definition: color_spinor.h:137

quda::ColorSpinor< Float, Nc, 4 >::sigma
__device__ __host__ ColorSpinor< Float, Nc, 4 > sigma(int mu, int nu)
Definition: color_spinor.h:452

quda::ColorSpinor< Float, Nc, 2 >::operator*=
__device__ __host__ ColorSpinor< Float, Nc, 2 > & operator*=(const T &a)
Definition: color_spinor.h:705

quda::ColorSpinor::operator*=
__device__ __host__ ColorSpinor< Float, Nc, Ns > & operator*=(const T &a)
Definition: color_spinor.h:62

quda::HMatrix
Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices) ...
Definition: quda_matrix.h:61

quda::ColorSpinor< Float, Nc, 2 >::operator()
__device__ __host__ const complex< Float > & operator()(int idx) const
1-d accessor functor
Definition: color_spinor.h:882

quda::ColorSpinor::operator()
__device__ __host__ const complex< Float > & operator()(int idx) const
1-d accessor functor
Definition: color_spinor.h:118

quda::ColorSpinor< Float, Nc, 4 >::operator()
__device__ __host__ const complex< Float > & operator()(int s, int c) const
2-d accessor functor
Definition: color_spinor.h:601

quda::innerProduct
__device__ __host__ complex< Float > innerProduct(const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)
Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c)
Definition: color_spinor.h:914

quda::ColorSpinor< Float, Nc, 4 >::project
__device__ __host__ ColorSpinor< Float, Nc, 2 > project(int dim, int sign) const
Definition: color_spinor.h:316

quda::ColorSpinor::operator=
__device__ __host__ ColorSpinor< Float, Nc, Ns > & operator=(const ColorSpinor< Float, Nc, Ns > &a)
Definition: color_spinor.h:40

quda::ColorSpinor< Float, Nc, 4 >::igamma
__device__ __host__ ColorSpinor< Float, Nc, 4 > igamma(int dim)
Definition: color_spinor.h:239

quda::ColorSpinor< Float, Nc, 4 >::print
__device__ __host__ void print() const
Definition: color_spinor.h:660

quda::ColorSpinor< Float, Nc, 2 >::reconstruct
__device__ __host__ ColorSpinor< Float, Nc, 4 > reconstruct(int dim, int sign) const
Spin reconstruct the full Spinor from the projected spinor.
Definition: color_spinor.h:734

quda::colorspinor_ghost_wrapper
colorspinor_ghost_wrapper is an internal class that is used to wrap instances of colorspinor accessor...
Definition: color_spinor.h:18

quda_matrix.h

quda::ColorSpinor< Float, Nc, 2 >::print
__device__ __host__ void print() const
Definition: color_spinor.h:896

out
cpuColorSpinorField * out
Definition: staggered_invert_test.cpp:99

quda::ColorSpinor< Float, Nc, 2 >::operator()
__device__ __host__ complex< Float > & operator()(int idx)
1-d accessor functor
Definition: color_spinor.h:875

quda::s
__shared__ float s[]

quda::operator+
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator+(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor addition operator.
Definition: color_spinor.h:1023

complex_quda.h

quda::ColorSpinor< Float, Nc, 4 >::data
complex< Float > data[size]
Definition: color_spinor.h:140

quda::ColorSpinor::operator-
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator-() const
Definition: color_spinor.h:48

quda::ColorSpinor< Float, Nc, 2 >::chiral_reconstruct
__device__ __host__ ColorSpinor< Float, Nc, 4 > chiral_reconstruct(int chirality) const
Reconstruct two-component spinor to a four-component spinor.
Definition: color_spinor.h:716

quda::operator*
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator*(const S &a, const ColorSpinor< Float, Nc, Ns > &x)
Compute the scalar-vector product y = a * x.
Definition: color_spinor.h:1067

quda::ColorSpinor< Float, Nc, 2 >
Definition: color_spinor.h:675

quda::ColorSpinor::operator()
__device__ __host__ const complex< Float > & operator()(int s, int c) const
2-d accessor functor
Definition: color_spinor.h:104

quda::ColorSpinor< Float, Nc, 4 >::toRel
__device__ __host__ void toRel()
Transform from non-relativistic into relativistic basis.
Definition: color_spinor.h:648

quda::ColorSpinor< Float, Nc, 4 >::operator+=
__device__ __host__ ColorSpinor< Float, Nc, 4 > & operator+=(const ColorSpinor< Float, Nc, 4 > &a)
Definition: color_spinor.h:161

quda::Matrix
Definition: quda_matrix.h:64

quda::ColorSpinor< Float, Nc, 4 >::operator()
__device__ __host__ complex< Float > & operator()(int s, int c)
2-d accessor functor
Definition: color_spinor.h:593

quda::ColorSpinor< Float, Nc, 2 >::operator()
__device__ __host__ complex< Float > & operator()(int s, int c)
2-d accessor functor
Definition: color_spinor.h:860

quda::ColorSpinor::size
static constexpr int size
Definition: color_spinor.h:26

quda::ColorSpinor::operator()
__device__ __host__ complex< Float > & operator()(int s, int c)
2-d accessor functor
Definition: color_spinor.h:96

dot
static void dot(sFloat *res, gFloat *a, sFloat *b)
Definition: dslash_util.h:56