quda-ref/v1.1.0/color__spinor_8h_source.html

 #pragma once


 #include <complex_quda.h>

 #include <quda_matrix.h>


 namespace quda {


   // Forward declarations of the wrapper types (defined elsewhere) that the templated
   // converting constructors and assignment operators of ColorSpinor below take as arguments.
   template<typename Float, typename T> struct colorspinor_wrapper;

   template<typename Float, typename T> struct colorspinor_ghost_wrapper;


   /**
      @brief Site-local color-spinor holding Nc colors times Ns spins of complex<Float>.

      Components are stored spin-major: element (s, c) lives at data[s * Nc + c].
      This is the primary template; Ns = 4 and Ns = 2 have dedicated partial
      specializations further down in this file.

      @tparam Float Underlying real type of each complex component
      @tparam Nc Number of colors
      @tparam Ns Number of spins
   */
   template <typename Float, int Nc, int Ns> struct ColorSpinor {

     static constexpr int size = Nc * Ns; // total number of complex components
     complex<Float> data[size];

     /** @brief Default constructor: every component is set to zero. */
     __device__ __host__ inline ColorSpinor()
     {
       #pragma unroll
       for (int k = 0; k < size; k++) { data[k] = 0; }
     }

     /** @brief Copy constructor: component-wise copy of a. */
     __device__ __host__ inline ColorSpinor(const ColorSpinor &a)
     {
       #pragma unroll
       for (int k = 0; k < size; k++) { data[k] = a.data[k]; }
     }

     /** @brief Copy assignment: component-wise copy of a (no-op on self-assignment). */
     __device__ __host__ inline ColorSpinor &operator=(const ColorSpinor &a)
     {
       if (this != &a) {
         #pragma unroll
         for (int k = 0; k < size; k++) { data[k] = a.data[k]; }
       }
       return *this;
     }

     /** @brief Unary negation: returns a new spinor with every component negated. */
     __device__ __host__ inline ColorSpinor operator-() const
     {
       ColorSpinor neg;
       #pragma unroll
       for (int k = 0; k < size; k++) { neg.data[k] = -data[k]; }
       return neg;
     }

     /** @brief Component-wise in-place addition of a. */
     __device__ __host__ inline ColorSpinor &operator+=(const ColorSpinor &a)
     {
       #pragma unroll
       for (int k = 0; k < size; k++) { data[k] += a.data[k]; }
       return *this;
     }

     /** @brief In-place multiplication of every component by the scalar a. */
     template <typename T> __device__ __host__ inline ColorSpinor &operator*=(const T &a)
     {
       #pragma unroll
       for (int k = 0; k < size; k++) { data[k] *= a; }
       return *this;
     }

     /**
        @brief Component-wise in-place subtraction of a.

        No self-aliasing guard here: x -= x must produce zero, and each
        component is read and written independently so aliasing is harmless.
     */
     __device__ __host__ inline ColorSpinor &operator-=(const ColorSpinor &a)
     {
       #pragma unroll
       for (int k = 0; k < size; k++) { data[k] -= a.data[k]; }
       return *this;
     }

     // Construction / assignment from the field-accessor wrapper types.
     // Only declared here; the definitions are not part of this file.
     template <typename S> __device__ __host__ inline ColorSpinor(const colorspinor_wrapper<Float, S> &s);

     template <typename S> __device__ __host__ inline void operator=(const colorspinor_wrapper<Float, S> &s);

     template <typename S> __device__ __host__ inline ColorSpinor(const colorspinor_ghost_wrapper<Float, S> &s);

     template <typename S> __device__ __host__ inline void operator=(const colorspinor_ghost_wrapper<Float, S> &s);

     /** @brief Mutable access to spin s, color c (no bounds checking). */
     __device__ __host__ inline complex<Float> &operator()(int s, int c) { return data[s * Nc + c]; }

     /** @brief Read-only access to spin s, color c (no bounds checking). */
     __device__ __host__ inline const complex<Float> &operator()(int s, int c) const { return data[s * Nc + c]; }

     /** @brief Mutable access by flat index idx = s * Nc + c (no bounds checking). */
     __device__ __host__ inline complex<Float> &operator()(int idx) { return data[idx]; }

     /** @brief Read-only access by flat index idx = s * Nc + c (no bounds checking). */
     __device__ __host__ inline const complex<Float> &operator()(int idx) const { return data[idx]; }

     /** @brief Print one line per (spin, color) component: indices, real part, imaginary part. */
     __device__ __host__ void print() const
     {
       for (int s = 0; s < Ns; s++) {
         for (int c = 0; c < Nc; c++) {
           const complex<Float> &v = data[s * Nc + c];
           printf("s=%d c=%d %e %e\n", s, c, v.real(), v.imag());
         }
       }
     }
   };


     /**
        @brief Specialization of ColorSpinor for four spin components (Ns = 4).

        On top of the basic container operations it provides gamma-matrix
        multiplication, spin projection onto two-component spinors, sigma
        multiplication and the toRel / toNonRel basis rotations. Storage is
        spin-major: element (s, c) lives at data[s * Nc + c].

        @tparam Float Underlying real type of each complex component
        @tparam Nc Number of colors
     */
     template <typename Float, int Nc> struct ColorSpinor<Float, Nc, 4> {
       static constexpr int Ns = 4;
       static constexpr int size = Nc * Ns; // total number of complex components
       complex<Float> data[size];

       /** @brief Default constructor: every component is set to zero. */
       __device__ __host__ inline ColorSpinor()
       {
         #pragma unroll
         for (int k = 0; k < size; k++) { data[k] = 0; }
       }

       /** @brief Copy constructor: component-wise copy of a. */
       __device__ __host__ inline ColorSpinor(const ColorSpinor &a)
       {
         #pragma unroll
         for (int k = 0; k < size; k++) { data[k] = a.data[k]; }
       }

       /** @brief Copy assignment: component-wise copy of a (no-op on self-assignment). */
       __device__ __host__ inline ColorSpinor &operator=(const ColorSpinor &a)
       {
         if (this != &a) {
           #pragma unroll
           for (int k = 0; k < size; k++) { data[k] = a.data[k]; }
         }
         return *this;
       }

       /** @brief Component-wise in-place addition of a. */
       __device__ __host__ inline ColorSpinor &operator+=(const ColorSpinor &a)
       {
         #pragma unroll
         for (int k = 0; k < size; k++) { data[k] += a.data[k]; }
         return *this;
       }

       /** @brief In-place multiplication of every component by the scalar a. */
       template <typename T> __device__ __host__ inline ColorSpinor &operator*=(const T &a)
       {
         #pragma unroll
         for (int k = 0; k < size; k++) { data[k] *= a; }
         return *this;
       }

       /**
          @brief Return this spinor multiplied by a gamma matrix.
          @param dim Selects the matrix: 0 = x, 1 = y, 2 = z, 3 = t, 4 = gamma_5
          @return The transformed spinor; for any other dim the result is all zeros
          (the output is default-constructed and no case writes to it).
       */
       __device__ __host__ inline ColorSpinor gamma(int dim) const
       {
         ColorSpinor a;
         const auto &t = *this;

         switch (dim) {
         case 0: // x dimension
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             a(0, i) = i_(t(3, i));
             a(1, i) = i_(t(2, i));
             a(2, i) = -i_(t(1, i));
             a(3, i) = -i_(t(0, i));
           }
           break;
         case 1: // y dimension
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             a(0, i) = t(3, i);
             a(1, i) = -t(2, i);
             a(2, i) = -t(1, i);
             a(3, i) = t(0, i);
           }
           break;
         case 2: // z dimension
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             a(0, i) = i_(t(2, i));
             a(1, i) = -i_(t(3, i));
             a(2, i) = -i_(t(0, i));
             a(3, i) = i_(t(1, i));
           }
           break;
         case 3: // t dimension
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             a(0, i) = t(0, i);
             a(1, i) = t(1, i);
             a(2, i) = -t(2, i);
             a(3, i) = -t(3, i);
           }
           break;
         case 4: // gamma_5
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             a(0, i) = t(2, i);
             a(1, i) = t(3, i);
             a(2, i) = t(0, i);
             a(3, i) = t(1, i);
           }
           break;
         }

         return a;
       }

       /**
          @brief Return this spinor multiplied by i times a gamma matrix.

          Each case is the matching gamma() case with an extra factor of i
          folded in (assuming i_(z) returns i * z).

          @param dim Selects the matrix: 0 = x, 1 = y, 2 = z, 3 = t, 4 = gamma_5
          @return The transformed spinor; for any other dim the result is all zeros
       */
       __device__ __host__ inline ColorSpinor igamma(int dim) const
       {
         ColorSpinor a;
         const auto &t = *this;

         switch (dim) {
         case 0: // x dimension
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             a(0, i) = -t(3, i);
             a(1, i) = -t(2, i);
             a(2, i) = t(1, i);
             a(3, i) = t(0, i);
           }
           break;
         case 1: // y dimension
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             a(0, i) = i_(t(3, i));
             a(1, i) = -i_(t(2, i));
             a(2, i) = -i_(t(1, i));
             a(3, i) = i_(t(0, i));
           }
           break;
         case 2: // z dimension
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             a(0, i) = -t(2, i);
             a(1, i) = t(3, i);
             a(2, i) = t(0, i);
             a(3, i) = -t(1, i);
           }
           break;
         case 3: // t dimension
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             a(0, i) = i_(t(0, i));
             a(1, i) = i_(t(1, i));
             a(2, i) = -i_(t(2, i));
             a(3, i) = -i_(t(3, i));
           }
           break;
         case 4: // gamma_5
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             a(0, i) = i_(t(2, i));
             a(1, i) = i_(t(3, i));
             a(2, i) = i_(t(0, i));
             a(3, i) = i_(t(1, i));
           }
           break;
         }

         return a;
       }

       /**
          @brief Extract one half of the spin components as a two-spinor.
          @param chirality Selects the half: spins [chirality * Ns/2, chirality * Ns/2 + Ns/2)
          @return Two-spinor holding the selected spin components, all colors
       */
       __device__ __host__ inline ColorSpinor<Float, Nc, 2> chiral_project(int chirality) const
       {
         ColorSpinor<Float, Nc, 2> proj;
         #pragma unroll
         for (int s = 0; s < Ns / 2; s++) {
           #pragma unroll
           for (int c = 0; c < Nc; c++) { proj(s, c) = (*this)(chirality * Ns / 2 + s, c); }
         }
         return proj;
       }

       /**
          @brief Spin-project this four-spinor onto a two-spinor.
          @param dim Direction of the projector: 0 = x, 1 = y, 2 = z, 3 = t, 4 (fifth case)
          @param sign +1 for the positive projector, -1 for the negative projector
          @return The projected two-spinor; any other dim / sign combination
          returns all zeros (the output is default-constructed).
       */
       __device__ __host__ inline ColorSpinor<Float, Nc, 2> project(int dim, int sign) const
       {
         ColorSpinor<Float, Nc, 2> proj;
         const auto &t = *this;

         switch (dim) {
         case 0: // x dimension
           switch (sign) {
           case 1: // positive projector
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = t(0, i) + i_(t(3, i));
               proj(1, i) = t(1, i) + i_(t(2, i));
             }
             break;
           case -1: // negative projector
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = t(0, i) - i_(t(3, i));
               proj(1, i) = t(1, i) - i_(t(2, i));
             }
             break;
           }
           break;
         case 1: // y dimension
           switch (sign) {
           case 1: // positive projector
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = t(0, i) + t(3, i);
               proj(1, i) = t(1, i) - t(2, i);
             }
             break;
           case -1: // negative projector
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = t(0, i) - t(3, i);
               proj(1, i) = t(1, i) + t(2, i);
             }
             break;
           }
           break;
         case 2: // z dimension
           switch (sign) {
           case 1: // positive projector
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = t(0, i) + i_(t(2, i));
               proj(1, i) = t(1, i) - i_(t(3, i));
             }
             break;
           case -1: // negative projector
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = t(0, i) - i_(t(2, i));
               proj(1, i) = t(1, i) + i_(t(3, i));
             }
             break;
           }
           break;
         case 3: // t dimension: picks the upper (sign = 1) or lower (sign = -1) spin pair, scaled by 2
           switch (sign) {
           case 1: // positive projector
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = 2 * t(0, i);
               proj(1, i) = 2 * t(1, i);
             }
             break;
           case -1: // negative projector
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = 2 * t(2, i);
               proj(1, i) = 2 * t(3, i);
             }
             break;
           }
           break;
         case 4:
           switch (sign) {
           case 1: // positive projector
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = t(0, i) + t(2, i);
               proj(1, i) = t(1, i) + t(3, i);
             }
             break;
           case -1: // negative projector
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               proj(0, i) = t(0, i) - t(2, i);
               proj(1, i) = t(1, i) - t(3, i);
             }
             break;
           }
           break;
         }

         return proj;
       }

       /**
          @brief Return this spinor multiplied by the sigma(mu, nu) matrix.
          @param mu First index, 0..3
          @param nu Second index, 0..3
          @return The transformed spinor; when mu == nu or either index is out
          of range no case matches and the result is all zeros.
       */
       __device__ __host__ inline ColorSpinor sigma(int mu, int nu) const
       {
         ColorSpinor a;
         const ColorSpinor &b = *this;
         complex<Float> j(0.0, 1.0); // imaginary unit

         switch (mu) {
         case 0:
           switch (nu) {
           case 1:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = j * b(0, i);
               a(1, i) = -j * b(1, i);
               a(2, i) = j * b(2, i);
               a(3, i) = -j * b(3, i);
             }
             break;
           case 2:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = -b(1, i);
               a(1, i) = b(0, i);
               a(2, i) = -b(3, i);
               a(3, i) = b(2, i);
             }
             break;
           case 3:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = -j * b(3, i);
               a(1, i) = -j * b(2, i);
               a(2, i) = -j * b(1, i);
               a(3, i) = -j * b(0, i);
             }
             break;
           }
           break;
         case 1:
           switch (nu) {
           case 0:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = -j * b(0, i);
               a(1, i) = j * b(1, i);
               a(2, i) = -j * b(2, i);
               a(3, i) = j * b(3, i);
             }
             break;
           case 2:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = j * b(1, i);
               a(1, i) = j * b(0, i);
               a(2, i) = j * b(3, i);
               a(3, i) = j * b(2, i);
             }
             break;
           case 3:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = -b(3, i);
               a(1, i) = b(2, i);
               a(2, i) = -b(1, i);
               a(3, i) = b(0, i);
             }
             break;
           }
           break;
         case 2:
           switch (nu) {
           case 0:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = b(1, i);
               a(1, i) = -b(0, i);
               a(2, i) = b(3, i);
               a(3, i) = -b(2, i);
             }
             break;
           case 1:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = -j * b(1, i);
               a(1, i) = -j * b(0, i);
               a(2, i) = -j * b(3, i);
               a(3, i) = -j * b(2, i);
             }
             break;
           case 3:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = -j * b(2, i);
               a(1, i) = j * b(3, i);
               a(2, i) = -j * b(0, i);
               a(3, i) = j * b(1, i);
             }
             break;
           }
           break;
         case 3:
           switch (nu) {
           case 0:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = j * b(3, i);
               a(1, i) = j * b(2, i);
               a(2, i) = j * b(1, i);
               a(3, i) = j * b(0, i);
             }
             break;
           case 1:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = b(3, i);
               a(1, i) = -b(2, i);
               a(2, i) = b(1, i);
               a(3, i) = -b(0, i);
             }
             break;
           case 2:
             #pragma unroll
             for (int i = 0; i < Nc; i++) {
               a(0, i) = j * b(2, i);
               a(1, i) = -j * b(3, i);
               a(2, i) = j * b(0, i);
               a(3, i) = -j * b(1, i);
             }
             break;
           }
           break;
         }
         return a;
       }

       /** @brief Mutable access to spin s, color c (no bounds checking). */
       __device__ __host__ inline complex<Float> &operator()(int s, int c) { return data[s * Nc + c]; }

       /** @brief Read-only access to spin s, color c (no bounds checking). */
       __device__ __host__ inline const complex<Float> &operator()(int s, int c) const { return data[s * Nc + c]; }

       /** @brief Mutable access by flat index idx = s * Nc + c (no bounds checking). */
       __device__ __host__ inline complex<Float> &operator()(int idx) { return data[idx]; }

       /** @brief Read-only access by flat index idx = s * Nc + c (no bounds checking). */
       __device__ __host__ inline const complex<Float> &operator()(int idx) const { return data[idx]; }

       // Construction / assignment from the field-accessor wrapper types.
       // Only declared here; the definitions are not part of this file.
       template <typename S> __device__ __host__ inline ColorSpinor(const colorspinor_wrapper<Float, S> &s);

       template <typename S> __device__ __host__ inline void operator=(const colorspinor_wrapper<Float, S> &s);

       template <typename S> __device__ __host__ inline ColorSpinor(const colorspinor_ghost_wrapper<Float, S> &s);

       template <typename S> __device__ __host__ inline void operator=(const colorspinor_ghost_wrapper<Float, S> &s);

       /**
          @brief In-place spin-basis rotation (named "to non-relativistic").
          Each new spin component is a signed sum of two old ones; no
          normalization factor is applied here. toRel() applies the
          sign-flipped combinations.
       */
       __device__ __host__ inline void toNonRel()
       {
         ColorSpinor a;
         #pragma unroll
         for (int c = 0; c < Nc; c++) {
           a(0, c) = (*this)(1, c) + (*this)(3, c);
           a(1, c) = -(*this)(2, c) - (*this)(0, c);
           a(2, c) = -(*this)(3, c) + (*this)(1, c);
           a(3, c) = -(*this)(0, c) + (*this)(2, c);
         }
         *this = a;
       }

       /**
          @brief In-place spin-basis rotation (named "to relativistic").
          Uses the same pairings as toNonRel() with every sign flipped; no
          normalization factor is applied here.
       */
       __device__ __host__ inline void toRel()
       {
         ColorSpinor a;
         #pragma unroll
         for (int c = 0; c < Nc; c++) {
           a(0, c) = -(*this)(1, c) - (*this)(3, c);
           a(1, c) = (*this)(2, c) + (*this)(0, c);
           a(2, c) = (*this)(3, c) - (*this)(1, c);
           a(3, c) = (*this)(0, c) - (*this)(2, c);
         }
         *this = a;
       }

       /** @brief Print one line per (spin, color) component: indices, real part, imaginary part. */
       __device__ __host__ void print() const
       {
         for (int s = 0; s < Ns; s++) {
           for (int c = 0; c < Nc; c++) {
             const complex<Float> &v = data[s * Nc + c];
             printf("s=%d c=%d %e %e\n", s, c, v.real(), v.imag());
           }
         }
       }
     };


   /**
      @brief Specialization of ColorSpinor for two spin components (Ns = 2).

      This is the type returned by ColorSpinor<Float, Nc, 4>::project() and
      chiral_project(); reconstruct() and chiral_reconstruct() expand it back
      to a four-spinor. Storage is spin-major: element (s, c) lives at
      data[s * Nc + c].

      @tparam Float Underlying real type of each complex component
      @tparam Nc Number of colors
   */
   template <typename Float, int Nc> struct ColorSpinor<Float, Nc, 2> {
     static constexpr int Ns = 2;
     static constexpr int size = Ns * Nc; // total number of complex components
     complex<Float> data[size];

     /** @brief Default constructor: every component is set to zero. */
     __device__ __host__ inline ColorSpinor()
     {
       #pragma unroll
       for (int k = 0; k < size; k++) { data[k] = 0; }
     }

     /** @brief Copy constructor: component-wise copy of a. */
     __device__ __host__ inline ColorSpinor(const ColorSpinor &a)
     {
       #pragma unroll
       for (int k = 0; k < size; k++) { data[k] = a.data[k]; }
     }

     /** @brief Copy assignment: component-wise copy of a (no-op on self-assignment). */
     __device__ __host__ inline ColorSpinor &operator=(const ColorSpinor &a)
     {
       if (this != &a) {
         #pragma unroll
         for (int k = 0; k < size; k++) { data[k] = a.data[k]; }
       }
       return *this;
     }

     /** @brief Component-wise in-place addition of a. */
     __device__ __host__ inline ColorSpinor &operator+=(const ColorSpinor &a)
     {
       #pragma unroll
       for (int k = 0; k < size; k++) { data[k] += a.data[k]; }
       return *this;
     }

     /** @brief In-place multiplication of every component by the scalar a. */
     template <typename T> __device__ __host__ inline ColorSpinor &operator*=(const T &a)
     {
       #pragma unroll
       for (int k = 0; k < size; k++) { data[k] *= a; }
       return *this;
     }

     /**
        @brief Embed this two-spinor into one half of a four-spinor.
        @param chirality Selects the destination half: spins [chirality * Ns, chirality * Ns + Ns)
        @return Four-spinor with the selected half filled and the other half zero
     */
     __device__ __host__ inline ColorSpinor<Float, Nc, 4> chiral_reconstruct(int chirality) const
     {
       ColorSpinor<Float, Nc, 4> recon;
       #pragma unroll
       for (int s = 0; s < Ns; s++) {
         #pragma unroll
         for (int c = 0; c < Nc; c++) { recon(chirality * Ns + s, c) = (*this)(s, c); }
       }
       return recon;
     }

     /**
        @brief Expand this projected two-spinor back into a four-spinor.
        @param dim Direction of the projector: 0 = x, 1 = y, 2 = z, 3 = t, 4 (fifth case)
        @param sign +1 for the positive projector, -1 for the negative projector
        @return The reconstructed four-spinor; any other dim / sign combination
        returns all zeros (the output is default-constructed).
     */
     __device__ __host__ inline ColorSpinor<Float, Nc, 4> reconstruct(int dim, int sign) const
     {
       ColorSpinor<Float, Nc, 4> recon;
       const auto &t = *this; // alias only; no copy of the spinor is needed

       switch (dim) {
       case 0: // x dimension
         switch (sign) {
         case 1: // positive projector
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = t(0, i);
             recon(1, i) = t(1, i);
             recon(2, i) = -i_(t(1, i));
             recon(3, i) = -i_(t(0, i));
           }
           break;
         case -1: // negative projector
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = t(0, i);
             recon(1, i) = t(1, i);
             recon(2, i) = i_(t(1, i));
             recon(3, i) = i_(t(0, i));
           }
           break;
         }
         break;
       case 1: // y dimension
         switch (sign) {
         case 1: // positive projector
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = t(0, i);
             recon(1, i) = t(1, i);
             recon(2, i) = -t(1, i);
             recon(3, i) = t(0, i);
           }
           break;
         case -1: // negative projector
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = t(0, i);
             recon(1, i) = t(1, i);
             recon(2, i) = t(1, i);
             recon(3, i) = -t(0, i);
           }
           break;
         }
         break;
       case 2: // z dimension
         switch (sign) {
         case 1: // positive projector
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = t(0, i);
             recon(1, i) = t(1, i);
             recon(2, i) = -i_(t(0, i));
             recon(3, i) = i_(t(1, i));
           }
           break;
         case -1: // negative projector
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = t(0, i);
             recon(1, i) = t(1, i);
             recon(2, i) = i_(t(0, i));
             recon(3, i) = -i_(t(1, i));
           }
           break;
         }
         break;
       case 3: // t dimension: the two components fill the upper (sign = 1) or lower (sign = -1) spin pair
         switch (sign) {
         case 1: // positive projector
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = t(0, i);
             recon(1, i) = t(1, i);
             recon(2, i) = 0;
             recon(3, i) = 0;
           }
           break;
         case -1: // negative projector
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = 0;
             recon(1, i) = 0;
             recon(2, i) = t(0, i);
             recon(3, i) = t(1, i);
           }
           break;
         }
         break;
       case 4:
         switch (sign) {
         case 1: // positive projector
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = t(0, i);
             recon(1, i) = t(1, i);
             recon(2, i) = t(0, i);
             recon(3, i) = t(1, i);
           }
           break;
         case -1: // negative projector
           #pragma unroll
           for (int i = 0; i < Nc; i++) {
             recon(0, i) = t(0, i);
             recon(1, i) = t(1, i);
             recon(2, i) = -t(0, i);
             recon(3, i) = -t(1, i);
           }
           break;
         }
         break;
       }
       return recon;
     }

     /** @brief Mutable access to spin s, color c (no bounds checking). */
     __device__ __host__ inline complex<Float> &operator()(int s, int c) { return data[s * Nc + c]; }

     /** @brief Read-only access to spin s, color c (no bounds checking). */
     __device__ __host__ inline const complex<Float> &operator()(int s, int c) const { return data[s * Nc + c]; }

     /** @brief Mutable access by flat index idx = s * Nc + c (no bounds checking). */
     __device__ __host__ inline complex<Float> &operator()(int idx) { return data[idx]; }

     /** @brief Read-only access by flat index idx = s * Nc + c (no bounds checking). */
     __device__ __host__ inline const complex<Float> &operator()(int idx) const { return data[idx]; }

     // Construction / assignment from the field-accessor wrapper types.
     // Only declared here; the definitions are not part of this file.
     template <typename S> __device__ __host__ inline ColorSpinor(const colorspinor_wrapper<Float, S> &s);

     template <typename S> __device__ __host__ inline void operator=(const colorspinor_wrapper<Float, S> &s);

     template <typename S> __device__ __host__ inline ColorSpinor(const colorspinor_ghost_wrapper<Float, S> &s);

     template <typename S> __device__ __host__ inline void operator=(const colorspinor_ghost_wrapper<Float, S> &s);

     /** @brief Print one line per (spin, color) component: indices, real part, imaginary part. */
     __device__ __host__ void print() const
     {
       for (int s = 0; s < Ns; s++) {
         for (int c = 0; c < Nc; c++) {
           const complex<Float> &v = data[s * Nc + c];
           printf("s=%d c=%d %e %e\n", s, c, v.real(), v.imag());
         }
       }
     }
   };


   /**
      @brief Inner product over all spin and color components,
      dot = \sum_{s,c} conj(a(s,c)) * b(s,c).
      @param a Left-hand spinor (conjugated)
      @param b Right-hand spinor
      @return Sum over s of the per-spin inner product innerProduct(a, b, s, s)
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline complex<Float> innerProduct(const ColorSpinor<Float, Nc, Ns> &a,
                                                          const ColorSpinor<Float, Nc, Ns> &b)
   {
     complex<Float> total = 0;
     #pragma unroll
     for (int spin = 0; spin < Ns; spin++) {
       // per-spin contribution comes from the four-argument overload
       total += innerProduct(a, b, spin, spin);
     }
     return total;
   }


   /**
      @brief Color contraction between one spin component of each spinor,
      dot = \sum_c a(sa,c) * b(sb,c). Neither operand is complex-conjugated.
      @param a Left-hand spinor
      @param b Right-hand spinor
      @param sa Spin index into a
      @param sb Spin index into b
      @return The contracted complex value
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline complex<Float> colorContract(const ColorSpinor<Float, Nc, Ns> &a,
                                                           const ColorSpinor<Float, Nc, Ns> &b, int sa, int sb)
   {
     complex<Float> dot = 0;
     // Nc is a compile-time constant, so unroll like the other color loops in this file
     #pragma unroll
     for (int c = 0; c < Nc; c++) {
       // (ar + i ai) * (br + i bi) accumulated term by term
       dot.real(dot.real() + a(sa, c).real() * b(sb, c).real());
       dot.real(dot.real() - a(sa, c).imag() * b(sb, c).imag());
       dot.imag(dot.imag() + a(sa, c).real() * b(sb, c).imag());
       dot.imag(dot.imag() + a(sa, c).imag() * b(sb, c).real());
     }

     return dot;
   }


   /**
      @brief Inner product over color at a single spin index,
      dot = \sum_c conj(a(s,c)) * b(s,c).
      @param a Left-hand spinor (conjugated)
      @param b Right-hand spinor
      @param s Spin index used for both a and b
      @return The result of the four-argument overload called with sa = sb = s
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline complex<Float> innerProduct(const ColorSpinor<Float, Nc, Ns> &a,
                                                          const ColorSpinor<Float, Nc, Ns> &b, int s)
   {
     const int sa = s;
     const int sb = s;
     return innerProduct(a, b, sa, sb);
   }


   /**
      @brief Inner product over color between spin sa of a and spin sb of b,
      dot = \sum_c conj(a(sa,c)) * b(sb,c).
      @param a Left-hand spinor (conjugated)
      @param b Right-hand spinor
      @param sa Spin index into a
      @param sb Spin index into b
      @return The accumulated complex value
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline complex<Float> innerProduct(const ColorSpinor<Float, Nc, Ns> &a,
                                                          const ColorSpinor<Float, Nc, Ns> &b, int sa, int sb)
   {
     complex<Float> dot = 0;
     #pragma unroll
     for (int col = 0; col < Nc; col++) {
       const complex<Float> &u = a(sa, col);
       const complex<Float> &v = b(sb, col);
       // conj(u) * v, accumulated in the same term order as before
       dot.real(dot.real() + u.real() * v.real());
       dot.real(dot.real() + u.imag() * v.imag());
       dot.imag(dot.imag() + u.real() * v.imag());
       dot.imag(dot.imag() - u.imag() * v.real());
     }
     return dot;
   }


   /**
      @brief Inner product over color between spinors with different numbers
      of spin components, dot = \sum_c conj(a(sa,c)) * b(sb,c).
      @param a Left-hand spinor with Nsa spins (conjugated)
      @param b Right-hand spinor with Nsb spins
      @param sa Spin index into a
      @param sb Spin index into b
      @return The accumulated complex value
   */
   template <typename Float, int Nc, int Nsa, int Nsb>
   __device__ __host__ inline complex<Float> innerProduct(const ColorSpinor<Float, Nc, Nsa> &a,
                                                          const ColorSpinor<Float, Nc, Nsb> &b, int sa, int sb)
   {
     complex<Float> dot = 0;
     #pragma unroll
     for (int col = 0; col < Nc; col++) {
       const complex<Float> &u = a(sa, col);
       const complex<Float> &v = b(sb, col);
       // conj(u) * v, accumulated in the same term order as before
       dot.real(dot.real() + u.real() * v.real());
       dot.real(dot.real() + u.imag() * v.imag());
       dot.imag(dot.imag() + u.real() * v.imag());
       dot.imag(dot.imag() - u.imag() * v.real());
     }
     return dot;
   }


   /**
      @brief Three-component cross product in color space between spin sa of a
      and spin sb of b. Only defined for Nc = 3.
      @param a Left-hand spinor
      @param b Right-hand spinor
      @param sa Spin index into a
      @param sb Spin index into b
      @return Single-spin color vector res with
      res(0,c) = a(sa,c+1) * b(sb,c+2) - a(sa,c+2) * b(sb,c+1), color indices taken modulo 3
   */
   template <typename Float, int Ns>
   __device__ __host__ inline ColorSpinor<Float, 3, 1> crossProduct(const ColorSpinor<Float, 3, Ns> &a,
                                                                    const ColorSpinor<Float, 3, Ns> &b, int sa, int sb)
   {
     ColorSpinor<Float, 3, 1> cross;
     #pragma unroll
     for (int c = 0; c < 3; c++) {
       const int c1 = (c + 1) % 3; // next color, cyclically
       const int c2 = (c + 2) % 3; // next-to-next color, cyclically
       cross(0, c) = a(sa, c1) * b(sb, c2) - a(sa, c2) * b(sb, c1);
     }
     return cross;
   }


   /**
      @brief Outer product over color with a trace over spin,
      out(j,i) = \sum_s a(s,j) * conj(b(s,i)).
      @param a Left-hand spinor
      @param b Right-hand spinor (conjugated)
      @return Nc x Nc complex matrix
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline Matrix<complex<Float>, Nc> outerProdSpinTrace(const ColorSpinor<Float, Nc, Ns> &a,
                                                                            const ColorSpinor<Float, Nc, Ns> &b)
   {
     Matrix<complex<Float>, Nc> out;

     #pragma unroll
     for (int col = 0; col < Nc; col++) {
       #pragma unroll
       for (int row = 0; row < Nc; row++) {
         // the spin-0 term initialises the element: out(row,col) = a(0,row) * conj(b(0,col))
         {
           const complex<Float> &u = a(0, row);
           const complex<Float> &v = b(0, col);
           out(row, col).real(u.real() * v.real());
           out(row, col).real(out(row, col).real() + u.imag() * v.imag());
           out(row, col).imag(u.imag() * v.real());
           out(row, col).imag(out(row, col).imag() - u.real() * v.imag());
         }

         // remaining spins accumulate: out(row,col) += a(s,row) * conj(b(s,col))
         #pragma unroll
         for (int s = 1; s < Ns; s++) {
           const complex<Float> &u = a(s, row);
           const complex<Float> &v = b(s, col);
           out(row, col).real(out(row, col).real() + u.real() * v.real());
           out(row, col).real(out(row, col).real() + u.imag() * v.imag());
           out(row, col).imag(out(row, col).imag() + u.imag() * v.real());
           out(row, col).imag(out(row, col).imag() - u.real() * v.imag());
         }
       }
     }
     return out;
   }


   /**
      @brief Outer product over color of two single-spin color vectors,
      out(j,i) = a(0,j) * conj(b(0,i)).
      @param a Left-hand color vector
      @param b Right-hand color vector (conjugated)
      @return Nc x Nc complex matrix
   */
   template <typename Float, int Nc>
   __device__ __host__ inline Matrix<complex<Float>, Nc> outerProduct(const ColorSpinor<Float, Nc, 1> &a,
                                                                      const ColorSpinor<Float, Nc, 1> &b)
   {
     Matrix<complex<Float>, Nc> out;

     #pragma unroll
     for (int col = 0; col < Nc; col++) {
       #pragma unroll
       for (int row = 0; row < Nc; row++) {
         const complex<Float> &u = a(0, row);
         const complex<Float> &v = b(0, col);
         // u * conj(v), real and imaginary parts built term by term
         out(row, col).real(u.real() * v.real());
         out(row, col).real(out(row, col).real() + u.imag() * v.imag());
         out(row, col).imag(u.imag() * v.real());
         out(row, col).imag(out(row, col).imag() - u.real() * v.imag());
       }
     }
     return out;
   }


   /**
      @brief ColorSpinor addition operator.
      @param x Left-hand operand
      @param y Right-hand operand
      @return New spinor z with z = x + y, component by component
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline ColorSpinor<Float, Nc, Ns> operator+(const ColorSpinor<Float, Nc, Ns> &x,
                                                                   const ColorSpinor<Float, Nc, Ns> &y)
   {
     ColorSpinor<Float, Nc, Ns> z;

     // every (spin, color) pair maps to exactly one flat index in [0, Nc * Ns)
     #pragma unroll
     for (int k = 0; k < Nc * Ns; k++) { z.data[k] = x.data[k] + y.data[k]; }

     return z;
   }


   /**
      @brief ColorSpinor subtraction operator.
      @param x Left-hand operand
      @param y Right-hand operand
      @return New spinor z with z = x - y, component by component
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline ColorSpinor<Float, Nc, Ns> operator-(const ColorSpinor<Float, Nc, Ns> &x,
                                                                   const ColorSpinor<Float, Nc, Ns> &y)
   {
     ColorSpinor<Float, Nc, Ns> z;

     // every (spin, color) pair maps to exactly one flat index in [0, Nc * Ns)
     #pragma unroll
     for (int k = 0; k < Nc * Ns; k++) { z.data[k] = x.data[k] - y.data[k]; }

     return z;
   }


   /**
      @brief Compute the scalar-vector product y = a * x.
      @param a Scalar multiplier (any type S for which S * complex<Float> is defined)
      @param x Input spinor
      @return New spinor with every component of x multiplied on the left by a
   */
   template <typename Float, int Nc, int Ns, typename S>
   __device__ __host__ inline ColorSpinor<Float, Nc, Ns> operator*(const S &a, const ColorSpinor<Float, Nc, Ns> &x)
   {
     ColorSpinor<Float, Nc, Ns> y;

     // every (spin, color) pair maps to exactly one flat index in [0, Nc * Ns)
     #pragma unroll
     for (int k = 0; k < Nc * Ns; k++) { y.data[k] = a * x.data[k]; }

     return y;
   }


   /**
      @brief Compute the color matrix-vector product y = A * x, applied to every spin
      component: y(s,i) = \sum_j A(i,j) * x(s,j).
      @param A Nc x Nc complex matrix
      @param x Input spinor
      @return The transformed spinor
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline ColorSpinor<Float, Nc, Ns> operator*(const Matrix<complex<Float>, Nc> &A,
                                                                   const ColorSpinor<Float, Nc, Ns> &x)
   {
     ColorSpinor<Float, Nc, Ns> y;

     #pragma unroll
     for (int row = 0; row < Nc; row++) {
       #pragma unroll
       for (int s = 0; s < Ns; s++) {
         complex<Float> &dst = y.data[s * Nc + row];

         // column 0 initialises the output element
         const complex<Float> &x0 = x.data[s * Nc + 0];
         dst.x = A(row, 0).real() * x0.real();
         dst.x -= A(row, 0).imag() * x0.imag();
         dst.y = A(row, 0).real() * x0.imag();
         dst.y += A(row, 0).imag() * x0.real();

         // remaining columns accumulate in increasing column order
         #pragma unroll
         for (int col = 1; col < Nc; col++) {
           const complex<Float> &xc = x.data[s * Nc + col];
           dst.x += A(row, col).real() * xc.real();
           dst.x -= A(row, col).imag() * xc.imag();
           dst.y += A(row, col).real() * xc.imag();
           dst.y += A(row, col).imag() * xc.real();
         }
       }
     }

     return y;
   }


   /**
      @brief Compute the matrix-vector product z = A * x + y, applied to every spin
      component: z(s,i) = y(s,i) + \sum_j A(i,j) * x(s,j).
      @param A Nc x Nc complex matrix
      @param x Spinor multiplied by A
      @param y Spinor added to the product
      @return The resulting spinor z
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline ColorSpinor<Float, Nc, Ns> mv_add(const Matrix<complex<Float>, Nc> &A,
                                                                const ColorSpinor<Float, Nc, Ns> &x,
                                                                const ColorSpinor<Float, Nc, Ns> &y)
   {
     ColorSpinor<Float, Nc, Ns> z;

     #pragma unroll
     for (int row = 0; row < Nc; row++) {
       #pragma unroll
       for (int s = 0; s < Ns; s++) {
         complex<Float> &dst = z.data[s * Nc + row];
         const complex<Float> &add = y.data[s * Nc + row];

         // column 0 initialises the output element together with the y term
         const complex<Float> &x0 = x.data[s * Nc + 0];
         dst.x = add.real() + A(row, 0).real() * x0.real();
         dst.x -= A(row, 0).imag() * x0.imag();
         dst.y = add.imag() + A(row, 0).real() * x0.imag();
         dst.y += A(row, 0).imag() * x0.real();

         // remaining columns accumulate in increasing column order
         #pragma unroll
         for (int col = 1; col < Nc; col++) {
           const complex<Float> &xc = x.data[s * Nc + col];
           dst.x += A(row, col).real() * xc.real();
           dst.x -= A(row, col).imag() * xc.imag();
           dst.y += A(row, col).real() * xc.imag();
           dst.y += A(row, col).imag() * xc.real();
         }
       }
     }

     return z;
   }


   /**
      @brief Compute the matrix-vector product y = A * x for a Hermitian matrix acting on
      the combined spin-color index: y(i) = \sum_j A(i,j) * x(j), with i, j in [0, Nc * Ns).

      Only the real part of a diagonal element A(i,i) is used.

      @param A Hermitian (Nc * Ns) x (Nc * Ns) matrix
      @param x Input spinor
      @return The transformed spinor
   */
   template <typename Float, int Nc, int Ns>
   __device__ __host__ inline ColorSpinor<Float, Nc, Ns> operator*(const HMatrix<Float, Nc * Ns> &A,
                                                                   const ColorSpinor<Float, Nc, Ns> &x)
   {
     ColorSpinor<Float, Nc, Ns> y;
     constexpr int N = Ns * Nc;

     #pragma unroll
     for (int row = 0; row < N; row++) {
       complex<Float> &dst = y.data[row];

       // column 0 initialises the output element
       const complex<Float> &x0 = x.data[0];
       if (row == 0) {
         // diagonal element: real part only
         dst.x = A(row, 0).real() * x0.real();
         dst.y = A(row, 0).real() * x0.imag();
       } else {
         dst.x = A(row, 0).real() * x0.real();
         dst.x -= A(row, 0).imag() * x0.imag();
         dst.y = A(row, 0).real() * x0.imag();
         dst.y += A(row, 0).imag() * x0.real();
       }

       // remaining columns accumulate in increasing column order
       #pragma unroll
       for (int col = 1; col < N; col++) {
         const complex<Float> &xc = x.data[col];
         if (row == col) {
           // diagonal element: real part only
           dst.x += A(row, col).real() * xc.real();
           dst.y += A(row, col).real() * xc.imag();
         } else {
           dst.x += A(row, col).real() * xc.real();
           dst.x -= A(row, col).imag() * xc.imag();
           dst.y += A(row, col).real() * xc.imag();
           dst.y += A(row, col).imag() * xc.real();
         }
       }
     }

     return y;
   }


 } // namespace quda

quda::HMatrix
Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices)
Definition: quda_matrix.h:282

quda::Matrix
Definition: quda_matrix.h:63

dim
std::array< int, 4 > dim
Definition: command_line_params.cpp:34

mu
double mu
Definition: command_line_params.cpp:73

complex_quda.h

quda
Definition: blas_lapack.h:24

quda::outerProduct
__device__ __host__ Matrix< complex< Float >, Nc > outerProduct(const ColorSpinor< Float, Nc, 1 > &a, const ColorSpinor< Float, Nc, 1 > &b)
Definition: color_spinor.h:1073

quda::operator*
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator*(const S &a, const ColorSpinor< Float, Nc, Ns > &x)
Compute the scalar-vector product y = a * x.
Definition: color_spinor.h:1145

quda::mv_add
__device__ __host__ ColorSpinor< Float, Nc, Ns > mv_add(const Matrix< complex< Float >, Nc > &A, const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
Compute the matrix-vector product z = A * x + y.
Definition: color_spinor.h:1203

quda::innerProduct
__device__ __host__ complex< Float > innerProduct(const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)
Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c)
Definition: color_spinor.h:913

quda::colorContract
__device__ __host__ complex< Float > colorContract(const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int sa, int sb)
Compute the color contraction over color at fixed spins sa and sb: dot = \sum_{c} a(sa,c) * b(sb,c)
Definition: color_spinor.h:930

quda::operator+
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator+(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor addition operator.
Definition: color_spinor.h:1101

quda::crossProduct
__device__ __host__ ColorSpinor< Float, 3, 1 > crossProduct(const ColorSpinor< Float, 3, Ns > &a, const ColorSpinor< Float, 3, Ns > &b, int sa, int sb)
Definition: color_spinor.h:1017

quda::i_
__host__ __device__ complex< real > i_(const complex< real > &a)
Definition: complex_quda.h:1378

quda::outerProdSpinTrace
__device__ __host__ Matrix< complex< Float >, Nc > outerProdSpinTrace(const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)
Definition: color_spinor.h:1035

quda::operator-
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator-(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor subtraction operator.
Definition: color_spinor.h:1123

testing::internal::Float
FloatingPoint< float > Float
Definition: gtest-internal.h:396

quda_matrix.h

quda::ColorSpinor< Float, Nc, 2 >
Definition: color_spinor.h:674

quda::ColorSpinor< Float, Nc, 2 >::operator()
__device__ __host__ complex< Float > & operator()(int idx)
1-d accessor functor
Definition: color_spinor.h:874

quda::ColorSpinor< Float, Nc, 2 >::operator()
__device__ __host__ const complex< Float > & operator()(int s, int c) const
2-d accessor functor
Definition: color_spinor.h:867

quda::ColorSpinor< Float, Nc, 2 >::print
__device__ __host__ void print() const
Definition: color_spinor.h:895

quda::ColorSpinor< Float, Nc, 2 >::operator=
__device__ __host__ void operator=(const colorspinor_ghost_wrapper< Float, S > &s)

quda::ColorSpinor< Float, Nc, 2 >::operator+=
__device__ __host__ ColorSpinor< Float, Nc, 2 > & operator+=(const ColorSpinor< Float, Nc, 2 > &a)
Definition: color_spinor.h:698

quda::ColorSpinor< Float, Nc, 2 >::operator=
__device__ __host__ void operator=(const colorspinor_wrapper< Float, S > &s)

quda::ColorSpinor< Float, Nc, 2 >::operator()
__device__ __host__ const complex< Float > & operator()(int idx) const
1-d accessor functor
Definition: color_spinor.h:881

quda::ColorSpinor< Float, Nc, 2 >::chiral_reconstruct
__device__ __host__ ColorSpinor< Float, Nc, 4 > chiral_reconstruct(int chirality) const
Reconstruct two-component spinor to a four-component spinor.
Definition: color_spinor.h:715

quda::ColorSpinor< Float, Nc, 2 >::operator*=
__device__ __host__ ColorSpinor< Float, Nc, 2 > & operator*=(const T &a)
Definition: color_spinor.h:704

quda::ColorSpinor< Float, Nc, 2 >::reconstruct
__device__ __host__ ColorSpinor< Float, Nc, 4 > reconstruct(int dim, int sign) const
Spin reconstruct the full Spinor from the projected spinor.
Definition: color_spinor.h:733

quda::ColorSpinor< Float, Nc, 2 >::operator=
__device__ __host__ ColorSpinor< Float, Nc, 2 > & operator=(const ColorSpinor< Float, Nc, 2 > &a)
Definition: color_spinor.h:690

quda::ColorSpinor< Float, Nc, 2 >::operator()
__device__ __host__ complex< Float > & operator()(int s, int c)
2-d accessor functor
Definition: color_spinor.h:859

quda::ColorSpinor< Float, Nc, 2 >::data
complex< Float > data[size]
Definition: color_spinor.h:677

quda::ColorSpinor< Float, Nc, 4 >
Definition: color_spinor.h:137

quda::ColorSpinor< Float, Nc, 4 >::igamma
__device__ __host__ ColorSpinor< Float, Nc, 4 > igamma(int dim)
Definition: color_spinor.h:239

quda::ColorSpinor< Float, Nc, 4 >::operator=
__device__ __host__ void operator=(const colorspinor_ghost_wrapper< Float, S > &s)

quda::ColorSpinor< Float, Nc, 4 >::operator+=
__device__ __host__ ColorSpinor< Float, Nc, 4 > & operator+=(const ColorSpinor< Float, Nc, 4 > &a)
Definition: color_spinor.h:161

quda::ColorSpinor< Float, Nc, 4 >::data
complex< Float > data[size]
Definition: color_spinor.h:140

quda::ColorSpinor< Float, Nc, 4 >::toRel
__device__ __host__ void toRel()
Transform from non-relativistic into relativistic basis.
Definition: color_spinor.h:647

quda::ColorSpinor< Float, Nc, 4 >::operator()
__device__ __host__ complex< Float > & operator()(int s, int c)
2-d accessor functor
Definition: color_spinor.h:592

quda::ColorSpinor< Float, Nc, 4 >::operator()
__device__ __host__ const complex< Float > & operator()(int s, int c) const
2-d accessor functor
Definition: color_spinor.h:600

quda::ColorSpinor< Float, Nc, 4 >::sigma
__device__ __host__ ColorSpinor< Float, Nc, 4 > sigma(int mu, int nu)
Definition: color_spinor.h:451

quda::ColorSpinor< Float, Nc, 4 >::operator=
__device__ __host__ ColorSpinor< Float, Nc, 4 > & operator=(const ColorSpinor< Float, Nc, 4 > &a)
Definition: color_spinor.h:153

quda::ColorSpinor< Float, Nc, 4 >::chiral_project
__device__ __host__ ColorSpinor< Float, Nc, 2 > chiral_project(int chirality) const
Project four-component spinor to either chirality.
Definition: color_spinor.h:298

quda::ColorSpinor< Float, Nc, 4 >::operator=
__device__ __host__ void operator=(const colorspinor_wrapper< Float, S > &s)

quda::ColorSpinor< Float, Nc, 4 >::operator()
__device__ __host__ const complex< Float > & operator()(int idx) const
1-d accessor functor
Definition: color_spinor.h:614

quda::ColorSpinor< Float, Nc, 4 >::toNonRel
__device__ __host__ void toNonRel()
Transform from relativistic into non-relativistic basis. Required normalization factor of 1/2 included ...
Definition: color_spinor.h:632

quda::ColorSpinor< Float, Nc, 4 >::project
__device__ __host__ ColorSpinor< Float, Nc, 2 > project(int dim, int sign) const
Definition: color_spinor.h:316

quda::ColorSpinor< Float, Nc, 4 >::gamma
__device__ __host__ ColorSpinor< Float, Nc, 4 > gamma(int dim)
Definition: color_spinor.h:179

quda::ColorSpinor< Float, Nc, 4 >::operator()
__device__ __host__ complex< Float > & operator()(int idx)
1-d accessor functor
Definition: color_spinor.h:607

quda::ColorSpinor< Float, Nc, 4 >::print
__device__ __host__ void print() const
Definition: color_spinor.h:659

quda::ColorSpinor< Float, Nc, 4 >::operator*=
__device__ __host__ ColorSpinor< Float, Nc, 4 > & operator*=(const T &a)
Definition: color_spinor.h:167

quda::ColorSpinor
Definition: color_spinor.h:24

quda::ColorSpinor::operator=
__device__ __host__ void operator=(const colorspinor_wrapper< Float, S > &s)

quda::ColorSpinor::operator=
__device__ __host__ ColorSpinor< Float, Nc, Ns > & operator=(const ColorSpinor< Float, Nc, Ns > &a)
Definition: color_spinor.h:40

quda::ColorSpinor::operator()
__device__ __host__ const complex< Float > & operator()(int s, int c) const
2-d accessor functor
Definition: color_spinor.h:104

quda::ColorSpinor::operator()
__device__ __host__ complex< Float > & operator()(int idx)
1-d accessor functor
Definition: color_spinor.h:111

quda::ColorSpinor::operator+=
__device__ __host__ ColorSpinor< Float, Nc, Ns > & operator+=(const ColorSpinor< Float, Nc, Ns > &a)
Definition: color_spinor.h:56

quda::ColorSpinor::operator()
__device__ __host__ const complex< Float > & operator()(int idx) const
1-d accessor functor
Definition: color_spinor.h:118

quda::ColorSpinor::operator=
__device__ __host__ void operator=(const colorspinor_ghost_wrapper< Float, S > &s)

quda::ColorSpinor::data
complex< Float > data[size]
Definition: color_spinor.h:27

quda::ColorSpinor::operator-=
__device__ __host__ ColorSpinor< Float, Nc, Ns > & operator-=(const ColorSpinor< Float, Nc, Ns > &a)
Definition: color_spinor.h:69

quda::ColorSpinor::operator*=
__device__ __host__ ColorSpinor< Float, Nc, Ns > & operator*=(const T &a)
Definition: color_spinor.h:62

quda::ColorSpinor::operator()
__device__ __host__ complex< Float > & operator()(int s, int c)
2-d accessor functor
Definition: color_spinor.h:96

quda::ColorSpinor::print
__device__ __host__ void print() const
Prints the NsxNc complex elements of the color spinor.
Definition: color_spinor.h:123

quda::ColorSpinor::operator-
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator-() const
Definition: color_spinor.h:48

quda::ColorSpinor::size
static constexpr int size
Definition: color_spinor.h:26

quda::colorspinor_ghost_wrapper
colorspinor_ghost_wrapper is an internal class that is used to wrap instances of colorspinor accessor...
Definition: color_spinor_field_order.h:114

quda::colorspinor_wrapper
colorspinor_wrapper is an internal class that is used to wrap instances of colorspinor accessors,...
Definition: color_spinor_field_order.h:40

quda::complex< Float >

quda::complex::imag
__host__ __device__ ValueType imag() const volatile

quda::complex::real
__host__ __device__ ValueType real() const volatile