QUDA v0.7.0
A library for QCD on GPUs
extract_gauge_ghost_extended.cu
#include <quda_internal.h>
#include <gauge_field_order.h>

namespace quda {

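  /** Parameter structure for extracting/injecting the ghost zone of an
      extended gauge field. Carries the loop bounds (A0..A1, B0..B1, C0..C1),
      the interior dimensions X, the extension radius R, and the stride
      tables used for indexing the bulk (fBody) and the ghost buffer (fBuf). */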
  template <typename Order, int nDim>
  struct ExtractGhostExArg {
    Order order;
    int dim;
    int X[nDim];
    int R[nDim];
    int surfaceCB[nDim];
    int A0[nDim];
    int A1[nDim];
    int B0[nDim];
    int B1[nDim];
    int C0[nDim];
    int C1[nDim];
    int fBody[nDim][nDim];
    int fBuf[nDim][nDim];
    int localParity[nDim];

    ExtractGhostExArg(const Order &order, int dim, const int *X_, const int *R_,
                      const int *surfaceCB_,
                      const int *A0_, const int *A1_, const int *B0_, const int *B1_,
                      const int *C0_, const int *C1_, const int fBody_[nDim][nDim],
                      const int fBuf_[nDim][nDim], const int *localParity_)
      : order(order), dim(dim) {
      for (int d=0; d<nDim; d++) {
        X[d] = X_[d];
        R[d] = R_[d];
        surfaceCB[d] = surfaceCB_[d];
        A0[d] = A0_[d];
        A1[d] = A1_[d];
        B0[d] = B0_[d];
        B1[d] = B1_[d];
        C0[d] = C0_[d];
        C1[d] = C1_[d];
        for (int e=0; e<nDim; e++) {
          fBody[d][e] = fBody_[d][e];
          fBuf[d][e] = fBuf_[d][e];
        }
        localParity[d] = localParity_[d];
      }
    }
  };

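  /** Load a single site from the interior boundary of the bulk field and
      store it in the ghost buffer for the given direction and dimension. */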
  template <typename Float, int length, typename Arg>
  __device__ __host__ void extractor(Arg &arg, int dir, int a, int b,
                                     int c, int d, int g, int parity) {
    typename mapper<Float>::type u[length];
    int &dim = arg.dim;
    int srcIdx = (a*arg.fBody[dim][0] + b*arg.fBody[dim][1] +
                  c*arg.fBody[dim][2] + d*arg.fBody[dim][3]) >> 1;

    int dstIdx = (a*arg.fBuf[dim][0] + b*arg.fBuf[dim][1] +
                  c*arg.fBuf[dim][2] + (d-(dir?arg.X[dim]:arg.R[dim]))*arg.fBuf[dim][3]) >> 1;

    // load the ghost element from the bulk
    arg.order.load(u, srcIdx, g, parity);

    // need dir dependence in write
    // srcIdx is used here to determine boundary condition
    arg.order.saveGhostEx(u, dstIdx, srcIdx, dir, dim, g,
                          (parity+arg.localParity[dim])&1, arg.R);
  }

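  /** Read a single site from the ghost buffer and store it into the halo
      region of the bulk field (the inverse of extractor). */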
  template <typename Float, int length, typename Arg>
  __device__ __host__ void injector(Arg &arg, int dir, int a, int b,
                                    int c, int d, int g, int parity) {
    typename mapper<Float>::type u[length];
    int &dim = arg.dim;
    int srcIdx = (a*arg.fBuf[dim][0] + b*arg.fBuf[dim][1] +
                  c*arg.fBuf[dim][2] + (d-dir*(arg.X[dim]+arg.R[dim]))*arg.fBuf[dim][3]) >> 1;

    int dstIdx = (a*arg.fBody[dim][0] + b*arg.fBody[dim][1] +
                  c*arg.fBody[dim][2] + d*arg.fBody[dim][3]) >> 1;

    // need dir dependence in read
    // dstIdx is used here to determine boundary condition
    arg.order.loadGhostEx(u, srcIdx, dstIdx, dir, dim, g,
                          (parity+arg.localParity[dim])&1, arg.R);

    arg.order.save(u, dstIdx, g, parity); // save the ghost element into the bulk
  }
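
  // In the routines below, for a dimension dim with interior extent X[dim]
  // and radius R[dim], the slab coordinate d runs over R[dim] slices:
  //   extract, dir=0:  d in [R[dim],        2*R[dim])         (interior, low side)
  //   extract, dir=1:  d in [X[dim],        X[dim]+R[dim])    (interior, high side)
  //   inject,  dir=0:  d in [0,             R[dim])           (halo, low side)
  //   inject,  dir=1:  d in [X[dim]+R[dim], X[dim]+2*R[dim])  (halo, high side)
  // which is what D0 = extract ? dir*X + (1-dir)*R : dir*(X + R) encodes.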
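
  /** Host-side ghost extraction/injection: serial loops over parity,
      direction, the R-deep slab, the transverse (a,b,c) surface and the
      geometry index. */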
  template <typename Float, int length, int nDim, typename Order, bool extract>
  void extractGhostEx(ExtractGhostExArg<Order,nDim> arg) {
    typedef typename mapper<Float>::type RegType;

    int dim = arg.dim;

    for (int parity=0; parity<2; parity++) {

      // the following 4-way loop means this is specialized for 4 dimensions
      // dir = 0 backwards, dir = 1 forwards
      for (int dir = 0; dir<2; dir++) {

        int D0 = extract ? dir*arg.X[dim] + (1-dir)*arg.R[dim] : dir*(arg.X[dim] + arg.R[dim]);

        for (int d=D0; d<D0+arg.R[dim]; d++) {
          for (int a=arg.A0[dim]; a<arg.A1[dim]; a++) { // loop over the interior surface
            for (int b=arg.B0[dim]; b<arg.B1[dim]; b++) { // loop over the interior surface
              for (int c=arg.C0[dim]; c<arg.C1[dim]; c++) { // loop over the interior surface
                for (int g=0; g<arg.order.geometry; g++) {

                  // we only do the extraction for the parity we are currently working on
                  int oddness = (a+b+c+d) & 1;
                  if (oddness == parity) {
                    if (extract) extractor<Float,length>(arg, dir, a, b, c, d, g, parity);
                    else injector<Float,length>(arg, dir, a, b, c, d, g, parity);
                  } // oddness == parity
                } // g
              } // c
            } // b
          } // a
        } // d
      } // dir

    } // parity

  }

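  /** Device kernel: the (g,d,a,b,c) loops are flattened into the linear
      thread index, while parity and direction are mapped onto blockIdx.z
      and blockIdx.y respectively (the launch sets grid.y = grid.z = 2). */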
  template <typename Float, int length, int nDim, typename Order, bool extract>
  __global__ void extractGhostExKernel(ExtractGhostExArg<Order,nDim> arg) {
    typedef typename mapper<Float>::type RegType;

    int dim = arg.dim;

    // parallelize over parity and dir using block or grid
    /*for (int parity=0; parity<2; parity++) {*/
    {
      int parity = blockIdx.z;

      // the following 4-way loop means this is specialized for 4 dimensions
      // dir = 0 backwards, dir = 1 forwards
      //for (int dir = 0; dir<2; dir++) {
      {
        int dir = blockIdx.y;

        // this will have two-warp divergence since we only do work on
        // one parity, but parity alternates between threads
        // linear index used for writing into ghost buffer
        int X = blockIdx.x * blockDim.x + threadIdx.x;

        int dA = arg.A1[dim]-arg.A0[dim];
        int dB = arg.B1[dim]-arg.B0[dim];
        int dC = arg.C1[dim]-arg.C0[dim];
        int D0 = extract ? dir*arg.X[dim] + (1-dir)*arg.R[dim] : dir*(arg.X[dim] + arg.R[dim]);

        if (X >= arg.R[dim]*dA*dB*dC*arg.order.geometry) return;

        // thread order is optimized to maximize coalescing
        // X = (((g*R + d) * dA + a)*dB + b)*dC + c
        int gdab = X / dC;
        int c = arg.C0[dim] + X    - gdab*dC;
        int gda = gdab / dB;
        int b = arg.B0[dim] + gdab - gda*dB;
        int gd = gda / dA;
        int a = arg.A0[dim] + gda  - gd*dA;
        int g = gd / arg.R[dim];
        int d = D0           + gd  - g*arg.R[dim];

        // we only do the extraction for the parity we are currently working on
        int oddness = (a+b+c+d) & 1;
        if (oddness == parity) {
          if (extract) extractor<Float,length>(arg, dir, a, b, c, d, g, parity);
          else injector<Float,length>(arg, dir, a, b, c, d, g, parity);
        } // oddness == parity
      } // dir

    } // parity

  }
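
  // A minimal host-side sketch (not part of the original source) of the
  // inverse index mapping used in the kernel above: given
  //   X = (((g*R + dd)*dA + aa)*dB + bb)*dC + cc
  // the division/remainder chain recovers (g, dd, aa, bb, cc); the kernel
  // then adds the offsets arg.A0[dim], arg.B0[dim], arg.C0[dim] and D0.
  // The function name is hypothetical and serves only as illustration.
  inline void decodeGhostIndexSketch(int X, int R, int dA, int dB, int dC,
                                     int &g, int &dd, int &aa, int &bb, int &cc) {
    int gdab = X / dC;    cc = X    - gdab*dC;
    int gda  = gdab / dB; bb = gdab - gda*dB;
    int gd   = gda / dA;  aa = gda  - gd*dA;
    g = gd / R;           dd = gd   - g*R;
  }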
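
  /** Tunable wrapper that dispatches the ghost extraction/injection to the
      CPU routine or the GPU kernel. */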
  template <typename Float, int length, int nDim, typename Order>
  class ExtractGhostEx : Tunable {
    ExtractGhostExArg<Order,nDim> arg;
    int size;
    bool extract;
    const GaugeField &meta;
    QudaFieldLocation location;

  private:
    unsigned int sharedBytesPerThread() const { return 0; }
    unsigned int sharedBytesPerBlock(const TuneParam &param) const { return 0; }

    bool tuneGridDim() const { return false; } // don't tune the grid dimensions
    unsigned int minThreads() const { return size; }

  public:
    ExtractGhostEx(ExtractGhostExArg<Order,nDim> &arg, bool extract,
                   const GaugeField &meta, QudaFieldLocation location)
      : arg(arg), extract(extract), meta(meta), location(location) {
      int dA = arg.A1[arg.dim]-arg.A0[arg.dim];
      int dB = arg.B1[arg.dim]-arg.B0[arg.dim];
      int dC = arg.C1[arg.dim]-arg.C0[arg.dim];
      size = arg.R[arg.dim]*dA*dB*dC*arg.order.geometry;
      writeAuxString("prec=%lu,stride=%d,extract=%d,dimension=%d",
                     sizeof(Float), arg.order.stride, extract, arg.dim);
    }
    virtual ~ExtractGhostEx() { ; }

    void apply(const cudaStream_t &stream) {
      if (extract) {
        if (location==QUDA_CPU_FIELD_LOCATION) {
          extractGhostEx<Float,length,nDim,Order,true>(arg);
        } else {
#if (__COMPUTE_CAPABILITY__ >= 200)
          TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
          tp.grid.y = 2;
          tp.grid.z = 2;
          extractGhostExKernel<Float,length,nDim,Order,true>
            <<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg);
#else
          errorQuda("extractGhostEx not supported on pre-Fermi architecture");
#endif
        }
      } else { // we are injecting
        if (location==QUDA_CPU_FIELD_LOCATION) {
          extractGhostEx<Float,length,nDim,Order,false>(arg);
        } else {
#if (__COMPUTE_CAPABILITY__ >= 200)
          TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
          tp.grid.y = 2;
          tp.grid.z = 2;
          extractGhostExKernel<Float,length,nDim,Order,false>
            <<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg);
#else
          errorQuda("extractGhostEx not supported on pre-Fermi architecture");
#endif
        }
      }
    }

    TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), aux); }

    std::string paramString(const TuneParam &param) const { // don't bother printing the grid dim
      std::stringstream ps;
      ps << "block=(" << param.block.x << "," << param.block.y << "," << param.block.z << "), ";
      ps << "shared=" << param.shared_bytes;
      return ps.str();
    }

    long long flops() const { return 0; }
    long long bytes() const { return 2 * 2 * 2 * size * arg.order.Bytes(); } // 2 for i/o
  };
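
  /** Generic driver: sets up the loop bounds and stride tables for the
      given dimension and launches the extraction or injection. */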
  template <typename Float, int length, typename Order>
  void extractGhostEx(Order order, const int dim, const int *surfaceCB, const int *E,
                      const int *R, bool extract, const GaugeField &u, QudaFieldLocation location) {
    const int nDim = 4;
    //loop variables: a, b, c with a the most significant and c the least significant
    //A0, B0, C0 the minimum values
    //A1, B1, C1 the maximum values

    int X[nDim]; // compute interior dimensions
    for (int d=0; d<nDim; d++) X[d] = E[d] - 2*R[d];

    //..........x..........y............z.............t
    int A0[nDim] = {R[3], R[3], R[3], 0};
    int A1[nDim] = {X[3]+R[3], X[3]+R[3], X[3]+R[3], X[2]+2*R[2]};

    int B0[nDim] = {R[2], R[2], 0, 0};
    int B1[nDim] = {X[2]+R[2], X[2]+R[2], X[1]+2*R[1], X[1]+2*R[1]};

    int C0[nDim] = {R[1], 0, 0, 0};
    int C1[nDim] = {X[1]+R[1], X[0]+2*R[0], X[0]+2*R[0], X[0]+2*R[0]};

    int fSrc[nDim][nDim] = {
      {E[2]*E[1]*E[0], E[1]*E[0], E[0], 1},
      {E[2]*E[1]*E[0], E[1]*E[0], 1, E[0]},
      {E[2]*E[1]*E[0], E[0], 1, E[1]*E[0]},
      {E[1]*E[0], E[0], 1, E[2]*E[1]*E[0]}
    };

    int fBuf[nDim][nDim] = {
      {E[2]*E[1], E[1], 1, E[3]*E[2]*E[1]},
      {E[2]*E[0], E[0], 1, E[3]*E[2]*E[0]},
      {E[1]*E[0], E[0], 1, E[3]*E[1]*E[0]},
      {E[1]*E[0], E[0], 1, E[2]*E[1]*E[0]}
    };
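
    // Each row of fSrc/fBuf gives the strides of the loop coordinates
    // (a,b,c,d) for one choice of dim. For example, for dim==3 the source
    // index is a*E[1]*E[0] + b*E[0] + c + d*E[2]*E[1]*E[0], i.e. the usual
    // lexicographical site index ((d*E[2] + a)*E[1] + b)*E[0] + c with
    // (a,b,c,d) = (z,y,x,t) on the extended lattice.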

    //set the local processor parity
    //switching odd and even ghost gauge when that dimension size is odd
    //only switch if X[dir] is odd and the gridsize in that dimension is greater than 1
    // FIXME - I don't understand this, shouldn't it be commDim(dim) == 0 ?
    int localParity[nDim];
    for (int d=0; d<nDim; d++)
      localParity[d] = ((X[d] % 2 == 1) && (commDim(d) > 1)) ? 1 : 0;
    //  localParity[dim] = (X[dim]%2==0 || commDim(dim)) ? 0 : 1;

    ExtractGhostExArg<Order, nDim> arg(order, dim, X, R, surfaceCB, A0, A1, B0, B1,
                                       C0, C1, fSrc, fBuf, localParity);
    ExtractGhostEx<Float,length,nDim,Order> extractor(arg, extract, u, location);
    extractor.apply(0);
    if (location == QUDA_CUDA_FIELD_LOCATION) {
      cudaDeviceSynchronize(); // need to sync before we commence any communication
      checkCudaError();
    }
  }
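
  /** Dispatch ghost extraction on the gauge field order, reconstruction
      and link type. */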
  template <typename Float>
  void extractGhostEx(const GaugeField &u, int dim, const int *R, Float **Ghost, bool extract) {

    const int length = 18;

    QudaFieldLocation location =
      (typeid(u)==typeid(cudaGaugeField)) ? QUDA_CUDA_FIELD_LOCATION : QUDA_CPU_FIELD_LOCATION;

    if (u.Order() == QUDA_FLOAT2_GAUGE_ORDER) {
      if (u.Reconstruct() == QUDA_RECONSTRUCT_NO) {
        if (typeid(Float)==typeid(short) && u.LinkType() == QUDA_ASQTAD_FAT_LINKS) {
          extractGhostEx<Float,length>(FloatNOrder<Float,length,2,19>(u, 0, Ghost),
                                       dim, u.SurfaceCB(), u.X(), R, extract, u, location);
        } else {
          extractGhostEx<Float,length>(FloatNOrder<Float,length,2,18>(u, 0, Ghost),
                                       dim, u.SurfaceCB(), u.X(), R, extract, u, location);
        }
      } else if (u.Reconstruct() == QUDA_RECONSTRUCT_12) {
        extractGhostEx<Float,length>(FloatNOrder<Float,length,2,12>(u, 0, Ghost),
                                     dim, u.SurfaceCB(), u.X(), R, extract, u, location);
      } else if (u.Reconstruct() == QUDA_RECONSTRUCT_8) {
        extractGhostEx<Float,length>(FloatNOrder<Float,length,2,8>(u, 0, Ghost),
                                     dim, u.SurfaceCB(), u.X(), R, extract, u, location);
      } else if (u.Reconstruct() == QUDA_RECONSTRUCT_13) {
        extractGhostEx<Float,length>(FloatNOrder<Float,length,2,13>(u, 0, Ghost),
                                     dim, u.SurfaceCB(), u.X(), R, extract, u, location);
      } else if (u.Reconstruct() == QUDA_RECONSTRUCT_9) {
        extractGhostEx<Float,length>(FloatNOrder<Float,length,2,9>(u, 0, Ghost),
                                     dim, u.SurfaceCB(), u.X(), R, extract, u, location);
      }
    } else if (u.Order() == QUDA_FLOAT4_GAUGE_ORDER) {
      if (u.Reconstruct() == QUDA_RECONSTRUCT_NO) {
        if (typeid(Float)==typeid(short) && u.LinkType() == QUDA_ASQTAD_FAT_LINKS) {
          extractGhostEx<Float,length>(FloatNOrder<Float,length,1,19>(u, 0, Ghost),
                                       dim, u.SurfaceCB(), u.X(), R, extract, u, location);
        } else {
          extractGhostEx<Float,length>(FloatNOrder<Float,length,1,18>(u, 0, Ghost),
                                       dim, u.SurfaceCB(), u.X(), R, extract, u, location);
        }
      } else if (u.Reconstruct() == QUDA_RECONSTRUCT_12) {
        extractGhostEx<Float,length>(FloatNOrder<Float,length,4,12>(u, 0, Ghost),
                                     dim, u.SurfaceCB(), u.X(), R, extract, u, location);
      } else if (u.Reconstruct() == QUDA_RECONSTRUCT_8) {
        extractGhostEx<Float,length>(FloatNOrder<Float,length,4,8>(u, 0, Ghost),
                                     dim, u.SurfaceCB(), u.X(), R, extract, u, location);
      } else if (u.Reconstruct() == QUDA_RECONSTRUCT_13) {
        extractGhostEx<Float,length>(FloatNOrder<Float,length,4,13>(u, 0, Ghost),
                                     dim, u.SurfaceCB(), u.X(), R, extract, u, location);
      } else if (u.Reconstruct() == QUDA_RECONSTRUCT_9) {
        extractGhostEx<Float,length>(FloatNOrder<Float,length,4,9>(u, 0, Ghost),
                                     dim, u.SurfaceCB(), u.X(), R, extract, u, location);
      }
    } else if (u.Order() == QUDA_QDP_GAUGE_ORDER) {

#ifdef BUILD_QDP_INTERFACE
      extractGhostEx<Float,length>(QDPOrder<Float,length>(u, 0, Ghost),
                                   dim, u.SurfaceCB(), u.X(), R, extract, u, location);
#else
      errorQuda("QDP interface has not been built\n");
#endif

    } else if (u.Order() == QUDA_QDPJIT_GAUGE_ORDER) {

#ifdef BUILD_QDPJIT_INTERFACE
      extractGhostEx<Float,length>(QDPJITOrder<Float,length>(u, 0, Ghost),
                                   dim, u.SurfaceCB(), u.X(), R, extract, u, location);
#else
      errorQuda("QDPJIT interface has not been built\n");
#endif

    } else if (u.Order() == QUDA_CPS_WILSON_GAUGE_ORDER) {

#ifdef BUILD_CPS_INTERFACE
      extractGhostEx<Float,length>(CPSOrder<Float,length>(u, 0, Ghost),
                                   dim, u.SurfaceCB(), u.X(), R, extract, u, location);
#else
      errorQuda("CPS interface has not been built\n");
#endif

    } else if (u.Order() == QUDA_MILC_GAUGE_ORDER) {

#ifdef BUILD_MILC_INTERFACE
      extractGhostEx<Float,length>(MILCOrder<Float,length>(u, 0, Ghost),
                                   dim, u.SurfaceCB(), u.X(), R, extract, u, location);
#else
      errorQuda("MILC interface has not been built\n");
#endif

    } else if (u.Order() == QUDA_BQCD_GAUGE_ORDER) {

#ifdef BUILD_BQCD_INTERFACE
      extractGhostEx<Float,length>(BQCDOrder<Float,length>(u, 0, Ghost),
                                   dim, u.SurfaceCB(), u.X(), R, extract, u, location);
#else
      errorQuda("BQCD interface has not been built\n");
#endif

    } else if (u.Order() == QUDA_TIFR_GAUGE_ORDER) {

#ifdef BUILD_TIFR_INTERFACE
      extractGhostEx<Float,length>(TIFROrder<Float,length>(u, 0, Ghost),
                                   dim, u.SurfaceCB(), u.X(), R, extract, u, location);
#else
      errorQuda("TIFR interface has not been built\n");
#endif

    } else {
      errorQuda("Gauge field order %d not supported", u.Order());
    }

  }

  void extractExtendedGaugeGhost(const GaugeField &u, int dim, const int *R,
                                 void **ghost, bool extract) {

    if (u.Precision() == QUDA_DOUBLE_PRECISION) {
      extractGhostEx(u, dim, R, (double**)ghost, extract);
    } else if (u.Precision() == QUDA_SINGLE_PRECISION) {
      extractGhostEx(u, dim, R, (float**)ghost, extract);
    } else if (u.Precision() == QUDA_HALF_PRECISION) {
      extractGhostEx(u, dim, R, (short**)ghost, extract);
    } else {
      errorQuda("Unknown precision type %d", u.Precision());
    }

  }
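
  // A minimal usage sketch (not from the original file): extract the
  // boundary of an extended field into caller-allocated buffers, exchange
  // them with the neighboring processes, then inject the received data
  // into the halo. The buffer layout and the communication step are
  // placeholders; only extractExtendedGaugeGhost is the real entry point.
  //
  //   void exchangeExtendedGhostSketch(const GaugeField &u, void **ghost, const int R[4]) {
  //     for (int d=0; d<4; d++) extractExtendedGaugeGhost(u, d, R, ghost, true);
  //     // ... send ghost[d] to the neighbors and receive their data ...
  //     for (int d=0; d<4; d++) extractExtendedGaugeGhost(u, d, R, ghost, false);
  //   }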

} // namespace quda