quda-ref/v1.1.0/color__spinor__field_8cpp_source.html

 #include <color_spinor_field.h>

 #include <string.h>

 #include <iostream>

 #include <typeinfo>


 namespace quda {


   /*ColorSpinorField::ColorSpinorField() : init(false) {


     }*/


   // Build a parameter struct from an existing field: the LatticeFieldParam
   // base is default-constructed and the field then writes its own attributes
   // into this object through ColorSpinorField::fill().
   ColorSpinorParam::ColorSpinorParam(const ColorSpinorField &field) : LatticeFieldParam()  {

     field.fill(*this);

   }


   // Construct a field description from a parameter struct.  All pointers and
   // byte counts start out zeroed; the actual attributes (dimensions, strides,
   // byte sizes, tuning strings) are then derived by create().
   ColorSpinorField::ColorSpinorField(const ColorSpinorParam &param)

     : LatticeField(param), init(false), ghost_precision_allocated(QUDA_INVALID_PRECISION), v(0), norm(0),

       ghost( ), ghostNorm( ), ghostFace( ),

       bytes(0), norm_bytes(0), even(0), odd(0),

       composite_descr(param.is_composite, param.composite_dim, param.is_component, param.component_id),

       components(0)

   {

     // a create type must have been chosen by the caller
     if (param.create == QUDA_INVALID_FIELD_CREATE) errorQuda("Invalid create type");

     // no ghost buffers are attached yet
     for (int i = 0; i < 2 * QUDA_MAX_DIM; i++) ghost_buf[i] = nullptr;

     create(param.nDim, param.x, param.nColor, param.nSpin, param.nVec, param.twistFlavor, param.Precision(), param.pad,

            param.siteSubset, param.siteOrder, param.fieldOrder, param.gammaBasis, param.pc_type, param.suggested_parity);

   }


   // Copy constructor.  Only the description of the source field is taken over
   // (via the LatticeField copy, the composite descriptor and create()); v, norm,
   // even and odd are initialised to 0 here rather than copied from the source.
   ColorSpinorField::ColorSpinorField(const ColorSpinorField &field)

     : LatticeField(field), init(false), ghost_precision_allocated(QUDA_INVALID_PRECISION), v(0), norm(0),

       ghost( ), ghostNorm( ), ghostFace( ),

       bytes(0), norm_bytes(0), even(0), odd(0),

      composite_descr(field.composite_descr), components(0)

   {

     // no ghost buffers are attached yet
     for (int i = 0; i < 2 * QUDA_MAX_DIM; i++) ghost_buf[i] = nullptr;

     create(field.nDim, field.x, field.nColor, field.nSpin, field.nVec, field.twistFlavor, field.Precision(), field.pad,

            field.siteSubset, field.siteOrder, field.fieldOrder, field.gammaBasis, field.pc_type, field.suggested_parity);

   }


   // Destructor: delegates to destroy(), which marks the field as uninitialised.
   ColorSpinorField::~ColorSpinorField() {

     destroy();

   }


   // Computes the ghost-zone layout for the current ghost precision (face sizes,
   // per-dimension byte counts and offsets, total ghost_bytes) and refreshes the
   // dslash_constant values consumed by the dslash and packing kernels.
   //
   //   nFace        - multiplier applied to every face size
   //   spin_project - when true and nSpin == 4, a ghost site carries 2 spin
   //                  components instead of 4
   //
   // Returns without doing anything for cpuColorSpinorField instances, or when
   // the layout has already been computed for the current ghost_precision.
   void ColorSpinorField::createGhostZone(int nFace, bool spin_project) const
   {
     if ( typeid(*this) == typeid(cpuColorSpinorField) || ghost_precision_allocated == ghost_precision ) return;

     // half and quarter precision add one float per ghost site
     bool is_fixed = (ghost_precision == QUDA_HALF_PRECISION || ghost_precision == QUDA_QUARTER_PRECISION);
     int nSpinGhost = (nSpin == 4 && spin_project) ? 2 : nSpin;
     size_t site_size = nSpinGhost * nColor * 2 * ghost_precision + (is_fixed ? sizeof(float) : 0);

     // calculate size of ghost zone required
     int dims = nDim == 5 ? (nDim - 1) : nDim;
     int x5 = nDim == 5 ? x[4] : 1;
     const int ghost_align
       = 1; // TODO perhaps in the future we should align each ghost dim/dir, e.g., along 32-byte boundaries
     ghost_bytes = 0;

     for (int i=0; i<dims; i++) {
       // a face is the product of all other extents; it stays 0 for dimensions that are not partitioned
       ghostFace[i] = 0;
       if (comm_dim_partitioned(i)) {
         ghostFace[i] = 1;
         for (int j=0; j<dims; j++) {
           if (i==j) continue;
           ghostFace[i] *= x[j];
         }
         ghostFace[i] *= x5; // temporary hack : extra dimension for DW ghosts
         if (i == 0 && siteSubset != QUDA_FULL_SITE_SUBSET) ghostFace[i] /= 2;
       }

       ghost_face_bytes[i] = nFace * ghostFace[i] * site_size;
       ghost_face_bytes_aligned[i] = ((ghost_face_bytes[i] + ghost_align - 1) / ghost_align) * ghost_align;

       // each dimension stores two consecutive aligned faces: [i][0] first, then [i][1]
       ghost_offset[i][0] = i == 0 ? 0 : ghost_offset[i - 1][0] + 2 * ghost_face_bytes_aligned[i - 1];
       ghost_offset[i][1] = ghost_offset[i][0] + ghost_face_bytes_aligned[i];
       ghost_bytes += 2 * ghost_face_bytes_aligned[i];

       ghostFaceCB[i] = (siteSubset == QUDA_FULL_SITE_SUBSET ? ghostFace[i] / 2 : ghostFace[i]);
     } // dim

     if (isNative()) ghost_bytes = ALIGNMENT_ADJUST(ghost_bytes);

     { // compute temporaries needed by dslash and packing kernels
       auto &X = dslash_constant.X;
       for (int dim=0; dim<nDim; dim++) X[dim] = x[dim];
       for (int dim=nDim; dim<QUDA_MAX_DIM; dim++) X[dim] = 1;
       // X holds full-lattice extents: a single-parity field stores half of dimension 0
       if (siteSubset == QUDA_PARITY_SITE_SUBSET) X[0] = 2*X[0];

       for (int i=0; i<nDim; i++) dslash_constant.Xh[i] = X[i]/2;

       dslash_constant.Ls = X[4];
       dslash_constant.volume_4d_cb = volumeCB / (nDim == 5 ? x[4] : 1);
       dslash_constant.volume_4d = 2 * dslash_constant.volume_4d_cb;

       // for each dimension, the 4-d extents with that dimension replaced by nFace
       int face[4];
       for (int dim=0; dim<4; dim++) {
         for (int j=0; j<4; j++) face[j] = X[j];
         face[dim] = nFace;
         dslash_constant.face_X[dim] = face[0];
         dslash_constant.face_Y[dim] = face[1];
         dslash_constant.face_Z[dim] = face[2];
         dslash_constant.face_T[dim] = face[3];
         dslash_constant.face_XY[dim] = dslash_constant.face_X[dim] * face[1];
         dslash_constant.face_XYZ[dim] = dslash_constant.face_XY[dim] * face[2];
         dslash_constant.face_XYZT[dim] = dslash_constant.face_XYZ[dim] * face[3];
       }

       dslash_constant.Vh = (X[3]*X[2]*X[1]*X[0])/2;
       dslash_constant.ghostFace[0] = X[1] * X[2] * X[3];
       dslash_constant.ghostFace[1] = X[0] * X[2] * X[3];
       dslash_constant.ghostFace[2] = X[0] * X[1] * X[3];
       dslash_constant.ghostFace[3] = X[0] * X[1] * X[2];
       for (int d = 0; d < 4; d++) dslash_constant.ghostFaceCB[d] = dslash_constant.ghostFace[d] / 2;

       dslash_constant.X2X1 = X[1]*X[0];
       dslash_constant.X3X2X1 = X[2]*X[1]*X[0];
       dslash_constant.X4X3X2X1 = X[3] * X[2] * X[1] * X[0];
       dslash_constant.X2X1mX1 = (X[1]-1)*X[0];
       dslash_constant.X3X2X1mX2X1 = (X[2]-1)*X[1]*X[0];
       dslash_constant.X4X3X2X1mX3X2X1 = (X[3]-1)*X[2]*X[1]*X[0];
       dslash_constant.X5X4X3X2X1mX4X3X2X1 = (X[4] - 1) * X[3] * X[2] * X[1] * X[0];
       dslash_constant.X4X3X2X1hmX3X2X1h = dslash_constant.X4X3X2X1mX3X2X1/2;

       // used by indexFromFaceIndexStaggered: dims[d] lists the three extents other than d
       dslash_constant.dims[0][0]=X[1];
       dslash_constant.dims[0][1]=X[2];
       dslash_constant.dims[0][2]=X[3];

       dslash_constant.dims[1][0]=X[0];
       dslash_constant.dims[1][1]=X[2];
       dslash_constant.dims[1][2]=X[3];

       dslash_constant.dims[2][0]=X[0];
       dslash_constant.dims[2][1]=X[1];
       dslash_constant.dims[2][2]=X[3];

       dslash_constant.dims[3][0]=X[0];
       dslash_constant.dims[3][1]=X[1];
       dslash_constant.dims[3][2]=X[2];
     }

     // remember which precision this layout was computed for (see the early return above)
     ghost_precision_allocated = ghost_precision;

   } // createGhostZone


   void ColorSpinorField::create(int Ndim, const int *X, int Nc, int Ns, int Nvec, QudaTwistFlavorType Twistflavor,

                                 QudaPrecision Prec, int Pad, QudaSiteSubset siteSubset, QudaSiteOrder siteOrder,

                                 QudaFieldOrder fieldOrder, QudaGammaBasis gammaBasis, QudaPCType pc_type,

                                 QudaParity suggested_parity)

   {

     this->siteSubset = siteSubset;

     this->siteOrder = siteOrder;

     this->fieldOrder = fieldOrder;

     this->gammaBasis = gammaBasis;


     if (Ndim > QUDA_MAX_DIM){

       errorQuda("Number of dimensions nDim = %d too great", Ndim);

     }

     nDim = Ndim;

     nColor = Nc;

     nSpin = Ns;

     nVec = Nvec;

     twistFlavor = Twistflavor;


     this->pc_type = pc_type;

     this->suggested_parity = suggested_parity;


     precision = Prec;

     // Copy all data in X

     for (int d = 0; d < QUDA_MAX_DIM; d++) x[d] = X[d];

     volume = 1;

     for (int d=0; d<nDim; d++) {

       volume *= x[d];

     }

     volumeCB = siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume/2;


    if((twistFlavor == QUDA_TWIST_NONDEG_DOUBLET || twistFlavor == QUDA_TWIST_DEG_DOUBLET) && x[4] != 2)

      errorQuda("Must be two flavors for non-degenerate twisted mass spinor (while provided with %d number of components)\n", x[4]);//two flavors


     pad = Pad;

     if (siteSubset == QUDA_FULL_SITE_SUBSET) {

       stride = volume/2 + pad; // padding is based on half volume

       length = 2*stride*nColor*nSpin*2;

     } else {

       stride = volume + pad;

       length = stride*nColor*nSpin*2;

     }


     real_length = volume*nColor*nSpin*2; // physical length


     bytes = (size_t)length * precision; // includes pads and ghost zones

     if (isNative() || fieldOrder == QUDA_FLOAT2_FIELD_ORDER) bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(bytes/2) : ALIGNMENT_ADJUST(bytes);


     if (precision == QUDA_HALF_PRECISION || precision == QUDA_QUARTER_PRECISION) {

       norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET ? 2*stride : stride) * sizeof(float);

       if (isNative() || fieldOrder == QUDA_FLOAT2_FIELD_ORDER) norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(norm_bytes/2) : ALIGNMENT_ADJUST(norm_bytes);

     } else {

       norm_bytes = 0;

     }


     init = true;


     if (composite_descr.is_composite) {


       if (composite_descr.is_component) errorQuda("\nComposite type is not implemented.\n");


       composite_descr.volume   = volume;

       composite_descr.volumeCB = volumeCB;

       composite_descr.stride = stride;

       composite_descr.length = length;

       composite_descr.real_length = real_length;

       composite_descr.bytes       = bytes;

       composite_descr.norm_bytes  = norm_bytes;


       volume *= composite_descr.dim;

       volumeCB *= composite_descr.dim;

       stride *= composite_descr.dim;

       length *= composite_descr.dim;

       real_length *= composite_descr.dim;


       bytes *= composite_descr.dim;

       norm_bytes *= composite_descr.dim;

     }  else if (composite_descr.is_component) {

       composite_descr.dim = 0;


       composite_descr.volume      = 0;

       composite_descr.volumeCB    = 0;

       composite_descr.stride      = 0;

       composite_descr.length      = 0;

       composite_descr.real_length = 0;

       composite_descr.bytes       = 0;

       composite_descr.norm_bytes  = 0;

     }


     setTuningString();

   }


   void ColorSpinorField::setTuningString() {

     {

       //LatticeField::setTuningString(); // FIXME - LatticeField needs correct dims for single-parity

       char vol_tmp[TuneKey::volume_n];

       int check  = snprintf(vol_string, TuneKey::volume_n, "%d", x[0]);

       if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");

       for (int d=1; d<nDim; d++) {

         strcpy(vol_tmp, vol_string);

         check = snprintf(vol_string, TuneKey::volume_n, "%sx%d", vol_tmp, x[d]);

         if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");

       }

     }


     {

       int aux_string_n = TuneKey::aux_n / 2;

       char aux_tmp[aux_string_n];

       int check = snprintf(aux_string, aux_string_n, "vol=%lu,stride=%lu,precision=%d,order=%d,Ns=%d,Nc=%d", volume,

                            stride, precision, fieldOrder, nSpin, nColor);

       if (check < 0 || check >= aux_string_n) errorQuda("Error writing aux string");

       if (twistFlavor != QUDA_TWIST_NO && twistFlavor != QUDA_TWIST_INVALID) {

         strcpy(aux_tmp, aux_string);

         check = snprintf(aux_string, aux_string_n, "%s,TwistFlavour=%d", aux_tmp, twistFlavor);

         if (check < 0 || check >= aux_string_n) errorQuda("Error writing aux string");

       }

     }

   }


   // Marks the field as uninitialised; nothing else is released here.
   void ColorSpinorField::destroy() {

     init = false;

   }


   // Assignment: takes over the description of src (not its data) by updating
   // the composite descriptor and re-running create() with src's attributes.
   ColorSpinorField& ColorSpinorField::operator=(const ColorSpinorField &src) {
     if (&src == this) return *this; // self-assignment: nothing to do

     const auto &from = src.composite_descr;
     auto &to = this->composite_descr;

     if (from.is_composite) {
       // become a composite of the same dimension (never a component)
       to.is_composite = true;
       to.dim          = from.dim;
       to.is_component = false;
       to.id           = 0;
     } else if (from.is_component) {
       // assigning from a component yields a non-composite field;
       // is_component and id are deliberately left as they are
       to.is_composite = false;
       to.dim          = 0;
     }
     // a source that is neither composite nor component leaves the descriptor untouched

     create(src.nDim, src.x, src.nColor, src.nSpin, src.nVec, src.twistFlavor, src.precision, src.pad, src.siteSubset,
            src.siteOrder, src.fieldOrder, src.gammaBasis, src.pc_type, src.suggested_parity);

     return *this;
   }


   // Resets the attributes of this field if param disagrees (and is defined)

   void ColorSpinorField::reset(const ColorSpinorParam &param)

   {

     if (param.nColor != 0) nColor = param.nColor;

     if (param.nSpin != 0) nSpin = param.nSpin;

     if (param.nVec != 0) nVec = param.nVec;

     if (param.twistFlavor != QUDA_TWIST_INVALID) twistFlavor = param.twistFlavor;


     if (param.pc_type != QUDA_PC_INVALID) pc_type = param.pc_type;

     if (param.suggested_parity != QUDA_INVALID_PARITY) suggested_parity = param.suggested_parity;


     if (param.Precision() != QUDA_INVALID_PRECISION) precision = param.Precision();

     if (param.GhostPrecision() != QUDA_INVALID_PRECISION) ghost_precision = param.GhostPrecision();

     if (param.nDim != 0) nDim = param.nDim;


     composite_descr.is_composite     = param.is_composite;

     composite_descr.is_component     = param.is_component;

     composite_descr.dim              = param.is_composite ? param.composite_dim : 0;

     composite_descr.id               = param.component_id;


     volume = 1;

     for (int d=0; d<nDim; d++) {

       if (param.x[d] != 0) x[d] = param.x[d];

       volume *= x[d];

     }

     volumeCB = param.siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume/2;


     if((twistFlavor == QUDA_TWIST_NONDEG_DOUBLET || twistFlavor == QUDA_TWIST_DEG_DOUBLET) && x[4] != 2)

       errorQuda("Must be two flavors for non-degenerate twisted mass spinor (provided with %d)\n", x[4]);


     if (param.pad != 0) pad = param.pad;


     if (param.siteSubset == QUDA_FULL_SITE_SUBSET) {

       stride = volume/2 + pad;

       length = 2*stride*nColor*nSpin*2;

     } else if (param.siteSubset == QUDA_PARITY_SITE_SUBSET) {

       stride = volume + pad;

       length = stride*nColor*nSpin*2;

     } else {

       //errorQuda("SiteSubset not defined %d", param.siteSubset);

       //do nothing, not an error (can't remember why - need to document this sometime! )

     }


     if (param.siteSubset != QUDA_INVALID_SITE_SUBSET) siteSubset = param.siteSubset;

     if (param.siteOrder != QUDA_INVALID_SITE_ORDER) siteOrder = param.siteOrder;

     if (param.fieldOrder != QUDA_INVALID_FIELD_ORDER) fieldOrder = param.fieldOrder;

     if (param.gammaBasis != QUDA_INVALID_GAMMA_BASIS) gammaBasis = param.gammaBasis;


     real_length = volume*nColor*nSpin*2;


     bytes = (size_t)length * precision; // includes pads

     if (isNative() || fieldOrder == QUDA_FLOAT2_FIELD_ORDER) bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(bytes/2) : ALIGNMENT_ADJUST(bytes);


     if (precision == QUDA_HALF_PRECISION || precision == QUDA_QUARTER_PRECISION) {

       norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET ? 2*stride : stride) * sizeof(float);

       if (isNative() || fieldOrder == QUDA_FLOAT2_FIELD_ORDER) norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(norm_bytes/2) : ALIGNMENT_ADJUST(norm_bytes);

     } else {

       norm_bytes = 0;

     }


     if (composite_descr.is_composite) {

       composite_descr.volume            = volume;

       composite_descr.stride            = stride;

       composite_descr.length            = length;

       composite_descr.real_length       = real_length;

       composite_descr.bytes             = bytes;

       composite_descr.norm_bytes        = norm_bytes;


       volume            *= composite_descr.dim;

       stride            *= composite_descr.dim;

       length            *= composite_descr.dim;

       real_length       *= composite_descr.dim;


       bytes      *= composite_descr.dim;

       norm_bytes *= composite_descr.dim;

     } else {

       composite_descr.volume            = 0;

       composite_descr.stride            = 0;

       composite_descr.length            = 0;

       composite_descr.real_length       = 0;

       composite_descr.bytes             = 0;

       composite_descr.norm_bytes        = 0;

     }


     if (!init) errorQuda("Shouldn't be resetting a non-inited field\n");


     setTuningString();

   }


   // Fills the param with the contents of this field

   void ColorSpinorField::fill(ColorSpinorParam &param) const {
     // where the field lives and how it is laid out
     param.location = Location();
     param.siteSubset = siteSubset;
     param.siteOrder = siteOrder;
     param.fieldOrder = fieldOrder;
     param.gammaBasis = gammaBasis;
     param.pad = pad;

     // geometry: every entry of x is copied, not only the first nDim
     param.nDim = nDim;
     for (int d = 0; d < QUDA_MAX_DIM; d++) param.x[d] = x[d];

     // internal degrees of freedom and precision
     param.nColor = nColor;
     param.nSpin = nSpin;
     param.nVec = nVec;
     param.twistFlavor = twistFlavor;
     param.setPrecision(precision, ghost_precision);

     // preconditioning hints
     param.pc_type = pc_type;
     param.suggested_parity = suggested_parity;

     // composite description; the result never describes a component
     param.is_composite  = composite_descr.is_composite;
     param.composite_dim = composite_descr.dim;
     param.is_component  = false;//always either a regular spinor or a composite object
     param.component_id  = 0;

     // the create type is always set to QUDA_NULL_FIELD_CREATE
     param.create = QUDA_NULL_FIELD_CREATE;
   }


   // Exchanges face data with the neighbouring processes in every partitioned
   // dimension d: sendbuf[2*d+0] / sendbuf[2*d+1] are sent in the backward /
   // forward direction, and the data received from the backward / forward
   // neighbour ends up in ghost[2*d+0] / ghost[2*d+1].  CPU fields communicate
   // straight from the given buffers; otherwise the data is staged through host
   // buffers obtained from pool_pinned_malloc.
   void ColorSpinorField::exchange(void **ghost, void **sendbuf, int nFace) const {

     // FIXME: use LatticeField MsgHandles
     // entries of the arrays below are only set (and used) for partitioned dimensions
     MsgHandle *mh_send_fwd[4];
     MsgHandle *mh_from_back[4];
     MsgHandle *mh_from_fwd[4];
     MsgHandle *mh_send_back[4];
     void *send_fwd[4];
     void *send_back[4];
     void *recv_fwd[4];
     void *recv_back[4];

     // bytes in one face of each dimension, and the total over both directions
     // of all partitioned dimensions
     size_t face_bytes[4];
     size_t total_bytes = 0;
     const int reals_per_site = 2*nColor*nSpin;
     for (int d = 0; d < nDimComms; d++) {
       face_bytes[d] = siteSubset*nFace*surfaceCB[d]*reals_per_site*ghost_precision;
       if (comm_dim_partitioned(d)) total_bytes += 2*face_bytes[d]; // 2 for fwd/bwd
     }

     void *total_send = nullptr;
     void *total_recv = nullptr;

     // leave this option in there just in case
     bool no_comms_fill = false;

     // If this is set to false, then we are assuming that the send and
     // ghost buffers are in a single contiguous memory space.  Setting
     // to false means we aggregate all cudaMemcpys which reduces
     // latency.
     bool fine_grained_memcpy = false;

     // index of the first partitioned dimension, or -1 if there is none
     auto first_partitioned = [&]() {
       for (int d = 0; d < nDimComms; d++) {
         if (comm_dim_partitioned(d)) return d;
       }
       return -1;
     };

     const auto where = Location();

     if (where == QUDA_CPU_FIELD_LOCATION) {
       // host field: communicate directly out of / into the caller's buffers
       for (int d = 0; d < nDimComms; d++) {
         if (!comm_dim_partitioned(d)) {
           if (no_comms_fill) {
             memcpy(ghost[2*d+1], sendbuf[2*d+0], face_bytes[d]);
             memcpy(ghost[2*d+0], sendbuf[2*d+1], face_bytes[d]);
           }
           continue;
         }
         send_back[d] = sendbuf[2*d + 0];
         send_fwd[d]  = sendbuf[2*d + 1];
         recv_fwd[d]  =   ghost[2*d + 1];
         recv_back[d] =   ghost[2*d + 0];
       }
     } else { // FIXME add GPU_COMMS support
       if (total_bytes) {
         total_send = pool_pinned_malloc(total_bytes);
         total_recv = pool_pinned_malloc(total_bytes);
       }

       // carve the staging buffers up as [back, fwd] per partitioned dimension
       size_t offset = 0;
       for (int d = 0; d < nDimComms; d++) {
         if (!comm_dim_partitioned(d)) {
           if (no_comms_fill) {
             qudaMemcpy(ghost[2*d+1], sendbuf[2*d+0], face_bytes[d], cudaMemcpyDeviceToDevice);
             qudaMemcpy(ghost[2*d+0], sendbuf[2*d+1], face_bytes[d], cudaMemcpyDeviceToDevice);
           }
           continue;
         }

         send_back[d] = static_cast<char*>(total_send) + offset;
         recv_back[d] = static_cast<char*>(total_recv) + offset;
         offset += face_bytes[d];
         send_fwd[d] = static_cast<char*>(total_send) + offset;
         recv_fwd[d] = static_cast<char*>(total_recv) + offset;
         offset += face_bytes[d];

         if (fine_grained_memcpy) {
           qudaMemcpy(send_back[d], sendbuf[2*d + 0], face_bytes[d], cudaMemcpyDeviceToHost);
           qudaMemcpy(send_fwd[d],  sendbuf[2*d + 1], face_bytes[d], cudaMemcpyDeviceToHost);
         }
       }

       if (!fine_grained_memcpy && total_bytes) {
         // one aggregated copy, starting at the send buffer of the first partitioned dimension
         const int first = first_partitioned();
         void *send_ptr = (first >= 0) ? sendbuf[2*first] : nullptr;
         qudaMemcpy(total_send, send_ptr, total_bytes, cudaMemcpyDeviceToHost);
       }
     }

     // declare the four messages of each partitioned dimension
     for (int d = 0; d < nDimComms; d++) {
       if (comm_dim_partitioned(d)) {
         mh_send_fwd[d] = comm_declare_send_relative(send_fwd[d], d, +1, face_bytes[d]);
         mh_send_back[d] = comm_declare_send_relative(send_back[d], d, -1, face_bytes[d]);
         mh_from_fwd[d] = comm_declare_receive_relative(recv_fwd[d], d, +1, face_bytes[d]);
         mh_from_back[d] = comm_declare_receive_relative(recv_back[d], d, -1, face_bytes[d]);
       }
     }

     // start them: receives first, then sends
     for (int d = 0; d < nDimComms; d++) {
       if (!comm_dim_partitioned(d)) continue;
       comm_start(mh_from_back[d]);
       comm_start(mh_from_fwd[d]);
       comm_start(mh_send_fwd[d]);
       comm_start(mh_send_back[d]);
     }

     // and wait for all of them to complete
     for (int d = 0; d < nDimComms; d++) {
       if (comm_dim_partitioned(d)) {
         comm_wait(mh_send_fwd[d]);
         comm_wait(mh_send_back[d]);
         comm_wait(mh_from_back[d]);
         comm_wait(mh_from_fwd[d]);
       }
     }

     if (where == QUDA_CUDA_FIELD_LOCATION) {
       // move the received data from the staging buffer into the ghost buffers
       for (int d = 0; d < nDimComms; d++) {
         if (comm_dim_partitioned(d) && fine_grained_memcpy) {
           qudaMemcpy(ghost[2*d+0], recv_back[d], face_bytes[d], cudaMemcpyHostToDevice);
           qudaMemcpy(ghost[2*d+1], recv_fwd[d], face_bytes[d], cudaMemcpyHostToDevice);
         }
       }

       if (!fine_grained_memcpy && total_bytes) {
         // one aggregated copy, starting at the ghost buffer of the first partitioned dimension
         const int first = first_partitioned();
         void *ghost_ptr = (first >= 0) ? ghost[2*first] : nullptr;
         qudaMemcpy(ghost_ptr, total_recv, total_bytes, cudaMemcpyHostToDevice);
       }

       if (total_bytes) {
         pool_pinned_free(total_send);
         pool_pinned_free(total_recv);
       }
     }

     // release the message handles
     for (int d = 0; d < nDimComms; d++) {
       if (comm_dim_partitioned(d)) {
         comm_free(mh_send_fwd[d]);
         comm_free(mh_send_back[d]);
         comm_free(mh_from_back[d]);
         comm_free(mh_from_fwd[d]);
       }
     }
   }


   // For kernels with precision conversion built in

   void ColorSpinorField::checkField(const ColorSpinorField &a, const ColorSpinorField &b) {
     // Raises errorQuda on the first attribute in which the two fields differ;
     // precision is deliberately not among the attributes compared.
     const bool same_length = (a.Length() == b.Length());
     if (!same_length) { errorQuda("checkSpinor: lengths do not match: %lu %lu", a.Length(), b.Length()); }

     const bool same_color = (a.Ncolor() == b.Ncolor());
     if (!same_color) { errorQuda("checkSpinor: colors do not match: %d %d", a.Ncolor(), b.Ncolor()); }

     const bool same_spin = (a.Nspin() == b.Nspin());
     if (!same_spin) { errorQuda("checkSpinor: spins do not match: %d %d", a.Nspin(), b.Nspin()); }

     const bool same_nvec = (a.Nvec() == b.Nvec());
     if (!same_nvec) { errorQuda("checkSpinor: nVec does not match: %d %d", a.Nvec(), b.Nvec()); }

     const bool same_twist = (a.TwistFlavor() == b.TwistFlavor());
     if (!same_twist) {
       errorQuda("checkSpinor: twist flavors do not match: %d %d", a.TwistFlavor(), b.TwistFlavor());
     }
   }


   // Read-only access to the even-parity half of a full field.
   const ColorSpinorField& ColorSpinorField::Even() const {
     const bool is_full = (siteSubset == QUDA_FULL_SITE_SUBSET);
     if (!is_full) { errorQuda("Cannot return even subset of %d subset", siteSubset); }

     const bool is_qdpjit = (fieldOrder == QUDA_QDPJIT_FIELD_ORDER);
     if (is_qdpjit) { errorQuda("Cannot return even subset of QDPJIT field"); }

     return *even;
   }


   // Read-only access to the odd-parity half of a full field.  Raises errorQuda
   // if this is not a full field or if it uses the QDPJIT field order.
   const ColorSpinorField& ColorSpinorField::Odd() const {
     if (siteSubset != QUDA_FULL_SITE_SUBSET)
       errorQuda("Cannot return odd subset of %d subset", siteSubset);
     // message previously said "even subset" (copied from Even())
     if (fieldOrder == QUDA_QDPJIT_FIELD_ORDER)
       errorQuda("Cannot return odd subset of QDPJIT field");
     return *odd;
   }


   // Mutable access to the even-parity half of a full field.
   ColorSpinorField& ColorSpinorField::Even() {
     const bool is_full = (siteSubset == QUDA_FULL_SITE_SUBSET);
     if (!is_full) { errorQuda("Cannot return even subset of %d subset", siteSubset); }

     const bool is_qdpjit = (fieldOrder == QUDA_QDPJIT_FIELD_ORDER);
     if (is_qdpjit) { errorQuda("Cannot return even subset of QDPJIT field"); }

     return *even;
   }


   // Mutable access to the odd-parity half of a full field.  Raises errorQuda
   // if this is not a full field or if it uses the QDPJIT field order.
   ColorSpinorField& ColorSpinorField::Odd() {
     if (siteSubset != QUDA_FULL_SITE_SUBSET)
       errorQuda("Cannot return odd subset of %d subset", siteSubset);
     // message previously said "even subset" (copied from Even())
     if (fieldOrder == QUDA_QDPJIT_FIELD_ORDER)
       errorQuda("Cannot return odd subset of QDPJIT field");
     return *odd;
   }


   // Returns component idx of a composite field.  Raises errorQuda when the
   // field is not composite or when idx lies outside [0, CompositeDim()).
   ColorSpinorField& ColorSpinorField::Component(const int idx) {
     if (this->IsComposite()) {
       // reject negative indices as well as indices past the end
       if (idx >= 0 && idx < this->CompositeDim()) {  //  setup eigenvector form the set
         return *(dynamic_cast<ColorSpinorField*>(components[idx]));
       }
       else{
         errorQuda("Incorrect component index...");
       }
     }
     errorQuda("Cannot get requested component");
     exit(-1); // not expected to be reached; also gives every path an exit without a return value
   }


   // Const overload: returns component idx of a composite field.  Raises
   // errorQuda when the field is not composite or when idx lies outside
   // [0, CompositeDim()).
   ColorSpinorField& ColorSpinorField::Component(const int idx) const {
     if (this->IsComposite()) {
       // reject negative indices as well as indices past the end
       if (idx >= 0 && idx < this->CompositeDim()) {  //  setup eigenvector form the set
         return *(dynamic_cast<ColorSpinorField*>(components[idx]));
       }
       else{
         errorQuda("Incorrect component index...");
       }
     }
     errorQuda("Cannot get requested component");
     exit(-1); // not expected to be reached; also gives every path an exit without a return value
   }


   // Ghost pointer number i; only available on single-parity fields.
   void* ColorSpinorField::Ghost(const int i) {
     const bool single_parity = (siteSubset == QUDA_PARITY_SITE_SUBSET);
     if (!single_parity) { errorQuda("Site Subset %d is not supported",siteSubset); }
     return ghost[i];
   }


   // Read-only ghost pointer number i; only available on single-parity fields.
   const void* ColorSpinorField::Ghost(const int i) const {
     const bool single_parity = (siteSubset == QUDA_PARITY_SITE_SUBSET);
     if (!single_parity) { errorQuda("Site Subset %d is not supported",siteSubset); }
     return ghost[i];
   }


   // Ghost norm pointer number i; only available on single-parity fields.
   void* ColorSpinorField::GhostNorm(const int i){
     const bool single_parity = (siteSubset == QUDA_PARITY_SITE_SUBSET);
     if (!single_parity) { errorQuda("Site Subset %d is not supported",siteSubset); }
     return ghostNorm[i];
   }


   // Read-only ghost norm pointer number i; only available on single-parity fields.
   const void* ColorSpinorField::GhostNorm(const int i) const{
     const bool single_parity = (siteSubset == QUDA_PARITY_SITE_SUBSET);
     if (!single_parity) { errorQuda("Site Subset %d is not supported",siteSubset); }
     return ghostNorm[i];
   }


   // Returns the ghost_buf pointer array itself (no site-subset check, unlike Ghost(i)).
   void* const* ColorSpinorField::Ghost() const {

     return ghost_buf;

   }


   /*

     Convert from 1-dimensional index to the n-dimensional spatial index.

     With full fields, we assume that the field is even-odd ordered.  The

     lattice coordinates that are computed here are full-field

     coordinates.

   */

   void ColorSpinorField::LatticeIndex(int *y, int i) const {
     const bool full_field = (siteSubset == QUDA_FULL_SITE_SUBSET);

     // peel the coordinates off the linear index, fastest dimension first;
     // for a full field dimension 0 is halved (each parity holds half of it)
     int remainder = i;
     for (int d = 0; d < nDim; d++) {
       const int extent = (full_field && d == 0) ? x[0] / 2 : x[d];
       y[d] = remainder % extent;
       remainder /= extent;
     }

     // parity is the slowest running dimension: whatever is left over
     int oddBit = remainder;

     // convert into the full-field lattice coordinate
     if (full_field) {
       for (int d = 1; d < nDim; d++) oddBit += y[d];
       oddBit &= 1;
     }
     y[0] = 2*y[0] + oddBit;  // compute the full x coordinate
   }


   /*

     Convert from n-dimensional spatial index to the 1-dimensional index.

     With full fields, we assume that the field is even-odd ordered.  The

     input lattice coordinates are always full-field coordinates.

   */

   // Writes to i the linear index of the site with coordinates y.  The array y
   // is only read: the halved coordinate needed for full fields is computed
   // locally instead of being written into y and restored afterwards.
   void ColorSpinorField::OffsetIndex(int &i, int *y) const {
     const bool full_field = (siteSubset == QUDA_FULL_SITE_SUBSET);

     // for a full field the parity of the site is the slowest running index
     int parity = 0;
     if (full_field) {
       for (int d=0; d<nDim; d++) parity += y[d];
       parity = parity & 1;
     }

     int index = parity;
     for (int d=nDim-1; d>=0; d--) {
       // for a full field dimension 0 is stored checkerboarded: half extent, half coordinate
       const bool halved = full_field && d == 0;
       const int extent = halved ? x[d] / 2 : x[d];
       const int coord  = halved ? y[d] / 2 : y[d];
       index = extent*index + coord;
     }
     i = index;
   }


   // Factory: allocates the CPU or CUDA flavour of the field selected by
   // param.location; any other location is reported through errorQuda.
   ColorSpinorField* ColorSpinorField::Create(const ColorSpinorParam &param) {
     switch (param.location) {
     case QUDA_CPU_FIELD_LOCATION: return new cpuColorSpinorField(param);
     case QUDA_CUDA_FIELD_LOCATION: return new cudaColorSpinorField(param);
     default: errorQuda("Invalid field location %d", param.location);
     }
     return nullptr;
   }


   // Factory: allocates the CPU or CUDA flavour of the field selected by
   // param.location, constructed from src and param; any other location is
   // reported through errorQuda.
   ColorSpinorField* ColorSpinorField::Create(const ColorSpinorField &src, const ColorSpinorParam &param) {
     switch (param.location) {
     case QUDA_CPU_FIELD_LOCATION: return new cpuColorSpinorField(src, param);
     case QUDA_CUDA_FIELD_LOCATION: return new cudaColorSpinorField(src, param);
     default: errorQuda("Invalid field location %d", param.location);
     }
     return nullptr;
   }


   // Creates a reference field (QUDA_REFERENCE_FIELD_CREATE) that uses this
   // field's data pointer.  The requested precision may not exceed this field's
   // precision, and the alias must fit inside this field's allocation.
   ColorSpinorField *ColorSpinorField::CreateAlias(const ColorSpinorParam &param_)
   {
     if (param_.Precision() > precision)
       errorQuda("Cannot create an alias to source with lower precision than the alias");

     ColorSpinorParam alias_param(param_);
     alias_param.create = QUDA_REFERENCE_FIELD_CREATE;
     alias_param.v = V();

     // if norm field in the source exists, use it, else use the second
     // half of main field for norm storage, ensuring that the start of
     // the norm field is on an alignment boundary if we're using an
     // internal field
     if (alias_param.Precision() < QUDA_SINGLE_PRECISION) {
       size_t norm_offset = 0;
       if (isNative() || fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {
         norm_offset = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2 * ALIGNMENT_ADJUST(Bytes() / 4) :
                                                               ALIGNMENT_ADJUST(Bytes() / 2);
       }

       if (Norm()) {
         alias_param.norm = Norm();
       } else {
         alias_param.norm = static_cast<char *>(V()) + norm_offset;
       }
     }

     auto alias = ColorSpinorField::Create(alias_param);

     if (alias->Bytes() > Bytes()) errorQuda("Alias footprint %lu greater than source %lu", alias->Bytes(), Bytes());

     if (alias->Precision() < QUDA_SINGLE_PRECISION) {
       char *const body_end = static_cast<char *>(alias->V()) + alias->Bytes();
       char *const norm_end = static_cast<char *>(alias->Norm()) + alias->NormBytes();
       char *const source_end = static_cast<char *>(V()) + Bytes();

       // check that norm does not overlap with body
       if (body_end > alias->Norm()) errorQuda("Overlap between alias body and norm");

       // check that norm does not fall off the end
       if (norm_end > source_end) errorQuda("Norm is not contained in the srouce field");
     }

     return alias;
   }


   // Creates a zero-initialised, full-subset coarse-grid field derived from this one.
   //
   //   geoBlockSize   - per-dimension block size; each extent is divided by it
   //   spinBlockSize  - spin block size (not used when nSpin == 1)
   //   Nvec           - number of colours of the coarse field
   //   new_precision  - precision of the new field; QUDA_INVALID_PRECISION means
   //                    "same as this field"
   //   new_location   - location of the new field; QUDA_INVALID_FIELD_LOCATION
   //                    means "same as this field"
   //   new_mem_type   - memory type; QUDA_MEMORY_INVALID selects device memory
   //                    for CUDA fields and pinned memory otherwise
   //
   // Non-positive block sizes are reported through errorQuda instead of
   // dividing by them.
   ColorSpinorField* ColorSpinorField::CreateCoarse(const int *geoBlockSize, int spinBlockSize, int Nvec,
                                                    QudaPrecision new_precision, QudaFieldLocation new_location,
                                                    QudaMemoryType new_mem_type) {
     ColorSpinorParam coarseParam(*this);

     int geoBlockVolume = 1;
     for (int d = 0; d < nDim; d++) {
       if (geoBlockSize[d] <= 0) errorQuda("Invalid geometric block size %d in dimension %d", geoBlockSize[d], d);
       coarseParam.x[d] = x[d]/geoBlockSize[d];
       geoBlockVolume *= geoBlockSize[d];
     }

     // Detect if the "coarse" op is the Kahler-Dirac op or something else
     // that still acts on a fine staggered ColorSpinorField
     if (geoBlockVolume == 1 && Nvec == nColor && nSpin == 1) {
       coarseParam.nSpin = nSpin;
       coarseParam.nColor = nColor;
     } else {
       // spinBlockSize is only a divisor when nSpin != 1
       if (nSpin != 1 && spinBlockSize <= 0) errorQuda("Invalid spin block size %d", spinBlockSize);
       coarseParam.nSpin = (nSpin == 1) ? 2 : (nSpin / spinBlockSize); // coarsening staggered check
       coarseParam.nColor = Nvec;
     }

     coarseParam.siteSubset = QUDA_FULL_SITE_SUBSET; // coarse grid is always full
     coarseParam.create = QUDA_ZERO_FIELD_CREATE;

     // if new precision is not set, use this->precision
     new_precision = (new_precision == QUDA_INVALID_PRECISION) ? Precision() : new_precision;

     // if new location is not set, use this->location
     new_location = (new_location == QUDA_INVALID_FIELD_LOCATION) ? Location() : new_location;

     // for GPU fields, always use native ordering to ensure coalescing
     if (new_location == QUDA_CUDA_FIELD_LOCATION) coarseParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
     else coarseParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;

     coarseParam.setPrecision(new_precision);

     // set where we allocate the field
     coarseParam.mem_type = (new_mem_type != QUDA_MEMORY_INVALID) ? new_mem_type :
       (new_location == QUDA_CUDA_FIELD_LOCATION ? QUDA_MEMORY_DEVICE : QUDA_MEMORY_PINNED);

     ColorSpinorField *coarse = nullptr;
     if (new_location == QUDA_CPU_FIELD_LOCATION) {
       coarse = new cpuColorSpinorField(coarseParam);
     } else if (new_location== QUDA_CUDA_FIELD_LOCATION) {
       coarse = new cudaColorSpinorField(coarseParam);
     } else {
       errorQuda("Invalid field location %d", new_location);
     }

     return coarse;
   }


   /**
      @brief Create a fine color-spinor field, using this field to set the meta data.

      The new field is created with QUDA_ZERO_FIELD_CREATE on the full
      site subset.  Each lattice dimension of this field is multiplied
      by the matching entry of geoBlockSize and the spin count by
      spinBlockSize.

      @param[in] geoBlockSize Geometric block size per dimension (nDim entries are read)
      @param[in] spinBlockSize Factor applied to this field's nSpin
      @param[in] Nvec Number of colors of the fine field
      @param[in] new_precision Precision of the fine field;
      QUDA_INVALID_PRECISION selects this field's precision
      @param[in] new_location Location of the fine field;
      QUDA_INVALID_FIELD_LOCATION selects this field's location
      @param[in] new_mem_type Memory type of the fine field; QUDA_MEMORY_INVALID
      selects device memory for CUDA fields and pinned memory otherwise
      @return Pointer to the field allocated with new
   */
   ColorSpinorField* ColorSpinorField::CreateFine(const int *geoBlockSize, int spinBlockSize, int Nvec,
                                                  QudaPrecision new_precision, QudaFieldLocation new_location,
                                                  QudaMemoryType new_mem_type) {
     // start from this field's meta data and scale it up to the fine grid
     ColorSpinorParam fineParam(*this);
     for (int dim = 0; dim < nDim; ++dim) fineParam.x[dim] = geoBlockSize[dim] * x[dim];
     fineParam.nSpin = spinBlockSize * nSpin;
     fineParam.nColor = Nvec;
     fineParam.create = QUDA_ZERO_FIELD_CREATE;
     fineParam.siteSubset = QUDA_FULL_SITE_SUBSET; // FIXME fine grid is always full

     // an unset precision or location falls back to that of this field
     if (new_precision == QUDA_INVALID_PRECISION) new_precision = Precision();
     if (new_location == QUDA_INVALID_FIELD_LOCATION) new_location = Location();
     const bool on_device = (new_location == QUDA_CUDA_FIELD_LOCATION);

     if (!on_device) {
       fineParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
       fineParam.setPrecision(new_precision);
     } else {
       // for GPU fields, always use native ordering to ensure coalescing
       fineParam.setPrecision(new_precision, new_precision, true);
     }

     // an explicit memory type wins; otherwise it follows the location
     if (new_mem_type != QUDA_MEMORY_INVALID) {
       fineParam.mem_type = new_mem_type;
     } else {
       fineParam.mem_type = on_device ? QUDA_MEMORY_DEVICE : QUDA_MEMORY_PINNED;
     }

     if (new_location == QUDA_CPU_FIELD_LOCATION) return new cpuColorSpinorField(fineParam);
     if (new_location == QUDA_CUDA_FIELD_LOCATION) return new cudaColorSpinorField(fineParam);

     errorQuda("Invalid field location %d", new_location);
     return NULL;
   }


   /**
      @brief Stream a human-readable summary of a field's meta data.

      Writes one "name = value" line per member, each terminated with
      std::endl.  The composite dimensions are only printed when the
      field is flagged as a composite, and the component id only when
      it is flagged as a component.

      @param[in,out] out Stream to write to
      @param[in] a Field whose meta data is printed
      @return The stream passed in, to allow chaining
   */
   std::ostream& operator<<(std::ostream &out, const ColorSpinorField &a) {
     out << "typedid = " << typeid(a).name() << std::endl;
     out << "nColor = " << a.nColor << std::endl;
     out << "nSpin = " << a.nSpin << std::endl;
     out << "twistFlavor = " << a.twistFlavor << std::endl;
     out << "nDim = " << a.nDim << std::endl;
     for (int d = 0; d < a.nDim; d++) out << "x[" << d << "] = " << a.x[d] << std::endl;
     out << "volume = " << a.volume << std::endl;
     out << "pc_type = " << a.pc_type << std::endl;
     out << "suggested_parity = " << a.suggested_parity << std::endl;
     out << "precision = " << a.precision << std::endl;
     out << "ghost_precision = " << a.ghost_precision << std::endl;
     out << "pad = " << a.pad << std::endl;
     out << "stride = " << a.stride << std::endl;
     out << "real_length = " << a.real_length << std::endl;
     out << "length = " << a.length << std::endl;
     out << "bytes = " << a.bytes << std::endl;
     out << "norm_bytes = " << a.norm_bytes << std::endl;
     out << "siteSubset = " << a.siteSubset << std::endl;
     out << "siteOrder = " << a.siteOrder << std::endl;
     out << "fieldOrder = " << a.fieldOrder << std::endl;
     out << "gammaBasis = " << a.gammaBasis << std::endl;

     out << "Is composite = " << a.composite_descr.is_composite << std::endl;
     if (a.composite_descr.is_composite) {
       out << "Composite Dim = " << a.composite_descr.dim << std::endl;
       out << "Composite Volume = " << a.composite_descr.volume << std::endl;
       out << "Composite Stride = " << a.composite_descr.stride << std::endl;
       out << "Composite Length = " << a.composite_descr.length << std::endl;
     }

     out << "Is component = " << a.composite_descr.is_component << std::endl;
     // the id belongs with the component flag printed just above, not with
     // the composite flag, so it is guarded by is_component
     if (a.composite_descr.is_component) out << "Component ID = " << a.composite_descr.id << std::endl;

     // pc_type is emitted a second time here; kept so the output layout is unchanged
     out << "pc_type = " << a.pc_type << std::endl;
     return out;
   }


 } // namespace quda

quda::ColorSpinorField
Definition: color_spinor_field.h:379

quda::ColorSpinorField::Length
size_t Length() const
Definition: color_spinor_field.h:487

quda::ColorSpinorField::real_length
size_t real_length
Definition: color_spinor_field.h:415

quda::ColorSpinorField::exchange
void exchange(void **ghost, void **sendbuf, int nFace=1) const
Definition: color_spinor_field.cpp:406

quda::ColorSpinorField::fill
void fill(ColorSpinorParam &) const
Definition: color_spinor_field.cpp:381

quda::ColorSpinorField::LatticeIndex
void LatticeIndex(int *y, int i) const
Definition: color_spinor_field.cpp:660

quda::ColorSpinorField::ghostFace
int ghostFace[QUDA_MAX_DIM]
Definition: color_spinor_field.h:429

quda::ColorSpinorField::siteOrder
QudaSiteOrder siteOrder
Definition: color_spinor_field.h:440

quda::ColorSpinorField::Ghost
void *const  * Ghost() const
Definition: color_spinor_field.cpp:650

quda::ColorSpinorField::Ndim
int Ndim() const
Definition: color_spinor_field.h:483

quda::ColorSpinorField::CreateAlias
ColorSpinorField * CreateAlias(const ColorSpinorParam &param)
Create a field that aliases this field's storage. The alias field can use a different precision than ...
Definition: color_spinor_field.cpp:742

quda::ColorSpinorField::operator=
virtual ColorSpinorField & operator=(const ColorSpinorField &)
Definition: color_spinor_field.cpp:269

quda::ColorSpinorField::ghost_buf
void * ghost_buf[2 *QUDA_MAX_DIM]
Definition: color_spinor_field.h:432

quda::ColorSpinorField::suggested_parity
QudaParity suggested_parity
Definition: color_spinor_field.h:413

quda::ColorSpinorField::pc_type
QudaPCType pc_type
Definition: color_spinor_field.h:407

quda::ColorSpinorField::volume
size_t volume
Definition: color_spinor_field.h:400

quda::ColorSpinorField::even
ColorSpinorField * even
Definition: color_spinor_field.h:445

quda::ColorSpinorField::Norm
void * Norm()
Definition: color_spinor_field.h:502

quda::ColorSpinorField::Odd
const ColorSpinorField & Odd() const
Definition: color_spinor_field.cpp:578

quda::ColorSpinorField::composite_descr
CompositeColorSpinorFieldDescriptor composite_descr
used for deflation eigenvector sets etc.:
Definition: color_spinor_field.h:449

quda::ColorSpinorField::init
bool init
Definition: color_spinor_field.h:388

quda::ColorSpinorField::IsComposite
bool IsComposite() const
Definition: color_spinor_field.h:548

quda::ColorSpinorField::ColorSpinorField
ColorSpinorField(const ColorSpinorField &)
Definition: color_spinor_field.cpp:29

quda::ColorSpinorField::TwistFlavor
QudaTwistFlavorType TwistFlavor() const
Definition: color_spinor_field.h:482

quda::ColorSpinorField::Nvec
int Nvec() const
Definition: color_spinor_field.h:481

quda::ColorSpinorField::Nspin
int Nspin() const
Definition: color_spinor_field.h:480

quda::ColorSpinorField::twistFlavor
QudaTwistFlavorType twistFlavor
Definition: color_spinor_field.h:405

quda::ColorSpinorField::CreateFine
ColorSpinorField * CreateFine(const int *geoblockSize, int spinBlockSize, int Nvec, QudaPrecision precision=QUDA_INVALID_PRECISION, QudaFieldLocation location=QUDA_INVALID_FIELD_LOCATION, QudaMemoryType mem_type=QUDA_MEMORY_INVALID)
Create a fine color-spinor field, using this field to set the meta data.
Definition: color_spinor_field.cpp:826

quda::ColorSpinorField::length
size_t length
Definition: color_spinor_field.h:416

quda::ColorSpinorField::nDim
int nDim
Definition: color_spinor_field.h:397

quda::ColorSpinorField::components
CompositeColorSpinorField components
Definition: color_spinor_field.h:451

quda::ColorSpinorField::nVec
int nVec
Definition: color_spinor_field.h:395

quda::ColorSpinorField::gammaBasis
QudaGammaBasis gammaBasis
Definition: color_spinor_field.h:442

quda::ColorSpinorField::Ncolor
int Ncolor() const
Definition: color_spinor_field.h:479

quda::ColorSpinorField::pad
size_t pad
Definition: color_spinor_field.h:402

quda::ColorSpinorField::CompositeDim
int CompositeDim() const
Definition: color_spinor_field.h:551

quda::ColorSpinorField::ghost
void * ghost[2][QUDA_MAX_DIM]
Definition: color_spinor_field.h:426

quda::ColorSpinorField::Create
static ColorSpinorField * Create(const ColorSpinorParam &param)
Definition: color_spinor_field.cpp:714

quda::ColorSpinorField::Bytes
size_t Bytes() const
Definition: color_spinor_field.h:492

quda::ColorSpinorField::reset
void reset(const ColorSpinorParam &)
Definition: color_spinor_field.cpp:291

quda::ColorSpinorField::createGhostZone
void createGhostZone(int nFace, bool spin_project=true) const
Definition: color_spinor_field.cpp:44

quda::ColorSpinorField::checkField
static void checkField(const ColorSpinorField &, const ColorSpinorField &)
Definition: color_spinor_field.cpp:548

quda::ColorSpinorField::bytes
size_t bytes
Definition: color_spinor_field.h:436

quda::ColorSpinorField::ghost_precision_allocated
QudaPrecision ghost_precision_allocated
Definition: color_spinor_field.h:391

quda::ColorSpinorField::nSpin
int nSpin
Definition: color_spinor_field.h:394

quda::ColorSpinorField::CreateCoarse
ColorSpinorField * CreateCoarse(const int *geoBlockSize, int spinBlockSize, int Nvec, QudaPrecision precision=QUDA_INVALID_PRECISION, QudaFieldLocation location=QUDA_INVALID_FIELD_LOCATION, QudaMemoryType mem_Type=QUDA_MEMORY_INVALID)
Create a coarse color-spinor field, using this field to set the meta data.
Definition: color_spinor_field.cpp:776

quda::ColorSpinorField::stride
size_t stride
Definition: color_spinor_field.h:403

quda::ColorSpinorField::fieldOrder
QudaFieldOrder fieldOrder
Definition: color_spinor_field.h:441

quda::ColorSpinorField::OffsetIndex
void OffsetIndex(int &i, int *y) const
Definition: color_spinor_field.cpp:689

quda::ColorSpinorField::GhostNorm
void * GhostNorm(const int i)
Definition: color_spinor_field.cpp:640

quda::ColorSpinorField::V
void * V()
Definition: color_spinor_field.h:500

quda::ColorSpinorField::isNative
bool isNative() const
Definition: color_spinor_field.h:546

quda::ColorSpinorField::norm_bytes
size_t norm_bytes
Definition: color_spinor_field.h:437

quda::ColorSpinorField::volumeCB
size_t volumeCB
Definition: color_spinor_field.h:401

quda::ColorSpinorField::Even
const ColorSpinorField & Even() const
Definition: color_spinor_field.cpp:570

quda::ColorSpinorField::siteSubset
QudaSiteSubset siteSubset
Definition: color_spinor_field.h:439

quda::ColorSpinorField::Component
ColorSpinorField & Component(const int idx) const
Definition: color_spinor_field.cpp:615

quda::ColorSpinorField::ghostFaceCB
int ghostFaceCB[QUDA_MAX_DIM]
Definition: color_spinor_field.h:430

quda::ColorSpinorField::ghostNorm
void * ghostNorm[2][QUDA_MAX_DIM]
Definition: color_spinor_field.h:427

quda::ColorSpinorField::setTuningString
void setTuningString()
Set the vol_string and aux_string for use in tuning.
Definition: color_spinor_field.cpp:238

quda::ColorSpinorField::x
int x[QUDA_MAX_DIM]
Definition: color_spinor_field.h:398

quda::ColorSpinorField::~ColorSpinorField
virtual ~ColorSpinorField()
Definition: color_spinor_field.cpp:40

quda::ColorSpinorField::Pad
int Pad() const
Definition: color_spinor_field.h:491

quda::ColorSpinorField::X
const int * X() const
Definition: color_spinor_field.h:484

quda::ColorSpinorField::dslash_constant
DslashConstant dslash_constant
Definition: color_spinor_field.h:434

quda::ColorSpinorField::nColor
int nColor
Definition: color_spinor_field.h:393

quda::ColorSpinorField::odd
ColorSpinorField * odd
Definition: color_spinor_field.h:446

quda::ColorSpinorParam
Definition: color_spinor_field.h:131

quda::ColorSpinorParam::ColorSpinorParam
ColorSpinorParam()
Definition: color_spinor_field.h:192

quda::ColorSpinorParam::nColor
int nColor
Definition: color_spinor_field.h:136

quda::ColorSpinorParam::setPrecision
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
Definition: color_spinor_field.h:172

quda::ColorSpinorParam::fieldOrder
QudaFieldOrder fieldOrder
Definition: color_spinor_field.h:144

quda::ColorSpinorParam::nSpin
int nSpin
Definition: color_spinor_field.h:137

quda::ColorSpinorParam::create
QudaFieldCreate create
Definition: color_spinor_field.h:146

quda::LatticeField
Definition: lattice_field.h:145

quda::LatticeField::mh_send_fwd
MsgHandle * mh_send_fwd[2][QUDA_MAX_DIM]
Definition: lattice_field.h:331

quda::LatticeField::nDimComms
int nDimComms
Definition: lattice_field.h:198

quda::LatticeField::ghost_offset
size_t ghost_offset[QUDA_MAX_DIM][2]
Definition: lattice_field.h:274

quda::LatticeField::ghost_precision
QudaPrecision ghost_precision
Definition: lattice_field.h:181

quda::LatticeField::Precision
QudaPrecision Precision() const
Definition: lattice_field.h:567

quda::LatticeField::Location
QudaFieldLocation Location() const
Definition: lattice_field.cpp:683

quda::LatticeField::precision
QudaPrecision precision
Definition: lattice_field.h:178

quda::LatticeField::ghost_face_bytes
size_t ghost_face_bytes[QUDA_MAX_DIM]
Definition: lattice_field.h:264

quda::LatticeField::aux_string
char aux_string[TuneKey::aux_n]
Definition: lattice_field.h:388

quda::LatticeField::ghost_bytes
size_t ghost_bytes
Definition: lattice_field.h:254

quda::LatticeField::vol_string
char vol_string[TuneKey::volume_n]
Definition: lattice_field.h:385

quda::LatticeField::surfaceCB
int surfaceCB[QUDA_MAX_DIM]
Definition: lattice_field.h:172

quda::LatticeField::mh_send_back
MsgHandle * mh_send_back[2][QUDA_MAX_DIM]
Definition: lattice_field.h:334

quda::LatticeField::ghost_face_bytes_aligned
size_t ghost_face_bytes_aligned[QUDA_MAX_DIM]
Definition: lattice_field.h:269

quda::LatticeField::total_bytes
size_t total_bytes
Definition: lattice_field.h:163

quda::cpuColorSpinorField
Definition: color_spinor_field.h:976

quda::cudaColorSpinorField
Definition: color_spinor_field.h:682

color_spinor_field.h

comm_start
void comm_start(MsgHandle *mh)
Definition: communicator_stack.cpp:165

comm_dim_partitioned
int comm_dim_partitioned(int dim)
Definition: communicator_stack.cpp:74

comm_declare_receive_relative
#define comm_declare_receive_relative(buffer, dim, dir, nbytes)
Definition: comm_quda.h:82

comm_wait
void comm_wait(MsgHandle *mh)
Definition: communicator_stack.cpp:167

comm_free
void comm_free(MsgHandle *&mh)
Definition: communicator_stack.cpp:163

comm_declare_send_relative
#define comm_declare_send_relative(buffer, dim, dir, nbytes)
Definition: comm_quda.h:67

dim
std::array< int, 4 > dim
Definition: command_line_params.cpp:34

parity
QudaParity parity
Definition: covdev_test.cpp:40

QudaSiteOrder
enum QudaSiteOrder_s QudaSiteOrder

QudaPrecision
enum QudaPrecision_s QudaPrecision

QUDA_CUDA_FIELD_LOCATION
@ QUDA_CUDA_FIELD_LOCATION
Definition: enum_quda.h:326

QUDA_CPU_FIELD_LOCATION
@ QUDA_CPU_FIELD_LOCATION
Definition: enum_quda.h:325

QUDA_INVALID_FIELD_LOCATION
@ QUDA_INVALID_FIELD_LOCATION
Definition: enum_quda.h:327

QudaTwistFlavorType
enum QudaTwistFlavorType_s QudaTwistFlavorType

QUDA_INVALID_SITE_SUBSET
@ QUDA_INVALID_SITE_SUBSET
Definition: enum_quda.h:334

QUDA_FULL_SITE_SUBSET
@ QUDA_FULL_SITE_SUBSET
Definition: enum_quda.h:333

QUDA_PARITY_SITE_SUBSET
@ QUDA_PARITY_SITE_SUBSET
Definition: enum_quda.h:332

QUDA_INVALID_GAMMA_BASIS
@ QUDA_INVALID_GAMMA_BASIS
Definition: enum_quda.h:371

QudaPCType
enum QudaPCType_s QudaPCType

QUDA_INVALID_PARITY
@ QUDA_INVALID_PARITY
Definition: enum_quda.h:284

QUDA_MEMORY_PINNED
@ QUDA_MEMORY_PINNED
Definition: enum_quda.h:14

QUDA_MEMORY_DEVICE
@ QUDA_MEMORY_DEVICE
Definition: enum_quda.h:13

QUDA_MEMORY_INVALID
@ QUDA_MEMORY_INVALID
Definition: enum_quda.h:16

QudaFieldOrder
enum QudaFieldOrder_s QudaFieldOrder

QudaSiteSubset
enum QudaSiteSubset_s QudaSiteSubset

QudaFieldLocation
enum QudaFieldLocation_s QudaFieldLocation

QUDA_INVALID_SITE_ORDER
@ QUDA_INVALID_SITE_ORDER
Definition: enum_quda.h:342

QudaMemoryType
enum QudaMemoryType_s QudaMemoryType

QUDA_SINGLE_PRECISION
@ QUDA_SINGLE_PRECISION
Definition: enum_quda.h:64

QUDA_INVALID_PRECISION
@ QUDA_INVALID_PRECISION
Definition: enum_quda.h:66

QUDA_QUARTER_PRECISION
@ QUDA_QUARTER_PRECISION
Definition: enum_quda.h:62

QUDA_HALF_PRECISION
@ QUDA_HALF_PRECISION
Definition: enum_quda.h:63

QUDA_PC_INVALID
@ QUDA_PC_INVALID
Definition: enum_quda.h:397

QUDA_INVALID_FIELD_ORDER
@ QUDA_INVALID_FIELD_ORDER
Definition: enum_quda.h:356

QUDA_FLOAT2_FIELD_ORDER
@ QUDA_FLOAT2_FIELD_ORDER
Definition: enum_quda.h:348

QUDA_QDPJIT_FIELD_ORDER
@ QUDA_QDPJIT_FIELD_ORDER
Definition: enum_quda.h:353

QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
@ QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
Definition: enum_quda.h:351

QudaGammaBasis
enum QudaGammaBasis_s QudaGammaBasis

QUDA_ZERO_FIELD_CREATE
@ QUDA_ZERO_FIELD_CREATE
Definition: enum_quda.h:361

QUDA_INVALID_FIELD_CREATE
@ QUDA_INVALID_FIELD_CREATE
Definition: enum_quda.h:364

QUDA_REFERENCE_FIELD_CREATE
@ QUDA_REFERENCE_FIELD_CREATE
Definition: enum_quda.h:363

QUDA_NULL_FIELD_CREATE
@ QUDA_NULL_FIELD_CREATE
Definition: enum_quda.h:360

QUDA_TWIST_NO
@ QUDA_TWIST_NO
Definition: enum_quda.h:403

QUDA_TWIST_INVALID
@ QUDA_TWIST_INVALID
Definition: enum_quda.h:404

QUDA_TWIST_NONDEG_DOUBLET
@ QUDA_TWIST_NONDEG_DOUBLET
Definition: enum_quda.h:401

QUDA_TWIST_DEG_DOUBLET
@ QUDA_TWIST_DEG_DOUBLET
Definition: enum_quda.h:402

QudaParity
enum QudaParity_s QudaParity

pool_pinned_malloc
#define pool_pinned_malloc(size)
Definition: malloc_quda.h:172

pool_pinned_free
#define pool_pinned_free(ptr)
Definition: malloc_quda.h:173

quda::blas_lapack::native::init
void init()
Create the BLAS context.
Definition: blas_lapack_cublas.cpp:28

quda::blas::bytes
unsigned long long bytes

quda
Definition: blas_lapack.h:24

quda::norm
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:1088

quda::operator<<
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
Definition: clover_field.cpp:441

param
QudaGaugeParam param
Definition: pack_test.cpp:18

qudaMemcpy
#define qudaMemcpy(dst, src, count, kind)
Definition: quda_api.h:204

QUDA_MAX_DIM
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5.
Definition: quda_constants.h:17

ALIGNMENT_ADJUST
#define ALIGNMENT_ADJUST(n)
Definition: quda_internal.h:42

MsgHandle_s
Definition: communicator_mpi.cpp:15

QudaGaugeParam_s::location
QudaFieldLocation location
Definition: quda.h:33

quda::CompositeColorSpinorFieldDescriptor::length
size_t length
Definition: color_spinor_field.h:92

quda::CompositeColorSpinorFieldDescriptor::is_component
bool is_component
Definition: color_spinor_field.h:83

quda::CompositeColorSpinorFieldDescriptor::volumeCB
size_t volumeCB
Definition: color_spinor_field.h:89

quda::CompositeColorSpinorFieldDescriptor::norm_bytes
size_t norm_bytes
Definition: color_spinor_field.h:95

quda::CompositeColorSpinorFieldDescriptor::is_composite
bool is_composite
Definition: color_spinor_field.h:82

quda::CompositeColorSpinorFieldDescriptor::bytes
size_t bytes
Definition: color_spinor_field.h:94

quda::CompositeColorSpinorFieldDescriptor::id
int id
Definition: color_spinor_field.h:86

quda::CompositeColorSpinorFieldDescriptor::volume
size_t volume
Definition: color_spinor_field.h:88

quda::CompositeColorSpinorFieldDescriptor::stride
size_t stride
Definition: color_spinor_field.h:90

quda::CompositeColorSpinorFieldDescriptor::real_length
size_t real_length
Definition: color_spinor_field.h:91

quda::CompositeColorSpinorFieldDescriptor::dim
int dim
Definition: color_spinor_field.h:85

quda::DslashConstant::face_XYZ
int_fastdiv face_XYZ[4]
Definition: color_spinor_field.h:360

quda::DslashConstant::Ls
int Ls
Definition: color_spinor_field.h:350

quda::DslashConstant::face_Z
int_fastdiv face_Z[4]
Definition: color_spinor_field.h:357

quda::DslashConstant::face_Y
int_fastdiv face_Y[4]
Definition: color_spinor_field.h:356

quda::DslashConstant::Vh
int Vh
Definition: color_spinor_field.h:347

quda::DslashConstant::X5X4X3X2X1mX4X3X2X1
int X5X4X3X2X1mX4X3X2X1
Definition: color_spinor_field.h:373

quda::DslashConstant::X2X1mX1
int X2X1mX1
Definition: color_spinor_field.h:370

quda::DslashConstant::face_XY
int_fastdiv face_XY[4]
Definition: color_spinor_field.h:359

quda::DslashConstant::volume_4d_cb
int_fastdiv volume_4d_cb
Definition: color_spinor_field.h:353

quda::DslashConstant::Xh
int_fastdiv Xh[QUDA_MAX_DIM]
Definition: color_spinor_field.h:349

quda::DslashConstant::face_T
int_fastdiv face_T[4]
Definition: color_spinor_field.h:358

quda::DslashConstant::ghostFace
int ghostFace[QUDA_MAX_DIM+1]
Definition: color_spinor_field.h:363

quda::DslashConstant::volume_4d
int volume_4d
Definition: color_spinor_field.h:352

quda::DslashConstant::X2X1
int X2X1
Definition: color_spinor_field.h:366

quda::DslashConstant::X4X3X2X1hmX3X2X1h
int X4X3X2X1hmX3X2X1h
Definition: color_spinor_field.h:374

quda::DslashConstant::X4X3X2X1mX3X2X1
int X4X3X2X1mX3X2X1
Definition: color_spinor_field.h:372

quda::DslashConstant::X4X3X2X1
int X4X3X2X1
Definition: color_spinor_field.h:368

quda::DslashConstant::ghostFaceCB
int ghostFaceCB[QUDA_MAX_DIM+1]
Definition: color_spinor_field.h:364

quda::DslashConstant::X3X2X1mX2X1
int X3X2X1mX2X1
Definition: color_spinor_field.h:371

quda::DslashConstant::X
int_fastdiv X[QUDA_MAX_DIM]
Definition: color_spinor_field.h:348

quda::DslashConstant::X3X2X1
int X3X2X1
Definition: color_spinor_field.h:367

quda::DslashConstant::face_XYZT
int_fastdiv face_XYZT[4]
Definition: color_spinor_field.h:361

quda::DslashConstant::dims
int_fastdiv dims[4][3]
Definition: color_spinor_field.h:376

quda::DslashConstant::face_X
int_fastdiv face_X[4]
Definition: color_spinor_field.h:355

quda::LatticeFieldParam
Definition: lattice_field.h:48

quda::LatticeFieldParam::mem_type
QudaMemoryType mem_type
Definition: lattice_field.h:74

quda::LatticeFieldParam::x
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:68

quda::LatticeFieldParam::siteSubset
QudaSiteSubset siteSubset
Definition: lattice_field.h:72

quda::LatticeFieldParam::Precision
QudaPrecision Precision() const
Definition: lattice_field.h:59

quda::TuneKey::aux_n
static const int aux_n
Definition: tune_key.h:12

quda::TuneKey::volume_n
static const int volume_n
Definition: tune_key.h:10

errorQuda
#define errorQuda(...)
Definition: util_quda.h:120