15 #define REORDER_LOCATION QUDA_CUDA_FIELD_LOCATION
17 #define REORDER_LOCATION QUDA_CPU_FIELD_LOCATION
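// Note: REORDER_LOCATION selects where loadSpinorField/saveSpinorField reorder
// data between field orders: on the device (QUDA_CUDA_FIELD_LOCATION) or on the
// host through a pinned staging buffer (QUDA_CPU_FIELD_LOCATION).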
25 int cudaColorSpinorField::initGhostFaceBuffer = 0;
26 void* cudaColorSpinorField::ghostFaceBuffer[2];
27 void* cudaColorSpinorField::fwdGhostFaceBuffer[2][QUDA_MAX_DIM];
28 void* cudaColorSpinorField::backGhostFaceBuffer[2][QUDA_MAX_DIM];
29 size_t cudaColorSpinorField::ghostFaceBytes = 0;
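// Note: the ghost-face buffers are static and shared by all fields: one
// contiguous device allocation per buffer index (double buffered), with
// per-dimension forward/backward pointers carved out of it. ghostFaceBytes
// records the current allocation so it is only regrown when a larger face
// is requested (see allocateGhostBuffer below).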
38 initComms(false), bufferMessageHandler(0), nFaceComms(0) {
62 initComms(false), bufferMessageHandler(0), nFaceComms(0) {
71 initComms(false), bufferMessageHandler(0), nFaceComms(0) {
91 errorQuda("Cannot reference a non-cuda field");
114 copySpinorField(src);
125 initComms(false), bufferMessageHandler(0), nFaceComms(0) {
127 copySpinorField(src);
136 errorQuda("Unknown input ColorSpinorField %s", typeid(src).name());
150 copySpinorField(src);
162 loadSpinorField(src);
178 } else if (nSpin == 1) {
184 } else if (nSpin == 1) {
212 memcpy(param.x, x, nDim*sizeof(int));
225 #ifdef USE_TEXTURE_OBJECTS
243 memcpy(param.x, x, nDim*sizeof(int));
251 for(int id = 0; id < eigv_dim; id++)
256 #ifdef USE_TEXTURE_OBJECTS //(a lot of texture objects...)
273 #ifdef USE_TEXTURE_OBJECTS
274 if((eigv_dim == 0) || (eigv_dim > 0 && eigv_id > -1))
280 for(int i=0; i<nDim; ++i){
291 #ifdef USE_TEXTURE_OBJECTS
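// Note: with USE_TEXTURE_OBJECTS the spinor data is read through CUDA texture
// objects. createTexObject() binds a linear texture to the field data v (tex)
// and a float texture to the norm array (texNorm, used for half precision);
// the channel format and read mode are chosen according to the field precision.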
292 void cudaColorSpinorField::createTexObject() {
295 if (texInit) errorQuda("Already bound textures");
299 cudaChannelFormatDesc desc;
300 memset(&desc, 0, sizeof(cudaChannelFormatDesc));
302 else desc.f = cudaChannelFormatKindSigned;
317 cudaResourceDesc resDesc;
318 memset(&resDesc, 0, sizeof(resDesc));
319 resDesc.resType = cudaResourceTypeLinear;
320 resDesc.res.linear.devPtr = v;
321 resDesc.res.linear.desc = desc;
322 resDesc.res.linear.sizeInBytes = bytes;
324 cudaTextureDesc texDesc;
325 memset(&texDesc, 0, sizeof(texDesc));
327 else texDesc.readMode = cudaReadModeElementType;
329 cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
334 cudaChannelFormatDesc desc;
335 memset(&desc, 0, sizeof(cudaChannelFormatDesc));
336 desc.f = cudaChannelFormatKindFloat;
339 cudaResourceDesc resDesc;
340 memset(&resDesc, 0, sizeof(resDesc));
341 resDesc.resType = cudaResourceTypeLinear;
342 resDesc.res.linear.devPtr = norm;
343 resDesc.res.linear.desc = desc;
346 cudaTextureDesc texDesc;
347 memset(&texDesc, 0, sizeof(texDesc));
348 texDesc.readMode = cudaReadModeElementType;
350 cudaCreateTextureObject(&texNorm, &resDesc, &texDesc, NULL);
358 void cudaColorSpinorField::destroyTexObject() {
360 cudaDestroyTextureObject(tex);
368 void cudaColorSpinorField::destroy() {
380 std::vector<ColorSpinorField*>::iterator vec;
387 #ifdef USE_TEXTURE_OBJECTS
388 if((eigv_dim == 0) || (eigv_dim > 0 && eigv_id > -1))
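// Note: zeroPad() clears the padding region of the field. The data is laid out
// as Npad strided blocks, and cudaMemset2D zeroes the pad_bytes gap after each
// block; for a full eigenvector bundle (eigv_dim > 0 and eigv_id == -1) each
// eigenvector's pad is cleared as well.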
420 void cudaColorSpinorField::zeroPad() {
424 if (eigv_dim > 0 && eigv_id == -1){
431 if(pad_bytes) cudaMemset2D(dst, pitch, 0, pad_bytes, Npad);
438 void cudaColorSpinorField::copy(const cudaColorSpinorField &src) {
443 void cudaColorSpinorField::copySpinorField(const ColorSpinorField &src) {
448 copy(dynamic_cast<const cudaColorSpinorField&>(src));
452 loadSpinorField(src);
454 errorQuda("Unknown input ColorSpinorField %s", typeid(src).name());
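// Note: loadSpinorField() imports a source field into this device field. A host
// source is either staged to device memory with cudaMemcpy, or (in the zero-copy
// path) copied into the pinned buffer and mapped into the device address space
// with cudaHostGetDevicePointer, before the reordering copy is performed.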
458 void cudaColorSpinorField::loadSpinorField(const ColorSpinorField &src) {
462 for(int b=0; b<2; ++b){
479 cudaMemcpy(Src, src.V(), src.Bytes(), cudaMemcpyHostToDevice);
480 cudaMemcpy(srcNorm, src.Norm(), src.NormBytes(), cudaMemcpyHostToDevice);
482 for(int b=0; b<2; ++b){
485 memcpy(bufferPinned[bufferIndex], src.V(), src.Bytes());
486 memcpy((char*)bufferPinned[bufferIndex]+src.Bytes(), src.Norm(), src.NormBytes());
488 cudaHostGetDevicePointer(&Src, bufferPinned[bufferIndex], 0);
489 srcNorm = (void*)((char*)Src + src.Bytes());
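// Note: saveSpinorField() is the reverse operation, exporting this field to a
// host destination, again either through staging copies or through the mapped
// pinned buffer.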
501 void cudaColorSpinorField::saveSpinorField(ColorSpinorField &dest) const {
521 cudaHostGetDevicePointer(&dst, bufferPinned[bufferIndex], 0);
522 dstNorm = (char*)dst+dest.Bytes();
527 cudaMemcpy(dest.V(), dst, dest.Bytes(), cudaMemcpyDeviceToHost);
528 cudaMemcpy(dest.Norm(), dstNorm, dest.NormBytes(), cudaMemcpyDeviceToHost);
531 memcpy(dest.Norm(), (char*)bufferPinned[bufferIndex]+dest.Bytes(), dest.NormBytes());
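// Note: allocateGhostBuffer() sizes one contiguous device buffer to hold the
// packed ghost faces of all partitioned dimensions (both directions, nFace
// layers each; for half precision the face also carries the norm values). The
// static buffers are freed and reallocated only when the requested size grows,
// and the per-dimension back/forward pointers are then set to offsets inside them.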
541 if (nSpin == 4) Nint /= 2;
544 size_t faceBytes = 0;
545 for (int i=0; i<4; i++) {
553 if(initGhostFaceBuffer == 0 || faceBytes > ghostFaceBytes){
556 if (initGhostFaceBuffer){
557 for(int b=0; b<2; ++b) device_free(ghostFaceBuffer[b]);
561 for(int b=0; b<2; ++b) ghostFaceBuffer[b] = device_malloc(faceBytes);
562 initGhostFaceBuffer = 1;
563 ghostFaceBytes = faceBytes;
569 for (int i=0; i<4; i++) {
572 for(int b=0; b<2; ++b) backGhostFaceBuffer[b][i] = (void*)(((char*)ghostFaceBuffer[b]) + offset);
576 for(int b=0; b<2; ++b) fwdGhostFaceBuffer[b][i] = (void*)(((char*)ghostFaceBuffer[b]) + offset);
586 if (!initGhostFaceBuffer) return;
588 for(int b=0; b<2; ++b) device_free(ghostFaceBuffer[b]);
590 for(int i=0;i < 4; i++){
592 for(int b=0; b<2; ++b){
593 backGhostFaceBuffer[b][i] = NULL;
594 fwdGhostFaceBuffer[b][i] = NULL;
597 initGhostFaceBuffer = 0;
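// Note: packGhost() launches the packFace kernel to gather the face of
// dimension dim for the given parity into packBuffer, which is either a
// caller-supplied (zero-copy) buffer or the static ghostFaceBuffer.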
604 void *buffer, double a, double b)
615 void *packBuffer = buffer ? buffer : ghostFaceBuffer[bufferIndex];
616 packFace(packBuffer, *this, nFace, dagger, parity, dim, face_num, *stream, a, b);
618 errorQuda("packGhost not built on single-GPU build");
637 (dir == QUDA_BACKWARDS) ? this->backGhostFaceBuffer[bufferIndex][dim] : this->fwdGhostFaceBuffer[bufferIndex][dim];
639 cudaMemcpyAsync(ghost_spinor, gpu_buf, bytes, cudaMemcpyDeviceToHost, *stream);
642 int Npad = Nint / Nvec;
647 } else if (nSpin == 4) {
650 bool upper = dagger ? true : false;
652 int lower_spin_offset = Npad*stride;
653 if (upper) offset = (dir == QUDA_BACKWARDS ? 0 : Nt_minus1_offset);
654 else offset = lower_spin_offset + (dir == QUDA_BACKWARDS ? 0 : Nt_minus1_offset);
662 void *dst = (char*)ghost_spinor;
663 void *src = (char*)v + offset*Nvec*precision;
666 cudaMemcpy2DAsync(dst, len, src, spitch, len, Npad, cudaMemcpyDeviceToHost, *stream);
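// Note on the 2D copy above: cudaMemcpy2DAsync(dst, dpitch, src, spitch, width,
// height, kind, stream) here gathers Npad rows of len bytes each, spaced spitch
// bytes apart in the field, and packs them contiguously into the host buffer
// (destination pitch == len), asynchronously on the given stream.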
669 int norm_offset = (dir == QUDA_BACKWARDS) ? 0 : Nt_minus1_offset*sizeof(float);
670 void *dst = (char*)ghost_spinor + nFace*Nint*ghostFace[3]*precision;
671 void *src = (char*)norm + norm_offset;
672 cudaMemcpyAsync(dst, src, nFace*ghostFace[3]*sizeof(float), cudaMemcpyDeviceToHost, *stream);
675 int flavorVolume = volume / 2;
677 int Npad = Nint / Nvec;
678 int flavor1_Nt_minus1_offset = (flavorVolume - flavorTFace);
679 int flavor2_Nt_minus1_offset = (volume - flavorTFace);
680 int flavor1_offset = 0;
681 int flavor2_offset = 0;
684 bool upper = dagger ? true : false;
686 int lower_spin_offset = Npad*stride;
688 flavor1_offset = (dir == QUDA_BACKWARDS ? 0 : flavor1_Nt_minus1_offset);
689 flavor2_offset = (dir == QUDA_BACKWARDS ? flavorVolume : flavor2_Nt_minus1_offset);
691 flavor1_offset = lower_spin_offset + (dir == QUDA_BACKWARDS ? 0 : flavor1_Nt_minus1_offset);
692 flavor2_offset = lower_spin_offset + (dir == QUDA_BACKWARDS ? flavorVolume : flavor2_Nt_minus1_offset);
700 void *dst = (char*)ghost_spinor;
701 void *src = (char*)v + flavor1_offset*Nvec*precision;
704 size_t dpitch = 2*len;
705 cudaMemcpy2DAsync(dst, dpitch, src, spitch, len, Npad, cudaMemcpyDeviceToHost, *stream);
706 dst = (char*)ghost_spinor+len;
707 src = (char*)v + flavor2_offset*Nvec*precision;
708 cudaMemcpy2DAsync(dst, dpitch, src, spitch, len, Npad, cudaMemcpyDeviceToHost, *stream);
711 int Nt_minus1_offset = (flavorVolume - flavorTFace);
712 int norm_offset = (dir == QUDA_BACKWARDS) ? 0 : Nt_minus1_offset*sizeof(float);
713 void *dst = (char*)ghost_spinor + Nint*ghostFace[3]*precision;
714 void *src = (char*)norm + norm_offset;
715 size_t dpitch = flavorTFace*sizeof(float);
716 size_t spitch = flavorVolume*sizeof(float);
717 cudaMemcpy2DAsync(dst, dpitch, src, spitch, flavorTFace*sizeof(float), 2, cudaMemcpyDeviceToHost, *stream);
721 errorQuda("sendGhost not built on single-GPU build");
738 void *dst = (char*)v + precision*offset;
739 const void *src = ghost_spinor;
741 cudaMemcpyAsync(dst, src, len*precision, cudaMemcpyHostToDevice, *stream);
750 void *dst = static_cast<char*>(norm) + norm_offset*sizeof(float);
751 const void *src = static_cast<const char*>(ghost_spinor)+nFace*Nint*ghostFace[dim]*precision;
752 cudaMemcpyAsync(dst, src, normlen*sizeof(float), cudaMemcpyHostToDevice, *stream);
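// Note: the *Extended variants below perform the same packing/unpacking for
// extended fields, where the exchanged region is widened by the border R[] in
// each dimension.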
774 void *packBuffer = buffer ? buffer : ghostFaceBuffer[bufferIndex];
775 packFaceExtended(packBuffer, *this, nFace, R, dagger, parity, dim, face_num, *stream);
777 errorQuda("packGhostExtended not built on single-GPU build");
795 unpackGhost(ghost_spinor, nFace, dim, dir, dagger, stream);
805 const int face_num = 2;
806 const bool unpack = true;
807 const int R[4] = {0,0,0,0};
808 packFaceExtended(ghostFaceBuffer[bufferIndex], *this, nFace, R, dagger, parity, dim, face_num, *stream, unpack);
810 errorQuda("unpackGhostExtended not built on single-GPU build");
822 if (!initComms || nFaceComms != nFace) {
828 errorQuda("Only supports single parity fields");
836 errorQuda("Requested number of faces %d in communicator is greater than supported %d",
840 size_t faceBytes = 0;
854 faceBytes += 2*nbytes[i];
859 for(int b=0; b<2; ++b){
873 for(int b=0; b<2; ++b){
878 my_back_norm_face[b][i] = static_cast<char*>(backGhostFaceBuffer[b][i]) + nFace*ghostFace[i]*Ndof*precision;
879 from_back_norm_face[b][i] = ghostNorm[i];
884 for(int b=0; b<2; ++b){
892 for(int b=0; b<2; ++b){
897 my_fwd_norm_face[b][i] = static_cast<char*>(fwdGhostFaceBuffer[b][i]) + nFace*ghostFace[i]*Ndof*precision;
898 from_fwd_norm_face[b][i] = static_cast<char*>(ghostNorm[i]) + nFace*ghostFace[i]*sizeof(float);
902 for(int b=0; b<2; ++b){
912 for(int b=0; b<2; ++b){
927 for(int b=0; b<2; ++b){
947 size_t nbytes_Nface = surfaceCB[i]*Ndof*precision*(j+1);
950 size_t nbytes_Nface = (nbytes[i] / maxNface) * (j+1);
952 for(int b=0; b<2; ++b){
961 for(int b=0; b<2; ++b){
964 mh_send_norm_fwd[b][j][2*i+1] = mh_send_norm_fwd[b][j][2*i];
965 mh_send_norm_back[b][j][2*i+1] = mh_send_norm_back[b][j][2*i];
970 errorQuda("GPU_COMMS for non-degenerate doublet only supported with time-dimension kernel packing enabled.");
980 int Nblocks = Ndof / Nvec();
988 offset[2*1 + 0] = endOffset;
989 offset[2*0 + 1] = offset[2*0 + 0];
990 offset[2*1 + 1] = offset[2*1 + 0];
991 } else if (nSpin == 4) {
993 offset[2*0 + 0] = Nblocks*stride;
994 offset[2*1 + 0] = endOffset;
997 offset[2*1 + 1] = Nblocks*stride + endOffset;
999 errorQuda("Unsupported number of spin components");
1002 for (int k=0; k<4; k++) {
1009 if (blksize * Nblocks != nbytes_Nface)
1010 errorQuda("Total strided message size does not match expected size");
1014 for(int b=0; b<2; ++b){
1030 void *norm_fwd = static_cast<float*>(norm) + Nt_minus1_offset;
1031 void *norm_back = norm;
1032 for(int b=0; b<2; ++b){
1035 mh_send_norm_fwd[b][j][2*i+1] = mh_send_norm_fwd[b][j][2*i];
1036 mh_send_norm_back[b][j][2*i+1] = mh_send_norm_back[b][j][2*i];
1043 for(int b=0; b<2; ++b){
1050 for(int b=0; b<2; ++b){
1069 for(int b=0; b<2; ++b){
1083 comm_free(mh_send_norm_back[b][j][2*i]);
1099 delete []mh_recv_norm_fwd[b][j];
1100 delete []mh_recv_norm_back[b][j];
1101 delete []mh_send_norm_fwd[b][j];
1102 delete []mh_send_norm_back[b][j];
1119 delete []mh_recv_norm_fwd[b];
1120 delete []mh_recv_norm_back[b];
1121 delete []mh_send_norm_fwd[b];
1122 delete []mh_send_norm_back[b];
1126 my_fwd_norm_face[b][i] = NULL;
1127 my_back_norm_face[b][i] = NULL;
1128 from_fwd_norm_face[b][i] = NULL;
1129 from_back_norm_face[b][i] = NULL;
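// Note: pack() is the entry point used by the dslash. With zeroCopyPack it maps
// the pinned host face buffer into device memory via cudaHostGetDevicePointer so
// the packing kernel can write it directly, then launches the face packing
// (packGhost) on the chosen stream.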
1143 bool zeroCopyPack, double a, double b) {
1153 cudaHostGetDevicePointer(&my_face_d, my_face[bufferIndex], 0);
1161 bool zeroCopyPack, double a, double b) {
1169 cudaHostGetDevicePointer(&my_face_d, my_face[bufferIndex], 0);
1178 cudaStream_t *stream_p, const bool zeroCopyPack){
1185 void *my_face_d = NULL;
1187 cudaHostGetDevicePointer(&my_face_d, my_face[bufferIndex], 0);
1201 cudaStream_t *pack_stream = (stream_p) ? stream_p : stream+dir;
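// Note: recvStart()/sendStart() post the non-blocking receives and sends for a
// given dimension and direction, commsStart() does both, and commsQuery() polls
// for completion. The *_norm handles are the extra messages used when the norm
// data is transferred as a separate message (e.g. half precision).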
1229 comm_start(mh_recv_norm_fwd[bufferIndex][nFace-1][dim]);
1232 comm_start(mh_recv_norm_back[bufferIndex][nFace-1][dim]);
1249 comm_start(mh_send_norm_back[bufferIndex][nFace-1][2*dim+dagger]);
1251 comm_start(mh_send_norm_fwd[bufferIndex][nFace-1][2*dim+dagger]);
1279 comm_start(mh_recv_norm_fwd[bufferIndex][nFace-1][dim]);
1281 comm_start(mh_send_norm_back[bufferIndex][nFace-1][2*dim+dagger]);
1284 comm_start(mh_recv_norm_back[bufferIndex][nFace-1][dim]);
1286 comm_start(mh_send_norm_fwd[bufferIndex][nFace-1][2*dim+dagger]);
1310 comm_query(mh_recv_norm_fwd[bufferIndex][nFace-1][dim]) &&
1311 comm_query(mh_send_norm_back[bufferIndex][nFace-1][2*dim+dagger])) return 1;
1315 comm_query(mh_recv_norm_back[bufferIndex][nFace-1][dim]) &&
1316 comm_query(mh_send_norm_fwd[bufferIndex][nFace-1][2*dim+dagger])) return 1;
1370 out << "v = " << a.v << std::endl;
1371 out << "norm = " << a.norm << std::endl;
1372 out << "alloc = " << a.alloc << std::endl;
1373 out << "init = " << a.init << std::endl;
1385 errorQuda("Incorrect eigenvector index...");
1388 errorQuda("Eigenvector must be a parity spinor");
1395 if(first_element < 0) errorQuda("\nError: trying to set negative first element.\n");
1397 if (first_element == 0 && range == this->EigvDim())
1399 if(range != dst.EigvDim()) errorQuda("\nError: eigenvector range too big.\n");
1403 else if ((first_element+range) < this->EigvDim())
1420 memcpy(param.x, x, nDim*sizeof(int));
1425 param.v = (void*)((char*)v + first_element*eigv_bytes);
1432 for(int id = first_element; id < (first_element+range); id++)
1443 errorQuda("Incorrect eigenvector dimension...");
1447 errorQuda("Eigenvector must be a parity spinor");
1455 #ifdef USE_TEXTURE_OBJECTS
1456 printfQuda("\nPrint texture info for the field:\n");
1458 cudaResourceDesc resDesc;
1460 cudaGetTextureObjectResourceDesc(&resDesc, this->Tex());
1461 printfQuda("\nDevice pointer: %p\n", resDesc.res.linear.devPtr);
1462 printfQuda("\nVolume (in bytes): %d\n", resDesc.res.linear.sizeInBytes);
1463 if (resDesc.resType == cudaResourceTypeLinear) printfQuda("\nResource type: linear \n");