QUDA v0.4.0
A library for QCD on GPUs
00001 #include <quda_internal.h> 00002 #include <face_quda.h> 00003 #include <cstdio> 00004 #include <cstdlib> 00005 #include <quda.h> 00006 #include <string.h> 00007 #include <gauge_field.h> 00008 #include <sys/time.h> 00009 00010 #ifdef QMP_COMMS 00011 #include <qmp.h> 00012 #endif 00013 00014 /* 00015 Multi-GPU TODOs 00016 - test qmp code 00017 - implement OpenMP version? 00018 - split face kernels 00019 - separate block sizes for body and face 00020 - minimize pointer arithmetic in core code (need extra constant to replace SPINOR_HOP) 00021 */ 00022 00023 using namespace std; 00024 00025 cudaStream_t *stream; 00026 00027 bool globalReduce = true; 00028 00029 // Easy to switch between overlapping communication or not 00030 #ifdef OVERLAP_COMMS 00031 #define CUDAMEMCPY(dst, src, size, type, stream) cudaMemcpyAsync(dst, src, size, type, stream) 00032 #else 00033 #define CUDAMEMCPY(dst, src, size, type, stream) cudaMemcpy(dst, src, size, type) 00034 #endif 00035 00036 FaceBuffer::FaceBuffer(const int *X, const int nDim, const int Ninternal, 00037 const int nFace, const QudaPrecision precision) : 00038 Ninternal(Ninternal), precision(precision), nDim(nDim), nFace(nFace) 00039 { 00040 if (nDim > QUDA_MAX_DIM) errorQuda("nDim = %d is greater than the maximum of %d\n", nDim, QUDA_MAX_DIM); 00041 00042 setupDims(X); 00043 00044 // set these both = 0 separate streams for forwards and backwards comms 00045 // sendBackStrmIdx = 0, and sendFwdStrmIdx = 1 for overlap 00046 sendBackStrmIdx = 0; 00047 sendFwdStrmIdx = 1; 00048 recFwdStrmIdx = sendBackStrmIdx; 00049 recBackStrmIdx = sendFwdStrmIdx; 00050 00051 unsigned int flag = cudaHostAllocDefault; 00052 00053 //printf("nDim = %d\n", nDim); 00054 00055 // Buffers hold half spinors 00056 for (int i=0; i<nDim; i++) { 00057 nbytes[i] = nFace*faceVolumeCB[i]*Ninternal*precision; 00058 00059 // add extra space for the norms for half precision 00060 if (precision == QUDA_HALF_PRECISION) nbytes[i] += 
nFace*faceVolumeCB[i]*sizeof(float); 00061 //printf("bytes = %d, nFace = %d, faceVolume = %d, Ndof = %d, prec = %d\n", 00062 // nbytes[i], nFace, faceVolumeCB[i], Ninternal, precision); 00063 00064 cudaHostAlloc(&(my_fwd_face[i]), nbytes[i], flag); 00065 if( !my_fwd_face[i] ) errorQuda("Unable to allocate my_fwd_face with size %lu", nbytes[i]); 00066 00067 //printf("%d\n", nbytes[i]); 00068 00069 cudaHostAlloc(&(my_back_face[i]), nbytes[i], flag); 00070 if( !my_back_face[i] ) errorQuda("Unable to allocate my_back_face with size %lu", nbytes[i]); 00071 } 00072 00073 for (int i=0; i<nDim; i++) { 00074 #ifdef QMP_COMMS 00075 cudaHostAlloc(&(from_fwd_face[i]), nbytes[i], flag); 00076 if( !from_fwd_face[i] ) errorQuda("Unable to allocate from_fwd_face with size %lu", nbytes[i]); 00077 00078 cudaHostAlloc(&(from_back_face[i]), nbytes[i], flag); 00079 if( !from_back_face[i] ) errorQuda("Unable to allocate from_back_face with size %lu", nbytes[i]); 00080 00081 // if no GPUDirect so need separate IB and GPU host buffers 00082 #ifndef GPU_DIRECT 00083 ib_my_fwd_face[i] = malloc(nbytes[i]); 00084 if (!ib_my_fwd_face[i]) errorQuda("Unable to allocate ib_my_fwd_face with size %lu", nbytes[i]); 00085 00086 ib_my_back_face[i] = malloc(nbytes[i]); 00087 if (!ib_my_back_face[i]) errorQuda("Unable to allocate ib_my_back_face with size %lu", nbytes[i]); 00088 00089 ib_from_fwd_face[i] = malloc(nbytes[i]); 00090 if (!ib_from_fwd_face[i]) errorQuda("Unable to allocate ib_from_fwd_face with size %lu", nbytes[i]); 00091 00092 ib_from_back_face[i] = malloc(nbytes[i]); 00093 if (!ib_from_back_face[i]) errorQuda("Unable to allocate ib_from_back_face with size %lu", nbytes[i]); 00094 #else // else just alias the pointer 00095 ib_my_fwd_face[i] = my_fwd_face[i]; 00096 ib_my_back_face[i] = my_back_face[i]; 00097 ib_from_fwd_face[i] = from_fwd_face[i]; 00098 ib_from_back_face[i] = from_back_face[i]; 00099 #endif 00100 00101 #else 00102 from_fwd_face[i] = my_back_face[i]; 00103 from_back_face[i] 
= my_fwd_face[i]; 00104 #endif 00105 } 00106 00107 #ifdef QMP_COMMS 00108 for (int i=0; i<nDim; i++) { 00109 00110 mm_send_fwd[i] = QMP_declare_msgmem(ib_my_fwd_face[i], nbytes[i]); 00111 if( mm_send_fwd[i] == NULL ) errorQuda("Unable to allocate send fwd message mem"); 00112 00113 mm_send_back[i] = QMP_declare_msgmem(ib_my_back_face[i], nbytes[i]); 00114 if( mm_send_back[i] == NULL ) errorQuda("Unable to allocate send back message mem"); 00115 00116 mm_from_fwd[i] = QMP_declare_msgmem(ib_from_fwd_face[i], nbytes[i]); 00117 if( mm_from_fwd[i] == NULL ) errorQuda("Unable to allocate recv from fwd message mem"); 00118 00119 mm_from_back[i] = QMP_declare_msgmem(ib_from_back_face[i], nbytes[i]); 00120 if( mm_from_back[i] == NULL ) errorQuda("Unable to allocate recv from back message mem"); 00121 00122 mh_send_fwd[i] = QMP_declare_send_relative(mm_send_fwd[i], i, +1, 0); 00123 if( mh_send_fwd[i] == NULL ) errorQuda("Unable to allocate forward send"); 00124 00125 mh_send_back[i] = QMP_declare_send_relative(mm_send_back[i], i, -1, 0); 00126 if( mh_send_back[i] == NULL ) errorQuda("Unable to allocate backward send"); 00127 00128 mh_from_fwd[i] = QMP_declare_receive_relative(mm_from_fwd[i], i, +1, 0); 00129 if( mh_from_fwd[i] == NULL ) errorQuda("Unable to allocate forward recv"); 00130 00131 mh_from_back[i] = QMP_declare_receive_relative(mm_from_back[i], i, -1, 0); 00132 if( mh_from_back[i] == NULL ) errorQuda("Unable to allocate backward recv"); 00133 } 00134 #endif 00135 00136 } 00137 00138 FaceBuffer::FaceBuffer(const FaceBuffer &face) { 00139 errorQuda("FaceBuffer copy constructor not implemented"); 00140 } 00141 00142 void FaceBuffer::setupDims(const int* X) 00143 { 00144 Volume = 1; 00145 for (int d=0; d< nDim; d++) { 00146 this->X[d] = X[d]; 00147 Volume *= this->X[d]; 00148 } 00149 VolumeCB = Volume/2; 00150 00151 for (int i=0; i<nDim; i++) { 00152 faceVolume[i] = 1; 00153 for (int j=0; j<nDim; j++) { 00154 if (i==j) continue; 00155 faceVolume[i] *= this->X[j]; 
00156 } 00157 faceVolumeCB[i] = faceVolume[i]/2; 00158 00159 } 00160 00161 } 00162 00163 FaceBuffer::~FaceBuffer() 00164 { 00165 00166 //printf("Ndim = %d\n", nDim); 00167 for (int i=0; i<nDim; i++) { 00168 #ifdef QMP_COMMS 00169 00170 #ifndef GPU_DIRECT 00171 free(ib_my_fwd_face[i]); 00172 free(ib_my_back_face[i]); 00173 free(ib_from_fwd_face[i]); 00174 free(ib_from_back_face[i]); 00175 #endif 00176 00177 QMP_free_msghandle(mh_send_fwd[i]); 00178 QMP_free_msghandle(mh_send_back[i]); 00179 QMP_free_msghandle(mh_from_fwd[i]); 00180 QMP_free_msghandle(mh_from_back[i]); 00181 QMP_free_msgmem(mm_send_fwd[i]); 00182 QMP_free_msgmem(mm_send_back[i]); 00183 QMP_free_msgmem(mm_from_fwd[i]); 00184 QMP_free_msgmem(mm_from_back[i]); 00185 cudaFreeHost(from_fwd_face[i]); // these are aliasing pointers for non-qmp case 00186 cudaFreeHost(from_back_face[i]);// these are aliasing pointers for non-qmp case 00187 #endif 00188 cudaFreeHost(my_fwd_face[i]); 00189 cudaFreeHost(my_back_face[i]); 00190 } 00191 00192 for (int i=0; i<nDim; i++) { 00193 my_fwd_face[i]=NULL; 00194 my_back_face[i]=NULL; 00195 from_fwd_face[i]=NULL; 00196 from_back_face[i]=NULL; 00197 } 00198 } 00199 00200 void FaceBuffer::pack(cudaColorSpinorField &in, int parity, int dagger, int dim, cudaStream_t *stream_p) 00201 { 00202 if(!commDimPartitioned(dim)) return; 00203 00204 in.allocateGhostBuffer(); // allocate the ghost buffer if not yet allocated 00205 stream = stream_p; 00206 00207 in.packGhost(dim, (QudaParity)parity, dagger, &stream[Nstream-1]); 00208 } 00209 00210 void FaceBuffer::gather(cudaColorSpinorField &in, int dagger, int dir) 00211 { 00212 int dim = dir/2; 00213 if(!commDimPartitioned(dim)) return; 00214 00215 if (dir%2==0) { 00216 // backwards copy to host 00217 in.sendGhost(my_back_face[dim], dim, QUDA_BACKWARDS, dagger, &stream[2*dim+sendBackStrmIdx]); 00218 } else { 00219 // forwards copy to host 00220 in.sendGhost(my_fwd_face[dim], dim, QUDA_FORWARDS, dagger, &stream[2*dim+sendFwdStrmIdx]); 
00221 } 00222 } 00223 00224 void FaceBuffer::commsStart(int dir) { 00225 int dim = dir / 2; 00226 if(!commDimPartitioned(dim)) return; 00227 00228 if (dir%2 == 0) { // sending backwards 00229 00230 #ifdef QMP_COMMS // Begin backward send 00231 // Prepost receive 00232 QMP_start(mh_from_fwd[dim]); 00233 #ifndef GPU_DIRECT 00234 memcpy(ib_my_back_face[dim], my_back_face[dim], nbytes[dim]); 00235 #endif 00236 QMP_start(mh_send_back[dim]); 00237 #endif 00238 00239 } else { //sending forwards 00240 00241 #ifdef QMP_COMMS 00242 // Prepost receive 00243 QMP_start(mh_from_back[dim]); 00244 // Begin forward send 00245 #ifndef GPU_DIRECT 00246 memcpy(ib_my_fwd_face[dim], my_fwd_face[dim], nbytes[dim]); 00247 #endif 00248 QMP_start(mh_send_fwd[dim]); 00249 #endif 00250 } 00251 00252 } 00253 00254 int FaceBuffer::commsQuery(int dir) { 00255 00256 #ifdef QMP_COMMS 00257 00258 int dim = dir/2; 00259 if(!commDimPartitioned(dim)) return 0; 00260 00261 if (dir%2==0) {// receive from forwards 00262 if (QMP_is_complete(mh_send_back[dim]) == QMP_TRUE && 00263 QMP_is_complete(mh_from_fwd[dim]) == QMP_TRUE) { 00264 #ifndef GPU_DIRECT 00265 memcpy(from_fwd_face[dim], ib_from_fwd_face[dim], nbytes[dim]); 00266 #endif 00267 return 1; 00268 } 00269 } else { // receive from backwards 00270 if (QMP_is_complete(mh_send_fwd[dim]) == QMP_TRUE && 00271 QMP_is_complete(mh_from_back[dim]) == QMP_TRUE) { 00272 #ifndef GPU_DIRECT 00273 memcpy(from_back_face[dim], ib_from_back_face[dim], nbytes[dim]); 00274 #endif 00275 return 1; 00276 } 00277 } 00278 return 0; 00279 00280 #else // no communications so just return true 00281 00282 return 1; 00283 00284 #endif 00285 } 00286 00287 void FaceBuffer::scatter(cudaColorSpinorField &out, int dagger, int dir) 00288 { 00289 int dim = dir/2; 00290 if(!commDimPartitioned(dim)) return; 00291 00292 if (dir%2==0) {// receive from forwards 00293 out.unpackGhost(from_fwd_face[dim], dim, QUDA_FORWARDS, dagger, &stream[2*dim+recFwdStrmIdx]); // 0, 2, 4, 6 00294 } else { 
// receive from backwards 00295 out.unpackGhost(from_back_face[dim], dim, QUDA_BACKWARDS, dagger, &stream[2*dim+recBackStrmIdx]); // 1, 3, 5, 7 00296 } 00297 } 00298 00299 // This is just an initial hack for CPU comms - should be creating the message handlers at instantiation 00300 void FaceBuffer::exchangeCpuSpinor(cpuColorSpinorField &spinor, int oddBit, int dagger) 00301 { 00302 00303 // allocate the ghost buffer if not yet allocated 00304 spinor.allocateGhostBuffer(); 00305 00306 for(int i=0;i < 4; i++){ 00307 spinor.packGhost(spinor.backGhostFaceSendBuffer[i], i, QUDA_BACKWARDS, (QudaParity)oddBit, dagger); 00308 spinor.packGhost(spinor.fwdGhostFaceSendBuffer[i], i, QUDA_FORWARDS, (QudaParity)oddBit, dagger); 00309 } 00310 00311 #ifdef QMP_COMMS 00312 00313 QMP_msgmem_t mm_send_fwd[4]; 00314 QMP_msgmem_t mm_from_back[4]; 00315 QMP_msgmem_t mm_from_fwd[4]; 00316 QMP_msgmem_t mm_send_back[4]; 00317 QMP_msghandle_t mh_send_fwd[4]; 00318 QMP_msghandle_t mh_from_back[4]; 00319 QMP_msghandle_t mh_from_fwd[4]; 00320 QMP_msghandle_t mh_send_back[4]; 00321 00322 for (int i=0; i<4; i++) { 00323 mm_send_fwd[i] = QMP_declare_msgmem(spinor.fwdGhostFaceSendBuffer[i], nbytes[i]); 00324 if( mm_send_fwd[i] == NULL ) errorQuda("Unable to allocate send fwd message mem"); 00325 00326 mm_send_back[i] = QMP_declare_msgmem(spinor.backGhostFaceSendBuffer[i], nbytes[i]); 00327 if( mm_send_back == NULL ) errorQuda("Unable to allocate send back message mem"); 00328 00329 mm_from_fwd[i] = QMP_declare_msgmem(spinor.fwdGhostFaceBuffer[i], nbytes[i]); 00330 if( mm_from_fwd[i] == NULL ) errorQuda("Unable to allocate recv from fwd message mem"); 00331 00332 mm_from_back[i] = QMP_declare_msgmem(spinor.backGhostFaceBuffer[i], nbytes[i]); 00333 if( mm_from_back[i] == NULL ) errorQuda("Unable to allocate recv from back message mem"); 00334 00335 mh_send_fwd[i] = QMP_declare_send_relative(mm_send_fwd[i], i, +1, 0); 00336 if( mh_send_fwd[i] == NULL ) errorQuda("Unable to allocate forward send"); 
00337 00338 mh_send_back[i] = QMP_declare_send_relative(mm_send_back[i], i, -1, 0); 00339 if( mh_send_back[i] == NULL ) errorQuda("Unable to allocate backward send"); 00340 00341 mh_from_fwd[i] = QMP_declare_receive_relative(mm_from_fwd[i], i, +1, 0); 00342 if( mh_from_fwd[i] == NULL ) errorQuda("Unable to allocate forward recv"); 00343 00344 mh_from_back[i] = QMP_declare_receive_relative(mm_from_back[i], i, -1, 0); 00345 if( mh_from_back[i] == NULL ) errorQuda("Unable to allocate backward recv"); 00346 } 00347 00348 for (int i=0; i<4; i++) { 00349 QMP_start(mh_from_back[i]); 00350 QMP_start(mh_from_fwd[i]); 00351 QMP_start(mh_send_fwd[i]); 00352 QMP_start(mh_send_back[i]); 00353 } 00354 00355 for (int i=0; i<4; i++) { 00356 QMP_wait(mh_send_fwd[i]); 00357 QMP_wait(mh_send_back[i]); 00358 QMP_wait(mh_from_back[i]); 00359 QMP_wait(mh_from_fwd[i]); 00360 } 00361 00362 for (int i=0; i<4; i++) { 00363 QMP_free_msghandle(mh_send_fwd[i]); 00364 QMP_free_msghandle(mh_send_back[i]); 00365 QMP_free_msghandle(mh_from_fwd[i]); 00366 QMP_free_msghandle(mh_from_back[i]); 00367 QMP_free_msgmem(mm_send_fwd[i]); 00368 QMP_free_msgmem(mm_send_back[i]); 00369 QMP_free_msgmem(mm_from_back[i]); 00370 QMP_free_msgmem(mm_from_fwd[i]); 00371 } 00372 00373 #else 00374 00375 for (int i=0; i<4; i++) { 00376 //printf("%d COPY length = %d\n", i, nbytes[i]/precision); 00377 memcpy(spinor.fwdGhostFaceBuffer[i], spinor.backGhostFaceSendBuffer[i], nbytes[i]); 00378 memcpy(spinor.backGhostFaceBuffer[i], spinor.fwdGhostFaceSendBuffer[i], nbytes[i]); 00379 } 00380 00381 #endif 00382 } 00383 00384 void FaceBuffer::exchangeCpuLink(void** ghost_link, void** link_sendbuf) { 00385 00386 #ifdef QMP_COMMS 00387 00388 QMP_msgmem_t mm_send_fwd[4]; 00389 QMP_msgmem_t mm_from_back[4]; 00390 QMP_msghandle_t mh_send_fwd[4]; 00391 QMP_msghandle_t mh_from_back[4]; 00392 00393 for (int i=0; i<4; i++) { 00394 int len = 2*nFace*faceVolumeCB[i]*Ninternal; 00395 mm_send_fwd[i] = QMP_declare_msgmem(link_sendbuf[i], 
len*precision); 00396 if( mm_send_fwd[i] == NULL ) errorQuda("Unable to allocate send fwd message mem"); 00397 00398 mm_from_back[i] = QMP_declare_msgmem(ghost_link[i], len*precision); 00399 if( mm_from_back[i] == NULL ) errorQuda("Unable to allocate recv from back message mem"); 00400 00401 mh_send_fwd[i] = QMP_declare_send_relative(mm_send_fwd[i], i, +1, 0); 00402 if( mh_send_fwd[i] == NULL ) errorQuda("Unable to allocate forward send"); 00403 00404 mh_from_back[i] = QMP_declare_receive_relative(mm_from_back[i], i, -1, 0); 00405 if( mh_from_back[i] == NULL ) errorQuda("Unable to allocate backward recv"); 00406 } 00407 00408 for (int i=0; i<4; i++) { 00409 QMP_start(mh_send_fwd[i]); 00410 QMP_start(mh_from_back[i]); 00411 } 00412 00413 for (int i=0; i<4; i++) { 00414 QMP_wait(mh_send_fwd[i]); 00415 QMP_wait(mh_from_back[i]); 00416 } 00417 00418 for (int i=0; i<4; i++) { 00419 QMP_free_msghandle(mh_send_fwd[i]); 00420 QMP_free_msghandle(mh_from_back[i]); 00421 QMP_free_msgmem(mm_send_fwd[i]); 00422 QMP_free_msgmem(mm_from_back[i]); 00423 } 00424 00425 #else 00426 00427 for(int dir =0; dir < 4; dir++) { 00428 int len = 2*nFace*faceVolumeCB[dir]*Ninternal; // factor 2 since we have both parities 00429 //printf("%d COPY length = %d\n", dir, len); 00430 memcpy(ghost_link[dir], link_sendbuf[dir], len*precision); 00431 } 00432 00433 #endif 00434 00435 } 00436 00437 00438 00439 void transferGaugeFaces(void *gauge, void *gauge_face, QudaPrecision precision, 00440 int Nvec, QudaReconstructType reconstruct, int V, int Vs) 00441 { 00442 int nblocks, ndim=4; 00443 size_t blocksize;//, nbytes; 00444 ptrdiff_t offset, stride; 00445 void *g; 00446 00447 nblocks = ndim*reconstruct/Nvec; 00448 blocksize = Vs*Nvec*precision; 00449 offset = (V-Vs)*Nvec*precision; 00450 stride = (V+Vs)*Nvec*precision; // assume that pad = Vs 00451 00452 #ifdef QMP_COMMS 00453 00454 QMP_msgmem_t mm_gauge_send_fwd; 00455 QMP_msgmem_t mm_gauge_from_back; 00456 QMP_msghandle_t mh_gauge_send_fwd; 00457 
QMP_msghandle_t mh_gauge_from_back; 00458 00459 g = (void *) ((char *) gauge + offset); 00460 mm_gauge_send_fwd = QMP_declare_strided_msgmem(g, blocksize, nblocks, stride); 00461 if (!mm_gauge_send_fwd) { 00462 errorQuda("Unable to allocate gauge message mem"); 00463 } 00464 00465 mm_gauge_from_back = QMP_declare_strided_msgmem(gauge_face, blocksize, nblocks, stride); 00466 if (!mm_gauge_from_back) { 00467 errorQuda("Unable to allocate gauge face message mem"); 00468 } 00469 00470 mh_gauge_send_fwd = QMP_declare_send_relative(mm_gauge_send_fwd, 3, +1, 0); 00471 if (!mh_gauge_send_fwd) { 00472 errorQuda("Unable to allocate gauge message handle"); 00473 } 00474 mh_gauge_from_back = QMP_declare_receive_relative(mm_gauge_from_back, 3, -1, 0); 00475 if (!mh_gauge_from_back) { 00476 errorQuda("Unable to allocate gauge face message handle"); 00477 } 00478 00479 QMP_start(mh_gauge_send_fwd); 00480 QMP_start(mh_gauge_from_back); 00481 00482 QMP_wait(mh_gauge_send_fwd); 00483 QMP_wait(mh_gauge_from_back); 00484 00485 QMP_free_msghandle(mh_gauge_send_fwd); 00486 QMP_free_msghandle(mh_gauge_from_back); 00487 QMP_free_msgmem(mm_gauge_send_fwd); 00488 QMP_free_msgmem(mm_gauge_from_back); 00489 00490 #else 00491 00492 void *gf; 00493 00494 for (int i=0; i<nblocks; i++) { 00495 g = (void *) ((char *) gauge + offset + i*stride); 00496 gf = (void *) ((char *) gauge_face + i*stride); 00497 cudaMemcpy(gf, g, blocksize, cudaMemcpyHostToHost); 00498 } 00499 00500 #endif // QMP_COMMS 00501 } 00502 00503 void reduceMaxDouble(double &max) { 00504 00505 #ifdef QMP_COMMS 00506 QMP_max_double(&max); 00507 #endif 00508 00509 } 00510 00511 void reduceDouble(double &sum) { 00512 00513 #ifdef QMP_COMMS 00514 if (globalReduce) QMP_sum_double(&sum); 00515 #endif 00516 00517 } 00518 00519 void reduceDoubleArray(double *sum, const int len) { 00520 00521 #ifdef QMP_COMMS 00522 if (globalReduce) QMP_sum_double_array(sum,len); 00523 #endif 00524 00525 } 00526 00527 #ifdef QMP_COMMS 00528 static int 
manual_set_partition[4] ={0, 0, 0, 0}; 00529 int commDim(int dir) { return QMP_get_logical_dimensions()[dir]; } 00530 int commCoords(int dir) { return QMP_get_logical_coordinates()[dir]; } 00531 int commDimPartitioned(int dir){ return (manual_set_partition[dir] || ((commDim(dir) > 1)));} 00532 void commDimPartitionedSet(int dir){ manual_set_partition[dir] = 1; } 00533 void commBarrier() { QMP_barrier(); } 00534 #else 00535 int commDim(int dir) { return 1; } 00536 int commCoords(int dir) { return 0; } 00537 int commDimPartitioned(int dir){ return 0; } 00538 void commDimPartitionedSet(int dir){ ; } 00539 void commBarrier() { ; } 00540 #endif