QUDA v0.4.0
A library for QCD on GPUs
00001 #include <quda_internal.h> 00002 #include <face_quda.h> 00003 #include <cstdio> 00004 #include <cstdlib> 00005 #include <quda.h> 00006 #include <string.h> 00007 #include <gauge_field.h> 00008 #include <sys/time.h> 00009 00010 #ifdef QMP_COMMS 00011 #include <qmp.h> 00012 #endif 00013 00014 /* 00015 Multi-GPU TODOs 00016 - test qmp code 00017 - implement OpenMP version? 00018 - split face kernels 00019 - separate block sizes for body and face 00020 - minimize pointer arithmetic in core code (need extra constant to replace SPINOR_HOP) 00021 */ 00022 00023 using namespace std; 00024 00025 cudaStream_t *stream; 00026 00027 bool globalReduce = true; 00028 00029 // Easy to switch between overlapping communication or not 00030 #ifdef OVERLAP_COMMS 00031 #define CUDAMEMCPY(dst, src, size, type, stream) cudaMemcpyAsync(dst, src, size, type, stream) 00032 #else 00033 #define CUDAMEMCPY(dst, src, size, type, stream) cudaMemcpy(dst, src, size, type) 00034 #endif 00035 00036 FaceBuffer::FaceBuffer(const int *X, const int nDim, const int Ninternal, 00037 const int nFace, const QudaPrecision precision) : 00038 Ninternal(Ninternal), precision(precision), nDim(nDim), nFace(nFace) 00039 { 00040 if (nDim > QUDA_MAX_DIM) errorQuda("nDim = %d is greater than the maximum of %d\n", nDim, QUDA_MAX_DIM); 00041 00042 setupDims(X); 00043 00044 // set these both = 0 separate streams for forwards and backwards comms 00045 // sendBackStrmIdx = 0, and sendFwdStrmIdx = 1 for overlap 00046 sendBackStrmIdx = 0; 00047 sendFwdStrmIdx = 1; 00048 recFwdStrmIdx = sendBackStrmIdx; 00049 recBackStrmIdx = sendFwdStrmIdx; 00050 00051 unsigned int flag = cudaHostAllocDefault; 00052 00053 //printf("nDim = %d\n", nDim); 00054 00055 // Buffers hold half spinors 00056 for (int i=0; i<nDim; i++) { 00057 nbytes[i] = nFace*faceVolumeCB[i]*Ninternal*precision; 00058 00059 // add extra space for the norms for half precision 00060 if (precision == QUDA_HALF_PRECISION) nbytes[i] += 
nFace*faceVolumeCB[i]*sizeof(float); 00061 //printf("bytes = %d, nFace = %d, faceVolume = %d, Ndof = %d, prec = %d\n", 00062 // nbytes[i], nFace, faceVolumeCB[i], Ninternal, precision); 00063 00064 cudaHostAlloc(&(my_fwd_face[i]), nbytes[i], flag); 00065 if( !my_fwd_face[i] ) errorQuda("Unable to allocate my_fwd_face with size %lu", nbytes[i]); 00066 00067 //printf("%d\n", nbytes[i]); 00068 00069 cudaHostAlloc(&(my_back_face[i]), nbytes[i], flag); 00070 if( !my_back_face[i] ) errorQuda("Unable to allocate my_back_face with size %lu", nbytes[i]); 00071 } 00072 00073 for (int i=0; i<nDim; i++) { 00074 #ifdef QMP_COMMS 00075 cudaHostAlloc(&(from_fwd_face[i]), nbytes[i], flag); 00076 if( !from_fwd_face[i] ) errorQuda("Unable to allocate from_fwd_face with size %lu", nbytes[i]); 00077 00078 cudaHostAlloc(&(from_back_face[i]), nbytes[i], flag); 00079 if( !from_back_face[i] ) errorQuda("Unable to allocate from_back_face with size %lu", nbytes[i]); 00080 00081 // if no GPUDirect so need separate IB and GPU host buffers 00082 #ifndef GPU_DIRECT 00083 ib_my_fwd_face[i] = malloc(nbytes[i]); 00084 if (!ib_my_fwd_face[i]) errorQuda("Unable to allocate ib_my_fwd_face with size %lu", nbytes[i]); 00085 00086 ib_my_back_face[i] = malloc(nbytes[i]); 00087 if (!ib_my_back_face[i]) errorQuda("Unable to allocate ib_my_back_face with size %lu", nbytes[i]); 00088 00089 ib_from_fwd_face[i] = malloc(nbytes[i]); 00090 if (!ib_from_fwd_face[i]) errorQuda("Unable to allocate ib_from_fwd_face with size %lu", nbytes[i]); 00091 00092 ib_from_back_face[i] = malloc(nbytes[i]); 00093 if (!ib_from_back_face[i]) errorQuda("Unable to allocate ib_from_back_face with size %lu", nbytes[i]); 00094 #else // else just alias the pointer 00095 ib_my_fwd_face[i] = my_fwd_face[i]; 00096 ib_my_back_face[i] = my_back_face[i]; 00097 ib_from_fwd_face[i] = from_fwd_face[i]; 00098 ib_from_back_face[i] = from_back_face[i]; 00099 #endif 00100 00101 #else 00102 from_fwd_face[i] = my_back_face[i]; 00103 from_back_face[i] 
= my_fwd_face[i]; 00104 #endif 00105 } 00106 00107 #ifdef QMP_COMMS 00108 for (int i=0; i<nDim; i++) { 00109 00110 mm_send_fwd[i] = QMP_declare_msgmem(ib_my_fwd_face[i], nbytes[i]); 00111 if( mm_send_fwd[i] == NULL ) errorQuda("Unable to allocate send fwd message mem"); 00112 00113 mm_send_back[i] = QMP_declare_msgmem(ib_my_back_face[i], nbytes[i]); 00114 if( mm_send_back[i] == NULL ) errorQuda("Unable to allocate send back message mem"); 00115 00116 mm_from_fwd[i] = QMP_declare_msgmem(ib_from_fwd_face[i], nbytes[i]); 00117 if( mm_from_fwd[i] == NULL ) errorQuda("Unable to allocate recv from fwd message mem"); 00118 00119 mm_from_back[i] = QMP_declare_msgmem(ib_from_back_face[i], nbytes[i]); 00120 if( mm_from_back[i] == NULL ) errorQuda("Unable to allocate recv from back message mem"); 00121 00122 mh_send_fwd[i] = QMP_declare_send_relative(mm_send_fwd[i], i, +1, 0); 00123 if( mh_send_fwd[i] == NULL ) errorQuda("Unable to allocate forward send"); 00124 00125 mh_send_back[i] = QMP_declare_send_relative(mm_send_back[i], i, -1, 0); 00126 if( mh_send_back[i] == NULL ) errorQuda("Unable to allocate backward send"); 00127 00128 mh_from_fwd[i] = QMP_declare_receive_relative(mm_from_fwd[i], i, +1, 0); 00129 if( mh_from_fwd[i] == NULL ) errorQuda("Unable to allocate forward recv"); 00130 00131 mh_from_back[i] = QMP_declare_receive_relative(mm_from_back[i], i, -1, 0); 00132 if( mh_from_back[i] == NULL ) errorQuda("Unable to allocate backward recv"); 00133 } 00134 #endif 00135 00136 } 00137 00138 FaceBuffer::FaceBuffer(const FaceBuffer &face) { 00139 errorQuda("FaceBuffer copy constructor not implemented"); 00140 } 00141 00142 void FaceBuffer::setupDims(const int* X) 00143 { 00144 Volume = 1; 00145 for (int d=0; d< nDim; d++) { 00146 this->X[d] = X[d]; 00147 Volume *= this->X[d]; 00148 } 00149 VolumeCB = Volume/2; 00150 00151 for (int i=0; i<nDim; i++) { 00152 faceVolume[i] = 1; 00153 for (int j=0; j<nDim; j++) { 00154 if (i==j) continue; 00155 faceVolume[i] *= this->X[j]; 
00156 } 00157 faceVolumeCB[i] = faceVolume[i]/2; 00158 00159 } 00160 00161 } 00162 00163 FaceBuffer::~FaceBuffer() 00164 { 00165 00166 //printf("Ndim = %d\n", nDim); 00167 for (int i=0; i<nDim; i++) { 00168 #ifdef QMP_COMMS 00169 00170 #ifndef GPU_DIRECT 00171 free(ib_my_fwd_face[i]); 00172 free(ib_my_back_face[i]); 00173 free(ib_from_fwd_face[i]); 00174 free(ib_from_back_face[i]); 00175 #endif 00176 00177 QMP_free_msghandle(mh_send_fwd[i]); 00178 QMP_free_msghandle(mh_send_back[i]); 00179 QMP_free_msghandle(mh_from_fwd[i]); 00180 QMP_free_msghandle(mh_from_back[i]); 00181 QMP_free_msgmem(mm_send_fwd[i]); 00182 QMP_free_msgmem(mm_send_back[i]); 00183 QMP_free_msgmem(mm_from_fwd[i]); 00184 QMP_free_msgmem(mm_from_back[i]); 00185 cudaFreeHost(from_fwd_face[i]); // these are aliasing pointers for non-qmp case 00186 cudaFreeHost(from_back_face[i]);// these are aliasing pointers for non-qmp case 00187 #endif 00188 cudaFreeHost(my_fwd_face[i]); 00189 cudaFreeHost(my_back_face[i]); 00190 } 00191 00192 for (int i=0; i<nDim; i++) { 00193 my_fwd_face[i]=NULL; 00194 my_back_face[i]=NULL; 00195 from_fwd_face[i]=NULL; 00196 from_back_face[i]=NULL; 00197 } 00198 } 00199 00200 void FaceBuffer::pack(cudaColorSpinorField &in, int parity, int dagger, int dim, cudaStream_t *stream_p) 00201 { 00202 if(!commDimPartitioned(dim)) return; 00203 00204 in.allocateGhostBuffer(); // allocate the ghost buffer if not yet allocated 00205 stream = stream_p; 00206 00207 in.packGhost(dim, (QudaParity)parity, dagger, &stream[Nstream-1]); 00208 } 00209 00210 void FaceBuffer::gather(cudaColorSpinorField &in, int dagger, int dir) 00211 { 00212 int dim = dir/2; 00213 if(!commDimPartitioned(dim)) return; 00214 00215 if (dir%2==0) { 00216 // backwards copy to host 00217 in.sendGhost(my_back_face[dim], dim, QUDA_BACKWARDS, dagger, &stream[2*dim+sendBackStrmIdx]); 00218 } else { 00219 // forwards copy to host 00220 in.sendGhost(my_fwd_face[dim], dim, QUDA_FORWARDS, dagger, &stream[2*dim+sendFwdStrmIdx]); 
00221 } 00222 } 00223 00224 void FaceBuffer::commsStart(int dir) { 00225 int dim = dir / 2; 00226 if(!commDimPartitioned(dim)) return; 00227 00228 if (dir%2 == 0) { // sending backwards 00229 00230 #ifdef QMP_COMMS // Begin backward send 00231 // Prepost receive 00232 QMP_start(mh_from_fwd[dim]); 00233 #ifndef GPU_DIRECT 00234 memcpy(ib_my_back_face[dim], my_back_face[dim], nbytes[dim]); 00235 #endif 00236 QMP_start(mh_send_back[dim]); 00237 #endif 00238 00239 } else { //sending forwards 00240 00241 #ifdef QMP_COMMS 00242 // Prepost receive 00243 QMP_start(mh_from_back[dim]); 00244 // Begin forward send 00245 #ifndef GPU_DIRECT 00246 memcpy(ib_my_fwd_face[dim], my_fwd_face[dim], nbytes[dim]); 00247 #endif 00248 QMP_start(mh_send_fwd[dim]); 00249 #endif 00250 } 00251 00252 } 00253 00254 int FaceBuffer::commsQuery(int dir) { 00255 00256 #ifdef QMP_COMMS 00257 00258 int dim = dir/2; 00259 if(!commDimPartitioned(dim)) return 0; 00260 00261 if (dir%2==0) {// receive from forwards 00262 if (QMP_is_complete(mh_send_back[dim]) == QMP_TRUE && 00263 QMP_is_complete(mh_from_fwd[dim]) == QMP_TRUE) { 00264 #ifndef GPU_DIRECT 00265 memcpy(from_fwd_face[dim], ib_from_fwd_face[dim], nbytes[dim]); 00266 #endif 00267 return 1; 00268 } 00269 } else { // receive from backwards 00270 if (QMP_is_complete(mh_send_fwd[dim]) == QMP_TRUE && 00271 QMP_is_complete(mh_from_back[dim]) == QMP_TRUE) { 00272 #ifndef GPU_DIRECT 00273 memcpy(from_back_face[dim], ib_from_back_face[dim], nbytes[dim]); 00274 #endif 00275 return 1; 00276 } 00277 } 00278 return 0; 00279 00280 #else // no communications so just return true 00281 00282 return 1; 00283 00284 #endif 00285 } 00286 00287 void FaceBuffer::scatter(cudaColorSpinorField &out, int dagger, int dir) 00288 { 00289 int dim = dir/2; 00290 if(!commDimPartitioned(dim)) return; 00291 00292 if (dir%2==0) {// receive from forwards 00293 out.unpackGhost(from_fwd_face[dim], dim, QUDA_FORWARDS, dagger, &stream[2*dim+recFwdStrmIdx]); // 0, 2, 4, 6 00294 } else { 
// receive from backwards 00295 out.unpackGhost(from_back_face[dim], dim, QUDA_BACKWARDS, dagger, &stream[2*dim+recBackStrmIdx]); // 1, 3, 5, 7 00296 } 00297 } 00298 00299 // This is just an initial hack for CPU comms - should be creating the message handlers at instantiation 00300 void FaceBuffer::exchangeCpuSpinor(cpuColorSpinorField &spinor, int oddBit, int dagger) 00301 { 00302 00303 // allocate the ghost buffer if not yet allocated 00304 spinor.allocateGhostBuffer(); 00305 00306 for(int i=0;i < 4; i++){ 00307 spinor.packGhost(spinor.backGhostFaceSendBuffer[i], i, QUDA_BACKWARDS, (QudaParity)oddBit, dagger); 00308 spinor.packGhost(spinor.fwdGhostFaceSendBuffer[i], i, QUDA_FORWARDS, (QudaParity)oddBit, dagger); 00309 } 00310 00311 #ifdef QMP_COMMS 00312 00313 QMP_msgmem_t mm_send_fwd[4]; 00314 QMP_msgmem_t mm_from_back[4]; 00315 QMP_msgmem_t mm_from_fwd[4]; 00316 QMP_msgmem_t mm_send_back[4]; 00317 QMP_msghandle_t mh_send_fwd[4]; 00318 QMP_msghandle_t mh_from_back[4]; 00319 QMP_msghandle_t mh_from_fwd[4]; 00320 QMP_msghandle_t mh_send_back[4]; 00321 00322 for (int i=0; i<4; i++) { 00323 mm_send_fwd[i] = QMP_declare_msgmem(spinor.fwdGhostFaceSendBuffer[i], nbytes[i]); 00324 if( mm_send_fwd[i] == NULL ) errorQuda("Unable to allocate send fwd message mem"); 00325 00326 mm_send_back[i] = QMP_declare_msgmem(spinor.backGhostFaceSendBuffer[i], nbytes[i]); 00327 if( mm_send_back == NULL ) errorQuda("Unable to allocate send back message mem"); 00328 00329 mm_from_fwd[i] = QMP_declare_msgmem(spinor.fwdGhostFaceBuffer[i], nbytes[i]); 00330 if( mm_from_fwd[i] == NULL ) errorQuda("Unable to allocate recv from fwd message mem"); 00331 00332 mm_from_back[i] = QMP_declare_msgmem(spinor.backGhostFaceBuffer[i], nbytes[i]); 00333 if( mm_from_back[i] == NULL ) errorQuda("Unable to allocate recv from back message mem"); 00334 00335 mh_send_fwd[i] = QMP_declare_send_relative(mm_send_fwd[i], i, +1, 0); 00336 if( mh_send_fwd[i] == NULL ) errorQuda("Unable to allocate forward send"); 
00337 00338 mh_send_back[i] = QMP_declare_send_relative(mm_send_back[i], i, -1, 0); 00339 if( mh_send_back[i] == NULL ) errorQuda("Unable to allocate backward send"); 00340 00341 mh_from_fwd[i] = QMP_declare_receive_relative(mm_from_fwd[i], i, +1, 0); 00342 if( mh_from_fwd[i] == NULL ) errorQuda("Unable to allocate forward recv"); 00343 00344 mh_from_back[i] = QMP_declare_receive_relative(mm_from_back[i], i, -1, 0); 00345 if( mh_from_back[i] == NULL ) errorQuda("Unable to allocate backward recv"); 00346 } 00347 00348 for (int i=0; i<4; i++) { 00349 QMP_start(mh_from_back[i]); 00350 QMP_start(mh_from_fwd[i]); 00351 QMP_start(mh_send_fwd[i]); 00352 QMP_start(mh_send_back[i]); 00353 } 00354 00355 for (int i=0; i<4; i++) { 00356 QMP_wait(mh_send_fwd[i]); 00357 QMP_wait(mh_send_back[i]); 00358 QMP_wait(mh_from_back[i]); 00359 QMP_wait(mh_from_fwd[i]); 00360 } 00361 00362 for (int i=0; i<4; i++) { 00363 QMP_free_msghandle(mh_send_fwd[i]); 00364 QMP_free_msghandle(mh_send_back[i]); 00365 QMP_free_msghandle(mh_from_fwd[i]); 00366 QMP_free_msghandle(mh_from_back[i]); 00367 QMP_free_msgmem(mm_send_fwd[i]); 00368 QMP_free_msgmem(mm_send_back[i]); 00369 QMP_free_msgmem(mm_from_back[i]); 00370 QMP_free_msgmem(mm_from_fwd[i]); 00371 } 00372 00373 #else 00374 00375 for (int i=0; i<4; i++) { 00376 //printf("%d COPY length = %d\n", i, nbytes[i]/precision); 00377 memcpy(spinor.fwdGhostFaceBuffer[i], spinor.backGhostFaceSendBuffer[i], nbytes[i]); 00378 memcpy(spinor.backGhostFaceBuffer[i], spinor.fwdGhostFaceSendBuffer[i], nbytes[i]); 00379 } 00380 00381 #endif 00382 } 00383 00384 void FaceBuffer::exchangeCpuLink(void** ghost_link, void** link_sendbuf) { 00385 00386 #ifdef QMP_COMMS 00387 00388 QMP_msgmem_t mm_send_fwd[4]; 00389 QMP_msgmem_t mm_from_back[4]; 00390 QMP_msghandle_t mh_send_fwd[4]; 00391 QMP_msghandle_t mh_from_back[4]; 00392 00393 for (int i=0; i<4; i++) { 00394 int len = 2*nFace*faceVolumeCB[i]*Ninternal; 00395 mm_send_fwd[i] = QMP_declare_msgmem(link_sendbuf[i], 
len*precision); 00396 if( mm_send_fwd[i] == NULL ) errorQuda("Unable to allocate send fwd message mem"); 00397 00398 mm_from_back[i] = QMP_declare_msgmem(ghost_link[i], len*precision); 00399 if( mm_from_back[i] == NULL ) errorQuda("Unable to allocate recv from back message mem"); 00400 00401 mh_send_fwd[i] = QMP_declare_send_relative(mm_send_fwd[i], i, +1, 0); 00402 if( mh_send_fwd[i] == NULL ) errorQuda("Unable to allocate forward send"); 00403 00404 mh_from_back[i] = QMP_declare_receive_relative(mm_from_back[i], i, -1, 0); 00405 if( mh_from_back[i] == NULL ) errorQuda("Unable to allocate backward recv"); 00406 } 00407 00408 for (int i=0; i<4; i++) { 00409 QMP_start(mh_send_fwd[i]); 00410 QMP_start(mh_from_back[i]); 00411 } 00412 00413 for (int i=0; i<4; i++) { 00414 QMP_wait(mh_send_fwd[i]); 00415 QMP_wait(mh_from_back[i]); 00416 } 00417 00418 for (int i=0; i<4; i++) { 00419 QMP_free_msghandle(mh_send_fwd[i]); 00420 QMP_free_msghandle(mh_from_back[i]); 00421 QMP_free_msgmem(mm_send_fwd[i]); 00422 QMP_free_msgmem(mm_from_back[i]); 00423 } 00424 00425 #else 00426 00427 for(int dir =0; dir < 4; dir++) { 00428 int len = 2*nFace*faceVolumeCB[dir]*Ninternal; // factor 2 since we have both parities 00429 //printf("%d COPY length = %d\n", dir, len); 00430 memcpy(ghost_link[dir], link_sendbuf[dir], len*precision); 00431 } 00432 00433 #endif 00434 00435 } 00436 00437 00438 00439 void transferGaugeFaces(void *gauge, void *gauge_face, QudaPrecision precision, 00440 int Nvec, QudaReconstructType reconstruct, int V, int Vs) 00441 { 00442 int nblocks, ndim=4; 00443 size_t blocksize;//, nbytes; 00444 ptrdiff_t offset, stride; 00445 void *g; 00446 00447 nblocks = ndim*reconstruct/Nvec; 00448 blocksize = Vs*Nvec*precision; 00449 offset = (V-Vs)*Nvec*precision; 00450 stride = (V+Vs)*Nvec*precision; // assume that pad = Vs 00451 00452 #ifdef QMP_COMMS 00453 00454 QMP_msgmem_t mm_gauge_send_fwd; 00455 QMP_msgmem_t mm_gauge_from_back; 00456 QMP_msghandle_t mh_gauge_send_fwd; 00457 
QMP_msghandle_t mh_gauge_from_back; 00458 00459 g = (void *) ((char *) gauge + offset); 00460 mm_gauge_send_fwd = QMP_declare_strided_msgmem(g, blocksize, nblocks, stride); 00461 if (!mm_gauge_send_fwd) { 00462 errorQuda("Unable to allocate gauge message mem"); 00463 } 00464 00465 mm_gauge_from_back = QMP_declare_strided_msgmem(gauge_face, blocksize, nblocks, stride); 00466 if (!mm_gauge_from_back) { 00467 errorQuda("Unable to allocate gauge face message mem"); 00468 } 00469 00470 mh_gauge_send_fwd = QMP_declare_send_relative(mm_gauge_send_fwd, 3, +1, 0); 00471 if (!mh_gauge_send_fwd) { 00472 errorQuda("Unable to allocate gauge message handle"); 00473 } 00474 mh_gauge_from_back = QMP_declare_receive_relative(mm_gauge_from_back, 3, -1, 0); 00475 if (!mh_gauge_from_back) { 00476 errorQuda("Unable to allocate gauge face message handle"); 00477 } 00478 00479 QMP_start(mh_gauge_send_fwd); 00480 QMP_start(mh_gauge_from_back); 00481 00482 QMP_wait(mh_gauge_send_fwd); 00483 QMP_wait(mh_gauge_from_back); 00484 00485 QMP_free_msghandle(mh_gauge_send_fwd); 00486 QMP_free_msghandle(mh_gauge_from_back); 00487 QMP_free_msgmem(mm_gauge_send_fwd); 00488 QMP_free_msgmem(mm_gauge_from_back); 00489 00490 #else 00491 00492 void *gf; 00493 00494 for (int i=0; i<nblocks; i++) { 00495 g = (void *) ((char *) gauge + offset + i*stride); 00496 gf = (void *) ((char *) gauge_face + i*stride); 00497 cudaMemcpy(gf, g, blocksize, cudaMemcpyHostToHost); 00498 } 00499 00500 #endif // QMP_COMMS 00501 } 00502 00503 void reduceMaxDouble(double &max) { 00504 00505 #ifdef QMP_COMMS 00506 QMP_max_double(&max); 00507 #endif 00508 00509 } 00510 00511 void reduceDouble(double &sum) { 00512 00513 #ifdef QMP_COMMS 00514 if (globalReduce) QMP_sum_double(&sum); 00515 #endif 00516 00517 } 00518 00519 void reduceDoubleArray(double *sum, const int len) { 00520 00521 #ifdef QMP_COMMS 00522 if (globalReduce) QMP_sum_double_array(sum,len); 00523 #endif 00524 00525 } 00526 00527 #ifdef QMP_COMMS 00528 static int 
manual_set_partition[4] ={0, 0, 0, 0}; 00529 int commDim(int dir) { return QMP_get_logical_dimensions()[dir]; } 00530 int commCoords(int dir) { return QMP_get_logical_coordinates()[dir]; } 00531 int commDimPartitioned(int dir){ return (manual_set_partition[dir] || ((commDim(dir) > 1)));} 00532 void commDimPartitionedSet(int dir){ manual_set_partition[dir] = 1; } 00533 void commBarrier() { QMP_barrier(); } 00534 #else 00535 int commDim(int dir) { return 1; } 00536 int commCoords(int dir) { return 0; } 00537 int commDimPartitioned(int dir){ return 0; } 00538 void commDimPartitionedSet(int dir){ ; } 00539 void commBarrier() { ; } 00540 #endif