// QUDA v0.4.0
// A library for QCD on GPUs
00001 #include <gauge_field.h> 00002 #include <face_quda.h> 00003 #include <assert.h> 00004 #include <string.h> 00005 00006 cpuGaugeField::cpuGaugeField(const GaugeFieldParam ¶m) : 00007 GaugeField(param, QUDA_CPU_FIELD_LOCATION), pinned(param.pinned) { 00008 00009 if (reconstruct != QUDA_RECONSTRUCT_NO && 00010 reconstruct != QUDA_RECONSTRUCT_10) 00011 errorQuda("Reconstruction type %d not supported", reconstruct); 00012 00013 if (reconstruct == QUDA_RECONSTRUCT_10 && order != QUDA_MILC_GAUGE_ORDER) 00014 errorQuda("10 reconstruction only supported with MILC gauge order"); 00015 00016 if (order == QUDA_QDP_GAUGE_ORDER) { 00017 gauge = (void**)malloc(nDim * sizeof(void*)); 00018 00019 for (int d=0; d<nDim; d++) { 00020 if (create == QUDA_NULL_FIELD_CREATE 00021 || create == QUDA_ZERO_FIELD_CREATE) { 00022 if(pinned){ 00023 cudaMallocHost(&gauge[d], volume * reconstruct * precision); 00024 }else{ 00025 gauge[d] = malloc(volume * reconstruct * precision); 00026 } 00027 00028 if(create == QUDA_ZERO_FIELD_CREATE){ 00029 memset(gauge[d], 0, volume * reconstruct * precision); 00030 } 00031 } else if (create == QUDA_REFERENCE_FIELD_CREATE) { 00032 gauge[d] = ((void**)param.gauge)[d]; 00033 } else { 00034 errorQuda("Unsupported creation type %d", create); 00035 } 00036 } 00037 00038 } else if (order == QUDA_CPS_WILSON_GAUGE_ORDER || 00039 order == QUDA_MILC_GAUGE_ORDER) { 00040 if (create == QUDA_NULL_FIELD_CREATE || 00041 create == QUDA_ZERO_FIELD_CREATE) { 00042 if(pinned){ 00043 cudaMallocHost(&(gauge), nDim*volume*reconstruct*precision); 00044 }else{ 00045 gauge = (void**)malloc(nDim * volume * reconstruct * precision); 00046 } 00047 if(create == QUDA_ZERO_FIELD_CREATE){ 00048 memset(gauge, 0, nDim*volume * reconstruct * precision); 00049 } 00050 } else if (create == QUDA_REFERENCE_FIELD_CREATE) { 00051 gauge = (void**)param.gauge; 00052 } else { 00053 errorQuda("Unsupported creation type %d", create); 00054 } 00055 } else { 00056 errorQuda("Unsupported gauge order 
type %d", order); 00057 } 00058 00059 // Ghost zone is always 2-dimensional 00060 ghost = (void**)malloc(sizeof(void*)*QUDA_MAX_DIM); 00061 for (int i=0; i<nDim; i++) { 00062 if(pinned){ 00063 cudaMallocHost(&ghost[i], nFace * surface[i] * reconstruct * precision); 00064 }else{ 00065 ghost[i] = malloc(nFace * surface[i] * reconstruct * precision); 00066 } 00067 } 00068 00069 } 00070 00071 cpuGaugeField::~cpuGaugeField() { 00072 00073 if (create == QUDA_NULL_FIELD_CREATE 00074 || create == QUDA_ZERO_FIELD_CREATE ) { 00075 if (order == QUDA_QDP_GAUGE_ORDER){ 00076 for (int d=0; d<nDim; d++) { 00077 if(pinned){ 00078 if (gauge[d]) cudaFreeHost(gauge[d]); 00079 }else{ 00080 if (gauge[d]) free(gauge[d]); 00081 } 00082 } 00083 if (gauge) free(gauge); 00084 }else{ 00085 if(pinned){ 00086 if (gauge) cudaFreeHost(gauge); 00087 }else{ 00088 if (gauge) free(gauge); 00089 } 00090 } 00091 }else{ // QUDA_REFERENCE_FIELD_CREATE 00092 if (order == QUDA_QDP_GAUGE_ORDER){ 00093 free(gauge); 00094 } 00095 } 00096 00097 00098 for (int i=0; i<nDim; i++) { 00099 if(pinned){ 00100 if (ghost[i]) cudaFreeHost(ghost[i]); 00101 }else{ 00102 if (ghost[i]) free(ghost[i]); 00103 } 00104 } 00105 free(ghost); 00106 00107 } 00108 00109 template <typename Float> 00110 void packGhost(Float **gauge, Float **ghost, const int nFace, const int *X, 00111 const int volumeCB, const int *surfaceCB) { 00112 int XY=X[0]*X[1]; 00113 int XYZ=X[0]*X[1]*X[2]; 00114 00115 //loop variables: a, b, c with a the most signifcant and c the least significant 00116 //A, B, C the maximum value 00117 //we need to loop in d as well, d's vlaue dims[dir]-3, dims[dir]-2, dims[dir]-1 00118 int A[4], B[4], C[4]; 00119 00120 //X dimension 00121 A[0] = X[3]; B[0] = X[2]; C[0] = X[1]; 00122 00123 //Y dimension 00124 A[1] = X[3]; B[1] = X[2]; C[1] = X[0]; 00125 00126 //Z dimension 00127 A[2] = X[3]; B[2] = X[1]; C[2] = X[0]; 00128 00129 //T dimension 00130 A[3] = X[2]; B[3] = X[1]; C[3] = X[0]; 00131 00132 //multiplication factor to 
compute index in original cpu memory 00133 int f[4][4]={ 00134 {XYZ, XY, X[0], 1}, 00135 {XYZ, XY, 1, X[0]}, 00136 {XYZ, X[0], 1, XY}, 00137 { XY, X[0], 1, XYZ} 00138 }; 00139 00140 for(int dir =0; dir < 4; dir++) 00141 { 00142 Float* even_src = gauge[dir]; 00143 Float* odd_src = gauge[dir] + volumeCB*gaugeSiteSize; 00144 00145 Float* even_dst; 00146 Float* odd_dst; 00147 00148 //switching odd and even ghost gauge when that dimension size is odd 00149 //only switch if X[dir] is odd and the gridsize in that dimension is greater than 1 00150 if((X[dir] % 2 ==0) || (commDim(dir) == 1)){ 00151 even_dst = ghost[dir]; 00152 odd_dst = ghost[dir] + nFace*surfaceCB[dir]*gaugeSiteSize; 00153 }else{ 00154 even_dst = ghost[dir] + nFace*surfaceCB[dir]*gaugeSiteSize; 00155 odd_dst = ghost[dir]; 00156 } 00157 00158 int even_dst_index = 0; 00159 int odd_dst_index = 0; 00160 00161 int d; 00162 int a,b,c; 00163 for(d = X[dir]- nFace; d < X[dir]; d++){ 00164 for(a = 0; a < A[dir]; a++){ 00165 for(b = 0; b < B[dir]; b++){ 00166 for(c = 0; c < C[dir]; c++){ 00167 int index = ( a*f[dir][0] + b*f[dir][1]+ c*f[dir][2] + d*f[dir][3])>> 1; 00168 int oddness = (a+b+c+d)%2; 00169 if (oddness == 0){ //even 00170 for(int i=0;i < 18;i++){ 00171 even_dst[18*even_dst_index+i] = even_src[18*index + i]; 00172 } 00173 even_dst_index++; 00174 }else{ //odd 00175 for(int i=0;i < 18;i++){ 00176 odd_dst[18*odd_dst_index+i] = odd_src[18*index + i]; 00177 } 00178 odd_dst_index++; 00179 } 00180 }//c 00181 }//b 00182 }//a 00183 }//d 00184 00185 assert( even_dst_index == nFace*surfaceCB[dir]); 00186 assert( odd_dst_index == nFace*surfaceCB[dir]); 00187 } 00188 00189 } 00190 00191 // This does the exchange of the gauge field ghost zone and places it 00192 // into the ghost array. 
00193 // This should be optimized so it is reused if called multiple times 00194 void cpuGaugeField::exchangeGhost() const { 00195 void **send = (void**)malloc(sizeof(void*)*QUDA_MAX_DIM); 00196 00197 for (int d=0; d<nDim; d++) { 00198 send[d] = malloc(nFace * surface[d] * reconstruct * precision); 00199 } 00200 00201 // get the links into a contiguous buffer 00202 if (precision == QUDA_DOUBLE_PRECISION) { 00203 packGhost((double**)gauge, (double**)send, nFace, x, volumeCB, surfaceCB); 00204 } else { 00205 packGhost((float**)gauge, (float**)send, nFace, x, volumeCB, surfaceCB); 00206 } 00207 00208 // communicate between nodes 00209 FaceBuffer faceBuf(x, nDim, reconstruct, nFace, precision); 00210 faceBuf.exchangeCpuLink(ghost, send); 00211 00212 for (int i=0; i<4; i++) { 00213 double sum = 0.0; 00214 for (int j=0; j<nFace*surface[i]*reconstruct; j++) { 00215 sum += ((double*)(ghost[i]))[j]; 00216 } 00217 } 00218 00219 for (int d=0; d<nDim; d++) free(send[d]); 00220 free(send); 00221 } 00222 00223 void cpuGaugeField::setGauge(void**_gauge) 00224 { 00225 if(create != QUDA_REFERENCE_FIELD_CREATE){ 00226 errorQuda("Setting gauge pointer is only allowed when cpu gauge" 00227 "is of QUDA_REFERENCE_FIELD_CREATE type\n"); 00228 } 00229 gauge= _gauge; 00230 }