// QUDA v0.4.0
// A library for QCD on GPUs
00001 #include <gauge_field.h> 00002 #include <face_quda.h> 00003 #include <assert.h> 00004 #include <string.h> 00005 00006 cpuGaugeField::cpuGaugeField(const GaugeFieldParam ¶m) : 00007 GaugeField(param, QUDA_CPU_FIELD_LOCATION), pinned(param.pinned) { 00008 00009 if (reconstruct != QUDA_RECONSTRUCT_NO && 00010 reconstruct != QUDA_RECONSTRUCT_10) 00011 errorQuda("Reconstruction type %d not supported", reconstruct); 00012 00013 if (reconstruct == QUDA_RECONSTRUCT_10 && order != QUDA_MILC_GAUGE_ORDER) 00014 errorQuda("10 reconstruction only supported with MILC gauge order"); 00015 00016 if (order == QUDA_QDP_GAUGE_ORDER) { 00017 gauge = (void**)malloc(nDim * sizeof(void*)); 00018 00019 for (int d=0; d<nDim; d++) { 00020 if (create == QUDA_NULL_FIELD_CREATE 00021 || create == QUDA_ZERO_FIELD_CREATE) { 00022 if(pinned){ 00023 cudaMallocHost(&gauge[d], volume * reconstruct * precision); 00024 }else{ 00025 gauge[d] = malloc(volume * reconstruct * precision); 00026 } 00027 00028 if(create == QUDA_ZERO_FIELD_CREATE){ 00029 memset(gauge[d], 0, volume * reconstruct * precision); 00030 } 00031 } else if (create == QUDA_REFERENCE_FIELD_CREATE) { 00032 gauge[d] = ((void**)param.gauge)[d]; 00033 } else { 00034 errorQuda("Unsupported creation type %d", create); 00035 } 00036 } 00037 00038 } else if (order == QUDA_CPS_WILSON_GAUGE_ORDER || 00039 order == QUDA_MILC_GAUGE_ORDER) { 00040 if (create == QUDA_NULL_FIELD_CREATE || 00041 create == QUDA_ZERO_FIELD_CREATE) { 00042 if(pinned){ 00043 cudaMallocHost(&(gauge), nDim*volume*reconstruct*precision); 00044 }else{ 00045 gauge = (void**)malloc(nDim * volume * reconstruct * precision); 00046 } 00047 if(create == QUDA_ZERO_FIELD_CREATE){ 00048 memset(gauge, 0, nDim*volume * reconstruct * precision); 00049 } 00050 } else if (create == QUDA_REFERENCE_FIELD_CREATE) { 00051 gauge = (void**)param.gauge; 00052 } else { 00053 errorQuda("Unsupported creation type %d", create); 00054 } 00055 } else { 00056 errorQuda("Unsupported gauge order 
type %d", order); 00057 } 00058 00059 // Ghost zone is always 2-dimensional 00060 ghost = (void**)malloc(sizeof(void*)*QUDA_MAX_DIM); 00061 for (int i=0; i<nDim; i++) { 00062 if(pinned){ 00063 cudaMallocHost(&ghost[i], nFace * surface[i] * reconstruct * precision); 00064 }else{ 00065 ghost[i] = malloc(nFace * surface[i] * reconstruct * precision); 00066 } 00067 } 00068 00069 } 00070 00071 cpuGaugeField::~cpuGaugeField() { 00072 00073 if (create == QUDA_NULL_FIELD_CREATE 00074 || create == QUDA_ZERO_FIELD_CREATE ) { 00075 if (order == QUDA_QDP_GAUGE_ORDER){ 00076 for (int d=0; d<nDim; d++) { 00077 if(pinned){ 00078 if (gauge[d]) cudaFreeHost(gauge[d]); 00079 }else{ 00080 if (gauge[d]) free(gauge[d]); 00081 } 00082 } 00083 if (gauge) free(gauge); 00084 }else{ 00085 if(pinned){ 00086 if (gauge) cudaFreeHost(gauge); 00087 }else{ 00088 if (gauge) free(gauge); 00089 } 00090 } 00091 }else{ // QUDA_REFERENCE_FIELD_CREATE 00092 if (order == QUDA_QDP_GAUGE_ORDER){ 00093 free(gauge); 00094 } 00095 } 00096 00097 00098 for (int i=0; i<nDim; i++) { 00099 if(pinned){ 00100 if (ghost[i]) cudaFreeHost(ghost[i]); 00101 }else{ 00102 if (ghost[i]) free(ghost[i]); 00103 } 00104 } 00105 free(ghost); 00106 00107 } 00108 00109 template <typename Float> 00110 void packGhost(Float **gauge, Float **ghost, const int nFace, const int *X, 00111 const int volumeCB, const int *surfaceCB) { 00112 int XY=X[0]*X[1]; 00113 int XYZ=X[0]*X[1]*X[2]; 00114 00115 //loop variables: a, b, c with a the most signifcant and c the least significant 00116 //A, B, C the maximum value 00117 //we need to loop in d as well, d's vlaue dims[dir]-3, dims[dir]-2, dims[dir]-1 00118 int A[4], B[4], C[4]; 00119 00120 //X dimension 00121 A[0] = X[3]; B[0] = X[2]; C[0] = X[1]; 00122 00123 //Y dimension 00124 A[1] = X[3]; B[1] = X[2]; C[1] = X[0]; 00125 00126 //Z dimension 00127 A[2] = X[3]; B[2] = X[1]; C[2] = X[0]; 00128 00129 //T dimension 00130 A[3] = X[2]; B[3] = X[1]; C[3] = X[0]; 00131 00132 //multiplication factor to 
compute index in original cpu memory 00133 int f[4][4]={ 00134 {XYZ, XY, X[0], 1}, 00135 {XYZ, XY, 1, X[0]}, 00136 {XYZ, X[0], 1, XY}, 00137 { XY, X[0], 1, XYZ} 00138 }; 00139 00140 for(int dir =0; dir < 4; dir++) 00141 { 00142 Float* even_src = gauge[dir]; 00143 Float* odd_src = gauge[dir] + volumeCB*gaugeSiteSize; 00144 00145 Float* even_dst; 00146 Float* odd_dst; 00147 00148 //switching odd and even ghost gauge when that dimension size is odd 00149 //only switch if X[dir] is odd and the gridsize in that dimension is greater than 1 00150 if((X[dir] % 2 ==0) || (commDim(dir) == 1)){ 00151 even_dst = ghost[dir]; 00152 odd_dst = ghost[dir] + nFace*surfaceCB[dir]*gaugeSiteSize; 00153 }else{ 00154 even_dst = ghost[dir] + nFace*surfaceCB[dir]*gaugeSiteSize; 00155 odd_dst = ghost[dir]; 00156 } 00157 00158 int even_dst_index = 0; 00159 int odd_dst_index = 0; 00160 00161 int d; 00162 int a,b,c; 00163 for(d = X[dir]- nFace; d < X[dir]; d++){ 00164 for(a = 0; a < A[dir]; a++){ 00165 for(b = 0; b < B[dir]; b++){ 00166 for(c = 0; c < C[dir]; c++){ 00167 int index = ( a*f[dir][0] + b*f[dir][1]+ c*f[dir][2] + d*f[dir][3])>> 1; 00168 int oddness = (a+b+c+d)%2; 00169 if (oddness == 0){ //even 00170 for(int i=0;i < 18;i++){ 00171 even_dst[18*even_dst_index+i] = even_src[18*index + i]; 00172 } 00173 even_dst_index++; 00174 }else{ //odd 00175 for(int i=0;i < 18;i++){ 00176 odd_dst[18*odd_dst_index+i] = odd_src[18*index + i]; 00177 } 00178 odd_dst_index++; 00179 } 00180 }//c 00181 }//b 00182 }//a 00183 }//d 00184 00185 assert( even_dst_index == nFace*surfaceCB[dir]); 00186 assert( odd_dst_index == nFace*surfaceCB[dir]); 00187 } 00188 00189 } 00190 00191 // This does the exchange of the gauge field ghost zone and places it 00192 // into the ghost array. 
00193 // This should be optimized so it is reused if called multiple times 00194 void cpuGaugeField::exchangeGhost() const { 00195 void **send = (void**)malloc(sizeof(void*)*QUDA_MAX_DIM); 00196 00197 for (int d=0; d<nDim; d++) { 00198 send[d] = malloc(nFace * surface[d] * reconstruct * precision); 00199 } 00200 00201 // get the links into a contiguous buffer 00202 if (precision == QUDA_DOUBLE_PRECISION) { 00203 packGhost((double**)gauge, (double**)send, nFace, x, volumeCB, surfaceCB); 00204 } else { 00205 packGhost((float**)gauge, (float**)send, nFace, x, volumeCB, surfaceCB); 00206 } 00207 00208 // communicate between nodes 00209 FaceBuffer faceBuf(x, nDim, reconstruct, nFace, precision); 00210 faceBuf.exchangeCpuLink(ghost, send); 00211 00212 for (int i=0; i<4; i++) { 00213 double sum = 0.0; 00214 for (int j=0; j<nFace*surface[i]*reconstruct; j++) { 00215 sum += ((double*)(ghost[i]))[j]; 00216 } 00217 } 00218 00219 for (int d=0; d<nDim; d++) free(send[d]); 00220 free(send); 00221 } 00222 00223 void cpuGaugeField::setGauge(void**_gauge) 00224 { 00225 if(create != QUDA_REFERENCE_FIELD_CREATE){ 00226 errorQuda("Setting gauge pointer is only allowed when cpu gauge" 00227 "is of QUDA_REFERENCE_FIELD_CREATE type\n"); 00228 } 00229 gauge= _gauge; 00230 }