QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cpu_gauge_field.cpp
Go to the documentation of this file.
1 #include <quda_internal.h>
2 #include <gauge_field.h>
3 #include <face_quda.h>
4 #include <assert.h>
5 #include <string.h>
6 
7 namespace quda {
8 
10  GaugeField(param), pinned(param.pinned)
11  {
13  errorQuda("CPU fields do not support half precision");
14  }
15  if (pad != 0) {
16  errorQuda("CPU fields do not support non-zero padding");
17  }
19  errorQuda("Reconstruction type %d not supported", reconstruct);
20  }
22  errorQuda("10-reconstruction only supported with MILC gauge order");
23  }
24 
25  if (order == QUDA_QDP_GAUGE_ORDER) {
26 
27  gauge = (void**) safe_malloc(nDim * sizeof(void*));
28 
29  for (int d=0; d<nDim; d++) {
30  size_t nbytes = volume * reconstruct * precision;
32  gauge[d] = (pinned ? pinned_malloc(nbytes) : safe_malloc(nbytes));
34  memset(gauge[d], 0, nbytes);
35  }
36  } else if (create == QUDA_REFERENCE_FIELD_CREATE) {
37  gauge[d] = ((void**)param.gauge)[d];
38  } else {
39  errorQuda("Unsupported creation type %d", create);
40  }
41  }
42 
44 
46  size_t nbytes = nDim * volume * reconstruct * precision;
47  gauge = (void **) (pinned ? pinned_malloc(nbytes) : safe_malloc(nbytes));
49  memset(gauge, 0, nbytes);
50  }
51  } else if (create == QUDA_REFERENCE_FIELD_CREATE) {
52  gauge = (void**) param.gauge;
53  } else {
54  errorQuda("Unsupported creation type %d", create);
55  }
56 
57  } else {
58  errorQuda("Unsupported gauge order type %d", order);
59  }
60 
61  // Ghost zone is always 2-dimensional
62  ghost = (void**) safe_malloc(QUDA_MAX_DIM * sizeof(void*));
63  for (int i=0; i<nDim; i++) {
64  size_t nbytes = nFace * surface[i] * reconstruct * precision;
65  ghost[i] = (pinned ? pinned_malloc(nbytes) : safe_malloc(nbytes));
66  }
67  }
68 
69 
71  {
73  if (order == QUDA_QDP_GAUGE_ORDER) {
74  for (int d=0; d<nDim; d++) {
75  if (gauge[d]) host_free(gauge[d]);
76  }
77  if (gauge) host_free(gauge);
78  } else {
79  if (gauge) host_free(gauge);
80  }
81  } else { // QUDA_REFERENCE_FIELD_CREATE
83  if (gauge) host_free(gauge);
84  }
85  }
86 
87  for (int i=0; i<nDim; i++) {
88  if (ghost[i]) host_free(ghost[i]);
89  }
90  if (ghost) host_free(ghost);
91  }
92 
93 
94  // transpose the matrix
95  template <typename Float>
96  inline void transpose(Float *gT, const Float *g) {
97  for (int ic=0; ic<3; ic++) {
98  for (int jc=0; jc<3; jc++) {
99  for (int r=0; r<2; r++) {
100  gT[(ic*3+jc)*2+r] = g[(jc*3+ic)*2+r];
101  }
102  }
103  }
104  }
105 
106  // FIXME - replace this with a functor approach to more easily arbitrary ordering
107  template <typename Float>
108  void packGhost(Float **ghost, const Float **gauge, const int nFace, const int *X,
109  const int volumeCB, const int *surfaceCB, const QudaGaugeFieldOrder order) {
110 
111  if (order != QUDA_QDP_GAUGE_ORDER && order != QUDA_BQCD_GAUGE_ORDER &&
113  errorQuda("packGhost not supported for %d gauge field order", order);
114 
115  int XY=X[0]*X[1];
116  int XYZ=X[0]*X[1]*X[2];
117 
118  //loop variables: a, b, c with a the most signifcant and c the least significant
119  //A, B, C the maximum value
120  //we need to loop in d as well, d's vlaue dims[dir]-3, dims[dir]-2, dims[dir]-1
121  int A[4], B[4], C[4];
122 
123  //X dimension
124  A[0] = X[3]; B[0] = X[2]; C[0] = X[1];
125 
126  //Y dimension
127  A[1] = X[3]; B[1] = X[2]; C[1] = X[0];
128 
129  //Z dimension
130  A[2] = X[3]; B[2] = X[1]; C[2] = X[0];
131 
132  //T dimension
133  A[3] = X[2]; B[3] = X[1]; C[3] = X[0];
134 
135  //multiplication factor to compute index in original cpu memory
136  int f[4][4]={
137  {XYZ, XY, X[0], 1},
138  {XYZ, XY, 1, X[0]},
139  {XYZ, X[0], 1, XY},
140  { XY, X[0], 1, XYZ}
141  };
142 
143  for(int dir =0; dir < 4; dir++)
144  {
145  const Float* even_src;
146  const Float* odd_src;
147 
148  if (order == QUDA_BQCD_GAUGE_ORDER) {
149  // need to add on halo region
150  int mu_offset = X[0]/2 + 2;
151  for (int i=1; i<4; i++) mu_offset *= (X[i] + 2);
152  even_src = (const Float*)gauge + (dir*2+0)*mu_offset*gaugeSiteSize;
153  odd_src = (const Float*)gauge + (dir*2+1)*mu_offset*gaugeSiteSize;
154  } else if (order == QUDA_CPS_WILSON_GAUGE_ORDER || order == QUDA_MILC_GAUGE_ORDER) {
155  even_src = (const Float*)gauge + 0*4*volumeCB*gaugeSiteSize;
156  odd_src = (const Float*)gauge + 1*4*volumeCB*gaugeSiteSize;
157  } else { // QDP_GAUGE_FIELD_ORDER
158  even_src = gauge[dir];
159  odd_src = gauge[dir] + volumeCB*gaugeSiteSize;
160  }
161 
162  Float* even_dst;
163  Float* odd_dst;
164 
165  //switching odd and even ghost gauge when that dimension size is odd
166  //only switch if X[dir] is odd and the gridsize in that dimension is greater than 1
167  if((X[dir] % 2 ==0) || (commDim(dir) == 1)){
168  even_dst = ghost[dir];
169  odd_dst = ghost[dir] + nFace*surfaceCB[dir]*gaugeSiteSize;
170  }else{
171  even_dst = ghost[dir] + nFace*surfaceCB[dir]*gaugeSiteSize;
172  odd_dst = ghost[dir];
173  }
174 
175  int even_dst_index = 0;
176  int odd_dst_index = 0;
177 
178  int d;
179  int a,b,c;
180  for(d = X[dir]- nFace; d < X[dir]; d++){
181  for(a = 0; a < A[dir]; a++){
182  for(b = 0; b < B[dir]; b++){
183  for(c = 0; c < C[dir]; c++){
184  int index = ( a*f[dir][0] + b*f[dir][1]+ c*f[dir][2] + d*f[dir][3])>> 1;
185  if (order == QUDA_CPS_WILSON_GAUGE_ORDER || order == QUDA_MILC_GAUGE_ORDER)
186  index = 4*index + dir;
187  int oddness = (a+b+c+d)%2;
188  if (oddness == 0){ //even
189  if (order == QUDA_BQCD_GAUGE_ORDER || order == QUDA_CPS_WILSON_GAUGE_ORDER) {
190  // we do transposition here so we can just call packQDPGauge for the ghost zone
191  Float gT[18];
192  transpose(gT, &even_src[18*index]);
193  for(int i=0; i<18; i++){
194  even_dst[18*even_dst_index+i] = gT[i];
195  }
196  } else {
197  for(int i=0;i < 18;i++){
198  even_dst[18*even_dst_index+i] = even_src[18*index + i];
199  }
200  }
201  even_dst_index++;
202  }else{ //odd
203  if (order == QUDA_BQCD_GAUGE_ORDER || order == QUDA_CPS_WILSON_GAUGE_ORDER) {
204  // we do transposition here so we can just call packQDPGauge for the ghost zone
205  Float gT[18];
206  transpose(gT, &odd_src[18*index]);
207  for(int i=0; i<18; i++){
208  odd_dst[18*odd_dst_index+i] = gT[i];
209  }
210  } else {
211  for(int i=0;i < 18;i++){
212  odd_dst[18*odd_dst_index+i] = odd_src[18*index + i];
213  }
214  }
215  odd_dst_index++;
216  }
217  }//c
218  }//b
219  }//a
220  }//d
221 
222  assert( even_dst_index == nFace*surfaceCB[dir]);
223  assert( odd_dst_index == nFace*surfaceCB[dir]);
224  }
225 
226  }
227 
228 
229  // This does the exchange of the gauge field ghost zone and places it
230  // into the ghost array.
231  // This should be optimized so it is reused if called multiple times
233  void **send = (void **) safe_malloc(QUDA_MAX_DIM*sizeof(void *));
234 
235  for (int d=0; d<nDim; d++) {
236  send[d] = safe_malloc(nFace * surface[d] * reconstruct * precision);
237  }
238 
239  // get the links into a contiguous buffer
241  packGhost((double**)send, (const double**)gauge, nFace, x, volumeCB, surfaceCB, order);
242  } else {
243  packGhost((float**)send, (const float**)gauge, nFace, x, volumeCB, surfaceCB, order);
244  }
245 
246  // communicate between nodes
247  FaceBuffer faceBuf(x, nDim, reconstruct, nFace, precision);
248  faceBuf.exchangeCpuLink(ghost, send);
249 
250  for (int d=0; d<nDim; d++) {
251  host_free(send[d]);
252  }
253  host_free(send);
254  }
255 
256  void cpuGaugeField::setGauge(void **_gauge)
257  {
259  errorQuda("Setting gauge pointer is only allowed when cpu gauge"
260  "is of QUDA_REFERENCE_FIELD_CREATE type\n");
261  }
262  gauge = _gauge;
263  }
264 
265 /*template <typename Float>
266 void print_matrix(const Float &m, unsigned int x) {
267 
268  for (int s=0; s<o.Nspin(); s++) {
269  std::cout << "x = " << x << ", s = " << s << ", { ";
270  for (int c=0; c<o.Ncolor(); c++) {
271  std::cout << " ( " << o(x, s, c, 0) << " , " ;
272  if (c<o.Ncolor()-1) std::cout << o(x, s, c, 1) << " ) ," ;
273  else std::cout << o(x, s, c, 1) << " ) " ;
274  }
275  std::cout << " } " << std::endl;
276  }
277 
278 }
279 
280 // print out the vector at volume point x
281 void cpuColorSpinorField::PrintMatrix(unsigned int x) {
282 
283  switch(precision) {
284  case QUDA_DOUBLE_PRECISION:
285  print_matrix(*order_double, x);
286  break;
287  case QUDA_SINGLE_PRECISION:
288  print_matrix(*order_single, x);
289  break;
290  default:
291  errorQuda("Precision %d not implemented", precision);
292  }
293 
294 }
295 */
296 
297 } // namespace quda