QUDA 1.0.0
cpu_gauge_field.cpp
#include <quda_internal.h>
#include <gauge_field.h>
#include <assert.h>
#include <string.h>
#include <typeinfo>

namespace quda {

  cpuGaugeField::cpuGaugeField(const GaugeFieldParam &param) :
    GaugeField(param)
  {
    if (precision == QUDA_HALF_PRECISION) {
      errorQuda("CPU fields do not support half precision");
    }
    if (precision == QUDA_QUARTER_PRECISION) {
      errorQuda("CPU fields do not support quarter precision");
    }
    if (pad != 0) {
      errorQuda("CPU fields do not support non-zero padding");
    }
    if (reconstruct != QUDA_RECONSTRUCT_NO && reconstruct != QUDA_RECONSTRUCT_10) {
      errorQuda("Reconstruction type %d not supported", reconstruct);
    }
    if (reconstruct == QUDA_RECONSTRUCT_10 && order != QUDA_MILC_GAUGE_ORDER) {
      errorQuda("10-reconstruction only supported with MILC gauge order");
    }

    int siteDim = 0;
    if (geometry == QUDA_SCALAR_GEOMETRY) siteDim = 1;
    else if (geometry == QUDA_VECTOR_GEOMETRY) siteDim = nDim;
    else if (geometry == QUDA_TENSOR_GEOMETRY) siteDim = nDim * (nDim-1) / 2;
    else if (geometry == QUDA_COARSE_GEOMETRY) siteDim = 2*nDim;
    else errorQuda("Unknown geometry type %d", geometry);

    // compute the correct bytes size for these padded field orders
    if (order == QUDA_TIFR_PADDED_GAUGE_ORDER) {
      bytes = siteDim * (x[0]*x[1]*(x[2]+4)*x[3]) * nInternal * precision;
    } else if (order == QUDA_BQCD_GAUGE_ORDER) {
      bytes = siteDim * (x[0]+4)*(x[1]+2)*(x[2]+2)*(x[3]+2) * nInternal * precision;
    } else if (order == QUDA_MILC_SITE_GAUGE_ORDER) {
      bytes = volume * site_size; // each site stores a full MILC site struct
    }

    if (order == QUDA_QDP_GAUGE_ORDER) {
      gauge = (void**) safe_malloc(siteDim * sizeof(void*));

      for (int d=0; d<siteDim; d++) {
        size_t nbytes = volume * nInternal * precision;
        if (create == QUDA_NULL_FIELD_CREATE || create == QUDA_ZERO_FIELD_CREATE) {
          gauge[d] = safe_malloc(nbytes);
          if (create == QUDA_ZERO_FIELD_CREATE) memset(gauge[d], 0, nbytes);
        } else if (create == QUDA_REFERENCE_FIELD_CREATE) {
          gauge[d] = ((void**)param.gauge)[d];
        } else {
          errorQuda("Unsupported creation type %d", create);
        }
      }

    } else if (order == QUDA_CPS_WILSON_GAUGE_ORDER || order == QUDA_MILC_GAUGE_ORDER ||
               order == QUDA_MILC_SITE_GAUGE_ORDER || order == QUDA_BQCD_GAUGE_ORDER ||
               order == QUDA_TIFR_GAUGE_ORDER || order == QUDA_TIFR_PADDED_GAUGE_ORDER) {
      // the remaining host orders use a single contiguous allocation

      if (order == QUDA_MILC_SITE_GAUGE_ORDER && create != QUDA_REFERENCE_FIELD_CREATE) {
        errorQuda("MILC site gauge order only supported for reference fields");
      }

      if (create == QUDA_NULL_FIELD_CREATE || create == QUDA_ZERO_FIELD_CREATE) {
        gauge = (void **) safe_malloc(bytes);
        if (create == QUDA_ZERO_FIELD_CREATE) memset(gauge, 0, bytes);
      } else if (create == QUDA_REFERENCE_FIELD_CREATE) {
        gauge = (void**) param.gauge;
      } else {
        errorQuda("Unsupported creation type %d", create);
      }

    } else {
      errorQuda("Unsupported gauge order type %d", order);
    }

    // no need to exchange data if this is a momentum field
    if (link_type != QUDA_ASQTAD_MOM_LINKS) {
      // Ghost zone is always 2-dimensional
      for (int i=0; i<nDim; i++) {
        size_t nbytes = nFace * surface[i] * nInternal * precision;
        ghost[i] = nbytes ? safe_malloc(nbytes) : nullptr;
        ghost[i+4] = (nbytes && geometry == QUDA_COARSE_GEOMETRY) ? safe_malloc(nbytes) : nullptr;
      }

      if (ghostExchange == QUDA_GHOST_EXCHANGE_PAD) {
        // exchange the boundaries if a non-trivial field
        if (create == QUDA_NULL_FIELD_CREATE || create == QUDA_REFERENCE_FIELD_CREATE)
          if (geometry == QUDA_VECTOR_GEOMETRY || geometry == QUDA_COARSE_GEOMETRY)
            exchangeGhost(geometry == QUDA_VECTOR_GEOMETRY ? QUDA_LINK_BACKWARDS : QUDA_LINK_BIDIRECTIONAL);
      }
    }

    // compute the fat link max now in case it is needed later (i.e., for half precision)
    if (param.compute_fat_link_max) fat_link_max = this->abs_max();
  }
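
  /* Example (illustrative sketch, not part of the library): wrapping an
     application's QDP-ordered host links in a cpuGaugeField.  The pointer
     array `links` is hypothetical, and the remaining GaugeFieldParam
     fields (lattice dimensions, precision, link type, etc.; see
     gauge_field.h) are assumed to be set up by the caller.

       void *links[4];                               // one color-matrix field per dimension
       GaugeFieldParam gParam;
       gParam.order  = QUDA_QDP_GAUGE_ORDER;
       gParam.create = QUDA_REFERENCE_FIELD_CREATE;  // alias the pointers, don't allocate
       gParam.gauge  = links;
       cpuGaugeField u(gParam);                      // u references links directly
  */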

  cpuGaugeField::~cpuGaugeField()
  {
    int siteDim = 0;
    if (geometry == QUDA_SCALAR_GEOMETRY) siteDim = 1;
    else if (geometry == QUDA_VECTOR_GEOMETRY) siteDim = nDim;
    else if (geometry == QUDA_TENSOR_GEOMETRY) siteDim = nDim * (nDim-1) / 2;
    else if (geometry == QUDA_COARSE_GEOMETRY) siteDim = 2*nDim;
    else errorQuda("Unknown geometry type %d", geometry);

    if (create == QUDA_NULL_FIELD_CREATE || create == QUDA_ZERO_FIELD_CREATE) {
      if (order == QUDA_QDP_GAUGE_ORDER) {
        for (int d=0; d<siteDim; d++) {
          if (gauge[d]) host_free(gauge[d]);
        }
        if (gauge) host_free(gauge);
      } else {
        if (gauge) host_free(gauge);
      }
    } else { // QUDA_REFERENCE_FIELD_CREATE
      if (order == QUDA_QDP_GAUGE_ORDER) {
        if (gauge) host_free(gauge);
      }
    }

    if (link_type != QUDA_ASQTAD_MOM_LINKS) {
      for (int i=0; i<nDim; i++) {
        if (ghost[i]) host_free(ghost[i]);
        if (ghost[i+4] && geometry == QUDA_COARSE_GEOMETRY) host_free(ghost[i+4]);
      }
    }
  }

  // This does the exchange of the gauge field ghost zone and places it
  // into the ghost array.
  void cpuGaugeField::exchangeGhost(QudaLinkDirection link_direction) {
    if (geometry != QUDA_VECTOR_GEOMETRY && geometry != QUDA_COARSE_GEOMETRY)
      errorQuda("Cannot exchange for %d geometry gauge field", geometry);

    if ( (link_direction == QUDA_LINK_BIDIRECTIONAL || link_direction == QUDA_LINK_FORWARDS) && geometry != QUDA_COARSE_GEOMETRY)
      errorQuda("Cannot request exchange of forward links on non-coarse geometry");

    void *send[2*QUDA_MAX_DIM];
    for (int d=0; d<nDim; d++) {
      send[d] = safe_malloc(nFace*surface[d]*nInternal*precision);
      if (geometry == QUDA_COARSE_GEOMETRY) send[d+4] = safe_malloc(nFace*surface[d]*nInternal*precision);
    }

    if (link_direction == QUDA_LINK_BACKWARDS || link_direction == QUDA_LINK_BIDIRECTIONAL) {
      // get the links into contiguous buffers
      extractGaugeGhost(*this, send, true);

      // communicate between nodes
      exchange(ghost, send, QUDA_FORWARDS);
    }

    // repeat if requested and links are bi-directional
    if (link_direction == QUDA_LINK_FORWARDS || link_direction == QUDA_LINK_BIDIRECTIONAL) {
      extractGaugeGhost(*this, send, true, nDim);
      exchange(ghost+nDim, send+nDim, QUDA_FORWARDS);
    }

    for (int d=0; d<geometry; d++) host_free(send[d]);
  }

  // This does the opposite of exchangeGhost and sends back the ghost
  // zone to the node from which it came and injects it back into the
  // field
  void cpuGaugeField::injectGhost(QudaLinkDirection link_direction) {
    if (geometry != QUDA_VECTOR_GEOMETRY && geometry != QUDA_COARSE_GEOMETRY)
      errorQuda("Cannot exchange for %d geometry gauge field", geometry);

    if (link_direction != QUDA_LINK_BACKWARDS)
      errorQuda("link_direction = %d not supported", link_direction);

    void *recv[QUDA_MAX_DIM];
    for (int d=0; d<nDim; d++) recv[d] = safe_malloc(nFace*surface[d]*nInternal*precision);

    // communicate between nodes
    exchange(recv, ghost, QUDA_BACKWARDS);

    // inject the received ghost links back into the field
    extractGaugeGhost(*this, recv, false);

    for (int d=0; d<nDim; d++) host_free(recv[d]);
  }
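
  /* Example (sketch): the two halves of a ghost-zone round trip for a
     QUDA_VECTOR_GEOMETRY field u.  exchangeGhost() fills ghost[] with the
     neighbouring ranks' boundary links; injectGhost() sends the (possibly
     modified) ghost zone back and writes it into the field proper.

       u.exchangeGhost(QUDA_LINK_BACKWARDS);  // populate ghost[] from neighbours
       // ... operate on the halo ...
       u.injectGhost(QUDA_LINK_BACKWARDS);    // return the halo to its owning ranks
  */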

  void cpuGaugeField::exchangeExtendedGhost(const int *R, bool no_comms_fill) {

    void *send[QUDA_MAX_DIM];
    void *recv[QUDA_MAX_DIM];
    size_t bytes[QUDA_MAX_DIM];
    // store both parities and directions in each
    for (int d=0; d<nDim; d++) {
      if (!(comm_dim_partitioned(d) || (no_comms_fill && R[d])) ) continue;
      bytes[d] = surface[d] * R[d] * geometry * nInternal * precision;
      send[d] = safe_malloc(2 * bytes[d]);
      recv[d] = safe_malloc(2 * bytes[d]);
    }

    for (int d=0; d<nDim; d++) {
      if (!(comm_dim_partitioned(d) || (no_comms_fill && R[d])) ) continue;
      // extract into a contiguous buffer
      extractExtendedGaugeGhost(*this, d, R, send, true);

      if (comm_dim_partitioned(d)) {
        // do the exchange
        MsgHandle *mh_recv_back;
        MsgHandle *mh_recv_fwd;
        MsgHandle *mh_send_fwd;
        MsgHandle *mh_send_back;

        mh_recv_back = comm_declare_receive_relative(recv[d], d, -1, bytes[d]);
        mh_recv_fwd = comm_declare_receive_relative(((char*)recv[d])+bytes[d], d, +1, bytes[d]);
        mh_send_back = comm_declare_send_relative(send[d], d, -1, bytes[d]);
        mh_send_fwd = comm_declare_send_relative(((char*)send[d])+bytes[d], d, +1, bytes[d]);

        comm_start(mh_recv_back);
        comm_start(mh_recv_fwd);
        comm_start(mh_send_fwd);
        comm_start(mh_send_back);

        comm_wait(mh_send_fwd);
        comm_wait(mh_send_back);
        comm_wait(mh_recv_back);
        comm_wait(mh_recv_fwd);

        comm_free(mh_send_fwd);
        comm_free(mh_send_back);
        comm_free(mh_recv_back);
        comm_free(mh_recv_fwd);
      } else {
        memcpy(static_cast<char*>(recv[d])+bytes[d], send[d], bytes[d]);
        memcpy(recv[d], static_cast<char*>(send[d])+bytes[d], bytes[d]);
      }

      // inject back into the gauge field
      extractExtendedGaugeGhost(*this, d, R, recv, false);
    }

    for (int d=0; d<nDim; d++) {
      if (!(comm_dim_partitioned(d) || (no_comms_fill && R[d])) ) continue;
      host_free(send[d]);
      host_free(recv[d]);
    }

  }

  void cpuGaugeField::exchangeExtendedGhost(const int *R, TimeProfile &profile, bool no_comms_fill) {
    profile.TPSTART(QUDA_PROFILE_COMMS);
    exchangeExtendedGhost(R, no_comms_fill);
    profile.TPSTOP(QUDA_PROFILE_COMMS);
  }
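
  /* Example (sketch): populating the border of an extended field.  R[d] is
     the halo depth in dimension d; with no_comms_fill=true the border is
     filled even in dimensions that are not partitioned across ranks, via
     the local wrap-around memcpy path above instead of MPI.

       int R[4] = {2, 2, 2, 2};                 // two-site halo in every dimension
       u_extended.exchangeExtendedGhost(R, true);
  */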

  // defined in cudaGaugeField
  void *create_gauge_buffer(size_t bytes, QudaGaugeFieldOrder order, QudaFieldGeometry geometry);
  void **create_ghost_buffer(size_t bytes[], QudaGaugeFieldOrder order, QudaFieldGeometry geometry);
  void free_gauge_buffer(void *buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry);
  void free_ghost_buffer(void **buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry);

  void cpuGaugeField::copy(const GaugeField &src) {
    if (this == &src) return;

    checkField(src);

    if (link_type == QUDA_ASQTAD_FAT_LINKS) {
      fat_link_max = src.LinkMax();
      if (fat_link_max == 0.0 && precision < QUDA_SINGLE_PRECISION)
        errorQuda("fat_link_max has not been computed");
    } else {
      fat_link_max = 1.0;
    }

    if (typeid(src) == typeid(cudaGaugeField)) {

      if (reorder_location() == QUDA_CPU_FIELD_LOCATION) {

        if (!src.isNative()) errorQuda("Only native order is supported");
        void *buffer = pool_pinned_malloc(src.Bytes());
        // this copies over both even and odd
        qudaMemcpy(buffer, static_cast<const cudaGaugeField&>(src).Gauge_p(),
                   src.Bytes(), cudaMemcpyDeviceToHost);

        copyGenericGauge(*this, src, QUDA_CPU_FIELD_LOCATION, gauge, buffer);
        pool_pinned_free(buffer);

      } else { // else on the GPU

        void *buffer = create_gauge_buffer(bytes, order, geometry);
        size_t ghost_bytes[8];
        int dstNinternal = reconstruct != QUDA_RECONSTRUCT_NO ? reconstruct : 2*nColor*nColor;
        for (int d=0; d<geometry; d++) ghost_bytes[d] = nFace * surface[d%4] * dstNinternal * precision;
        void **ghost_buffer = (nFace > 0) ? create_ghost_buffer(ghost_bytes, order, geometry) : nullptr;

        if (src.GhostExchange() != QUDA_GHOST_EXCHANGE_EXTENDED) {
          copyGenericGauge(*this, src, QUDA_CUDA_FIELD_LOCATION, buffer, 0, ghost_buffer, 0);
          if (geometry == QUDA_COARSE_GEOMETRY) copyGenericGauge(*this, src, QUDA_CUDA_FIELD_LOCATION, buffer, 0, ghost_buffer, 0, 3); // forwards links if bi-directional
        } else {
          copyExtendedGauge(*this, src, QUDA_CUDA_FIELD_LOCATION, buffer, 0);
        }

        if (order == QUDA_QDP_GAUGE_ORDER) {
          for (int d=0; d<geometry; d++) {
            qudaMemcpy(((void**)gauge)[d], ((void**)buffer)[d], bytes/geometry, cudaMemcpyDeviceToHost);
          }
        } else {
          qudaMemcpy(gauge, buffer, bytes, cudaMemcpyDeviceToHost); // device staging buffer to host field
        }

        if (order > 4 && ghostExchange == QUDA_GHOST_EXCHANGE_PAD && src.GhostExchange() == QUDA_GHOST_EXCHANGE_PAD && nFace)
          for (int d=0; d<geometry; d++)
            qudaMemcpy(Ghost()[d], ghost_buffer[d], ghost_bytes[d], cudaMemcpyDeviceToHost);

        free_gauge_buffer(buffer, order, geometry);
        if (nFace > 0) free_ghost_buffer(ghost_buffer, order, geometry);
      }

    } else if (typeid(src) == typeid(cpuGaugeField)) {
      // copy field and ghost zone directly
      copyGenericGauge(*this, src, QUDA_CPU_FIELD_LOCATION, gauge,
                       const_cast<void*>(static_cast<const cpuGaugeField&>(src).Gauge_p()));
    } else {
      errorQuda("Invalid gauge field type");
    }

    // if we have copied from a source without a pad then we need to exchange
    if (ghostExchange == QUDA_GHOST_EXCHANGE_PAD && src.GhostExchange() != QUDA_GHOST_EXCHANGE_PAD) {
      exchangeGhost(geometry == QUDA_VECTOR_GEOMETRY ? QUDA_LINK_BACKWARDS : QUDA_LINK_BIDIRECTIONAL);
    }

    checkCudaError();
  }
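
  /* Example (sketch): transferring a device field to the host.  Here
     gauge_cuda is a hypothetical cudaGaugeField with matching field
     parameters; copy() selects the host or device reordering path via
     reorder_location().

       cpuGaugeField gauge_cpu(gParam);  // gParam as in the constructor example
       gauge_cpu.copy(gauge_cuda);       // device -> host, reordering as needed
  */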

  void cpuGaugeField::setGauge(void **gauge_)
  {
    if (create != QUDA_REFERENCE_FIELD_CREATE) {
      errorQuda("Setting gauge pointer is only allowed when create="
                "QUDA_REFERENCE_FIELD_CREATE type\n");
    }
    gauge = gauge_;
  }

  void cpuGaugeField::backup() const {
    if (backed_up) errorQuda("Gauge field already backed up");

    if (order == QUDA_QDP_GAUGE_ORDER) {
      char **buffer = new char*[geometry];
      for (int d=0; d<geometry; d++) {
        buffer[d] = new char[bytes/geometry];
        memcpy(buffer[d], gauge[d], bytes/geometry);
      }
      backup_h = reinterpret_cast<char*>(buffer);
    } else {
      backup_h = new char[bytes];
      memcpy(backup_h, gauge, bytes);
    }

    backed_up = true;
  }

  void cpuGaugeField::restore() const
  {
    if (!backed_up) errorQuda("Cannot restore since not backed up");

    if (order == QUDA_QDP_GAUGE_ORDER) {
      char **buffer = reinterpret_cast<char**>(backup_h);
      for (int d=0; d<geometry; d++) {
        memcpy(gauge[d], buffer[d], bytes/geometry);
        delete []buffer[d];
      }
      delete []buffer;
    } else {
      memcpy(gauge, backup_h, bytes);
      delete []backup_h;
    }

    backed_up = false;
  }
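
  /* Example (sketch): backup() and restore() bracket a destructive update,
     e.g. a trial modification of the links that may be rejected.  The
     routine apply_trial_update() is hypothetical.

       u.backup();                  // deep copy of the link field
       apply_trial_update(u);       // modifies u in place
       if (rejected) u.restore();   // roll back and free the saved copy
  */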

  void cpuGaugeField::zero() {
    if (order != QUDA_QDP_GAUGE_ORDER) {
      memset(gauge, 0, bytes);
    } else {
      // QDP order stores an array of per-dimension pointers, so zero each field separately
      for (int d=0; d<geometry; d++) memset(gauge[d], 0, volume * nInternal * precision);
    }
  }

  /*template <typename Float>
  void print_matrix(const Float &o, unsigned int x) {

    for (int s=0; s<o.Nspin(); s++) {
      std::cout << "x = " << x << ", s = " << s << ", { ";
      for (int c=0; c<o.Ncolor(); c++) {
        std::cout << " ( " << o(x, s, c, 0) << " , " ;
        if (c<o.Ncolor()-1) std::cout << o(x, s, c, 1) << " ) ," ;
        else std::cout << o(x, s, c, 1) << " ) " ;
      }
      std::cout << " } " << std::endl;
    }

  }

  // print out the vector at volume point x
  void cpuColorSpinorField::PrintMatrix(unsigned int x) {

    switch (precision) {
    case QUDA_DOUBLE_PRECISION:
      print_matrix(*order_double, x);
      break;
    case QUDA_SINGLE_PRECISION:
      print_matrix(*order_single, x);
      break;
    default:
      errorQuda("Precision %d not implemented", precision);
    }

  }
  */

} // namespace quda