QUDA  v1.1.0
A library for QCD on GPUs
cpu_gauge_field.cpp
#include <quda_internal.h>
#include <gauge_field.h>
#include <assert.h>
#include <string.h>
#include <typeinfo>

namespace quda {

  cpuGaugeField::cpuGaugeField(const GaugeFieldParam &param) : GaugeField(param)
  {
    if (precision == QUDA_HALF_PRECISION) {
      errorQuda("CPU fields do not support half precision");
    }
    if (precision == QUDA_QUARTER_PRECISION) {
      errorQuda("CPU fields do not support quarter precision");
    }
    if (pad != 0) {
      errorQuda("CPU fields do not support non-zero padding");
    }
    if (reconstruct != QUDA_RECONSTRUCT_NO && reconstruct != QUDA_RECONSTRUCT_10) {
      errorQuda("Reconstruction type %d not supported", reconstruct);
    }
    if (reconstruct == QUDA_RECONSTRUCT_10 && link_type != QUDA_ASQTAD_MOM_LINKS) {
      errorQuda("10-reconstruction only supported with momentum links");
    }

    int siteDim = 0;
    if (geometry == QUDA_SCALAR_GEOMETRY) siteDim = 1;
    else if (geometry == QUDA_VECTOR_GEOMETRY) siteDim = nDim;
    else if (geometry == QUDA_TENSOR_GEOMETRY) siteDim = nDim * (nDim-1) / 2;
    else if (geometry == QUDA_COARSE_GEOMETRY) siteDim = 2*nDim;
    else errorQuda("Unknown geometry type %d", geometry);

    // compute the correct bytes size for these padded field orders
    if (order == QUDA_TIFR_PADDED_GAUGE_ORDER) {
      bytes = siteDim * (x[0]*x[1]*(x[2]+4)*x[3]) * nInternal * precision;
    } else if (order == QUDA_BQCD_GAUGE_ORDER) {
      bytes = siteDim * (x[0]+4)*(x[1]+2)*(x[2]+2)*(x[3]+2) * nInternal * precision;
    } else if (order == QUDA_MILC_SITE_GAUGE_ORDER) {
      bytes = volume * param.site_size;
    }

    if (order == QUDA_QDP_GAUGE_ORDER) {
      gauge = (void **)safe_malloc(siteDim * sizeof(void *));

      for (int d = 0; d < siteDim; d++) {
        size_t nbytes = volume * nInternal * precision;
        if (create == QUDA_NULL_FIELD_CREATE || create == QUDA_ZERO_FIELD_CREATE) {
          gauge[d] = nbytes ? safe_malloc(nbytes) : nullptr;
          if (create == QUDA_ZERO_FIELD_CREATE && nbytes) memset(gauge[d], 0, nbytes);
        } else if (create == QUDA_REFERENCE_FIELD_CREATE) {
          gauge[d] = ((void **)param.gauge)[d];
        } else {
          errorQuda("Unsupported creation type %d", create);
        }
      }

    } else if (order == QUDA_CPS_WILSON_GAUGE_ORDER || order == QUDA_MILC_GAUGE_ORDER ||
               order == QUDA_MILC_SITE_GAUGE_ORDER || order == QUDA_BQCD_GAUGE_ORDER ||
               order == QUDA_TIFR_GAUGE_ORDER || order == QUDA_TIFR_PADDED_GAUGE_ORDER) {

      if (order == QUDA_MILC_SITE_GAUGE_ORDER && create != QUDA_REFERENCE_FIELD_CREATE) {
        errorQuda("MILC site gauge order only supported for reference fields");
      }

      if (create == QUDA_NULL_FIELD_CREATE || create == QUDA_ZERO_FIELD_CREATE) {
        gauge = bytes ? (void **)safe_malloc(bytes) : nullptr;
        if (create == QUDA_ZERO_FIELD_CREATE && bytes) memset(gauge, 0, bytes);
      } else if (create == QUDA_REFERENCE_FIELD_CREATE) {
        gauge = (void **)param.gauge;
      } else {
        errorQuda("Unsupported creation type %d", create);
      }

    } else {
      errorQuda("Unsupported gauge order type %d", order);
    }

    // no need to exchange data if this is a momentum field
    if (link_type != QUDA_ASQTAD_MOM_LINKS && ghostExchange == QUDA_GHOST_EXCHANGE_PAD) {
      // Ghost zone is always 2-dimensional
      for (int i = 0; i < nDim; i++) {
        size_t nbytes = nFace * surface[i] * nInternal * precision;
        ghost[i] = nbytes ? safe_malloc(nbytes) : nullptr;
        ghost[i+4] = (nbytes && geometry == QUDA_COARSE_GEOMETRY) ? safe_malloc(nbytes) : nullptr;
      }

      if (create == QUDA_REFERENCE_FIELD_CREATE) {
        // exchange the boundaries if a non-trivial field
        if (geometry == QUDA_VECTOR_GEOMETRY || geometry == QUDA_COARSE_GEOMETRY)
          exchangeGhost(geometry == QUDA_VECTOR_GEOMETRY ? QUDA_LINK_BACKWARDS : QUDA_LINK_BIDIRECTIONAL);
      }
    }

    // compute the fat link max now in case it is needed later (i.e., for half precision)
    if (param.compute_fat_link_max) fat_link_max = this->abs_max();
  }

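  /* Usage sketch (illustrative only, not part of this file): wrapping
     existing host gauge links in a cpuGaugeField without copying them.
     Assumes `gp` is a GaugeFieldParam already populated elsewhere with
     the lattice dimensions, precision and link type; the members
     gp.order, gp.create and gp.gauge are assumptions based on the
     fields the constructor reads above.

       void *links[4];                             // one link array per dimension
       // ... fill links[d] with host data ...
       gp.order  = QUDA_QDP_GAUGE_ORDER;           // array-of-pointers layout handled above
       gp.create = QUDA_REFERENCE_FIELD_CREATE;    // alias the host pointers, no allocation
       gp.gauge  = (void *)links;
       cpuGaugeField cpu_gauge(gp);                // ghost buffers are still allocated here
  */
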
  cpuGaugeField::~cpuGaugeField()
  {
    int siteDim = 0;
    if (geometry == QUDA_SCALAR_GEOMETRY) siteDim = 1;
    else if (geometry == QUDA_VECTOR_GEOMETRY) siteDim = nDim;
    else if (geometry == QUDA_TENSOR_GEOMETRY) siteDim = nDim * (nDim-1) / 2;
    else if (geometry == QUDA_COARSE_GEOMETRY) siteDim = 2*nDim;
    else errorQuda("Unknown geometry type %d", geometry);

    if (create == QUDA_NULL_FIELD_CREATE || create == QUDA_ZERO_FIELD_CREATE) {
      if (order == QUDA_QDP_GAUGE_ORDER) {
        for (int d = 0; d < siteDim; d++) {
          if (gauge[d]) host_free(gauge[d]);
        }
        if (gauge) host_free(gauge);
      } else {
        if (gauge) host_free(gauge);
      }
    } else { // QUDA_REFERENCE_FIELD_CREATE
      if (order == QUDA_QDP_GAUGE_ORDER) {
        if (gauge) host_free(gauge);
      }
    }

    if (link_type != QUDA_ASQTAD_MOM_LINKS && ghostExchange == QUDA_GHOST_EXCHANGE_PAD) {
      for (int i = 0; i < nDim; i++) {
        if (ghost[i]) host_free(ghost[i]);
        if (ghost[i+4] && geometry == QUDA_COARSE_GEOMETRY) host_free(ghost[i+4]);
      }
    }
  }

  // This does the exchange of the gauge field ghost zone and places it
  // into the ghost array.
  void cpuGaugeField::exchangeGhost(QudaLinkDirection link_direction) {
    if (geometry != QUDA_VECTOR_GEOMETRY && geometry != QUDA_COARSE_GEOMETRY)
      errorQuda("Cannot exchange for %d geometry gauge field", geometry);

    if ((link_direction == QUDA_LINK_BIDIRECTIONAL || link_direction == QUDA_LINK_FORWARDS) && geometry != QUDA_COARSE_GEOMETRY)
      errorQuda("Cannot request exchange of forward links on non-coarse geometry");

    void *send[2*QUDA_MAX_DIM];
    for (int d = 0; d < nDim; d++) {
      send[d] = safe_malloc(nFace * surface[d] * nInternal * precision);
      if (geometry == QUDA_COARSE_GEOMETRY) send[d + nDim] = safe_malloc(nFace * surface[d] * nInternal * precision);
    }

    if (link_direction == QUDA_LINK_BACKWARDS || link_direction == QUDA_LINK_BIDIRECTIONAL) {
      // get the links into contiguous buffers
      extractGaugeGhost(*this, send, true);

      // communicate between nodes
      exchange(ghost, send, QUDA_FORWARDS);
    }

    // repeat if requested and links are bi-directional
    if (link_direction == QUDA_LINK_FORWARDS || link_direction == QUDA_LINK_BIDIRECTIONAL) {
      extractGaugeGhost(*this, send, true, nDim);
      exchange(ghost + nDim, send + nDim, QUDA_FORWARDS);
    }

    for (int d = 0; d < geometry; d++) host_free(send[d]);
  }

  // This does the opposite of exchangeGhost and sends back the ghost
  // zone to the node from which it came and injects it back into the
  // field
  void cpuGaugeField::injectGhost(QudaLinkDirection link_direction) {
    if (geometry != QUDA_VECTOR_GEOMETRY && geometry != QUDA_COARSE_GEOMETRY)
      errorQuda("Cannot exchange for %d geometry gauge field", geometry);

    if (link_direction != QUDA_LINK_BACKWARDS)
      errorQuda("link_direction = %d not supported", link_direction);

    void *recv[2*QUDA_MAX_DIM];
    for (int d = 0; d < nDim; d++) recv[d] = safe_malloc(nFace * surface[d] * nInternal * precision);

    // communicate between nodes
    exchange(recv, ghost, QUDA_BACKWARDS);

    // inject the received links back into the field
    extractGaugeGhost(*this, recv, false);

    for (int d = 0; d < nDim; d++) host_free(recv[d]);
  }
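
  /* Usage sketch (illustrative only): the two halo routines above are
     complementary.  exchangeGhost() gathers the boundary links of the
     neighbouring rank into this->ghost[], while injectGhost() sends this
     rank's ghost[] contents back and folds them into the local field.
     Each dimension-d buffer holds nFace * surface[d] * nInternal * precision
     bytes, matching the allocations above.

       cpu_gauge.exchangeGhost(QUDA_LINK_BACKWARDS);  // populate ghost[] from neighbours
       // ... use or modify the ghost links ...
       cpu_gauge.injectGhost(QUDA_LINK_BACKWARDS);    // push ghost[] back where it came from
  */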

  void cpuGaugeField::exchangeExtendedGhost(const int *R, bool no_comms_fill) {

    void *send[QUDA_MAX_DIM];
    void *recv[QUDA_MAX_DIM];
    size_t bytes[QUDA_MAX_DIM];
    // store both parities and directions in each
    for (int d = 0; d < nDim; d++) {
      if (!(comm_dim_partitioned(d) || (no_comms_fill && R[d]))) continue;
      bytes[d] = surface[d] * R[d] * geometry * nInternal * precision;
      send[d] = safe_malloc(2 * bytes[d]);
      recv[d] = safe_malloc(2 * bytes[d]);
    }

    for (int d = 0; d < nDim; d++) {
      if (!(comm_dim_partitioned(d) || (no_comms_fill && R[d]))) continue;
      // extract into a contiguous buffer
      extractExtendedGaugeGhost(*this, d, R, send, true);

      if (comm_dim_partitioned(d)) {
        // do the exchange
        MsgHandle *mh_recv_back;
        MsgHandle *mh_recv_fwd;
        MsgHandle *mh_send_fwd;
        MsgHandle *mh_send_back;

        mh_recv_back = comm_declare_receive_relative(recv[d], d, -1, bytes[d]);
        mh_recv_fwd = comm_declare_receive_relative(((char*)recv[d])+bytes[d], d, +1, bytes[d]);
        mh_send_back = comm_declare_send_relative(send[d], d, -1, bytes[d]);
        mh_send_fwd = comm_declare_send_relative(((char*)send[d])+bytes[d], d, +1, bytes[d]);

        comm_start(mh_recv_back);
        comm_start(mh_recv_fwd);
        comm_start(mh_send_fwd);
        comm_start(mh_send_back);

        comm_wait(mh_send_fwd);
        comm_wait(mh_send_back);
        comm_wait(mh_recv_back);
        comm_wait(mh_recv_fwd);

        comm_free(mh_send_fwd);
        comm_free(mh_send_back);
        comm_free(mh_recv_back);
        comm_free(mh_recv_fwd);

      } else {
        memcpy(static_cast<char*>(recv[d])+bytes[d], send[d], bytes[d]);
        memcpy(recv[d], static_cast<char*>(send[d])+bytes[d], bytes[d]);
      }

      // inject back into the gauge field
      extractExtendedGaugeGhost(*this, d, R, recv, false);
    }

    for (int d = 0; d < nDim; d++) {
      if (!(comm_dim_partitioned(d) || (no_comms_fill && R[d]))) continue;
      host_free(send[d]);
      host_free(recv[d]);
    }

  }

  void cpuGaugeField::exchangeExtendedGhost(const int *R, TimeProfile &profile, bool no_comms_fill) {
    profile.TPSTART(QUDA_PROFILE_COMMS);
    exchangeExtendedGhost(R, no_comms_fill);
    profile.TPSTOP(QUDA_PROFILE_COMMS);
  }
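
  /* Usage sketch (illustrative only): populating the border of an
     extended field.  R[d] gives the halo depth in dimension d, and
     no_comms_fill=true also fills borders of dimensions that are not
     partitioned across ranks.  The profiled overload simply wraps the
     exchange in the QUDA_PROFILE_COMMS timer.

       int R[4] = {2, 2, 2, 2};          // two-site halo in each direction
       extended_gauge.exchangeExtendedGhost(R, true);
       // or, with timing:
       extended_gauge.exchangeExtendedGhost(R, profile, true);
  */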

  // defined in cudaGaugeField
  void *create_gauge_buffer(size_t bytes, QudaGaugeFieldOrder order, QudaFieldGeometry geometry);
  void **create_ghost_buffer(size_t bytes[], QudaGaugeFieldOrder order, QudaFieldGeometry geometry);
  void free_gauge_buffer(void *buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry);
  void free_ghost_buffer(void **buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry);

  void cpuGaugeField::copy(const GaugeField &src) {
    if (this == &src) return;

    checkField(src);

    if (link_type == QUDA_ASQTAD_FAT_LINKS) {
      fat_link_max = src.LinkMax();
    } else {
      fat_link_max = 1.0;
    }

    if (typeid(src) == typeid(cudaGaugeField)) {

      if (reorder_location() == QUDA_CPU_FIELD_LOCATION) {

        if (!src.isNative()) errorQuda("Only native order is supported");
        void *buffer = pool_pinned_malloc(src.Bytes());
        // this copies over both even and odd
        qudaMemcpy(buffer, static_cast<const cudaGaugeField&>(src).Gauge_p(),
                   src.Bytes(), cudaMemcpyDeviceToHost);

        copyGenericGauge(*this, src, QUDA_CPU_FIELD_LOCATION, gauge, buffer);
        pool_pinned_free(buffer);

      } else { // else reorder on the GPU

        void *buffer = create_gauge_buffer(bytes, order, geometry);
        size_t ghost_bytes[8];
        int dstNinternal = reconstruct != QUDA_RECONSTRUCT_NO ? reconstruct : 2*nColor*nColor;
        for (int d = 0; d < geometry; d++) ghost_bytes[d] = nFace * surface[d%4] * dstNinternal * precision;
        void **ghost_buffer = (nFace > 0) ? create_ghost_buffer(ghost_bytes, order, geometry) : nullptr;

        if (src.GhostExchange() != QUDA_GHOST_EXCHANGE_EXTENDED) {
          copyGenericGauge(*this, src, QUDA_CUDA_FIELD_LOCATION, buffer, 0, ghost_buffer, 0);
          if (geometry == QUDA_COARSE_GEOMETRY) copyGenericGauge(*this, src, QUDA_CUDA_FIELD_LOCATION, buffer, 0, ghost_buffer, 0, 3); // forwards links if bi-directional
        } else {
          copyExtendedGauge(*this, src, QUDA_CUDA_FIELD_LOCATION, buffer, 0);
        }

        if (order == QUDA_QDP_GAUGE_ORDER) {
          for (int d = 0; d < geometry; d++) {
            qudaMemcpy(((void**)gauge)[d], ((void**)buffer)[d], bytes/geometry, cudaMemcpyDeviceToHost);
          }
        } else {
          // copy from the device staging buffer back into the host field
          qudaMemcpy(gauge, buffer, bytes, cudaMemcpyDeviceToHost);
        }

        if (nFace > 0)
          for (int d = 0; d < geometry; d++)
            qudaMemcpy(Ghost()[d], ghost_buffer[d], ghost_bytes[d], cudaMemcpyDeviceToHost);

        free_gauge_buffer(buffer, order, geometry);
        if (nFace > 0) free_ghost_buffer(ghost_buffer, order, geometry);
      }

    } else if (typeid(src) == typeid(cpuGaugeField)) {
      // copy field and ghost zone directly
      copyGenericGauge(*this, src, QUDA_CPU_FIELD_LOCATION, gauge,
                       const_cast<void*>(static_cast<const cpuGaugeField&>(src).Gauge_p()));
    } else {
      errorQuda("Invalid gauge field type");
    }

    // if we have copied from a source without a pad then we need to exchange
    if (ghostExchange == QUDA_GHOST_EXCHANGE_PAD && src.GhostExchange() != QUDA_GHOST_EXCHANGE_PAD) {
      exchangeGhost(geometry == QUDA_VECTOR_GEOMETRY ? QUDA_LINK_BACKWARDS : QUDA_LINK_BIDIRECTIONAL);
    }
  }
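
  /* Usage sketch (illustrative only): pulling a device-resident field
     back to the host.  copy() reorders either on the CPU (through a
     pinned staging buffer) or on the GPU (through a device staging
     buffer), as selected by reorder_location() above.

       cudaGaugeField device_gauge(device_param);   // assumed to exist already
       // ... evolve / smear the links on the GPU ...
       cpu_gauge.copy(device_gauge);                // reorder + transfer back to the host layout
  */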

  void cpuGaugeField::setGauge(void **gauge_)
  {
    if (create != QUDA_REFERENCE_FIELD_CREATE) {
      errorQuda("Setting gauge pointer is only allowed when create="
                "QUDA_REFERENCE_FIELD_CREATE type\n");
    }
    gauge = gauge_;
  }

  void cpuGaugeField::backup() const {
    if (backed_up) errorQuda("Gauge field already backed up");

    if (order == QUDA_QDP_GAUGE_ORDER) {
      char **buffer = new char*[geometry];
      for (int d = 0; d < geometry; d++) {
        buffer[d] = new char[bytes/geometry];
        memcpy(buffer[d], gauge[d], bytes/geometry);
      }
      backup_h = reinterpret_cast<char*>(buffer);
    } else {
      backup_h = new char[bytes];
      memcpy(backup_h, gauge, bytes);
    }

    backed_up = true;
  }

  void cpuGaugeField::restore() const
  {
    if (!backed_up) errorQuda("Cannot restore since not backed up");

    if (order == QUDA_QDP_GAUGE_ORDER) {
      char **buffer = reinterpret_cast<char**>(backup_h);
      for (int d = 0; d < geometry; d++) {
        memcpy(gauge[d], buffer[d], bytes/geometry);
        delete []buffer[d];
      }
      delete []buffer;
    } else {
      memcpy(gauge, backup_h, bytes);
      delete []backup_h;
    }

    backed_up = false;
  }
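
  /* Usage sketch (illustrative only): backup()/restore() provide a simple
     host-side checkpoint of the link data, e.g. around a trial update that
     may be rejected.  backup() refuses to run twice and restore() refuses
     to run without a prior backup, as enforced above.

       cpu_gauge.backup();     // snapshot the current links into backup_h
       // ... apply a trial modification to the links ...
       cpu_gauge.restore();    // roll the links back to the snapshot
  */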

  void cpuGaugeField::zero() {
    if (order != QUDA_QDP_GAUGE_ORDER) {
      memset(gauge, 0, bytes);
    } else {
      for (int g = 0; g < geometry; g++) memset(gauge[g], 0, volume * nInternal * precision);
    }
  }

  void cpuGaugeField::copy_to_buffer(void *buffer) const
  {
    if (Order() == QUDA_QDP_GAUGE_ORDER || Order() == QUDA_QDPJIT_GAUGE_ORDER) {
      void *const *p = static_cast<void *const *>(Gauge_p());
      int dbytes = Bytes() / 4;
      static_assert(sizeof(char) == 1, "Assuming sizeof(char) == 1");
      char *dst_buffer = reinterpret_cast<char *>(buffer);
      for (int d = 0; d < 4; d++) { std::memcpy(&dst_buffer[d * dbytes], p[d], dbytes); }
    } else if (Order() == QUDA_CPS_WILSON_GAUGE_ORDER || Order() == QUDA_MILC_GAUGE_ORDER
               || Order() == QUDA_MILC_SITE_GAUGE_ORDER || Order() == QUDA_BQCD_GAUGE_ORDER
               || Order() == QUDA_TIFR_GAUGE_ORDER || Order() == QUDA_TIFR_PADDED_GAUGE_ORDER) {
      const void *p = Gauge_p();
      int bytes = Bytes();
      std::memcpy(buffer, p, bytes);
    } else {
      errorQuda("Unsupported order = %d\n", Order());
    }
  }

  void cpuGaugeField::copy_from_buffer(void *buffer)
  {
    if (Order() == QUDA_QDP_GAUGE_ORDER || Order() == QUDA_QDPJIT_GAUGE_ORDER) {
      void **p = static_cast<void **>(Gauge_p());
      size_t dbytes = Bytes() / 4;
      static_assert(sizeof(char) == 1, "Assuming sizeof(char) == 1");
      const char *src_buffer = reinterpret_cast<const char *>(buffer);
      for (int d = 0; d < 4; d++) { std::memcpy(p[d], &src_buffer[d * dbytes], dbytes); }
    } else if (Order() == QUDA_CPS_WILSON_GAUGE_ORDER || Order() == QUDA_MILC_GAUGE_ORDER
               || Order() == QUDA_MILC_SITE_GAUGE_ORDER || Order() == QUDA_BQCD_GAUGE_ORDER
               || Order() == QUDA_TIFR_GAUGE_ORDER || Order() == QUDA_TIFR_PADDED_GAUGE_ORDER) {
      void *p = Gauge_p();
      size_t bytes = Bytes();
      std::memcpy(p, buffer, bytes);
    } else {
      errorQuda("Unsupported order = %d\n", Order());
    }
  }
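
  /* Usage sketch (illustrative only): copy_to_buffer()/copy_from_buffer()
     move the entire field through one contiguous host allocation of
     Bytes() bytes, e.g. to write a configuration to disk and read it back.
     For QDP order the four per-dimension arrays are packed consecutively,
     as in the loops above.

       std::vector<char> archive(cpu_gauge.Bytes());
       cpu_gauge.copy_to_buffer(archive.data());    // serialize
       // ... write archive to disk, or hand it to another field ...
       cpu_gauge.copy_from_buffer(archive.data());  // deserialize
  */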

/*template <typename Float>
void print_matrix(const Float &o, unsigned int x) {

  for (int s=0; s<o.Nspin(); s++) {
    std::cout << "x = " << x << ", s = " << s << ", { ";
    for (int c=0; c<o.Ncolor(); c++) {
      std::cout << " ( " << o(x, s, c, 0) << " , " ;
      if (c<o.Ncolor()-1) std::cout << o(x, s, c, 1) << " ) ," ;
      else std::cout << o(x, s, c, 1) << " ) " ;
    }
    std::cout << " } " << std::endl;
  }

}

// print out the vector at volume point x
void cpuColorSpinorField::PrintMatrix(unsigned int x) {

  switch(precision) {
  case QUDA_DOUBLE_PRECISION:
    print_matrix(*order_double, x);
    break;
  case QUDA_SINGLE_PRECISION:
    print_matrix(*order_single, x);
    break;
  default:
    errorQuda("Precision %d not implemented", precision);
  }

}
*/

} // namespace quda