QUDA  0.9.0
cpu_gauge_field.cpp
Go to the documentation of this file.
1 #include <quda_internal.h>
2 #include <gauge_field.h>
3 #include <assert.h>
4 #include <string.h>
5 #include <typeinfo>
6 
7 namespace quda {
8 
11  {
13  errorQuda("CPU fields do not support half precision");
14  }
15  if (pad != 0) {
16  errorQuda("CPU fields do not support non-zero padding");
17  }
19  errorQuda("Reconstruction type %d not supported", reconstruct);
20  }
22  errorQuda("10-reconstruction only supported with MILC gauge order");
23  }
24 
25  int siteDim=0;
26  if (geometry == QUDA_SCALAR_GEOMETRY) siteDim = 1;
27  else if (geometry == QUDA_VECTOR_GEOMETRY) siteDim = nDim;
28  else if (geometry == QUDA_TENSOR_GEOMETRY) siteDim = nDim * (nDim-1) / 2;
29  else if (geometry == QUDA_COARSE_GEOMETRY) siteDim = 2*nDim;
30  else errorQuda("Unknown geometry type %d", geometry);
31 
32  // compute the correct bytes size for these padded field orders
34  bytes = siteDim * (x[0]*x[1]*(x[2]+4)*x[3]) * nInternal * precision;
35  } else if (order == QUDA_BQCD_GAUGE_ORDER) {
36  bytes = siteDim * (x[0]+4)*(x[1]+2)*(x[2]+2)*(x[3]+2) * nInternal * precision;
37  } else if (order == QUDA_MILC_SITE_GAUGE_ORDER) {
39  }
40 
41  if (order == QUDA_QDP_GAUGE_ORDER) {
42  gauge = (void**) safe_malloc(siteDim * sizeof(void*));
43 
44  for (int d=0; d<siteDim; d++) {
45  size_t nbytes = volume * nInternal * precision;
47  gauge[d] = safe_malloc(nbytes);
48  if (create == QUDA_ZERO_FIELD_CREATE) memset(gauge[d], 0, nbytes);
49  } else if (create == QUDA_REFERENCE_FIELD_CREATE) {
50  gauge[d] = ((void**)param.gauge)[d];
51  } else {
52  errorQuda("Unsupported creation type %d", create);
53  }
54  }
55 
59 
61  errorQuda("MILC site gauge order only supported for reference fields");
62  }
63 
65  gauge = (void **) safe_malloc(bytes);
67  } else if (create == QUDA_REFERENCE_FIELD_CREATE) {
68  gauge = (void**) param.gauge;
69  } else {
70  errorQuda("Unsupported creation type %d", create);
71  }
72 
73  } else {
74  errorQuda("Unsupported gauge order type %d", order);
75  }
76 
77  // no need to exchange data if this is a momentum field
79  // Ghost zone is always 2-dimensional
80  for (int i=0; i<nDim; i++) {
81  size_t nbytes = nFace * surface[i] * nInternal * precision;
82  ghost[i] = nbytes ? safe_malloc(nbytes) : nullptr;
83  ghost[i+4] = (nbytes && geometry == QUDA_COARSE_GEOMETRY) ? safe_malloc(nbytes) : nullptr;
84  }
85 
87  // exchange the boundaries if a non-trivial field
91  }
92  }
93 
94  // compute the fat link max now in case it is needed later (i.e., for half precision)
95  if (param.compute_fat_link_max) fat_link_max = maxGauge(*this);
96  }
97 
98 
100  {
101 
102  int siteDim = 0;
103  if (geometry == QUDA_SCALAR_GEOMETRY) siteDim = 1;
104  else if (geometry == QUDA_VECTOR_GEOMETRY) siteDim = nDim;
105  else if (geometry == QUDA_TENSOR_GEOMETRY) siteDim = nDim * (nDim-1) / 2;
106  else if (geometry == QUDA_COARSE_GEOMETRY) siteDim = 2*nDim;
107  else errorQuda("Unknown geometry type %d", geometry);
108 
110  if (order == QUDA_QDP_GAUGE_ORDER) {
111  for (int d=0; d<siteDim; d++) {
112  if (gauge[d]) host_free(gauge[d]);
113  }
114  if (gauge) host_free(gauge);
115  } else {
116  if (gauge) host_free(gauge);
117  }
118  } else { // QUDA_REFERENCE_FIELD_CREATE
119  if (order == QUDA_QDP_GAUGE_ORDER){
120  if (gauge) host_free(gauge);
121  }
122  }
123 
125  for (int i=0; i<nDim; i++) {
126  if (ghost[i]) host_free(ghost[i]);
128  }
129  }
130  }
131 
132  // This does the exchange of the gauge field ghost zone and places it
133  // into the ghost array.
136  errorQuda("Cannot exchange for %d geometry gauge field", geometry);
137 
138  if ( (link_direction == QUDA_LINK_BIDIRECTIONAL || link_direction == QUDA_LINK_FORWARDS) && geometry != QUDA_COARSE_GEOMETRY)
139  errorQuda("Cannot request exchange of forward links on non-coarse geometry");
140 
141  void *send[2*QUDA_MAX_DIM];
142  for (int d=0; d<nDim; d++) {
145  }
146 
147  if (link_direction == QUDA_LINK_BACKWARDS || link_direction == QUDA_LINK_BIDIRECTIONAL) {
148  // get the links into contiguous buffers
149  extractGaugeGhost(*this, send, true);
150 
151  // communicate between nodes
152  exchange(ghost, send, QUDA_FORWARDS);
153  }
154 
155  // repeat if requested and links are bi-directional
156  if (link_direction == QUDA_LINK_FORWARDS || link_direction == QUDA_LINK_BIDIRECTIONAL) {
157  extractGaugeGhost(*this, send, true, nDim);
159  }
160 
161  for (int d=0; d<geometry; d++) host_free(send[d]);
162  }
163 
164  // This does the opposite of exchangeGhost and sends back the ghost
165  // zone to the node from which it came and injects it back into the
166  // field
169  errorQuda("Cannot exchange for %d geometry gauge field", geometry);
170 
171  if (link_direction != QUDA_LINK_BACKWARDS)
172  errorQuda("link_direction = %d not supported", link_direction);
173 
174  void *recv[QUDA_MAX_DIM];
175  for (int d=0; d<nDim; d++) recv[d] = safe_malloc(nFace*surface[d]*nInternal*precision);
176 
177  // communicate between nodes
178  exchange(recv, ghost, QUDA_BACKWARDS);
179 
180  // get the links into contiguous buffers
181  extractGaugeGhost(*this, recv, false);
182 
183  for (int d=0; d<nDim; d++) host_free(recv[d]);
184  }
185 
186  void cpuGaugeField::exchangeExtendedGhost(const int *R, bool no_comms_fill) {
187 
188  void *send[QUDA_MAX_DIM];
189  void *recv[QUDA_MAX_DIM];
190  size_t bytes[QUDA_MAX_DIM];
191  // store both parities and directions in each
192  for (int d=0; d<nDim; d++) {
193  if (!(comm_dim_partitioned(d) || (no_comms_fill && R[d])) ) continue;
194  bytes[d] = surface[d] * R[d] * geometry * nInternal * precision;
195  send[d] = safe_malloc(2 * bytes[d]);
196  recv[d] = safe_malloc(2 * bytes[d]);
197  }
198 
199  for (int d=0; d<nDim; d++) {
200  if (!(comm_dim_partitioned(d) || (no_comms_fill && R[d])) ) continue;
201  //extract into a contiguous buffer
202  extractExtendedGaugeGhost(*this, d, R, send, true);
203 
204  if (comm_dim_partitioned(d)) {
205  // do the exchange
210 
212  mh_recv_fwd = comm_declare_receive_relative(((char*)recv[d])+bytes[d], d, +1, bytes[d]);
214  mh_send_fwd = comm_declare_send_relative(((char*)send[d])+bytes[d], d, +1, bytes[d]);
215 
220 
225 
230  } else {
231  memcpy(static_cast<char*>(recv[d])+bytes[d], send[d], bytes[d]);
232  memcpy(recv[d], static_cast<char*>(send[d])+bytes[d], bytes[d]);
233  }
234 
235  // inject back into the gauge field
236  extractExtendedGaugeGhost(*this, d, R, recv, false);
237  }
238 
239  for (int d=0; d<nDim; d++) {
240  if (!(comm_dim_partitioned(d) || (no_comms_fill && R[d])) ) continue;
241  host_free(send[d]);
242  host_free(recv[d]);
243  }
244 
245  }
246 
247  void cpuGaugeField::exchangeExtendedGhost(const int *R, TimeProfile &profile, bool no_comms_fill) {
248  profile.TPSTART(QUDA_PROFILE_COMMS);
249  exchangeExtendedGhost(R, no_comms_fill);
250  profile.TPSTOP(QUDA_PROFILE_COMMS);
251  }
252 
253  // defined in cudaGaugeField
254  void *create_gauge_buffer(size_t bytes, QudaGaugeFieldOrder order, QudaFieldGeometry geometry);
255  void **create_ghost_buffer(size_t bytes[], QudaGaugeFieldOrder order, QudaFieldGeometry geometry);
256  void free_gauge_buffer(void *buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry);
257  void free_ghost_buffer(void **buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry);
258 
260  if (this == &src) return;
261 
262  checkField(src);
263 
265  fat_link_max = src.LinkMax();
266  if (precision == QUDA_HALF_PRECISION && fat_link_max == 0.0)
267  errorQuda("fat_link_max has not been computed");
268  } else {
269  fat_link_max = 1.0;
270  }
271 
272  if (typeid(src) == typeid(cudaGaugeField)) {
273 
275 
276  if (!src.isNative()) errorQuda("Only native order is supported");
277  void *buffer = pool_pinned_malloc(src.Bytes());
278  // this copies over both even and odd
279  qudaMemcpy(buffer, static_cast<const cudaGaugeField&>(src).Gauge_p(),
280  src.Bytes(), cudaMemcpyDeviceToHost);
281 
283  pool_pinned_free(buffer);
284 
285  } else { // else on the GPU
286 
287  void *buffer = create_gauge_buffer(bytes, order, geometry);
288  size_t ghost_bytes[8];
289  int dstNinternal = reconstruct != QUDA_RECONSTRUCT_NO ? reconstruct : 2*nColor*nColor;
290  for (int d=0; d<geometry; d++) ghost_bytes[d] = nFace * surface[d%4] * dstNinternal * precision;
291  void **ghost_buffer = (nFace > 0) ? create_ghost_buffer(ghost_bytes, order, geometry) : nullptr;
292 
294  copyGenericGauge(*this, src, QUDA_CUDA_FIELD_LOCATION, buffer, 0, ghost_buffer, 0);
295  if (geometry == QUDA_COARSE_GEOMETRY) copyGenericGauge(*this, src, QUDA_CUDA_FIELD_LOCATION, buffer, 0, ghost_buffer, 0, 3); // forwards links if bi-directional
296  } else {
297  copyExtendedGauge(*this, src, QUDA_CUDA_FIELD_LOCATION, buffer, 0);
298  }
299 
300  if (order == QUDA_QDP_GAUGE_ORDER) {
301  for (int d=0; d<geometry; d++) {
302  qudaMemcpy(((void**)gauge)[d], ((void**)buffer)[d], bytes/geometry, cudaMemcpyDeviceToHost);
303  }
304  } else {
305  qudaMemcpy(gauge, buffer, bytes, cudaMemcpyHostToDevice);
306  }
307 
308  if (order > 4 && ghostExchange == QUDA_GHOST_EXCHANGE_PAD && src.GhostExchange() == QUDA_GHOST_EXCHANGE_PAD && nFace)
309  for (int d=0; d<geometry; d++)
310  qudaMemcpy(Ghost()[d], ghost_buffer[d], ghost_bytes[d], cudaMemcpyDeviceToHost);
311 
312  free_gauge_buffer(buffer, order, geometry);
313  if (nFace > 0) free_ghost_buffer(ghost_buffer, order, geometry);
314  }
315 
316  } else if (typeid(src) == typeid(cpuGaugeField)) {
317  // copy field and ghost zone directly
319  const_cast<void*>(static_cast<const cpuGaugeField&>(src).Gauge_p()));
320  } else {
321  errorQuda("Invalid gauge field type");
322  }
323 
324  // if we have copied from a source without a pad then we need to exchange
326  src.GhostExchange() != QUDA_GHOST_EXCHANGE_PAD) {
328  }
329 
330  checkCudaError();
331  }
332 
// Replace the field's gauge pointer array with a caller-owned one.
// Per the error message below, this is only legal when the field was
// created with QUDA_REFERENCE_FIELD_CREATE (the object owns no allocation).
333  void cpuGaugeField::setGauge(void **gauge_)
334  {
// NOTE(review): source line 335 is absent from this extraction; given the
// message below and the stray `}` on line 338, it is presumably the guard
// `if (create != QUDA_REFERENCE_FIELD_CREATE) {` — confirm against upstream.
336  errorQuda("Setting gauge pointer is only allowed when create="
337  "QUDA_REFERENCE_FIELD_CREATE type\n");
338  }
339  gauge = gauge_;
340  }
341 
// Back up the host gauge field into a freshly allocated buffer (backup_h)
// so it can later be reverted via restore(). Errors if already backed up.
342  void cpuGaugeField::backup() const {
343  if (backed_up) errorQuda("Gauge field already backed up");
344 
// QDP order stores one pointer per geometry component, so each component
// array is copied separately and the pointer table itself is stashed in backup_h.
345  if (order == QUDA_QDP_GAUGE_ORDER) {
346  char **buffer = new char*[geometry];
347  for (int d=0; d<geometry; d++) {
348  buffer[d] = new char[bytes/geometry];
349  memcpy(buffer[d], gauge[d], bytes/geometry);
350  }
351  backup_h = reinterpret_cast<char*>(buffer);
352  } else {
353  backup_h = new char[bytes];
// NOTE(review): source line 354 is missing from this extraction; by symmetry
// with restore() it presumably reads `memcpy(backup_h, gauge, bytes);` —
// confirm against upstream.
355  }
356 
357  backed_up = true;
358  }
359 
361  if (!backed_up) errorQuda("Cannot restore since not backed up");
362 
363  if (order == QUDA_QDP_GAUGE_ORDER) {
364  char **buffer = reinterpret_cast<char**>(backup_h);
365  for (int d=0; d<geometry; d++) {
366  memcpy(gauge[d], buffer[d], bytes/geometry);
367  delete []buffer[d];
368  }
369  delete []buffer;
370  } else {
372  delete []backup_h;
373  }
374 
375  backed_up = false;
376  }
377 
379  memset(gauge, 0, bytes);
380  }
381 
382 /*template <typename Float>
383 void print_matrix(const Float &o, unsigned int x) {
384 
385  for (int s=0; s<o.Nspin(); s++) {
386  std::cout << "x = " << x << ", s = " << s << ", { ";
387  for (int c=0; c<o.Ncolor(); c++) {
388  std::cout << " ( " << o(x, s, c, 0) << " , " ;
389  if (c<o.Ncolor()-1) std::cout << o(x, s, c, 1) << " ) ," ;
390  else std::cout << o(x, s, c, 1) << " ) " ;
391  }
392  std::cout << " } " << std::endl;
393  }
394 
395 }
396 
397 // print out the vector at volume point x
398 void cpuColorSpinorField::PrintMatrix(unsigned int x) {
399 
400  switch(precision) {
401  case QUDA_DOUBLE_PRECISION:
402  print_matrix(*order_double, x);
403  break;
404  case QUDA_SINGLE_PRECISION:
405  print_matrix(*order_single, x);
406  break;
407  default:
408  errorQuda("Precision %d not implemented", precision);
409  }
410 
411 }
412 */
413 
414 } // namespace quda
#define qudaMemcpy(dst, src, count, kind)
Definition: quda_cuda_api.h:32
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can set at QUDA initialization using the env...
void extractGaugeGhost(const GaugeField &u, void **ghost, bool extract=true, int offset=0)
void setGauge(void **_gauge)
double maxGauge(const GaugeField &u)
Definition: max_gauge.cu:31
#define pool_pinned_free(ptr)
Definition: malloc_quda.h:116
void copyGenericGauge(GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0)
Definition: copy_gauge.cu:38
const void * src
void exchangeExtendedGhost(const int *R, bool no_comms_fill=false)
This does routine will populate the border / halo region of a gauge field that has been created using...
#define errorQuda(...)
Definition: util_quda.h:90
void free_gauge_buffer(void *buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
#define host_free(ptr)
Definition: malloc_quda.h:59
enum QudaLinkDirection_s QudaLinkDirection
void free_ghost_buffer(void **buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
QudaReconstructType reconstruct
Definition: gauge_field.h:135
int x[QUDA_MAX_DIM]
static int R[4]
QudaFieldGeometry geometry
Definition: gauge_field.h:133
QudaGaugeParam param
Definition: pack_test.cpp:17
void comm_free(MsgHandle *mh)
Definition: comm_mpi.cpp:252
const int * R() const
void checkField(const LatticeField &) const
#define comm_declare_send_relative(buffer, dim, dir, nbytes)
Definition: comm_quda.h:59
#define comm_declare_receive_relative(buffer, dim, dir, nbytes)
Definition: comm_quda.h:74
MsgHandle * mh_recv_back[2][QUDA_MAX_DIM]
void copy(const GaugeField &src)
void extractExtendedGaugeGhost(const GaugeField &u, int dim, const int *R, void **ghost, bool extract)
void comm_start(MsgHandle *mh)
Definition: comm_mpi.cpp:260
QudaGhostExchange ghostExchange
void exchange(void **recv, void **send, QudaDirection dir) const
Exchange the buffers across all dimensions in a given direction.
void * create_gauge_buffer(size_t bytes, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
void restore()
Restores the cpuGaugeField.
const void ** Ghost() const
Definition: gauge_field.h:254
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
void injectGhost(QudaLinkDirection link_direction=QUDA_LINK_BACKWARDS)
The opposite of exchangeGhost: take the ghost zone on x, send to node x-1, and inject back into the f...
void * memcpy(void *__dst, const void *__src, size_t __n)
#define safe_malloc(size)
Definition: malloc_quda.h:54
MsgHandle * mh_send_fwd[2][QUDA_MAX_DIM]
cpuGaugeField(const GaugeFieldParam &param)
Constructor for cpuGaugeField from a GaugeFieldParam.
void * memset(void *__b, int __c, size_t __len)
int surface[QUDA_MAX_DIM]
#define pool_pinned_malloc(size)
Definition: malloc_quda.h:115
void backup() const
Backs up the cpuGaugeField.
void exchangeGhost(QudaLinkDirection link_direction=QUDA_LINK_BACKWARDS)
Exchange the ghost and store store in the padded region.
void ** create_ghost_buffer(size_t bytes[], QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
QudaLinkType link_type
Definition: gauge_field.h:139
QudaFieldCreate create
Definition: gauge_field.h:147
enum QudaFieldGeometry_s QudaFieldGeometry
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
#define checkCudaError()
Definition: util_quda.h:129
MsgHandle * mh_recv_fwd[2][QUDA_MAX_DIM]
void comm_wait(MsgHandle *mh)
Definition: comm_mpi.cpp:266
QudaGaugeFieldOrder order
Definition: gauge_field.h:137
static __inline__ size_t size_t d
void * ghost[2 *QUDA_MAX_DIM]
Definition: gauge_field.h:149
QudaPrecision precision
MsgHandle * mh_send_back[2][QUDA_MAX_DIM]
void copyExtendedGauge(GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0)
unsigned long long bytes
Definition: blas_quda.cu:43
int comm_dim_partitioned(int dim)