QUDA  v1.1.0
A library for QCD on GPUs
comm_common.cpp
Go to the documentation of this file.
1 #include <unistd.h> // for gethostname()
2 #include <assert.h>
3 #include <limits>
4 
5 #include <quda_internal.h>
6 #include <communicator_quda.h>
7 #include <comm_quda.h>
8 
9 
10 char *comm_hostname(void)
11 {
12  static bool cached = false;
13  static char hostname[128];
14 
15  if (!cached) {
16  gethostname(hostname, 128);
17  hostname[127] = '\0';
18  cached = true;
19  }
20 
21  return hostname;
22 }
23 
24 static unsigned long int rand_seed = 137;
25 
33 double comm_drand(void)
34 {
35  const double twoneg48 = 0.35527136788005009e-14;
36  const unsigned long int m = 25214903917, a = 11, mask = 281474976710655;
37  rand_seed = (m * rand_seed + a) & mask;
38  return (twoneg48 * rand_seed);
39 }
40 
44 MsgHandle *comm_declare_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir,
45  size_t nbytes)
46 {
47 #ifdef HOST_DEBUG
48  checkCudaError(); // check and clear error state first
49 
50  if (isHost(buffer)) {
51  // test this memory allocation is ok by doing a memcpy from it
52  void *tmp = safe_malloc(nbytes);
53  try {
54  std::copy(static_cast<char *>(buffer), static_cast<char *>(buffer) + nbytes, static_cast<char *>(tmp));
55  } catch (std::exception &e) {
56  printfQuda("ERROR: buffer failed (%s:%d in %s(), dim=%d, dir=%d, nbytes=%zu)\n", file, line, func, dim, dir,
57  nbytes);
58  errorQuda("aborting");
59  }
60  host_free(tmp);
61  } else {
62  // test this memory allocation is ok by doing a memcpy from it
63  void *tmp = device_malloc(nbytes);
64  qudaMemcpy(tmp, buffer, nbytes, cudaMemcpyDeviceToDevice);
66  }
67 #endif
68 
69  int disp[QUDA_MAX_DIM] = {0};
70  disp[dim] = dir;
71 
72  return comm_declare_send_displaced(buffer, disp, nbytes);
73 }
74 
78 MsgHandle *comm_declare_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir,
79  size_t nbytes)
80 {
81 #ifdef HOST_DEBUG
82  checkCudaError(); // check and clear error state first
83 
84  if (isHost(buffer)) {
85  // test this memory allocation is ok by filling it
86  try {
87  std::fill(static_cast<char *>(buffer), static_cast<char *>(buffer) + nbytes, 0);
88  } catch (std::exception &e) {
89  printfQuda("ERROR: buffer failed (%s:%d in %s(), dim=%d, dir=%d, nbytes=%zu)\n", file, line, func, dim, dir,
90  nbytes);
91  errorQuda("aborting");
92  }
93  } else {
94  // test this memory allocation is ok by doing a memset
95  qudaMemset(buffer, 0, nbytes);
96  }
97 #endif
98 
99  int disp[QUDA_MAX_DIM] = {0};
100  disp[dim] = dir;
101 
102  return comm_declare_receive_displaced(buffer, disp, nbytes);
103 }
104 
108 MsgHandle *comm_declare_strided_send_relative_(const char *func, const char *file, int line, void *buffer, int dim,
109  int dir, size_t blksize, int nblocks, size_t stride)
110 {
111 #ifdef HOST_DEBUG
112  checkCudaError(); // check and clear error state first
113 
114  if (isHost(buffer)) {
115  // test this memory allocation is ok by doing a memcpy from it
116  void *tmp = safe_malloc(blksize * nblocks);
117  try {
118  for (int i = 0; i < nblocks; i++)
119  std::copy(static_cast<char *>(buffer) + i * stride, static_cast<char *>(buffer) + i * stride + blksize,
120  static_cast<char *>(tmp));
121  } catch (std::exception &e) {
122  printfQuda("ERROR: buffer failed (%s:%d in %s(), dim=%d, dir=%d, blksize=%zu nblocks=%d stride=%zu)\n", file,
123  line, func, dim, dir, blksize, nblocks, stride);
124  errorQuda("aborting");
125  }
126  host_free(tmp);
127  } else {
128  // test this memory allocation is ok by doing a memcpy from it
129 
130  void *tmp = device_malloc(blksize*nblocks);
131  qudaMemcpy2D(tmp, blksize, buffer, stride, blksize, nblocks, cudaMemcpyDeviceToDevice);
132  device_free(tmp);
133  }
134 #endif
135 
136  int disp[QUDA_MAX_DIM] = {0};
137  disp[dim] = dir;
138 
139  return comm_declare_strided_send_displaced(buffer, disp, blksize, nblocks, stride);
140 }
141 
145 MsgHandle *comm_declare_strided_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim,
146  int dir, size_t blksize, int nblocks, size_t stride)
147 {
148 #ifdef HOST_DEBUG
149  checkCudaError(); // check and clear error state first
150 
151  if (isHost(buffer)) {
152  // test this memory allocation is ok by filling it
153  try {
154  for (int i = 0; i < nblocks; i++)
155  std::fill(static_cast<char *>(buffer) + i * stride, static_cast<char *>(buffer) + i * stride + blksize, 0);
156  } catch (std::exception &e) {
157  printfQuda("ERROR: buffer failed (%s:%d in %s(), dim=%d, dir=%d, blksize=%zu nblocks=%d stride=%zu)\n", file,
158  line, func, dim, dir, blksize, nblocks, stride);
159  errorQuda("aborting");
160  }
161  } else {
162  // test this memory allocation is ok by doing a memset
163  qudaMemset2D(buffer, stride, 0, blksize, nblocks);
164  }
165 #endif
166 
167  int disp[QUDA_MAX_DIM] = {0};
168  disp[dim] = dir;
169 
170  return comm_declare_strided_receive_displaced(buffer, disp, blksize, nblocks, stride);
171 }
172 
173 Topology *comm_create_topology(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data, int my_rank)
174 {
175  if (ndim > QUDA_MAX_DIM) { errorQuda("ndim exceeds QUDA_MAX_DIM"); }
176 
177  Topology *topo = new Topology;
178 
179  topo->ndim = ndim;
180 
181  int nodes = 1;
182  for (int i = 0; i < ndim; i++) {
183  topo->dims[i] = dims[i];
184  nodes *= dims[i];
185  }
186 
187  topo->ranks = new int[nodes];
188  topo->coords = (int(*)[QUDA_MAX_DIM])new int[QUDA_MAX_DIM * nodes];
189 
190  int x[QUDA_MAX_DIM];
191  for (int i = 0; i < QUDA_MAX_DIM; i++) x[i] = 0;
192 
193  do {
194  int rank = rank_from_coords(x, map_data);
195  topo->ranks[index(ndim, dims, x)] = rank;
196  for (int i = 0; i < ndim; i++) { topo->coords[rank][i] = x[i]; }
197  } while (advance_coords(ndim, dims, x));
198 
199  topo->my_rank = my_rank;
200  for (int i = 0; i < ndim; i++) { topo->my_coords[i] = topo->coords[my_rank][i]; }
201 
202  // initialize the random number generator with a rank-dependent seed and initialized it only once.
203  if (comm_gpuid() < 0) { rand_seed = 17 * my_rank + 137; }
204 
205  return topo;
206 }
207 
208 void comm_abort(int status)
209 {
210 #ifdef HOST_DEBUG
211  raise(SIGABRT);
212 #endif
213 #ifdef QUDA_BACKWARDSCPP
214  backward::StackTrace st;
215  st.load_here(32);
216  backward::Printer p;
217  p.print(st, getOutputFile());
218 #endif
219  comm_abort_(status);
220 }
char * comm_hostname(void)
Definition: comm_common.cpp:10
void comm_abort(int status)
Topology * comm_create_topology(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data, int my_rank)
MsgHandle * comm_declare_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes)
Definition: comm_common.cpp:78
MsgHandle * comm_declare_strided_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride)
MsgHandle * comm_declare_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes)
Definition: comm_common.cpp:44
double comm_drand(void)
Definition: comm_common.cpp:33
MsgHandle * comm_declare_strided_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride)
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
struct Topology_s Topology
Definition: comm_quda.h:9
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
int(* QudaCommsMap)(const int *coords, void *fdata)
Definition: comm_quda.h:12
void comm_abort_(int status)
int comm_gpuid(void)
std::array< int, 4 > dim
bool isHost(const void *buffer)
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:34
#define device_malloc(size)
Definition: malloc_quda.h:102
#define safe_malloc(size)
Definition: malloc_quda.h:106
#define device_free(ptr)
Definition: malloc_quda.h:110
#define host_free(ptr)
Definition: malloc_quda.h:115
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: blas_quda.h:24
#define qudaMemset2D(ptr, pitch, value, width, height)
Definition: quda_api.h:221
#define qudaMemcpy2D(dst, dpitch, src, spitch, width, height, kind)
Definition: quda_api.h:210
#define qudaMemcpy(dst, src, count, kind)
Definition: quda_api.h:204
#define qudaMemset(ptr, value, count)
Definition: quda_api.h:218
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5.
int(* coords)[QUDA_MAX_DIM]
int my_coords[QUDA_MAX_DIM]
int dims[QUDA_MAX_DIM]
#define printfQuda(...)
Definition: util_quda.h:114
#define checkCudaError()
Definition: util_quda.h:158
FILE * getOutputFile()
Definition: util_quda.cpp:23
#define errorQuda(...)
Definition: util_quda.h:120