4 #define QMP_CHECK(qmp_call) \
6 QMP_status_t status = qmp_call; \
7 if (status != QMP_SUCCESS) errorQuda("(QMP) %s", QMP_error_string(status)); \
10 #define MPI_CHECK(mpi_call) \
12 int status = mpi_call; \
13 if (status != MPI_SUCCESS) { \
14 char err_string[128]; \
16 MPI_Error_string(status, err_string, &err_len); \
17 err_string[127] = '\0'; \
18 errorQuda("(MPI) %s", err_string); \
30 #define USE_MPI_GATHER
37 bool user_set_comm_handle_,
void *user_comm)
44 if (!initialized) { assert(
false); }
49 QMP_COMM_HANDLE = QMP_comm_get_default();
50 MPI_COMM_HANDLE = *((MPI_Comm *)user_comm);
52 QMP_COMM_HANDLE = QMP_comm_get_default();
54 QMP_get_mpi_comm(QMP_COMM_HANDLE, &my_comm);
55 MPI_COMM_HANDLE = *((MPI_Comm *)my_comm);
58 is_qmp_handle_default =
true;
60 comm_init(nDim, commDims, rank_from_coords, map_data);
67 constexpr
int nDim = 4;
74 for (
int d = 0; d < nDim; d++) {
75 assert(other.
comm_dim(d) % comm_split[d] == 0);
76 comm_dims_split[d] = other.
comm_dim(d) / comm_split[d];
77 comm_key_split[d] = other.
comm_coord(d) % comm_dims_split[d];
78 comm_color_split[d] = other.
comm_coord(d) / comm_dims_split[d];
81 int key = index(nDim, comm_dims_split.
data(), comm_key_split.
data());
82 int color = index(nDim, comm_split, comm_color_split.
data());
84 QMP_comm_split(other.QMP_COMM_HANDLE, color, key, &QMP_COMM_HANDLE);
86 QMP_get_mpi_comm(QMP_COMM_HANDLE, &my_comm);
87 MPI_COMM_HANDLE = *((MPI_Comm *)my_comm);
89 is_qmp_handle_default =
false;
91 int my_rank_ = QMP_get_node_number();
96 printf(
"Creating split communicator, base_rank = %5d, key = %5d, color = %5d, split_rank = %5d, gpuid = %d\n",
105 QMP_comm_free(QMP_COMM_HANDLE);
117 #ifdef USE_MPI_GATHER
125 for (
int j = 0; j < 128; j++) {
126 data[j] = (i ==
comm_rank()) ? hostname[j] : 0;
127 QMP_comm_sum_int(QMP_COMM_HANDLE, data + j);
128 hostname_recv_buf[i * 128 + j] = data[j];
140 #ifdef USE_MPI_GATHER
148 QMP_comm_sum_int(QMP_COMM_HANDLE, &data);
149 gpuid_recv_buf[i] = data;
156 if (QMP_is_initialized() != QMP_TRUE) {
errorQuda(
"QMP has not been initialized"); }
159 for (
int i = 0; i < ndim; i++) { grid_size *= dims[i]; }
160 if (grid_size != QMP_comm_get_number_of_nodes(QMP_COMM_HANDLE)) {
161 errorQuda(
"Communication grid size declared via initCommsGridQuda() does not match"
162 " total number of QMP nodes (%d != %d)",
163 grid_size, QMP_comm_get_number_of_nodes(QMP_COMM_HANDLE));
180 mh->
mem = QMP_declare_msgmem(buffer, nbytes);
181 if (mh->
mem == NULL)
errorQuda(
"Unable to allocate QMP message memory");
183 mh->
handle = QMP_comm_declare_send_to(QMP_COMM_HANDLE, mh->
mem,
rank, 0);
184 if (mh->
handle == NULL)
errorQuda(
"Unable to allocate QMP message handle");
196 mh->
mem = QMP_declare_msgmem(buffer, nbytes);
197 if (mh->
mem == NULL)
errorQuda(
"Unable to allocate QMP message memory");
199 mh->
handle = QMP_comm_declare_receive_from(QMP_COMM_HANDLE, mh->
mem,
rank, 0);
200 if (mh->
handle == NULL)
errorQuda(
"Unable to allocate QMP message handle");
215 mh->
mem = QMP_declare_msgmem(buffer, nbytes);
216 if (mh->
mem == NULL)
errorQuda(
"Unable to allocate QMP message memory");
218 mh->
handle = QMP_comm_declare_send_to(QMP_COMM_HANDLE, mh->
mem,
rank, 0);
219 if (mh->
handle == NULL)
errorQuda(
"Unable to allocate QMP message handle");
234 mh->
mem = QMP_declare_msgmem(buffer, nbytes);
235 if (mh->
mem == NULL)
errorQuda(
"Unable to allocate QMP message memory");
237 mh->
handle = QMP_comm_declare_receive_from(QMP_COMM_HANDLE, mh->
mem,
rank, 0);
238 if (mh->
handle == NULL)
errorQuda(
"Unable to allocate QMP message handle");
248 int nblocks,
size_t stride)
255 mh->
mem = QMP_declare_strided_msgmem(buffer, blksize, nblocks, stride);
256 if (mh->
mem == NULL)
errorQuda(
"Unable to allocate QMP message memory");
258 mh->
handle = QMP_comm_declare_send_to(QMP_COMM_HANDLE, mh->
mem,
rank, 0);
259 if (mh->
handle == NULL)
errorQuda(
"Unable to allocate QMP message handle");
269 int nblocks,
size_t stride)
276 mh->
mem = QMP_declare_strided_msgmem(buffer, blksize, nblocks, stride);
277 if (mh->
mem == NULL)
errorQuda(
"Unable to allocate QMP message memory");
279 mh->
handle = QMP_comm_declare_receive_from(QMP_COMM_HANDLE, mh->
mem,
rank, 0);
280 if (mh->
handle == NULL)
errorQuda(
"Unable to allocate QMP message handle");
287 QMP_free_msghandle(mh->
handle);
288 QMP_free_msgmem(mh->
mem);
302 QMP_CHECK(QMP_comm_sum_double(QMP_COMM_HANDLE, data));
306 double *recv_buf = (
double *)
safe_malloc(n *
sizeof(
double));
320 QMP_CHECK(QMP_comm_sum_double_array(QMP_COMM_HANDLE, data,
size));
324 double *recv_buf =
new double[
size * n];
327 double *recv_trans =
new double[
size * n];
328 for (
size_t i = 0; i < n; i++) {
329 for (
size_t j = 0; j <
size; j++) { recv_trans[j * n + i] = recv_buf[i *
size + j]; }
342 for (
size_t i = 0; i <
size; i++) {
QMP_CHECK(QMP_comm_max_double(QMP_COMM_HANDLE, data + i)); }
349 if (
sizeof(uint64_t) !=
sizeof(
unsigned long))
errorQuda(
"unsigned long is not 64-bit");
350 QMP_CHECK(QMP_comm_xor_ulong(QMP_COMM_HANDLE,
reinterpret_cast<unsigned long *
>(data)));
355 QMP_CHECK(QMP_comm_broadcast(QMP_COMM_HANDLE, data, nbytes));
int comm_rank_displaced(const Topology *topo, const int displacement[])
char * comm_hostname(void)
int(* QudaCommsMap)(const int *coords, void *fdata)
#define MPI_CHECK(mpi_call)
#define QMP_CHECK(qmp_call)
int lex_rank_from_coords_dim_t(const int *coords, void *fdata)
#define safe_malloc(size)
_EXTERN_C_ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
bool comm_deterministic_reduce()
void comm_allreduce_max(double *data)
T deterministic_reduce(T *array, int n)
void comm_wait(MsgHandle *mh)
void comm_allreduce(double *data)
void comm_broadcast(void *data, size_t nbytes)
int comm_query(MsgHandle *mh)
void comm_gather_gpuid(int *gpuid_recv_buf)
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
void comm_allreduce_min(double *data)
static int comm_rank_global()
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
static void comm_abort_(int status)
MsgHandle * comm_declare_send_rank(void *buffer, int rank, int tag, size_t nbytes)
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
void comm_allreduce_max_array(double *data, size_t size)
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
bool user_set_comm_handle
void comm_gather_hostname(char *hostname_recv_buf)
void comm_allreduce_xor(uint64_t *data)
void comm_allreduce_int(int *data)
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
void comm_free(MsgHandle *&mh)
Topology * comm_default_topology(void)
MsgHandle * comm_declare_recv_rank(void *buffer, int rank, int tag, size_t nbytes)
void comm_init_common(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
void comm_start(MsgHandle *mh)
void comm_allreduce_array(double *data, size_t size)