#define MPI_CHECK(mpi_call)                             \
  do {                                                  \
    int status = mpi_call;                              \
    if (status != MPI_SUCCESS) {                        \
      char err_string[128];                             \
      int err_len;                                      \
      MPI_Error_string(status, err_string, &err_len);   \
      err_string[127] = '\0';                           \
      errorQuda("(MPI) %s", err_string);                \
    }                                                   \
  } while (0)
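// Usage sketch: every raw MPI call below is wrapped so that any status other
// than MPI_SUCCESS aborts via errorQuda() with the decoded error string, e.g.
//   MPI_CHECK(MPI_Barrier(MPI_COMM_HANDLE));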
Communicator(int nDim, const int *commDims, QudaCommsMap rank_from_coords, void *map_data,
             bool user_set_comm_handle_, void *user_comm)
{
  int initialized;
  MPI_CHECK(MPI_Initialized(&initialized));
  if (!initialized) { assert(false); }

  if (user_set_comm_handle_) {
    MPI_COMM_HANDLE = *((MPI_Comm *)user_comm); // adopt the user-supplied handle
  } else {
    MPI_Comm_dup(MPI_COMM_WORLD, &MPI_COMM_HANDLE); // isolate QUDA traffic from the caller's
  }

  comm_init(nDim, commDims, rank_from_coords, map_data);
}
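// Hypothetical caller sketch (names illustrative, not part of this file): to
// hand QUDA an existing communicator, pass its address through user_comm with
// the flag set:
//   MPI_Comm my_comm = /* duplicated or split elsewhere */;
//   Communicator comm(4, dims, rank_from_coords, map_data, true, (void *)&my_comm);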
constexpr int nDim = 4;

// per-dimension shape of each sub-grid, plus this rank's coordinate (key)
// and sub-grid index (color) within the split
for (int d = 0; d < nDim; d++) {
  assert(other.comm_dim(d) % comm_split[d] == 0);
  comm_dims_split[d] = other.comm_dim(d) / comm_split[d];
  comm_key_split[d] = other.comm_coord(d) % comm_dims_split[d];
  comm_color_split[d] = other.comm_coord(d) / comm_dims_split[d];
}

int key = index(nDim, comm_dims_split.data(), comm_key_split.data());
int color = index(nDim, comm_split, comm_color_split.data());

MPI_CHECK(MPI_Comm_split(other.MPI_COMM_HANDLE, color, key, &MPI_COMM_HANDLE));

MPI_CHECK(MPI_Comm_rank(MPI_COMM_HANDLE, &my_rank_));
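// Worked example (hedged): splitting a 4x4x4x4 grid with comm_split = {2,1,1,1}
// yields two 2x4x4x4 sub-grids. A rank at coordinate (3,0,0,0) gets
// comm_dims_split[0] = 4/2 = 2, key coordinate 3 % 2 = 1, and color coordinate
// 3 / 2 = 1: it joins the second sub-communicator with local x-coordinate 1.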
int initialized;
MPI_CHECK(MPI_Initialized(&initialized));

if (!initialized) { errorQuda("MPI has not been initialized"); }

int grid_size = 1;
for (int i = 0; i < ndim; i++) { grid_size *= dims[i]; }
if (grid_size != size) {
  errorQuda("Communication grid size declared via initCommsGridQuda() does not match"
            " total number of MPI ranks (%d != %d)",
            grid_size, size);
}
// comm_declare_send_displaced: derive a unique message tag from the displacement
int tag = 0;
for (int i = ndim - 1; i >= 0; i--) tag = tag * 4 * max_displacement + displacement[i] + max_displacement;
tag = tag >= 0 ? tag : 2 * pow(4 * max_displacement, ndim) + tag;
// comm_declare_receive_displaced: negate the displacement so the tag matches the sender's
int tag = 0;
for (int i = ndim - 1; i >= 0; i--) tag = tag * 4 * max_displacement - displacement[i] + max_displacement;
tag = tag >= 0 ? tag : 2 * pow(4 * max_displacement, ndim) + tag;
MsgHandle *comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize,
                                               int nblocks, size_t stride)

int tag = 0;
for (int i = ndim - 1; i >= 0; i--) tag = tag * 4 * max_displacement + displacement[i] + max_displacement;
tag = tag >= 0 ? tag : 2 * pow(4 * max_displacement, ndim) + tag;
MsgHandle *comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize,
                                                  int nblocks, size_t stride)

int tag = 0;
for (int i = ndim - 1; i >= 0; i--) tag = tag * 4 * max_displacement - displacement[i] + max_displacement;
tag = tag >= 0 ? tag : 2 * pow(4 * max_displacement, ndim) + tag;
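// Hedged sketch (illustrative, not this file's verbatim implementation): a
// strided message of nblocks blocks, each blksize bytes and separated by
// stride bytes, maps naturally onto an MPI vector datatype:
static MPI_Datatype make_strided_type_sketch(size_t blksize, int nblocks, size_t stride)
{
  MPI_Datatype dtype;
  MPI_CHECK(MPI_Type_vector(nblocks, (int)blksize, (int)stride, MPI_BYTE, &dtype));
  MPI_CHECK(MPI_Type_commit(&dtype));
  return dtype; // then post with e.g. MPI_Send_init(buffer, 1, dtype, rank, tag, comm, &request)
}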
// comm_allreduce (deterministic path): gather one value per rank, reduce locally
double *recv_buf = (double *)safe_malloc(n * sizeof(double));
// comm_allreduce_array: element-wise sum across ranks
double *recvbuf = new double[size];
MPI_CHECK(MPI_Allreduce(data, recvbuf, size, MPI_DOUBLE, MPI_SUM, MPI_COMM_HANDLE));
memcpy(data, recvbuf, size * sizeof(double));
delete[] recvbuf;
// comm_allreduce_array (deterministic path): gather every rank's local array
double *recv_buf = new double[size * n];
MPI_CHECK(MPI_Allgather(data, size, MPI_DOUBLE, recv_buf, size, MPI_DOUBLE, MPI_COMM_HANDLE));

// transpose from rank-major to element-major so each element's n partial
// values are contiguous, then reduce each element in a deterministic order
double *recv_trans = new double[size * n];
for (size_t i = 0; i < n; i++) {
  for (size_t j = 0; j < size; j++) { recv_trans[j * n + i] = recv_buf[i * size + j]; }
}
for (size_t i = 0; i < size; i++) { data[i] = deterministic_reduce(recv_trans + i * n, n); }

delete[] recv_buf;
delete[] recv_trans;
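// Hedged sketch of deterministic_reduce (assumed behaviour, not necessarily the
// verbatim implementation; requires <algorithm>): summing the gathered partial
// results in a fixed, sorted order makes the floating-point sum independent of
// MPI's reduction order, i.e. bitwise reproducible from run to run.
template <typename T> T deterministic_reduce_sketch(T *array, int n)
{
  std::sort(array, array + n); // fixed summation order
  T sum = 0;
  for (int i = 0; i < n; i++) sum += array[i];
  return sum;
}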
// comm_allreduce_max_array: element-wise max across ranks
double *recvbuf = new double[size];
MPI_CHECK(MPI_Allreduce(data, recvbuf, size, MPI_DOUBLE, MPI_MAX, MPI_COMM_HANDLE));
memcpy(data, recvbuf, size * sizeof(double));
delete[] recvbuf;
// comm_allreduce_xor: MPI_UNSIGNED_LONG is only usable here if it is 64 bits wide
if (sizeof(uint64_t) != sizeof(unsigned long)) errorQuda("unsigned long is not 64-bit");

uint64_t recvbuf;
MPI_CHECK(MPI_Allreduce(data, &recvbuf, 1, MPI_UNSIGNED_LONG, MPI_BXOR, MPI_COMM_HANDLE));
*data = recvbuf;