#define MPI_CHECK(mpi_call) do {                      \
  int status = mpi_call;                              \
  if (status != MPI_SUCCESS) {                        \
    char err_string[128];                             \
    int err_len;                                      \
    MPI_Error_string(status, err_string, &err_len);   \
    err_string[127] = '\0';                           \
    errorQuda("(MPI) %s", err_string);                \
  }                                                   \
} while (0)

MPI_CHECK( MPI_Initialized(&initialized) );
if (!initialized) errorQuda("MPI has not been initialized");
/* in comm_init(): verify that the declared process grid matches the MPI world size */
int grid_size = 1;
for (int i = 0; i < ndim; i++) { grid_size *= dims[i]; }

if (grid_size != size) {
  errorQuda("Communication grid size declared via initCommsGridQuda() does not match"
            " total number of MPI ranks (%d != %d)", grid_size, size);
}
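A hypothetical worked example of the same consistency check (the grid shape and rank count are illustrative values, not QUDA defaults):

#include <stdio.h>

int main(void)
{
  int dims[4] = {1, 1, 2, 2};  /* grid declared via initCommsGridQuda() */
  int size = 4;                /* what MPI_Comm_size() would report     */
  int grid_size = 1;
  for (int i = 0; i < 4; i++) grid_size *= dims[i];
  /* 1*1*2*2 == 4 == size, so the check passes; dims {2,2,2,1} would
     give grid_size = 8 != 4 and trigger errorQuda() */
  printf("grid_size = %d, size = %d -> %s\n", grid_size, size,
         grid_size == size ? "ok" : "mismatch");
  return 0;
}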
/* in check_displacement(): reject displacements beyond the supported maximum */
for (int i = 0; i < ndim; i++) {
  if (abs(displacement[i]) > max_displacement) {
    errorQuda("Requested displacement[%d] = %d is greater than maximum allowed", i, displacement[i]);
  }
}
MsgHandle *comm_declare_strided_send_displaced(void *buffer, const int displacement[],
                                               size_t blksize, int nblocks, size_t stride)
{
  /* ... */
  MPI_CHECK( MPI_Type_vector(nblocks, blksize, stride, MPI_BYTE, &(mh->datatype)) );
  /* ... */
}
MsgHandle *comm_declare_strided_receive_displaced(void *buffer, const int displacement[],
                                                  size_t blksize, int nblocks, size_t stride)
{
  /* ... */
  MPI_CHECK( MPI_Type_vector(nblocks, blksize, stride, MPI_BYTE, &(mh->datatype)) );
  /* ... */
}
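Because the base type here is MPI_BYTE, both the block length and the stride passed to MPI_Type_vector are measured in bytes: the resulting datatype describes nblocks chunks of blksize bytes whose starting addresses sit stride bytes apart, so a strided region (for example, one face of a multi-dimensional field) can be communicated without packing it into a contiguous buffer first. A self-contained sketch (the helper name is illustrative):

#include <mpi.h>

MPI_Datatype make_strided_type(size_t blksize, int nblocks, size_t stride)
{
  MPI_Datatype t;
  /* nblocks blocks of blksize bytes, starts separated by stride bytes */
  MPI_Type_vector(nblocks, (int)blksize, (int)stride, MPI_BYTE, &t);
  MPI_Type_commit(&t);  /* a datatype must be committed before use */
  return t;
}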
/* deterministic_reduce(): sort into a canonical order before summing, so the
   floating-point result is bit-identical regardless of gather order */
template <typename T> T deterministic_reduce(T *array, int n)
{
  std::sort(array, array + n);                   // requires <algorithm>
  return std::accumulate(array, array + n, 0.0); // requires <numeric>
}
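The sort matters because floating-point addition is not associative: the rounded result of a sum depends on operand order, so summing gathered values in arrival order can differ between runs. A hypothetical demonstration:

#include <cstdio>

int main()
{
  double big = 1e17, one = 1.0;
  std::printf("%g\n", (big + one) - big); // prints 0: the 1.0 is absorbed
  std::printf("%g\n", (big - big) + one); // prints 1
  return 0;
}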
double *recv_buf = (double *)safe_malloc(n * sizeof(double));
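A plausible reading of the deterministic path in comm_allreduce() that this allocation serves (a sketch: comm_size(), MPI_COMM_HANDLE, and host_free() are assumed from QUDA's conventions; the document shows only the allocation): gather one value per rank, then reduce in sorted order so every rank computes an identical sum.

size_t n = comm_size();                        /* assumed helper: number of ranks */
double *recv_buf = (double *)safe_malloc(n * sizeof(double));
MPI_CHECK( MPI_Allgather(data, 1, MPI_DOUBLE, recv_buf, 1, MPI_DOUBLE, MPI_COMM_HANDLE) );
*data = deterministic_reduce(recv_buf, (int)n); /* same order, hence same result, on every rank */
host_free(recv_buf);                            /* assumed counterpart of safe_malloc() */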
/* in comm_allreduce_array(): reduce via a single MPI_Allreduce */
double *recvbuf = new double[size];
MPI_CHECK( MPI_Allreduce(data, recvbuf, size, MPI_DOUBLE, MPI_SUM, MPI_COMM_HANDLE) );
memcpy(data, recvbuf, size * sizeof(double));
delete [] recvbuf;
/* in comm_allreduce_array(): deterministic path -- gather every rank's array
   (rank i's copy lands at recv_buf[i * size]), then transpose so that the n
   contributions to element j are contiguous at recv_trans[j * n] */
double *recv_buf = new double[size * n];
/* ... */
double *recv_trans = new double[size * n];
for (size_t i = 0; i < n; i++) {
  for (size_t j = 0; j < size; j++) { recv_trans[j * n + i] = recv_buf[i * size + j]; }
}
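With the contributions to each element now contiguous, the step that plausibly follows (a sketch, assuming the same deterministic_reduce() shown earlier) reduces each element in sorted order and releases the scratch buffers:

for (size_t j = 0; j < size; j++) {
  data[j] = deterministic_reduce(recv_trans + j * n, (int)n); /* per-element sorted sum */
}
delete [] recv_trans;
delete [] recv_buf;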
/* in comm_allreduce_max_array(): element-wise maximum across ranks */
double *recvbuf = new double[size];
MPI_CHECK( MPI_Allreduce(data, recvbuf, size, MPI_DOUBLE, MPI_MAX, MPI_COMM_HANDLE) );
memcpy(data, recvbuf, size * sizeof(double));
delete [] recvbuf;
/* in comm_allreduce_xor(): guard the use of unsigned long for 64-bit data */
if (sizeof(uint64_t) != sizeof(unsigned long)) errorQuda("unsigned long is not 64-bit");
/* in comm_abort(): */
MPI_Abort(MPI_COMM_HANDLE, status);
void comm_free(MsgHandle *&mh)
void comm_allreduce(double *data)
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
void comm_gather_hostname(char *hostname_recv_buf)
Gather all hostnames.
_EXTERN_C_ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
_EXTERN_C_ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
int comm_query(MsgHandle *mh)
void comm_wait(MsgHandle *mh)
void comm_allreduce_array(double *data, size_t size)
void comm_allreduce_max(double *data)
_EXTERN_C_ int MPI_Wait(MPI_Request *request, MPI_Status *status)
void comm_gather_gpuid(int *gpuid_recv_buf)
Gather all GPU ids.
void comm_allreduce_int(int *data)
static const int max_displacement
#define MPI_CHECK(mpi_call)
Topology * comm_default_topology(void)
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
void comm_allreduce_max_array(double *data, size_t size)
_EXTERN_C_ int MPI_Barrier(MPI_Comm comm)
char * comm_hostname(void)
void comm_start(MsgHandle *mh)
void comm_allreduce_min(double *data)
_EXTERN_C_ int MPI_Test(MPI_Request *request, int *flag, MPI_Status *status)
static bool initialized
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
Initialize the communications, implemented in comm_single.cpp, comm_qmp.cpp, and comm_mpi.cpp. A usage sketch follows this list.
void comm_broadcast(void *data, size_t nbytes)
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
int comm_rank_displaced(const Topology *topo, const int displacement[])
_EXTERN_C_ int MPI_Send_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)
typedef int (*QudaCommsMap)(const int *coords, void *fdata)
void comm_init_common(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
Initialize the communications common to all communications abstractions.
_EXTERN_C_ int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request)
#define safe_malloc(size)
bool comm_deterministic_reduce()
_EXTERN_C_ int MPI_Start(MPI_Request *request)
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
int comm_ndim(const Topology *topo)
void comm_allreduce_xor(uint64_t *data)
__host__ __device__ ValueType abs(ValueType x)
static void check_displacement(const int displacement[], int ndim)
T deterministic_reduce(T *array, int n)
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
void comm_abort(int status)
_EXTERN_C_ int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
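A hypothetical end-to-end sketch stitching together the declarations above (the lexicographic rank map, grid shape, and buffer sizes are illustrative assumptions, not QUDA defaults):

/* Map grid coordinates to an MPI rank lexicographically (hypothetical). */
static int lex_rank_from_coords(const int *coords, void *fdata)
{
  int *dims = (int *)fdata;
  int rank = coords[0];
  for (int i = 1; i < 4; i++) rank = rank * dims[i] + coords[i];
  return rank;
}

void example(void)
{
  int dims[4] = {1, 1, 2, 2};                       /* 4 ranks total */
  comm_init(4, dims, lex_rank_from_coords, dims);   /* grid coords -> MPI rank */

  char buf[256];
  int fwd_t[4] = {0, 0, 0, +1};                     /* neighbor forward in t */
  MsgHandle *mh = comm_declare_send_displaced(buf, fwd_t, sizeof(buf));
  comm_start(mh);                                   /* post the send */
  while (!comm_query(mh)) { /* poll, or block with comm_wait(mh) */ }
  comm_free(mh);

  double r = 1.0;
  comm_allreduce(&r);                               /* global sum across the grid */
}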