/* MPI_CHECK(mpi_call): evaluates mpi_call once and keeps the result in a local `status`.
 * When status differs from MPI_SUCCESS, the text for that status is fetched with
 * MPI_Error_string into a 128-byte buffer, the final byte of the buffer is forced to
 * a NUL terminator, and the text is passed to errorQuda with a "(MPI)" prefix.
 * NOTE(review): err_len is used below, but its declaration and the closing lines of
 * the macro are not part of this excerpt -- confirm against the full file. */
10 #define MPI_CHECK(mpi_call) do { \
11 int status = mpi_call; \
12 if (status != MPI_SUCCESS) { \
13 char err_string[128]; \
15 MPI_Error_string(status, err_string, &err_len); \
16 err_string[127] = '\0'; \
17 errorQuda("(MPI) %s", err_string); \
// Static GPU index, initialised to -1; it is compared against the device count further down.
// NOTE(review): the statement that assigns it a real value is not visible in this excerpt.
29 static int gpuid = -1;
// Ask MPI whether it has been initialised; the answer is written to `initialized`.
35 MPI_CHECK( MPI_Initialized(&initialized) );
// Reports that MPI is not initialised.
// NOTE(review): the condition guarding this call is on a line not shown here.
38 errorQuda(
"MPI has not been initialized");
// Fetch this process's rank and the total number of ranks in MPI_COMM_WORLD.
41 MPI_CHECK( MPI_Comm_rank(MPI_COMM_WORLD, &rank) );
42 MPI_CHECK( MPI_Comm_size(MPI_COMM_WORLD, &size) );
// Loop over the ndim dimensions.
// NOTE(review): the loop body is not shown; presumably it accumulates grid_size -- confirm.
45 for (
int i = 0; i < ndim; i++) {
// grid_size has to equal the number of MPI ranks; on mismatch both values are
// reported through errorQuda.
48 if (grid_size != size) {
49 errorQuda(
"Communication grid size declared via initCommsGridQuda() does not match"
50 " total number of MPI ranks (%d != %d)", grid_size, size);
// Receive buffer with one 128-byte slot per rank (128*size bytes in total).
// NOTE(review): the matching release of this buffer is not visible in this excerpt -- confirm it is freed.
59 char *hostname_recv_buf = (
char *)
safe_malloc(128*size);
// Every rank contributes 128 chars starting at `hostname`; afterwards slot i of the
// receive buffer holds the contribution of rank i.
61 MPI_CHECK( MPI_Allgather(hostname, 128, MPI_CHAR, hostname_recv_buf, 128, MPI_CHAR, MPI_COMM_WORLD) );
// Compare this rank's hostname with the hostname of every lower-numbered rank
// (at most 128 characters are compared).
64 for (
int i = 0; i < rank; i++) {
65 if (!strncmp(hostname, &hostname_recv_buf[128*i], 128)) {
// NOTE(review): the action taken on a match is not shown; presumably it counts
// co-located ranks to derive gpuid -- confirm.
// Query the number of CUDA devices.
// NOTE(review): the cudaError_t returned by cudaGetDeviceCount is discarded on this
// line, so a failing call is not detected here.
72 cudaGetDeviceCount(&device_count);
// Zero devices is handled on lines not shown in this excerpt.
73 if (device_count == 0) {
// gpuid must be strictly less than device_count; otherwise the hostname is reported
// through errorQuda.
76 if (gpuid >= device_count) {
77 errorQuda(
"Too few GPUs available on %s", hostname);
// Set up a send request for nbytes bytes (MPI_BYTE) from `buffer` to `rank` with `tag`
// on MPI_COMM_WORLD; the request handle is stored in mh->request.
// NOTE(review): MPI_Send_init only creates the request; where it is started and
// completed is not visible in this excerpt.
110 MPI_CHECK( MPI_Send_init(buffer, nbytes, MPI_BYTE, rank, tag, MPI_COMM_WORLD, &(mh->
request)) );
// Receive-side counterpart: set up a receive request for nbytes bytes into `buffer`
// from `rank` with `tag`; the request handle is stored in mh->request.
126 MPI_CHECK( MPI_Recv_init(buffer, nbytes, MPI_BYTE, rank, tag, MPI_COMM_WORLD, &(mh->
request)) );
// Sum one double from `data` across all ranks of MPI_COMM_WORLD into recvbuf.
// NOTE(review): the declaration of recvbuf and the copy back to the caller are on
// lines not shown here.
162 MPI_CHECK( MPI_Allreduce(data, &recvbuf, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) );
// Same pattern with MPI_MAX: the maximum of one double across all ranks.
170 MPI_CHECK( MPI_Allreduce(data, &recvbuf, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD) );
// Element-wise sum of `size` doubles across all ranks, using a local temporary.
// NOTE(review): if `size` is not a compile-time constant this is a variable-length
// array (a compiler extension in C++, placed on the stack) -- confirm, and consider
// the maximum `size` callers pass.
177 double recvbuf[size];
// &recvbuf is a pointer to the whole array; it has the same address as recvbuf itself.
178 MPI_CHECK( MPI_Allreduce(data, &recvbuf, size, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) );
// Copy the reduced values back over the input; sizeof(recvbuf) is the byte size of
// the entire array.
179 memcpy(data, recvbuf,
sizeof(recvbuf));
// Sum one int from `data` across all ranks of MPI_COMM_WORLD into recvbuf.
// NOTE(review): the declaration of this recvbuf is not visible in this excerpt.
186 MPI_CHECK( MPI_Allreduce(data, &recvbuf, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD) );
// Broadcast nbytes bytes (MPI_BYTE) starting at `data` from rank 0 to all ranks of
// MPI_COMM_WORLD.
// NOTE(review): the cast narrows nbytes to int; if nbytes is a wider type (its
// declaration is not shown), values above INT_MAX would be truncated -- confirm.
194 MPI_CHECK( MPI_Bcast(data, (
int)nbytes, MPI_BYTE, 0, MPI_COMM_WORLD) );
// Barrier across all ranks of MPI_COMM_WORLD.
200 MPI_CHECK( MPI_Barrier(MPI_COMM_WORLD) );