10 #define MPI_CHECK(mpi_call) do { \ 11 int status = mpi_call; \ 12 if (status != MPI_SUCCESS) { \ 13 char err_string[128]; \ 15 MPI_Error_string(status, err_string, &err_len); \ 16 err_string[127] = '\0'; \ 17 errorQuda("(MPI) %s", err_string); \ 67 errorQuda(
"MPI has not been initialized");
74 for (
int i = 0;
i <
ndim;
i++) {
77 if (grid_size !=
size) {
78 errorQuda(
"Communication grid size declared via initCommsGridQuda() does not match" 79 " total number of MPI ranks (%d != %d)", grid_size,
size);
91 for (
int i = 0;
i <
rank;
i++) {
98 cudaGetDeviceCount(&device_count);
99 if (device_count == 0) {
102 if (
gpuid >= device_count) {
103 char *enable_mps_env =
getenv(
"QUDA_ENABLE_MPS");
104 if (enable_mps_env &&
strcmp(enable_mps_env,
"1") == 0) {
143 errorQuda(
"Requested displacement[%d] = %d is greater than maximum allowed",
i, displacement[
i]);
198 size_t blksize,
int nblocks,
size_t stride)
213 MPI_CHECK( MPI_Type_vector(nblocks, blksize, stride, MPI_BYTE, &(mh->
datatype)) );
227 size_t blksize,
int nblocks,
size_t stride)
242 MPI_CHECK( MPI_Type_vector(nblocks, blksize, stride, MPI_BYTE, &(mh->
datatype)) );
298 double *recvbuf =
new double[
size];
314 if (
sizeof(
uint64_t) !=
sizeof(
unsigned long))
errorQuda(
"unsigned long is not 64-bit");
339 MPI_Abort(MPI_COMM_WORLD, status) ;
void comm_allreduce(double *data)
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
void comm_gather_hostname(char *hostname_recv_buf)
Gather all hostnames.
_EXTERN_C_ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
_EXTERN_C_ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
int comm_query(MsgHandle *mh)
int snprintf(char *__str, size_t __size, const char *__format,...) __attribute__((__format__(__printf__
Topology * comm_create_topology(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
void comm_peer2peer_init(const char *hostname_recv_buf)
void comm_wait(MsgHandle *mh)
void comm_allreduce_array(double *data, size_t size)
void comm_allreduce_max(double *data)
_EXTERN_C_ int MPI_Wait(MPI_Request *request, MPI_Status *status)
void comm_gather_gpuid(int *gpuid_recv_buf)
Gather all GPU ids.
void comm_allreduce_int(int *data)
static const int max_displacement
#define MPI_CHECK(mpi_call)
Topology * comm_default_topology(void)
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
double pow(double, double)
const char * comm_dim_partitioned_string()
Return a string that defines the comm partitioning (used as a tuneKey)
static char partition_string[16]
_EXTERN_C_ int MPI_Barrier(MPI_Comm comm)
int strcmp(const char *__s1, const char *__s2)
char * comm_hostname(void)
void comm_start(MsgHandle *mh)
int printf(const char *,...) __attribute__((__format__(__printf__
_EXTERN_C_ int MPI_Test(MPI_Request *request, int *flag, MPI_Status *status)
static bool initialized
Profiler for initQuda.
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
void comm_broadcast(void *data, size_t nbytes)
int comm_rank_displaced(const Topology *topo, const int displacement[])
unsigned long long uint64_t
_EXTERN_C_ int MPI_Send_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)
int(* QudaCommsMap)(const int *coords, void *fdata)
_EXTERN_C_ int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request)
void * memcpy(void *__dst, const void *__src, size_t __n)
#define safe_malloc(size)
_EXTERN_C_ int MPI_Start(MPI_Request *request)
int abs(int) __attribute__((const))
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
int strncmp(const char *__s1, const char *__s2, size_t __n)
void comm_set_default_topology(Topology *topo)
int comm_ndim(const Topology *topo)
void comm_allreduce_xor(uint64_t *data)
static char topology_string[16]
void comm_free(MsgHandle *mh)
static void check_displacement(const int displacement[], int ndim)
const char * comm_dim_topology_string()
Return a string that defines the comm topology (for use as a tuneKey)
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
void comm_abort(int status)
char * getenv(const char *)
_EXTERN_C_ int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
int comm_dim_partitioned(int dim)