6 #define QMP_CHECK(qmp_call) do { \ 7 QMP_status_t status = qmp_call; \ 8 if (status != QMP_SUCCESS) \ 9 errorQuda("(QMP) %s", QMP_error_string(status)); \ 25 #define USE_MPI_GATHER 39 MPI_Allgather(hostname, 128, MPI_CHAR, hostname_recv_buf, 128, MPI_CHAR, MPI_COMM_WORLD);
46 for (
int j=0; j<128; j++) {
47 data[j] = (
i ==
comm_rank()) ? hostname[j] : 0;
49 hostname_recv_buf[
i*128 + j] = data[j];
70 gpuid_recv_buf[
i] = data;
78 if ( QMP_is_initialized() != QMP_TRUE ) {
79 errorQuda(
"QMP has not been initialized");
83 for (
int i = 0;
i <
ndim;
i++) {
86 if (grid_size != QMP_get_number_of_nodes()) {
87 errorQuda(
"Communication grid size declared via initCommsGridQuda() does not match" 88 " total number of QMP nodes (%d != %d)", grid_size, QMP_get_number_of_nodes());
106 cudaGetDeviceCount(&device_count);
107 if (device_count == 0) {
110 if (
gpuid >= device_count) {
111 char *enable_mps_env =
getenv(
"QUDA_ENABLE_MPS");
112 if (enable_mps_env &&
strcmp(enable_mps_env,
"1") == 0) {
130 return QMP_get_node_number();
136 return QMP_get_number_of_nodes();
156 mh->
mem = QMP_declare_msgmem(buffer, nbytes);
157 if (mh->
mem == NULL)
errorQuda(
"Unable to allocate QMP message memory");
160 if (mh->
handle == NULL)
errorQuda(
"Unable to allocate QMP message handle");
175 mh->
mem = QMP_declare_msgmem(buffer, nbytes);
176 if (mh->
mem == NULL)
errorQuda(
"Unable to allocate QMP message memory");
179 if (mh->
handle == NULL)
errorQuda(
"Unable to allocate QMP message handle");
190 size_t blksize,
int nblocks,
size_t stride)
197 mh->
mem = QMP_declare_strided_msgmem(buffer, blksize, nblocks, stride);
198 if (mh->
mem == NULL)
errorQuda(
"Unable to allocate QMP message memory");
201 if (mh->
handle == NULL)
errorQuda(
"Unable to allocate QMP message handle");
211 size_t blksize,
int nblocks,
size_t stride)
218 mh->
mem = QMP_declare_strided_msgmem(buffer, blksize, nblocks, stride);
219 if (mh->
mem == NULL)
errorQuda(
"Unable to allocate QMP message memory");
222 if (mh->
handle == NULL)
errorQuda(
"Unable to allocate QMP message handle");
230 QMP_free_msghandle(mh->
handle);
231 QMP_free_msgmem(mh->
mem);
250 return (QMP_is_complete(mh->
handle) == QMP_TRUE);
279 if (
sizeof(
uint64_t) !=
sizeof(
unsigned long))
errorQuda(
"unsigned long is not 64-bit");
280 QMP_CHECK( QMP_xor_ulong( reinterpret_cast<unsigned long*>(data) ));
285 QMP_CHECK( QMP_broadcast(data, nbytes) );
void comm_gather_gpuid(int *gpuid_recv_buf)
Gather all GPU ids.
int comm_query(MsgHandle *mh)
_EXTERN_C_ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
int snprintf(char *__str, size_t __size, const char *__format,...) __attribute__((__format__(__printf__
Topology * comm_create_topology(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
void comm_peer2peer_init(const char *hostname_recv_buf)
static char partition_string[16]
void comm_allreduce_xor(uint64_t *data)
Topology * comm_default_topology(void)
void comm_wait(MsgHandle *mh)
const char * comm_dim_topology_string()
Return a string that defines the comm topology (for use as a tuneKey)
void comm_free(MsgHandle *mh)
void comm_start(MsgHandle *mh)
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
int strcmp(const char *__s1, const char *__s2)
const char * comm_dim_partitioned_string()
Return a string that defines the comm partitioning (used as a tuneKey)
char * comm_hostname(void)
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
int printf(const char *,...) __attribute__((__format__(__printf__
void comm_allreduce(double *data)
void comm_abort(int status)
#define QMP_CHECK(qmp_call)
int comm_rank_displaced(const Topology *topo, const int displacement[])
unsigned long long uint64_t
int(* QudaCommsMap)(const int *coords, void *fdata)
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
#define safe_malloc(size)
void comm_allreduce_max(double *data)
int strncmp(const char *__s1, const char *__s2, size_t __n)
void comm_set_default_topology(Topology *topo)
static char topology_string[16]
void comm_allreduce_int(int *data)
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
void comm_allreduce_array(double *data, size_t size)
void comm_broadcast(void *data, size_t nbytes)
void comm_gather_hostname(char *hostname_recv_buf)
Gather all hostnames.
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
char * getenv(const char *)
int comm_dim_partitioned(int dim)