QUDA  0.9.0
comm_quda.h
Go to the documentation of this file.
1 #pragma once
2 #include <cstdint>
3 
4 #ifdef __cplusplus
5 extern "C" {
6 #endif
7 
8  typedef struct MsgHandle_s MsgHandle;
9  typedef struct Topology_s Topology;
10 
11  /* defined in quda.h; redefining here to avoid circular references */
12  typedef int (*QudaCommsMap)(const int *coords, void *fdata);
13 
14  /* implemented in comm_common.cpp */
15 
16  char *comm_hostname(void);
17  double comm_drand(void);
18  Topology *comm_create_topology(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data);
19  void comm_destroy_topology(Topology *topo);
20  int comm_ndim(const Topology *topo);
21  const int *comm_dims(const Topology *topo);
22  const int *comm_coords(const Topology *topo);
23  const int *comm_coords_from_rank(const Topology *topo, int rank);
24  int comm_rank_from_coords(const Topology *topo, const int *coords);
25  int comm_rank_displaced(const Topology *topo, const int displacement[]);
28 
29  // routines related to direct peer-2-peer access
30  void comm_set_neighbor_ranks(Topology *topo=NULL);
31  int comm_neighbor_rank(int dir, int dim);
32 
38  int comm_dim(int dim);
39 
45  int comm_coord(int dim);
46 
56  MsgHandle *comm_declare_send_relative_(const char *func, const char *file, int line,
57  void *buffer, int dim, int dir, size_t nbytes);
58 
59 #define comm_declare_send_relative(buffer, dim, dir, nbytes) \
60  comm_declare_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes)
61 
71  MsgHandle *comm_declare_receive_relative_(const char *func, const char *file, int line,
72  void *buffer, int dim, int dir, size_t nbytes);
73 
74 #define comm_declare_receive_relative(buffer, dim, dir, nbytes) \
75  comm_declare_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes)
76 
88  MsgHandle *comm_declare_strided_send_relative_(const char *func, const char *file, int line,
89  void *buffer, int dim, int dir,
90  size_t blksize, int nblocks, size_t stride);
91 
92 #define comm_declare_strided_send_relative(buffer, dim, dir, blksize, nblocks, stride) \
93  comm_declare_strided_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride)
94 
106  MsgHandle *comm_declare_strided_receive_relative_(const char *func, const char *file, int line,
107  void *buffer, int dim, int dir,
108  size_t blksize, int nblocks, size_t stride);
109 
110 #define comm_declare_strided_receive_relative(buffer, dim, dir, blksize, nblocks, stride) \
111  comm_declare_strided_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride)
112 
113  void comm_finalize(void);
114  void comm_dim_partitioned_set(int dim);
115  int comm_dim_partitioned(int dim);
116 
121  int comm_partitioned();
122 
127  const char* comm_dim_partitioned_string();
128 
133  const char* comm_dim_topology_string();
134 
135  /* implemented in comm_single.cpp, comm_qmp.cpp, and comm_mpi.cpp */
136 
137  void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data);
138  int comm_rank(void);
139  int comm_size(void);
140  int comm_gpuid(void);
141 
148  void comm_gather_hostname(char *hostname_recv_buf);
149 
155  void comm_gather_gpuid(int *gpuid_recv_buf);
156 
161  void comm_peer2peer_init(const char *hostname_recv_buf);
162 
168 
175  bool comm_peer2peer_enabled(int dir, int dim);
176 
182  void comm_enable_peer2peer(bool enable);
183 
191  bool comm_intranode_enabled(int dir, int dim);
192 
199  void comm_enable_intranode(bool enable);
200 
204  bool comm_gdr_enabled();
205 
209  bool comm_gdr_blacklist();
210 
218  MsgHandle *comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes);
219 
227  MsgHandle *comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes);
228 
237  MsgHandle *comm_declare_strided_send_displaced(void *buffer, const int displacement[],
238  size_t blksize, int nblocks, size_t stride);
239 
248  MsgHandle *comm_declare_strided_receive_displaced(void *buffer, const int displacement[],
249  size_t blksize, int nblocks, size_t stride);
250 
251  void comm_free(MsgHandle *mh);
252  void comm_start(MsgHandle *mh);
253  void comm_wait(MsgHandle *mh);
254  int comm_query(MsgHandle *mh);
255  void comm_allreduce(double* data);
256  void comm_allreduce_max(double* data);
257  void comm_allreduce_array(double* data, size_t size);
258  void comm_allreduce_int(int* data);
259  void comm_allreduce_xor(uint64_t *data);
260  void comm_broadcast(void *data, size_t nbytes);
261  void comm_barrier(void);
262  void comm_abort(int status);
263 
264  void reduceMaxDouble(double &);
265  void reduceDouble(double &);
266  void reduceDoubleArray(double *, const int len);
267  int commDim(int);
268  int commCoords(int);
269  int commDimPartitioned(int dir);
270  void commDimPartitionedSet(int dir);
271 
278  bool commGlobalReduction();
279  void commGlobalReductionSet(bool global_reduce);
280 
281  bool commAsyncReduction();
282  void commAsyncReductionSet(bool global_reduce);
283 
284 #ifdef __cplusplus
285 }
286 #endif
const int * comm_coords(const Topology *topo)
int comm_rank(void)
Definition: comm_mpi.cpp:120
MsgHandle * comm_declare_strided_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride)
void comm_finalize(void)
int commDimPartitioned(int dir)
void comm_destroy_topology(Topology *topo)
bool commAsyncReduction()
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
Definition: comm_mpi.cpp:174
Topology * comm_create_topology(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
Definition: comm_common.cpp:94
const char * comm_dim_partitioned_string()
Return a string that defines the comm partitioning (used as a tuneKey)
Definition: comm_mpi.cpp:342
void comm_peer2peer_init(const char *hostname_recv_buf)
const void * func
void comm_abort(int status)
Definition: comm_mpi.cpp:334
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
Definition: comm_mpi.cpp:226
int comm_dim(int dim)
void commDimPartitionedSet(int dir)
int commCoords(int)
int comm_partitioned()
Loop over comm_dim_partitioned(dim) for all comms dimensions.
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
static int rank
Definition: comm_mpi.cpp:42
void comm_enable_intranode(bool enable)
Enable / disable intra-node (non-peer-to-peer) communication.
void reduceDoubleArray(double *, const int len)
int comm_coord(int dim)
void comm_allreduce_array(double *data, size_t size)
Definition: comm_mpi.cpp:296
void comm_set_neighbor_ranks(Topology *topo=NULL)
Topology * comm_default_topology(void)
const int * comm_coords_from_rank(const Topology *topo, int rank)
int comm_gpuid(void)
Definition: comm_mpi.cpp:132
int comm_rank_from_coords(const Topology *topo, const int *coords)
void comm_gather_hostname(char *hostname_recv_buf)
Gather all hostnames.
Definition: comm_mpi.cpp:50
void comm_enable_peer2peer(bool enable)
Enable / disable peer-to-peer communication: used for dslash policies that do not presently support p...
void comm_free(MsgHandle *mh)
Definition: comm_mpi.cpp:252
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
Definition: comm_mpi.cpp:61
static int ndim
Definition: layout_hyper.c:53
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
Definition: comm_mpi.cpp:197
int comm_size(void)
Definition: comm_mpi.cpp:126
char * comm_hostname(void)
Definition: comm_common.cpp:58
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
Definition: comm_mpi.cpp:151
const char * comm_dim_topology_string()
Return a string that defines the comm topology (for use as a tuneKey)
Definition: comm_mpi.cpp:346
void comm_start(MsgHandle *mh)
Definition: comm_mpi.cpp:260
int commDim(int)
bool comm_intranode_enabled(int dir, int dim)
void comm_dim_partitioned_set(int dim)
int dims[QUDA_MAX_DIM]
Definition: comm_common.cpp:10
int comm_rank_displaced(const Topology *topo, const int displacement[])
unsigned long long uint64_t
double comm_drand(void)
Definition: comm_common.cpp:82
bool commGlobalReduction()
int(* coords)[QUDA_MAX_DIM]
Definition: comm_common.cpp:12
int(* QudaCommsMap)(const int *coords, void *fdata)
Definition: comm_quda.h:12
MsgHandle * comm_declare_strided_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride)
void commAsyncReductionSet(bool global_reduce)
bool comm_peer2peer_enabled(int dir, int dim)
void comm_allreduce_xor(uint64_t *data)
Definition: comm_mpi.cpp:312
const int * comm_dims(const Topology *topo)
void comm_gather_gpuid(int *gpuid_recv_buf)
Gather all GPU ids.
Definition: comm_mpi.cpp:56
void comm_broadcast(void *data, size_t nbytes)
Definition: comm_mpi.cpp:322
void comm_set_default_topology(Topology *topo)
MsgHandle * comm_declare_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes)
void commDimPartitionedReset()
Reset the comm dim partioned array to zero,.
int comm_ndim(const Topology *topo)
int comm_query(MsgHandle *mh)
Definition: comm_mpi.cpp:272
void comm_allreduce_int(int *data)
Definition: comm_mpi.cpp:305
bool comm_gdr_enabled()
Query if GPU Direct RDMA communication is enabled (global setting)
void comm_wait(MsgHandle *mh)
Definition: comm_mpi.cpp:266
void comm_allreduce(double *data)
Definition: comm_mpi.cpp:281
void reduceDouble(double &)
int comm_neighbor_rank(int dir, int dim)
void comm_allreduce_max(double *data)
Definition: comm_mpi.cpp:289
int comm_peer2peer_enabled_global()
bool comm_gdr_blacklist()
Query if GPU Direct RDMA communication is blacklisted for this GPU.
void reduceMaxDouble(double &)
MsgHandle * comm_declare_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes)
int comm_dim_partitioned(int dim)
void comm_barrier(void)
Definition: comm_mpi.cpp:328
void commGlobalReductionSet(bool global_reduce)