QUDA  v1.1.0
A library for QCD on GPUs
comm_quda.h
Go to the documentation of this file.
1 #pragma once
2 #include <cstdint>
3 
4 #ifdef __cplusplus
5 extern "C" {
6 #endif
7 
8  typedef struct MsgHandle_s MsgHandle;
9  typedef struct Topology_s Topology;
10 
11  /* defined in quda.h; redefining here to avoid circular references */
12  typedef int (*QudaCommsMap)(const int *coords, void *fdata);
13 
14  char *comm_hostname(void);
15  double comm_drand(void);
16  Topology *comm_create_topology(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data);
17  void comm_destroy_topology(Topology *topo);
18  int comm_ndim(const Topology *topo);
19  const int *comm_dims(const Topology *topo);
20  const int *comm_coords(const Topology *topo);
21  const int *comm_coords_from_rank(const Topology *topo, int rank);
22  int comm_rank_from_coords(const Topology *topo, const int *coords);
23  int comm_rank_displaced(const Topology *topo, const int displacement[]);
26 
27  // routines related to direct peer-2-peer access
29  int comm_neighbor_rank(int dir, int dim);
30 
36  int comm_dim(int dim);
37 
43  int comm_coord(int dim);
44 
48  MsgHandle *comm_declare_send_rank(void *buffer, int rank, int tag, size_t nbytes);
49 
53  MsgHandle *comm_declare_recv_rank(void *buffer, int rank, int tag, size_t nbytes);
54 
64  MsgHandle *comm_declare_send_relative_(const char *func, const char *file, int line,
65  void *buffer, int dim, int dir, size_t nbytes);
66 
67 #define comm_declare_send_relative(buffer, dim, dir, nbytes) \
68  comm_declare_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes)
69 
79  MsgHandle *comm_declare_receive_relative_(const char *func, const char *file, int line,
80  void *buffer, int dim, int dir, size_t nbytes);
81 
82 #define comm_declare_receive_relative(buffer, dim, dir, nbytes) \
83  comm_declare_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes)
84 
96  MsgHandle *comm_declare_strided_send_relative_(const char *func, const char *file, int line,
97  void *buffer, int dim, int dir,
98  size_t blksize, int nblocks, size_t stride);
99 
100 #define comm_declare_strided_send_relative(buffer, dim, dir, blksize, nblocks, stride) \
101  comm_declare_strided_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride)
102 
114  MsgHandle *comm_declare_strided_receive_relative_(const char *func, const char *file, int line,
115  void *buffer, int dim, int dir,
116  size_t blksize, int nblocks, size_t stride);
117 
118 #define comm_declare_strided_receive_relative(buffer, dim, dir, blksize, nblocks, stride) \
119  comm_declare_strided_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride)
120 
121  void comm_finalize(void);
122  void comm_dim_partitioned_set(int dim);
123  int comm_dim_partitioned(int dim);
124 
129  int comm_partitioned();
130 
135 
141  const char *comm_dim_partitioned_string(const int *comm_dim_override = 0);
142 
147  const char* comm_dim_topology_string();
148 
155  const char *comm_config_string();
156 
160  void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data,
161  bool user_set_comm_handle = false, void *user_comm = nullptr);
162 
166  void comm_init_common(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data);
167 
171  int comm_rank(void);
172 
177  int comm_rank_global(void);
178 
182  int comm_size(void);
183 
187  int comm_gpuid(void);
188 
193 
200  void comm_gather_hostname(char *hostname_recv_buf);
201 
207  void comm_gather_gpuid(int *gpuid_recv_buf);
208 
213  void comm_peer2peer_init(const char *hostname_recv_buf);
214 
221  bool comm_peer2peer_present();
222 
228 
235  bool comm_peer2peer_enabled(int dir, int dim);
236 
242  void comm_enable_peer2peer(bool enable);
243 
251  bool comm_intranode_enabled(int dir, int dim);
252 
259  void comm_enable_intranode(bool enable);
260 
264  bool comm_gdr_enabled();
265 
269  bool comm_nvshmem_enabled();
270 
274  bool comm_gdr_blacklist();
275 
283  MsgHandle *comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes);
284 
292  MsgHandle *comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes);
293 
303  void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride);
304 
313  MsgHandle *comm_declare_strided_receive_displaced(void *buffer, const int displacement[],
314  size_t blksize, int nblocks, size_t stride);
315 
316  void comm_free(MsgHandle *&mh);
317  void comm_start(MsgHandle *mh);
318  void comm_wait(MsgHandle *mh);
319  int comm_query(MsgHandle *mh);
320  void comm_allreduce(double* data);
321  void comm_allreduce_max(double* data);
322  void comm_allreduce_min(double* data);
323  void comm_allreduce_array(double* data, size_t size);
324  void comm_allreduce_max_array(double* data, size_t size);
325  void comm_allreduce_int(int* data);
326  void comm_allreduce_xor(uint64_t *data);
327  void comm_broadcast(void *data, size_t nbytes);
328  void comm_barrier(void);
329  void comm_abort(int status);
330  void comm_abort_(int status);
331 
332  void reduceMaxDouble(double &);
333  void reduceDouble(double &);
334  void reduceDoubleArray(double *, const int len);
335  int commDim(int);
336  int commCoords(int);
337  int commDimPartitioned(int dir);
338  void commDimPartitionedSet(int dir);
339 
346  bool commGlobalReduction();
347  void commGlobalReductionSet(bool global_reduce);
348 
349  bool commAsyncReduction();
350  void commAsyncReductionSet(bool global_reduce);
351 
352 #ifdef __cplusplus
353 }
354 #endif
bool comm_intranode_enabled(int dir, int dim)
const int * comm_coords_from_rank(const Topology *topo, int rank)
Topology * comm_create_topology(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
void comm_start(MsgHandle *mh)
void comm_barrier(void)
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
int comm_rank_displaced(const Topology *topo, const int displacement[])
MsgHandle * comm_declare_recv_rank(void *buffer, int rank, int tag, size_t nbytes)
void commAsyncReductionSet(bool global_reduce)
Topology * comm_default_topology(void)
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
char * comm_hostname(void)
Definition: comm_common.cpp:10
int commCoords(int)
void reduceMaxDouble(double &)
void comm_init_common(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
Initialize the communications common to all communications abstractions.
bool comm_nvshmem_enabled()
Query if NVSHMEM communication is enabled (global setting)
int comm_rank_from_coords(const Topology *topo, const int *coords)
void comm_enable_intranode(bool enable)
Enable / disable intra-node (non-peer-to-peer) communication.
bool comm_gdr_blacklist()
Query if GPU Direct RDMA communication is blacklisted for this GPU.
const char * comm_config_string()
Return a string that defines the P2P/GDR environment variable configuration (for use as a tuneKey to ...
MsgHandle * comm_declare_send_rank(void *buffer, int rank, int tag, size_t nbytes)
int comm_neighbor_rank(int dir, int dim)
bool comm_peer2peer_present()
Returns true if any peer-to-peer capability is present on this system (regardless of whether it has b...
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
int commDim(int)
void comm_abort(int status)
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
void comm_allreduce_xor(uint64_t *data)
const char * comm_dim_partitioned_string(const int *comm_dim_override=0)
Return a string that defines the comm partitioning (used as a tuneKey)
void comm_gather_hostname(char *hostname_recv_buf)
Gather all hostnames.
int comm_rank(void)
void comm_set_tunekey_string()
Create the topology and partition strings that are used in tuneKeys.
int comm_coord(int dim)
int comm_size(void)
bool comm_gdr_enabled()
Query if GPU Direct RDMA communication is enabled (global setting)
void comm_allreduce_int(int *data)
int comm_ndim(const Topology *topo)
void reduceDouble(double &)
bool comm_deterministic_reduce()
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data, bool user_set_comm_handle=false, void *user_comm=nullptr)
Initialize the communications, implemented in comm_single.cpp, comm_qmp.cpp, and comm_mpi....
int comm_query(MsgHandle *mh)
bool commGlobalReduction()
bool comm_peer2peer_enabled(int dir, int dim)
int comm_partitioned()
Loop over comm_dim_partitioned(dim) for all comms dimensions.
const int * comm_dims(const Topology *topo)
void commGlobalReductionSet(bool global_reduce)
void comm_broadcast(void *data, size_t nbytes)
int comm_dim_partitioned(int dim)
MsgHandle * comm_declare_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes)
Definition: comm_common.cpp:78
int(* QudaCommsMap)(const int *coords, void *fdata)
Definition: comm_quda.h:12
void comm_gather_gpuid(int *gpuid_recv_buf)
Gather all GPU ids.
void comm_wait(MsgHandle *mh)
void comm_set_default_topology(Topology *topo)
int comm_rank_global(void)
void comm_allreduce_min(double *data)
void comm_free(MsgHandle *&mh)
void reduceDoubleArray(double *, const int len)
void commDimPartitionedReset()
Reset the comm dim partioned array to zero,.
int comm_dim(int dim)
int commDimPartitioned(int dir)
void comm_set_neighbor_ranks(Topology *topo=NULL)
void comm_allreduce_max(double *data)
void comm_abort_(int status)
void comm_dim_partitioned_set(int dim)
void comm_finalize(void)
MsgHandle * comm_declare_strided_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride)
void comm_allreduce(double *data)
void comm_peer2peer_init(const char *hostname_recv_buf)
const int * comm_coords(const Topology *topo)
MsgHandle * comm_declare_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes)
Definition: comm_common.cpp:44
void comm_destroy_topology(Topology *topo)
double comm_drand(void)
Definition: comm_common.cpp:33
int comm_peer2peer_enabled_global()
const char * comm_dim_topology_string()
Return a string that defines the comm topology (for use as a tuneKey)
MsgHandle * comm_declare_strided_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride)
int comm_gpuid(void)
void comm_allreduce_array(double *data, size_t size)
bool commAsyncReduction()
void commDimPartitionedSet(int dir)
void comm_allreduce_max_array(double *data, size_t size)
void comm_enable_peer2peer(bool enable)
Enable / disable peer-to-peer communication: used for dslash policies that do not presently support p...
std::array< int, 4 > dim
int(* coords)[QUDA_MAX_DIM]
int dims[QUDA_MAX_DIM]