QUDA v1.1.0
A library for QCD on GPUs
communicator_stack.cpp
#include <communicator_quda.h>
#include <map>
#include <array>
#include <lattice_field.h>

int Communicator::gpuid = -1;

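// The communicator "stack": every communicator that has been created (the
// default grid plus any split sub-grids) lives in this map, keyed by its
// quda::CommKey, and current_key selects the entry that the comm_* wrappers
// below dispatch to.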
static std::map<quda::CommKey, Communicator> communicator_stack;

static quda::CommKey current_key = {-1, -1, -1, -1};

void init_communicator_stack(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data,
                             bool user_set_comm_handle, void *user_comm)
{
  communicator_stack.emplace(
    std::piecewise_construct, std::forward_as_tuple(default_comm_key),
    std::forward_as_tuple(ndim, dims, rank_from_coords, map_data, user_set_comm_handle, user_comm));

  current_key = default_comm_key;
}

void finalize_communicator_stack() { communicator_stack.clear(); }

static Communicator &get_default_communicator()
{
  auto search = communicator_stack.find(default_comm_key);
  if (search == communicator_stack.end()) { errorQuda("Default communicator can't be found."); }
  return search->second;
}

Communicator &get_current_communicator()
{
  auto search = communicator_stack.find(current_key);
  if (search == communicator_stack.end()) { errorQuda("Current communicator can't be found."); }
  return search->second;
}

void push_communicator(const quda::CommKey &split_key)
{
  if (comm_nvshmem_enabled())
    errorQuda(
      "Split-grid is currently not supported with NVSHMEM. Please set QUDA_ENABLE_NVSHMEM=0 to disable NVSHMEM.");
  auto search = communicator_stack.find(split_key);
  if (search == communicator_stack.end()) {
    communicator_stack.emplace(std::piecewise_construct, std::forward_as_tuple(split_key),
                               std::forward_as_tuple(get_default_communicator(), split_key.data()));
  }

  quda::LatticeField::freeGhostBuffer(); // Destroy the (IPC) comm buffers associated with the old communicator.

  current_key = split_key;
}
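
// Usage sketch (illustrative, not part of this file): a hypothetical caller
// switching to a split communicator and back; assumes a 4-way split in the
// t direction that divides the default process grid.
//
//   quda::CommKey split_key = {1, 1, 1, 4};
//   push_communicator(split_key);        // created on first use, then reused
//   // ... work within each sub-grid ...
//   push_communicator(default_comm_key); // restore the default communicator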

int comm_neighbor_rank(int dir, int dim) { return get_current_communicator().comm_neighbor_rank(dir, dim); }

int comm_dim(int dim) { return get_current_communicator().comm_dim(dim); }

int comm_coord(int dim) { return get_current_communicator().comm_coord(dim); }

int comm_rank_from_coords(const int *coords) { return get_current_communicator().comm_rank_from_coords(coords); }

void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data, bool user_set_comm_handle,
               void *user_comm)
{
  init_communicator_stack(ndim, dims, rank_from_coords, map_data, user_set_comm_handle, user_comm);
}
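
// Usage sketch (illustrative): a minimal lexicographic rank-from-coords
// function passed as the QudaCommsMap argument, with dims forwarded through
// map_data; the grid below is a hypothetical 1x1x1x2 layout.
//
//   static int lex_rank_from_coords(const int *coords, void *fdata)
//   {
//     const int *dims = static_cast<const int *>(fdata);
//     int rank = coords[0];
//     for (int i = 1; i < 4; i++) rank = dims[i] * rank + coords[i];
//     return rank;
//   }
//
//   int dims[4] = {1, 1, 1, 2};
//   comm_init(4, dims, lex_rank_from_coords, dims, false, nullptr);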

void comm_finalize() { finalize_communicator_stack(); }

void comm_dim_partitioned_set(int dim) { get_current_communicator().comm_dim_partitioned_set(dim); }

void comm_dim_partitioned_reset() { get_current_communicator().comm_dim_partitioned_reset(); }

int comm_dim_partitioned(int dim) { return get_current_communicator().comm_dim_partitioned(dim); }

int comm_partitioned() { return get_current_communicator().comm_partitioned(); }

const char *comm_dim_topology_string() { return get_current_communicator().comm_dim_topology_string(); }

const char *comm_config_string() { return get_current_communicator().comm_config_string(); }

const char *comm_dim_partitioned_string(const int *comm_dim_override)
{
  return get_current_communicator().comm_dim_partitioned_string(comm_dim_override);
}

int comm_rank(void) { return get_current_communicator().comm_rank(); }

int comm_rank_global(void) { return Communicator::comm_rank_global(); }

int comm_size(void) { return get_current_communicator().comm_size(); }

// XXX:
// Note that here we are always using the **default** communicator.
// We might need a better approach.
int comm_gpuid(void) { return Communicator::comm_gpuid(); }

bool comm_deterministic_reduce() { return get_current_communicator().comm_deterministic_reduce(); }

void comm_gather_hostname(char *hostname_recv_buf)
{
  get_current_communicator().comm_gather_hostname(hostname_recv_buf);
}

void comm_gather_gpuid(int *gpuid_recv_buf) { get_current_communicator().comm_gather_gpuid(gpuid_recv_buf); }

void comm_peer2peer_init(const char *hostname_recv_buf)
{
  get_current_communicator().comm_peer2peer_init(hostname_recv_buf);
}

bool comm_peer2peer_present() { return get_current_communicator().comm_peer2peer_present(); }

int comm_peer2peer_enabled_global() { return get_current_communicator().comm_peer2peer_enabled_global(); }

bool comm_peer2peer_enabled(int dir, int dim) { return get_current_communicator().comm_peer2peer_enabled(dir, dim); }

void comm_enable_peer2peer(bool enable) { get_current_communicator().comm_enable_peer2peer(enable); }

bool comm_intranode_enabled(int dir, int dim) { return get_current_communicator().comm_intranode_enabled(dir, dim); }

void comm_enable_intranode(bool enable) { get_current_communicator().comm_enable_intranode(enable); }

bool comm_gdr_enabled() { return get_current_communicator().comm_gdr_enabled(); }

bool comm_gdr_blacklist() { return get_current_communicator().comm_gdr_blacklist(); }

bool comm_nvshmem_enabled() { return get_current_communicator().comm_nvshmem_enabled(); }

MsgHandle *comm_declare_send_rank(void *buffer, int rank, int tag, size_t nbytes)
{
  return get_current_communicator().comm_declare_send_rank(buffer, rank, tag, nbytes);
}

MsgHandle *comm_declare_recv_rank(void *buffer, int rank, int tag, size_t nbytes)
{
  return get_current_communicator().comm_declare_recv_rank(buffer, rank, tag, nbytes);
}

MsgHandle *comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
{
  return get_current_communicator().comm_declare_send_displaced(buffer, displacement, nbytes);
}

MsgHandle *comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
{
  return get_current_communicator().comm_declare_receive_displaced(buffer, displacement, nbytes);
}

MsgHandle *comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks,
                                               size_t stride)
{
  return get_current_communicator().comm_declare_strided_send_displaced(buffer, displacement, blksize, nblocks, stride);
}

MsgHandle *comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks,
                                                  size_t stride)
{
  return get_current_communicator().comm_declare_strided_receive_displaced(buffer, displacement, blksize, nblocks,
                                                                           stride);
}
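
// Usage sketch (illustrative): a one-off send to a neighbouring rank using the
// rank-based handles above together with comm_start/comm_wait/comm_free below.
//
//   double buf[16] = {};
//   int peer = (comm_rank() + 1) % comm_size();
//   MsgHandle *mh = comm_declare_send_rank(buf, peer, /*tag=*/0, sizeof(buf));
//   comm_start(mh);
//   comm_wait(mh);
//   comm_free(mh);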

void comm_free(MsgHandle *&mh) { get_current_communicator().comm_free(mh); }

void comm_start(MsgHandle *mh) { get_current_communicator().comm_start(mh); }

void comm_wait(MsgHandle *mh) { get_current_communicator().comm_wait(mh); }

int comm_query(MsgHandle *mh) { return get_current_communicator().comm_query(mh); }

void comm_allreduce(double *data) { get_current_communicator().comm_allreduce(data); }

void comm_allreduce_max(double *data) { get_current_communicator().comm_allreduce_max(data); }

void comm_allreduce_min(double *data) { get_current_communicator().comm_allreduce_min(data); }

void comm_allreduce_array(double *data, size_t size) { get_current_communicator().comm_allreduce_array(data, size); }

void comm_allreduce_max_array(double *data, size_t size)
{
  get_current_communicator().comm_allreduce_max_array(data, size);
}

void comm_allreduce_int(int *data) { get_current_communicator().comm_allreduce_int(data); }

void comm_allreduce_xor(uint64_t *data) { get_current_communicator().comm_allreduce_xor(data); }

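// Usage sketch (illustrative): reducing a per-rank value over all ranks of the
// current communicator; comm_allreduce performs a global sum in double precision.
//
//   double val = 1.0; // hypothetical per-rank contribution
//   comm_allreduce(&val);
//   // val now holds the sum over comm_size() ranks
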
void comm_broadcast(void *data, size_t nbytes) { get_current_communicator().comm_broadcast(data, nbytes); }

void comm_broadcast_global(void *data, size_t nbytes) { get_default_communicator().comm_broadcast(data, nbytes); }

void comm_barrier(void) { get_current_communicator().comm_barrier(); }

void comm_abort_(int status) { Communicator::comm_abort_(status); }

void reduceMaxDouble(double &max) { get_current_communicator().reduceMaxDouble(max); }

void reduceDouble(double &sum) { get_current_communicator().reduceDouble(sum); }

void reduceDoubleArray(double *max, const int len) { get_current_communicator().reduceDoubleArray(max, len); }

int commDim(int dim) { return get_current_communicator().commDim(dim); }

int commCoords(int dim) { return get_current_communicator().commCoords(dim); }

int commDimPartitioned(int dir) { return get_current_communicator().commDimPartitioned(dir); }

void commDimPartitionedSet(int dir) { get_current_communicator().commDimPartitionedSet(dir); }

void commDimPartitionedReset() { get_current_communicator().commDimPartitionedReset(); }

bool commGlobalReduction() { return get_current_communicator().commGlobalReduction(); }

void commGlobalReductionSet(bool global_reduce) { get_current_communicator().commGlobalReductionSet(global_reduce); }

bool commAsyncReduction() { return get_current_communicator().commAsyncReduction(); }

void commAsyncReductionSet(bool global_reduce) { get_current_communicator().commAsyncReductionSet(global_reduce); }
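
// Usage sketch (illustrative): temporarily disabling global reductions, e.g.
// so that each sub-grid of a split communicator reduces only over its own ranks.
//
//   bool prev = commGlobalReduction();
//   commGlobalReductionSet(false);
//   // ... reductions now stay local to the current communicator ...
//   commGlobalReductionSet(prev);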