8 static std::map<quda::CommKey, Communicator> communicator_stack;
13 bool user_set_comm_handle,
void *user_comm)
15 communicator_stack.emplace(
17 std::forward_as_tuple(ndim, dims, rank_from_coords, map_data, user_set_comm_handle, user_comm));
27 if (search == communicator_stack.end()) {
errorQuda(
"Default communicator can't be found."); }
28 return search->second;
33 auto search = communicator_stack.find(current_key);
34 if (search == communicator_stack.end()) {
errorQuda(
"Current communicator can't be found."); }
35 return search->second;
42 "Split-grid is currently not supported with NVSHMEM. Please set QUDA_ENABLE_NVSHMEM=0 to disable NVSHMEM.");
43 auto search = communicator_stack.find(split_key);
44 if (search == communicator_stack.end()) {
45 communicator_stack.emplace(std::piecewise_construct, std::forward_as_tuple(split_key),
46 std::forward_as_tuple(get_default_communicator(), split_key.
data()));
51 current_key = split_key;
62 void comm_init(
int ndim,
const int *dims,
QudaCommsMap rank_from_coords,
void *map_data,
bool user_set_comm_handle,
static void freeGhostBuffer(void)
Free statically allocated ghost buffers.
int(* QudaCommsMap)(const int *coords, void *fdata)
constexpr quda::CommKey default_comm_key
bool comm_intranode_enabled(int dir, int dim)
void comm_start(MsgHandle *mh)
void comm_broadcast_global(void *data, size_t nbytes)
These routines broadcast the data according to the default communicator.
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
const char * comm_dim_partitioned_string(const int *comm_dim_override)
Return a string that defines the comm partitioning (used as a tuneKey)
MsgHandle * comm_declare_recv_rank(void *buffer, int rank, int tag, size_t nbytes)
void commAsyncReductionSet(bool global_reduce)
void reduceMaxDouble(double &max)
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
bool comm_nvshmem_enabled()
Query if NVSHMEM communication is enabled (global setting)
int comm_rank_from_coords(const int *coords)
void reduceDoubleArray(double *max, const int len)
void comm_enable_intranode(bool enable)
Enable / disable intra-node (non-peer-to-peer) communication.
bool comm_gdr_blacklist()
Query if GPU Direct RDMA communication is blacklisted for this GPU.
const char * comm_config_string()
Return a string that defines the P2P/GDR environment variable configuration (for use as a tuneKey to ...
MsgHandle * comm_declare_send_rank(void *buffer, int rank, int tag, size_t nbytes)
int comm_neighbor_rank(int dir, int dim)
bool comm_peer2peer_present()
Returns true if any peer-to-peer capability is present on this system (regardless of whether it has b...
void reduceDouble(double &sum)
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
void comm_dim_partitioned_reset()
Communicator & get_current_communicator()
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
void comm_allreduce_xor(uint64_t *data)
void finalize_communicator_stack()
void comm_gather_hostname(char *hostname_recv_buf)
Gather all hostnames.
bool comm_gdr_enabled()
Query if GPU Direct RDMA communication is enabled (global setting)
void comm_allreduce_int(int *data)
bool comm_deterministic_reduce()
int comm_query(MsgHandle *mh)
bool commGlobalReduction()
void init_communicator_stack(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data, bool user_set_comm_handle, void *user_comm)
bool comm_peer2peer_enabled(int dir, int dim)
int comm_partitioned()
Loop over comm_dim_partitioned(dim) for all comms dimensions.
void commGlobalReductionSet(bool global_reduce)
void comm_broadcast(void *data, size_t nbytes)
int comm_dim_partitioned(int dim)
void comm_gather_gpuid(int *gpuid_recv_buf)
Gather all GPU ids.
void comm_wait(MsgHandle *mh)
int comm_rank_global(void)
void comm_allreduce_min(double *data)
void comm_free(MsgHandle *&mh)
void commDimPartitionedReset()
Reset the comm dim partitioned array to zero.
int commDimPartitioned(int dir)
int get_enable_p2p_max_access_rank()
void comm_allreduce_max(double *data)
void comm_abort_(int status)
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data, bool user_set_comm_handle, void *user_comm)
Initialize the communications, implemented in comm_single.cpp, comm_qmp.cpp, and comm_mpi.cpp.
void comm_dim_partitioned_set(int dim)
void comm_allreduce(double *data)
void comm_peer2peer_init(const char *hostname_recv_buf)
int comm_peer2peer_enabled_global()
void push_communicator(const quda::CommKey &split_key)
const char * comm_dim_topology_string()
Return a string that defines the comm topology (for use as a tuneKey)
void comm_allreduce_array(double *data, size_t size)
bool commAsyncReduction()
void commDimPartitionedSet(int dir)
void comm_allreduce_max_array(double *data, size_t size)
void comm_enable_peer2peer(bool enable)
Enable / disable peer-to-peer communication: used for dslash policies that do not presently support p...
__host__ __device__ T sum(const array< T, s > &a)
void commGlobalReductionSet(bool global_reduction)
bool comm_deterministic_reduce()
int commDimPartitioned(int dir)
char topology_string[128]
void comm_allreduce_max(double *data)
void comm_peer2peer_init(const char *hostname_recv_buf)
void commAsyncReductionSet(bool async_reduction)
void reduceDouble(double &sum)
void comm_wait(MsgHandle *mh)
void comm_allreduce(double *data)
bool commGlobalReduction()
void comm_broadcast(void *data, size_t nbytes)
int comm_query(MsgHandle *mh)
void comm_gather_gpuid(int *gpuid_recv_buf)
void comm_allreduce_min(double *data)
bool comm_intranode_enabled(int dir, int dim)
void reduceMaxDouble(double &max)
static int comm_rank_global()
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
void comm_enable_intranode(bool enable)
static void comm_abort_(int status)
void comm_enable_peer2peer(bool enable)
MsgHandle * comm_declare_send_rank(void *buffer, int rank, int tag, size_t nbytes)
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
bool comm_peer2peer_present()
int comm_dim_partitioned(int)
void reduceDoubleArray(double *sum, const int len)
const char * comm_dim_partitioned_string(const int *comm_dim_override)
void comm_allreduce_max_array(double *data, size_t size)
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
int comm_neighbor_rank(int dir, int dim)
void comm_gather_hostname(char *hostname_recv_buf)
void comm_allreduce_xor(uint64_t *data)
bool commAsyncReduction()
int enable_p2p_max_access_rank
void comm_allreduce_int(int *data)
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
bool comm_gdr_blacklist()
void comm_free(MsgHandle *&mh)
const char * comm_config_string()
int comm_peer2peer_enabled_global()
bool comm_peer2peer_enabled(int dir, int dim)
bool comm_nvshmem_enabled()
MsgHandle * comm_declare_recv_rank(void *buffer, int rank, int tag, size_t nbytes)
int comm_rank_from_coords(const int *coords)
void comm_dim_partitioned_reset()
void comm_dim_partitioned_set(int dim)
void comm_start(MsgHandle *mh)
void commDimPartitionedSet(int dir)
void comm_allreduce_array(double *data, size_t size)