65 void *buffer,
int dim,
int dir,
size_t nbytes);
/**
 * Convenience wrapper around comm_declare_send_relative_().
 *
 * Expands to a call to comm_declare_send_relative_() with the call site's
 * __func__, __FILE__ and __LINE__ inserted ahead of the caller-supplied
 * arguments, so callers only pass (buffer, dim, dir, nbytes).
 * NOTE(review): the call-site information is presumably used by the callee
 * for diagnostics -- confirm against the implementation.
 */
67 #define comm_declare_send_relative(buffer, dim, dir, nbytes) \
68 comm_declare_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes)
80 void *buffer,
int dim,
int dir,
size_t nbytes);
/**
 * Convenience wrapper around comm_declare_receive_relative_().
 *
 * Expands to a call to comm_declare_receive_relative_() with the call site's
 * __func__, __FILE__ and __LINE__ inserted ahead of the caller-supplied
 * arguments, so callers only pass (buffer, dim, dir, nbytes).
 * NOTE(review): the call-site information is presumably used by the callee
 * for diagnostics -- confirm against the implementation.
 */
82 #define comm_declare_receive_relative(buffer, dim, dir, nbytes) \
83 comm_declare_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes)
97 void *buffer,
int dim,
int dir,
98 size_t blksize,
int nblocks,
size_t stride);
/**
 * Convenience wrapper around comm_declare_strided_send_relative_().
 *
 * Expands to a call to comm_declare_strided_send_relative_() with the call
 * site's __func__, __FILE__ and __LINE__ inserted ahead of the
 * caller-supplied arguments, so callers only pass
 * (buffer, dim, dir, blksize, nblocks, stride).
 * NOTE(review): the call-site information is presumably used by the callee
 * for diagnostics -- confirm against the implementation.
 */
100 #define comm_declare_strided_send_relative(buffer, dim, dir, blksize, nblocks, stride) \
101 comm_declare_strided_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride)
115 void *buffer,
int dim,
int dir,
116 size_t blksize,
int nblocks,
size_t stride);
/**
 * Convenience wrapper around comm_declare_strided_receive_relative_().
 *
 * Expands to a call to comm_declare_strided_receive_relative_() with the call
 * site's __func__, __FILE__ and __LINE__ inserted ahead of the
 * caller-supplied arguments, so callers only pass
 * (buffer, dim, dir, blksize, nblocks, stride).
 * NOTE(review): the call-site information is presumably used by the callee
 * for diagnostics -- confirm against the implementation.
 */
118 #define comm_declare_strided_receive_relative(buffer, dim, dir, blksize, nblocks, stride) \
119 comm_declare_strided_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride)
161 bool user_set_comm_handle =
false,
void *user_comm =
nullptr);
303 void *buffer,
const int displacement[],
size_t blksize,
int nblocks,
size_t stride);
314 size_t blksize,
int nblocks,
size_t stride);
bool comm_intranode_enabled(int dir, int dim)
const int * comm_coords_from_rank(const Topology *topo, int rank)
Topology * comm_create_topology(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
void comm_start(MsgHandle *mh)
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
int comm_rank_displaced(const Topology *topo, const int displacement[])
MsgHandle * comm_declare_recv_rank(void *buffer, int rank, int tag, size_t nbytes)
void commAsyncReductionSet(bool global_reduce)
Topology * comm_default_topology(void)
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
char * comm_hostname(void)
void reduceMaxDouble(double &)
void comm_init_common(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
Initialize the communications common to all communications abstractions.
bool comm_nvshmem_enabled()
Query if NVSHMEM communication is enabled (global setting)
int comm_rank_from_coords(const Topology *topo, const int *coords)
void comm_enable_intranode(bool enable)
Enable / disable intra-node (non-peer-to-peer) communication.
bool comm_gdr_blacklist()
Query if GPU Direct RDMA communication is blacklisted for this GPU.
const char * comm_config_string()
Return a string that defines the P2P/GDR environment variable configuration (for use as a tuneKey to ...
MsgHandle * comm_declare_send_rank(void *buffer, int rank, int tag, size_t nbytes)
int comm_neighbor_rank(int dir, int dim)
bool comm_peer2peer_present()
Returns true if any peer-to-peer capability is present on this system (regardless of whether it has b...
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
void comm_abort(int status)
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
void comm_allreduce_xor(uint64_t *data)
const char * comm_dim_partitioned_string(const int *comm_dim_override=0)
Return a string that defines the comm partitioning (used as a tuneKey)
void comm_gather_hostname(char *hostname_recv_buf)
Gather all hostnames.
void comm_set_tunekey_string()
Create the topology and partition strings that are used in tuneKeys.
bool comm_gdr_enabled()
Query if GPU Direct RDMA communication is enabled (global setting)
void comm_allreduce_int(int *data)
int comm_ndim(const Topology *topo)
void reduceDouble(double &)
bool comm_deterministic_reduce()
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data, bool user_set_comm_handle=false, void *user_comm=nullptr)
Initialize the communications, implemented in comm_single.cpp, comm_qmp.cpp, and comm_mpi.cpp.
int comm_query(MsgHandle *mh)
bool commGlobalReduction()
bool comm_peer2peer_enabled(int dir, int dim)
int comm_partitioned()
Loop over comm_dim_partitioned(dim) for all comms dimensions.
const int * comm_dims(const Topology *topo)
void commGlobalReductionSet(bool global_reduce)
void comm_broadcast(void *data, size_t nbytes)
int comm_dim_partitioned(int dim)
MsgHandle * comm_declare_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes)
int(* QudaCommsMap)(const int *coords, void *fdata)
void comm_gather_gpuid(int *gpuid_recv_buf)
Gather all GPU ids.
void comm_wait(MsgHandle *mh)
void comm_set_default_topology(Topology *topo)
int comm_rank_global(void)
void comm_allreduce_min(double *data)
void comm_free(MsgHandle *&mh)
void reduceDoubleArray(double *, const int len)
void commDimPartitionedReset()
Reset the comm dim partitioned array to zero.
int commDimPartitioned(int dir)
void comm_set_neighbor_ranks(Topology *topo=NULL)
void comm_allreduce_max(double *data)
void comm_abort_(int status)
void comm_dim_partitioned_set(int dim)
MsgHandle * comm_declare_strided_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride)
void comm_allreduce(double *data)
void comm_peer2peer_init(const char *hostname_recv_buf)
const int * comm_coords(const Topology *topo)
MsgHandle * comm_declare_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes)
void comm_destroy_topology(Topology *topo)
int comm_peer2peer_enabled_global()
const char * comm_dim_topology_string()
Return a string that defines the comm topology (for use as a tuneKey)
MsgHandle * comm_declare_strided_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride)
void comm_allreduce_array(double *data, size_t size)
bool commAsyncReduction()
void commDimPartitionedSet(int dir)
void comm_allreduce_max_array(double *data, size_t size)
void comm_enable_peer2peer(bool enable)
Enable / disable peer-to-peer communication: used for dslash policies that do not presently support p...
int(* coords)[QUDA_MAX_DIM]