57 void *buffer,
int dim,
int dir,
size_t nbytes);
59 #define comm_declare_send_relative(buffer, dim, dir, nbytes) \ 60 comm_declare_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes) 72 void *buffer,
int dim,
int dir,
size_t nbytes);
74 #define comm_declare_receive_relative(buffer, dim, dir, nbytes) \ 75 comm_declare_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes) 89 void *buffer,
int dim,
int dir,
90 size_t blksize,
int nblocks,
size_t stride);
92 #define comm_declare_strided_send_relative(buffer, dim, dir, blksize, nblocks, stride) \ 93 comm_declare_strided_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride) 107 void *buffer,
int dim,
int dir,
108 size_t blksize,
int nblocks,
size_t stride);
110 #define comm_declare_strided_receive_relative(buffer, dim, dir, blksize, nblocks, stride) \ 111 comm_declare_strided_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride) 283 void *buffer,
const int displacement[],
size_t blksize,
int nblocks,
size_t stride);
294 size_t blksize,
int nblocks,
size_t stride);
const int * comm_coords(const Topology *topo)
MsgHandle * comm_declare_strided_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride)
int commDimPartitioned(int dir)
void comm_destroy_topology(Topology *topo)
bool commAsyncReduction()
MsgHandle * comm_declare_receive_displaced(void *buffer, const int displacement[], size_t nbytes)
Topology * comm_create_topology(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
void comm_peer2peer_init(const char *hostname_recv_buf)
void comm_abort(int status)
MsgHandle * comm_declare_strided_receive_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
void commDimPartitionedSet(int dir)
int comm_partitioned()
Loop over comm_dim_partitioned(dim) for all comms dimensions.
void comm_allreduce_max_array(double *data, size_t size)
void comm_enable_intranode(bool enable)
Enable / disable intra-node (non-peer-to-peer) communication.
void reduceDoubleArray(double *, const int len)
void comm_allreduce_array(double *data, size_t size)
void comm_set_neighbor_ranks(Topology *topo=NULL)
Topology * comm_default_topology(void)
const int * comm_coords_from_rank(const Topology *topo, int rank)
int comm_rank_from_coords(const Topology *topo, const int *coords)
const char * comm_dim_partitioned_string(const int *comm_dim_override=0)
Return a string that defines the comm partitioning (used as a tuneKey)
void comm_gather_hostname(char *hostname_recv_buf)
Gather all hostnames.
void comm_enable_peer2peer(bool enable)
Enable / disable peer-to-peer communication: used for dslash policies that do not presently support p...
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
Initialize the communications, implemented in comm_single.cpp, comm_qmp.cpp, and comm_mpi.cpp.
MsgHandle * comm_declare_strided_send_displaced(void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride)
char * comm_hostname(void)
void comm_allreduce_min(double *data)
MsgHandle * comm_declare_send_displaced(void *buffer, const int displacement[], size_t nbytes)
const char * comm_dim_topology_string()
Return a string that defines the comm topology (for use as a tuneKey)
void comm_start(MsgHandle *mh)
bool comm_intranode_enabled(int dir, int dim)
void comm_dim_partitioned_set(int dim)
int comm_rank_displaced(const Topology *topo, const int displacement[])
bool commGlobalReduction()
void comm_free(MsgHandle *&mh)
int(* coords)[QUDA_MAX_DIM]
int(* QudaCommsMap)(const int *coords, void *fdata)
void comm_init_common(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
Initialize the communications common to all communications abstractions.
MsgHandle * comm_declare_strided_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride)
bool comm_deterministic_reduce()
void commAsyncReductionSet(bool global_reduce)
bool comm_peer2peer_enabled(int dir, int dim)
void comm_allreduce_xor(uint64_t *data)
const int * comm_dims(const Topology *topo)
void comm_gather_gpuid(int *gpuid_recv_buf)
Gather all GPU ids.
void comm_broadcast(void *data, size_t nbytes)
void comm_set_default_topology(Topology *topo)
MsgHandle * comm_declare_receive_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes)
void commDimPartitionedReset()
Reset the comm dim partitioned array to zero.
int comm_ndim(const Topology *topo)
int comm_query(MsgHandle *mh)
void comm_allreduce_int(int *data)
bool comm_gdr_enabled()
Query if GPU Direct RDMA communication is enabled (global setting)
void comm_set_tunekey_string()
Create the topology and partition strings that are used in tuneKeys.
void comm_wait(MsgHandle *mh)
void comm_allreduce(double *data)
void reduceDouble(double &)
int comm_neighbor_rank(int dir, int dim)
void comm_allreduce_max(double *data)
bool comm_peer2peer_present()
Returns true if any peer-to-peer capability is present on this system (regardless of whether it has b...
int comm_peer2peer_enabled_global()
bool comm_gdr_blacklist()
Query if GPU Direct RDMA communication is blacklisted for this GPU.
void reduceMaxDouble(double &)
MsgHandle * comm_declare_send_relative_(const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes)
int comm_dim_partitioned(int dim)
const char * comm_config_string()
Return a string that defines the P2P/GDR environment variable configuration (for use as a tuneKey to ...
void commGlobalReductionSet(bool global_reduce)