QUDA
v1.1.0
A library for QCD on GPUs
|
#include <cstdint>
Go to the source code of this file.
Macros | |
#define | comm_declare_send_relative(buffer, dim, dir, nbytes) comm_declare_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes) |
#define | comm_declare_receive_relative(buffer, dim, dir, nbytes) comm_declare_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes) |
#define | comm_declare_strided_send_relative(buffer, dim, dir, blksize, nblocks, stride) comm_declare_strided_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride) |
#define | comm_declare_strided_receive_relative(buffer, dim, dir, blksize, nblocks, stride) comm_declare_strided_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride) |
Typedefs | |
typedef struct MsgHandle_s | MsgHandle |
typedef struct Topology_s | Topology |
typedef int(* | QudaCommsMap) (const int *coords, void *fdata) |
Functions | |
char * | comm_hostname (void) |
double | comm_drand (void) |
Topology * | comm_create_topology (int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data) |
void | comm_destroy_topology (Topology *topo) |
int | comm_ndim (const Topology *topo) |
const int * | comm_dims (const Topology *topo) |
const int * | comm_coords (const Topology *topo) |
const int * | comm_coords_from_rank (const Topology *topo, int rank) |
int | comm_rank_from_coords (const Topology *topo, const int *coords) |
int | comm_rank_displaced (const Topology *topo, const int displacement[]) |
void | comm_set_default_topology (Topology *topo) |
Topology * | comm_default_topology (void) |
void | comm_set_neighbor_ranks (Topology *topo=NULL) |
int | comm_neighbor_rank (int dir, int dim) |
int | comm_dim (int dim) |
int | comm_coord (int dim) |
MsgHandle * | comm_declare_send_rank (void *buffer, int rank, int tag, size_t nbytes) |
MsgHandle * | comm_declare_recv_rank (void *buffer, int rank, int tag, size_t nbytes) |
MsgHandle * | comm_declare_send_relative_ (const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes) |
MsgHandle * | comm_declare_receive_relative_ (const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t nbytes) |
MsgHandle * | comm_declare_strided_send_relative_ (const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride) |
MsgHandle * | comm_declare_strided_receive_relative_ (const char *func, const char *file, int line, void *buffer, int dim, int dir, size_t blksize, int nblocks, size_t stride) |
void | comm_finalize (void) |
void | comm_dim_partitioned_set (int dim) |
int | comm_dim_partitioned (int dim) |
int | comm_partitioned () |
Loop over comm_dim_partitioned(dim) for all comms dimensions. More... | |
void | comm_set_tunekey_string () |
Create the topology and partition strings that are used in tuneKeys. More... | |
const char * | comm_dim_partitioned_string (const int *comm_dim_override=0) |
Return a string that defines the comm partitioning (used as a tuneKey) More... | |
const char * | comm_dim_topology_string () |
Return a string that defines the comm topology (for use as a tuneKey) More... | |
const char * | comm_config_string () |
Return a string that defines the P2P/GDR environment variable configuration (for use as a tuneKey to enable unique policies). More... | |
void | comm_init (int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data, bool user_set_comm_handle=false, void *user_comm=nullptr) |
Initialize the communications, implemented in comm_single.cpp, comm_qmp.cpp, and comm_mpi.cpp. More... | |
void | comm_init_common (int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data) |
Initialize the communications common to all communications abstractions. More... | |
int | comm_rank (void) |
int | comm_rank_global (void) |
int | comm_size (void) |
int | comm_gpuid (void) |
bool | comm_deterministic_reduce () |
void | comm_gather_hostname (char *hostname_recv_buf) |
Gather all hostnames. More... | |
void | comm_gather_gpuid (int *gpuid_recv_buf) |
Gather all GPU ids. More... | |
void | comm_peer2peer_init (const char *hostname_recv_buf) |
bool | comm_peer2peer_present () |
Returns true if any peer-to-peer capability is present on this system (regardless of whether it has been disabled or not). We use this, for example, to determine if we need to allocate pinned device memory or not. More... | |
int | comm_peer2peer_enabled_global () |
bool | comm_peer2peer_enabled (int dir, int dim) |
void | comm_enable_peer2peer (bool enable) |
Enable / disable peer-to-peer communication: used for dslash policies that do not presently support peer-to-peer communication. More... | |
bool | comm_intranode_enabled (int dir, int dim) |
void | comm_enable_intranode (bool enable) |
Enable / disable intra-node (non-peer-to-peer) communication. More... | |
bool | comm_gdr_enabled () |
Query if GPU Direct RDMA communication is enabled (global setting) More... | |
bool | comm_nvshmem_enabled () |
Query if NVSHMEM communication is enabled (global setting) More... | |
bool | comm_gdr_blacklist () |
Query if GPU Direct RDMA communication is blacklisted for this GPU. More... | |
MsgHandle * | comm_declare_send_displaced (void *buffer, const int displacement[], size_t nbytes) |
MsgHandle * | comm_declare_receive_displaced (void *buffer, const int displacement[], size_t nbytes) |
MsgHandle * | comm_declare_strided_send_displaced (void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride) |
MsgHandle * | comm_declare_strided_receive_displaced (void *buffer, const int displacement[], size_t blksize, int nblocks, size_t stride) |
void | comm_free (MsgHandle *&mh) |
void | comm_start (MsgHandle *mh) |
void | comm_wait (MsgHandle *mh) |
int | comm_query (MsgHandle *mh) |
void | comm_allreduce (double *data) |
void | comm_allreduce_max (double *data) |
void | comm_allreduce_min (double *data) |
void | comm_allreduce_array (double *data, size_t size) |
void | comm_allreduce_max_array (double *data, size_t size) |
void | comm_allreduce_int (int *data) |
void | comm_allreduce_xor (uint64_t *data) |
void | comm_broadcast (void *data, size_t nbytes) |
void | comm_barrier (void) |
void | comm_abort (int status) |
void | comm_abort_ (int status) |
void | reduceMaxDouble (double &) |
void | reduceDouble (double &) |
void | reduceDoubleArray (double *, const int len) |
int | commDim (int) |
int | commCoords (int) |
int | commDimPartitioned (int dir) |
void | commDimPartitionedSet (int dir) |
void | commDimPartitionedReset () |
Reset the comm dim partitioned array to zero. More... | |
bool | commGlobalReduction () |
void | commGlobalReductionSet (bool global_reduce) |
bool | commAsyncReduction () |
void | commAsyncReductionSet (bool global_reduce) |
#define comm_declare_receive_relative | ( | buffer, | |
dim, | |||
dir, | |||
nbytes | |||
) | comm_declare_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes) |
Definition at line 82 of file comm_quda.h.
#define comm_declare_send_relative | ( | buffer, | |
dim, | |||
dir, | |||
nbytes | |||
) | comm_declare_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, nbytes) |
Definition at line 67 of file comm_quda.h.
#define comm_declare_strided_receive_relative | ( | buffer, | |
dim, | |||
dir, | |||
blksize, | |||
nblocks, | |||
stride | |||
) | comm_declare_strided_receive_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride) |
Definition at line 118 of file comm_quda.h.
#define comm_declare_strided_send_relative | ( | buffer, | |
dim, | |||
dir, | |||
blksize, | |||
nblocks, | |||
stride | |||
) | comm_declare_strided_send_relative_(__func__, __FILE__, __LINE__, buffer, dim, dir, blksize, nblocks, stride) |
Definition at line 100 of file comm_quda.h.
typedef struct MsgHandle_s MsgHandle |
Definition at line 1 of file comm_quda.h.
typedef int(* QudaCommsMap) (const int *coords, void *fdata) |
Definition at line 12 of file comm_quda.h.
typedef struct Topology_s Topology |
Definition at line 1 of file comm_quda.h.
void comm_abort | ( | int | status | ) |
Definition at line 208 of file comm_common.cpp.
void comm_abort_ | ( | int | status | ) |
Definition at line 194 of file communicator_stack.cpp.
void comm_allreduce | ( | double * | data | ) |
Definition at line 171 of file communicator_stack.cpp.
void comm_allreduce_array | ( | double * | data, |
size_t | size | ||
) |
Definition at line 177 of file communicator_stack.cpp.
void comm_allreduce_int | ( | int * | data | ) |
Definition at line 184 of file communicator_stack.cpp.
void comm_allreduce_max | ( | double * | data | ) |
Definition at line 173 of file communicator_stack.cpp.
void comm_allreduce_max_array | ( | double * | data, |
size_t | size | ||
) |
Definition at line 179 of file communicator_stack.cpp.
void comm_allreduce_min | ( | double * | data | ) |
Definition at line 175 of file communicator_stack.cpp.
void comm_allreduce_xor | ( | uint64_t * | data | ) |
Definition at line 186 of file communicator_stack.cpp.
void comm_barrier | ( | void | ) |
Definition at line 192 of file communicator_stack.cpp.
void comm_broadcast | ( | void * | data, |
size_t | nbytes | ||
) |
Definition at line 188 of file communicator_stack.cpp.
const char* comm_config_string | ( | ) |
Return a string that defines the P2P/GDR environment variable configuration (for use as a tuneKey to enable unique policies).
Definition at line 80 of file communicator_stack.cpp.
int comm_coord | ( | int | dim | ) |
Return the coordinate of this process in the dimension dim
dim | Dimension which we are querying |
Definition at line 58 of file communicator_stack.cpp.
|
inline |
Definition at line 110 of file communicator_quda.h.
|
inline |
Definition at line 112 of file communicator_quda.h.
Topology* comm_create_topology | ( | int | ndim, |
const int * | dims, | ||
QudaCommsMap | rank_from_coords, | ||
void * | map_data | ||
) |
MsgHandle* comm_declare_receive_displaced | ( | void * | buffer, |
const int | displacement[], | ||
size_t | nbytes | ||
) |
Create a persistent message handler for a displaced receive
buffer | Buffer into which message will be received |
dim | Dimension from which the message will be received |
dir | Direction from which the message will be received (0 - backwards, 1 forwards) |
nbytes | Size of message in bytes |
Definition at line 145 of file communicator_stack.cpp.
MsgHandle* comm_declare_receive_relative_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | buffer, | ||
int | dim, | ||
int | dir, | ||
size_t | nbytes | ||
) |
Create a persistent message handler for a relative receive. This should not be called directly, and instead the helper macro (without the trailing underscore) should be called instead.
buffer | Buffer into which message will be received |
dim | Dimension from which the message will be received |
dir | Direction from which the message will be received (0 - backwards, 1 forwards) |
nbytes | Size of message in bytes |
Receive from the "dir" direction in the "dim" dimension
Definition at line 78 of file comm_common.cpp.
MsgHandle* comm_declare_recv_rank | ( | void * | buffer, |
int | rank, | ||
int | tag, | ||
size_t | nbytes | ||
) |
Declare a message handle for receiving nbytes from the rank with tag.
Definition at line 135 of file communicator_stack.cpp.
MsgHandle* comm_declare_send_displaced | ( | void * | buffer, |
const int | displacement[], | ||
size_t | nbytes | ||
) |
Create a persistent message handler for a displaced send
buffer | Buffer from which message will be sent |
dim | Dimension in which message will be sent |
dir | Direction in which the message will be sent (0 - backwards, 1 forwards) |
nbytes | Size of message in bytes |
Definition at line 140 of file communicator_stack.cpp.
MsgHandle* comm_declare_send_rank | ( | void * | buffer, |
int | rank, | ||
int | tag, | ||
size_t | nbytes | ||
) |
Declare a message handle for sending nbytes to the rank with tag.
Definition at line 130 of file communicator_stack.cpp.
MsgHandle* comm_declare_send_relative_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | buffer, | ||
int | dim, | ||
int | dir, | ||
size_t | nbytes | ||
) |
Create a persistent message handler for a relative send. This should not be called directly, and instead the helper macro (without the trailing underscore) should be called instead.
buffer | Buffer from which message will be sent |
dim | Dimension in which message will be sent |
dir | Direction in which the message will be sent (0 - backwards, 1 forwards) |
nbytes | Size of message in bytes |
Send to the "dir" direction in the "dim" dimension
Definition at line 44 of file comm_common.cpp.
MsgHandle* comm_declare_strided_receive_displaced | ( | void * | buffer, |
const int | displacement[], | ||
size_t | blksize, | ||
int | nblocks, | ||
size_t | stride | ||
) |
Create a persistent strided message handler for a displaced receive
buffer | Buffer into which message will be received |
displacement | Array of offsets specifying the relative node from which we are receiving |
blksize | Size of block in bytes |
nblocks | Number of blocks |
stride | Stride between blocks in bytes |
Definition at line 156 of file communicator_stack.cpp.
MsgHandle* comm_declare_strided_receive_relative_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | buffer, | ||
int | dim, | ||
int | dir, | ||
size_t | blksize, | ||
int | nblocks, | ||
size_t | stride | ||
) |
Create a persistent strided message handler for a relative receive. This should not be called directly, and instead the helper macro (without the trailing underscore) should be called instead.
buffer | Buffer into which message will be received |
dim | Dimension from which the message will be received |
dir | Direction from which the message will be received (0 - backwards, 1 forwards) |
blksize | Size of block in bytes |
nblocks | Number of blocks |
stride | Stride between blocks in bytes |
Strided receive from the "dir" direction in the "dim" dimension
Definition at line 145 of file comm_common.cpp.
MsgHandle* comm_declare_strided_send_displaced | ( | void * | buffer, |
const int | displacement[], | ||
size_t | blksize, | ||
int | nblocks, | ||
size_t | stride | ||
) |
Create a persistent strided message handler for a displaced send
buffer | Buffer from which message will be sent |
displacement | Array of offsets specifying the relative node to which we are sending |
blksize | Size of block in bytes |
nblocks | Number of blocks |
stride | Stride between blocks in bytes |
Definition at line 150 of file communicator_stack.cpp.
MsgHandle* comm_declare_strided_send_relative_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | buffer, | ||
int | dim, | ||
int | dir, | ||
size_t | blksize, | ||
int | nblocks, | ||
size_t | stride | ||
) |
Create a persistent strided message handler for a relative send. This should not be called directly, and instead the helper macro (without the trailing underscore) should be called instead.
buffer | Buffer from which message will be sent |
dim | Dimension in which message will be sent |
dir | Direction in which the message will be sent (0 - backwards, 1 forwards) |
blksize | Size of block in bytes |
nblocks | Number of blocks |
stride | Stride between blocks in bytes |
Strided send to the "dir" direction in the "dim" dimension
Definition at line 108 of file comm_common.cpp.
Topology* comm_default_topology | ( | void | ) |
|
inline |
Definition at line 99 of file communicator_quda.h.
bool comm_deterministic_reduce | ( | ) |
Definition at line 98 of file communicator_stack.cpp.
int comm_dim | ( | int | dim | ) |
Return the number of processes in the dimension dim
dim | Dimension which we are querying |
Definition at line 56 of file communicator_stack.cpp.
int comm_dim_partitioned | ( | int | dim | ) |
Definition at line 74 of file communicator_stack.cpp.
void comm_dim_partitioned_set | ( | int | dim | ) |
Definition at line 70 of file communicator_stack.cpp.
const char* comm_dim_partitioned_string | ( | const int * | comm_dim_override = 0 | ) |
Return a string that defines the comm partitioning (used as a tuneKey)
comm_dim_override | Optional override for partitioning |
Definition at line 82 of file communicator_stack.cpp.
const char* comm_dim_topology_string | ( | ) |
Return a string that defines the comm topology (for use as a tuneKey)
Definition at line 78 of file communicator_stack.cpp.
|
inline |
Definition at line 108 of file communicator_quda.h.
double comm_drand | ( | void | ) |
We provide our own random number generator to avoid re-seeding rand(), which might also be used by the calling application. This is a clone of rand48(), provided by stdlib.h on UNIX.
Definition at line 33 of file comm_common.cpp.
void comm_enable_intranode | ( | bool | enable | ) |
Enable / disable intra-node (non-peer-to-peer) communication.
[in] | enable | Boolean flag to enable / disable intra-node (non peer-to-peer) communication |
Definition at line 122 of file communicator_stack.cpp.
void comm_enable_peer2peer | ( | bool | enable | ) |
Enable / disable peer-to-peer communication: used for dslash policies that do not presently support peer-to-peer communication.
[in] | enable | Boolean flag to enable / disable peer-to-peer communication |
Definition at line 118 of file communicator_stack.cpp.
void comm_finalize | ( | void | ) |
Definition at line 68 of file communicator_stack.cpp.
void comm_free | ( | MsgHandle *& | mh | ) |
Definition at line 163 of file communicator_stack.cpp.
void comm_gather_gpuid | ( | int * | gpuid_recv_buf | ) |
Gather all GPU ids.
[out] | gpuid_recv_buf | int array of length comm_size() that will be filled in with the GPU ids for all processes (in rank order). |
Definition at line 105 of file communicator_stack.cpp.
void comm_gather_hostname | ( | char * | hostname_recv_buf | ) |
Gather all hostnames.
[out] | hostname_recv_buf | char array of length 128*comm_size() that will be filled in with the hostnames of all processes. Each hostname is in rank order, with 128 bytes for each. |
Definition at line 100 of file communicator_stack.cpp.
bool comm_gdr_blacklist | ( | ) |
Query if GPU Direct RDMA communication is blacklisted for this GPU.
Definition at line 126 of file communicator_stack.cpp.
bool comm_gdr_enabled | ( | ) |
Query if GPU Direct RDMA communication is enabled (global setting)
Definition at line 124 of file communicator_stack.cpp.
int comm_gpuid | ( | void | ) |
Definition at line 96 of file communicator_stack.cpp.
char* comm_hostname | ( | void | ) |
Definition at line 10 of file comm_common.cpp.
void comm_init | ( | int | ndim, |
const int * | dims, | ||
QudaCommsMap | rank_from_coords, | ||
void * | map_data, | ||
bool | user_set_comm_handle = false , |
||
void * | user_comm = nullptr |
||
) |
Initialize the communications, implemented in comm_single.cpp, comm_qmp.cpp, and comm_mpi.cpp.
Definition at line 62 of file communicator_stack.cpp.
void comm_init_common | ( | int | ndim, |
const int * | dims, | ||
QudaCommsMap | rank_from_coords, | ||
void * | map_data | ||
) |
Initialize the communications common to all communications abstractions.
bool comm_intranode_enabled | ( | int | dir, |
int | dim | ||
) |
Query if intra-node (non-peer-to-peer) communication is enabled in a given dimension and direction
dir | Direction (0 - backwards, 1 forwards) |
dim | Dimension (0-3) |
Definition at line 120 of file communicator_stack.cpp.
|
inline |
Definition at line 106 of file communicator_quda.h.
int comm_neighbor_rank | ( | int | dir, |
int | dim | ||
) |
Definition at line 54 of file communicator_stack.cpp.
bool comm_nvshmem_enabled | ( | ) |
Query if NVSHMEM communication is enabled (global setting)
Definition at line 128 of file communicator_stack.cpp.
int comm_partitioned | ( | ) |
Loop over comm_dim_partitioned(dim) for all comms dimensions.
Definition at line 76 of file communicator_stack.cpp.
bool comm_peer2peer_enabled | ( | int | dir, |
int | dim | ||
) |
Query if peer-to-peer communication is enabled
dir | Direction (0 - backwards, 1 forwards) |
dim | Dimension (0-3) |
Definition at line 116 of file communicator_stack.cpp.
int comm_peer2peer_enabled_global | ( | ) |
Query what peer-to-peer communication is enabled globally
Definition at line 114 of file communicator_stack.cpp.
void comm_peer2peer_init | ( | const char * | hostname_recv_buf | ) |
Enable peer-to-peer communication.
hostname_recv_buf | Array that holds all process hostnames |
Definition at line 107 of file communicator_stack.cpp.
bool comm_peer2peer_present | ( | ) |
Returns true if any peer-to-peer capability is present on this system (regardless of whether it has been disabled or not). We use this, for example, to determine if we need to allocate pinned device memory or not.
Definition at line 112 of file communicator_stack.cpp.
int comm_query | ( | MsgHandle * | mh | ) |
Definition at line 169 of file communicator_stack.cpp.
int comm_rank | ( | void | ) |
Definition at line 87 of file communicator_stack.cpp.
|
inline |
Definition at line 121 of file communicator_quda.h.
|
inline |
Definition at line 114 of file communicator_quda.h.
int comm_rank_global | ( | void | ) |
Definition at line 89 of file communicator_stack.cpp.
void comm_set_default_topology | ( | Topology * | topo | ) |
void comm_set_neighbor_ranks | ( | Topology * | topo = NULL | ) |
void comm_set_tunekey_string | ( | ) |
Create the topology and partition strings that are used in tuneKeys.
int comm_size | ( | void | ) |
Definition at line 91 of file communicator_stack.cpp.
void comm_start | ( | MsgHandle * | mh | ) |
Definition at line 165 of file communicator_stack.cpp.
void comm_wait | ( | MsgHandle * | mh | ) |
Definition at line 167 of file communicator_stack.cpp.
bool commAsyncReduction | ( | ) |
Definition at line 216 of file communicator_stack.cpp.
void commAsyncReductionSet | ( | bool | global_reduce | ) |
Definition at line 218 of file communicator_stack.cpp.
int commCoords | ( | int | dim | ) |
Definition at line 204 of file communicator_stack.cpp.
int commDim | ( | int | dim | ) |
Definition at line 202 of file communicator_stack.cpp.
int commDimPartitioned | ( | int | dir | ) |
Definition at line 206 of file communicator_stack.cpp.
void commDimPartitionedReset | ( | ) |
Reset the comm dim partitioned array to zero.
This should only be needed for automated testing when different partitioning is applied within a single run.
Definition at line 210 of file communicator_stack.cpp.
void commDimPartitionedSet | ( | int | dir | ) |
Definition at line 208 of file communicator_stack.cpp.
bool commGlobalReduction | ( | ) |
Definition at line 212 of file communicator_stack.cpp.
void commGlobalReductionSet | ( | bool | global_reduce | ) |
Definition at line 214 of file communicator_stack.cpp.
void reduceDouble | ( | double & | sum | ) |
Definition at line 198 of file communicator_stack.cpp.
void reduceDoubleArray | ( | double * | max, |
const int | len | ||
) |
Definition at line 200 of file communicator_stack.cpp.
void reduceMaxDouble | ( | double & | max | ) |
Definition at line 196 of file communicator_stack.cpp.