// split_field: takes the fields in v_base_field, ships one copy per replicate
// to a destination rank, and on the receiving side unpacks each received
// buffer into a temporary field together with a per-partition offset.
// NOTE(review): this listing is an excerpt -- the leading numbers are the
// original file line numbers and the statements between them (rest of the
// signature, declarations of comm_grid_dim, partition_dim, comm_grid_idx,
// partition_idx, rank, dst_rank, src_rank, recv_buffer_h, and the actual
// comm calls) are not shown here. Comments below only describe what is visible.
16 template <
class Field>
17 void inline split_field(Field &collect_field, std::vector<Field *> &v_base_field,
const CommKey &comm_key,
// total_rank is product(comm_grid_dim); presumably the total number of ranks
// in the communication grid -- confirm against the definition of product().
24 int total_rank =
product(comm_grid_dim);
// processor_dim is the communication grid divided (via CommKey operator/) by the split key.
39 auto processor_dim = comm_grid_dim / comm_key;
// Left-hand side is not visible in this excerpt; the loops below use
// partition_dim, which is presumably what is being initialized here.
41 = comm_grid_dim / processor_dim;
// Number of send/recv pairs handled by each loop below.
43 int n_replicates =
product(comm_key);
// One host send buffer and one send message handle per replicate, all
// starting out as nullptr.
44 std::vector<void *> v_send_buffer_h(n_replicates,
nullptr);
45 std::vector<MsgHandle *> v_mh_send(n_replicates,
nullptr);
// NOTE(review): size() returns size_t; this narrows to int.
47 int n_fields = v_base_field.size();
// An empty input vector is reported through errorQuda; v_base_field[0] is
// dereferenced right after, so it must not be empty.
48 if (n_fields == 0) {
errorQuda(
"split_field: input field vec has zero size."); }
// meta is the first input field (a pointer); it supplies the byte count for
// the send side and the parameters/dimensions for the temporary buffer field.
50 const auto meta = v_base_field[0];
// Send side: one iteration per replicate.
53 for (
int i = 0; i < n_replicates; i++) {
55 auto processor_idx = comm_grid_idx / partition_dim;
57 auto dst_idx = partition_idx * processor_dim + processor_idx;
// Tag is sender * total_rank + receiver; the receive loop below builds the
// same expression from the receiver's point of view so the pair matches.
// NOTE(review): this grows as total_rank^2 -- confirm it stays within int and
// within the tag range allowed by the comm backend (e.g. MPI_TAG_UB).
60 int tag = rank * total_rank + dst_rank;
62 size_t bytes = meta->TotalBytes();
// Input fields are reused cyclically (i % n_fields) when there are fewer
// fields than replicates.
66 v_base_field[i % n_fields]->copy_to_buffer(v_send_buffer_h[i]);
// Build a temporary field with the same parameters as meta to unpack
// received buffers into.
72 using param_type =
typename Field::param_type;
74 param_type
param(*meta);
// NOTE(review): the matching release of buffer_field is not visible in this
// excerpt -- confirm it is deleted before the function returns.
75 Field *buffer_field = Field::Create(
param);
// Extents of one base field as reported by full_dim(0..3).
77 CommKey field_dim = {meta->full_dim(0), meta->full_dim(1), meta->full_dim(2), meta->full_dim(3)};
// Receive side: one iteration per replicate.
80 for (
int i = 0; i < n_replicates; i++) {
// Left-hand side not visible; presumably the source index (src_idx) -- confirm.
84 = (comm_grid_idx % processor_dim) * partition_dim + partition_idx;
87 int tag = src_rank * total_rank + rank;
89 size_t bytes = buffer_field->TotalBytes();
98 buffer_field->copy_from_buffer(recv_buffer_h);
// Offset scales the field extents by partition_idx; presumably consumed by a
// copy into collect_field that is not shown here -- confirm.
103 auto offset = partition_idx * field_dim;
// Loop bodies are not visible in this excerpt; presumably they release the
// send buffers and message handles -- confirm.
112 for (
auto &p : v_send_buffer_h) {
115 for (
auto &p : v_mh_send) {
// join_field: the send side packs a temporary field into one host buffer per
// replicate (with a per-partition offset computed alongside); the receive
// side unpacks each received buffer into the fields of v_base_field.
// NOTE(review): this listing is an excerpt -- the leading numbers are the
// original file line numbers and the statements between them (rest of the
// signature, declarations of comm_grid_dim, partition_dim, comm_grid_idx,
// partition_idx, rank, dst_rank, src_rank, recv_buffer_h, and the actual
// comm calls) are not shown here. Comments below only describe what is visible.
120 template <
class Field>
121 void inline join_field(std::vector<Field *> &v_base_field,
const Field &collect_field,
const CommKey &comm_key,
// total_rank is product(comm_grid_dim); presumably the total number of ranks
// in the communication grid -- confirm against the definition of product().
128 int total_rank =
product(comm_grid_dim);
// processor_dim is the communication grid divided (via CommKey operator/) by the split key.
130 auto processor_dim = comm_grid_dim / comm_key;
// Left-hand side is not visible in this excerpt; the loops below use
// partition_dim, which is presumably what is being initialized here.
132 = comm_grid_dim / processor_dim;
// Number of send/recv pairs handled by each loop below.
134 int n_replicates =
product(comm_key);
// One host send buffer and one send message handle per replicate, all
// starting out as nullptr; both are released at the end of the function.
135 std::vector<void *> v_send_buffer_h(n_replicates,
nullptr);
136 std::vector<MsgHandle *> v_mh_send(n_replicates,
nullptr);
// NOTE(review): size() returns size_t; this narrows to int.
138 int n_fields = v_base_field.size();
// An empty output vector is reported through errorQuda; v_base_field[0] is
// dereferenced right after, so it must not be empty.
139 if (n_fields == 0) {
errorQuda(
"join_field: output field vec has zero size."); }
// meta refers to the first output field; it supplies the parameters,
// dimensions and byte count used below.
141 const auto &meta = *(v_base_field[0]);
// Build a temporary field with the same parameters as meta to stage data
// before it is packed into a send buffer.
143 using param_type =
typename Field::param_type;
145 param_type
param(meta);
// NOTE(review): the matching release of buffer_field is not visible in this
// excerpt -- confirm it is deleted before the function returns.
146 Field *buffer_field = Field::Create(
param);
// Extents of one base field as reported by full_dim(0..3).
148 CommKey field_dim = {meta.full_dim(0), meta.full_dim(1), meta.full_dim(2), meta.full_dim(3)};
// Send side: one iteration per replicate.
151 for (
int i = 0; i < n_replicates; i++) {
154 auto dst_idx = (comm_grid_idx % processor_dim) * partition_dim + partition_idx;
// Tag is sender * total_rank + receiver; the receive loop below builds the
// same expression from the receiver's point of view so the pair matches.
// NOTE(review): this grows as total_rank^2 -- confirm it stays within int and
// within the tag range allowed by the comm backend (e.g. MPI_TAG_UB).
157 int tag = rank * total_rank + dst_rank;
159 size_t bytes = meta.TotalBytes();
// Offset scales the field extents by partition_idx; presumably used to fill
// buffer_field from collect_field in a statement not shown here -- confirm.
161 auto offset = partition_idx * field_dim;
165 buffer_field->copy_to_buffer(v_send_buffer_h[i]);
// Receive side: one iteration per replicate.
173 for (
int i = 0; i < n_replicates; i++) {
176 auto processor_idx = comm_grid_idx / partition_dim;
178 auto src_idx = partition_idx * processor_dim + processor_idx;
181 int tag = src_rank * total_rank + rank;
183 size_t bytes = buffer_field->TotalBytes();
// Output fields are addressed cyclically (i % n_fields) when there are fewer
// fields than replicates.
192 v_base_field[i % n_fields]->copy_from_buffer(recv_buffer_h);
// Release every host send buffer and every send message handle.
202 for (
auto &p : v_send_buffer_h) {
host_free(p); };
203 for (
auto &p : v_mh_send) {
comm_free(p); };
void comm_start(MsgHandle *mh)
MsgHandle * comm_declare_recv_rank(void *buffer, int rank, int tag, size_t nbytes)
MsgHandle * comm_declare_send_rank(void *buffer, int rank, int tag, size_t nbytes)
void comm_wait(MsgHandle *mh)
void comm_free(MsgHandle *&mh)
enum QudaPCType_s QudaPCType
#define pinned_malloc(size)
constexpr int product(const CommKey &input)
void join_field(std::vector< Field * > &v_base_field, const Field &collect_field, const CommKey &comm_key, QudaPCType pc_type=QUDA_4D_PC)
void split_field(Field &collect_field, std::vector< Field * > &v_base_field, const CommKey &comm_key, QudaPCType pc_type=QUDA_4D_PC)
constexpr CommKey coordinate_from_index(int index, CommKey dim)
void copyFieldOffset(CloverField &out, const CloverField &in, CommKey offset, QudaPCType pc_type)
This function is used for copying from a source clover field to a destination clover field with an offset.
Main header file for the QUDA library.
int comm_rank_from_coords(const int *coords)