QUDA  v1.1.0
A library for QCD on GPUs
split_grid.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <quda.h>
4 #include <comm_quda.h>
5 #include <communicator_quda.h>
6 
7 #include <gauge_field.h>
8 #include <color_spinor_field.h>
9 #include <clover_field.h>
10 
11 int comm_rank_from_coords(const int *coords);
12 
13 namespace quda
14 {
15 
16  template <class Field>
17  void inline split_field(Field &collect_field, std::vector<Field *> &v_base_field, const CommKey &comm_key,
18  QudaPCType pc_type = QUDA_4D_PC)
19  {
20  CommKey comm_grid_dim = {comm_dim(0), comm_dim(1), comm_dim(2), comm_dim(3)};
21  CommKey comm_grid_idx = {comm_coord(0), comm_coord(1), comm_coord(2), comm_coord(3)};
22 
23  int rank = comm_rank();
24  int total_rank = product(comm_grid_dim);
25 
39  auto processor_dim = comm_grid_dim / comm_key; // How many processors are there in a processor grid sub-parititon?
40  auto partition_dim
41  = comm_grid_dim / processor_dim; // How many such sub-partitions are there? partition_dim == comm_key
42 
43  int n_replicates = product(comm_key);
44  std::vector<void *> v_send_buffer_h(n_replicates, nullptr);
45  std::vector<MsgHandle *> v_mh_send(n_replicates, nullptr);
46 
47  int n_fields = v_base_field.size();
48  if (n_fields == 0) { errorQuda("split_field: input field vec has zero size."); }
49 
50  const auto meta = v_base_field[0];
51 
52  // Send cycles
53  for (int i = 0; i < n_replicates; i++) {
54  auto partition_idx = coordinate_from_index(i, comm_key); // Which partition to send to?
55  auto processor_idx = comm_grid_idx / partition_dim; // Which processor in that partition to send to?
56 
57  auto dst_idx = partition_idx * processor_dim + processor_idx;
58 
59  int dst_rank = comm_rank_from_coords(dst_idx.data());
60  int tag = rank * total_rank + dst_rank; // tag = src_rank * total_rank + dst_rank
61 
62  size_t bytes = meta->TotalBytes();
63 
64  v_send_buffer_h[i] = pinned_malloc(bytes);
65 
66  v_base_field[i % n_fields]->copy_to_buffer(v_send_buffer_h[i]);
67 
68  v_mh_send[i] = comm_declare_send_rank(v_send_buffer_h[i], dst_rank, tag, bytes);
69  comm_start(v_mh_send[i]);
70  }
71 
72  using param_type = typename Field::param_type;
73 
74  param_type param(*meta);
75  Field *buffer_field = Field::Create(param);
76 
77  CommKey field_dim = {meta->full_dim(0), meta->full_dim(1), meta->full_dim(2), meta->full_dim(3)};
78 
79  // Receive cycles
80  for (int i = 0; i < n_replicates; i++) {
81  auto partition_idx
82  = coordinate_from_index(i, comm_key); // Here this means which partition of the field we are working on.
83  auto src_idx
84  = (comm_grid_idx % processor_dim) * partition_dim + partition_idx; // And where does this partition comes from?
85 
86  int src_rank = comm_rank_from_coords(src_idx.data());
87  int tag = src_rank * total_rank + rank;
88 
89  size_t bytes = buffer_field->TotalBytes();
90 
91  void *recv_buffer_h = pinned_malloc(bytes);
92 
93  auto mh_recv = comm_declare_recv_rank(recv_buffer_h, src_rank, tag, bytes);
94 
95  comm_start(mh_recv);
96  comm_wait(mh_recv);
97 
98  buffer_field->copy_from_buffer(recv_buffer_h);
99 
100  comm_free(mh_recv);
101  host_free(recv_buffer_h);
102 
103  auto offset = partition_idx * field_dim;
104 
105  quda::copyFieldOffset(collect_field, *buffer_field, offset, pc_type);
106  }
107 
108  delete buffer_field;
109 
110  comm_barrier();
111 
112  for (auto &p : v_send_buffer_h) {
113  if (p) { host_free(p); }
114  };
115  for (auto &p : v_mh_send) {
116  if (p) { comm_free(p); }
117  };
118  }
119 
120  template <class Field>
121  void inline join_field(std::vector<Field *> &v_base_field, const Field &collect_field, const CommKey &comm_key,
122  QudaPCType pc_type = QUDA_4D_PC)
123  {
124  CommKey comm_grid_dim = {comm_dim(0), comm_dim(1), comm_dim(2), comm_dim(3)};
125  CommKey comm_grid_idx = {comm_coord(0), comm_coord(1), comm_coord(2), comm_coord(3)};
126 
127  int rank = comm_rank();
128  int total_rank = product(comm_grid_dim);
129 
130  auto processor_dim = comm_grid_dim / comm_key; // Communicator grid.
131  auto partition_dim
132  = comm_grid_dim / processor_dim; // The full field needs to be partitioned according to the communicator grid.
133 
134  int n_replicates = product(comm_key);
135  std::vector<void *> v_send_buffer_h(n_replicates, nullptr);
136  std::vector<MsgHandle *> v_mh_send(n_replicates, nullptr);
137 
138  int n_fields = v_base_field.size();
139  if (n_fields == 0) { errorQuda("join_field: output field vec has zero size."); }
140 
141  const auto &meta = *(v_base_field[0]);
142 
143  using param_type = typename Field::param_type;
144 
145  param_type param(meta);
146  Field *buffer_field = Field::Create(param);
147 
148  CommKey field_dim = {meta.full_dim(0), meta.full_dim(1), meta.full_dim(2), meta.full_dim(3)};
149 
150  // Send cycles
151  for (int i = 0; i < n_replicates; i++) {
152 
153  auto partition_idx = coordinate_from_index(i, comm_key);
154  auto dst_idx = (comm_grid_idx % processor_dim) * partition_dim + partition_idx;
155 
156  int dst_rank = comm_rank_from_coords(dst_idx.data());
157  int tag = rank * total_rank + dst_rank;
158 
159  size_t bytes = meta.TotalBytes();
160 
161  auto offset = partition_idx * field_dim;
162  quda::copyFieldOffset(*buffer_field, collect_field, offset, pc_type);
163 
164  v_send_buffer_h[i] = pinned_malloc(bytes);
165  buffer_field->copy_to_buffer(v_send_buffer_h[i]);
166 
167  v_mh_send[i] = comm_declare_send_rank(v_send_buffer_h[i], dst_rank, tag, bytes);
168 
169  comm_start(v_mh_send[i]);
170  }
171 
172  // Receive cycles
173  for (int i = 0; i < n_replicates; i++) {
174 
175  auto partition_idx = coordinate_from_index(i, comm_key);
176  auto processor_idx = comm_grid_idx / partition_dim;
177 
178  auto src_idx = partition_idx * processor_dim + processor_idx;
179 
180  int src_rank = comm_rank_from_coords(src_idx.data());
181  int tag = src_rank * total_rank + rank;
182 
183  size_t bytes = buffer_field->TotalBytes();
184 
185  void *recv_buffer_h = pinned_malloc(bytes);
186 
187  auto mh_recv = comm_declare_recv_rank(recv_buffer_h, src_rank, tag, bytes);
188 
189  comm_start(mh_recv);
190  comm_wait(mh_recv);
191 
192  v_base_field[i % n_fields]->copy_from_buffer(recv_buffer_h);
193 
194  comm_free(mh_recv);
195  host_free(recv_buffer_h);
196  }
197 
198  delete buffer_field;
199 
200  comm_barrier();
201 
202  for (auto &p : v_send_buffer_h) { host_free(p); };
203  for (auto &p : v_mh_send) { comm_free(p); };
204  }
205 
206 } // namespace quda
void comm_start(MsgHandle *mh)
void comm_barrier(void)
MsgHandle * comm_declare_recv_rank(void *buffer, int rank, int tag, size_t nbytes)
MsgHandle * comm_declare_send_rank(void *buffer, int rank, int tag, size_t nbytes)
int comm_rank(void)
int comm_coord(int dim)
void comm_wait(MsgHandle *mh)
void comm_free(MsgHandle *&mh)
int comm_dim(int dim)
enum QudaPCType_s QudaPCType
@ QUDA_4D_PC
Definition: enum_quda.h:397
#define pinned_malloc(size)
Definition: malloc_quda.h:107
#define host_free(ptr)
Definition: malloc_quda.h:115
unsigned long long bytes
constexpr int product(const CommKey &input)
Definition: comm_key.h:28
void join_field(std::vector< Field * > &v_base_field, const Field &collect_field, const CommKey &comm_key, QudaPCType pc_type=QUDA_4D_PC)
Definition: split_grid.h:121
void split_field(Field &collect_field, std::vector< Field * > &v_base_field, const CommKey &comm_key, QudaPCType pc_type=QUDA_4D_PC)
Definition: split_grid.h:17
constexpr CommKey coordinate_from_index(int index, CommKey dim)
Definition: comm_key.h:74
void copyFieldOffset(CloverField &out, const CloverField &in, CommKey offset, QudaPCType pc_type)
This function is used for copying from a source clover field to a destination clover field with an offset.
QudaGaugeParam param
Definition: pack_test.cpp:18
Main header file for the QUDA library.
int comm_rank_from_coords(const int *coords)
#define errorQuda(...)
Definition: util_quda.h:120