26 null_precision(null_precision),
31 coarse_tmp_h(nullptr),
32 coarse_tmp_d(nullptr),
34 fine_to_coarse_h(nullptr),
35 coarse_to_fine_h(nullptr),
36 fine_to_coarse_d(nullptr),
37 coarse_to_fine_d(nullptr),
40 nspin_fine(B[0]->
Nspin()),
46 transfer_type(transfer_type),
51 int ndim = B[0]->Ndim();
55 for (
int d = 0; d < 4; d++) {
56 while (geo_bs[d] > 0) {
57 if (d==0 && B[0]->X(0) == geo_bs[0])
58 warningQuda(
"X-dimension length %d cannot block length %d", B[0]->X(0), geo_bs[0]);
59 else if ( (B[0]->X(d)/geo_bs[d]+1)%2 == 0)
60 warningQuda(
"Indexing does not (yet) support odd coarse dimensions: X(%d) = %d", d, B[0]->X(d)/geo_bs[d]);
61 else if ( (B[0]->X(d)/geo_bs[d]) * geo_bs[d] != B[0]->X(d) )
62 warningQuda(
"cannot block dim[%d]=%d with block size = %d", d, B[0]->X(d), geo_bs[d]);
67 if (geo_bs[d] == 0)
errorQuda(
"Unable to block dimension %d", d);
72 warningQuda(
"5th dimension block size is being set to 1. This is a benign side effect of staggered fermions");
75 this->geo_bs =
new int[ndim];
76 int total_block_size = 1;
77 for (
int d = 0; d < ndim; d++) {
78 this->geo_bs[d] = geo_bs[d];
79 total_block_size *= geo_bs[d];
86 if (total_block_size != 1)
87 errorQuda(
"Invalid total geometric block size %d for transfer type optimized-kd, must be 1", total_block_size);
90 if (Nvec != B[0]->
Ncolor())
91 errorQuda(
"Invalid Nvec %d for optimized-kd aggregation, must be fine color %d", Nvec, B[0]->
Ncolor());
95 for (
int d = 1; d < ndim; d++) block_str +=
" x " + std::to_string(geo_bs[d]);
99 for (
int d = 0; d < 4; d++) {
100 if (geo_bs[d] != 2)
errorQuda(
"Invalid staggered KD block size %d for dimension %d, must be 2", geo_bs[d], d);
102 if (Nvec != 24)
errorQuda(
"Invalid number of coarse vectors %d for staggered KD multigrid, must be 24", Nvec);
105 createV(B[0]->Location());
117 createGeoMap(geo_bs);
120 spin_map =
static_cast<int**
>(
safe_malloc(nspin_fine*
sizeof(
int*)));
121 for (
int s = 0; s < B[0]->Nspin(); s++) spin_map[s] =
static_cast<int*
>(
safe_malloc(2*
sizeof(
int)));
122 createSpinMap(spin_bs);
138 param.nSpin = B[0]->Nspin();
139 param.nColor = B[0]->Ncolor()*Nvec;
158 param.v = (
void *)std::numeric_limits<uint64_t>::max();
159 param.norm = (
void *)std::numeric_limits<uint64_t>::max();
181 ColorSpinorParam
param(*B[0]);
188 if (fine_tmp_d && coarse_tmp_d)
return;
190 coarse_tmp_d = fine_tmp_d->
CreateCoarse(geo_bs, spin_bs, Nvec);
193 coarse_tmp_h = fine_tmp_h->
CreateCoarse(geo_bs, spin_bs, Nvec);
200 if (!enable_cpu && !enable_gpu)
errorQuda(
"Neither CPU or GPU coarse fields initialized");
207 if (enable_gpu)
return;
213 qudaMemcpy(fine_to_coarse_d, fine_to_coarse_h, B[0]->Volume()*
sizeof(
int), cudaMemcpyHostToDevice);
214 qudaMemcpy(coarse_to_fine_d, coarse_to_fine_h, B[0]->Volume()*
sizeof(
int), cudaMemcpyHostToDevice);
217 if (enable_cpu)
return;
222 errorQuda(
"Unknown location %d", location);
234 if (!enable_gpu)
errorQuda(
"enable_gpu = %d so cannot reset", enable_gpu);
235 BlockOrthogonalize(*V_d, B, fine_to_coarse_d, coarse_to_fine_d, geo_bs, spin_bs, NblockOrtho);
241 if (!enable_cpu)
errorQuda(
"enable_cpu = %d so cannot reset", enable_cpu);
242 BlockOrthogonalize(*V_h, B, fine_to_coarse_h, coarse_to_fine_h, geo_bs, spin_bs, NblockOrtho);
254 for (
int s = 0; s < nspin_fine; s++) {
if (spin_map[s])
host_free(spin_map[s]); }
264 if (fine_tmp_h)
delete fine_tmp_h;
265 if (fine_tmp_d)
delete fine_tmp_d;
267 if (coarse_tmp_h)
delete coarse_tmp_h;
268 if (coarse_tmp_d)
delete coarse_tmp_d;
270 if (geo_bs)
delete []geo_bs;
276 errorQuda(
"Undefined parity %d", parity_);
279 if (site_subset == site_subset_)
return;
280 site_subset = site_subset_;
289 return (
x < a.
x) ? true : (
x==a.
x &&
y<a.
y) ?
true :
false;
294 void Transfer::createGeoMap(
int *geo_bs) {
298 ColorSpinorField &fine(*fine_tmp_h);
299 ColorSpinorField &coarse(*coarse_tmp_h);
302 for (
size_t i = 0; i < fine.Volume(); i++) {
304 fine.LatticeIndex(x, i);
309 for (
int d=0; d<fine.Ndim(); d++) x[d] /= geo_bs[d];
313 coarse.OffsetIndex(k, x);
314 fine_to_coarse_h[i] = k;
321 std::vector<Int2> geo_sort(B[0]->Volume());
322 for (
unsigned int i=0; i<geo_sort.size(); i++) geo_sort[i] = Int2(fine_to_coarse_h[i], i);
323 std::sort(geo_sort.begin(), geo_sort.end());
324 for (
unsigned int i=0; i<geo_sort.size(); i++) coarse_to_fine_h[i] = geo_sort[i].y;
327 qudaMemcpy(fine_to_coarse_d, fine_to_coarse_h, B[0]->Volume()*
sizeof(
int), cudaMemcpyHostToDevice);
328 qudaMemcpy(coarse_to_fine_d, coarse_to_fine_h, B[0]->Volume()*
sizeof(
int), cudaMemcpyHostToDevice);
334 void Transfer::createSpinMap(
int spin_bs) {
342 for (
int s=0; s<B[0]->Nspin(); s++) {
343 spin_map[s][0] = s / spin_bs;
344 spin_map[s][1] = s / spin_bs;
356 const int *fine_to_coarse = use_gpu ? fine_to_coarse_d : fine_to_coarse_h;
369 *output = input->
Even();
383 if (!enable_gpu)
errorQuda(
"not created with enable_gpu set, so cannot run on GPU");
392 errorQuda(
"Cannot prolongate to a full field since only have single parity null-space components");
394 if ((
V->Nspin() != 1) && ((output->
GammaBasis() !=
V->GammaBasis()) || (input->
GammaBasis() !=
V->GammaBasis()))) {
395 errorQuda(
"Cannot apply prolongator using fields in a different basis from the null space (%d,%d) != %d",
399 Prolongate(*output, *input, *
V, Nvec, fine_to_coarse, spin_map, parity);
403 errorQuda(
"Invalid transfer type in prolongate");
419 const int *fine_to_coarse = use_gpu ? fine_to_coarse_d : fine_to_coarse_h;
420 const int *coarse_to_fine = use_gpu ? coarse_to_fine_d : coarse_to_fine_h;
433 output->
Even() = *input;
447 if (!enable_gpu)
errorQuda(
"not created with enable_gpu set, so cannot run on GPU");
456 errorQuda(
"Cannot restrict a full field since only have single parity null-space components");
459 errorQuda(
"Cannot apply restrictor using fields in a different basis from the null space (%d,%d) != %d",
462 Restrict(*output, *input, *
V, Nvec, fine_to_coarse, coarse_to_fine, spin_map, parity);
466 errorQuda(
"Invalid transfer type in restrict");
const ColorSpinorField & Odd() const
QudaSiteSubset SiteSubset() const
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
ColorSpinorField * CreateCoarse(const int *geoBlockSize, int spinBlockSize, int Nvec, QudaPrecision precision=QUDA_INVALID_PRECISION, QudaFieldLocation location=QUDA_INVALID_FIELD_LOCATION, QudaMemoryType mem_Type=QUDA_MEMORY_INVALID)
Create a coarse color-spinor field, using this field to set the meta data.
const ColorSpinorField & Even() const
QudaGammaBasis GammaBasis() const
QudaFieldLocation Location() const
void reset()
for resetting the Transfer when the null vectors have changed
void R(ColorSpinorField &out, const ColorSpinorField &in) const
Transfer(const std::vector< ColorSpinorField * > &B, int Nvec, int NblockOrtho, int *geo_bs, int spin_bs, QudaPrecision null_precision, const QudaTransferType transfer_type, TimeProfile &profile)
void setSiteSubset(QudaSiteSubset site_subset, QudaParity parity)
Sets whether the transfer operator is to act on full fields or single parity fields,...
void P(ColorSpinorField &out, const ColorSpinorField &in) const
quda::mgarray< int > n_block_ortho
enum QudaPrecision_s QudaPrecision
@ QUDA_CUDA_FIELD_LOCATION
@ QUDA_CPU_FIELD_LOCATION
@ QUDA_PARITY_SITE_SUBSET
enum QudaTransferType_s QudaTransferType
@ QUDA_TRANSFER_COARSE_KD
@ QUDA_TRANSFER_AGGREGATE
@ QUDA_TRANSFER_OPTIMIZED_KD
enum QudaSiteSubset_s QudaSiteSubset
enum QudaFieldLocation_s QudaFieldLocation
@ QUDA_FLOAT2_FIELD_ORDER
@ QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
@ QUDA_REFERENCE_FIELD_CREATE
enum QudaParity_s QudaParity
#define pool_pinned_malloc(size)
#define pool_device_malloc(size)
#define safe_malloc(size)
#define pool_pinned_free(ptr)
#define pool_device_free(ptr)
void zero(ColorSpinorField &a)
void StaggeredRestrict(ColorSpinorField &out, const ColorSpinorField &in, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
Apply the unitary "restriction" operator for Kahler-Dirac preconditioning.
void Restrict(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *coarse_to_fine, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
Apply the restriction operator.
void BlockOrthogonalize(ColorSpinorField &V, const std::vector< ColorSpinorField * > &B, const int *fine_to_coarse, const int *coarse_to_fine, const int *geo_bs, const int spin_bs, const int n_block_ortho)
Block orthogonnalize the matrix field, where the blocks are defined by lookup tables that map the fin...
void StaggeredProlongate(ColorSpinorField &out, const ColorSpinorField &in, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
Apply the unitary "prolongation" operator for Kahler-Dirac preconditioning.
void Prolongate(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
Apply the prolongation operator.
#define qudaMemcpy(dst, src, count, kind)
#define qudaDeviceSynchronize()
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5.
QudaFieldLocation location
bool operator<(const Int2 &a) const
QudaVerbosity getVerbosity()