2 #include <color_spinor_field.h>
3 #include <gauge_field.h>
5 // This define controls which kernels get compiled in `coarse_op.cuh`.
6 // This ensures only kernels relevant for coarsening a coarse operator
7 // get built, saving compile time.
9 #include <coarse_op.cuh>
14 @brief dummyClover is a helper function to allow us to create an
15 empty clover object - this allows us to use the the externally
16 linked reduction kernels when we do have a clover field.
18 inline std::unique_ptr<cudaCloverField> dummyClover()
20 CloverFieldParam cf_param;
23 cf_param.setPrecision(QUDA_SINGLE_PRECISION);
25 for (int i = 0; i < cf_param.nDim; i++) cf_param.x[i] = 0;
27 cf_param.direct = true;
28 cf_param.inverse = true;
29 cf_param.clover = nullptr;
31 cf_param.cloverInv = nullptr;
33 cf_param.create = QUDA_NULL_FIELD_CREATE;
34 cf_param.siteSubset = QUDA_FULL_SITE_SUBSET;
36 // create a dummy cudaCloverField if one is not defined
37 cf_param.order = QUDA_INVALID_CLOVER_ORDER;
38 return std::make_unique<cudaCloverField>(cf_param);
41 template <typename Float, typename vFloat, int fineColor, int fineSpin, int coarseColor, int coarseSpin>
42 void calculateYcoarse(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic, ColorSpinorField &uv,
43 const Transfer &T, const GaugeField &g, const GaugeField &clover, const GaugeField &cloverInv,
44 double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc,
45 bool need_bidirectional, bool use_mma)
48 if (Y.Location() == QUDA_CPU_FIELD_LOCATION) {
50 if (use_mma) { errorQuda("MMA intructions are not supported on the host."); }
52 constexpr QudaFieldOrder csOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
53 constexpr QudaGaugeFieldOrder gOrder = QUDA_QDP_GAUGE_ORDER;
55 if (T.Vectors(Y.Location()).FieldOrder() != csOrder)
56 errorQuda("Unsupported field order %d\n", T.Vectors(Y.Location()).FieldOrder());
57 if (g.FieldOrder() != gOrder) errorQuda("Unsupported field order %d\n", g.FieldOrder());
59 typedef typename colorspinor::FieldOrderCB<Float,fineSpin,fineColor,coarseColor,csOrder,vFloat> V;
60 typedef typename colorspinor::FieldOrderCB<Float,2*fineSpin,fineColor,coarseColor,csOrder,vFloat> F;
61 typedef typename gauge::FieldOrder<Float,fineColor*fineSpin,fineSpin,gOrder,true,vFloat> gFine;
62 typedef typename gauge::FieldOrder<Float,fineColor*fineSpin,fineSpin,gOrder,true,vFloat> cFine;
63 typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,vFloat> gCoarse;
64 typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,storeType> gCoarseAtomic;
66 const ColorSpinorField &v = T.Vectors(Y.Location());
68 V vAccessor(const_cast<ColorSpinorField&>(v));
69 F uvAccessor(const_cast<ColorSpinorField&>(uv));
70 gFine gAccessor(const_cast<GaugeField&>(g));
71 cFine cAccessor(const_cast<GaugeField&>(clover));
72 cFine cInvAccessor(const_cast<GaugeField&>(cloverInv));
73 gCoarse yAccessor(const_cast<GaugeField&>(Y));
74 gCoarse xAccessor(const_cast<GaugeField&>(X));
75 gCoarseAtomic yAccessorAtomic(const_cast<GaugeField&>(Yatomic));
76 gCoarseAtomic xAccessorAtomic(const_cast<GaugeField&>(Xatomic));
78 calculateY<QUDA_CPU_FIELD_LOCATION, true, Float, fineSpin, fineColor, coarseSpin, coarseColor>(
79 yAccessor, xAccessor, yAccessorAtomic, xAccessorAtomic, uvAccessor, vAccessor, vAccessor, gAccessor, cAccessor,
80 cInvAccessor, Y, X, Yatomic, Xatomic, uv, const_cast<ColorSpinorField &>(v), v, g, *dummyClover(), kappa, mass, mu,
81 mu_factor, dirac, matpc, need_bidirectional, T.fineToCoarse(Y.Location()), T.coarseToFine(Y.Location()), use_mma);
86 constexpr QudaFieldOrder csOrder = QUDA_FLOAT2_FIELD_ORDER;
87 constexpr QudaGaugeFieldOrder gOrder = QUDA_FLOAT2_GAUGE_ORDER;
89 if (T.Vectors(Y.Location()).FieldOrder() != csOrder)
90 errorQuda("Unsupported field order %d\n", T.Vectors(Y.Location()).FieldOrder());
91 if (g.FieldOrder() != gOrder) errorQuda("Unsupported field order %d\n", g.FieldOrder());
93 typedef typename colorspinor::FieldOrderCB<Float, fineSpin, fineColor, coarseColor, csOrder, vFloat> V;
94 typedef typename colorspinor::FieldOrderCB<Float, 2 * fineSpin, fineColor, coarseColor, csOrder, vFloat> F;
95 typedef typename gauge::FieldOrder<Float, fineColor * fineSpin, fineSpin, gOrder, true, vFloat> gFine;
96 typedef typename gauge::FieldOrder<Float, fineColor * fineSpin, fineSpin, gOrder, true, vFloat> cFine;
97 typedef typename gauge::FieldOrder<Float, coarseColor * coarseSpin, coarseSpin, gOrder, true, vFloat> gCoarse;
98 typedef typename gauge::FieldOrder<Float, coarseColor * coarseSpin, coarseSpin, gOrder, true, storeType> gCoarseAtomic;
100 const ColorSpinorField &v = T.Vectors(Y.Location());
102 V vAccessor(const_cast<ColorSpinorField &>(v));
103 F uvAccessor(const_cast<ColorSpinorField &>(uv));
104 gFine gAccessor(const_cast<GaugeField &>(g));
105 cFine cAccessor(const_cast<GaugeField &>(clover));
106 cFine cInvAccessor(const_cast<GaugeField &>(cloverInv));
107 gCoarse yAccessor(const_cast<GaugeField &>(Y));
108 gCoarse xAccessor(const_cast<GaugeField &>(X));
109 gCoarseAtomic yAccessorAtomic(const_cast<GaugeField &>(Yatomic));
110 gCoarseAtomic xAccessorAtomic(const_cast<GaugeField &>(Xatomic));
112 // create a dummy clover field to allow us to call the external clover reduction routines elsewhere
113 calculateY<QUDA_CUDA_FIELD_LOCATION, true, Float, fineSpin, fineColor, coarseSpin, coarseColor>(
114 yAccessor, xAccessor, yAccessorAtomic, xAccessorAtomic, uvAccessor, vAccessor, vAccessor, gAccessor,
115 cAccessor, cInvAccessor, Y, X, Yatomic, Xatomic, uv, const_cast<ColorSpinorField &>(v), v, g, *dummyClover(),
116 kappa, mass, mu, mu_factor, dirac, matpc, need_bidirectional, T.fineToCoarse(Y.Location()),
117 T.coarseToFine(Y.Location()), use_mma);
121 constexpr QudaFieldOrder csOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
122 constexpr QudaGaugeFieldOrder gOrder = QUDA_MILC_GAUGE_ORDER;
124 typedef typename colorspinor::FieldOrderCB<Float, fineSpin, fineColor, coarseColor, csOrder, vFloat> V;
125 typedef typename colorspinor::FieldOrderCB<Float, 2 * fineSpin, fineColor, coarseColor, csOrder, vFloat> F;
126 typedef typename gauge::FieldOrder<Float, fineColor * fineSpin, fineSpin, gOrder, true, vFloat> gFine;
127 typedef typename gauge::FieldOrder<Float, fineColor * fineSpin, fineSpin, gOrder, true, vFloat> cFine;
128 typedef typename gauge::FieldOrder<Float, coarseColor * coarseSpin, coarseSpin, gOrder, true, vFloat> gCoarse;
129 typedef typename gauge::FieldOrder<Float, coarseColor * coarseSpin, coarseSpin, gOrder, true, storeType> gCoarseAtomic;
131 const ColorSpinorField &v = T.Vectors(Y.Location());
133 ColorSpinorParam param_v(v);
134 param_v.fieldOrder = csOrder;
135 param_v.setPrecision(v.Precision());
137 cudaColorSpinorField v_(param_v);
142 F uvAccessor(const_cast<ColorSpinorField &>(uv));
143 gFine gAccessor(const_cast<GaugeField &>(g));
144 cFine cAccessor(const_cast<GaugeField &>(clover));
145 cFine cInvAccessor(const_cast<GaugeField &>(cloverInv));
147 gCoarse yAccessor(const_cast<GaugeField &>(Y));
148 gCoarse xAccessor(const_cast<GaugeField &>(X));
149 gCoarseAtomic yAccessorAtomic(const_cast<GaugeField &>(Yatomic));
150 gCoarseAtomic xAccessorAtomic(const_cast<GaugeField &>(Xatomic));
152 // create a dummy clover field to allow us to call the external clover reduction routines elsewhere
153 calculateY<QUDA_CUDA_FIELD_LOCATION, true, Float, fineSpin, fineColor, coarseSpin, coarseColor>(
154 yAccessor, xAccessor, yAccessorAtomic, xAccessorAtomic, uvAccessor, vAccessor, vAccessor, gAccessor,
155 cAccessor, cInvAccessor, Y, X, Yatomic, Xatomic, uv, const_cast<cudaColorSpinorField &>(v_), v_, g,
156 *dummyClover(), kappa, mass, mu, mu_factor, dirac, matpc, need_bidirectional, T.fineToCoarse(Y.Location()),
157 T.coarseToFine(Y.Location()), use_mma);
162 // template on fine colors
163 template <typename Float, typename vFloat, int fineSpin>
164 void calculateYcoarse(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic, ColorSpinorField &uv,
165 const Transfer &T, const GaugeField &g, const GaugeField &clover, const GaugeField &cloverInv,
166 double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc,
167 bool need_bidirectional, bool use_mma)
169 if (T.Vectors().Nspin()/T.Spin_bs() != 2)
170 errorQuda("Unsupported number of coarse spins %d\n",T.Vectors().Nspin()/T.Spin_bs());
171 const int fineColor = g.Ncolor() / fineSpin;
172 const int coarseSpin = 2;
173 const int coarseColor = Y.Ncolor() / coarseSpin;
175 if (fineColor == 6) { // free field Wilson
176 if (coarseColor == 6) {
177 calculateYcoarse<Float, vFloat, 6, fineSpin, 6, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover, cloverInv,
178 kappa, mass, mu, mu_factor, dirac, matpc,
179 need_bidirectional, use_mma);
181 errorQuda("Unsupported fineColor = %d coarseColor = %d\n", fineColor, coarseColor);
185 if (fineColor == 24) { // coarsened Wilson or free field staggered
186 if (coarseColor == 24) {
187 calculateYcoarse<Float, vFloat, 24, fineSpin, 24, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
188 cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
189 need_bidirectional, use_mma);
192 if (coarseColor == 32) {
193 calculateYcoarse<Float, vFloat, 24, fineSpin, 32, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
194 cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
195 need_bidirectional, use_mma);
199 if (coarseColor == 64) {
200 calculateYcoarse<Float, vFloat, 24, fineSpin, 64, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
201 cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
202 need_bidirectional, use_mma);
203 } else // --- note, coarsening Nc == 24 -> Nc == 96 for staggered is worth revisiting in the future
206 errorQuda("Unsupported fineColor = %d coarseColor = %d\n", fineColor, coarseColor);
209 } else if (fineColor == 32) {
210 if (coarseColor == 32) {
211 calculateYcoarse<Float, vFloat, 32, fineSpin, 32, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
212 cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
213 need_bidirectional, use_mma);
215 errorQuda("Unsupported fineColor = %d coarseColor = %d\n", fineColor, coarseColor);
219 } else if (fineColor == 64) {
220 if (coarseColor == 64) {
221 calculateYcoarse<Float, vFloat, 64, fineSpin, 64, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
222 cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
223 need_bidirectional, use_mma);
224 } else if (coarseColor == 96) {
225 calculateYcoarse<Float, vFloat, 64, fineSpin, 96, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
226 cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
227 need_bidirectional, use_mma);
229 errorQuda("Unsupported fineColor = %d coarseColor = %d\n", fineColor, coarseColor);
230 } // --- note, revisit Nc == 96 -> Nc == 96 in the future
233 errorQuda("Unsupported number of colors %d\n", g.Ncolor());
237 // template on fine spin
238 template <typename Float, typename vFloat>
239 void calculateYcoarse(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic, ColorSpinorField &uv,
240 const Transfer &T, const GaugeField &g, const GaugeField &clover, const GaugeField &cloverInv,
241 double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc,
242 bool need_bidirectional, bool use_mma)
244 if (T.Vectors().Nspin() == 2) {
245 calculateYcoarse<Float, vFloat, 2>(Y, X, Yatomic, Xatomic, uv, T, g, clover, cloverInv, kappa, mass, mu, mu_factor,
246 dirac, matpc, need_bidirectional, use_mma);
248 errorQuda("Unsupported number of spins %d\n", T.Vectors().Nspin());
252 //Does the heavy lifting of creating the coarse color matrices Y
253 void calculateYcoarse(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic, ColorSpinorField &uv,
254 const Transfer &T, const GaugeField &g, const GaugeField &clover, const GaugeField &cloverInv,
255 double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc,
256 bool need_bidirectional, bool use_mma)
259 checkPrecision(X, Y, g, clover, cloverInv, uv, T.Vectors(X.Location()));
260 checkPrecision(Xatomic, Yatomic);
262 if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Computing Y field......\n");
263 if (Y.Precision() == QUDA_DOUBLE_PRECISION) {
264 #ifdef GPU_MULTIGRID_DOUBLE
265 if (use_mma) errorQuda("MG-MMA does not support double precision, yet.");
266 if (T.Vectors(X.Location()).Precision() == QUDA_DOUBLE_PRECISION) {
267 calculateYcoarse<double, double>(Y, X, Yatomic, Xatomic, uv, T, g, clover, cloverInv, kappa, mass, mu, mu_factor,
268 dirac, matpc, need_bidirectional, use_mma);
270 errorQuda("Unsupported precision %d\n", Y.Precision());
273 errorQuda("Double precision multigrid has not been enabled");
275 } else if (Y.Precision() == QUDA_SINGLE_PRECISION) {
276 #if QUDA_PRECISION & 4
277 if (T.Vectors(X.Location()).Precision() == QUDA_SINGLE_PRECISION) {
278 calculateYcoarse<float, float>(Y, X, Yatomic, Xatomic, uv, T, g, clover, cloverInv, kappa, mass, mu, mu_factor, dirac,
279 matpc, need_bidirectional, use_mma);
281 errorQuda("Unsupported precision %d\n", T.Vectors(X.Location()).Precision());
284 errorQuda("QUDA_PRECISION=%d does not enable single precision", QUDA_PRECISION);
286 } else if (Y.Precision() == QUDA_HALF_PRECISION) {
287 #if QUDA_PRECISION & 2
288 if (T.Vectors(X.Location()).Precision() == QUDA_HALF_PRECISION) {
289 calculateYcoarse<float, short>(Y, X, Yatomic, Xatomic, uv, T, g, clover, cloverInv, kappa, mass, mu, mu_factor, dirac,
290 matpc, need_bidirectional, use_mma);
292 errorQuda("Unsupported precision %d\n", T.Vectors(X.Location()).Precision());
295 errorQuda("QUDA_PRECISION=%d does not enable half precision", QUDA_PRECISION);
298 errorQuda("Unsupported precision %d\n", Y.Precision());
300 if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("....done computing Y field\n");
302 errorQuda("Multigrid has not been built");
303 #endif // GPU_MULTIGRID
306 //Calculates the coarse color matrix and puts the result in Y.
307 //N.B. Assumes Y, X have been allocated.
308 void CoarseCoarseOp(GaugeField &Y, GaugeField &X, const Transfer &T, const GaugeField &gauge,
309 const GaugeField &clover, const GaugeField &cloverInv, double kappa, double mass, double mu, double mu_factor,
310 QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional, bool use_mma)
312 QudaPrecision precision = Y.Precision();
313 QudaFieldLocation location = checkLocation(X, Y, gauge, clover, cloverInv);
315 //Create a field UV which holds U*V. Has the same similar
316 //structure to V but double the number of spins so we can store
317 //the four distinct block chiral multiplications in a single UV
319 ColorSpinorParam UVparam(T.Vectors(location));
320 UVparam.create = QUDA_ZERO_FIELD_CREATE;
321 UVparam.location = location;
322 UVparam.nSpin *= 2; // so nSpin == 4
323 UVparam.setPrecision(T.Vectors(location).Precision());
324 UVparam.mem_type = Y.MemType(); // allocate temporaries to match coarse-grid link field
326 ColorSpinorField *uv = ColorSpinorField::Create(UVparam);
330 GaugeField *Yatomic = &Y;
331 GaugeField *Xatomic = &X;
332 if (Y.Precision() < QUDA_SINGLE_PRECISION) {
333 // we need to coarsen into single precision fields (float or int), so we allocate temporaries for this purpose
334 // else we can just coarsen directly into the original fields
335 GaugeFieldParam param(X); // use X since we want scalar geometry
336 param.location = location;
337 param.setPrecision(QUDA_SINGLE_PRECISION, location == QUDA_CUDA_FIELD_LOCATION ? true : false);
339 Yatomic = GaugeField::Create(param);
340 Xatomic = GaugeField::Create(param);
343 calculateYcoarse(Y, X, *Yatomic, *Xatomic, *uv, T, gauge, clover, cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
344 need_bidirectional, use_mma);
346 if (Yatomic != &Y) delete Yatomic;
347 if (Xatomic != &X) delete Xatomic;
351 if (location == QUDA_CPU_FIELD_LOCATION) {
352 errorQuda("use_mma = true does not go with QUDA_CPU_FIELD_LOCATION.");
355 // The MMA implementation requires fields to be in AoS order: we check each gauge field, and create a copy if not.
356 // The UV field is an intermediate field, its order doesn't matter; The AoS copy of the V field will be created
357 // later in the code path.
358 constexpr QudaGaugeFieldOrder gOrder = QUDA_MILC_GAUGE_ORDER;
360 auto create_gauge_copy = [](const GaugeField &X, QudaGaugeFieldOrder order, bool copy_content) -> auto
362 GaugeField *output = nullptr;
363 if (X.Order() == order) {
364 output = const_cast<GaugeField *>(&X);
366 GaugeFieldParam param(X);
368 output = cudaGaugeField::Create(param);
369 if (copy_content) output->copy(X);
371 return static_cast<cudaGaugeField *>(output);
374 auto Y_order = create_gauge_copy(Y, gOrder, false);
375 auto X_order = create_gauge_copy(X, gOrder, false);
376 auto G_order = create_gauge_copy(gauge, gOrder, true);
377 auto C_order = create_gauge_copy(clover, gOrder, true);
378 auto I_order = create_gauge_copy(cloverInv, gOrder, true);
380 GaugeField *Yatomic = nullptr;
381 GaugeField *Xatomic = nullptr;
383 if (Y.Precision() < QUDA_SINGLE_PRECISION) {
384 // we need to coarsen into single precision fields (float or int), so we allocate temporaries for this purpose
385 // else we can just coarsen directly into the original fields
386 GaugeFieldParam param(*X_order); // use X since we want scalar geometry
387 param.location = location;
388 param.order = gOrder;
389 param.setPrecision(QUDA_SINGLE_PRECISION);
391 Yatomic = GaugeField::Create(param);
392 Xatomic = GaugeField::Create(param);
398 calculateYcoarse(*Y_order, *X_order, *Yatomic, *Xatomic, *uv, T, *G_order, *C_order, *I_order, kappa, mass, mu,
399 mu_factor, dirac, matpc, need_bidirectional, use_mma);
401 if (Yatomic != Y_order) delete Yatomic;
402 if (Xatomic != X_order) delete Xatomic;
414 if (&gauge != G_order) { delete G_order; }
415 if (&clover != C_order) { delete C_order; }
416 if (&cloverInv != I_order) { delete I_order; }