QUDA  v1.1.0
A library for QCD on GPUs
coarsecoarse_op.cu
Go to the documentation of this file.
1 #include <transfer.h>
2 #include <color_spinor_field.h>
3 #include <gauge_field.h>
4 
5 // This define controls which kernels get compiled in `coarse_op.cuh`.
6 // This ensures only kernels relevant for coarsening a coarse operator
7 // get built, saving compile time.
8 #define COARSECOARSE
9 #include <coarse_op.cuh>
10 
11 namespace quda {
12 
13  /**
14  @brief dummyClover is a helper function to allow us to create an
15  empty clover object - this allows us to use the the externally
16  linked reduction kernels when we do have a clover field.
17  */
18  inline std::unique_ptr<cudaCloverField> dummyClover()
19  {
20  CloverFieldParam cf_param;
21  cf_param.nDim = 4;
22  cf_param.pad = 0;
23  cf_param.setPrecision(QUDA_SINGLE_PRECISION);
24 
25  for (int i = 0; i < cf_param.nDim; i++) cf_param.x[i] = 0;
26 
27  cf_param.direct = true;
28  cf_param.inverse = true;
29  cf_param.clover = nullptr;
30  cf_param.norm = 0;
31  cf_param.cloverInv = nullptr;
32  cf_param.invNorm = 0;
33  cf_param.create = QUDA_NULL_FIELD_CREATE;
34  cf_param.siteSubset = QUDA_FULL_SITE_SUBSET;
35 
36  // create a dummy cudaCloverField if one is not defined
37  cf_param.order = QUDA_INVALID_CLOVER_ORDER;
38  return std::make_unique<cudaCloverField>(cf_param);
39  }
40 
41  template <typename Float, typename vFloat, int fineColor, int fineSpin, int coarseColor, int coarseSpin>
42  void calculateYcoarse(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic, ColorSpinorField &uv,
43  const Transfer &T, const GaugeField &g, const GaugeField &clover, const GaugeField &cloverInv,
44  double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc,
45  bool need_bidirectional, bool use_mma)
46  {
47 
48  if (Y.Location() == QUDA_CPU_FIELD_LOCATION) {
49 
50  if (use_mma) { errorQuda("MMA intructions are not supported on the host."); }
51 
52  constexpr QudaFieldOrder csOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
53  constexpr QudaGaugeFieldOrder gOrder = QUDA_QDP_GAUGE_ORDER;
54 
55  if (T.Vectors(Y.Location()).FieldOrder() != csOrder)
56  errorQuda("Unsupported field order %d\n", T.Vectors(Y.Location()).FieldOrder());
57  if (g.FieldOrder() != gOrder) errorQuda("Unsupported field order %d\n", g.FieldOrder());
58 
59  typedef typename colorspinor::FieldOrderCB<Float,fineSpin,fineColor,coarseColor,csOrder,vFloat> V;
60  typedef typename colorspinor::FieldOrderCB<Float,2*fineSpin,fineColor,coarseColor,csOrder,vFloat> F;
61  typedef typename gauge::FieldOrder<Float,fineColor*fineSpin,fineSpin,gOrder,true,vFloat> gFine;
62  typedef typename gauge::FieldOrder<Float,fineColor*fineSpin,fineSpin,gOrder,true,vFloat> cFine;
63  typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,vFloat> gCoarse;
64  typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,storeType> gCoarseAtomic;
65 
66  const ColorSpinorField &v = T.Vectors(Y.Location());
67 
68  V vAccessor(const_cast<ColorSpinorField&>(v));
69  F uvAccessor(const_cast<ColorSpinorField&>(uv));
70  gFine gAccessor(const_cast<GaugeField&>(g));
71  cFine cAccessor(const_cast<GaugeField&>(clover));
72  cFine cInvAccessor(const_cast<GaugeField&>(cloverInv));
73  gCoarse yAccessor(const_cast<GaugeField&>(Y));
74  gCoarse xAccessor(const_cast<GaugeField&>(X));
75  gCoarseAtomic yAccessorAtomic(const_cast<GaugeField&>(Yatomic));
76  gCoarseAtomic xAccessorAtomic(const_cast<GaugeField&>(Xatomic));
77 
78  calculateY<QUDA_CPU_FIELD_LOCATION, true, Float, fineSpin, fineColor, coarseSpin, coarseColor>(
79  yAccessor, xAccessor, yAccessorAtomic, xAccessorAtomic, uvAccessor, vAccessor, vAccessor, gAccessor, cAccessor,
80  cInvAccessor, Y, X, Yatomic, Xatomic, uv, const_cast<ColorSpinorField &>(v), v, g, *dummyClover(), kappa, mass, mu,
81  mu_factor, dirac, matpc, need_bidirectional, T.fineToCoarse(Y.Location()), T.coarseToFine(Y.Location()), use_mma);
82  } else {
83 
84  if (!use_mma) {
85 
86  constexpr QudaFieldOrder csOrder = QUDA_FLOAT2_FIELD_ORDER;
87  constexpr QudaGaugeFieldOrder gOrder = QUDA_FLOAT2_GAUGE_ORDER;
88 
89  if (T.Vectors(Y.Location()).FieldOrder() != csOrder)
90  errorQuda("Unsupported field order %d\n", T.Vectors(Y.Location()).FieldOrder());
91  if (g.FieldOrder() != gOrder) errorQuda("Unsupported field order %d\n", g.FieldOrder());
92 
93  typedef typename colorspinor::FieldOrderCB<Float, fineSpin, fineColor, coarseColor, csOrder, vFloat> V;
94  typedef typename colorspinor::FieldOrderCB<Float, 2 * fineSpin, fineColor, coarseColor, csOrder, vFloat> F;
95  typedef typename gauge::FieldOrder<Float, fineColor * fineSpin, fineSpin, gOrder, true, vFloat> gFine;
96  typedef typename gauge::FieldOrder<Float, fineColor * fineSpin, fineSpin, gOrder, true, vFloat> cFine;
97  typedef typename gauge::FieldOrder<Float, coarseColor * coarseSpin, coarseSpin, gOrder, true, vFloat> gCoarse;
98  typedef typename gauge::FieldOrder<Float, coarseColor * coarseSpin, coarseSpin, gOrder, true, storeType> gCoarseAtomic;
99 
100  const ColorSpinorField &v = T.Vectors(Y.Location());
101 
102  V vAccessor(const_cast<ColorSpinorField &>(v));
103  F uvAccessor(const_cast<ColorSpinorField &>(uv));
104  gFine gAccessor(const_cast<GaugeField &>(g));
105  cFine cAccessor(const_cast<GaugeField &>(clover));
106  cFine cInvAccessor(const_cast<GaugeField &>(cloverInv));
107  gCoarse yAccessor(const_cast<GaugeField &>(Y));
108  gCoarse xAccessor(const_cast<GaugeField &>(X));
109  gCoarseAtomic yAccessorAtomic(const_cast<GaugeField &>(Yatomic));
110  gCoarseAtomic xAccessorAtomic(const_cast<GaugeField &>(Xatomic));
111 
112  // create a dummy clover field to allow us to call the external clover reduction routines elsewhere
113  calculateY<QUDA_CUDA_FIELD_LOCATION, true, Float, fineSpin, fineColor, coarseSpin, coarseColor>(
114  yAccessor, xAccessor, yAccessorAtomic, xAccessorAtomic, uvAccessor, vAccessor, vAccessor, gAccessor,
115  cAccessor, cInvAccessor, Y, X, Yatomic, Xatomic, uv, const_cast<ColorSpinorField &>(v), v, g, *dummyClover(),
116  kappa, mass, mu, mu_factor, dirac, matpc, need_bidirectional, T.fineToCoarse(Y.Location()),
117  T.coarseToFine(Y.Location()), use_mma);
118 
119  } else {
120 
121  constexpr QudaFieldOrder csOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
122  constexpr QudaGaugeFieldOrder gOrder = QUDA_MILC_GAUGE_ORDER;
123 
124  typedef typename colorspinor::FieldOrderCB<Float, fineSpin, fineColor, coarseColor, csOrder, vFloat> V;
125  typedef typename colorspinor::FieldOrderCB<Float, 2 * fineSpin, fineColor, coarseColor, csOrder, vFloat> F;
126  typedef typename gauge::FieldOrder<Float, fineColor * fineSpin, fineSpin, gOrder, true, vFloat> gFine;
127  typedef typename gauge::FieldOrder<Float, fineColor * fineSpin, fineSpin, gOrder, true, vFloat> cFine;
128  typedef typename gauge::FieldOrder<Float, coarseColor * coarseSpin, coarseSpin, gOrder, true, vFloat> gCoarse;
129  typedef typename gauge::FieldOrder<Float, coarseColor * coarseSpin, coarseSpin, gOrder, true, storeType> gCoarseAtomic;
130 
131  const ColorSpinorField &v = T.Vectors(Y.Location());
132 
133  ColorSpinorParam param_v(v);
134  param_v.fieldOrder = csOrder;
135  param_v.setPrecision(v.Precision());
136 
137  cudaColorSpinorField v_(param_v);
138 
139  v_.copy(v);
140 
141  V vAccessor(v_);
142  F uvAccessor(const_cast<ColorSpinorField &>(uv));
143  gFine gAccessor(const_cast<GaugeField &>(g));
144  cFine cAccessor(const_cast<GaugeField &>(clover));
145  cFine cInvAccessor(const_cast<GaugeField &>(cloverInv));
146 
147  gCoarse yAccessor(const_cast<GaugeField &>(Y));
148  gCoarse xAccessor(const_cast<GaugeField &>(X));
149  gCoarseAtomic yAccessorAtomic(const_cast<GaugeField &>(Yatomic));
150  gCoarseAtomic xAccessorAtomic(const_cast<GaugeField &>(Xatomic));
151 
152  // create a dummy clover field to allow us to call the external clover reduction routines elsewhere
153  calculateY<QUDA_CUDA_FIELD_LOCATION, true, Float, fineSpin, fineColor, coarseSpin, coarseColor>(
154  yAccessor, xAccessor, yAccessorAtomic, xAccessorAtomic, uvAccessor, vAccessor, vAccessor, gAccessor,
155  cAccessor, cInvAccessor, Y, X, Yatomic, Xatomic, uv, const_cast<cudaColorSpinorField &>(v_), v_, g,
156  *dummyClover(), kappa, mass, mu, mu_factor, dirac, matpc, need_bidirectional, T.fineToCoarse(Y.Location()),
157  T.coarseToFine(Y.Location()), use_mma);
158  }
159  }
160  }
161 
162  // template on fine colors
163  template <typename Float, typename vFloat, int fineSpin>
164  void calculateYcoarse(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic, ColorSpinorField &uv,
165  const Transfer &T, const GaugeField &g, const GaugeField &clover, const GaugeField &cloverInv,
166  double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc,
167  bool need_bidirectional, bool use_mma)
168  {
169  if (T.Vectors().Nspin()/T.Spin_bs() != 2)
170  errorQuda("Unsupported number of coarse spins %d\n",T.Vectors().Nspin()/T.Spin_bs());
171  const int fineColor = g.Ncolor() / fineSpin;
172  const int coarseSpin = 2;
173  const int coarseColor = Y.Ncolor() / coarseSpin;
174 #ifdef NSPIN4
175  if (fineColor == 6) { // free field Wilson
176  if (coarseColor == 6) {
177  calculateYcoarse<Float, vFloat, 6, fineSpin, 6, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover, cloverInv,
178  kappa, mass, mu, mu_factor, dirac, matpc,
179  need_bidirectional, use_mma);
180  } else {
181  errorQuda("Unsupported fineColor = %d coarseColor = %d\n", fineColor, coarseColor);
182  }
183  } else
184 #endif
185  if (fineColor == 24) { // coarsened Wilson or free field staggered
186  if (coarseColor == 24) {
187  calculateYcoarse<Float, vFloat, 24, fineSpin, 24, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
188  cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
189  need_bidirectional, use_mma);
190  } else
191 #ifdef NSPIN4
192  if (coarseColor == 32) {
193  calculateYcoarse<Float, vFloat, 24, fineSpin, 32, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
194  cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
195  need_bidirectional, use_mma);
196  } else
197 #endif // NSPIN4
198 #ifdef NSPIN1
199  if (coarseColor == 64) {
200  calculateYcoarse<Float, vFloat, 24, fineSpin, 64, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
201  cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
202  need_bidirectional, use_mma);
203  } else // --- note, coarsening Nc == 24 -> Nc == 96 for staggered is worth revisiting in the future
204 #endif
205  {
206  errorQuda("Unsupported fineColor = %d coarseColor = %d\n", fineColor, coarseColor);
207  }
208 #ifdef NSPIN4
209  } else if (fineColor == 32) {
210  if (coarseColor == 32) {
211  calculateYcoarse<Float, vFloat, 32, fineSpin, 32, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
212  cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
213  need_bidirectional, use_mma);
214  } else {
215  errorQuda("Unsupported fineColor = %d coarseColor = %d\n", fineColor, coarseColor);
216  }
217 #endif // NSPIN4
218 #ifdef NSPIN1
219  } else if (fineColor == 64) {
220  if (coarseColor == 64) {
221  calculateYcoarse<Float, vFloat, 64, fineSpin, 64, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
222  cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
223  need_bidirectional, use_mma);
224  } else if (coarseColor == 96) {
225  calculateYcoarse<Float, vFloat, 64, fineSpin, 96, coarseSpin>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
226  cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
227  need_bidirectional, use_mma);
228  } else {
229  errorQuda("Unsupported fineColor = %d coarseColor = %d\n", fineColor, coarseColor);
230  } // --- note, revisit Nc == 96 -> Nc == 96 in the future
231 #endif // NSPIN1
232  } else {
233  errorQuda("Unsupported number of colors %d\n", g.Ncolor());
234  }
235  }
236 
237  // template on fine spin
238  template <typename Float, typename vFloat>
239  void calculateYcoarse(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic, ColorSpinorField &uv,
240  const Transfer &T, const GaugeField &g, const GaugeField &clover, const GaugeField &cloverInv,
241  double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc,
242  bool need_bidirectional, bool use_mma)
243  {
244  if (T.Vectors().Nspin() == 2) {
245  calculateYcoarse<Float, vFloat, 2>(Y, X, Yatomic, Xatomic, uv, T, g, clover, cloverInv, kappa, mass, mu, mu_factor,
246  dirac, matpc, need_bidirectional, use_mma);
247  } else {
248  errorQuda("Unsupported number of spins %d\n", T.Vectors().Nspin());
249  }
250  }
251 
252  //Does the heavy lifting of creating the coarse color matrices Y
253  void calculateYcoarse(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic, ColorSpinorField &uv,
254  const Transfer &T, const GaugeField &g, const GaugeField &clover, const GaugeField &cloverInv,
255  double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc,
256  bool need_bidirectional, bool use_mma)
257  {
258 #ifdef GPU_MULTIGRID
259  checkPrecision(X, Y, g, clover, cloverInv, uv, T.Vectors(X.Location()));
260  checkPrecision(Xatomic, Yatomic);
261 
262  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Computing Y field......\n");
263  if (Y.Precision() == QUDA_DOUBLE_PRECISION) {
264 #ifdef GPU_MULTIGRID_DOUBLE
265  if (use_mma) errorQuda("MG-MMA does not support double precision, yet.");
266  if (T.Vectors(X.Location()).Precision() == QUDA_DOUBLE_PRECISION) {
267  calculateYcoarse<double, double>(Y, X, Yatomic, Xatomic, uv, T, g, clover, cloverInv, kappa, mass, mu, mu_factor,
268  dirac, matpc, need_bidirectional, use_mma);
269  } else {
270  errorQuda("Unsupported precision %d\n", Y.Precision());
271  }
272 #else
273  errorQuda("Double precision multigrid has not been enabled");
274 #endif
275  } else if (Y.Precision() == QUDA_SINGLE_PRECISION) {
276 #if QUDA_PRECISION & 4
277  if (T.Vectors(X.Location()).Precision() == QUDA_SINGLE_PRECISION) {
278  calculateYcoarse<float, float>(Y, X, Yatomic, Xatomic, uv, T, g, clover, cloverInv, kappa, mass, mu, mu_factor, dirac,
279  matpc, need_bidirectional, use_mma);
280  } else {
281  errorQuda("Unsupported precision %d\n", T.Vectors(X.Location()).Precision());
282  }
283 #else
284  errorQuda("QUDA_PRECISION=%d does not enable single precision", QUDA_PRECISION);
285 #endif
286  } else if (Y.Precision() == QUDA_HALF_PRECISION) {
287 #if QUDA_PRECISION & 2
288  if (T.Vectors(X.Location()).Precision() == QUDA_HALF_PRECISION) {
289  calculateYcoarse<float, short>(Y, X, Yatomic, Xatomic, uv, T, g, clover, cloverInv, kappa, mass, mu, mu_factor, dirac,
290  matpc, need_bidirectional, use_mma);
291  } else {
292  errorQuda("Unsupported precision %d\n", T.Vectors(X.Location()).Precision());
293  }
294 #else
295  errorQuda("QUDA_PRECISION=%d does not enable half precision", QUDA_PRECISION);
296 #endif
297  } else {
298  errorQuda("Unsupported precision %d\n", Y.Precision());
299  }
300  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("....done computing Y field\n");
301 #else
302  errorQuda("Multigrid has not been built");
303 #endif // GPU_MULTIGRID
304  }
305 
306  //Calculates the coarse color matrix and puts the result in Y.
307  //N.B. Assumes Y, X have been allocated.
308  void CoarseCoarseOp(GaugeField &Y, GaugeField &X, const Transfer &T, const GaugeField &gauge,
309  const GaugeField &clover, const GaugeField &cloverInv, double kappa, double mass, double mu, double mu_factor,
310  QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional, bool use_mma)
311  {
312  QudaPrecision precision = Y.Precision();
313  QudaFieldLocation location = checkLocation(X, Y, gauge, clover, cloverInv);
314 
315  //Create a field UV which holds U*V. Has the same similar
316  //structure to V but double the number of spins so we can store
317  //the four distinct block chiral multiplications in a single UV
318  //computation.
319  ColorSpinorParam UVparam(T.Vectors(location));
320  UVparam.create = QUDA_ZERO_FIELD_CREATE;
321  UVparam.location = location;
322  UVparam.nSpin *= 2; // so nSpin == 4
323  UVparam.setPrecision(T.Vectors(location).Precision());
324  UVparam.mem_type = Y.MemType(); // allocate temporaries to match coarse-grid link field
325 
326  ColorSpinorField *uv = ColorSpinorField::Create(UVparam);
327 
328  if (!use_mma) {
329 
330  GaugeField *Yatomic = &Y;
331  GaugeField *Xatomic = &X;
332  if (Y.Precision() < QUDA_SINGLE_PRECISION) {
333  // we need to coarsen into single precision fields (float or int), so we allocate temporaries for this purpose
334  // else we can just coarsen directly into the original fields
335  GaugeFieldParam param(X); // use X since we want scalar geometry
336  param.location = location;
337  param.setPrecision(QUDA_SINGLE_PRECISION, location == QUDA_CUDA_FIELD_LOCATION ? true : false);
338 
339  Yatomic = GaugeField::Create(param);
340  Xatomic = GaugeField::Create(param);
341  }
342 
343  calculateYcoarse(Y, X, *Yatomic, *Xatomic, *uv, T, gauge, clover, cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
344  need_bidirectional, use_mma);
345 
346  if (Yatomic != &Y) delete Yatomic;
347  if (Xatomic != &X) delete Xatomic;
348 
349  } else {
350 
351  if (location == QUDA_CPU_FIELD_LOCATION) {
352  errorQuda("use_mma = true does not go with QUDA_CPU_FIELD_LOCATION.");
353  }
354 
355  // The MMA implementation requires fields to be in AoS order: we check each gauge field, and create a copy if not.
356  // The UV field is an intermediate field, its order doesn't matter; The AoS copy of the V field will be created
357  // later in the code path.
358  constexpr QudaGaugeFieldOrder gOrder = QUDA_MILC_GAUGE_ORDER;
359 
360  auto create_gauge_copy = [](const GaugeField &X, QudaGaugeFieldOrder order, bool copy_content) -> auto
361  {
362  GaugeField *output = nullptr;
363  if (X.Order() == order) {
364  output = const_cast<GaugeField *>(&X);
365  } else {
366  GaugeFieldParam param(X);
367  param.order = order;
368  output = cudaGaugeField::Create(param);
369  if (copy_content) output->copy(X);
370  }
371  return static_cast<cudaGaugeField *>(output);
372  };
373 
374  auto Y_order = create_gauge_copy(Y, gOrder, false);
375  auto X_order = create_gauge_copy(X, gOrder, false);
376  auto G_order = create_gauge_copy(gauge, gOrder, true);
377  auto C_order = create_gauge_copy(clover, gOrder, true);
378  auto I_order = create_gauge_copy(cloverInv, gOrder, true);
379 
380  GaugeField *Yatomic = nullptr;
381  GaugeField *Xatomic = nullptr;
382 
383  if (Y.Precision() < QUDA_SINGLE_PRECISION) {
384  // we need to coarsen into single precision fields (float or int), so we allocate temporaries for this purpose
385  // else we can just coarsen directly into the original fields
386  GaugeFieldParam param(*X_order); // use X since we want scalar geometry
387  param.location = location;
388  param.order = gOrder;
389  param.setPrecision(QUDA_SINGLE_PRECISION);
390 
391  Yatomic = GaugeField::Create(param);
392  Xatomic = GaugeField::Create(param);
393  } else {
394  Yatomic = Y_order;
395  Xatomic = X_order;
396  }
397 
398  calculateYcoarse(*Y_order, *X_order, *Yatomic, *Xatomic, *uv, T, *G_order, *C_order, *I_order, kappa, mass, mu,
399  mu_factor, dirac, matpc, need_bidirectional, use_mma);
400 
401  if (Yatomic != Y_order) delete Yatomic;
402  if (Xatomic != X_order) delete Xatomic;
403 
404  if (&Y != Y_order) {
405  Y.copy(*Y_order);
406  delete Y_order;
407  }
408 
409  if (&X != X_order) {
410  X.copy(*X_order);
411  delete X_order;
412  }
413 
414  if (&gauge != G_order) { delete G_order; }
415  if (&clover != C_order) { delete C_order; }
416  if (&cloverInv != I_order) { delete I_order; }
417  }
418 
419  delete uv;
420  }
421 } //namespace quda