QUDA  v1.1.0
A library for QCD on GPUs
coarse_op.cu
Go to the documentation of this file.
1 #include <transfer.h>
2 #include <color_spinor_field.h>
3 #include <gauge_field.h>
4 #include <clover_field.h>
5 
6 // This define controls which kernels get compiled in `coarse_op.cuh`.
7 // This ensures only kernels relevant for coarsening Wilson-type
8 // operators get built, saving compile time.
9 #define WILSONCOARSE
10 #include <coarse_op.cuh>
11 
12 namespace quda {
13 
14  template <typename Float, typename vFloat, int fineColor, int fineSpin, int coarseColor, int coarseSpin>
15  void calculateY(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic,
16  ColorSpinorField &uv, ColorSpinorField &av, const Transfer &T,
17  const GaugeField &g, const CloverField &c, double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
18  {
19  QudaFieldLocation location = Y.Location();
20 
21  bool need_bidirectional = false;
22  if (dirac == QUDA_CLOVERPC_DIRAC || dirac == QUDA_TWISTED_MASSPC_DIRAC || dirac == QUDA_TWISTED_CLOVERPC_DIRAC) { need_bidirectional = QUDA_BOOLEAN_TRUE; }
23 
24  if (location == QUDA_CPU_FIELD_LOCATION) {
25 
26  constexpr QudaFieldOrder csOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
27  constexpr QudaGaugeFieldOrder gOrder = QUDA_QDP_GAUGE_ORDER;
28  constexpr QudaCloverFieldOrder clOrder = QUDA_PACKED_CLOVER_ORDER;
29 
30  if (T.Vectors(Y.Location()).FieldOrder() != csOrder)
31  errorQuda("Unsupported field order %d\n", T.Vectors(Y.Location()).FieldOrder());
32  if (g.FieldOrder() != gOrder) errorQuda("Unsupported field order %d\n", g.FieldOrder());
33  if (c.Order() != clOrder && c.Bytes()) errorQuda("Unsupported field order %d\n", c.Order());
34 
35  typedef typename colorspinor::FieldOrderCB<Float,fineSpin,fineColor,coarseColor,csOrder,vFloat> V;
36  typedef typename colorspinor::FieldOrderCB<Float,fineSpin,fineColor,coarseColor,csOrder,vFloat> F;
37  typedef typename gauge::FieldOrder<Float,fineColor,1,gOrder> gFine;
38  typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,vFloat> gCoarse;
39  typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,storeType> gCoarseAtomic;
40  typedef typename clover::FieldOrder<Float,fineColor,fineSpin,clOrder> cFine;
41 
42  const ColorSpinorField &v = T.Vectors(g.Location());
43 
44  V vAccessor(const_cast<ColorSpinorField&>(v));
45  F uvAccessor(const_cast<ColorSpinorField&>(uv));
46  F avAccessor(const_cast<ColorSpinorField&>(av));
47  gFine gAccessor(const_cast<GaugeField&>(g));
48  gCoarse yAccessor(const_cast<GaugeField&>(Y));
49  gCoarse xAccessor(const_cast<GaugeField&>(X));
50  gCoarseAtomic yAccessorAtomic(const_cast<GaugeField&>(Yatomic));
51  gCoarseAtomic xAccessorAtomic(const_cast<GaugeField&>(Xatomic));
52  cFine cAccessor(const_cast<CloverField&>(c), false);
53  cFine cInvAccessor(const_cast<CloverField&>(c), true);
54 
55  calculateY<QUDA_CPU_FIELD_LOCATION, false,Float,fineSpin,fineColor,coarseSpin,coarseColor>
56  (yAccessor, xAccessor, yAccessorAtomic, xAccessorAtomic, uvAccessor,
57  avAccessor, vAccessor, gAccessor, cAccessor, cInvAccessor, Y, X, Yatomic, Xatomic, uv, av, v, g, c,
58  kappa, mass, mu, mu_factor, dirac, matpc, need_bidirectional,
59  T.fineToCoarse(location), T.coarseToFine(location));
60 
61  } else {
62 
63  constexpr QudaFieldOrder csOrder = QUDA_FLOAT2_FIELD_ORDER;
64  constexpr QudaGaugeFieldOrder gOrder = QUDA_FLOAT2_GAUGE_ORDER;
65  constexpr QudaCloverFieldOrder clOrder = QUDA_FLOAT4_CLOVER_ORDER;
66 
67  if (T.Vectors(Y.Location()).FieldOrder() != csOrder)
68  errorQuda("Unsupported field order %d\n", T.Vectors(Y.Location()).FieldOrder());
69  if (g.FieldOrder() != gOrder) errorQuda("Unsupported field order %d\n", g.FieldOrder());
70  if (c.Order() != clOrder && c.Bytes()) errorQuda("Unsupported field order %d\n", c.Order());
71 
72  typedef typename colorspinor::FieldOrderCB<Float,fineSpin,fineColor,coarseColor,csOrder,vFloat,vFloat,false,false> F;
73  typedef typename gauge::FieldOrder<Float,fineColor,1,gOrder,true,Float> gFine;
74  typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,vFloat> gCoarse;
75  typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,storeType> gCoarseAtomic;
76  typedef typename clover::FieldOrder<Float,fineColor,fineSpin,clOrder> cFine;
77 
78  const ColorSpinorField &v = T.Vectors(g.Location());
79 
80  F vAccessor(const_cast<ColorSpinorField&>(v));
81  F uvAccessor(const_cast<ColorSpinorField&>(uv));
82  F avAccessor(const_cast<ColorSpinorField&>(av));
83  gFine gAccessor(const_cast<GaugeField&>(g));
84  gCoarse yAccessor(const_cast<GaugeField&>(Y));
85  gCoarse xAccessor(const_cast<GaugeField&>(X));
86  gCoarseAtomic yAccessorAtomic(const_cast<GaugeField&>(Yatomic));
87  gCoarseAtomic xAccessorAtomic(const_cast<GaugeField&>(Xatomic));
88  cFine cAccessor(const_cast<CloverField&>(c), false);
89  cFine cInvAccessor(const_cast<CloverField&>(c), true);
90 
91  calculateY<QUDA_CUDA_FIELD_LOCATION, false,Float,fineSpin,fineColor,coarseSpin,coarseColor>
92  (yAccessor, xAccessor, yAccessorAtomic, xAccessorAtomic, uvAccessor,
93  avAccessor, vAccessor, gAccessor, cAccessor, cInvAccessor, Y, X, Yatomic, Xatomic, uv, av, v, g, c,
94  kappa, mass, mu, mu_factor, dirac, matpc, need_bidirectional,
95  T.fineToCoarse(location), T.coarseToFine(location));
96  }
97 
98  }
99 
100  // template on the number of coarse degrees of freedom
101  template <typename Float, typename vFloat, int fineColor, int fineSpin>
102  void calculateY(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic,
103  ColorSpinorField &uv, ColorSpinorField &av, const Transfer &T, const GaugeField &g, const CloverField &c,
104  double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
105  {
106  if (T.Vectors().Nspin()/T.Spin_bs() != 2)
107  errorQuda("Unsupported number of coarse spins %d\n",T.Vectors().Nspin()/T.Spin_bs());
108  const int coarseSpin = 2;
109  const int coarseColor = Y.Ncolor() / coarseSpin;
110 
111 #ifdef NSPIN4
112  if (coarseColor == 6) { // free field Wilson
113  calculateY<Float,vFloat,fineColor,fineSpin,6,coarseSpin>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
114  } else if (coarseColor == 24) {
115  calculateY<Float,vFloat,fineColor,fineSpin,24,coarseSpin>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
116  } else if (coarseColor == 32) {
117  calculateY<Float,vFloat,fineColor,fineSpin,32,coarseSpin>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
118  } else
119 #endif
120  {
121  errorQuda("Unsupported number of coarse dof %d\n", Y.Ncolor());
122  }
123  }
124 
125  // template on fine spin
126  template <typename Float, typename vFloat, int fineColor>
127  void calculateY(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic,
128  ColorSpinorField &uv, ColorSpinorField &av, const Transfer &T, const GaugeField &g, const CloverField &c,
129  double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
130  {
131  if (uv.Nspin() == 4) {
132  calculateY<Float,vFloat,fineColor,4>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
133  } else {
134  errorQuda("Unsupported number of spins %d\n", uv.Nspin());
135  }
136  }
137 
138  // template on fine colors
139  template <typename Float, typename vFloat>
140  void calculateY(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic,
141  ColorSpinorField &uv, ColorSpinorField &av, const Transfer &T, const GaugeField &g, const CloverField &c,
142  double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
143  {
144  if (g.Ncolor() == 3) {
145  calculateY<Float,vFloat,3>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
146  } else {
147  errorQuda("Unsupported number of colors %d\n", g.Ncolor());
148  }
149  }
150 
151  //Does the heavy lifting of creating the coarse color matrices Y
152  void calculateY(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic,
153  ColorSpinorField &uv, ColorSpinorField &av, const Transfer &T, const GaugeField &g,
154  const CloverField &c, double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
155  {
156 #ifdef GPU_MULTIGRID
157  checkPrecision(Xatomic, Yatomic, g);
158  checkPrecision(uv, av, T.Vectors(X.Location()), X, Y);
159 
160  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Computing Y field......\n");
161 
162  if (Y.Precision() == QUDA_DOUBLE_PRECISION) {
163 #ifdef GPU_MULTIGRID_DOUBLE
164  if (T.Vectors(X.Location()).Precision() == QUDA_DOUBLE_PRECISION) {
165  calculateY<double,double>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
166  } else {
167  errorQuda("Unsupported precision %d\n", Y.Precision());
168  }
169 #else
170  errorQuda("Double precision multigrid has not been enabled");
171 #endif
172  } else if (Y.Precision() == QUDA_SINGLE_PRECISION) {
173 #if QUDA_PRECISION & 4
174  if (T.Vectors(X.Location()).Precision() == QUDA_SINGLE_PRECISION) {
175  calculateY<float,float>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
176  } else {
177  errorQuda("Unsupported precision %d\n", T.Vectors(X.Location()).Precision());
178  }
179 #else
180  errorQuda("QUDA_PRECISION=%d does not enable single precision", QUDA_PRECISION);
181 #endif
182  } else if (Y.Precision() == QUDA_HALF_PRECISION) {
183 #if QUDA_PRECISION & 2
184  if (T.Vectors(X.Location()).Precision() == QUDA_HALF_PRECISION) {
185  calculateY<float,short>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
186  } else {
187  errorQuda("Unsupported precision %d\n", T.Vectors(X.Location()).Precision());
188  }
189 #else
190  errorQuda("QUDA_PRECISION=%d does not enable half precision", QUDA_PRECISION);
191 #endif
192  } else {
193  errorQuda("Unsupported precision %d\n", Y.Precision());
194  }
195  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("....done computing Y field\n");
196 #else
197  errorQuda("Multigrid has not been built");
198 #endif // GPU_MULTIGRID
199  }
200 
201  //Calculates the coarse color matrix and puts the result in Y.
202  //N.B. Assumes Y, X have been allocated.
203  void CoarseOp(GaugeField &Y, GaugeField &X, const Transfer &T,
204  const cudaGaugeField &gauge, const cudaCloverField *clover,
205  double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
206  {
207  QudaPrecision precision = Y.Precision();
208  QudaFieldLocation location = checkLocation(Y, X);
209 
210  GaugeField *U = location == QUDA_CUDA_FIELD_LOCATION ? const_cast<cudaGaugeField*>(&gauge) : nullptr;
211  CloverField *C = location == QUDA_CUDA_FIELD_LOCATION ? const_cast<cudaCloverField*>(clover) : nullptr;
212 
213  if (location == QUDA_CPU_FIELD_LOCATION) {
214  //First make a cpu gauge field from the cuda gauge field
215  int pad = 0;
216  GaugeFieldParam gf_param(gauge.X(), precision, QUDA_RECONSTRUCT_NO, pad, gauge.Geometry());
217  gf_param.order = QUDA_QDP_GAUGE_ORDER;
218  gf_param.fixed = gauge.GaugeFixed();
219  gf_param.link_type = gauge.LinkType();
220  gf_param.t_boundary = gauge.TBoundary();
221  gf_param.anisotropy = gauge.Anisotropy();
222  gf_param.gauge = NULL;
223  gf_param.create = QUDA_NULL_FIELD_CREATE;
224  gf_param.siteSubset = QUDA_FULL_SITE_SUBSET;
225  gf_param.nFace = 1;
226  gf_param.ghostExchange = QUDA_GHOST_EXCHANGE_PAD;
227 
228  U = new cpuGaugeField(gf_param);
229 
230  //Copy the cuda gauge field to the cpu
231  gauge.saveCPUField(*static_cast<cpuGaugeField*>(U));
232  } else if (location == QUDA_CUDA_FIELD_LOCATION && gauge.Reconstruct() != QUDA_RECONSTRUCT_NO) {
233  //Create a copy of the gauge field with no reconstruction, required for fine-grained access
234  GaugeFieldParam gf_param(gauge);
235  gf_param.reconstruct = QUDA_RECONSTRUCT_NO;
236  gf_param.order = QUDA_FLOAT2_GAUGE_ORDER;
237  gf_param.setPrecision(gf_param.Precision());
238  U = new cudaGaugeField(gf_param);
239 
240  U->copy(gauge);
241  }
242 
243  CloverFieldParam cf_param;
244  cf_param.nDim = 4;
245  cf_param.pad = 0;
246  cf_param.setPrecision(clover ? clover->Precision() : QUDA_SINGLE_PRECISION);
247 
248  // if we have no clover term then create an empty clover field
249  for(int i = 0; i < cf_param.nDim; i++) cf_param.x[i] = clover ? clover->X()[i] : 0;
250 
251  cf_param.direct = true;
252  cf_param.inverse = true;
253  cf_param.clover = NULL;
254  cf_param.norm = 0;
255  cf_param.cloverInv = NULL;
256  cf_param.invNorm = 0;
257  cf_param.create = QUDA_NULL_FIELD_CREATE;
258  cf_param.siteSubset = QUDA_FULL_SITE_SUBSET;
259 
260  if (location == QUDA_CUDA_FIELD_LOCATION && !clover) {
261  // create a dummy cudaCloverField if one is not defined
262  cf_param.order = QUDA_INVALID_CLOVER_ORDER;
263  C = new cudaCloverField(cf_param);
264  } else if (location == QUDA_CPU_FIELD_LOCATION) {
265  //Create a cpuCloverField from the cudaCloverField
266  cf_param.order = QUDA_PACKED_CLOVER_ORDER;
267  C = new cpuCloverField(cf_param);
268  if (clover) clover->saveCPUField(*static_cast<cpuCloverField*>(C));
269  }
270 
271  //Create a field UV which holds U*V. Has the same structure as V.
272  ColorSpinorParam UVparam(T.Vectors(location));
273  UVparam.create = QUDA_ZERO_FIELD_CREATE;
274  UVparam.location = location;
275  UVparam.setPrecision(T.Vectors(location).Precision());
276  UVparam.mem_type = Y.MemType(); // allocate temporaries to match coarse-grid link field
277 
278  ColorSpinorField *uv = ColorSpinorField::Create(UVparam);
279 
280  // if we are coarsening a preconditioned clover or twisted-mass operator we need
281  // an additional vector to store the cloverInv * V field, else just alias v
282  ColorSpinorField *av = ((matpc != QUDA_MATPC_INVALID && clover) || (dirac == QUDA_TWISTED_MASSPC_DIRAC)) ?
283  ColorSpinorField::Create(UVparam) : &const_cast<ColorSpinorField&>(T.Vectors(location));
284 
285  GaugeField *Yatomic = &Y;
286  GaugeField *Xatomic = &X;
287  if (Y.Precision() < QUDA_SINGLE_PRECISION) {
288  // we need to coarsen into single precision fields (float or int), so we allocate temporaries for this purpose
289  // else we can just coarsen directly into the original fields
290  GaugeFieldParam param(X); // use X since we want scalar geometry
291  param.location = location;
292  param.setPrecision(QUDA_SINGLE_PRECISION, location == QUDA_CUDA_FIELD_LOCATION ? true : false);
293 
294  Yatomic = GaugeField::Create(param);
295  Xatomic = GaugeField::Create(param);
296  }
297 
298  calculateY(Y, X, *Yatomic, *Xatomic, *uv, *av, T, *U, *C, kappa, mass, mu, mu_factor, dirac, matpc);
299 
300  if (Yatomic != &Y) delete Yatomic;
301  if (Xatomic != &X) delete Xatomic;
302 
303  if (&T.Vectors(location) != av) delete av;
304  delete uv;
305 
306  if (C != clover) delete C;
307  if (U != &gauge) delete U;
308  }
309 
310 } //namespace quda