2 #include <color_spinor_field.h>
3 #include <gauge_field.h>
4 #include <clover_field.h>
6 // This define controls which kernels get compiled in `coarse_op.cuh`.
7 // This ensures only kernels relevant for coarsening Wilson-type
8 // operators get built, saving compile time.
10 #include <coarse_op.cuh>
14 template <typename Float, typename vFloat, int fineColor, int fineSpin, int coarseColor, int coarseSpin>
15 void calculateY(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic,
16 ColorSpinorField &uv, ColorSpinorField &av, const Transfer &T,
17 const GaugeField &g, const CloverField &c, double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
19 QudaFieldLocation location = Y.Location();
21 bool need_bidirectional = false;
22 if (dirac == QUDA_CLOVERPC_DIRAC || dirac == QUDA_TWISTED_MASSPC_DIRAC || dirac == QUDA_TWISTED_CLOVERPC_DIRAC) { need_bidirectional = QUDA_BOOLEAN_TRUE; }
24 if (location == QUDA_CPU_FIELD_LOCATION) {
26 constexpr QudaFieldOrder csOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
27 constexpr QudaGaugeFieldOrder gOrder = QUDA_QDP_GAUGE_ORDER;
28 constexpr QudaCloverFieldOrder clOrder = QUDA_PACKED_CLOVER_ORDER;
30 if (T.Vectors(Y.Location()).FieldOrder() != csOrder)
31 errorQuda("Unsupported field order %d\n", T.Vectors(Y.Location()).FieldOrder());
32 if (g.FieldOrder() != gOrder) errorQuda("Unsupported field order %d\n", g.FieldOrder());
33 if (c.Order() != clOrder && c.Bytes()) errorQuda("Unsupported field order %d\n", c.Order());
35 typedef typename colorspinor::FieldOrderCB<Float,fineSpin,fineColor,coarseColor,csOrder,vFloat> V;
36 typedef typename colorspinor::FieldOrderCB<Float,fineSpin,fineColor,coarseColor,csOrder,vFloat> F;
37 typedef typename gauge::FieldOrder<Float,fineColor,1,gOrder> gFine;
38 typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,vFloat> gCoarse;
39 typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,storeType> gCoarseAtomic;
40 typedef typename clover::FieldOrder<Float,fineColor,fineSpin,clOrder> cFine;
42 const ColorSpinorField &v = T.Vectors(g.Location());
44 V vAccessor(const_cast<ColorSpinorField&>(v));
45 F uvAccessor(const_cast<ColorSpinorField&>(uv));
46 F avAccessor(const_cast<ColorSpinorField&>(av));
47 gFine gAccessor(const_cast<GaugeField&>(g));
48 gCoarse yAccessor(const_cast<GaugeField&>(Y));
49 gCoarse xAccessor(const_cast<GaugeField&>(X));
50 gCoarseAtomic yAccessorAtomic(const_cast<GaugeField&>(Yatomic));
51 gCoarseAtomic xAccessorAtomic(const_cast<GaugeField&>(Xatomic));
52 cFine cAccessor(const_cast<CloverField&>(c), false);
53 cFine cInvAccessor(const_cast<CloverField&>(c), true);
55 calculateY<QUDA_CPU_FIELD_LOCATION, false,Float,fineSpin,fineColor,coarseSpin,coarseColor>
56 (yAccessor, xAccessor, yAccessorAtomic, xAccessorAtomic, uvAccessor,
57 avAccessor, vAccessor, gAccessor, cAccessor, cInvAccessor, Y, X, Yatomic, Xatomic, uv, av, v, g, c,
58 kappa, mass, mu, mu_factor, dirac, matpc, need_bidirectional,
59 T.fineToCoarse(location), T.coarseToFine(location));
63 constexpr QudaFieldOrder csOrder = QUDA_FLOAT2_FIELD_ORDER;
64 constexpr QudaGaugeFieldOrder gOrder = QUDA_FLOAT2_GAUGE_ORDER;
65 constexpr QudaCloverFieldOrder clOrder = QUDA_FLOAT4_CLOVER_ORDER;
67 if (T.Vectors(Y.Location()).FieldOrder() != csOrder)
68 errorQuda("Unsupported field order %d\n", T.Vectors(Y.Location()).FieldOrder());
69 if (g.FieldOrder() != gOrder) errorQuda("Unsupported field order %d\n", g.FieldOrder());
70 if (c.Order() != clOrder && c.Bytes()) errorQuda("Unsupported field order %d\n", c.Order());
72 typedef typename colorspinor::FieldOrderCB<Float,fineSpin,fineColor,coarseColor,csOrder,vFloat,vFloat,false,false> F;
73 typedef typename gauge::FieldOrder<Float,fineColor,1,gOrder,true,Float> gFine;
74 typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,vFloat> gCoarse;
75 typedef typename gauge::FieldOrder<Float,coarseColor*coarseSpin,coarseSpin,gOrder,true,storeType> gCoarseAtomic;
76 typedef typename clover::FieldOrder<Float,fineColor,fineSpin,clOrder> cFine;
78 const ColorSpinorField &v = T.Vectors(g.Location());
80 F vAccessor(const_cast<ColorSpinorField&>(v));
81 F uvAccessor(const_cast<ColorSpinorField&>(uv));
82 F avAccessor(const_cast<ColorSpinorField&>(av));
83 gFine gAccessor(const_cast<GaugeField&>(g));
84 gCoarse yAccessor(const_cast<GaugeField&>(Y));
85 gCoarse xAccessor(const_cast<GaugeField&>(X));
86 gCoarseAtomic yAccessorAtomic(const_cast<GaugeField&>(Yatomic));
87 gCoarseAtomic xAccessorAtomic(const_cast<GaugeField&>(Xatomic));
88 cFine cAccessor(const_cast<CloverField&>(c), false);
89 cFine cInvAccessor(const_cast<CloverField&>(c), true);
91 calculateY<QUDA_CUDA_FIELD_LOCATION, false,Float,fineSpin,fineColor,coarseSpin,coarseColor>
92 (yAccessor, xAccessor, yAccessorAtomic, xAccessorAtomic, uvAccessor,
93 avAccessor, vAccessor, gAccessor, cAccessor, cInvAccessor, Y, X, Yatomic, Xatomic, uv, av, v, g, c,
94 kappa, mass, mu, mu_factor, dirac, matpc, need_bidirectional,
95 T.fineToCoarse(location), T.coarseToFine(location));
100 // template on the number of coarse degrees of freedom
101 template <typename Float, typename vFloat, int fineColor, int fineSpin>
102 void calculateY(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic,
103 ColorSpinorField &uv, ColorSpinorField &av, const Transfer &T, const GaugeField &g, const CloverField &c,
104 double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
106 if (T.Vectors().Nspin()/T.Spin_bs() != 2)
107 errorQuda("Unsupported number of coarse spins %d\n",T.Vectors().Nspin()/T.Spin_bs());
108 const int coarseSpin = 2;
109 const int coarseColor = Y.Ncolor() / coarseSpin;
112 if (coarseColor == 6) { // free field Wilson
113 calculateY<Float,vFloat,fineColor,fineSpin,6,coarseSpin>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
114 } else if (coarseColor == 24) {
115 calculateY<Float,vFloat,fineColor,fineSpin,24,coarseSpin>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
116 } else if (coarseColor == 32) {
117 calculateY<Float,vFloat,fineColor,fineSpin,32,coarseSpin>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
121 errorQuda("Unsupported number of coarse dof %d\n", Y.Ncolor());
125 // template on fine spin
126 template <typename Float, typename vFloat, int fineColor>
127 void calculateY(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic,
128 ColorSpinorField &uv, ColorSpinorField &av, const Transfer &T, const GaugeField &g, const CloverField &c,
129 double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
131 if (uv.Nspin() == 4) {
132 calculateY<Float,vFloat,fineColor,4>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
134 errorQuda("Unsupported number of spins %d\n", uv.Nspin());
138 // template on fine colors
139 template <typename Float, typename vFloat>
140 void calculateY(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic,
141 ColorSpinorField &uv, ColorSpinorField &av, const Transfer &T, const GaugeField &g, const CloverField &c,
142 double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
144 if (g.Ncolor() == 3) {
145 calculateY<Float,vFloat,3>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
147 errorQuda("Unsupported number of colors %d\n", g.Ncolor());
151 //Does the heavy lifting of creating the coarse color matrices Y
152 void calculateY(GaugeField &Y, GaugeField &X, GaugeField &Yatomic, GaugeField &Xatomic,
153 ColorSpinorField &uv, ColorSpinorField &av, const Transfer &T, const GaugeField &g,
154 const CloverField &c, double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
157 checkPrecision(Xatomic, Yatomic, g);
158 checkPrecision(uv, av, T.Vectors(X.Location()), X, Y);
160 if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Computing Y field......\n");
162 if (Y.Precision() == QUDA_DOUBLE_PRECISION) {
163 #ifdef GPU_MULTIGRID_DOUBLE
164 if (T.Vectors(X.Location()).Precision() == QUDA_DOUBLE_PRECISION) {
165 calculateY<double,double>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
167 errorQuda("Unsupported precision %d\n", Y.Precision());
170 errorQuda("Double precision multigrid has not been enabled");
172 } else if (Y.Precision() == QUDA_SINGLE_PRECISION) {
173 #if QUDA_PRECISION & 4
174 if (T.Vectors(X.Location()).Precision() == QUDA_SINGLE_PRECISION) {
175 calculateY<float,float>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
177 errorQuda("Unsupported precision %d\n", T.Vectors(X.Location()).Precision());
180 errorQuda("QUDA_PRECISION=%d does not enable single precision", QUDA_PRECISION);
182 } else if (Y.Precision() == QUDA_HALF_PRECISION) {
183 #if QUDA_PRECISION & 2
184 if (T.Vectors(X.Location()).Precision() == QUDA_HALF_PRECISION) {
185 calculateY<float,short>(Y, X, Yatomic, Xatomic, uv, av, T, g, c, kappa, mass, mu, mu_factor, dirac, matpc);
187 errorQuda("Unsupported precision %d\n", T.Vectors(X.Location()).Precision());
190 errorQuda("QUDA_PRECISION=%d does not enable half precision", QUDA_PRECISION);
193 errorQuda("Unsupported precision %d\n", Y.Precision());
195 if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("....done computing Y field\n");
197 errorQuda("Multigrid has not been built");
198 #endif // GPU_MULTIGRID
201 //Calculates the coarse color matrix and puts the result in Y.
202 //N.B. Assumes Y, X have been allocated.
203 void CoarseOp(GaugeField &Y, GaugeField &X, const Transfer &T,
204 const cudaGaugeField &gauge, const cudaCloverField *clover,
205 double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
207 QudaPrecision precision = Y.Precision();
208 QudaFieldLocation location = checkLocation(Y, X);
210 GaugeField *U = location == QUDA_CUDA_FIELD_LOCATION ? const_cast<cudaGaugeField*>(&gauge) : nullptr;
211 CloverField *C = location == QUDA_CUDA_FIELD_LOCATION ? const_cast<cudaCloverField*>(clover) : nullptr;
213 if (location == QUDA_CPU_FIELD_LOCATION) {
214 //First make a cpu gauge field from the cuda gauge field
216 GaugeFieldParam gf_param(gauge.X(), precision, QUDA_RECONSTRUCT_NO, pad, gauge.Geometry());
217 gf_param.order = QUDA_QDP_GAUGE_ORDER;
218 gf_param.fixed = gauge.GaugeFixed();
219 gf_param.link_type = gauge.LinkType();
220 gf_param.t_boundary = gauge.TBoundary();
221 gf_param.anisotropy = gauge.Anisotropy();
222 gf_param.gauge = NULL;
223 gf_param.create = QUDA_NULL_FIELD_CREATE;
224 gf_param.siteSubset = QUDA_FULL_SITE_SUBSET;
226 gf_param.ghostExchange = QUDA_GHOST_EXCHANGE_PAD;
228 U = new cpuGaugeField(gf_param);
230 //Copy the cuda gauge field to the cpu
231 gauge.saveCPUField(*static_cast<cpuGaugeField*>(U));
232 } else if (location == QUDA_CUDA_FIELD_LOCATION && gauge.Reconstruct() != QUDA_RECONSTRUCT_NO) {
233 //Create a copy of the gauge field with no reconstruction, required for fine-grained access
234 GaugeFieldParam gf_param(gauge);
235 gf_param.reconstruct = QUDA_RECONSTRUCT_NO;
236 gf_param.order = QUDA_FLOAT2_GAUGE_ORDER;
237 gf_param.setPrecision(gf_param.Precision());
238 U = new cudaGaugeField(gf_param);
243 CloverFieldParam cf_param;
246 cf_param.setPrecision(clover ? clover->Precision() : QUDA_SINGLE_PRECISION);
248 // if we have no clover term then create an empty clover field
249 for(int i = 0; i < cf_param.nDim; i++) cf_param.x[i] = clover ? clover->X()[i] : 0;
251 cf_param.direct = true;
252 cf_param.inverse = true;
253 cf_param.clover = NULL;
255 cf_param.cloverInv = NULL;
256 cf_param.invNorm = 0;
257 cf_param.create = QUDA_NULL_FIELD_CREATE;
258 cf_param.siteSubset = QUDA_FULL_SITE_SUBSET;
260 if (location == QUDA_CUDA_FIELD_LOCATION && !clover) {
261 // create a dummy cudaCloverField if one is not defined
262 cf_param.order = QUDA_INVALID_CLOVER_ORDER;
263 C = new cudaCloverField(cf_param);
264 } else if (location == QUDA_CPU_FIELD_LOCATION) {
265 //Create a cpuCloverField from the cudaCloverField
266 cf_param.order = QUDA_PACKED_CLOVER_ORDER;
267 C = new cpuCloverField(cf_param);
268 if (clover) clover->saveCPUField(*static_cast<cpuCloverField*>(C));
271 //Create a field UV which holds U*V. Has the same structure as V.
272 ColorSpinorParam UVparam(T.Vectors(location));
273 UVparam.create = QUDA_ZERO_FIELD_CREATE;
274 UVparam.location = location;
275 UVparam.setPrecision(T.Vectors(location).Precision());
276 UVparam.mem_type = Y.MemType(); // allocate temporaries to match coarse-grid link field
278 ColorSpinorField *uv = ColorSpinorField::Create(UVparam);
280 // if we are coarsening a preconditioned clover or twisted-mass operator we need
281 // an additional vector to store the cloverInv * V field, else just alias v
282 ColorSpinorField *av = ((matpc != QUDA_MATPC_INVALID && clover) || (dirac == QUDA_TWISTED_MASSPC_DIRAC)) ?
283 ColorSpinorField::Create(UVparam) : &const_cast<ColorSpinorField&>(T.Vectors(location));
285 GaugeField *Yatomic = &Y;
286 GaugeField *Xatomic = &X;
287 if (Y.Precision() < QUDA_SINGLE_PRECISION) {
288 // we need to coarsen into single precision fields (float or int), so we allocate temporaries for this purpose
289 // else we can just coarsen directly into the original fields
290 GaugeFieldParam param(X); // use X since we want scalar geometry
291 param.location = location;
292 param.setPrecision(QUDA_SINGLE_PRECISION, location == QUDA_CUDA_FIELD_LOCATION ? true : false);
294 Yatomic = GaugeField::Create(param);
295 Xatomic = GaugeField::Create(param);
298 calculateY(Y, X, *Yatomic, *Xatomic, *uv, *av, T, *U, *C, kappa, mass, mu, mu_factor, dirac, matpc);
300 if (Yatomic != &Y) delete Yatomic;
301 if (Xatomic != &X) delete Xatomic;
303 if (&T.Vectors(location) != av) delete av;
306 if (C != clover) delete C;
307 if (U != &gauge) delete U;