QUDA  0.9.0
dirac_coarse.cpp
Go to the documentation of this file.
1 #include <string.h>
2 #include <multigrid.h>
3 #include <algorithm>
4 
5 namespace quda {
6 
7  DiracCoarse::DiracCoarse(const DiracParam &param, bool enable_gpu)
9  Y_h(nullptr), X_h(nullptr), Xinv_h(nullptr), Yhat_h(nullptr),
10  Y_d(nullptr), X_d(nullptr), Xinv_d(nullptr), Yhat_d(nullptr),
11  enable_gpu(enable_gpu), init(true)
12  {
14  }
15 
19  : Dirac(param), mu(param.mu), mu_factor(param.mu_factor), transfer(nullptr), dirac(nullptr),
22  enable_gpu(Y_d && X_d && Xinv_d), init(false)
23  {
24 
25  }
26 
31  enable_gpu(dirac.enable_gpu), init(false)
32  {
33 
34  }
35 
37  {
38  if (init) {
39  if (Y_h) delete Y_h;
40  if (X_h) delete X_h;
41  if (Xinv_h) delete Xinv_h;
42  if (Yhat_h) delete Yhat_h;
43  if (Y_d) delete Y_d;
44  if (X_d) delete X_d;
45  if (Xinv_d) delete Xinv_d;
46  if (Yhat_d) delete Yhat_d;
47  }
48  }
49 
51  {
53  int ndim = transfer->Vectors().Ndim();
54  int x[QUDA_MAX_DIM];
55  //Number of coarse sites.
56  const int *geo_bs = transfer->Geo_bs();
57  for (int i = 0; i < ndim; i++) x[i] = transfer->Vectors().X(i)/geo_bs[i];
58 
59  //Coarse Color
60  int Nc_c = transfer->nvec();
61 
62  //Coarse Spin
63  int Ns_c = transfer->Vectors().Nspin()/transfer->Spin_bs();
64 
66  memcpy(gParam.x, x, QUDA_MAX_DIM*sizeof(int));
67  gParam.nColor = Nc_c*Ns_c;
68  gParam.reconstruct = QUDA_RECONSTRUCT_NO;
70  gParam.link_type = QUDA_COARSE_LINKS;
71  gParam.t_boundary = QUDA_PERIODIC_T;
73  gParam.precision = prec;
74  gParam.nDim = ndim;
75  gParam.siteSubset = QUDA_FULL_SITE_SUBSET;
76  gParam.ghostExchange = QUDA_GHOST_EXCHANGE_PAD;
77  gParam.nFace = 1;
78 
79  gParam.geometry = QUDA_COARSE_GEOMETRY;
80 
81  Y_h = new cpuGaugeField(gParam);
83 
84  gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
85  gParam.nFace = 0;
86 
87  gParam.geometry = QUDA_SCALAR_GEOMETRY;
88  X_h = new cpuGaugeField(gParam);
90 
91  if (enable_gpu) {
92  gParam.ghostExchange = QUDA_GHOST_EXCHANGE_PAD;
93  gParam.nFace = 1;
95  gParam.geometry = QUDA_COARSE_GEOMETRY;
96  int pad = std::max( { (x[0]*x[1]*x[2])/2, (x[1]*x[2]*x[3])/2, (x[0]*x[2]*x[3])/2, (x[0]*x[1]*x[3])/2 } );
97  gParam.pad = gParam.nFace * pad * 2; // factor of 2 since we have to store bi-directional ghost zone
98  Y_d = new cudaGaugeField(gParam);
100 
101  gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
102  gParam.nFace = 0;
103  gParam.pad = 0;
104 
105  gParam.geometry = QUDA_SCALAR_GEOMETRY;
106  gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
107  X_d = new cudaGaugeField(gParam);
109  }
110 
111  bool gpu_setup = true;
112 
113  if (enable_gpu && gpu_setup) dirac->createCoarseOp(*Y_d,*X_d,*Xinv_d,*Yhat_d,*transfer,kappa,Mu(),MuFactor());
115 
116  if (enable_gpu) {
117  if (gpu_setup) {
118  Y_h->copy(*Y_d);
119  Yhat_h->copy(*Yhat_d);
120  X_h->copy(*X_d);
121  Xinv_h->copy(*Xinv_d);
122  } else {
123  Y_d->copy(*Y_h);
124  Yhat_d->copy(*Yhat_h);
125  X_d->copy(*X_h);
126  Xinv_d->copy(*Xinv_h);
127  }
128  }
129 
130  }
131 
133  {
134  if (&in == &out) errorQuda("Fields cannot alias");
136  if (!enable_gpu) errorQuda("Cannot apply %s on GPU since enable_gpu has not been set", __func__);
137  ApplyCoarse(out, in, in, *Y_d, *X_d, kappa, parity, false, true, dagger);
138  } else if ( checkLocation(out, in) == QUDA_CPU_FIELD_LOCATION ) {
139  ApplyCoarse(out, in, in, *Y_h, *X_h, kappa, parity, false, true, dagger);
140  }
141  int n = in.Nspin()*in.Ncolor();
142  flops += (8*n*n-2*n)*(long long)in.VolumeCB();
143  }
144 
146  {
147  if (&in == &out) errorQuda("Fields cannot alias");
149  if (!enable_gpu) errorQuda("Cannot apply %s on GPU since enable_gpu has not been set", __func__);
150  ApplyCoarse(out, in, in, *Y_d, *Xinv_d, kappa, parity, false, true, dagger);
151  } else if ( checkLocation(out, in) == QUDA_CPU_FIELD_LOCATION ) {
152  ApplyCoarse(out, in, in, *Y_h, *Xinv_h, kappa, parity, false, true, dagger);
153  }
154  int n = in.Nspin()*in.Ncolor();
155  flops += (8*n*n-2*n)*(long long)in.VolumeCB();
156  }
157 
159  const QudaParity parity) const
160  {
162  if (!enable_gpu) errorQuda("Cannot apply %s on GPU since enable_gpu has not been set", __func__);
163  ApplyCoarse(out, in, in, *Y_d, *X_d, kappa, parity, true, false, dagger);
164  } else if ( checkLocation(out, in) == QUDA_CPU_FIELD_LOCATION ) {
165  ApplyCoarse(out, in, in, *Y_h, *X_h, kappa, parity, true, false, dagger);
166  }
167  int n = in.Nspin()*in.Ncolor();
168  flops += (8*(8*n*n)-2*n)*(long long)in.VolumeCB()*in.SiteSubset();
169  }
170 
172  const QudaParity parity, const ColorSpinorField &x,
173  const double &k) const
174  {
175  if (k!=1.0) errorQuda("%s not supported for k!=1.0", __func__);
176 
178  if (!enable_gpu) errorQuda("Cannot apply %s on GPU since enable_gpu has not been set", __func__);
179  ApplyCoarse(out, in, x, *Y_d, *X_d, kappa, parity, true, true, dagger);
180  } else if ( checkLocation(out, in) == QUDA_CPU_FIELD_LOCATION ) {
181  ApplyCoarse(out, in, x, *Y_h, *X_h, kappa, parity, true, true, dagger);
182  }
183  int n = in.Nspin()*in.Ncolor();
184  flops += (9*(8*n*n)-2*n)*(long long)in.VolumeCB()*in.SiteSubset();
185  }
186 
188  {
190  if (!enable_gpu) errorQuda("Cannot apply %s on GPU since enable_gpu has not been set", __func__);
191  ApplyCoarse(out, in, in, *Y_d, *X_d, kappa, QUDA_INVALID_PARITY, true, true, dagger);
192  } else if ( checkLocation(out, in) == QUDA_CPU_FIELD_LOCATION ) {
193  ApplyCoarse(out, in, in, *Y_h, *X_h, kappa, QUDA_INVALID_PARITY, true, true, dagger);
194  }
195  int n = in.Nspin()*in.Ncolor();
196  flops += (9*(8*n*n)-2*n)*(long long)in.VolumeCB()*in.SiteSubset();
197  }
198 
200  {
201  bool reset1 = newTmp(&tmp1, in);
202  if (tmp1->SiteSubset() != QUDA_FULL_SITE_SUBSET) errorQuda("Temporary vector is not full-site vector");
203 
204  M(*tmp1, in);
205  Mdag(out, *tmp1);
206 
207  deleteTmp(&tmp1, reset1);
208  }
209 
212  const QudaSolutionType solType) const
213  {
214  if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) {
215  errorQuda("Preconditioned solution requires a preconditioned solve_type");
216  }
217 
218  src = &b;
219  sol = &x;
220  }
221 
223  const QudaSolutionType solType) const
224  {
225  /* do nothing */
226  }
227 
228  //Make the coarse operator one level down. Pass both the coarse gauge field and coarse clover field.
229  void DiracCoarse::createCoarseOp(GaugeField &Y, GaugeField &X, GaugeField &Xinv, GaugeField &Yhat, const Transfer &T, double kappa, double mu, double mu_factor) const
230  {
231  double a = 2.0 * kappa * mu * T.Vectors().TwistFlavor();
232  if (checkLocation(Y, X, Xinv, Yhat) == QUDA_CPU_FIELD_LOCATION) {
233  CoarseCoarseOp(Y, X, Xinv, Yhat, T, *(this->Y_h), *(this->X_h), *(this->Xinv_h), kappa, a, mu_factor, QUDA_COARSE_DIRAC, QUDA_MATPC_INVALID);
234  } else {
235  CoarseCoarseOp(Y, X, Xinv, Yhat, T, *(this->Y_d), *(this->X_d), *(this->Xinv_d), kappa, a, mu_factor, QUDA_COARSE_DIRAC, QUDA_MATPC_INVALID);
236  }
237  }
238 
239  DiracCoarsePC::DiracCoarsePC(const DiracParam &param, bool enable_gpu) : DiracCoarse(param, enable_gpu)
240  {
241  /* do nothing */
242  }
243 
245  {
246  /* do nothing */
247  }
248 
250 
252  {
254  if (!enable_gpu) errorQuda("Cannot apply %s on GPU since enable_gpu has not been set", __func__);
255  ApplyCoarse(out, in, in, *Yhat_d, *X_d, kappa, parity, true, false, dagger);
256  } else if ( checkLocation(out, in) == QUDA_CPU_FIELD_LOCATION ) {
257  ApplyCoarse(out, in, in, *Yhat_h, *X_h, kappa, parity, true, false, dagger);
258  }
259 
260  int n = in.Nspin()*in.Ncolor();
261  flops += (8*(8*n*n)-2*n)*in.VolumeCB()*in.SiteSubset();
262  }
263 
265  const ColorSpinorField &x, const double &k) const
266  {
267  // emulated for now
268  Dslash(out, in, parity);
269  blas::xpay(const_cast<ColorSpinorField&>(x), k, out);
270 
271  int n = in.Nspin()*in.Ncolor();
272  flops += (8*(8*n*n)-2*n)*in.VolumeCB(); // blas flops counted separately so only need to count dslash flops
273  }
274 
276  {
277  bool reset1 = newTmp(&tmp1, in);
278 
279  if (in.SiteSubset() == QUDA_FULL_SITE_SUBSET || out.SiteSubset() == QUDA_FULL_SITE_SUBSET ||
281  errorQuda("Cannot apply preconditioned operator to full field (subsets = %d %d %d)",
282  in.SiteSubset(), out.SiteSubset(), tmp1->SiteSubset());
283 
285  // DiracCoarsePC::Dslash applies A^{-1}Dslash
287  // DiracCoarse::DslashXpay applies (A - D) // FIXME this ignores the -1
290  blas::xpay(*tmp1, -1.0, out);
292  // DiracCoarsePC::Dslash applies A^{-1}Dslash
294  // DiracCoarse::DslashXpay applies (A - D) // FIXME this ignores the -1
297  blas::xpay(*tmp1, -1.0, out);
298  } else if (matpcType == QUDA_MATPC_EVEN_EVEN) {
301  } else if (matpcType == QUDA_MATPC_ODD_ODD) {
303  DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, -1.0);
304  } else {
305  errorQuda("MatPCType %d not valid for DiracCoarsePC", matpcType);
306  }
307 
308  deleteTmp(&tmp1, reset1);
309  }
310 
312  {
313  bool reset1 = newTmp(&tmp2, in);
314  M(*tmp2, in);
315  Mdag(out, *tmp2);
316  deleteTmp(&tmp2, reset1);
317  }
318 
320  const QudaSolutionType solType) const
321  {
322  // we desire solution to preconditioned system
323  if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) {
324  src = &b;
325  sol = &x;
326  return;
327  }
328 
329  bool reset = newTmp(&tmp1, b.Even());
330 
331  // we desire solution to full system
333  // src = A_ee^-1 (b_e - D_eo A_oo^-1 b_o)
334  src = &(x.Odd());
335  CloverInv(*src, b.Odd(), QUDA_ODD_PARITY);
337  blas::xpay(const_cast<ColorSpinorField&>(b.Even()), -1.0, *tmp1);
339  sol = &(x.Even());
340  } else if (matpcType == QUDA_MATPC_ODD_ODD) {
341  // src = A_oo^-1 (b_o - D_oe A_ee^-1 b_e)
342  src = &(x.Even());
343  CloverInv(*src, b.Even(), QUDA_EVEN_PARITY);
345  blas::xpay(const_cast<ColorSpinorField&>(b.Odd()), -1.0, *tmp1);
347  sol = &(x.Odd());
349  // src = b_e - D_eo A_oo^-1 b_o
350  src = &(x.Odd());
351  CloverInv(*tmp1, b.Odd(), QUDA_ODD_PARITY);
353  blas::xpay(const_cast<ColorSpinorField&>(b.Even()), -1.0, *src);
354  sol = &(x.Even());
356  // src = b_o - D_oe A_ee^-1 b_e
357  src = &(x.Even());
358  CloverInv(*tmp1, b.Even(), QUDA_EVEN_PARITY);
360  blas::xpay(const_cast<ColorSpinorField&>(b.Odd()), -1.0, *src);
361  sol = &(x.Odd());
362  } else {
363  errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType);
364  }
365 
366  // here we use final solution to store parity solution and parity source
367  // b is now up for grabs if we want
368 
369  deleteTmp(&tmp1, reset);
370  }
371 
373  {
374  if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) {
375  return;
376  }
377 
378  checkFullSpinor(x, b);
379 
380  bool reset = newTmp(&tmp1, b.Even());
381 
382  // create full solution
383 
386  // x_o = A_oo^-1 (b_o - D_oe x_e)
388  blas::xpay(const_cast<ColorSpinorField&>(b.Odd()), -1.0, *tmp1);
389  CloverInv(x.Odd(), *tmp1, QUDA_ODD_PARITY);
390  } else if (matpcType == QUDA_MATPC_ODD_ODD ||
392  // x_e = A_ee^-1 (b_e - D_eo x_o)
394  blas::xpay(const_cast<ColorSpinorField&>(b.Even()), -1.0, *tmp1);
395  CloverInv(x.Even(), *tmp1, QUDA_EVEN_PARITY);
396  } else {
397  errorQuda("MatPCType %d not valid for DiracCoarsePC", matpcType);
398  }
399 
400  deleteTmp(&tmp1, reset);
401  }
402 
403  //Make the coarse operator one level down. For the preconditioned
404  //operator we are coarsening the Yhat links, not the Y links. We
405  //pass the fine clover fields, though they are actually ignored.
406  void DiracCoarsePC::createCoarseOp(GaugeField &Y, GaugeField &X, GaugeField &Xinv, GaugeField &Yhat, const Transfer &T, double kappa, double mu, double mu_factor) const
407  {
408  double a = -2.0 * kappa * mu * T.Vectors().TwistFlavor();
409  if (checkLocation(Y, X, Xinv, Yhat) == QUDA_CPU_FIELD_LOCATION) {
410  CoarseCoarseOp(Y, X, Xinv, Yhat, T, *(this->Yhat_h), *(this->X_h), *(this->Xinv_h), kappa, a, -mu_factor, QUDA_COARSEPC_DIRAC, matpcType);
411  } else {
412  CoarseCoarseOp(Y, X, Xinv, Yhat, T, *(this->Yhat_d), *(this->X_d), *(this->Xinv_d), kappa, a, -mu_factor, QUDA_COARSEPC_DIRAC, matpcType);
413  }
414  }
415 
416 }
virtual ~DiracCoarse()
unsigned long long flops
Definition: dirac_quda.h:100
void xpay(ColorSpinorField &x, const double &a, ColorSpinorField &y)
Definition: blas_quda.cu:173
double mu
Definition: test_util.cpp:1643
enum QudaPrecision_s QudaPrecision
void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const
cpuGaugeField * X_h
Definition: dirac_quda.h:749
const void * src
#define errorQuda(...)
Definition: util_quda.h:90
void init()
Definition: blas_quda.cu:64
void CloverInv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply the inverse coarse clover operator.
double mu_factor[QUDA_MAX_MG_LEVEL]
Definition: test_util.cpp:1659
virtual void checkFullSpinor(const ColorSpinorField &, const ColorSpinorField &) const
Definition: dirac.cpp:129
void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply the full operator.
void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply DslashXpay out = (D * in)
void deleteTmp(ColorSpinorField **, const bool &reset) const
Definition: dirac.cpp:64
cudaGaugeField * Yhat_d
virtual void DslashXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const
Apply DslashXpay out = (D * in + A * x)
int nvec() const
Definition: transfer.h:203
void Clover(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply the coarse clover operator.
virtual void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const
void createCoarseOp(GaugeField &Y, GaugeField &X, GaugeField &Xinv, GaugeField &Yhat, const Transfer &T, double kappa, double mu, double mu_factor=0.) const
Create the coarse operator from this coarse operator.
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
cpuGaugeField * Y_h
void DslashXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const
Apply DslashXpay out = (D * in + A * x)
QudaGaugeParam param
Definition: pack_test.cpp:17
cudaGaugeField * Y_d
Definition: dirac_quda.h:753
bool newTmp(ColorSpinorField **, const ColorSpinorField &) const
Definition: dirac.cpp:53
#define b
static int ndim
Definition: layout_hyper.c:53
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply DslashXpay out = (D * in)
cudaGaugeField * Xinv_d
cpuGaugeField * Yhat_h
void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const
void createCoarseOp(GaugeField &Y, GaugeField &X, GaugeField &Xinv, GaugeField &Yhat, const Transfer &T, double kappa, double mu, double mu_factor=0.) const
Create the coarse even-odd preconditioned coarse operator. Unlike the Wilson operator, the coarsening of the preconditioned coarse operator differs from that of the unpreconditioned coarse operator, so we need to specialize it.
const ColorSpinorField & Vectors(QudaFieldLocation location=QUDA_CPU_FIELD_LOCATION) const
Definition: transfer.h:195
VOLATILE spinorFloat kappa
const Transfer * transfer
Definition: dirac_quda.h:745
void copy(const GaugeField &src)
virtual void createCoarseOp(GaugeField &Y, GaugeField &X, GaugeField &Xinv, GaugeField &Yhat, const Transfer &T, double kappa, double mu=0., double mu_factor=0.) const
Create the coarse operator (virtual parent)
Definition: dirac_quda.h:167
cpuGaugeField * Xinv_h
cpuColorSpinorField * in
QudaSiteSubset SiteSubset() const
#define checkLocation(...)
enum QudaSolutionType_s QudaSolutionType
QudaDagType dagger
Definition: dirac_quda.h:99
int Spin_bs() const
Definition: transfer.h:209
cudaGaugeField * X_d
Definition: dirac_quda.h:754
enum QudaParity_s QudaParity
void ApplyCoarse(ColorSpinorField &out, const ColorSpinorField &inA, const ColorSpinorField &inB, const GaugeField &Y, const GaugeField &X, double kappa, int parity=QUDA_INVALID_PARITY, bool dslash=true, bool clover=true, bool dagger=false)
double Mu() const
Definition: dirac_quda.h:764
void CoarseCoarseOp(GaugeField &Y, GaugeField &X, GaugeField &Xinv, GaugeField &Yhat, const Transfer &T, const GaugeField &gauge, const GaugeField &clover, const GaugeField &cloverInv, double kappa, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
Coarse operator construction from an intermediate-grid operator (Coarse)
void * memcpy(void *__dst, const void *__src, size_t __n)
double kappa
Definition: dirac_quda.h:96
QudaMatPCType matpcType
Definition: dirac_quda.h:98
cudaGaugeField * X_d
const int * Geo_bs() const
Definition: transfer.h:215
void Mdag(ColorSpinorField &out, const ColorSpinorField &in) const
Definition: dirac.cpp:73
cudaGaugeField * Xinv_d
Definition: dirac_quda.h:755
GaugeCovDev * dirac
Definition: covdev_test.cpp:75
cpuColorSpinorField * out
GaugeFieldParam gParam
virtual void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const
cudaGaugeField * Yhat_d
Definition: dirac_quda.h:756
cpuGaugeField * Xinv_h
Definition: dirac_quda.h:750
cpuGaugeField * X_h
cpuGaugeField * Y_h
Definition: dirac_quda.h:748
cudaGaugeField * Y_d
QudaTwistFlavorType TwistFlavor() const
int transfer
Definition: covdev_test.cpp:55
cpuGaugeField * Yhat_h
Definition: dirac_quda.h:751
const int * X() const
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
double MuFactor() const
Definition: dirac_quda.h:765
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply the full operator.
const Dirac * dirac
Definition: dirac_quda.h:746
void copy(const GaugeField &src)
QudaPrecision Precision() const
QudaParity parity
Definition: covdev_test.cpp:53
DiracCoarse(const DiracParam &param, bool enable_gpu=true)
Definition: dirac_coarse.cpp:7
ColorSpinorField * tmp2
Definition: dirac_quda.h:102
QudaPrecision prec
Definition: test_util.cpp:1615
#define a
DiracCoarsePC(const DiracParam &param, bool enable_gpu=true)
ColorSpinorField * tmp1
Definition: dirac_quda.h:101