QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
inv_mpbicgstab_quda.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 
5 #include <quda_internal.h>
6 #include <color_spinor_field.h>
7 #include <blas_quda.h>
8 #include <dslash_quda.h>
9 #include <invert_quda.h>
10 #include <util_quda.h>
11 #include <sys/time.h>
12 
13 #include <face_quda.h>
14 
15 #include <iostream>
16 
17 namespace quda {
18 
20  Solver(param, profile), mat(mat)
21  {
22  }
23 
25  }
26 
27 
28  void MPBiCGstab::computeMatrixPowers(std::vector<cudaColorSpinorField>& pr, cudaColorSpinorField& p, cudaColorSpinorField& r, int nsteps){
29  cudaColorSpinorField temp(p);
30  pr[0] = p;
31  for(int i=1; i<=(2*nsteps); ++i){
32  mat(pr[i], pr[i-1], temp);
33  }
34 
35  pr[(2*nsteps)+1] = r;
36  // for(int i=(2*nsteps+2); i<(4*nsteps+2); ++i){
37  for(int i=(2*nsteps+2); i<(4*nsteps+1); ++i){
38  mat(pr[i], pr[i-1], temp);
39  }
40  }
41 
42 #ifdef SSTEP
43  static void print(const double d[], int n){
44  for(int i=0; i<n; ++i){
45  std::cout << d[i] << " ";
46  }
47  std::cout << std::endl;
48  }
49 
50  static void print(const Complex d[], int n){
51  for(int i=0; i<n; ++i){
52  std::cout << "(" << real(d[i]) << "," << imag(d[i]) << ") ";
53  }
54  std::cout << std::endl;
55  }
56 
57 
58  template<typename T>
59  static void zero(T d[], int N){
60  for(int i=0; i<N; ++i) d[i] = static_cast<T>(0);
61  }
62 
63 
64  static void computeGramMatrix(Complex** G, std::vector<cudaColorSpinorField>& v){
65 
66  const int dim = v.size();
67 
68  for(int i=0; i<dim; ++i){
69  for(int j=0; j<dim; ++j){
70  G[i][j] = cDotProductCuda(v[i],v[j]);
71  }
72  }
73  return;
74  }
75 
76  static void computeGramVector(Complex* g, cudaColorSpinorField& r0, std::vector<cudaColorSpinorField>& pr){
77 
78  const int dim = pr.size();
79 
80  for(int i=0; i<dim; ++i){
81  g[i] = cDotProductCuda(r0,pr[i]);
82  }
83  }
84 
85 /*
86  // Here, B is an (s+1)x(s+1) matrix with 1s on the subdiagonal
87  template<class T>
88  static void getBColumn(T *col, int col_index, int nsteps){
89  zero(col,nsteps+1);
90  col[col_index] = static_cast<T>(1);
91  }
92 
93  template<typename T>
94  static void init_c_vector(T *c, int index, int nsteps){
95  zero(c,2*nsteps+2);
96  getBColumn(c+(nsteps+1),index,nsteps);
97  }
98 
99  template<typename T>
100  static void init_a_vector(T *a, int index, int nsteps){
101  zero(a,2*nsteps+2);
102  getBColumn(a,index,nsteps);
103  }
104 
105  template<typename T>
106  static void init_e_vector(T *e, int nsteps){
107  zero(e,2*nsteps+2);
108  e[2*nsteps+2] = static_cast<T>(1);
109  }
110 */
111 
112  template<typename T>
113  static T zip(T a[], T b[], int dim){
114  T result = 0.0;
115  for(int i=0; i<dim; ++i){
116  result += a[i]*b[i];
117  }
118  return result;
119  }
120 
121  static Complex computeUdaggerMV(Complex* u, Complex** M, Complex* v, int dim)
122  {
123  Complex result(0,0);
124 
125  for(int i=0; i<dim; ++i){
126  for(int j=0; j<dim; ++j){
127  result += conj(u[i])*v[j]*M[i][j];
128  }
129  }
130  return result;
131  }
132 #endif
133 
135  {
136 #ifndef SSTEP
137  errorQuda("S-step solvers not built\n");
138 #else
139  // Check to see that we're not trying to invert on a zero-field source
140  const double b2 = norm2(b);
141  if(b2 == 0){
143  printfQuda("Warning: inverting on zero-field source\n");
144  x=b;
145  param.true_res = 0.0;
146  param.true_res_hq = 0.0;
147  return;
148  }
149 
151  csParam.create = QUDA_ZERO_FIELD_CREATE;
152 
153  cudaColorSpinorField temp(b, csParam);
154 
156 
157 
158 
159  mat(r, x, temp); // r = Ax
160  double r2 = xmyNormCuda(b,r); // r = b - Ax
161 
162 
163 
164  cudaColorSpinorField r0(r);
166  cudaColorSpinorField Ap(r);
167 
168 
169  const int s = 3;
170 
171  // Vector of matrix powers
172  std::vector<cudaColorSpinorField> PR(4*s+2,cudaColorSpinorField(b,csParam));
173 
174 
175  Complex r0r;
176  Complex alpha;
177  Complex omega;
178  Complex beta;
179 
180  Complex** G = new Complex*[4*s+2];
181  for(int i=0; i<(4*s+2); ++i){
182  G[i] = new Complex[4*s+2];
183  }
184  Complex* g = new Complex[4*s+2];
185 
186  Complex** a = new Complex*[2*s+1];
187  Complex** c = new Complex*[2*s+1];
188  Complex** a_new = new Complex*[2*s+1];
189  Complex** c_new = new Complex*[2*s+1];
190 
191  for(int i=0; i<(2*s+1); ++i){
192  a[i] = new Complex[4*s+2];
193  c[i] = new Complex[4*s+2];
194  a_new[i] = new Complex[4*s+2];
195  c_new[i] = new Complex[4*s+2];
196  }
197 
198 
199  Complex* e = new Complex[4*s+2];
200 
201 
202 
203 
204  double stop = stopping(param.tol, b2, param.residual_type);
205  int it=0;
206  int m=0;
207  while(!convergence(r2, 0.0, stop, 0.0 ) && it<param.maxiter){
208 
209  computeMatrixPowers(PR, p, r, s);
210  computeGramVector(g, r0, PR);
211  computeGramMatrix(G, PR);
212 
213  // initialize coefficient vectors
214  for(int i=0; i<(2*s+1); ++i){
215  zero(a[i],(4*s+2));
216  zero(c[i],(4*s+2));
217  a[i][i] = static_cast<Complex>(1);
218  c[i][i + (2*s+1)] = static_cast<Complex>(1);
219  }
220 
221 
222  zero(e,(4*s+2));
223  int j=0;
224  while(!convergence(r2,0.0,stop,0.0) && j<s){
225  PrintStats("MPBiCGstab", it, r2, b2, 0.0);
226 
227  alpha = zip(g,c[0],4*s+2)/zip(g,a[1],4*s+2);
228 
229  Complex omega_num = computeUdaggerMV(c[0],G,c[1],(4*s+2))
230  - alpha*computeUdaggerMV(c[0],G,a[2],(4*s+2))
231  - conj(alpha)*computeUdaggerMV(a[1],G,c[1],(4*s+2))
232  + conj(alpha)*alpha*computeUdaggerMV(a[1],G,a[2],(4*s+2));
233 
234  Complex omega_denom = computeUdaggerMV(c[1],G,c[1],(4*s+2))
235  - alpha*computeUdaggerMV(c[1],G,a[2],(4*s+2))
236  - conj(alpha)*computeUdaggerMV(a[2],G,c[1],(4*s+2))
237  + conj(alpha)*alpha*computeUdaggerMV(a[2],G,a[2],(4*s+2));
238 
239  omega = omega_num/omega_denom;
240  // Update candidate solution
241  for(int i=0; i<(4*s+2); ++i){
242  e[i] += alpha*a[0][i] + omega*c[0][i] - alpha*omega*a[1][i];
243  }
244 
245  // Update residual
246  for(int k=0; k<=(2*(s - j - 1)); ++k){
247  for(int i=0; i<(4*s+2); ++i){
248  c_new[k][i] = c[k][i] - alpha*a[k+1][i] - omega*c[k+1][i] + alpha*omega*a[k+2][i];
249  }
250  }
251 
252  // update search direction
253  beta = (zip(g,c_new[0],(4*s+2))/zip(g,c[0],(4*s+2)))*(alpha/omega);
254 
255  for(int k=0; k<=(2*(s - j - 1)); ++k){
256  for(int i=0; i<(4*s+2); ++i){
257  a_new[k][i] = c_new[k][i] + beta*a[k][i] - beta*omega*a[k+1][i];
258  }
259 
260  for(int i=0; i<(4*s+2); ++i){
261  a[k][i] = a_new[k][i];
262  c[k][i] = c_new[k][i];
263  }
264  }
265  zeroCuda(r);
266  for(int i=0; i<(4*s+2); ++i){
267  caxpyCuda(c[0][i], PR[i], r);
268  }
269  r2 = norm2(r);
270  j++;
271  it++;
272  } // j
273 
274  zeroCuda(p);
275  for(int i=0; i<(4*s+2); ++i){
276  caxpyCuda(a[0][i], PR[i], p);
277  caxpyCuda(e[i], PR[i], x);
278  }
279 
280  m++;
281  }
282 
283  if(it >= param.maxiter)
284  warningQuda("Exceeded maximum iterations %d", param.maxiter);
285 
286  // compute the true residual
287  mat(r, x, temp);
288  param.true_res = sqrt(xmyNormCuda(b, r)/b2);
289 
290  PrintSummary("MPBiCGstab", it, r2, b2);
291 
292 
293 
294  for(int i=0; i<(4*s+2); ++i){
295  delete[] G[i];
296  }
297  delete[] G;
298 
299 
300  delete[] g;
301 
302  // Is 2*s + 3 really correct?
303  for(int i=0; i<(2*s+1); ++i){
304  delete[] a[i];
305  delete[] a_new[i];
306  delete[] c[i];
307  delete[] c_new[i];
308  }
309  delete[] a;
310  delete[] a_new;
311  delete[] c;
312  delete[] c_new;
313  delete[] e;
314 #endif
315  return;
316  }
317 
318 } // namespace quda
bool convergence(const double &r2, const double &hq2, const double &r2_tol, const double &hq_tol)
Definition: solver.cpp:82
void caxpyCuda(const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: blas_quda.cu:207
static double stopping(const double &tol, const double &b2, QudaResidualType residual_type)
Definition: solver.cpp:65
#define errorQuda(...)
Definition: util_quda.h:73
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
std::complex< double > Complex
Definition: eig_variables.h:13
void mat(void *out, void **fatlink, void **longlink, void *in, double kappa, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision)
TimeProfile & profile
Definition: invert_quda.h:224
QudaGaugeParam param
Definition: pack_test.cpp:17
__host__ __device__ void zero(double &x)
Definition: reduce_core.h:1
void PrintSummary(const char *name, int k, const double &r2, const double &b2)
Definition: solver.cpp:137
QudaResidualType residual_type
Definition: invert_quda.h:35
Complex cDotProductCuda(cudaColorSpinorField &, cudaColorSpinorField &)
Definition: reduce_quda.cu:468
ColorSpinorParam csParam
Definition: pack_test.cpp:24
#define warningQuda(...)
Definition: util_quda.h:84
int x[4]
SolverParam & param
Definition: invert_quda.h:223
void Stop(QudaProfileType idx)
MPBiCGstab(DiracMatrix &mat, SolverParam &param, TimeProfile &profile)
void PrintStats(const char *, int k, const double &r2, const double &b2, const double &hq2)
Definition: solver.cpp:122
#define printfQuda(...)
Definition: util_quda.h:67
void zeroCuda(cudaColorSpinorField &a)
Definition: blas_quda.cu:40
void print(const double d[], int n)
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:115
VOLATILE spinorFloat * s
double norm2(const ColorSpinorField &)
double xmyNormCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
Definition: reduce_quda.cu:343
void operator()(cudaColorSpinorField &out, cudaColorSpinorField &in)