QUDA  v1.1.0
A library for QCD on GPUs
inv_mre.cpp
Go to the documentation of this file.
1 #include <invert_quda.h>
2 #include <blas_quda.h>
3 #include <eigen_helper.h>
4 
5 namespace quda {
6 
7  MinResExt::MinResExt(const DiracMatrix &mat, bool orthogonal, bool apply_mat, bool hermitian, TimeProfile &profile) :
8  mat(mat),
9  orthogonal(orthogonal),
10  apply_mat(apply_mat),
11  hermitian(hermitian),
12  profile(profile)
13  {
14  }
15 
17 
18  }
19 
20  /* Solve the equation A p_k psi_k = b by minimizing the residual and
21  using Eigen's SVD algorithm for numerical stability */
22  void MinResExt::solve(Complex *psi_, std::vector<ColorSpinorField*> &p,
23  std::vector<ColorSpinorField*> &q, ColorSpinorField &b, bool hermitian)
24  {
25  typedef Matrix<Complex, Dynamic, Dynamic> matrix;
27 
28  const int N = q.size();
29  vector phi(N), psi(N);
30  matrix A(N,N);
31 
32  // form the a Nx(N+1) matrix using only a single reduction - this
33  // presently requires forgoing the matrix symmetry, but the improvement is well worth it
34  std::vector<ColorSpinorField*> Q;
35  for (int i=0; i<N; i++) Q.push_back(q[i]);
36  Q.push_back(&b);
37 
38  Complex *A_ = new Complex[N*(N+1)];
39 
40  if (hermitian) {
41  // linear system is Hermitian, solve directly
42  // compute rhs vector phi = P* b = (q_i, b) and construct the matrix
43  // P* Q = P* A P = (p_i, q_j) = (p_i, A p_j)
44  blas::cDotProduct(A_, p, Q);
45  } else {
46  // linear system is not Hermitian, solve the normal system
47  // compute rhs vector phi = Q* b = (q_i, b) and construct the matrix
48  // Q* Q = (A P)* (A P) = (q_i, q_j) = (A p_i, A p_j)
49  blas::cDotProduct(A_, q, Q);
50  }
51 
52  for (int i=0; i<N; i++) {
53  phi(i) = A_[i*(N+1)+N];
54  for (int j=0; j<N; j++) {
55  A(i,j) = A_[i*(N+1)+j];
56  }
57  }
58 
59  delete []A_;
60 
62  profile.TPSTART(QUDA_PROFILE_EIGEN);
63 
64  LDLT<matrix> cholesky(A);
65  psi = cholesky.solve(phi);
66 
69 
70  for (int i=0; i<N; i++) psi_[i] = psi(i);
71  }
72 
73 
74  /*
75  We want to find the best initial guess of the solution of
76  A x = b, and we have N previous solutions x_i.
77  The method goes something like this:
78 
79  1. Orthonormalise the p_i and q_i
80  2. Form the matrix G_ij = x_i^dagger A x_j
81  3. Form the vector B_i = x_i^dagger b
82  4. solve A_ij a_j = B_i
83  5. x = a_i p_i
84  */
86  std::vector<ColorSpinorField*> p, std::vector<ColorSpinorField*> q) {
87 
88  bool running = profile.isRunning(QUDA_PROFILE_CHRONO);
89  if (!running) profile.TPSTART(QUDA_PROFILE_CHRONO);
90 
91  const int N = p.size();
92 
93  if (getVerbosity() >= QUDA_VERBOSE)
94  printfQuda("Constructing minimum residual extrapolation with basis size %d\n", N);
95 
96  // if no guess is required, then set initial guess = 0
97  if (N == 0) {
98  blas::zero(x);
99  if (!running) profile.TPSTOP(QUDA_PROFILE_CHRONO);
100  return;
101  }
102 
103  if (N == 1) {
104  blas::copy(x, *p[0]);
105  if (!running) profile.TPSTOP(QUDA_PROFILE_CHRONO);
106  return;
107  }
108 
109  // Solution coefficient vectors
110  Complex *alpha = new Complex[N];
111 
112  double b2 = getVerbosity() >= QUDA_SUMMARIZE ? blas::norm2(b) : 0.0;
113 
114  // Orthonormalise the vector basis
115  if (orthogonal) {
116  for (int i=0; i<N; i++) {
117  double p2 = blas::norm2(*p[i]);
118  blas::ax(1 / sqrt(p2), *p[i]);
119  if (!apply_mat) blas::ax(1 / sqrt(p2), *q[i]);
120 
121  if (i+1<N) {
122  std::vector<ColorSpinorField*> Pi;
123  Pi.push_back(p[i]);
124  std::vector<ColorSpinorField*> P;
125  for (int j=i+1; j<N; j++) P.push_back(p[j]);
126  blas::cDotProduct(alpha+i+1, Pi, P); // single multi reduction
127  for (int j=i+1; j<N; j++) alpha[j] = -alpha[j];
128  blas::caxpy(alpha+i+1, Pi, P); // single block Pj update
129 
130  if (!apply_mat) {
131  // if not applying the matrix below then orthongonalize q
132  std::vector<ColorSpinorField*> X;
133  X.push_back(q[i]);
134  std::vector<ColorSpinorField*> Y;
135  for (int j=i+1; j<N; j++) { Y.push_back(q[j]); }
136  blas::caxpy(alpha+i+1, X, Y); // single block Qj update
137  }
138  }
139  }
140  }
141 
142  // if operator hasn't already been applied then apply
143  if (apply_mat) for (int i=0; i<N; i++) mat(*q[i], *p[i]);
144 
145  solve(alpha, p, q, b, hermitian);
146 
147  blas::zero(x);
148  std::vector<ColorSpinorField*> X;
149  X.push_back(&x);
150  blas::caxpy(alpha, p, X);
151 
152  if (getVerbosity() >= QUDA_SUMMARIZE) {
153  // compute the residual only if we're going to print it
154  for (int i=0; i<N; i++) alpha[i] = -alpha[i];
155  std::vector<ColorSpinorField*> B;
156  B.push_back(&b);
157  blas::caxpy(alpha, q, B);
158 
159  double rsd = sqrt(blas::norm2(b) / b2 );
160  printfQuda("MinResExt: N = %d, |res| / |src| = %e\n", N, rsd);
161  }
162 
163  delete [] alpha;
164 
165  if (!running) profile.TPSTOP(QUDA_PROFILE_CHRONO);
166  }
167 
168  // Wrapper for the above
169  void MinResExt::operator()(ColorSpinorField &x, ColorSpinorField &b, std::vector<std::pair<ColorSpinorField*,ColorSpinorField*> > basis) {
170  std::vector<ColorSpinorField*> p(basis.size()), q(basis.size());
171  for (unsigned int i=0; i<basis.size(); i++) { p[i] = basis[i].first; q[i] = basis[i].second; }
172  (*this)(x, b, p, q);
173  }
174 
175 
176 
177 } // namespace quda
TimeProfile & profile
whether A is hermitian ot not
Definition: invert_quda.h:1310
bool apply_mat
Whether to construct an orthogonal basis or not.
Definition: invert_quda.h:1308
const DiracMatrix & mat
Definition: invert_quda.h:1306
void operator()(ColorSpinorField &x, ColorSpinorField &b, std::vector< std::pair< ColorSpinorField *, ColorSpinorField * > > basis)
Definition: inv_mre.cpp:169
virtual ~MinResExt()
Definition: inv_mre.cpp:16
bool hermitian
Whether to compute q = Ap or assume it is provided.
Definition: invert_quda.h:1309
MinResExt(const DiracMatrix &mat, bool orthogonal, bool apply_mat, bool hermitian, TimeProfile &profile)
Definition: inv_mre.cpp:7
void solve(Complex *psi_, std::vector< ColorSpinorField * > &p, std::vector< ColorSpinorField * > &q, ColorSpinorField &b, bool hermitian)
Solve the equation A p_k psi_k = q_k psi_k = b by minimizing the residual and using Eigen's SVD algor...
Definition: inv_mre.cpp:22
bool isRunning(QudaProfileType idx)
Definition: timer.h:260
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
@ QUDA_SUMMARIZE
Definition: enum_quda.h:266
@ QUDA_VERBOSE
Definition: enum_quda.h:267
void ax(double a, ColorSpinorField &x)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: blas_quda.h:24
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
std::complex< double > Complex
Definition: quda_internal.h:86
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
@ QUDA_PROFILE_CHRONO
Definition: timer.h:113
@ QUDA_PROFILE_EIGEN
Definition: timer.h:114
#define printfQuda(...)
Definition: util_quda.h:114
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21