QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
inv_mre.cpp
Go to the documentation of this file.
1 #include <invert_quda.h>
2 #include <blas_quda.h>
3 #include <Eigen/Dense>
4 
5 namespace quda {
6 
7  MinResExt::MinResExt(DiracMatrix &mat, bool orthogonal, bool apply_mat, bool hermitian, TimeProfile &profile)
8  : mat(mat), orthogonal(orthogonal), apply_mat(apply_mat), hermitian(hermitian), profile(profile){
9 
10  }
11 
13 
14  }
15 
16  /* Solve the equation A p_k psi_k = b by minimizing the residual and
17  using Eigen's SVD algorithm for numerical stability */
18  void MinResExt::solve(Complex *psi_, std::vector<ColorSpinorField*> &p,
19  std::vector<ColorSpinorField*> &q, ColorSpinorField &b, bool hermitian)
20  {
21  using namespace Eigen;
22  typedef Matrix<Complex, Dynamic, Dynamic> matrix;
23  typedef Matrix<Complex, Dynamic, 1> vector;
24 
25  const int N = q.size();
26  vector phi(N), psi(N);
27  matrix A(N,N);
28 
29  // form the a Nx(N+1) matrix using only a single reduction - this
30  // presently requires forgoing the matrix symmetry, but the improvement is well worth it
31  std::vector<ColorSpinorField*> Q;
32  for (int i=0; i<N; i++) Q.push_back(q[i]);
33  Q.push_back(&b);
34 
35  Complex *A_ = new Complex[N*(N+1)];
36 
37  if (hermitian) {
38  // linear system is Hermitian, solve directly
39  // compute rhs vector phi = P* b = (q_i, b) and construct the matrix
40  // P* Q = P* A P = (p_i, q_j) = (p_i, A p_j)
41  blas::cDotProduct(A_, p, Q);
42  } else {
43  // linear system is not Hermitian, solve the normal system
44  // compute rhs vector phi = Q* b = (q_i, b) and construct the matrix
45  // Q* Q = (A P)* (A P) = (q_i, q_j) = (A p_i, A p_j)
46  blas::cDotProduct(A_, q, Q);
47  }
48 
49  for (int i=0; i<N; i++) {
50  phi(i) = A_[i*(N+1)+N];
51  for (int j=0; j<N; j++) {
52  A(i,j) = A_[i*(N+1)+j];
53  }
54  }
55 
56  delete []A_;
57 
59  profile.TPSTART(QUDA_PROFILE_EIGEN);
60 
61  LDLT<matrix> cholesky(A);
62  psi = cholesky.solve(phi);
63 
66 
67  for (int i=0; i<N; i++) psi_[i] = psi(i);
68  }
69 
70 
71  /*
72  We want to find the best initial guess of the solution of
73  A x = b, and we have N previous solutions x_i.
74  The method goes something like this:
75 
76  1. Orthonormalise the p_i and q_i
77  2. Form the matrix G_ij = x_i^dagger A x_j
78  3. Form the vector B_i = x_i^dagger b
79  4. solve A_ij a_j = B_i
80  5. x = a_i p_i
81  */
83  std::vector<ColorSpinorField*> p, std::vector<ColorSpinorField*> q) {
84 
85  bool running = profile.isRunning(QUDA_PROFILE_CHRONO);
86  if (!running) profile.TPSTART(QUDA_PROFILE_CHRONO);
87 
88  const int N = p.size();
89 
90  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Constructing minimum residual extrapolation with basis size %d\n", N);
91 
92  // if no guess is required, then set initial guess = 0
93  if (N == 0) {
94  blas::zero(x);
95  if (!running) profile.TPSTOP(QUDA_PROFILE_CHRONO);
96  return;
97  }
98 
99  if (N == 1) {
100  blas::copy(x, *p[0]);
101  if (!running) profile.TPSTOP(QUDA_PROFILE_CHRONO);
102  return;
103  }
104 
105  // Solution coefficient vectors
106  Complex *alpha = new Complex[N];
107 
108  double b2 = getVerbosity() >= QUDA_SUMMARIZE ? blas::norm2(b) : 0.0;
109 
110  // Orthonormalise the vector basis
111  if (orthogonal) {
112  for (int i=0; i<N; i++) {
113  double p2 = blas::norm2(*p[i]);
114  blas::ax(1 / sqrt(p2), *p[i]);
115  if (!apply_mat) blas::ax(1 / sqrt(p2), *q[i]);
116 
117  if (i+1<N) {
118  std::vector<ColorSpinorField*> Pi;
119  Pi.push_back(p[i]);
120  std::vector<ColorSpinorField*> P;
121  for (int j=i+1; j<N; j++) P.push_back(p[j]);
122  blas::cDotProduct(alpha+i+1, Pi, P); // single multi reduction
123  for (int j=i+1; j<N; j++) alpha[j] = -alpha[j];
124  blas::caxpy(alpha+i+1, Pi, P); // single block Pj update
125 
126  if (!apply_mat) {
127  // if not applying the matrix below then orthongonalize q
128  std::vector<ColorSpinorField*> X;
129  X.push_back(q[i]);
130  std::vector<ColorSpinorField*> Y;
131  for (int j=i+1; j<N; j++) { Y.push_back(q[j]); }
132  blas::caxpy(alpha+i+1, X, Y); // single block Qj update
133  }
134  }
135  }
136  }
137 
138  // if operator hasn't already been applied then apply
139  if (apply_mat) for (int i=0; i<N; i++) mat(*q[i], *p[i]);
140 
141  solve(alpha, p, q, b, hermitian);
142 
143  blas::zero(x);
144  std::vector<ColorSpinorField*> X;
145  X.push_back(&x);
146  blas::caxpy(alpha, p, X);
147 
148  if (getVerbosity() >= QUDA_SUMMARIZE) {
149  // compute the residual only if we're going to print it
150  for (int i=0; i<N; i++) alpha[i] = -alpha[i];
151  std::vector<ColorSpinorField*> B;
152  B.push_back(&b);
153  blas::caxpy(alpha, q, B);
154 
155  double rsd = sqrt(blas::norm2(b) / b2 );
156  printfQuda("MinResExt: N = %d, |res| / |src| = %e\n", N, rsd);
157  }
158 
159  delete [] alpha;
160 
161  if (!running) profile.TPSTOP(QUDA_PROFILE_CHRONO);
162  }
163 
164  // Wrapper for the above
165  void MinResExt::operator()(ColorSpinorField &x, ColorSpinorField &b, std::vector<std::pair<ColorSpinorField*,ColorSpinorField*> > basis) {
166  std::vector<ColorSpinorField*> p(basis.size()), q(basis.size());
167  for (unsigned int i=0; i<basis.size(); i++) { p[i] = basis[i].first; q[i] = basis[i].second; }
168  (*this)(x, b, p, q);
169  }
170 
171 
172 
173 } // namespace quda
void ax(double a, ColorSpinorField &x)
Definition: blas_quda.cu:508
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
void operator()(ColorSpinorField &x, ColorSpinorField &b, std::vector< std::pair< ColorSpinorField *, ColorSpinorField *> > basis)
Definition: inv_mre.cpp:165
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:764
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: copy_quda.cu:355
bool hermitian
Whether to compute q = Ap or assume it is provided.
Definition: invert_quda.h:1177
bool isRunning(QudaProfileType idx)
Definition: timer.h:257
void solve(Complex *psi_, std::vector< ColorSpinorField *> &p, std::vector< ColorSpinorField *> &q, ColorSpinorField &b, bool hermitian)
Solve the equation A p_k psi_k = q_k psi_k = b by minimizing the residual and using Eigen&#39;s SVD algor...
Definition: inv_mre.cpp:18
TimeProfile & profile
whether A is hermitian ot not
Definition: invert_quda.h:1178
MinResExt(DiracMatrix &mat, bool orthogonal, bool apply_mat, bool hermitian, TimeProfile &profile)
Definition: inv_mre.cpp:7
int X[4]
Definition: covdev_test.cpp:70
std::complex< double > Complex
Definition: quda_internal.h:46
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:512
void zero(ColorSpinorField &a)
Definition: blas_quda.cu:472
bool apply_mat
Whether to construct an orthogonal basis or not.
Definition: invert_quda.h:1176
virtual ~MinResExt()
Definition: inv_mre.cpp:12
#define printfQuda(...)
Definition: util_quda.h:115
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
const DiracMatrix & mat
Definition: invert_quda.h:1174