21 static void applyT(T d_out[],
const T d_in[],
const T gamma[],
const T rho[],
int N)
24 for(
int i=0; i<N; ++i){
25 d_out[i] = d_in[i]/gamma[i];
28 for(
int i=0; i<N-1; ++i){
29 d_out[i] += (d_in[i+1]*(1-rho[i+1])/(gamma[i+1]*rho[i+1]));
32 for(
int i=1; i<N; ++i){
33 d_out[i] -= d_in[i-1]/(rho[i-1]*gamma[i-1]);
40 static void applyB(T d_out[],
const T d_in[],
int N)
42 d_out[0] =
static_cast<T
>(0);
43 for(
int i=1; i<N; ++i) d_out[i] = d_in[i-1];
47 void print(
const double d[],
int n){
48 for(
int i=0; i<n; ++i){
49 std::cout << d[i] <<
" ";
51 std::cout << std::endl;
55 static void zero(T d[],
int N){
56 for(
int i=0; i<N; ++i) d[i] = static_cast<T>(0);
60 static void applyThirdTerm(T d_out[],
const T d_in[],
int k,
int j,
int s,
const T gamma[],
const T rho[],
const T gamma_kprev[],
const T rho_kprev[])
64 const int dim = 2*s + 1;
67 if(k) applyT(d_out, d_in, gamma_kprev, rho_kprev, s);
70 applyB(d_out+s, d_in+s, s+1);
73 if(k) d_out[
s] -= d_in[s-1]/(rho_kprev[s-1]*gamma_kprev[s-1]);
76 for(
int i=0; i<
dim; ++i) d_out[i] *= -rho[j]*gamma[j];
82 static void computeCoeffs(T d_out[],
const T d_p1[],
const T d_p2[],
int k,
int j,
int s,
const T gamma[],
const T rho[],
const T gamma_kprev[],
const T rho_kprev[])
84 applyThirdTerm(d_out, d_p1, k, j-1, s, gamma, rho, gamma_kprev, rho_kprev);
87 for(
int i=0; i<(2*s+1); ++i){
88 d_out[i] += rho[j-1]*d_p1[i] + (1 - rho[j-1])*d_p2[i];
97 Solver(param, profile), mat(mat)
110 for(
int i=1; i<nvec; ++i){
111 mat(out[i], out[i-1], temp);
116 void MPCG::computeMatrixPowers(std::vector<cudaColorSpinorField>& out, std::vector<cudaColorSpinorField>& in,
int nsteps)
118 cudaColorSpinorField temp(in[0]);
120 for(
int i=0; i<=nsteps; ++i) out[i] = in[i];
122 for(
int i=(nsteps+1); i<=(2*nsteps); ++i){
123 mat(out[i], out[i-1], temp);
129 static void computeGramMatrix(
double** G, std::vector<cudaColorSpinorField>& v,
double*
mu){
131 const int dim = v.size();
132 const int nsteps = (dim-1)/2;
135 std::vector<cudaColorSpinorField*> vp1; vp1.reserve((nsteps+1)*nsteps);
136 std::vector<cudaColorSpinorField*> vp2; vp2.reserve((nsteps+1)*nsteps);
137 double g[(nsteps+1)*nsteps];
139 for(
int i=0; i<nsteps; ++i){
140 for(
int j=nsteps; j<
dim; j++){
141 vp1.push_back(&v[i]);
142 vp2.push_back(&v[j]);
147 for(
int i=0; i<nsteps; ++i){
148 for(
int j=nsteps; j<
dim; j++){
155 const int num = dim-nsteps;
156 const int offset = nsteps;
157 double d[2*nsteps+1];
158 std::vector<cudaColorSpinorField*> vp1; vp1.reserve(2*nsteps+1);
159 std::vector<cudaColorSpinorField*> vp2; vp2.reserve(2*nsteps+1);
160 for(
int i=0; i<=nsteps; ++i){
161 vp1.push_back(&v[0+offset]);
162 vp2.push_back(&v[i+offset]);
164 for(
int i=1; i<=nsteps; ++i){
165 vp1.push_back(&v[i+offset]);
166 vp2.push_back(&v[nsteps+offset]);
172 for(
int i=0; i<num; ++i){
173 for(
int j=0; j<=i; ++j){
174 G[j+offset][i+offset] = G[i+offset][j+offset] = d[i+j];
177 for(
int i=0; i<nsteps; ++i){
183 static void computeMuNu(
double& result,
const double* u,
double** G,
const double* v,
int dim){
186 const int nsteps = (dim-1)/2;
188 for(
int i=nsteps; i<
dim; ++i){
189 for(
int j=nsteps; j<
dim; ++j){
190 result += u[i]*v[j]*G[i][j];
194 for(
int i=0; i<nsteps; ++i){
195 for(
int j=nsteps; j<
dim; ++j){
196 result += (u[i]*v[j] + u[j]*v[i])*G[i][j];
198 result += u[i]*v[i]*G[i][i];
212 const double b2 =
norm2(b);
215 printfQuda(
"Warning: inverting on zero-field source\n");
249 double* d =
new double[2*s+1];
250 double* d_p1 =
new double[2*s+1];
251 double* d_p2 =
new double[2*s+1];
252 double* g =
new double[2*s+1];
253 double* g_p1 =
new double[2*s+1];
254 double* g_p2 =
new double[2*s+1];
255 double** G =
new double*[2*s+1];
256 for(
int i=0; i<(2*s+1); ++i){
257 G[i] =
new double[2*s+1];
278 double gamma_kprev[
s];
282 for(
int i=0; i<(2*s+1); ++i){
290 computeMatrixPowers(V, R, s);
291 computeGramMatrix(G,V, mu);
297 const int prev_idx = j ? j-1 : s-1;
299 double& mu_prev = mu[prev_idx];
300 double& rho_prev = rho[prev_idx];
301 double& gamma_prev = gamma[prev_idx];
306 zero(d, 2*s+1); d[s+1] = 1.0;
308 zero(g, 2*s+1); g[
s] = 1.0;
313 d_p2[s-2] = (1 - rho_kprev[s-1])/(rho_kprev[s-1]*gamma_kprev[s-1]);
314 d_p2[s-1] = (1./gamma_kprev[s-1]);
315 d_p2[
s] = (-1./(gamma_kprev[s-1]*rho_kprev[s-1]));
318 if(k > 0) g_p2[s-1] = 1.0;
321 for(
int i=0; i<(2*s+1); ++i){
322 g[i] = rho[j-1]*g_p1[i] - rho[j-1]*gamma[j-1]*d_p1[i]
323 + (1 - rho[j-1])*g_p2[i];
327 computeCoeffs(d, d_p1, d_p2, k, j, s, gamma, rho, gamma_kprev, rho_kprev);
329 for(
int i=0; i<(2*s+1); ++i){
330 if(d[i] != 0.)
axpyCuda(d[i], V[i], w);
334 computeMuNu(r2, g, G, g, 2*s+1);
335 computeMuNu(rAr, g, G, d, 2*s+1);
339 rho[j] = (it==0) ? 1.0 : 1.0/(1.0 - (gamma[j]/gamma_prev)*(mu[j]/mu_prev)*(1.0/rho_prev));
342 axCuda((1.0 - rho[j]), R[j+1]);
344 axpyCuda(-rho[j]*gamma[j], w, R[j+1]);
347 axCuda((1.0 - rho[j]), x_new);
349 axpyCuda(gamma[j]*rho[j], R[j], x_new);
355 for(
int i=0; i<(2*s+1); ++i) d_p2[i] = d_p1[i];
356 for(
int i=0; i<(2*s+1); ++i) g_p2[i] = g_p1[i];
358 for(
int i=0; i<(2*s+1); ++i) d_p1[i] = d[i];
359 for(
int i=0; i<(2*s+1); ++i) g_p1[i] = g[i];
372 for(
int i=0; i<
s; ++i){
373 rho_kprev[i] = rho[i];
374 gamma_kprev[i] = gamma[i];
380 mat(R[0], x_prev, temp);
394 for(
int i=0; i<(2*s+1); ++i){
bool convergence(const double &r2, const double &hq2, const double &r2_tol, const double &hq_tol)
static double stopping(const double &tol, const double &b2, QudaResidualType residual_type)
__host__ __device__ ValueType sqrt(ValueType x)
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int int mu
void mat(void *out, void **fatlink, void **longlink, void *in, double kappa, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision)
MPCG(DiracMatrix &mat, SolverParam ¶m, TimeProfile &profile)
void PrintSummary(const char *name, int k, const double &r2, const double &b2)
QudaResidualType residual_type
void axpyCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
double reDotProductCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
cpuColorSpinorField * out
void Stop(QudaProfileType idx)
void PrintStats(const char *, int k, const double &r2, const double &b2, const double &hq2)
void zeroCuda(cudaColorSpinorField &a)
void operator()(cudaColorSpinorField &out, cudaColorSpinorField &in)
void print(const double d[], int n)
void axCuda(const double &a, cudaColorSpinorField &x)
double norm2(const ColorSpinorField &)
double xmyNormCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)