18 static void applyT(T d_out[],
const T d_in[],
const T gamma[],
const T rho[],
int N)
21 for(
int i=0; i<N; ++i){
22 d_out[i] = d_in[i]/gamma[i];
25 for(
int i=0; i<N-1; ++i){
26 d_out[i] += (d_in[i+1]*(1-rho[i+1])/(gamma[i+1]*rho[i+1]));
29 for(
int i=1; i<N; ++i){
30 d_out[i] -= d_in[i-1]/(rho[i-1]*gamma[i-1]);
37 static void applyB(T d_out[],
const T d_in[],
int N)
39 d_out[0] =
static_cast<T
>(0);
40 for(
int i=1; i<N; ++i) d_out[i] = d_in[i-1];
44 void print(
const double d[],
int n){
45 for(
int i=0; i<n; ++i){
46 std::cout << d[i] <<
" ";
48 std::cout << std::endl;
52 static void zero(T d[],
int N){
53 for(
int i=0; i<N; ++i) d[i] = static_cast<T>(0);
57 static void applyThirdTerm(T d_out[],
const T d_in[],
int k,
int j,
int s,
const T gamma[],
const T rho[],
const T gamma_kprev[],
const T rho_kprev[])
61 const int dim = 2*s + 1;
64 if(k)
applyT(d_out, d_in, gamma_kprev, rho_kprev, s);
67 applyB(d_out+s, d_in+s, s+1);
70 if(k) d_out[
s] -= d_in[s-1]/(rho_kprev[s-1]*gamma_kprev[s-1]);
73 for(
int i=0; i<dim; ++i) d_out[i] *= -rho[j]*gamma[j];
79 static void computeCoeffs(T d_out[],
const T d_p1[],
const T d_p2[],
int k,
int j,
int s,
const T gamma[],
const T rho[],
const T gamma_kprev[],
const T rho_kprev[])
81 applyThirdTerm(d_out, d_p1, k, j-1, s, gamma, rho, gamma_kprev, rho_kprev);
84 for(
int i=0; i<(2*s+1); ++i){
85 d_out[i] += rho[j-1]*d_p1[i] + (1 - rho[j-1])*d_p2[i];
94 Solver(param, profile), mat(mat)
107 for(
int i=1; i<
nvec; ++i){
108 mat(out[i], out[i-1], temp);
117 for(
int i=0; i<=nsteps; ++i) out[i] = in[i];
119 for(
int i=(nsteps+1); i<=(2*nsteps); ++i){
120 mat(out[i], out[i-1], temp);
126 static void computeGramMatrix(
double** G, std::vector<cudaColorSpinorField>& v,
double*
mu){
128 const int dim = v.size();
129 const int nsteps = (dim-1)/2;
132 std::vector<cudaColorSpinorField*> vp1; vp1.reserve((nsteps+1)*nsteps);
133 std::vector<cudaColorSpinorField*> vp2; vp2.reserve((nsteps+1)*nsteps);
134 double g[(nsteps+1)*nsteps];
136 for(
int i=0; i<nsteps; ++i){
137 for(
int j=nsteps; j<dim; j++){
138 vp1.push_back(&v[i]);
139 vp2.push_back(&v[j]);
144 for(
int i=0; i<nsteps; ++i){
145 for(
int j=nsteps; j<dim; j++){
152 const int num = dim-nsteps;
153 const int offset = nsteps;
154 double d[2*nsteps+1];
155 std::vector<cudaColorSpinorField*> vp1; vp1.reserve(2*nsteps+1);
156 std::vector<cudaColorSpinorField*> vp2; vp2.reserve(2*nsteps+1);
157 for(
int i=0; i<=nsteps; ++i){
158 vp1.push_back(&v[0+offset]);
159 vp2.push_back(&v[i+offset]);
161 for(
int i=1; i<=nsteps; ++i){
162 vp1.push_back(&v[i+offset]);
163 vp2.push_back(&v[nsteps+offset]);
169 for(
int i=0; i<num; ++i){
170 for(
int j=0; j<=i; ++j){
171 G[j+offset][i+offset] = G[i+offset][j+offset] = d[i+j];
174 for(
int i=0; i<nsteps; ++i){
180 static void computeMuNu(
double& result,
const double* u,
double** G,
const double* v,
int dim){
183 const int nsteps = (dim-1)/2;
185 for(
int i=nsteps; i<dim; ++i){
186 for(
int j=nsteps; j<dim; ++j){
187 result += u[i]*v[j]*G[i][j];
191 for(
int i=0; i<nsteps; ++i){
192 for(
int j=nsteps; j<dim; ++j){
193 result += (u[i]*v[j] + u[j]*v[i])*G[i][j];
195 result += u[i]*v[i]*G[i][i];
212 printfQuda(
"Warning: inverting on zero-field source\n");
246 double* d =
new double[2*s+1];
247 double* d_p1 =
new double[2*s+1];
248 double* d_p2 =
new double[2*s+1];
249 double* g =
new double[2*s+1];
250 double* g_p1 =
new double[2*s+1];
251 double* g_p2 =
new double[2*s+1];
252 double** G =
new double*[2*s+1];
253 for(
int i=0; i<(2*s+1); ++i){
254 G[i] =
new double[2*s+1];
275 double gamma_kprev[
s];
279 for(
int i=0; i<(2*s+1); ++i){
288 computeGramMatrix(G,V, mu);
294 const int prev_idx = j ? j-1 : s-1;
296 double& mu_prev = mu[prev_idx];
297 double& rho_prev = rho[prev_idx];
298 double& gamma_prev = gamma[prev_idx];
303 zero(d, 2*s+1); d[s+1] = 1.0;
305 zero(g, 2*s+1); g[
s] = 1.0;
310 d_p2[s-2] = (1 - rho_kprev[s-1])/(rho_kprev[s-1]*gamma_kprev[s-1]);
311 d_p2[s-1] = (1./gamma_kprev[s-1]);
312 d_p2[
s] = (-1./(gamma_kprev[s-1]*rho_kprev[s-1]));
315 if(k > 0) g_p2[s-1] = 1.0;
318 for(
int i=0; i<(2*s+1); ++i){
319 g[i] = rho[j-1]*g_p1[i] - rho[j-1]*gamma[j-1]*d_p1[i]
320 + (1 - rho[j-1])*g_p2[i];
324 computeCoeffs(d, d_p1, d_p2, k, j, s, gamma, rho, gamma_kprev, rho_kprev);
326 for(
int i=0; i<(2*s+1); ++i){
331 computeMuNu(r2, g, G, g, 2*s+1);
332 computeMuNu(rAr, g, G, d, 2*s+1);
336 rho[j] = (it==0) ? 1.0 : 1.0/(1.0 - (gamma[j]/gamma_prev)*(mu[j]/mu_prev)*(1.0/rho_prev));
352 for(
int i=0; i<(2*s+1); ++i) d_p2[i] = d_p1[i];
353 for(
int i=0; i<(2*s+1); ++i) g_p2[i] = g_p1[i];
355 for(
int i=0; i<(2*s+1); ++i) d_p1[i] = d[i];
356 for(
int i=0; i<(2*s+1); ++i) g_p1[i] = g[i];
369 for(
int i=0; i<
s; ++i){
370 rho_kprev[i] = rho[i];
371 gamma_kprev[i] = gamma[i];
377 mat(R[0], x_prev, temp);
391 for(
int i=0; i<(2*s+1); ++i){
void ax(double a, ColorSpinorField &x)
double norm2(const ColorSpinorField &a)
__host__ __device__ ValueType sqrt(ValueType x)
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE) ...
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
static void computeCoeffs(T d_out[], const T d_p1[], const T d_p2[], int k, int j, int s, const T gamma[], const T rho[], const T gamma_kprev[], const T rho_kprev[])
static void applyT(T d_out[], const T d_in[], const T gamma[], const T rho[], int N)
MPCG(DiracMatrix &mat, SolverParam ¶m, TimeProfile &profile)
QudaResidualType residual_type
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
int nvec[QUDA_MAX_MG_LEVEL]
static void applyB(T d_out[], const T d_in[], int N)
void zero(ColorSpinorField &a)
void computeMatrixPowers(cudaColorSpinorField out[], cudaColorSpinorField &in, int nvec)
cpuColorSpinorField * out
void operator()(ColorSpinorField &out, ColorSpinorField &in)
void print(const double d[], int n)
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE). Assumes SolverParam.true_res and SolverParam.true_res_hq has been set.
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
static void applyThirdTerm(T d_out[], const T d_in[], int k, int j, int s, const T gamma[], const T rho[], const T gamma_kprev[], const T rho_kprev[])
__device__ __host__ void zero(vector_type< scalar, n > &v)