18 static void applyT(T d_out[],
const T d_in[],
const T gamma[],
const T rho[],
int N)
21 for(
int i=0; i<N; ++i){
22 d_out[i] = d_in[i]/gamma[i];
25 for(
int i=0; i<N-1; ++i){
26 d_out[i] += (d_in[i+1]*(1-rho[i+1])/(gamma[i+1]*rho[i+1]));
29 for(
int i=1; i<N; ++i){
30 d_out[i] -= d_in[i-1]/(rho[i-1]*gamma[i-1]);
37 static void applyB(T d_out[],
const T d_in[],
int N)
39 d_out[0] =
static_cast<T
>(0);
40 for(
int i=1; i<N; ++i) d_out[i] = d_in[i-1];
44 void print(
const double d[],
int n){
45 for(
int i=0; i<n; ++i){
46 std::cout << d[i] <<
" ";
48 std::cout << std::endl;
52 static void zero(T d[],
int N){
53 for(
int i=0; i<N; ++i) d[i] = static_cast<T>(0);
57 static void applyThirdTerm(T d_out[],
const T d_in[],
int k,
int j,
int s,
const T gamma[],
const T rho[],
const T gamma_kprev[],
const T rho_kprev[])
61 const int dim = 2*s + 1;
64 if(k) applyT(d_out, d_in, gamma_kprev, rho_kprev, s);
67 applyB(d_out+s, d_in+s, s+1);
70 if(k) d_out[s] -= d_in[s-1]/(rho_kprev[s-1]*gamma_kprev[s-1]);
73 for(
int i=0; i<
dim; ++i) d_out[i] *= -rho[j]*gamma[j];
79 static void computeCoeffs(T d_out[],
const T d_p1[],
const T d_p2[],
int k,
int j,
int s,
const T gamma[],
const T rho[],
const T gamma_kprev[],
const T rho_kprev[])
81 applyThirdTerm(d_out, d_p1, k, j-1, s, gamma, rho, gamma_kprev, rho_kprev);
84 for(
int i=0; i<(2*s+1); ++i){
85 d_out[i] += rho[j-1]*d_p1[i] + (1 - rho[j-1])*d_p2[i];
103 for(
int i=1; i<
nvec; ++i){
104 mat(out[i], out[i-1], temp);
109 void MPCG::computeMatrixPowers(std::vector<cudaColorSpinorField>& out, std::vector<cudaColorSpinorField>& in,
int nsteps)
111 cudaColorSpinorField temp(in[0]);
113 for(
int i=0; i<=nsteps; ++i) out[i] = in[i];
115 for(
int i=(nsteps+1); i<=(2*nsteps); ++i){
116 mat(out[i], out[i-1], temp);
122 static void computeGramMatrix(
double** G, std::vector<cudaColorSpinorField>& v,
double*
mu){
124 const int dim = v.size();
125 const int nsteps = (
dim-1)/2;
128 std::vector<cudaColorSpinorField*> vp1; vp1.reserve((nsteps+1)*nsteps);
129 std::vector<cudaColorSpinorField*> vp2; vp2.reserve((nsteps+1)*nsteps);
130 double g[(nsteps+1)*nsteps];
132 for(
int i=0; i<nsteps; ++i){
133 for(
int j=nsteps; j<
dim; j++){
134 vp1.push_back(&v[i]);
135 vp2.push_back(&v[j]);
140 for(
int i=0; i<nsteps; ++i){
141 for(
int j=nsteps; j<
dim; j++){
148 const int num =
dim-nsteps;
149 const int offset = nsteps;
150 double d[2*nsteps+1];
151 std::vector<cudaColorSpinorField*> vp1; vp1.reserve(2*nsteps+1);
152 std::vector<cudaColorSpinorField*> vp2; vp2.reserve(2*nsteps+1);
153 for(
int i=0; i<=nsteps; ++i){
154 vp1.push_back(&v[0+offset]);
155 vp2.push_back(&v[i+offset]);
157 for(
int i=1; i<=nsteps; ++i){
158 vp1.push_back(&v[i+offset]);
159 vp2.push_back(&v[nsteps+offset]);
165 for(
int i=0; i<num; ++i){
166 for(
int j=0; j<=i; ++j){
167 G[j+offset][i+offset] = G[i+offset][j+offset] = d[i+j];
170 for(
int i=0; i<nsteps; ++i){
176 static void computeMuNu(
double& result,
const double* u,
double** G,
const double* v,
int dim){
179 const int nsteps = (
dim-1)/2;
181 for(
int i=nsteps; i<
dim; ++i){
182 for(
int j=nsteps; j<
dim; ++j){
183 result += u[i]*v[j]*G[i][j];
187 for(
int i=0; i<nsteps; ++i){
188 for(
int j=nsteps; j<
dim; ++j){
189 result += (u[i]*v[j] + u[j]*v[i])*G[i][j];
191 result += u[i]*v[i]*G[i][i];
208 printfQuda(
"Warning: inverting on zero-field source\n");
242 double* d =
new double[2*s+1];
243 double* d_p1 =
new double[2*s+1];
244 double* d_p2 =
new double[2*s+1];
245 double* g =
new double[2*s+1];
246 double* g_p1 =
new double[2*s+1];
247 double* g_p2 =
new double[2*s+1];
248 double** G =
new double*[2*s+1];
249 for(
int i=0; i<(2*s+1); ++i){
250 G[i] =
new double[2*s+1];
271 double gamma_kprev[s];
275 for(
int i=0; i<(2*s+1); ++i){
283 computeMatrixPowers(
V, R, s);
284 computeGramMatrix(G,
V,
mu);
290 const int prev_idx = j ? j-1 : s-1;
292 double& mu_prev =
mu[prev_idx];
293 double& rho_prev = rho[prev_idx];
294 double& gamma_prev = gamma[prev_idx];
299 zero(d, 2*s+1); d[s+1] = 1.0;
301 zero(g, 2*s+1); g[s] = 1.0;
306 d_p2[s-2] = (1 - rho_kprev[s-1])/(rho_kprev[s-1]*gamma_kprev[s-1]);
307 d_p2[s-1] = (1./gamma_kprev[s-1]);
308 d_p2[s] = (-1./(gamma_kprev[s-1]*rho_kprev[s-1]));
311 if(k > 0) g_p2[s-1] = 1.0;
314 for(
int i=0; i<(2*s+1); ++i){
315 g[i] = rho[j-1]*g_p1[i] - rho[j-1]*gamma[j-1]*d_p1[i]
316 + (1 - rho[j-1])*g_p2[i];
320 computeCoeffs(d, d_p1, d_p2, k, j, s, gamma, rho, gamma_kprev, rho_kprev);
322 for(
int i=0; i<(2*s+1); ++i){
327 computeMuNu(r2, g, G, g, 2*s+1);
328 computeMuNu(rAr, g, G, d, 2*s+1);
332 rho[j] = (it==0) ? 1.0 : 1.0/(1.0 - (gamma[j]/gamma_prev)*(
mu[j]/mu_prev)*(1.0/rho_prev));
348 for(
int i=0; i<(2*s+1); ++i) d_p2[i] = d_p1[i];
349 for(
int i=0; i<(2*s+1); ++i) g_p2[i] = g_p1[i];
351 for(
int i=0; i<(2*s+1); ++i) d_p1[i] = d[i];
352 for(
int i=0; i<(2*s+1); ++i) g_p1[i] = g[i];
365 for(
int i=0; i<s; ++i){
366 rho_kprev[i] = rho[i];
367 gamma_kprev[i] = gamma[i];
373 mat(R[0], x_prev, temp);
387 for(
int i=0; i<(2*s+1); ++i){
void operator()(ColorSpinorField &out, ColorSpinorField &in)
MPCG(const DiracMatrix &mat, SolverParam ¶m, TimeProfile &profile)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE)....
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE)
quda::mgarray< int > nvec
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
void ax(double a, ColorSpinorField &x)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
void stop()
Stop profiling.
__device__ __host__ void zero(double &a)
__host__ __device__ ValueType sqrt(ValueType x)
void print(const double d[], int n)
QudaResidualType residual_type