18 static void applyT(T d_out[],
const T d_in[],
const T
gamma[],
const T rho[],
int N)
21 for(
int i=0;
i<N; ++
i){
25 for(
int i=0;
i<N-1; ++
i){
26 d_out[
i] += (d_in[
i+1]*(1-rho[
i+1])/(
gamma[
i+1]*rho[
i+1]));
29 for(
int i=1;
i<N; ++
i){
30 d_out[
i] -= d_in[
i-1]/(rho[
i-1]*
gamma[
i-1]);
37 static void applyB(T d_out[],
const T d_in[],
int N)
39 d_out[0] =
static_cast<T
>(0);
40 for(
int i=1;
i<N; ++
i) d_out[
i] = d_in[
i-1];
45 for(
int i=0;
i<
n; ++
i){
46 std::cout <<
d[
i] <<
" ";
48 std::cout << std::endl;
53 for(
int i=0; i<N; ++i) d[i] = static_cast<T>(0);
57 static void applyThirdTerm(T d_out[],
const T d_in[],
int k,
int j,
int s,
const T
gamma[],
const T rho[],
const T gamma_kprev[],
const T rho_kprev[])
61 const int dim = 2*
s + 1;
64 if(k)
applyT(d_out, d_in, gamma_kprev, rho_kprev,
s);
70 if(k) d_out[
s] -= d_in[
s-1]/(rho_kprev[
s-1]*gamma_kprev[
s-1]);
79 static void computeCoeffs(T d_out[],
const T d_p1[],
const T d_p2[],
int k,
int j,
int s,
const T
gamma[],
const T rho[],
const T gamma_kprev[],
const T rho_kprev[])
84 for(
int i=0;
i<(2*
s+1); ++
i){
85 d_out[
i] += rho[j-1]*d_p1[
i] + (1 - rho[j-1])*d_p2[
i];
119 for(
int i=(nsteps+1);
i<=(2*nsteps); ++
i){
126 static void computeGramMatrix(
double** G, std::vector<cudaColorSpinorField>& v,
double*
mu){
128 const int dim = v.size();
129 const int nsteps = (
dim-1)/2;
132 std::vector<cudaColorSpinorField*> vp1; vp1.reserve((nsteps+1)*nsteps);
133 std::vector<cudaColorSpinorField*> vp2; vp2.reserve((nsteps+1)*nsteps);
134 double g[(nsteps+1)*nsteps];
136 for(
int i=0;
i<nsteps; ++
i){
137 for(
int j=nsteps; j<
dim; j++){
138 vp1.push_back(&v[
i]);
139 vp2.push_back(&v[j]);
144 for(
int i=0;
i<nsteps; ++
i){
145 for(
int j=nsteps; j<
dim; j++){
152 const int num =
dim-nsteps;
153 const int offset = nsteps;
154 double d[2*nsteps+1];
155 std::vector<cudaColorSpinorField*> vp1; vp1.reserve(2*nsteps+1);
156 std::vector<cudaColorSpinorField*> vp2; vp2.reserve(2*nsteps+1);
157 for(
int i=0;
i<=nsteps; ++
i){
158 vp1.push_back(&v[0+
offset]);
161 for(
int i=1;
i<=nsteps; ++
i){
163 vp2.push_back(&v[nsteps+
offset]);
169 for(
int i=0;
i<num; ++
i){
170 for(
int j=0; j<=
i; ++j){
174 for(
int i=0;
i<nsteps; ++
i){
180 static void computeMuNu(
double& result,
const double* u,
double** G,
const double* v,
int dim){
183 const int nsteps = (
dim-1)/2;
185 for(
int i=nsteps;
i<
dim; ++
i){
186 for(
int j=nsteps; j<
dim; ++j){
187 result += u[
i]*v[j]*G[
i][j];
191 for(
int i=0;
i<nsteps; ++
i){
192 for(
int j=nsteps; j<
dim; ++j){
193 result += (u[
i]*v[j] + u[j]*v[
i])*G[
i][j];
195 result += u[
i]*v[
i]*G[
i][
i];
212 printfQuda(
"Warning: inverting on zero-field source\n");
246 double*
d =
new double[2*
s+1];
247 double* d_p1 =
new double[2*
s+1];
248 double* d_p2 =
new double[2*
s+1];
249 double* g =
new double[2*
s+1];
250 double* g_p1 =
new double[2*
s+1];
251 double* g_p2 =
new double[2*
s+1];
252 double** G =
new double*[2*
s+1];
253 for(
int i=0;
i<(2*
s+1); ++
i){
254 G[
i] =
new double[2*
s+1];
275 double gamma_kprev[
s];
279 for(
int i=0;
i<(2*
s+1); ++
i){
288 computeGramMatrix(G,
V,
mu);
294 const int prev_idx = j ? j-1 :
s-1;
296 double& mu_prev =
mu[prev_idx];
297 double& rho_prev = rho[prev_idx];
298 double& gamma_prev =
gamma[prev_idx];
305 zero(g, 2*
s+1); g[
s] = 1.0;
310 d_p2[
s-2] = (1 - rho_kprev[
s-1])/(rho_kprev[
s-1]*gamma_kprev[
s-1]);
311 d_p2[
s-1] = (1./gamma_kprev[
s-1]);
312 d_p2[
s] = (-1./(gamma_kprev[
s-1]*rho_kprev[
s-1]));
315 if(k > 0) g_p2[
s-1] = 1.0;
318 for(
int i=0;
i<(2*
s+1); ++
i){
319 g[
i] = rho[j-1]*g_p1[
i] - rho[j-1]*
gamma[j-1]*d_p1[
i]
320 + (1 - rho[j-1])*g_p2[
i];
326 for(
int i=0;
i<(2*
s+1); ++
i){
331 computeMuNu(r2, g, G, g, 2*
s+1);
332 computeMuNu(rAr, g, G,
d, 2*
s+1);
336 rho[j] = (
it==0) ? 1.0 : 1.0/(1.0 - (
gamma[j]/gamma_prev)*(
mu[j]/mu_prev)*(1.0/rho_prev));
352 for(
int i=0;
i<(2*
s+1); ++
i) d_p2[
i] = d_p1[
i];
353 for(
int i=0;
i<(2*
s+1); ++
i) g_p2[
i] = g_p1[
i];
355 for(
int i=0;
i<(2*
s+1); ++
i) d_p1[
i] =
d[
i];
356 for(
int i=0;
i<(2*
s+1); ++
i) g_p1[
i] = g[
i];
369 for(
int i=0;
i<
s; ++
i){
370 rho_kprev[
i] = rho[
i];
377 mat(
R[0], x_prev, temp);
391 for(
int i=0;
i<(2*
s+1); ++
i){
bool convergence(const double &r2, const double &hq2, const double &r2_tol, const double &hq_tol)
static double stopping(const double &tol, const double &b2, QudaResidualType residual_type)
double norm2(const ColorSpinorField &a)
__host__ __device__ ValueType sqrt(ValueType x)
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
void ax(const double &a, ColorSpinorField &x)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
static void computeCoeffs(T d_out[], const T d_p1[], const T d_p2[], int k, int j, int s, const T gamma[], const T rho[], const T gamma_kprev[], const T rho_kprev[])
static void applyT(T d_out[], const T d_in[], const T gamma[], const T rho[], int N)
MPCG(DiracMatrix &mat, SolverParam ¶m, TimeProfile &profile)
void PrintSummary(const char *name, int k, const double &r2, const double &b2)
QudaResidualType residual_type
static void applyB(T d_out[], const T d_in[], int N)
double gamma(double) __attribute__((availability(macosx
void zero(ColorSpinorField &a)
void computeMatrixPowers(cudaColorSpinorField out[], cudaColorSpinorField &in, int nvec)
void axpy(const double &a, ColorSpinorField &x, ColorSpinorField &y)
cpuColorSpinorField * out
void PrintStats(const char *, int k, const double &r2, const double &b2, const double &hq2)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
void print(const double d[], int n)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
static void applyThirdTerm(T d_out[], const T d_in[], int k, int j, int s, const T gamma[], const T rho[], const T gamma_kprev[], const T rho_kprev[])
static __inline__ size_t size_t d
__device__ __host__ void zero(vector_type< scalar, n > &v)