QUDA v0.4.0
A library for QCD on GPUs
|
00001 #include <color_spinor_field.h> 00002 #include <blas_quda.h> 00003 #include <face_quda.h> 00004 00005 template <typename Float> 00006 void axpby(const Float &a, const Float *x, const Float &b, Float *y, const int N) { 00007 for (int i=0; i<N; i++) y[i] = a*x[i] + b*y[i]; 00008 } 00009 00010 void axpbyCpu(const double &a, const cpuColorSpinorField &x, 00011 const double &b, cpuColorSpinorField &y) { 00012 if (x.Precision() == QUDA_DOUBLE_PRECISION) 00013 axpby(a, (double*)x.V(), b, (double*)y.V(), x.Length()); 00014 else if (x.Precision() == QUDA_SINGLE_PRECISION) 00015 axpby((float)a, (float*)x.V(), (float)b, (float*)y.V(), x.Length()); 00016 else 00017 errorQuda("Precision type %d not implemented", x.Precision()); 00018 } 00019 00020 void xpyCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y) { 00021 if (x.Precision() == QUDA_DOUBLE_PRECISION) 00022 axpby(1.0, (double*)x.V(), 1.0, (double*)y.V(), x.Length()); 00023 else if (x.Precision() == QUDA_SINGLE_PRECISION) 00024 axpby(1.0f, (float*)x.V(), 1.0f, (float*)y.V(), x.Length()); 00025 else 00026 errorQuda("Precision type %d not implemented", x.Precision()); 00027 } 00028 00029 void axpyCpu(const double &a, const cpuColorSpinorField &x, 00030 cpuColorSpinorField &y) { 00031 if (x.Precision() == QUDA_DOUBLE_PRECISION) 00032 axpby(a, (double*)x.V(), 1.0, (double*)y.V(), x.Length()); 00033 else if (x.Precision() == QUDA_SINGLE_PRECISION) 00034 axpby((float)a, (float*)x.V(), 1.0f, (float*)y.V(), x.Length()); 00035 else 00036 errorQuda("Precision type %d not implemented", x.Precision()); 00037 } 00038 00039 void xpayCpu(const cpuColorSpinorField &x, const double &a, 00040 cpuColorSpinorField &y) { 00041 if (x.Precision() == QUDA_DOUBLE_PRECISION) 00042 axpby(1.0, (double*)x.V(), a, (double*)y.V(), x.Length()); 00043 else if (x.Precision() == QUDA_SINGLE_PRECISION) 00044 axpby(1.0f, (float*)x.V(), (float)a, (float*)y.V(), x.Length()); 00045 else 00046 errorQuda("Precision type %d not implemented", x.Precision()); 00047 } 00048 00049 void mxpyCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y) { 00050 if (x.Precision() == QUDA_DOUBLE_PRECISION) 00051 axpby(-1.0, (double*)x.V(), 1.0, (double*)y.V(), x.Length()); 00052 else if (x.Precision() == QUDA_SINGLE_PRECISION) 00053 axpby(-1.0f, (float*)x.V(), 1.0f, (float*)y.V(), x.Length()); 00054 else 00055 errorQuda("Precision type %d not implemented", x.Precision()); 00056 } 00057 00058 void axCpu(const double &a, cpuColorSpinorField &x) { 00059 if (x.Precision() == QUDA_DOUBLE_PRECISION) 00060 axpby(0.0, (double*)x.V(), a, (double*)x.V(), x.Length()); 00061 else if (x.Precision() == QUDA_SINGLE_PRECISION) 00062 axpby(0.0f, (float*)x.V(), (float)a, (float*)x.V(), x.Length()); 00063 else 00064 errorQuda("Precision type %d not implemented", x.Precision()); 00065 } 00066 00067 template <typename Float> 00068 void caxpby(const std::complex<Float> &a, const std::complex<Float> *x, 00069 const std::complex<Float> &b, std::complex<Float> *y, int N) { 00070 00071 for (int i=0; i<N; i++) { 00072 y[i] = a*x[i] + b*y[i]; 00073 } 00074 00075 } 00076 00077 void caxpyCpu(const quda::Complex &a, const cpuColorSpinorField &x, 00078 cpuColorSpinorField &y) { 00079 00080 if ( x.Precision() == QUDA_DOUBLE_PRECISION) 00081 caxpby(a, (quda::Complex*)x.V(), quda::Complex(1.0), 00082 (quda::Complex*)y.V(), x.Length()/2); 00083 else if (x.Precision() == QUDA_SINGLE_PRECISION) 00084 caxpby((std::complex<float>)a, (std::complex<float>*)x.V(), std::complex<float>(1.0), 00085 (std::complex<float>*)y.V(), x.Length()/2); 00086 else 00087 errorQuda("Precision type %d not implemented", x.Precision()); 00088 } 00089 00090 void caxpbyCpu(const quda::Complex &a, const cpuColorSpinorField &x, 00091 const quda::Complex &b, cpuColorSpinorField &y) { 00092 00093 if ( x.Precision() == QUDA_DOUBLE_PRECISION) 00094 caxpby(a, (quda::Complex*)x.V(), b, (quda::Complex*)y.V(), x.Length()/2); 00095 else if (x.Precision() == QUDA_SINGLE_PRECISION) 00096 caxpby((std::complex<float>)a, (std::complex<float>*)x.V(), (std::complex<float>)b, 00097 (std::complex<float>*)y.V(), x.Length()/2); 00098 else 00099 errorQuda("Precision type %d not implemented", x.Precision()); 00100 } 00101 00102 template <typename Float> 00103 void caxpbypcz(const std::complex<Float> &a, const std::complex<Float> *x, 00104 const std::complex<Float> &b, const std::complex<Float> *y, 00105 const std::complex<Float> &c, std::complex<Float> *z, int N) { 00106 00107 for (int i=0; i<N; i++) { 00108 z[i] = a*x[i] + b*y[i] + c*z[i]; 00109 } 00110 00111 } 00112 00113 void cxpaypbzCpu(const cpuColorSpinorField &x, const quda::Complex &a, 00114 const cpuColorSpinorField &y, const quda::Complex &b, 00115 cpuColorSpinorField &z) { 00116 00117 if (x.Precision() == QUDA_DOUBLE_PRECISION) 00118 caxpbypcz(quda::Complex(1, 0), (quda::Complex*)x.V(), a, (quda::Complex*)y.V(), 00119 b, (quda::Complex*)z.V(), x.Length()/2); 00120 else if (x.Precision() == QUDA_SINGLE_PRECISION) 00121 caxpbypcz(std::complex<float>(1, 0), (std::complex<float>*)x.V(), (std::complex<float>)a, (std::complex<float>*)y.V(), 00122 (std::complex<float>)b, (std::complex<float>*)z.V(), x.Length()/2); 00123 else 00124 errorQuda("Precision type %d not implemented", x.Precision()); 00125 } 00126 00127 void axpyBzpcxCpu(const double &a, cpuColorSpinorField& x, cpuColorSpinorField& y, 00128 const double &b, const cpuColorSpinorField& z, const double &c) { 00129 axpyCpu(a, x, y); 00130 axpbyCpu(b, z, c, x); 00131 } 00132 00133 // performs the operations: {y[i] = a*x[i] + y[i]; x[i] = z[i] + b*x[i]} 00134 void axpyZpbxCpu(const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, 00135 const cpuColorSpinorField &z, const double &b) { 00136 axpyCpu(a, x, y); 00137 xpayCpu(z, b, x); 00138 } 00139 00140 // performs the operation z[i] = a*x[i] + b*y[i] + z[i] and y[i] -= b*w[i] 00141 void caxpbypzYmbwCpu(const quda::Complex &a, const cpuColorSpinorField &x, const quda::Complex &b, 00142 cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w) { 00143 00144 if (x.Precision() == QUDA_DOUBLE_PRECISION) 00145 caxpbypcz(a, (quda::Complex*)x.V(), b, (quda::Complex*)y.V(), 00146 quda::Complex(1, 0), (quda::Complex*)z.V(), x.Length()/2); 00147 else if (x.Precision() == QUDA_SINGLE_PRECISION) 00148 caxpbypcz((std::complex<float>)a, (std::complex<float>*)x.V(), 00149 (std::complex<float>)b, (std::complex<float>*)y.V(), 00150 (std::complex<float>)(1.0f), (std::complex<float>*)z.V(), x.Length()/2); 00151 else 00152 errorQuda("Precision type %d not implemented", x.Precision()); 00153 00154 caxpyCpu(-b, w, y); 00155 } 00156 00157 template <typename Float> 00158 double norm(const Float *a, const int N) { 00159 double norm2 = 0; 00160 for (int i=0; i<N; i++) norm2 += a[i]*a[i]; 00161 return norm2; 00162 } 00163 00164 double normCpu(const cpuColorSpinorField &a) { 00165 double norm2 = 0.0; 00166 if (a.Precision() == QUDA_DOUBLE_PRECISION) 00167 norm2 = norm((double*)a.V(), a.Length()); 00168 else if (a.Precision() == QUDA_SINGLE_PRECISION) 00169 norm2 = norm((float*)a.V(), a.Length()); 00170 else 00171 errorQuda("Precision type %d not implemented", a.Precision()); 00172 reduceDouble(norm2); 00173 return norm2; 00174 } 00175 00176 double axpyNormCpu(const double &a, const cpuColorSpinorField &x, 00177 cpuColorSpinorField &y) { 00178 axpyCpu(a, x, y); 00179 return normCpu(y); 00180 } 00181 00182 template <typename Float> 00183 double reDotProduct(const Float *a, const Float *b, const int N) { 00184 double dot = 0; 00185 for (int i=0; i<N; i++) dot += a[i]*b[i]; 00186 return dot; 00187 } 00188 00189 double reDotProductCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b) { 00190 double dot = 0.0; 00191 if (a.Precision() == QUDA_DOUBLE_PRECISION) 00192 dot = reDotProduct((double*)a.V(), (double*)b.V(), a.Length()); 00193 else if (a.Precision() == QUDA_SINGLE_PRECISION) 00194 dot = reDotProduct((float*)a.V(), (float*)b.V(), a.Length()); 00195 else 00196 errorQuda("Precision type %d not implemented", a.Precision()); 00197 reduceDouble(dot); 00198 return dot; 00199 } 00200 00201 // First performs the operation y[i] = x[i] - y[i] 00202 // Second returns the norm of y 00203 double xmyNormCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y) { 00204 xpayCpu(x, -1, y); 00205 return normCpu(y); 00206 } 00207 00208 template <typename Float> 00209 quda::Complex cDotProduct(const std::complex<Float> *a, const std::complex<Float> *b, const int N) { 00210 quda::Complex dot = 0; 00211 for (int i=0; i<N; i++) dot += conj(a[i])*b[i]; 00212 return dot; 00213 } 00214 00215 quda::Complex cDotProductCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b) { 00216 quda::Complex dot = 0.0; 00217 if (a.Precision() == QUDA_DOUBLE_PRECISION) 00218 dot = cDotProduct((quda::Complex*)a.V(), (quda::Complex*)b.V(), a.Length()/2); 00219 else if (a.Precision() == QUDA_SINGLE_PRECISION) 00220 dot = cDotProduct((std::complex<float>*)a.V(), (std::complex<float>*)b.V(), a.Length()/2); 00221 else 00222 errorQuda("Precision type %d not implemented", a.Precision()); 00223 reduceDoubleArray((double*)&dot, 2); 00224 return dot; 00225 } 00226 00227 // First performs the operation y = x + a*y 00228 // Second returns complex dot product (z,y) 00229 quda::Complex xpaycDotzyCpu(const cpuColorSpinorField &x, const double &a, 00230 cpuColorSpinorField &y, const cpuColorSpinorField &z) { 00231 xpayCpu(x, a, y); 00232 return cDotProductCpu(z,y); 00233 } 00234 00235 double3 cDotProductNormACpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b) { 00236 quda::Complex dot = cDotProductCpu(a, b); 00237 double norm = normCpu(a); 00238 return make_double3(real(dot), imag(dot), norm); 00239 } 00240 00241 double3 cDotProductNormBCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b) { 00242 quda::Complex dot = cDotProductCpu(a, b); 00243 double norm = normCpu(b); 00244 return make_double3(real(dot), imag(dot), norm); 00245 } 00246 00247 // This convoluted kernel does the following: z += a*x + b*y, y -= b*w, norm = (y,y), dot = (u, y) 00248 double3 caxpbypzYmbwcDotProductUYNormYCpu(const quda::Complex &a, const cpuColorSpinorField &x, 00249 const quda::Complex &b, cpuColorSpinorField &y, 00250 cpuColorSpinorField &z, const cpuColorSpinorField &w, 00251 const cpuColorSpinorField &u) { 00252 00253 caxpbypzYmbwCpu(a, x, b, y, z, w); 00254 return cDotProductNormBCpu(u, y); 00255 } 00256 00257 void cabxpyAxCpu(const double &a, const quda::Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y) { 00258 axCpu(a, x); 00259 caxpyCpu(b, x, y); 00260 } 00261 00262 double caxpyNormCpu(const quda::Complex &a, cpuColorSpinorField &x, 00263 cpuColorSpinorField &y) { 00264 caxpyCpu(a, x, y); 00265 return norm2(y); 00266 } 00267 00268 double caxpyXmazNormXCpu(const quda::Complex &a, cpuColorSpinorField &x, 00269 cpuColorSpinorField &y, cpuColorSpinorField &z) { 00270 caxpyCpu(a, x, y); 00271 caxpyCpu(-a, z, x); 00272 return norm2(x); 00273 } 00274 00275 void caxpyXmazCpu(const quda::Complex &a, cpuColorSpinorField &x, 00276 cpuColorSpinorField &y, cpuColorSpinorField &z) { 00277 caxpyCpu(a, x, y); 00278 caxpyCpu(-a, z, x); 00279 } 00280 00281 double cabxpyAxNormCpu(const double &a, const quda::Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y) { 00282 axCpu(a, x); 00283 caxpyCpu(b, x, y); 00284 return norm2(y); 00285 } 00286 00287 void caxpbypzCpu(const quda::Complex &a, cpuColorSpinorField &x, const quda::Complex &b, cpuColorSpinorField &y, 00288 cpuColorSpinorField &z) { 00289 caxpyCpu(a, x, z); 00290 caxpyCpu(b, y, z); 00291 } 00292 00293 void caxpbypczpwCpu(const quda::Complex &a, cpuColorSpinorField &x, const quda::Complex &b, cpuColorSpinorField &y, 00294 const quda::Complex &c, cpuColorSpinorField &z, cpuColorSpinorField &w) { 00295 caxpyCpu(a, x, w); 00296 caxpyCpu(b, y, w); 00297 caxpyCpu(c, z, w); 00298 00299 } 00300 00301 quda::Complex caxpyDotzyCpu(const quda::Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, 00302 cpuColorSpinorField &z) { 00303 caxpyCpu(a, x, y); 00304 return cDotProductCpu(z, y); 00305 }