QUDA: quda/lib/blas_cpu.cpp Source File

QUDA v0.4.0
A library for QCD on GPUs
00001 #include <color_spinor_field.h>
00002 #include <blas_quda.h>
00003 #include <face_quda.h>
00004 
00005 template <typename Float>
00006 void axpby(const Float &a, const Float *x, const Float &b, Float *y, const int N) {
00007   for (int i=0; i<N; i++) y[i] = a*x[i] + b*y[i];
00008 }
00009 
00010 void axpbyCpu(const double &a, const cpuColorSpinorField &x, 
00011               const double &b, cpuColorSpinorField &y) {
00012   if (x.Precision() == QUDA_DOUBLE_PRECISION)
00013     axpby(a, (double*)x.V(), b, (double*)y.V(), x.Length());
00014   else if (x.Precision() == QUDA_SINGLE_PRECISION)
00015     axpby((float)a, (float*)x.V(), (float)b, (float*)y.V(), x.Length());
00016   else
00017     errorQuda("Precision type %d not implemented", x.Precision());
00018 }
00019 
00020 void xpyCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y) {
00021   if (x.Precision() == QUDA_DOUBLE_PRECISION)
00022     axpby(1.0, (double*)x.V(), 1.0, (double*)y.V(), x.Length());
00023   else if (x.Precision() == QUDA_SINGLE_PRECISION)
00024     axpby(1.0f, (float*)x.V(), 1.0f, (float*)y.V(), x.Length());
00025   else
00026     errorQuda("Precision type %d not implemented", x.Precision());
00027 }
00028 
00029 void axpyCpu(const double &a, const cpuColorSpinorField &x, 
00030              cpuColorSpinorField &y) {
00031   if (x.Precision() == QUDA_DOUBLE_PRECISION)
00032     axpby(a, (double*)x.V(), 1.0, (double*)y.V(), x.Length());
00033   else if (x.Precision() == QUDA_SINGLE_PRECISION)
00034     axpby((float)a, (float*)x.V(), 1.0f, (float*)y.V(), x.Length());
00035   else
00036     errorQuda("Precision type %d not implemented", x.Precision());
00037 }
00038 
00039 void xpayCpu(const cpuColorSpinorField &x, const double &a, 
00040              cpuColorSpinorField &y) {
00041   if (x.Precision() == QUDA_DOUBLE_PRECISION)
00042     axpby(1.0, (double*)x.V(), a, (double*)y.V(), x.Length());
00043   else if (x.Precision() == QUDA_SINGLE_PRECISION)
00044     axpby(1.0f, (float*)x.V(), (float)a, (float*)y.V(), x.Length());
00045   else
00046     errorQuda("Precision type %d not implemented", x.Precision());
00047 }
00048 
00049 void mxpyCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y) {
00050   if (x.Precision() == QUDA_DOUBLE_PRECISION)
00051     axpby(-1.0, (double*)x.V(), 1.0, (double*)y.V(), x.Length());
00052   else if (x.Precision() == QUDA_SINGLE_PRECISION)
00053     axpby(-1.0f, (float*)x.V(), 1.0f, (float*)y.V(), x.Length());
00054   else
00055     errorQuda("Precision type %d not implemented", x.Precision());
00056 }
00057 
00058 void axCpu(const double &a, cpuColorSpinorField &x) {
00059   if (x.Precision() == QUDA_DOUBLE_PRECISION)
00060     axpby(0.0, (double*)x.V(), a, (double*)x.V(), x.Length());
00061   else if (x.Precision() == QUDA_SINGLE_PRECISION)
00062     axpby(0.0f, (float*)x.V(), (float)a, (float*)x.V(), x.Length());
00063   else
00064     errorQuda("Precision type %d not implemented", x.Precision());
00065 }
00066 
00067 template <typename Float>
00068 void caxpby(const std::complex<Float> &a, const std::complex<Float> *x,
00069             const std::complex<Float> &b, std::complex<Float> *y, int N) {
00070 
00071   for (int i=0; i<N; i++) {
00072     y[i] = a*x[i] + b*y[i];
00073   }
00074 
00075 }
00076 
00077 void caxpyCpu(const quda::Complex &a, const cpuColorSpinorField &x,
00078               cpuColorSpinorField &y) {
00079 
00080   if ( x.Precision() == QUDA_DOUBLE_PRECISION)
00081     caxpby(a, (quda::Complex*)x.V(), quda::Complex(1.0), 
00082            (quda::Complex*)y.V(), x.Length()/2);
00083   else if (x.Precision() == QUDA_SINGLE_PRECISION)
00084     caxpby((std::complex<float>)a, (std::complex<float>*)x.V(), std::complex<float>(1.0), 
00085            (std::complex<float>*)y.V(), x.Length()/2);
00086   else 
00087     errorQuda("Precision type %d not implemented", x.Precision());
00088 }
00089 
00090 void caxpbyCpu(const quda::Complex &a, const cpuColorSpinorField &x,
00091                const quda::Complex &b, cpuColorSpinorField &y) {
00092 
00093   if ( x.Precision() == QUDA_DOUBLE_PRECISION)
00094     caxpby(a, (quda::Complex*)x.V(), b, (quda::Complex*)y.V(), x.Length()/2);
00095   else if (x.Precision() == QUDA_SINGLE_PRECISION)
00096     caxpby((std::complex<float>)a, (std::complex<float>*)x.V(), (std::complex<float>)b, 
00097           (std::complex<float>*)y.V(), x.Length()/2);
00098   else 
00099     errorQuda("Precision type %d not implemented", x.Precision());
00100 }
00101 
00102 template <typename Float>
00103 void caxpbypcz(const std::complex<Float> &a, const std::complex<Float> *x,
00104               const std::complex<Float> &b, const std::complex<Float> *y, 
00105               const std::complex<Float> &c, std::complex<Float> *z, int N) {
00106 
00107   for (int i=0; i<N; i++) {
00108     z[i] = a*x[i] + b*y[i] + c*z[i];
00109   }
00110 
00111 }
00112 
00113 void cxpaypbzCpu(const cpuColorSpinorField &x, const quda::Complex &a, 
00114                  const cpuColorSpinorField &y, const quda::Complex &b,
00115                  cpuColorSpinorField &z) {
00116 
00117   if (x.Precision() == QUDA_DOUBLE_PRECISION)
00118     caxpbypcz(quda::Complex(1, 0), (quda::Complex*)x.V(), a, (quda::Complex*)y.V(), 
00119              b, (quda::Complex*)z.V(), x.Length()/2);
00120   else if (x.Precision() == QUDA_SINGLE_PRECISION)
00121     caxpbypcz(std::complex<float>(1, 0), (std::complex<float>*)x.V(), (std::complex<float>)a, (std::complex<float>*)y.V(), 
00122              (std::complex<float>)b, (std::complex<float>*)z.V(), x.Length()/2);
00123   else 
00124     errorQuda("Precision type %d not implemented", x.Precision());
00125 }
00126 
00127 void axpyBzpcxCpu(const double &a, cpuColorSpinorField& x, cpuColorSpinorField& y, 
00128                   const double &b, const cpuColorSpinorField& z, const double &c) {
00129   axpyCpu(a, x, y);
00130   axpbyCpu(b, z, c, x);
00131 }
00132 
00133 // performs the operations: {y[i] = a*x[i] + y[i]; x[i] = z[i] + b*x[i]}
00134 void axpyZpbxCpu(const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, 
00135                  const cpuColorSpinorField &z, const double &b) {
00136   axpyCpu(a, x, y);
00137   xpayCpu(z, b, x);
00138 }
00139 
00140 // performs the operation z[i] = a*x[i] + b*y[i] + z[i] and y[i] -= b*w[i]
00141 void caxpbypzYmbwCpu(const quda::Complex &a, const cpuColorSpinorField &x, const quda::Complex &b, 
00142                      cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w) {
00143 
00144   if (x.Precision() == QUDA_DOUBLE_PRECISION)
00145     caxpbypcz(a, (quda::Complex*)x.V(), b, (quda::Complex*)y.V(), 
00146               quda::Complex(1, 0), (quda::Complex*)z.V(), x.Length()/2);
00147   else if (x.Precision() == QUDA_SINGLE_PRECISION)
00148     caxpbypcz((std::complex<float>)a, (std::complex<float>*)x.V(), 
00149               (std::complex<float>)b, (std::complex<float>*)y.V(), 
00150               (std::complex<float>)(1.0f), (std::complex<float>*)z.V(), x.Length()/2);
00151   else 
00152     errorQuda("Precision type %d not implemented", x.Precision());
00153 
00154   caxpyCpu(-b, w, y);
00155 }
00156 
00157 template <typename Float>
00158 double norm(const Float *a, const int N) {
00159   double norm2 = 0;
00160   for (int i=0; i<N; i++) norm2 += a[i]*a[i];
00161   return norm2;
00162 }
00163 
00164 double normCpu(const cpuColorSpinorField &a) {
00165   double norm2 = 0.0;
00166   if (a.Precision() == QUDA_DOUBLE_PRECISION)
00167     norm2 = norm((double*)a.V(), a.Length());
00168   else if (a.Precision() == QUDA_SINGLE_PRECISION)
00169     norm2 = norm((float*)a.V(), a.Length());
00170   else
00171     errorQuda("Precision type %d not implemented", a.Precision());
00172   reduceDouble(norm2);
00173   return norm2;
00174 }
00175 
00176 double axpyNormCpu(const double &a, const cpuColorSpinorField &x, 
00177                    cpuColorSpinorField &y) {
00178   axpyCpu(a, x, y);
00179   return normCpu(y);
00180 }
00181 
00182 template <typename Float>
00183 double reDotProduct(const Float *a, const Float *b, const int N) {
00184   double dot = 0;
00185   for (int i=0; i<N; i++) dot += a[i]*b[i];
00186   return dot;
00187 }
00188 
00189 double reDotProductCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b) {
00190   double dot = 0.0;
00191   if (a.Precision() == QUDA_DOUBLE_PRECISION)
00192     dot = reDotProduct((double*)a.V(), (double*)b.V(), a.Length());
00193   else if (a.Precision() == QUDA_SINGLE_PRECISION)
00194     dot = reDotProduct((float*)a.V(), (float*)b.V(), a.Length());
00195   else
00196     errorQuda("Precision type %d not implemented", a.Precision());
00197   reduceDouble(dot);
00198   return dot;
00199 }
00200 
00201 // First performs the operation y[i] = x[i] - y[i]
00202 // Second returns the norm of y
00203 double xmyNormCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y) {
00204   xpayCpu(x, -1, y);
00205   return normCpu(y);
00206 }
00207 
00208 template <typename Float>
00209 quda::Complex cDotProduct(const std::complex<Float> *a, const std::complex<Float> *b, const int N) {
00210   quda::Complex dot = 0;
00211   for (int i=0; i<N; i++) dot += conj(a[i])*b[i];
00212   return dot;
00213 }
00214 
00215 quda::Complex cDotProductCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b) {
00216   quda::Complex dot = 0.0;
00217   if (a.Precision() == QUDA_DOUBLE_PRECISION)
00218     dot = cDotProduct((quda::Complex*)a.V(), (quda::Complex*)b.V(), a.Length()/2);
00219   else if (a.Precision() == QUDA_SINGLE_PRECISION)
00220     dot = cDotProduct((std::complex<float>*)a.V(), (std::complex<float>*)b.V(), a.Length()/2);
00221   else
00222     errorQuda("Precision type %d not implemented", a.Precision());
00223   reduceDoubleArray((double*)&dot, 2);
00224   return dot;
00225 }
00226 
00227 // First performs the operation y = x + a*y
00228 // Second returns complex dot product (z,y)
00229 quda::Complex xpaycDotzyCpu(const cpuColorSpinorField &x, const double &a, 
00230                       cpuColorSpinorField &y, const cpuColorSpinorField &z) {
00231   xpayCpu(x, a, y);
00232   return cDotProductCpu(z,y);
00233 }
00234 
00235 double3 cDotProductNormACpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b) {
00236   quda::Complex dot = cDotProductCpu(a, b);
00237   double norm = normCpu(a);
00238   return make_double3(real(dot), imag(dot), norm);
00239 }
00240 
00241 double3 cDotProductNormBCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b) {
00242   quda::Complex dot = cDotProductCpu(a, b);
00243   double norm = normCpu(b);
00244   return make_double3(real(dot), imag(dot), norm);
00245 }
00246 
00247 // This convoluted kernel does the following: z += a*x + b*y, y -= b*w, norm = (y,y), dot = (u, y)
00248 double3 caxpbypzYmbwcDotProductUYNormYCpu(const quda::Complex &a, const cpuColorSpinorField &x, 
00249                                           const quda::Complex &b, cpuColorSpinorField &y, 
00250                                           cpuColorSpinorField &z, const cpuColorSpinorField &w, 
00251                                           const cpuColorSpinorField &u) {
00252 
00253   caxpbypzYmbwCpu(a, x, b, y, z, w);
00254   return cDotProductNormBCpu(u, y);
00255 }
00256 
00257 void cabxpyAxCpu(const double &a, const quda::Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y) {
00258   axCpu(a, x);
00259   caxpyCpu(b, x, y);
00260 }
00261 
00262 double caxpyNormCpu(const quda::Complex &a, cpuColorSpinorField &x, 
00263                     cpuColorSpinorField &y) {
00264   caxpyCpu(a, x, y);
00265   return norm2(y);
00266 }
00267 
00268 double caxpyXmazNormXCpu(const quda::Complex &a, cpuColorSpinorField &x, 
00269                          cpuColorSpinorField &y, cpuColorSpinorField &z) {
00270   caxpyCpu(a, x, y);
00271   caxpyCpu(-a, z, x);
00272   return norm2(x);
00273 }
00274 
00275 void caxpyXmazCpu(const quda::Complex &a, cpuColorSpinorField &x, 
00276                     cpuColorSpinorField &y, cpuColorSpinorField &z) {
00277   caxpyCpu(a, x, y);
00278   caxpyCpu(-a, z, x);
00279 }
00280 
00281 double cabxpyAxNormCpu(const double &a, const quda::Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y) {
00282   axCpu(a, x);
00283   caxpyCpu(b, x, y);
00284   return norm2(y);
00285 }
00286 
00287 void caxpbypzCpu(const quda::Complex &a, cpuColorSpinorField &x, const quda::Complex &b, cpuColorSpinorField &y, 
00288                  cpuColorSpinorField &z) {
00289   caxpyCpu(a, x, z);
00290   caxpyCpu(b, y, z);
00291 }
00292 
00293 void caxpbypczpwCpu(const quda::Complex &a, cpuColorSpinorField &x, const quda::Complex &b, cpuColorSpinorField &y, 
00294                     const quda::Complex &c, cpuColorSpinorField &z, cpuColorSpinorField &w) {
00295   caxpyCpu(a, x, w);
00296   caxpyCpu(b, y, w);
00297   caxpyCpu(c, z, w);
00298 
00299 }
00300 
00301 quda::Complex caxpyDotzyCpu(const quda::Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y,
00302                       cpuColorSpinorField &z) {
00303   caxpyCpu(a, x, y);
00304   return cDotProductCpu(z, y);
00305 }