QUDA: quda/tests/blas_reference.cpp Source File

QUDA v0.4.0
A library for QCD on GPUs
00001 #include <blas_reference.h>
00002 #include <stdio.h>
00003 #ifdef MPI_COMMS
00004 #include <comm_quda.h>
00005 #elif QMP_COMMS
00006 #include <qmp.h>
00007 #endif
00008 
00009 template <typename Float>
00010 inline void aXpY(Float a, Float *x, Float *y, int len)
00011 {
00012   for(int i=0; i < len; i++){ y[i] += a*x[i]; }
00013 }
00014 
00015 void axpy(double a, void *x, void *y, int len, QudaPrecision precision) { 
00016   if( precision == QUDA_DOUBLE_PRECISION ) aXpY(a, (double *)x, (double *)y, len);
00017   else aXpY((float)a, (float *)x, (float *)y, len);
00018 }
00019 
00020 // performs the operation x[i] *= a
00021 template <typename Float>
00022 inline void aX(Float a, Float *x, int len) {
00023   for (int i=0; i<len; i++) x[i] *= a;
00024 }
00025 
00026 void ax(double a, void *x, int len, QudaPrecision precision) {
00027   if (precision == QUDA_DOUBLE_PRECISION) aX(a, (double*)x, len);
00028   else aX((float)a, (float*)x, len);
00029 }
00030 
00031 // performs the operation y[i] -= x[i] (minus x plus y)
00032 template <typename Float>
00033 inline void mXpY(Float *x, Float *y, int len) {
00034   for (int i=0; i<len; i++) y[i] -= x[i];
00035 }
00036 
00037 void mxpy(void* x, void* y, int len, QudaPrecision precision) {
00038   if (precision == QUDA_DOUBLE_PRECISION) mXpY((double*)x, (double*)y, len);
00039   else mXpY((float*)x, (float*)y, len);
00040 }
00041 
00042 
00043 // returns the square of the L2 norm of the vector
00044 template <typename Float>
00045 inline double norm2(Float *v, int len) {
00046   double sum=0.0;
00047   for (int i=0; i<len; i++) sum += v[i]*v[i];
00048 #ifdef MPI_COMMS
00049   comm_allreduce(&sum);
00050 #elif QMP_COMMS
00051   QMP_sum_double(&sum);
00052 #endif
00053   return sum;
00054 }
00055 
00056 double norm_2(void *v, int len, QudaPrecision precision) {
00057   if (precision == QUDA_DOUBLE_PRECISION) return norm2((double*)v, len);
00058   else return norm2((float*)v, len);
00059 }
00060 
00061 
00062 /*
00063 
00064 
00065 // sets all elements of the destination vector to zero
00066 void zero(float* a, int cnt) {
00067     for (int i = 0; i < cnt; i++)
00068         a[i] = 0;
00069 }
00070 
00071 // copy one spinor to the other
00072 void copy(float* a, float *b, int len) {
00073   for (int i = 0; i < len; i++) a[i] = b[i];
00074 }
00075 
00076 // performs the operation y[i] = a*x[i] + b*y[i]
00077 void axpby(float a, float *x, float b, float *y, int len) {
00078     for (int i=0; i<len; i++) y[i] = a*x[i] + b*y[i];
00079 }
00080 
00081 // performs the operation y[i] = a*x[i] + y[i]
00082 void axpy(float a, float *x, float *y, int len) {
00083     for (int i=0; i<len; i++) y[i] += a*x[i];
00084 }
00085 
00086 
00087 // returns the real part of the dot product of 2 complex valued vectors
00088 float reDotProduct(float *v1, float *v2, int len) {
00089 
00090   float dot=0.0;
00091   for (int i=0; i<len; i++) {
00092     dot += v1[i]*v2[i];
00093   }
00094 
00095   return dot;
00096 }
00097 
00098 // returns the imaginary part of the dot product of 2 complex valued vectors
00099 float imDotProduct(float *v1, float *v2, int len) {
00100 
00101   float dot=0.0;
00102   for (int i=0; i<len; i+=2) {
00103     dot += v1[i]*v2[i+1] - v1[i+1]*v2[i];
00104   }
00105 
00106   return dot;
00107 }
00108 
00109 // returns the square of the L2 norm of the vector
00110 double normD(float *v, int len) {
00111 
00112   double sum=0.0;
00113   for (int i=0; i<len; i++) {
00114     sum += v[i]*v[i];
00115   }
00116 
00117   return sum;
00118 }
00119 
00120 // returns the real part of the dot product of 2 complex valued vectors
00121 double reDotProductD(float *v1, float *v2, int len) {
00122 
00123   double dot=0.0;
00124   for (int i=0; i<len; i++) {
00125     dot += v1[i]*v2[i];
00126   }
00127 
00128   return dot;
00129 }
00130 
00131 // returns the imaginary part of the dot product of 2 complex valued vectors
00132 double imDotProductD(float *v1, float *v2, int len) {
00133 
00134   double dot=0.0;
00135   for (int i=0; i<len; i+=2) {
00136     dot += v1[i]*v2[i+1] - v1[i+1]*v2[i];
00137   }
00138 
00139   return dot;
00140 }
00141 */