QUDA v0.4.0
A library for QCD on GPUs
|
00001 #include <blas_reference.h> 00002 #include <stdio.h> 00003 #ifdef MPI_COMMS 00004 #include <comm_quda.h> 00005 #elif QMP_COMMS 00006 #include <qmp.h> 00007 #endif 00008 00009 template <typename Float> 00010 inline void aXpY(Float a, Float *x, Float *y, int len) 00011 { 00012 for(int i=0; i < len; i++){ y[i] += a*x[i]; } 00013 } 00014 00015 void axpy(double a, void *x, void *y, int len, QudaPrecision precision) { 00016 if( precision == QUDA_DOUBLE_PRECISION ) aXpY(a, (double *)x, (double *)y, len); 00017 else aXpY((float)a, (float *)x, (float *)y, len); 00018 } 00019 00020 // performs the operation x[i] *= a 00021 template <typename Float> 00022 inline void aX(Float a, Float *x, int len) { 00023 for (int i=0; i<len; i++) x[i] *= a; 00024 } 00025 00026 void ax(double a, void *x, int len, QudaPrecision precision) { 00027 if (precision == QUDA_DOUBLE_PRECISION) aX(a, (double*)x, len); 00028 else aX((float)a, (float*)x, len); 00029 } 00030 00031 // performs the operation y[i] -= x[i] (minus x plus y) 00032 template <typename Float> 00033 inline void mXpY(Float *x, Float *y, int len) { 00034 for (int i=0; i<len; i++) y[i] -= x[i]; 00035 } 00036 00037 void mxpy(void* x, void* y, int len, QudaPrecision precision) { 00038 if (precision == QUDA_DOUBLE_PRECISION) mXpY((double*)x, (double*)y, len); 00039 else mXpY((float*)x, (float*)y, len); 00040 } 00041 00042 00043 // returns the square of the L2 norm of the vector 00044 template <typename Float> 00045 inline double norm2(Float *v, int len) { 00046 double sum=0.0; 00047 for (int i=0; i<len; i++) sum += v[i]*v[i]; 00048 #ifdef MPI_COMMS 00049 comm_allreduce(&sum); 00050 #elif QMP_COMMS 00051 QMP_sum_double(&sum); 00052 #endif 00053 return sum; 00054 } 00055 00056 double norm_2(void *v, int len, QudaPrecision precision) { 00057 if (precision == QUDA_DOUBLE_PRECISION) return norm2((double*)v, len); 00058 else return norm2((float*)v, len); 00059 } 00060 00061 00062 /* 00063 00064 00065 // sets all elements of the destination vector to zero 00066 void zero(float* a, int cnt) { 00067 for (int i = 0; i < cnt; i++) 00068 a[i] = 0; 00069 } 00070 00071 // copy one spinor to the other 00072 void copy(float* a, float *b, int len) { 00073 for (int i = 0; i < len; i++) a[i] = b[i]; 00074 } 00075 00076 // performs the operation y[i] = a*x[i] + b*y[i] 00077 void axpby(float a, float *x, float b, float *y, int len) { 00078 for (int i=0; i<len; i++) y[i] = a*x[i] + b*y[i]; 00079 } 00080 00081 // performs the operation y[i] = a*x[i] + y[i] 00082 void axpy(float a, float *x, float *y, int len) { 00083 for (int i=0; i<len; i++) y[i] += a*x[i]; 00084 } 00085 00086 00087 // returns the real part of the dot product of 2 complex valued vectors 00088 float reDotProduct(float *v1, float *v2, int len) { 00089 00090 float dot=0.0; 00091 for (int i=0; i<len; i++) { 00092 dot += v1[i]*v2[i]; 00093 } 00094 00095 return dot; 00096 } 00097 00098 // returns the imaginary part of the dot product of 2 complex valued vectors 00099 float imDotProduct(float *v1, float *v2, int len) { 00100 00101 float dot=0.0; 00102 for (int i=0; i<len; i+=2) { 00103 dot += v1[i]*v2[i+1] - v1[i+1]*v2[i]; 00104 } 00105 00106 return dot; 00107 } 00108 00109 // returns the square of the L2 norm of the vector 00110 double normD(float *v, int len) { 00111 00112 double sum=0.0; 00113 for (int i=0; i<len; i++) { 00114 sum += v[i]*v[i]; 00115 } 00116 00117 return sum; 00118 } 00119 00120 // returns the real part of the dot product of 2 complex valued vectors 00121 double reDotProductD(float *v1, float *v2, int len) { 00122 00123 double dot=0.0; 00124 for (int i=0; i<len; i++) { 00125 dot += v1[i]*v2[i]; 00126 } 00127 00128 return dot; 00129 } 00130 00131 // returns the imaginary part of the dot product of 2 complex valued vectors 00132 double imDotProductD(float *v1, float *v2, int len) { 00133 00134 double dot=0.0; 00135 for (int i=0; i<len; i+=2) { 00136 dot += v1[i]*v2[i+1] - v1[i+1]*v2[i]; 00137 } 00138 00139 return dot; 00140 } 00141 */