|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 #ifndef _QUDA_BLAS_H 00002 #define _QUDA_BLAS_H 00003 00004 #include <quda_internal.h> 00005 #include <color_spinor_field.h> 00006 00007 00008 // keep these with C-linkage for the moment 00009 00010 #ifdef __cplusplus 00011 extern "C" { 00012 #endif 00013 00014 // ---------- blas_quda.cu ---------- 00015 00016 // creates and destroys reduction buffers 00017 void initBlas(void); 00018 void endBlas(void); 00019 00020 void setBlasTuning(int tuning); 00021 void setBlasParam(int kernel, int prec, int threads, int blocks); 00022 00023 extern unsigned long long blas_quda_flops; 00024 extern unsigned long long blas_quda_bytes; 00025 00026 #ifdef __cplusplus 00027 } 00028 #endif 00029 00030 // C++ linkage 00031 00032 // Generic variants 00033 00034 double norm2(const ColorSpinorField&); 00035 00036 // CUDA variants 00037 00038 void zeroCuda(cudaColorSpinorField &a); 00039 void copyCuda(cudaColorSpinorField &dst, const cudaColorSpinorField &src); 00040 00041 double axpyNormCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y); 00042 double sumCuda(cudaColorSpinorField &b); 00043 double normCuda(const cudaColorSpinorField &b); 00044 double reDotProductCuda(cudaColorSpinorField &a, cudaColorSpinorField &b); 00045 double xmyNormCuda(cudaColorSpinorField &a, cudaColorSpinorField &b); 00046 00047 void axpbyCuda(const double &a, cudaColorSpinorField &x, const double &b, cudaColorSpinorField &y); 00048 void axpyCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y); 00049 void axCuda(const double &a, cudaColorSpinorField &x); 00050 void xpyCuda(cudaColorSpinorField &x, cudaColorSpinorField &y); 00051 void xpayCuda(const cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y); 00052 void mxpyCuda(cudaColorSpinorField &x, cudaColorSpinorField &y); 00053 00054 void axpyZpbxCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b); 00055 void axpyBzpcxCuda(const double &a, cudaColorSpinorField& x, cudaColorSpinorField& y, const double &b, cudaColorSpinorField& z, const double &c); 00056 00057 void caxpbyCuda(const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y); 00058 void caxpyCuda(const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y); 00059 void cxpaypbzCuda(cudaColorSpinorField &, const Complex &b, cudaColorSpinorField &y, const Complex &c, cudaColorSpinorField &z); 00060 void caxpbypzYmbwCuda(const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &, cudaColorSpinorField &); 00061 00062 Complex cDotProductCuda(cudaColorSpinorField &, cudaColorSpinorField &); 00063 Complex xpaycDotzyCuda(cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y, cudaColorSpinorField &z); 00064 00065 double3 cDotProductNormACuda(cudaColorSpinorField &a, cudaColorSpinorField &b); 00066 double3 cDotProductNormBCuda(cudaColorSpinorField &a, cudaColorSpinorField &b); 00067 double3 caxpbypzYmbwcDotProductWYNormYCuda(const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, 00068 cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &u); 00069 00070 // CPU variants 00071 00072 double normCpu(const cpuColorSpinorField &b); 00073 00074 00075 #endif // _QUDA_BLAS_H
1.7.3