QUDA: quda/lib/quda_matrix.h Source File

QUDA v0.4.0
A library for QCD on GPUs
00001 #ifndef _QUDA_MATRIX_H_
00002 #define _QUDA_MATRIX_H_
00003 
00004 #include <cstdio>
00005 #include <cstdlib>
00006 #include <iostream>
00007 #include <iomanip>
00008 #include <cuda.h>
00009 
00010 namespace hisq{
00011 
00012   // Given a real type T, returns the corresponding complex type
00013   template<class T>
00014     struct ComplexTypeId;
00015 
00016   template<>
00017     struct ComplexTypeId<float>
00018     {
00019       typedef float2 Type;
00020     };
00021 
00022   template<>
00023     struct ComplexTypeId<double>
00024     {
00025       typedef double2 Type;
00026     };
00027 
00028   template<class T> 
00029     struct RealTypeId; 
00030 
00031   template<>
00032     struct RealTypeId<float2>
00033     {
00034       typedef float Type;
00035     };
00036 
00037   template<>
00038     struct RealTypeId<double2>
00039     {
00040       typedef double Type;
00041     };
00042 
00043 
00044   template<class T, class U>
00045     struct PromoteTypeId
00046     {
00047       typedef T Type;
00048     };
00049 
00050 
00051   template<>
00052     struct PromoteTypeId<float2, float>
00053     {
00054       typedef float2 Type;
00055     };
00056 
00057 
00058   template<>
00059     struct PromoteTypeId<float, float2>
00060     {
00061       typedef float2 Type;
00062     };
00063 
00064 
00065   template<>
00066     struct PromoteTypeId<double2, double>
00067     {
00068       typedef double2 Type;
00069     };
00070 
00071 
00072   template<>
00073     struct PromoteTypeId<double, double2>
00074     {
00075       typedef double2 Type;
00076     };
00077 
00078   template<>
00079     struct PromoteTypeId<double,int>
00080     {
00081       typedef double Type;
00082     };
00083 
00084   template<>
00085     struct PromoteTypeId<int,double>
00086     {
00087       typedef double Type;
00088     };
00089 
00090   template<>
00091     struct PromoteTypeId<float,int>
00092     {
00093       typedef float Type;
00094     };
00095 
00096   template<>
00097     struct PromoteTypeId<int,float>
00098     {
00099       typedef float Type;
00100     };
00101 
00102 
00103 
00104 
00105   template<class Cmplx>
00106     __device__ __host__ 
00107     Cmplx makeComplex(const typename RealTypeId<Cmplx>::Type & a, const typename RealTypeId<Cmplx>::Type & b)
00108     {
00109       Cmplx z;
00110       z.x = a; z.y = b;
00111       return z;
00112     }
00113 
00114 
00115   __device__ __host__
00116     double2 makeComplex(const double & a, const double & b){
00117       return make_double2(a,b);
00118     }
00119 
00120   __device__ __host__
00121     float2 makeComplex(const float & a, const float & b){
00122       return make_float2(a,b);
00123     } 
00124 
00125   /*
00126   //  doesn't seem to work
00127   template<class Cmplx> 
00128   __device__ __host__ 
00129   const Cmplx & operator+=(Cmplx & a, const Cmplx & b){
00130   a.x += b.x; 
00131   a.y += b.y;
00132   return a;
00133   }
00134    */
00135 
00136   template<class Cmplx>
00137     __device__ __host__ Cmplx operator+(const Cmplx & a, const Cmplx & b){
00138       return makeComplex(a.x+b.x,a.y+b.y);
00139     }
00140 
00141   template<class Cmplx>
00142     __device__ __host__ Cmplx operator-(const Cmplx & a, const Cmplx & b)
00143     {
00144       return makeComplex(a.x-b.x,a.y-b.y);
00145     }
00146 
00147   template<class Cmplx>
00148     __device__ __host__ Cmplx operator*(const Cmplx & a, const typename RealTypeId<Cmplx>::Type & scalar)
00149     {
00150       return makeComplex(a.x*scalar,a.y*scalar);
00151     }
00152 
00153   template<class Cmplx>
00154     __device__ __host__ Cmplx operator/(const Cmplx & a, const typename RealTypeId<Cmplx>::Type & scalar)
00155     {
00156       return makeComplex(a.x/scalar,a.y/scalar);
00157     }
00158 
00159   template<class Cmplx>
00160     __device__ __host__ Cmplx operator+(const Cmplx & a, const typename RealTypeId<Cmplx>::Type & scalar)
00161     {
00162       return makeComplex(a.x+scalar,a.y);
00163     }
00164 
00165   template<class Cmplx>
00166     __device__ __host__ Cmplx operator+(const typename RealTypeId<Cmplx>::Type & scalar, const Cmplx & a)
00167     {
00168       return makeComplex(a.x+scalar,a.y);
00169     }
00170 
00171   template<class Cmplx>
00172     __device__ __host__ Cmplx operator-(const Cmplx & a, const typename RealTypeId<Cmplx>::Type & scalar)
00173     {
00174       return makeComplex(a.x-scalar,a.y);
00175     }
00176 
00177   template<class Cmplx>
00178     __device__ __host__ Cmplx operator-(const typename RealTypeId<Cmplx>::Type & scalar, const Cmplx & a)
00179     {
00180       return makeComplex(scalar-a.x,-a.y);
00181     }
00182 
00183 
00184   template<class Cmplx>
00185     __device__ __host__ Cmplx operator*(const typename RealTypeId<Cmplx>::Type & scalar, const Cmplx & b)
00186     {
00187       return operator*(b,scalar);
00188     }
00189 
00190   template<class Cmplx>
00191     __device__ __host__ Cmplx operator*(const Cmplx & a, const Cmplx & b)
00192     {
00193       return makeComplex(a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x);
00194     }
00195 
00196   template<class Cmplx>
00197     __device__ __host__ Cmplx conj(const Cmplx & a)
00198     {
00199       return makeComplex(a.x,-a.y);
00200     }
00201 
00202   __device__ __host__ double conj(const double & a)
00203   {
00204     return a;
00205   }
00206 
00207   __device__ __host__ float conj(const float & a)
00208   {
00209     return a;
00210   }
00211 
00212 
00213 
00214   template<class Cmplx>
00215     __device__ __host__
00216     Cmplx getPreciseInverse(const Cmplx & z){
00217       typename RealTypeId<Cmplx>::Type ratio, max, denom;
00218       if( fabs(z.x) > fabs(z.y) ){ max = z.x; ratio = z.y/max; }else{ max=z.y; ratio = z.x/max; }
00219       denom = max*max*(1 + ratio*ratio);
00220       return makeComplex(z.x/denom, -z.y/denom);
00221     }
00222 
00223 
00224   // for printing
00225   std::ostream & operator << (std::ostream & os, const float2 & z){
00226     os << "(" << z.x << "," << z.y << ")";
00227     return os;
00228   }
00229 
00230   std::ostream & operator << (std::ostream & os, const double2 & z){
00231     os << "(" << z.x << "," << z.y << ")";
00232     return os;
00233   }
00234 
00235 
00236 
00237   template<class T>
00238     struct Zero
00239     {
00240       //static const T val;
00241       __device__ __host__
00242         static T val();
00243     };
00244 
00245   template<>
00246     __device__ __host__
00247     float2 Zero<float2>::val()  
00248     {
00249       return make_float2(0.,0.);
00250     }
00251 
00252   template<>
00253     __device__ __host__
00254     double2 Zero<double2>::val()
00255     {
00256       return make_double2(0.,0.);
00257     }
00258 
00259 
00260 
00261   template<class T>
00262     struct Identity
00263     {
00264       __device__  __host__
00265         static T val();
00266     };
00267 
00268   template<>
00269     __device__ __host__
00270     float2 Identity<float2>::val(){
00271       return make_float2(1.,0.);
00272     }
00273 
00274   template<>
00275     __device__ __host__
00276     double2 Identity<double2>::val(){
00277       return make_double2(1.,0.);
00278     }
00279 
00280   template<int N>
00281     inline  __device__ __host__
00282     int index(int i, int j)
00283     {
00284       return i*N + j;
00285     }
00286 
00287   template<class T, int N>
00288     class Matrix
00289     {
00290       public:
00291         T data[N*N];
00292 
00293         __device__ __host__ T const & operator()(int i, int j) const{
00294           return data[index<N>(i,j)];
00295         }
00296 
00297         __device__ __host__ T & operator()(int i, int j){
00298           return data[index<N>(i,j)];
00299         }
00300     };
00301 
00302   template<class T>
00303     __device__ __host__ T getTrace(const Matrix<T,3>& a)
00304     {
00305       return a(0,0) + a(1,1) + a(2,2);
00306     }
00307 
00308 
00309   template<class T>
00310     __device__ __host__  T getDeterminant(const Matrix<T,3> & a){
00311 
00312       T result;
00313       result = a(0,0)*(a(1,1)*a(2,2) - a(2,1)*a(1,2))
00314         - a(0,1)*(a(1,0)*a(2,2) - a(1,2)*a(2,0))
00315         + a(0,2)*(a(1,0)*a(2,1) - a(1,1)*a(2,0));
00316 
00317       return result;
00318     }
00319 
00320   template<class T, int N>
00321     __device__ __host__ Matrix<T,N> operator+(const Matrix<T,N> & a, const Matrix<T,N> & b)
00322     {
00323       Matrix<T,N> result;
00324       for(int i=0; i<N*N; i++){
00325         result.data[i] = a.data[i] + b.data[i];
00326       }
00327       return result;
00328     }
00329 
00330 
00331 
00332   template<class T, int N>
00333     __device__ __host__ Matrix<T,N> operator-(const Matrix<T,N> & a, const Matrix<T,N> & b)
00334     {
00335       Matrix<T,N> result;
00336       for(int i=0; i<N*N; i++){
00337         result.data[i] = a.data[i] - b.data[i];
00338       }
00339       return result;
00340     }
00341 
00342 
00343 
00344   template<class T, int N, class S>
00345     __device__ __host__ Matrix<T,N> operator*(const S & scalar, const Matrix<T,N> & a){
00346       Matrix<T,N> result;
00347       for(int i=0; i<N*N; ++i){
00348         result.data[i] = scalar*a.data[i];
00349       }
00350       return result;
00351     }
00352 
00353 
00354   template<class T, int N, class S>
00355     __device__ __host__ Matrix<T,N> operator*(const Matrix<T,N> & a, const S & scalar){
00356       return scalar*a;
00357     }
00358 
00359 
00360 
00361   template<class T>
00362     __device__ __host__
00363     Matrix<T,3> operator*(const Matrix<T,3> & a, const Matrix<T,3> & b)
00364     {
00365       // The compiler has a hard time unrolling nested loops,
00366       // so here I do it by hand. 
00367       // I could do something more sophisticated in the future.
00368       Matrix<T,3> result;
00369       result(0,0) = a(0,0)*b(0,0) + a(0,1)*b(1,0) + a(0,2)*b(2,0);
00370       result(0,1) = a(0,0)*b(0,1) + a(0,1)*b(1,1) + a(0,2)*b(2,1);
00371       result(0,2) = a(0,0)*b(0,2) + a(0,1)*b(1,2) + a(0,2)*b(2,2);
00372       result(1,0) = a(1,0)*b(0,0) + a(1,1)*b(1,0) + a(1,2)*b(2,0);
00373       result(1,1) = a(1,0)*b(0,1) + a(1,1)*b(1,1) + a(1,2)*b(2,1);
00374       result(1,2) = a(1,0)*b(0,2) + a(1,1)*b(1,2) + a(1,2)*b(2,2);
00375       result(2,0) = a(2,0)*b(0,0) + a(2,1)*b(1,0) + a(2,2)*b(2,0);
00376       result(2,1) = a(2,0)*b(0,1) + a(2,1)*b(1,1) + a(2,2)*b(2,1);
00377       result(2,2) = a(2,0)*b(0,2) + a(2,1)*b(1,2) + a(2,2)*b(2,2);
00378       return result;
00379     }
00380 
00381 
00382   // This is so that I can multiply real and complex matrices
00383   template<class T, class U>
00384     __device__ __host__
00385     Matrix<typename PromoteTypeId<T,U>::Type,3> operator*(const Matrix<T,3> & a, const Matrix<U,3> & b)
00386     {
00387       Matrix<typename PromoteTypeId<T,U>::Type,3> result;
00388       result(0,0) = a(0,0)*b(0,0) + a(0,1)*b(1,0) + a(0,2)*b(2,0);
00389       result(0,1) = a(0,0)*b(0,1) + a(0,1)*b(1,1) + a(0,2)*b(2,1);
00390       result(0,2) = a(0,0)*b(0,2) + a(0,1)*b(1,2) + a(0,2)*b(2,2);
00391       result(1,0) = a(1,0)*b(0,0) + a(1,1)*b(1,0) + a(1,2)*b(2,0);
00392       result(1,1) = a(1,0)*b(0,1) + a(1,1)*b(1,1) + a(1,2)*b(2,1);
00393       result(1,2) = a(1,0)*b(0,2) + a(1,1)*b(1,2) + a(1,2)*b(2,2);
00394       result(2,0) = a(2,0)*b(0,0) + a(2,1)*b(1,0) + a(2,2)*b(2,0);
00395       result(2,1) = a(2,0)*b(0,1) + a(2,1)*b(1,1) + a(2,2)*b(2,1);
00396       result(2,2) = a(2,0)*b(0,2) + a(2,1)*b(1,2) + a(2,2)*b(2,2);
00397       return result;
00398     }
00399 
00400 
00401 
00402   template<class T>
00403     __device__ __host__
00404     Matrix<T,2> operator*(const Matrix<T,2> & a, const Matrix<T,2> & b)
00405     {
00406       Matrix<T,2> result;
00407       result(0,0) = a(0,0)*b(0,0) + a(0,1)*b(1,0);
00408       result(0,1) = a(0,0)*b(0,1) + a(0,1)*b(1,1);
00409       result(1,0) = a(1,0)*b(0,0) + a(1,1)*b(1,0);
00410       result(1,1) = a(1,0)*b(0,1) + a(1,1)*b(1,1);
00411       return result;
00412     }
00413 
00414 
00415   template<class T, int N>
00416     __device__ __host__
00417     Matrix<T,N> conj(const Matrix<T,N> & other){
00418       Matrix<T,N> result;
00419       for(int i=0; i<N; ++i){
00420         for(int j=0; j<N; ++j){
00421           result(i,j) = conj(other(j,i));
00422         }
00423       }
00424       return result;
00425     }
00426 
00427 
00428   template<class T> 
00429     __device__  __host__
00430     void computeMatrixInverse(const Matrix<T,3>& u, Matrix<T,3>* uinv)
00431     {
00432 
00433       const T & det = getDeterminant(u);
00434       const T & det_inv = getPreciseInverse(det);
00435 
00436       T temp;
00437 
00438       temp = u(1,1)*u(2,2) - u(1,2)*u(2,1);
00439       (*uinv)(0,0) = (det_inv*temp);
00440 
00441       temp = u(0,2)*u(2,1) - u(0,1)*u(2,2);
00442       (*uinv)(0,1) = (temp*det_inv);
00443 
00444       temp = u(0,1)*u(1,2)  - u(0,2)*u(1,1);
00445       (*uinv)(0,2) = (temp*det_inv);
00446 
00447       temp = u(1,2)*u(2,0) - u(1,0)*u(2,2);
00448       (*uinv)(1,0) = (det_inv*temp);
00449 
00450       temp = u(0,0)*u(2,2) - u(0,2)*u(2,0);
00451       (*uinv)(1,1) = (temp*det_inv);
00452 
00453       temp = u(0,2)*u(1,0) - u(0,0)*u(1,2);
00454       (*uinv)(1,2) = (temp*det_inv);
00455 
00456       temp = u(1,0)*u(2,1) - u(1,1)*u(2,0);
00457       (*uinv)(2,0) = (det_inv*temp);
00458 
00459       temp = u(0,1)*u(2,0) - u(0,0)*u(2,1);
00460       (*uinv)(2,1) = (temp*det_inv);
00461 
00462       temp = u(0,0)*u(1,1) - u(0,1)*u(1,0);
00463       (*uinv)(2,2) = (temp*det_inv);
00464 
00465       return;
00466     } 
00467 
00468 
00469 
00470   template<class T, int N>
00471     inline __device__ __host__
00472     void setIdentity(Matrix<T,N>* m){
00473 
00474       for(int i=0; i<N; ++i){
00475         (*m)(i,i) = 1;
00476         for(int j=i+1; j<N; ++j){
00477           (*m)(i,j) = (*m)(j,i) = 0;
00478         }
00479       }
00480       return;
00481     }
00482 
00483 
00484   template<int N>
00485     inline __device__ __host__
00486     void setIdentity(Matrix<float2,N>* m){
00487 
00488       for(int i=0; i<N; ++i){
00489         (*m)(i,i) = make_float2(1,0);
00490         for(int j=i+1; j<N; ++j){
00491           (*m)(i,j) = (*m)(j,i) = make_float2(0.,0.);    
00492         }
00493       }
00494       return;
00495     }
00496 
00497 
00498   template<int N>
00499     inline __device__ __host__
00500     void setIdentity(Matrix<double2,N>* m){
00501 
00502       for(int i=0; i<N; ++i){
00503         (*m)(i,i) = make_double2(1,0);
00504         for(int j=i+1; j<N; ++j){
00505           (*m)(i,j) = (*m)(j,i) = make_double2(0.,0.);    
00506         }
00507       }
00508       return;
00509     }
00510 
00511 
00512   // Need to write more generic code for this!
00513   template<class T, int N>
00514     inline __device__ __host__
00515     void setZero(Matrix<T,N>* m){
00516 
00517       for(int i=0; i<N; ++i){
00518         for(int j=0; j<N; ++j){
00519           (*m)(i,j) = 0;
00520         }
00521       }
00522       return;
00523     }
00524 
00525 
00526   template<int N>
00527     inline __device__ __host__
00528     void setZero(Matrix<float2,N>* m){
00529 
00530       for(int i=0; i<N; ++i){
00531         for(int j=0; j<N; ++j){
00532           (*m)(i,j) = make_float2(0.,0.);
00533         }
00534       }
00535       return;
00536     }
00537 
00538 
00539   template<int N>
00540     inline __device__ __host__
00541     void setZero(Matrix<double2,N>* m){
00542 
00543       for(int i=0; i<N; ++i){
00544         for(int j=0; j<N; ++j){
00545           (*m)(i,j) = make_double2(0.,0.);
00546         }
00547       }
00548       return;
00549     }
00550 
00551 
00552 
00553 
00554   // Matrix and array are very similar
00555   // Maybe I should factor out the similar 
00556   // code. However, I want to make sure that 
00557   // the compiler knows to store the 
00558   // data elements in registers, so I won't do 
00559   // it right now.
00560   template<class T, int N>
00561     class Array
00562     {
00563       private:
00564         T data[N];
00565 
00566       public:
00567         // access function
00568         __device__ __host__
00569           T const & operator[](int i) const{
00570             return data[i];
00571           }
00572 
00573         // assignment function
00574         __device__ __host__ 
00575           T & operator[](int i){
00576             return data[i];
00577           }
00578     };
00579 
00580 
00581   template<class T, int N>
00582     __device__  __host__ 
00583     void copyColumn(const Matrix<T,N>& m, int c, Array<T,N>* a)
00584     {
00585       for(int i=0; i<N; ++i){
00586         (*a)[i] = m(i,c); // c is the column index
00587       }
00588       return;
00589     }
00590 
00591 
00592   template<class T, int N>
00593     __device__ __host__
00594     void outerProd(const Array<T,N>& a, const Array<T,N> & b, Matrix<T,N>* m){
00595       for(int i=0; i<N; ++i){
00596         const T conjb_i = conj(b[i]);
00597         for(int j=0; j<N; ++j){
00598           (*m)(j,i) = a[j]*conjb_i; // we reverse the ordering of indices because it cuts down on the number of function calls
00599         }
00600       }
00601       return;
00602     }
00603 
00604 
00605   // Need some print utilities
00606   template<class T, int N>
00607     std::ostream & operator << (std::ostream & os, const Matrix<T,N> & m){
00608       for(int i=0; i<N; ++i){
00609         for(int j=0; j<N; ++j){
00610           os << m(i,j) << " ";
00611         }
00612         if(i<N-1) os << std::endl;
00613       }
00614       return os;
00615     }
00616 
00617 
00618   template<class T, int N>
00619     std::ostream & operator << (std::ostream & os, const Array<T,N> & a){
00620       for(int i=0; i<N; ++i){
00621         os << a[i] << " ";
00622       }
00623       return os;
00624     }
00625 
00626 
00627   template<class T>
00628     __device__
00629     void loadLinkVariableFromArray(const T* const array, int dir, int idx, int stride, Matrix<T,3> *link)
00630     {
00631       for(int i=0; i<9; ++i){
00632         link->data[i] = array[idx + (dir*9 + i)*stride];
00633       }
00634       return;
00635     }
00636 
00637 
00638     __device__  
00639    void loadLinkVariableFromArray(const float2* const array, int dir, int idx, int stride, Matrix<double2,3> *link)
00640    { 
00641                  float2 single_temp; 
00642            for(int i=0; i<9; ++i){
00643         single_temp = array[idx + (dir*9 + i)*stride];
00644                                 link->data[i].x = single_temp.x;
00645                                 link->data[i].y = single_temp.y;
00646                  }
00647                  return;
00648    }
00649    
00650 
00651 
00652 
00653 
00654   template<class T>
00655     __device__
00656     void writeLinkVariableToArray(const Matrix<T,3> & link,  int dir, int idx, int stride, T* const array)
00657     {
00658       for(int i=0; i<9; ++i){ 
00659         array[idx + (dir*9 + i)*stride] = link.data[i];
00660       }
00661       return;
00662     }
00663 
00664 
00665 
00666 
00667     __device__ 
00668           void writeLinkVariableToArray(const Matrix<double2,3> & link, int dir, int idx, int stride, float2* const array)
00669    {
00670             float2 single_temp;
00671 
00672       for(int i=0; i<9; ++i){ 
00673                                 single_temp.x = link.data[i].x;
00674                                 single_temp.y = link.data[i].y;
00675         array[idx + (dir*9 + i)*stride] = single_temp;
00676       }
00677      return;
00678    }
00679 
00680 
00681   template<class Cmplx> 
00682     __device__  __host__
00683     void computeLinkInverse(Matrix<Cmplx,3>* uinv, const Matrix<Cmplx,3>& u)
00684     {
00685 
00686       const Cmplx & det = getDeterminant(u);
00687       const Cmplx & det_inv = getPreciseInverse(det);
00688 
00689       Cmplx temp;
00690 
00691       temp = u(1,1)*u(2,2) - u(1,2)*u(2,1);
00692       (*uinv)(0,0) = (det_inv*temp);
00693 
00694       temp = u(0,2)*u(2,1) - u(0,1)*u(2,2);
00695       (*uinv)(0,1) = (temp*det_inv);
00696 
00697       temp = u(0,1)*u(1,2)  - u(0,2)*u(1,1);
00698       (*uinv)(0,2) = (temp*det_inv);
00699 
00700       temp = u(1,2)*u(2,0) - u(1,0)*u(2,2);
00701       (*uinv)(1,0) = (det_inv*temp);
00702 
00703       temp = u(0,0)*u(2,2) - u(0,2)*u(2,0);
00704       (*uinv)(1,1) = (temp*det_inv);
00705 
00706       temp = u(0,2)*u(1,0) - u(0,0)*u(1,2);
00707       (*uinv)(1,2) = (temp*det_inv);
00708 
00709       temp = u(1,0)*u(2,1) - u(1,1)*u(2,0);
00710       (*uinv)(2,0) = (det_inv*temp);
00711 
00712       temp = u(0,1)*u(2,0) - u(0,0)*u(2,1);
00713       (*uinv)(2,1) = (temp*det_inv);
00714 
00715       temp = u(0,0)*u(1,1) - u(0,1)*u(1,0);
00716       (*uinv)(2,2) = (temp*det_inv);
00717 
00718       return;
00719     } 
00720   // template this! 
00721         void copyArrayToLink(Matrix<float2,3>* link, float* array){
00722           for(int i=0; i<3; ++i){
00723             for(int j=0; j<3; ++j){
00724               (*link)(i,j).x = array[(i*3+j)*2];
00725               (*link)(i,j).y = array[(i*3+j)*2 + 1];
00726             }
00727           }
00728           return;
00729         }
00730         
00731         template<class Cmplx, class Real>
00732         void copyArrayToLink(Matrix<Cmplx,3>* link, Real* array){
00733           for(int i=0; i<3; ++i){
00734             for(int j=0; j<3; ++j){
00735               (*link)(i,j).x = array[(i*3+j)*2];
00736               (*link)(i,j).y = array[(i*3+j)*2 + 1];
00737             }
00738           }
00739           return;
00740         }
00741         
00742         
00743         // and this!
00744         void copyLinkToArray(float* array, const Matrix<float2,3>& link){
00745           for(int i=0; i<3; ++i){
00746             for(int j=0; j<3; ++j){
00747               array[(i*3+j)*2] = link(i,j).x;
00748               array[(i*3+j)*2 + 1] = link(i,j).y;
00749             }
00750           }
00751           return;
00752         }
00753 
00754         // and this!
00755               template<class Cmplx, class Real>
00756         void copyLinkToArray(Real* array, const Matrix<Cmplx,3>& link){
00757           for(int i=0; i<3; ++i){
00758             for(int j=0; j<3; ++j){
00759               array[(i*3+j)*2] = link(i,j).x;
00760               array[(i*3+j)*2 + 1] = link(i,j).y;
00761             }
00762           }
00763           return;
00764         }
00765 
00766 
00767 
00768         // and this!
00769         template<class Cmplx>
00770         __host__ __device__
00771         void printLink(const Matrix<Cmplx,3>& link){
00772           printf("(%lf, %lf)\t", link(0,0).x, link(0,0).y);
00773           printf("(%lf, %lf)\t", link(0,1).x, link(0,1).y);
00774           printf("(%lf, %lf)\n", link(0,2).x, link(0,2).y);
00775           printf("(%lf, %lf)\t", link(1,0).x, link(1,0).y);
00776           printf("(%lf, %lf)\t", link(1,1).x, link(1,1).y);
00777           printf("(%lf, %lf)\n", link(1,2).x, link(1,2).y);
00778           printf("(%lf, %lf)\t", link(2,0).x, link(2,0).y);
00779           printf("(%lf, %lf)\t", link(2,1).x, link(2,1).y);
00780           printf("(%lf, %lf)\n", link(2,2).x, link(2,2).y);
00781           printf("\n");
00782         }
00783 
00784 } // end namespace hisq
00785 #endif // _QUDA_MATRIX_H_