QUDA v0.4.0
A library for QCD on GPUs
|
00001 #ifndef _QUDA_MATRIX_H_ 00002 #define _QUDA_MATRIX_H_ 00003 00004 #include <cstdio> 00005 #include <cstdlib> 00006 #include <iostream> 00007 #include <iomanip> 00008 #include <cuda.h> 00009 00010 namespace hisq{ 00011 00012 // Given a real type T, returns the corresponding complex type 00013 template<class T> 00014 struct ComplexTypeId; 00015 00016 template<> 00017 struct ComplexTypeId<float> 00018 { 00019 typedef float2 Type; 00020 }; 00021 00022 template<> 00023 struct ComplexTypeId<double> 00024 { 00025 typedef double2 Type; 00026 }; 00027 00028 template<class T> 00029 struct RealTypeId; 00030 00031 template<> 00032 struct RealTypeId<float2> 00033 { 00034 typedef float Type; 00035 }; 00036 00037 template<> 00038 struct RealTypeId<double2> 00039 { 00040 typedef double Type; 00041 }; 00042 00043 00044 template<class T, class U> 00045 struct PromoteTypeId 00046 { 00047 typedef T Type; 00048 }; 00049 00050 00051 template<> 00052 struct PromoteTypeId<float2, float> 00053 { 00054 typedef float2 Type; 00055 }; 00056 00057 00058 template<> 00059 struct PromoteTypeId<float, float2> 00060 { 00061 typedef float2 Type; 00062 }; 00063 00064 00065 template<> 00066 struct PromoteTypeId<double2, double> 00067 { 00068 typedef double2 Type; 00069 }; 00070 00071 00072 template<> 00073 struct PromoteTypeId<double, double2> 00074 { 00075 typedef double2 Type; 00076 }; 00077 00078 template<> 00079 struct PromoteTypeId<double,int> 00080 { 00081 typedef double Type; 00082 }; 00083 00084 template<> 00085 struct PromoteTypeId<int,double> 00086 { 00087 typedef double Type; 00088 }; 00089 00090 template<> 00091 struct PromoteTypeId<float,int> 00092 { 00093 typedef float Type; 00094 }; 00095 00096 template<> 00097 struct PromoteTypeId<int,float> 00098 { 00099 typedef float Type; 00100 }; 00101 00102 00103 00104 00105 template<class Cmplx> 00106 __device__ __host__ 00107 Cmplx makeComplex(const typename RealTypeId<Cmplx>::Type & a, const typename RealTypeId<Cmplx>::Type & b) 00108 { 00109 Cmplx z; 00110 z.x = a; z.y = b; 00111 return z; 00112 } 00113 00114 00115 __device__ __host__ 00116 double2 makeComplex(const double & a, const double & b){ 00117 return make_double2(a,b); 00118 } 00119 00120 __device__ __host__ 00121 float2 makeComplex(const float & a, const float & b){ 00122 return make_float2(a,b); 00123 } 00124 00125 /* 00126 // doesn't seem to work 00127 template<class Cmplx> 00128 __device__ __host__ 00129 const Cmplx & operator+=(Cmplx & a, const Cmplx & b){ 00130 a.x += b.x; 00131 a.y += b.y; 00132 return a; 00133 } 00134 */ 00135 00136 template<class Cmplx> 00137 __device__ __host__ Cmplx operator+(const Cmplx & a, const Cmplx & b){ 00138 return makeComplex(a.x+b.x,a.y+b.y); 00139 } 00140 00141 template<class Cmplx> 00142 __device__ __host__ Cmplx operator-(const Cmplx & a, const Cmplx & b) 00143 { 00144 return makeComplex(a.x-b.x,a.y-b.y); 00145 } 00146 00147 template<class Cmplx> 00148 __device__ __host__ Cmplx operator*(const Cmplx & a, const typename RealTypeId<Cmplx>::Type & scalar) 00149 { 00150 return makeComplex(a.x*scalar,a.y*scalar); 00151 } 00152 00153 template<class Cmplx> 00154 __device__ __host__ Cmplx operator/(const Cmplx & a, const typename RealTypeId<Cmplx>::Type & scalar) 00155 { 00156 return makeComplex(a.x/scalar,a.y/scalar); 00157 } 00158 00159 template<class Cmplx> 00160 __device__ __host__ Cmplx operator+(const Cmplx & a, const typename RealTypeId<Cmplx>::Type & scalar) 00161 { 00162 return makeComplex(a.x+scalar,a.y); 00163 } 00164 00165 template<class Cmplx> 00166 __device__ __host__ Cmplx operator+(const typename RealTypeId<Cmplx>::Type & scalar, const Cmplx & a) 00167 { 00168 return makeComplex(a.x+scalar,a.y); 00169 } 00170 00171 template<class Cmplx> 00172 __device__ __host__ Cmplx operator-(const Cmplx & a, const typename RealTypeId<Cmplx>::Type & scalar) 00173 { 00174 return makeComplex(a.x-scalar,a.y); 00175 } 00176 00177 template<class Cmplx> 00178 __device__ __host__ Cmplx operator-(const typename RealTypeId<Cmplx>::Type & scalar, const Cmplx & a) 00179 { 00180 return makeComplex(scalar-a.x,-a.y); 00181 } 00182 00183 00184 template<class Cmplx> 00185 __device__ __host__ Cmplx operator*(const typename RealTypeId<Cmplx>::Type & scalar, const Cmplx & b) 00186 { 00187 return operator*(b,scalar); 00188 } 00189 00190 template<class Cmplx> 00191 __device__ __host__ Cmplx operator*(const Cmplx & a, const Cmplx & b) 00192 { 00193 return makeComplex(a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x); 00194 } 00195 00196 template<class Cmplx> 00197 __device__ __host__ Cmplx conj(const Cmplx & a) 00198 { 00199 return makeComplex(a.x,-a.y); 00200 } 00201 00202 __device__ __host__ double conj(const double & a) 00203 { 00204 return a; 00205 } 00206 00207 __device__ __host__ float conj(const float & a) 00208 { 00209 return a; 00210 } 00211 00212 00213 00214 template<class Cmplx> 00215 __device__ __host__ 00216 Cmplx getPreciseInverse(const Cmplx & z){ 00217 typename RealTypeId<Cmplx>::Type ratio, max, denom; 00218 if( fabs(z.x) > fabs(z.y) ){ max = z.x; ratio = z.y/max; }else{ max=z.y; ratio = z.x/max; } 00219 denom = max*max*(1 + ratio*ratio); 00220 return makeComplex(z.x/denom, -z.y/denom); 00221 } 00222 00223 00224 // for printing 00225 std::ostream & operator << (std::ostream & os, const float2 & z){ 00226 os << "(" << z.x << "," << z.y << ")"; 00227 return os; 00228 } 00229 00230 std::ostream & operator << (std::ostream & os, const double2 & z){ 00231 os << "(" << z.x << "," << z.y << ")"; 00232 return os; 00233 } 00234 00235 00236 00237 template<class T> 00238 struct Zero 00239 { 00240 //static const T val; 00241 __device__ __host__ 00242 static T val(); 00243 }; 00244 00245 template<> 00246 __device__ __host__ 00247 float2 Zero<float2>::val() 00248 { 00249 return make_float2(0.,0.); 00250 } 00251 00252 template<> 00253 __device__ __host__ 00254 double2 Zero<double2>::val() 00255 { 00256 return make_double2(0.,0.); 00257 } 00258 00259 00260 00261 template<class T> 00262 struct Identity 00263 { 00264 __device__ __host__ 00265 static T val(); 00266 }; 00267 00268 template<> 00269 __device__ __host__ 00270 float2 Identity<float2>::val(){ 00271 return make_float2(1.,0.); 00272 } 00273 00274 template<> 00275 __device__ __host__ 00276 double2 Identity<double2>::val(){ 00277 return make_double2(1.,0.); 00278 } 00279 00280 template<int N> 00281 inline __device__ __host__ 00282 int index(int i, int j) 00283 { 00284 return i*N + j; 00285 } 00286 00287 template<class T, int N> 00288 class Matrix 00289 { 00290 public: 00291 T data[N*N]; 00292 00293 __device__ __host__ T const & operator()(int i, int j) const{ 00294 return data[index<N>(i,j)]; 00295 } 00296 00297 __device__ __host__ T & operator()(int i, int j){ 00298 return data[index<N>(i,j)]; 00299 } 00300 }; 00301 00302 template<class T> 00303 __device__ __host__ T getTrace(const Matrix<T,3>& a) 00304 { 00305 return a(0,0) + a(1,1) + a(2,2); 00306 } 00307 00308 00309 template<class T> 00310 __device__ __host__ T getDeterminant(const Matrix<T,3> & a){ 00311 00312 T result; 00313 result = a(0,0)*(a(1,1)*a(2,2) - a(2,1)*a(1,2)) 00314 - a(0,1)*(a(1,0)*a(2,2) - a(1,2)*a(2,0)) 00315 + a(0,2)*(a(1,0)*a(2,1) - a(1,1)*a(2,0)); 00316 00317 return result; 00318 } 00319 00320 template<class T, int N> 00321 __device__ __host__ Matrix<T,N> operator+(const Matrix<T,N> & a, const Matrix<T,N> & b) 00322 { 00323 Matrix<T,N> result; 00324 for(int i=0; i<N*N; i++){ 00325 result.data[i] = a.data[i] + b.data[i]; 00326 } 00327 return result; 00328 } 00329 00330 00331 00332 template<class T, int N> 00333 __device__ __host__ Matrix<T,N> operator-(const Matrix<T,N> & a, const Matrix<T,N> & b) 00334 { 00335 Matrix<T,N> result; 00336 for(int i=0; i<N*N; i++){ 00337 result.data[i] = a.data[i] - b.data[i]; 00338 } 00339 return result; 00340 } 00341 00342 00343 00344 template<class T, int N, class S> 00345 __device__ __host__ Matrix<T,N> operator*(const S & scalar, const Matrix<T,N> & a){ 00346 Matrix<T,N> result; 00347 for(int i=0; i<N*N; ++i){ 00348 result.data[i] = scalar*a.data[i]; 00349 } 00350 return result; 00351 } 00352 00353 00354 template<class T, int N, class S> 00355 __device__ __host__ Matrix<T,N> operator*(const Matrix<T,N> & a, const S & scalar){ 00356 return scalar*a; 00357 } 00358 00359 00360 00361 template<class T> 00362 __device__ __host__ 00363 Matrix<T,3> operator*(const Matrix<T,3> & a, const Matrix<T,3> & b) 00364 { 00365 // The compiler has a hard time unrolling nested loops, 00366 // so here I do it by hand. 00367 // I could do something more sophisticated in the future. 00368 Matrix<T,3> result; 00369 result(0,0) = a(0,0)*b(0,0) + a(0,1)*b(1,0) + a(0,2)*b(2,0); 00370 result(0,1) = a(0,0)*b(0,1) + a(0,1)*b(1,1) + a(0,2)*b(2,1); 00371 result(0,2) = a(0,0)*b(0,2) + a(0,1)*b(1,2) + a(0,2)*b(2,2); 00372 result(1,0) = a(1,0)*b(0,0) + a(1,1)*b(1,0) + a(1,2)*b(2,0); 00373 result(1,1) = a(1,0)*b(0,1) + a(1,1)*b(1,1) + a(1,2)*b(2,1); 00374 result(1,2) = a(1,0)*b(0,2) + a(1,1)*b(1,2) + a(1,2)*b(2,2); 00375 result(2,0) = a(2,0)*b(0,0) + a(2,1)*b(1,0) + a(2,2)*b(2,0); 00376 result(2,1) = a(2,0)*b(0,1) + a(2,1)*b(1,1) + a(2,2)*b(2,1); 00377 result(2,2) = a(2,0)*b(0,2) + a(2,1)*b(1,2) + a(2,2)*b(2,2); 00378 return result; 00379 } 00380 00381 00382 // This is so that I can multiply real and complex matrices 00383 template<class T, class U> 00384 __device__ __host__ 00385 Matrix<typename PromoteTypeId<T,U>::Type,3> operator*(const Matrix<T,3> & a, const Matrix<U,3> & b) 00386 { 00387 Matrix<typename PromoteTypeId<T,U>::Type,3> result; 00388 result(0,0) = a(0,0)*b(0,0) + a(0,1)*b(1,0) + a(0,2)*b(2,0); 00389 result(0,1) = a(0,0)*b(0,1) + a(0,1)*b(1,1) + a(0,2)*b(2,1); 00390 result(0,2) = a(0,0)*b(0,2) + a(0,1)*b(1,2) + a(0,2)*b(2,2); 00391 result(1,0) = a(1,0)*b(0,0) + a(1,1)*b(1,0) + a(1,2)*b(2,0); 00392 result(1,1) = a(1,0)*b(0,1) + a(1,1)*b(1,1) + a(1,2)*b(2,1); 00393 result(1,2) = a(1,0)*b(0,2) + a(1,1)*b(1,2) + a(1,2)*b(2,2); 00394 result(2,0) = a(2,0)*b(0,0) + a(2,1)*b(1,0) + a(2,2)*b(2,0); 00395 result(2,1) = a(2,0)*b(0,1) + a(2,1)*b(1,1) + a(2,2)*b(2,1); 00396 result(2,2) = a(2,0)*b(0,2) + a(2,1)*b(1,2) + a(2,2)*b(2,2); 00397 return result; 00398 } 00399 00400 00401 00402 template<class T> 00403 __device__ __host__ 00404 Matrix<T,2> operator*(const Matrix<T,2> & a, const Matrix<T,2> & b) 00405 { 00406 Matrix<T,2> result; 00407 result(0,0) = a(0,0)*b(0,0) + a(0,1)*b(1,0); 00408 result(0,1) = a(0,0)*b(0,1) + a(0,1)*b(1,1); 00409 result(1,0) = a(1,0)*b(0,0) + a(1,1)*b(1,0); 00410 result(1,1) = a(1,0)*b(0,1) + a(1,1)*b(1,1); 00411 return result; 00412 } 00413 00414 00415 template<class T, int N> 00416 __device__ __host__ 00417 Matrix<T,N> conj(const Matrix<T,N> & other){ 00418 Matrix<T,N> result; 00419 for(int i=0; i<N; ++i){ 00420 for(int j=0; j<N; ++j){ 00421 result(i,j) = conj(other(j,i)); 00422 } 00423 } 00424 return result; 00425 } 00426 00427 00428 template<class T> 00429 __device__ __host__ 00430 void computeMatrixInverse(const Matrix<T,3>& u, Matrix<T,3>* uinv) 00431 { 00432 00433 const T & det = getDeterminant(u); 00434 const T & det_inv = getPreciseInverse(det); 00435 00436 T temp; 00437 00438 temp = u(1,1)*u(2,2) - u(1,2)*u(2,1); 00439 (*uinv)(0,0) = (det_inv*temp); 00440 00441 temp = u(0,2)*u(2,1) - u(0,1)*u(2,2); 00442 (*uinv)(0,1) = (temp*det_inv); 00443 00444 temp = u(0,1)*u(1,2) - u(0,2)*u(1,1); 00445 (*uinv)(0,2) = (temp*det_inv); 00446 00447 temp = u(1,2)*u(2,0) - u(1,0)*u(2,2); 00448 (*uinv)(1,0) = (det_inv*temp); 00449 00450 temp = u(0,0)*u(2,2) - u(0,2)*u(2,0); 00451 (*uinv)(1,1) = (temp*det_inv); 00452 00453 temp = u(0,2)*u(1,0) - u(0,0)*u(1,2); 00454 (*uinv)(1,2) = (temp*det_inv); 00455 00456 temp = u(1,0)*u(2,1) - u(1,1)*u(2,0); 00457 (*uinv)(2,0) = (det_inv*temp); 00458 00459 temp = u(0,1)*u(2,0) - u(0,0)*u(2,1); 00460 (*uinv)(2,1) = (temp*det_inv); 00461 00462 temp = u(0,0)*u(1,1) - u(0,1)*u(1,0); 00463 (*uinv)(2,2) = (temp*det_inv); 00464 00465 return; 00466 } 00467 00468 00469 00470 template<class T, int N> 00471 inline __device__ __host__ 00472 void setIdentity(Matrix<T,N>* m){ 00473 00474 for(int i=0; i<N; ++i){ 00475 (*m)(i,i) = 1; 00476 for(int j=i+1; j<N; ++j){ 00477 (*m)(i,j) = (*m)(j,i) = 0; 00478 } 00479 } 00480 return; 00481 } 00482 00483 00484 template<int N> 00485 inline __device__ __host__ 00486 void setIdentity(Matrix<float2,N>* m){ 00487 00488 for(int i=0; i<N; ++i){ 00489 (*m)(i,i) = make_float2(1,0); 00490 for(int j=i+1; j<N; ++j){ 00491 (*m)(i,j) = (*m)(j,i) = make_float2(0.,0.); 00492 } 00493 } 00494 return; 00495 } 00496 00497 00498 template<int N> 00499 inline __device__ __host__ 00500 void setIdentity(Matrix<double2,N>* m){ 00501 00502 for(int i=0; i<N; ++i){ 00503 (*m)(i,i) = make_double2(1,0); 00504 for(int j=i+1; j<N; ++j){ 00505 (*m)(i,j) = (*m)(j,i) = make_double2(0.,0.); 00506 } 00507 } 00508 return; 00509 } 00510 00511 00512 // Need to write more generic code for this! 00513 template<class T, int N> 00514 inline __device__ __host__ 00515 void setZero(Matrix<T,N>* m){ 00516 00517 for(int i=0; i<N; ++i){ 00518 for(int j=0; j<N; ++j){ 00519 (*m)(i,j) = 0; 00520 } 00521 } 00522 return; 00523 } 00524 00525 00526 template<int N> 00527 inline __device__ __host__ 00528 void setZero(Matrix<float2,N>* m){ 00529 00530 for(int i=0; i<N; ++i){ 00531 for(int j=0; j<N; ++j){ 00532 (*m)(i,j) = make_float2(0.,0.); 00533 } 00534 } 00535 return; 00536 } 00537 00538 00539 template<int N> 00540 inline __device__ __host__ 00541 void setZero(Matrix<double2,N>* m){ 00542 00543 for(int i=0; i<N; ++i){ 00544 for(int j=0; j<N; ++j){ 00545 (*m)(i,j) = make_double2(0.,0.); 00546 } 00547 } 00548 return; 00549 } 00550 00551 00552 00553 00554 // Matrix and array are very similar 00555 // Maybe I should factor out the similar 00556 // code. However, I want to make sure that 00557 // the compiler knows to store the 00558 // data elements in registers, so I won't do 00559 // it right now. 00560 template<class T, int N> 00561 class Array 00562 { 00563 private: 00564 T data[N]; 00565 00566 public: 00567 // access function 00568 __device__ __host__ 00569 T const & operator[](int i) const{ 00570 return data[i]; 00571 } 00572 00573 // assignment function 00574 __device__ __host__ 00575 T & operator[](int i){ 00576 return data[i]; 00577 } 00578 }; 00579 00580 00581 template<class T, int N> 00582 __device__ __host__ 00583 void copyColumn(const Matrix<T,N>& m, int c, Array<T,N>* a) 00584 { 00585 for(int i=0; i<N; ++i){ 00586 (*a)[i] = m(i,c); // c is the column index 00587 } 00588 return; 00589 } 00590 00591 00592 template<class T, int N> 00593 __device__ __host__ 00594 void outerProd(const Array<T,N>& a, const Array<T,N> & b, Matrix<T,N>* m){ 00595 for(int i=0; i<N; ++i){ 00596 const T conjb_i = conj(b[i]); 00597 for(int j=0; j<N; ++j){ 00598 (*m)(j,i) = a[j]*conjb_i; // we reverse the ordering of indices because it cuts down on the number of function calls 00599 } 00600 } 00601 return; 00602 } 00603 00604 00605 // Need some print utilities 00606 template<class T, int N> 00607 std::ostream & operator << (std::ostream & os, const Matrix<T,N> & m){ 00608 for(int i=0; i<N; ++i){ 00609 for(int j=0; j<N; ++j){ 00610 os << m(i,j) << " "; 00611 } 00612 if(i<N-1) os << std::endl; 00613 } 00614 return os; 00615 } 00616 00617 00618 template<class T, int N> 00619 std::ostream & operator << (std::ostream & os, const Array<T,N> & a){ 00620 for(int i=0; i<N; ++i){ 00621 os << a[i] << " "; 00622 } 00623 return os; 00624 } 00625 00626 00627 template<class T> 00628 __device__ 00629 void loadLinkVariableFromArray(const T* const array, int dir, int idx, int stride, Matrix<T,3> *link) 00630 { 00631 for(int i=0; i<9; ++i){ 00632 link->data[i] = array[idx + (dir*9 + i)*stride]; 00633 } 00634 return; 00635 } 00636 00637 00638 __device__ 00639 void loadLinkVariableFromArray(const float2* const array, int dir, int idx, int stride, Matrix<double2,3> *link) 00640 { 00641 float2 single_temp; 00642 for(int i=0; i<9; ++i){ 00643 single_temp = array[idx + (dir*9 + i)*stride]; 00644 link->data[i].x = single_temp.x; 00645 link->data[i].y = single_temp.y; 00646 } 00647 return; 00648 } 00649 00650 00651 00652 00653 00654 template<class T> 00655 __device__ 00656 void writeLinkVariableToArray(const Matrix<T,3> & link, int dir, int idx, int stride, T* const array) 00657 { 00658 for(int i=0; i<9; ++i){ 00659 array[idx + (dir*9 + i)*stride] = link.data[i]; 00660 } 00661 return; 00662 } 00663 00664 00665 00666 00667 __device__ 00668 void writeLinkVariableToArray(const Matrix<double2,3> & link, int dir, int idx, int stride, float2* const array) 00669 { 00670 float2 single_temp; 00671 00672 for(int i=0; i<9; ++i){ 00673 single_temp.x = link.data[i].x; 00674 single_temp.y = link.data[i].y; 00675 array[idx + (dir*9 + i)*stride] = single_temp; 00676 } 00677 return; 00678 } 00679 00680 00681 template<class Cmplx> 00682 __device__ __host__ 00683 void computeLinkInverse(Matrix<Cmplx,3>* uinv, const Matrix<Cmplx,3>& u) 00684 { 00685 00686 const Cmplx & det = getDeterminant(u); 00687 const Cmplx & det_inv = getPreciseInverse(det); 00688 00689 Cmplx temp; 00690 00691 temp = u(1,1)*u(2,2) - u(1,2)*u(2,1); 00692 (*uinv)(0,0) = (det_inv*temp); 00693 00694 temp = u(0,2)*u(2,1) - u(0,1)*u(2,2); 00695 (*uinv)(0,1) = (temp*det_inv); 00696 00697 temp = u(0,1)*u(1,2) - u(0,2)*u(1,1); 00698 (*uinv)(0,2) = (temp*det_inv); 00699 00700 temp = u(1,2)*u(2,0) - u(1,0)*u(2,2); 00701 (*uinv)(1,0) = (det_inv*temp); 00702 00703 temp = u(0,0)*u(2,2) - u(0,2)*u(2,0); 00704 (*uinv)(1,1) = (temp*det_inv); 00705 00706 temp = u(0,2)*u(1,0) - u(0,0)*u(1,2); 00707 (*uinv)(1,2) = (temp*det_inv); 00708 00709 temp = u(1,0)*u(2,1) - u(1,1)*u(2,0); 00710 (*uinv)(2,0) = (det_inv*temp); 00711 00712 temp = u(0,1)*u(2,0) - u(0,0)*u(2,1); 00713 (*uinv)(2,1) = (temp*det_inv); 00714 00715 temp = u(0,0)*u(1,1) - u(0,1)*u(1,0); 00716 (*uinv)(2,2) = (temp*det_inv); 00717 00718 return; 00719 } 00720 // template this! 00721 void copyArrayToLink(Matrix<float2,3>* link, float* array){ 00722 for(int i=0; i<3; ++i){ 00723 for(int j=0; j<3; ++j){ 00724 (*link)(i,j).x = array[(i*3+j)*2]; 00725 (*link)(i,j).y = array[(i*3+j)*2 + 1]; 00726 } 00727 } 00728 return; 00729 } 00730 00731 template<class Cmplx, class Real> 00732 void copyArrayToLink(Matrix<Cmplx,3>* link, Real* array){ 00733 for(int i=0; i<3; ++i){ 00734 for(int j=0; j<3; ++j){ 00735 (*link)(i,j).x = array[(i*3+j)*2]; 00736 (*link)(i,j).y = array[(i*3+j)*2 + 1]; 00737 } 00738 } 00739 return; 00740 } 00741 00742 00743 // and this! 00744 void copyLinkToArray(float* array, const Matrix<float2,3>& link){ 00745 for(int i=0; i<3; ++i){ 00746 for(int j=0; j<3; ++j){ 00747 array[(i*3+j)*2] = link(i,j).x; 00748 array[(i*3+j)*2 + 1] = link(i,j).y; 00749 } 00750 } 00751 return; 00752 } 00753 00754 // and this! 00755 template<class Cmplx, class Real> 00756 void copyLinkToArray(Real* array, const Matrix<Cmplx,3>& link){ 00757 for(int i=0; i<3; ++i){ 00758 for(int j=0; j<3; ++j){ 00759 array[(i*3+j)*2] = link(i,j).x; 00760 array[(i*3+j)*2 + 1] = link(i,j).y; 00761 } 00762 } 00763 return; 00764 } 00765 00766 00767 00768 // and this! 00769 template<class Cmplx> 00770 __host__ __device__ 00771 void printLink(const Matrix<Cmplx,3>& link){ 00772 printf("(%lf, %lf)\t", link(0,0).x, link(0,0).y); 00773 printf("(%lf, %lf)\t", link(0,1).x, link(0,1).y); 00774 printf("(%lf, %lf)\n", link(0,2).x, link(0,2).y); 00775 printf("(%lf, %lf)\t", link(1,0).x, link(1,0).y); 00776 printf("(%lf, %lf)\t", link(1,1).x, link(1,1).y); 00777 printf("(%lf, %lf)\n", link(1,2).x, link(1,2).y); 00778 printf("(%lf, %lf)\t", link(2,0).x, link(2,0).y); 00779 printf("(%lf, %lf)\t", link(2,1).x, link(2,1).y); 00780 printf("(%lf, %lf)\n", link(2,2).x, link(2,2).y); 00781 printf("\n"); 00782 } 00783 00784 } // end namespace hisq 00785 #endif // _QUDA_MATRIX_H_