QUDA  0.9.0
quda_matrix.h
Go to the documentation of this file.
1 #ifndef _QUDA_MATRIX_H_
2 #define _QUDA_MATRIX_H_
3 
4 #include <cstdio>
5 
6 #include <cstdlib>
7 #include <iostream>
8 #include <iomanip>
9 #include <cuda.h>
10 
11 #include <register_traits.h>
12 #include <float_vector.h>
13 
14 #include <complex_quda.h>
15 
16 namespace quda {
17 
18 
19  template<class T>
20  struct Zero
21  {
22  //static const T val;
23  __device__ __host__ inline
24  static T val();
25  };
26 
27  template<>
28  __device__ __host__ inline
30  {
31  return make_float2(0.,0.);
32  }
33 
34  template<>
35  __device__ __host__ inline
37  {
38  return make_double2(0.,0.);
39  }
40 
41 
42 
43  template<class T>
44  struct Identity
45  {
46  __device__ __host__ inline
47  static T val();
48  };
49 
50  template<>
51  __device__ __host__ inline
53  return make_float2(1.,0.);
54  }
55 
56  template<>
57  __device__ __host__ inline
59  return make_double2(1.,0.);
60  }
61 
62  template<typename Float, typename T> struct gauge_wrapper;
63  template<typename Float, typename T> struct gauge_ghost_wrapper;
64  template<typename Float, typename T> struct clover_wrapper;
65  template<typename T, int N> struct HMatrix;
66 
67  template<class T, int N>
68  class Matrix
69  {
70  private:
71  __device__ __host__ inline int index(int i, int j) const { return i*N + j; }
72 
73  public:
74  T data[N*N];
75 
76  __device__ __host__ inline Matrix() {
77 #pragma unroll
78  for (int i=0; i<N*N; i++) zero(data[i]);
79  }
80 
81  __device__ __host__ inline Matrix(const Matrix<T,N> &a) {
82 #pragma unroll
83  for (int i=0; i<N*N; i++) data[i] = a.data[i];
84  }
85 
86  __device__ __host__ inline Matrix(const T data_[]) {
87 #pragma unroll
88  for (int i=0; i<N*N; i++) data[i] = data_[i];
89  }
90 
91  __device__ __host__ inline Matrix(const HMatrix<typename RealType<T>::type,N> &a);
92 
93  __device__ __host__ inline T const & operator()(int i, int j) const {
94  return data[index(i,j)];
95  }
96 
97  __device__ __host__ inline T & operator()(int i, int j) {
98  return data[index(i,j)];
99  }
100 
101  __device__ __host__ inline T const & operator()(int i) const {
102  int j = i % N;
103  int k = i / N;
104  return data[index(j,k)];
105  }
106 
107  __device__ __host__ inline T& operator()(int i) {
108  int j = i % N;
109  int k = i / N;
110  return data[index(j,k)];
111  }
112 
113  template<class U>
114  __device__ __host__ inline void operator=(const Matrix<U,N> & b) {
115 #pragma unroll
116  for (int i=0; i<N*N; i++) data[i] = b.data[i];
117  }
118 
119  template<typename S>
120  __device__ __host__ inline Matrix(const gauge_wrapper<typename RealType<T>::type, S> &s);
121 
122  template<typename S>
123  __device__ __host__ inline void operator=(const gauge_wrapper<typename RealType<T>::type, S> &s);
124 
125  template<typename S>
126  __device__ __host__ inline Matrix(const gauge_ghost_wrapper<typename RealType<T>::type, S> &s);
127 
128  template<typename S>
129  __device__ __host__ inline void operator=(const gauge_ghost_wrapper<typename RealType<T>::type, S> &s);
130 
136  __device__ __host__ inline uint64_t checksum() const {
137  // ensure length is rounded up to 64-bit multiple
138  constexpr int length = (N*N*sizeof(T) + sizeof(uint64_t) - 1)/ sizeof(uint64_t);
139  const uint64_t* base = reinterpret_cast<const uint64_t*>( static_cast< const void* >(data) );
140  uint64_t checksum_ = base[0];
141  for (int i=1; i<length; i++) checksum_ ^= base[i];
142  return checksum_;
143  }
144  };
145 
151  template <typename T, typename Hmat>
153  Hmat &mat;
154  const int i;
155  const int j;
156  const int idx;
157 
158  __device__ __host__ inline HMatrix_wrapper(Hmat &mat, int i, int j, int idx) : mat(mat), i(i), j(j), idx(idx) { }
159 
160  __device__ __host__ inline void operator=(const complex<T> &a) {
161  if (i==j) {
162  mat.data[idx] = a.real();
163  } else if (j<i) {
164  mat.data[idx+0] = a.real();
165  mat.data[idx+1] = a.imag();
166  } else {
167  mat.data[idx+0] = a.real();
168  mat.data[idx+1] = -a.imag();
169  }
170  }
171 
172  __device__ __host__ inline void operator+=(const complex<T> &a) {
173  if (i==j) {
174  mat.data[idx] += a.real();
175  } else if (j<i) {
176  mat.data[idx+0] += a.real();
177  mat.data[idx+1] += a.imag();
178  } else {
179  mat.data[idx+0] += a.real();
180  mat.data[idx+1] += -a.imag();
181  }
182  }
183  };
184 
188  template<class T, int N>
189  class HMatrix
190  {
192  private:
193  // compute index into triangular-packed Hermitian matrix
194  __device__ __host__ inline int index(int i, int j) const {
195  if (i==j) {
196  return i;
197  } else if (j<i) {
198  int k = N*(N-1)/2 - (N-j)*(N-j-1)/2 + i - j - 1;
199  return N + 2*k;
200  } else { // i>j
201  // switch coordinates to count from bottom right instead of top left of matrix
202  int k = N*(N-1)/2 - (N-i)*(N-i-1)/2 + j - i - 1;
203  return N + 2*k;
204  }
205  }
206 
207  public:
208  T data[N*N]; // store in real-valued array
209 
210  __device__ __host__ inline HMatrix() {
211 #pragma unroll
212  for (int i=0; i<N*N; i++) zero(data[i]);
213  }
214 
215  __device__ __host__ inline HMatrix(const HMatrix<T,N> &a) {
216 #pragma unroll
217  for (int i=0; i<N*N; i++) data[i] = a.data[i];
218  }
219 
220  __device__ __host__ inline HMatrix(const T data_[]) {
221 #pragma unroll
222  for (int i=0; i<N*N; i++) data[i] = data_[i];
223  }
224 
225  __device__ __host__ inline complex<T> const operator()(int i, int j) const {
226  const int idx = index(i,j);
227  if (i==j) {
228  return complex<T>(data[idx],0.0);
229  } else if (j<i) {
230  return complex<T>(data[idx], data[idx+1]);
231  } else {
232  return complex<T>(data[idx],-data[idx+1]);
233  }
234  }
235 
236  __device__ __host__ inline HMatrix_wrapper<T,HMatrix<T,N> > operator() (int i, int j) {
237  return HMatrix_wrapper<T,HMatrix<T,N> >(*this, i, j, index(i,j));
238  }
239 
240  template<class U>
241  __device__ __host__ inline void operator=(const HMatrix<U,N> & b) {
242 #pragma unroll
243  for (int i=0; i<N*N; i++) data[i] = b.data[i];
244  }
245 
246  template<typename S>
247  __device__ __host__ inline HMatrix(const clover_wrapper<T, S> &s);
248 
249  template<typename S>
250  __device__ __host__ inline void operator=(const clover_wrapper<T, S> &s);
251 
256  __device__ __host__ inline HMatrix<T,N> square() const {
257  HMatrix<T,N> result;
258  complex<T> tmp;
259 #pragma unroll
260  for (int i=0; i<N; i++) {
261 #pragma unroll
262  for (int k=0; k<N; k++) if (i<=k) { // else compiler can't handle triangular unroll
263  tmp.x = (*this)(i,0).real() * (*this)(0,k).real();
264  tmp.x -= (*this)(i,0).imag() * (*this)(0,k).imag();
265  tmp.y = (*this)(i,0).real() * (*this)(0,k).imag();
266  tmp.y += (*this)(i,0).imag() * (*this)(0,k).real();
267 #pragma unroll
268  for (int j=1; j<N; j++) {
269  tmp.x += (*this)(i,j).real() * (*this)(j,k).real();
270  tmp.x -= (*this)(i,j).imag() * (*this)(j,k).imag();
271  tmp.y += (*this)(i,j).real() * (*this)(j,k).imag();
272  tmp.y += (*this)(i,j).imag() * (*this)(j,k).real();
273  }
274  result(i,k) = tmp;
275  }
276  }
277  return result;
278  }
279 
280  __device__ __host__ void print() const {
281  for (int i=0; i<N; i++) {
282  printf("i=%d ", i);
283  for (int j=0; j<N; j++) {
284  printf(" (%e, %e)", (*this)(i,j).real(), (*this)(i,j).imag());
285  }
286  printf("\n");
287  }
288  printf("\n");
289  }
290 
291  };
292 
293  template<class T,int N>
294  __device__ __host__ Matrix<T,N>::Matrix(const HMatrix<typename RealType<T>::type,N> &a) {
295 #pragma unroll
296  for (int i=0; i<N; i++) {
297 #pragma unroll
298  for (int j=0; j<N; j++) {
299  (*this)(i,j) = a(i,j);
300  }
301  }
302  }
303 
304  template<class T>
305  __device__ __host__ inline T getTrace(const Matrix<T,3>& a)
306  {
307  return a(0,0) + a(1,1) + a(2,2);
308  }
309 
310 
311  template< template<typename,int> class Mat, class T>
312  __device__ __host__ inline T getDeterminant(const Mat<T,3> & a){
313 
314  T result;
315  result = a(0,0)*(a(1,1)*a(2,2) - a(2,1)*a(1,2))
316  - a(0,1)*(a(1,0)*a(2,2) - a(1,2)*a(2,0))
317  + a(0,2)*(a(1,0)*a(2,1) - a(1,1)*a(2,0));
318 
319  return result;
320  }
321 
322  template< template<typename,int> class Mat, class T, int N>
323  __device__ __host__ inline Mat<T,N> operator+(const Mat<T,N> & a, const Mat<T,N> & b)
324  {
325  Mat<T,N> result;
326 #pragma unroll
327  for (int i=0; i<N*N; i++) result.data[i] = a.data[i] + b.data[i];
328  return result;
329  }
330 
331 
332  template< template<typename,int> class Mat, class T, int N>
333  __device__ __host__ inline Mat<T,N> operator+=(Mat<T,N> & a, const Mat<T,N> & b)
334  {
335 #pragma unroll
336  for (int i=0; i<N*N; i++) a.data[i] += b.data[i];
337  return a;
338  }
339 
340  template< template<typename,int> class Mat, class T, int N>
341  __device__ __host__ inline Mat<T,N> operator+=(Mat<T,N> & a, const T & b)
342  {
343 #pragma unroll
344  for (int i=0; i<N; i++) a(i,i) += b;
345  return a;
346  }
347 
348  template< template<typename,int> class Mat, class T, int N>
349  __device__ __host__ inline Mat<T,N> operator-=(Mat<T,N> & a, const Mat<T,N> & b)
350  {
351 #pragma unroll
352  for (int i=0; i<N*N; i++) a.data[i] -= b.data[i];
353  return a;
354  }
355 
356  template< template<typename,int> class Mat, class T, int N>
357  __device__ __host__ inline Mat<T,N> operator-(const Mat<T,N> & a, const Mat<T,N> & b)
358  {
359  Mat<T,N> result;
360 #pragma unroll
361  for (int i=0; i<N*N; i++) result.data[i] = a.data[i] - b.data[i];
362  return result;
363  }
364 
365  template< template<typename,int> class Mat, class T, int N, class S>
366  __device__ __host__ inline Mat<T,N> operator*(const S & scalar, const Mat<T,N> & a){
367  Mat<T,N> result;
368 #pragma unroll
369  for (int i=0; i<N*N; ++i) result.data[i] = scalar*a.data[i];
370  return result;
371  }
372 
373  template< template<typename,int> class Mat, class T, int N, class S>
374  __device__ __host__ inline Mat<T,N> operator*(const Mat<T,N> & a, const S & scalar){
375  return scalar*a;
376  }
377 
378  template< template<typename,int> class Mat, class T, int N, class S>
379  __device__ __host__ inline Mat<T,N> operator *=(Mat<T,N> & a, const S & scalar){
380  a = scalar*a;
381  return a;
382  }
383 
384  template< template<typename,int> class Mat, class T, int N>
385  __device__ __host__ inline Mat<T,N> operator-(const Mat<T,N> & a){
386  Mat<T,N> result;
387 #pragma unroll
388  for (int i=0; i<(N*N); ++i) result.data[i] = -a.data[i];
389  return result;
390  }
391 
392 
396  template< template<typename,int> class Mat, class T, int N>
397  __device__ __host__ inline Mat<T,N> operator*(const Mat<T,N> &a, const Mat<T,N> &b)
398  {
399  Mat<T,N> result;
400 #pragma unroll
401  for (int i=0; i<N; i++) {
402 #pragma unroll
403  for (int k=0; k<N; k++) {
404  result(i,k) = a(i,0) * b(0,k);
405 #pragma unroll
406  for (int j=1; j<N; j++) {
407  result(i,k) += a(i,j) * b(j,k);
408  }
409  }
410  }
411  return result;
412  }
413 
417  template< template<typename> class complex, typename T, int N>
418  __device__ __host__ inline Matrix<complex<T>,N> operator*(const Matrix<complex<T>,N> &a, const Matrix<complex<T>,N> &b)
419  {
420  Matrix<complex<T>,N> result;
421 #pragma unroll
422  for (int i=0; i<N; i++) {
423 #pragma unroll
424  for (int k=0; k<N; k++) {
425  result(i,k).x = a(i,0).real() * b(0,k).real();
426  result(i,k).x -= a(i,0).imag() * b(0,k).imag();
427  result(i,k).y = a(i,0).real() * b(0,k).imag();
428  result(i,k).y += a(i,0).imag() * b(0,k).real();
429 #pragma unroll
430  for (int j=1; j<N; j++) {
431  result(i,k).x += a(i,j).real() * b(j,k).real();
432  result(i,k).x -= a(i,j).imag() * b(j,k).imag();
433  result(i,k).y += a(i,j).real() * b(j,k).imag();
434  result(i,k).y += a(i,j).imag() * b(j,k).real();
435  }
436  }
437  }
438  return result;
439  }
440 
441  template<class T, int N>
442  __device__ __host__ inline Matrix<T,N> operator *=(Matrix<T,N> & a, const Matrix<T,N>& b){
443 
444  Matrix<T,N> c = a;
445  a = c*b;
446  return a;
447  }
448 
449 
450  // This is so that I can multiply real and complex matrice
451  template<class T, class U, int N>
452  __device__ __host__ inline
454  {
456 #pragma unroll
457  for (int i=0; i<N; i++) {
458 #pragma unroll
459  for (int k=0; k<N; k++) {
460  result(i,k) = a(i,0) * b(0,k);
461 #pragma unroll
462  for (int j=1; j<N; j++) {
463  result(i,k) += a(i,j) * b(j,k);
464  }
465  }
466  }
467  return result;
468  }
469 
470 
471  template<class T>
472  __device__ __host__ inline
474  {
475  Matrix<T,2> result;
476  result(0,0) = a(0,0)*b(0,0) + a(0,1)*b(1,0);
477  result(0,1) = a(0,0)*b(0,1) + a(0,1)*b(1,1);
478  result(1,0) = a(1,0)*b(0,0) + a(1,1)*b(1,0);
479  result(1,1) = a(1,0)*b(0,1) + a(1,1)*b(1,1);
480  return result;
481  }
482 
483 
484  template<class T, int N>
485  __device__ __host__ inline
486  Matrix<T,N> conj(const Matrix<T,N> & other){
487  Matrix<T,N> result;
488 #pragma unroll
489  for (int i=0; i<N; ++i){
490 #pragma unroll
491  for (int j=0; j<N; ++j){
492  result(i,j) = conj( other(j,i) );
493  }
494  }
495  return result;
496  }
497 
498 
499  template<class T>
500  __device__ __host__ inline
502  {
503 
504  const T & det = getDeterminant(u);
505  const T & det_inv = static_cast<typename T::value_type>(1.0)/det;
506 
507  T temp;
508 
509  temp = u(1,1)*u(2,2) - u(1,2)*u(2,1);
510  (*uinv)(0,0) = (det_inv*temp);
511 
512  temp = u(0,2)*u(2,1) - u(0,1)*u(2,2);
513  (*uinv)(0,1) = (temp*det_inv);
514 
515  temp = u(0,1)*u(1,2) - u(0,2)*u(1,1);
516  (*uinv)(0,2) = (temp*det_inv);
517 
518  temp = u(1,2)*u(2,0) - u(1,0)*u(2,2);
519  (*uinv)(1,0) = (det_inv*temp);
520 
521  temp = u(0,0)*u(2,2) - u(0,2)*u(2,0);
522  (*uinv)(1,1) = (temp*det_inv);
523 
524  temp = u(0,2)*u(1,0) - u(0,0)*u(1,2);
525  (*uinv)(1,2) = (temp*det_inv);
526 
527  temp = u(1,0)*u(2,1) - u(1,1)*u(2,0);
528  (*uinv)(2,0) = (det_inv*temp);
529 
530  temp = u(0,1)*u(2,0) - u(0,0)*u(2,1);
531  (*uinv)(2,1) = (temp*det_inv);
532 
533  temp = u(0,0)*u(1,1) - u(0,1)*u(1,0);
534  (*uinv)(2,2) = (temp*det_inv);
535 
536  return;
537  }
538 
539 
540 
541  template<class T, int N>
542  __device__ __host__ inline
544 
545 #pragma unroll
546  for (int i=0; i<N; ++i){
547  (*m)(i,i) = 1;
548 #pragma unroll
549  for (int j=i+1; j<N; ++j){
550  (*m)(i,j) = (*m)(j,i) = 0;
551  }
552  }
553  return;
554  }
555 
556 
557  template<int N>
558  __device__ __host__ inline
560 
561 #pragma unroll
562  for (int i=0; i<N; ++i){
563  (*m)(i,i) = make_float2(1,0);
564 #pragma unroll
565  for (int j=i+1; j<N; ++j){
566  (*m)(i,j) = (*m)(j,i) = make_float2(0.,0.);
567  }
568  }
569  return;
570  }
571 
572 
573  template<int N>
574  __device__ __host__ inline
576 
577 #pragma unroll
578  for (int i=0; i<N; ++i){
579  (*m)(i,i) = make_double2(1,0);
580 #pragma unroll
581  for (int j=i+1; j<N; ++j){
582  (*m)(i,j) = (*m)(j,i) = make_double2(0.,0.);
583  }
584  }
585  return;
586  }
587 
588 
589  // Need to write more generic code for this!
590  template<class T, int N>
591  __device__ __host__ inline
593 
594 #pragma unroll
595  for (int i=0; i<N; ++i){
596 #pragma unroll
597  for (int j=0; j<N; ++j){
598  (*m)(i,j) = 0;
599  }
600  }
601  return;
602  }
603 
604 
605  template<int N>
606  __device__ __host__ inline
608 
609 #pragma unroll
610  for (int i=0; i<N; ++i){
611 #pragma unroll
612  for (int j=0; j<N; ++j){
613  (*m)(i,j) = make_float2(0.,0.);
614  }
615  }
616  return;
617  }
618 
619 
620  template<int N>
621  __device__ __host__ inline
623 
624 #pragma unroll
625  for (int i=0; i<N; ++i){
626 #pragma unroll
627  for (int j=0; j<N; ++j){
628  (*m)(i,j) = make_double2(0.,0.);
629  }
630  }
631  return;
632  }
633 
634 
635  template<typename Complex,int N>
636  __device__ __host__ inline void makeAntiHerm(Matrix<Complex,N> &m) {
637  typedef typename Complex::value_type real;
638  // first make the matrix anti-hermitian
639  Matrix<Complex,N> am = m - conj(m);
640 
641  // second make it traceless
642  real imag_trace = 0.0;
643 #pragma unroll
644  for (int i=0; i<N; i++) imag_trace += am(i,i).y;
645 #pragma unroll
646  for (int i=0; i<N; i++) {
647  am(i,i).y -= imag_trace/N;
648  }
649  m = 0.5*am;
650  }
651 
652 
653 
654  // Matrix and array are very similar
655  // Maybe I should factor out the similar
656  // code. However, I want to make sure that
657  // the compiler knows to store the
658  // data elements in registers, so I won't do
659  // it right now.
660  template<class T, int N>
661  class Array
662  {
663  private:
664  T data[N];
665 
666  public:
667  // access function
668  __device__ __host__ inline
669  T const & operator[](int i) const{
670  return data[i];
671  }
672 
673  // assignment function
674  __device__ __host__ inline
675  T & operator[](int i){
676  return data[i];
677  }
678  };
679 
680 
681  template<class T, int N>
682  __device__ __host__ inline
683  void copyColumn(const Matrix<T,N>& m, int c, Array<T,N>* a)
684  {
685 #pragma unroll
686  for (int i=0; i<N; ++i){
687  (*a)[i] = m(i,c); // c is the column index
688  }
689  return;
690  }
691 
692 
693  template<class T, int N>
694  __device__ __host__ inline
695  void outerProd(const Array<T,N>& a, const Array<T,N> & b, Matrix<T,N>* m){
696 #pragma unroll
697  for (int i=0; i<N; ++i){
698  const T conjb_i = conj(b[i]);
699  for (int j=0; j<N; ++j){
700  (*m)(j,i) = a[j]*conjb_i; // we reverse the ordering of indices because it cuts down on the number of function calls
701  }
702  }
703  return;
704  }
705 
706  template<class T, int N>
707  __device__ __host__ inline
708  void outerProd(const T (&a)[N], const T (&b)[N], Matrix<T,N>* m){
709 #pragma unroll
710  for (int i=0; i<N; ++i){
711  const T conjb_i = conj(b[i]);
712 #pragma unroll
713  for (int j=0; j<N; ++j){
714  (*m)(j,i) = a[j]*conjb_i; // we reverse the ordering of indices because it cuts down on the number of function calls
715  }
716  }
717  return;
718  }
719 
720 
721  // Need some print utilities
722  template<class T, int N>
723  std::ostream & operator << (std::ostream & os, const Matrix<T,N> & m){
724 #pragma unroll
725  for (int i=0; i<N; ++i){
726 #pragma unroll
727  for (int j=0; j<N; ++j){
728  os << m(i,j) << " ";
729  }
730  if(i<N-1) os << std::endl;
731  }
732  return os;
733  }
734 
735 
736  template<class T, int N>
737  std::ostream & operator << (std::ostream & os, const Array<T,N> & a){
738  for (int i=0; i<N; ++i){
739  os << a[i] << " ";
740  }
741  return os;
742  }
743 
744 
745  template<class T, class U>
746  __device__ inline
747  void loadLinkVariableFromArray(const T* const array, const int dir, const int idx, const int stride, Matrix<U,3> *link)
748  {
749 #pragma unroll
750  for (int i=0; i<9; ++i){
751  link->data[i] = array[idx + (dir*9 + i)*stride];
752  }
753  return;
754  }
755 
756 
757  template<class T, class U, int N>
758  __device__ inline
759  void loadMatrixFromArray(const T* const array, const int idx, const int stride, Matrix<U,N> *mat)
760  {
761 #pragma unroll
762  for (int i=0; i<(N*N); ++i){
763  mat->data[i] = array[idx + i*stride];
764  }
765  }
766 
767 
768  __device__ inline
769  void loadLinkVariableFromArray(const float2* const array, const int dir, const int idx, const int stride, Matrix<complex<double>,3> *link)
770  {
771  float2 single_temp;
772 #pragma unroll
773  for (int i=0; i<9; ++i){
774  single_temp = array[idx + (dir*9 + i)*stride];
775  link->data[i].x = single_temp.x;
776  link->data[i].y = single_temp.y;
777  }
778  return;
779  }
780 
781 
782 
783  template<class T, int N, class U>
784  __device__ inline
785  void writeMatrixToArray(const Matrix<T,N>& mat, const int idx, const int stride, U* const array)
786  {
787 #pragma unroll
788  for (int i=0; i<(N*N); ++i){
789  array[idx + i*stride] = mat.data[i];
790  }
791  }
792 
793  __device__ inline
794  void appendMatrixToArray(const Matrix<complex<double>,3>& mat, const int idx, const int stride, double2* const array)
795  {
796 #pragma unroll
797  for (int i=0; i<9; ++i){
798  array[idx + i*stride].x += mat.data[i].x;
799  array[idx + i*stride].y += mat.data[i].y;
800  }
801  }
802 
803  __device__ inline
804  void appendMatrixToArray(const Matrix<complex<float>,3>& mat, const int idx, const int stride, float2* const array)
805  {
806 #pragma unroll
807  for (int i=0; i<9; ++i){
808  array[idx + i*stride].x += mat.data[i].x;
809  array[idx + i*stride].y += mat.data[i].y;
810  }
811  }
812 
813 
814  template<class T, class U>
815  __device__ inline
816  void writeLinkVariableToArray(const Matrix<T,3> & link, const int dir, const int idx, const int stride, U* const array)
817  {
818 #pragma unroll
819  for (int i=0; i<9; ++i){
820  array[idx + (dir*9 + i)*stride] = link.data[i];
821  }
822  return;
823  }
824 
825 
826 
827 
828  __device__ inline
829  void writeLinkVariableToArray(const Matrix<complex<double>,3> & link, const int dir, const int idx, const int stride, float2* const array)
830  {
831  float2 single_temp;
832 
833 #pragma unroll
834  for (int i=0; i<9; ++i){
835  single_temp.x = link.data[i].x;
836  single_temp.y = link.data[i].y;
837  array[idx + (dir*9 + i)*stride] = single_temp;
838  }
839  return;
840  }
841 
842 
843  template<class T>
844  __device__ inline
845  void loadMomentumFromArray(const T* const array, const int dir, const int idx, const int stride, Matrix<T,3> *mom)
846  {
847  T temp2[5];
848  temp2[0] = array[idx + dir*stride*5];
849  temp2[1] = array[idx + dir*stride*5 + stride];
850  temp2[2] = array[idx + dir*stride*5 + 2*stride];
851  temp2[3] = array[idx + dir*stride*5 + 3*stride];
852  temp2[4] = array[idx + dir*stride*5 + 4*stride];
853 
854  mom->data[0].x = 0.;
855  mom->data[0].y = temp2[3].x;
856  mom->data[1] = temp2[0];
857  mom->data[2] = temp2[1];
858 
859  mom->data[3].x = -mom->data[1].x;
860  mom->data[3].y = mom->data[1].y;
861  mom->data[4].x = 0.;
862  mom->data[4].y = temp2[3].y;
863  mom->data[5] = temp2[2];
864 
865  mom->data[6].x = -mom->data[2].x;
866  mom->data[6].y = mom->data[2].y;
867 
868  mom->data[7].x = -mom->data[5].x;
869  mom->data[7].y = mom->data[5].y;
870 
871  mom->data[8].x = 0.;
872  mom->data[8].y = temp2[4].x;
873 
874  return;
875  }
876 
877 
878 
879  template<class T, class U>
880  __device__ inline
881  void writeMomentumToArray(const Matrix<T,3> & mom, const int dir, const int idx, const U coeff, const int stride, T* const array)
882  {
883  typedef typename T::value_type real;
884  T temp2;
885  temp2.x = (mom.data[1].x - mom.data[3].x)*0.5*coeff;
886  temp2.y = (mom.data[1].y + mom.data[3].y)*0.5*coeff;
887  array[idx + dir*stride*5] = temp2;
888 
889  temp2.x = (mom.data[2].x - mom.data[6].x)*0.5*coeff;
890  temp2.y = (mom.data[2].y + mom.data[6].y)*0.5*coeff;
891  array[idx + dir*stride*5 + stride] = temp2;
892 
893  temp2.x = (mom.data[5].x - mom.data[7].x)*0.5*coeff;
894  temp2.y = (mom.data[5].y + mom.data[7].y)*0.5*coeff;
895  array[idx + dir*stride*5 + stride*2] = temp2;
896 
897  const real temp = (mom.data[0].y + mom.data[4].y + mom.data[8].y)*0.3333333333333333333333333;
898  temp2.x = (mom.data[0].y-temp)*coeff;
899  temp2.y = (mom.data[4].y-temp)*coeff;
900  array[idx + dir*stride*5 + stride*3] = temp2;
901 
902  temp2.x = (mom.data[8].y - temp)*coeff;
903  temp2.y = 0.0;
904  array[idx + dir*stride*5 + stride*4] = temp2;
905 
906  return;
907  }
908 
909 
910 
911  template<class Cmplx>
912  __device__ __host__ inline
914  {
915 
916  const Cmplx & det = getDeterminant(u);
917  const Cmplx & det_inv = static_cast<typename Cmplx::value_type>(1.0)/det;
918 
919  Cmplx temp;
920 
921  temp = u(1,1)*u(2,2) - u(1,2)*u(2,1);
922  (*uinv)(0,0) = (det_inv*temp);
923 
924  temp = u(0,2)*u(2,1) - u(0,1)*u(2,2);
925  (*uinv)(0,1) = (temp*det_inv);
926 
927  temp = u(0,1)*u(1,2) - u(0,2)*u(1,1);
928  (*uinv)(0,2) = (temp*det_inv);
929 
930  temp = u(1,2)*u(2,0) - u(1,0)*u(2,2);
931  (*uinv)(1,0) = (det_inv*temp);
932 
933  temp = u(0,0)*u(2,2) - u(0,2)*u(2,0);
934  (*uinv)(1,1) = (temp*det_inv);
935 
936  temp = u(0,2)*u(1,0) - u(0,0)*u(1,2);
937  (*uinv)(1,2) = (temp*det_inv);
938 
939  temp = u(1,0)*u(2,1) - u(1,1)*u(2,0);
940  (*uinv)(2,0) = (det_inv*temp);
941 
942  temp = u(0,1)*u(2,0) - u(0,0)*u(2,1);
943  (*uinv)(2,1) = (temp*det_inv);
944 
945  temp = u(0,0)*u(1,1) - u(0,1)*u(1,0);
946  (*uinv)(2,2) = (temp*det_inv);
947 
948  return;
949  }
950  // template this!
951  inline void copyArrayToLink(Matrix<float2,3>* link, float* array){
952 #pragma unroll
953  for (int i=0; i<3; ++i){
954 #pragma unroll
955  for (int j=0; j<3; ++j){
956  (*link)(i,j).x = array[(i*3+j)*2];
957  (*link)(i,j).y = array[(i*3+j)*2 + 1];
958  }
959  }
960  return;
961  }
962 
963  template<class Cmplx, class Real>
964  inline void copyArrayToLink(Matrix<Cmplx,3>* link, Real* array){
965 #pragma unroll
966  for (int i=0; i<3; ++i){
967 #pragma unroll
968  for (int j=0; j<3; ++j){
969  (*link)(i,j).x = array[(i*3+j)*2];
970  (*link)(i,j).y = array[(i*3+j)*2 + 1];
971  }
972  }
973  return;
974  }
975 
976 
977  // and this!
978  inline void copyLinkToArray(float* array, const Matrix<float2,3>& link){
979 #pragma unroll
980  for (int i=0; i<3; ++i){
981 #pragma unroll
982  for (int j=0; j<3; ++j){
983  array[(i*3+j)*2] = link(i,j).x;
984  array[(i*3+j)*2 + 1] = link(i,j).y;
985  }
986  }
987  return;
988  }
989 
990  // and this!
991  template<class Cmplx, class Real>
992  inline void copyLinkToArray(Real* array, const Matrix<Cmplx,3>& link){
993 #pragma unroll
994  for (int i=0; i<3; ++i){
995 #pragma unroll
996  for (int j=0; j<3; ++j){
997  array[(i*3+j)*2] = link(i,j).x;
998  array[(i*3+j)*2 + 1] = link(i,j).y;
999  }
1000  }
1001  return;
1002  }
1003 
1004  template<class T>
1005  __device__ __host__ inline Matrix<T,3> getSubTraceUnit(const Matrix<T,3>& a){
1006  T tr = (a(0,0) + a(1,1) + a(2,2)) / 3.0;
1007  Matrix<T,3> res;
1008  res(0,0) = a(0,0) - tr; res(0,1) = a(0,1); res(0,2) = a(0,2);
1009  res(1,0) = a(1,0); res(1,1) = a(1,1) - tr; res(1,2) = a(1,2);
1010  res(2,0) = a(2,0); res(2,1) = a(2,1); res(2,2) = a(2,2) - tr;
1011  return res;
1012  }
1013 
1014  template<class T>
1015  __device__ __host__ inline void SubTraceUnit(Matrix<T,3>& a){
1016  T tr = (a(0,0) + a(1,1) + a(2,2)) / static_cast<T>(3.0);
1017  a(0,0) -= tr; a(1,1) -= tr; a(2,2) -= tr;
1018  }
1019 
1020  template<class T>
1021  __device__ __host__ inline double getRealTraceUVdagger(const Matrix<T,3>& a, const Matrix<T,3>& b){
1022  double sum = (double)(a(0,0).x * b(0,0).x + a(0,0).y * b(0,0).y);
1023  sum += (double)(a(0,1).x * b(0,1).x + a(0,1).y * b(0,1).y);
1024  sum += (double)(a(0,2).x * b(0,2).x + a(0,2).y * b(0,2).y);
1025  sum += (double)(a(1,0).x * b(1,0).x + a(1,0).y * b(1,0).y);
1026  sum += (double)(a(1,1).x * b(1,1).x + a(1,1).y * b(1,1).y);
1027  sum += (double)(a(1,2).x * b(1,2).x + a(1,2).y * b(1,2).y);
1028  sum += (double)(a(2,0).x * b(2,0).x + a(2,0).y * b(2,0).y);
1029  sum += (double)(a(2,1).x * b(2,1).x + a(2,1).y * b(2,1).y);
1030  sum += (double)(a(2,2).x * b(2,2).x + a(2,2).y * b(2,2).y);
1031  return sum;
1032  }
1033 
1034 
1035 
1036  // and this!
1037  template<class Cmplx>
1038  __host__ __device__ inline
1039  void printLink(const Matrix<Cmplx,3>& link){
1040  printf("(%lf, %lf)\t", link(0,0).x, link(0,0).y);
1041  printf("(%lf, %lf)\t", link(0,1).x, link(0,1).y);
1042  printf("(%lf, %lf)\n", link(0,2).x, link(0,2).y);
1043  printf("(%lf, %lf)\t", link(1,0).x, link(1,0).y);
1044  printf("(%lf, %lf)\t", link(1,1).x, link(1,1).y);
1045  printf("(%lf, %lf)\n", link(1,2).x, link(1,2).y);
1046  printf("(%lf, %lf)\t", link(2,0).x, link(2,0).y);
1047  printf("(%lf, %lf)\t", link(2,1).x, link(2,1).y);
1048  printf("(%lf, %lf)\n", link(2,2).x, link(2,2).y);
1049  printf("\n");
1050  }
1051 
1052  template<class Cmplx>
1053  __device__ __host__
1054  bool isUnitary(const Matrix<Cmplx,3>& matrix, double max_error)
1055  {
1056  const Matrix<Cmplx,3> identity = conj(matrix)*matrix;
1057 
1058 #pragma unroll
1059  for (int i=0; i<3; ++i){
1060  if( fabs(identity(i,i).x - 1.0) > max_error || fabs(identity(i,i).y) > max_error) return false;
1061 #pragma unroll
1062  for (int j=i+1; j<3; ++j){
1063  if( fabs(identity(i,j).x) > max_error || fabs(identity(i,j).y) > max_error
1064  || fabs(identity(j,i).x) > max_error || fabs(identity(j,i).y) > max_error ){
1065  return false;
1066  }
1067  }
1068  }
1069 
1070 #pragma unroll
1071  for (int i=0; i<3; i++) {
1072 #pragma unroll
1073  for (int j=0; j<3; j++) {
1074  if (isnan(matrix(i,j).x) || isnan(matrix(i,j).y)) return false;
1075  }
1076  }
1077 
1078  return true;
1079  }
1080 
1081  template<class Cmplx>
1082  __device__ __host__
1083  double ErrorSU3(const Matrix<Cmplx,3>& matrix)
1084  {
1085  const Matrix<Cmplx,3> identity_comp = conj(matrix)*matrix;
1086  double error = 0.0;
1087  Cmplx temp(0,0);
1088  int i=0;
1089  int j=0;
1090 
1091  //error = ||U^dagger U - I||_L2
1092 #pragma unroll
1093  for (i=0; i<3; ++i)
1094 #pragma unroll
1095  for (j=0; j<3; ++j)
1096  if(i==j) {
1097  temp = identity_comp(i,j);
1098  temp -= 1.0;
1099  error += norm(temp);
1100  }
1101  else {
1102  error += norm(identity_comp(i,j));
1103  }
1104  //error is L2 norm, should be (very close) to zero.
1105  return error;
1106  }
1107 
1108  template<class T>
1109  __device__ __host__ inline
1110  void exponentiate_iQ(const Matrix<T,3>& Q, Matrix<T,3>* exp_iQ)
1111  {
1112  // Use Cayley-Hamilton Theorem for SU(3) exp{iQ}.
1113  // This algorithm is outlined in
1114  // http://arxiv.org/pdf/hep-lat/0311018v1.pdf
1115  // Equation numbers in the paper are referenced by [eq_no].
1116 
1117  //Declarations
1118  typedef decltype(Q(0,0).x) undMatType;
1119 
1120  undMatType inv3 = 1.0/3.0;
1121  undMatType c0, c1, c0_max, Tr_re;
1122  undMatType f0_re, f0_im, f1_re, f1_im, f2_re, f2_im;
1123  undMatType theta;
1124  undMatType u_p, w_p; //u, w parameters.
1125  Matrix<T,3> temp1;
1126  Matrix<T,3> temp2;
1127  //[14] c0 = det(Q) = 1/3Tr(Q^3)
1128  const T & det_Q = getDeterminant(Q);
1129  c0 = det_Q.x;
1130  //[15] c1 = 1/2Tr(Q^2)
1131  // Q = Q^dag => Tr(Q^2) = Tr(QQ^dag) = sum_ab [Q_ab * Q_ab^*]
1132  temp1 = Q;
1133  temp1 = temp1 * Q;
1134  Tr_re = getTrace(temp1).x;
1135  c1 = 0.5*Tr_re;
1136 
1137  //We now have the coeffiecients c0 and c1.
1138  //We now find: exp(iQ) = f0*I + f1*Q + f2*Q^2
1139  // where fj = fj(c0,c1), j=0,1,2.
1140 
1141  //[17]
1142  c0_max = 2*pow(c1*inv3,1.5);
1143 
1144  //[25]
1145  theta = acos(c0/c0_max);
1146 
1147  //[23]
1148  u_p = sqrt(c1*inv3)*cos(theta*inv3);
1149 
1150  //[24]
1151  w_p = sqrt(c1)*sin(theta*inv3);
1152 
1153  //[29] Construct objects for fj = hj/(9u^2 - w^2).
1154  undMatType u_sq = u_p*u_p;
1155  undMatType w_sq = w_p*w_p;
1156  undMatType denom_inv = 1.0/(9*u_sq - w_sq);
1157  undMatType exp_iu_re = cos(u_p);
1158  undMatType exp_iu_im = sin(u_p);
1159  undMatType exp_2iu_re = exp_iu_re*exp_iu_re - exp_iu_im*exp_iu_im;
1160  undMatType exp_2iu_im = 2*exp_iu_re*exp_iu_im;
1161  undMatType cos_w = cos(w_p);
1162  undMatType sinc_w;
1163  undMatType hj_re = 0.0;
1164  undMatType hj_im = 0.0;
1165 
1166  //[33] Added one more term to the series given in the paper.
1167  if (w_p < 0.05 && w_p > -0.05) {
1168  //1 - 1/6 x^2 (1 - 1/20 x^2 (1 - 1/42 x^2(1 - 1/72*x^2)))
1169  sinc_w = 1.0 - (w_sq/6.0)*(1 - (w_sq*0.05)*(1 - (w_sq/42.0)*(1 - (w_sq/72.0))));
1170  }
1171  else sinc_w = sin(w_p)/w_p;
1172 
1173 
1174  //[34] Test for c0 < 0.
1175  int parity = 0;
1176  if(c0 < 0) {
1177  c0 *= -1.0;
1178  parity = 1;
1179  //calculate fj with c0 > 0 and then convert all fj.
1180  }
1181 
1182  //Get all the numerators for fj,
1183  //[30] f0
1184  hj_re = (u_sq - w_sq)*exp_2iu_re + 8*u_sq*cos_w*exp_iu_re + 2*u_p*(3*u_sq + w_sq)*sinc_w*exp_iu_im;
1185  hj_im = (u_sq - w_sq)*exp_2iu_im - 8*u_sq*cos_w*exp_iu_im + 2*u_p*(3*u_sq + w_sq)*sinc_w*exp_iu_re;
1186  f0_re = hj_re*denom_inv;
1187  f0_im = hj_im*denom_inv;
1188 
1189  //[31] f1
1190  hj_re = 2*u_p*exp_2iu_re - 2*u_p*cos_w*exp_iu_re + (3*u_sq - w_sq)*sinc_w*exp_iu_im;
1191  hj_im = 2*u_p*exp_2iu_im + 2*u_p*cos_w*exp_iu_im + (3*u_sq - w_sq)*sinc_w*exp_iu_re;
1192  f1_re = hj_re*denom_inv;
1193  f1_im = hj_im*denom_inv;
1194 
1195  //[32] f2
1196  hj_re = exp_2iu_re - cos_w*exp_iu_re - 3*u_p*sinc_w*exp_iu_im;
1197  hj_im = exp_2iu_im + cos_w*exp_iu_im - 3*u_p*sinc_w*exp_iu_re;
1198  f2_re = hj_re*denom_inv;
1199  f2_im = hj_im*denom_inv;
1200 
1201  //[34] If c0 < 0, apply tranformation fj(-c0,c1) = (-1)^j f^*j(c0,c1)
1202  if (parity == 1) {
1203  f0_im *= -1.0;
1204  f1_re *= -1.0;
1205  f2_im *= -1.0;
1206  }
1207 
1208  T f0_c;
1209  T f1_c;
1210  T f2_c;
1211 
1212  f0_c.x = f0_re;
1213  f0_c.y = f0_im;
1214 
1215  f1_c.x = f1_re;
1216  f1_c.y = f1_im;
1217 
1218  f2_c.x = f2_re;
1219  f2_c.y = f2_im;
1220 
1221  //[19] Construct exp{iQ}
1222  setZero(exp_iQ);
1223  Matrix<T,3> UnitM;
1224  setIdentity(&UnitM);
1225  // +f0*I
1226  temp1 = f0_c * UnitM;
1227  *exp_iQ = temp1;
1228 
1229  // +f1*Q
1230  temp1 = f1_c * Q;
1231  *exp_iQ += temp1;
1232 
1233  // +f2*Q^2
1234  temp1 = Q * Q;
1235  temp2 = f2_c * temp1;
1236  *exp_iQ += temp2;
1237 
1238  //exp(iQ) is now defined.
1239  return;
1240  }
1241 
1242 
1243 } // end namespace quda
1244 #endif // _QUDA_MATRIX_H_
__host__ __device__ float4 operator-=(float4 &x, const float4 y)
Definition: float_vector.h:131
gauge_wrapper is an internal class that is used to wrap instances of gauge accessors, currying in a specific location on the field. The operator() accessors in gauge-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the Matrix class. As a result we can include gauge-field accessors directly in Matrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the gauge-field accessors.
__device__ __host__ double ErrorSU3(const Matrix< Cmplx, 3 > &matrix)
Definition: quda_matrix.h:1083
__device__ __host__ void setZero(Matrix< T, N > *m)
Definition: quda_matrix.h:592
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:896
__device__ static __host__ T val()
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
T data[N *N]
Definition: quda_matrix.h:208
__device__ __host__ double getRealTraceUVdagger(const Matrix< T, 3 > &a, const Matrix< T, 3 > &b)
Definition: quda_matrix.h:1021
__device__ __host__ Matrix(const T data_[])
Definition: quda_matrix.h:86
__device__ __host__ void operator=(const complex< T > &a)
Definition: quda_matrix.h:160
void Mat(sFloat *out, gFloat **link, sFloat *in, int daggerBit, int mu)
__device__ void writeMomentumToArray(const Matrix< T, 3 > &mom, const int dir, const int idx, const U coeff, const int stride, T *const array)
Definition: quda_matrix.h:881
__host__ __device__ float2 operator*=(float2 &x, const float a)
Definition: float_vector.h:151
__device__ __host__ uint64_t checksum() const
Definition: quda_matrix.h:136
__device__ __host__ void operator=(const Matrix< U, N > &b)
Definition: quda_matrix.h:114
__device__ void loadLinkVariableFromArray(const T *const array, const int dir, const int idx, const int stride, Matrix< U, 3 > *link)
Definition: quda_matrix.h:747
This is just a dummy structure we use for trove to define the required structure size.
__host__ __device__ void printLink(const Matrix< Cmplx, 3 > &link)
Definition: quda_matrix.h:1039
void copyLinkToArray(float *array, const Matrix< float2, 3 > &link)
Definition: quda_matrix.h:978
bool isUnitary(const cpuGaugeField &field, double max_error)
__device__ __host__ T const & operator()(int i, int j) const
Definition: quda_matrix.h:93
#define b
__device__ __host__ void outerProd(const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m)
Definition: quda_matrix.h:695
__device__ __host__ T const & operator[](int i) const
Definition: quda_matrix.h:669
wrapper class that enables us to write to Hmatrices in packed format
Definition: quda_matrix.h:152
__device__ __host__ complex< T > const operator()(int i, int j) const
Definition: quda_matrix.h:225
int printf(const char *,...) __attribute__((__format__(__printf__, 1, 2)))
__host__ __device__ ValueType sin(ValueType x)
Definition: complex_quda.h:40
__host__ __device__ void sum(double &a, double &b)
__device__ __host__ HMatrix()
Definition: quda_matrix.h:210
T data[N *N]
Definition: quda_matrix.h:74
gauge_ghost_wrapper is an internal class that is used to wrap instances of gauge ghost accessors...
static __device__ __host__ T val()
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator-(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor subtraction operator.
Definition: color_spinor.h:907
clover_wrapper is an internal class that is used to wrap instances of colorspinor accessors...
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:100
Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices) ...
Definition: quda_matrix.h:65
unsigned long long uint64_t
__device__ __host__ void copyColumn(const Matrix< T, N > &m, int c, Array< T, N > *a)
Definition: quda_matrix.h:683
Provides precision abstractions and defines the register precision given the storage precision using ...
__device__ __host__ T & operator()(int i)
Definition: quda_matrix.h:107
__device__ void writeLinkVariableToArray(const Matrix< T, 3 > &link, const int dir, const int idx, const int stride, U *const array)
Definition: quda_matrix.h:816
__device__ __host__ void SubTraceUnit(Matrix< T, 3 > &a)
Definition: quda_matrix.h:1015
__device__ void loadMatrixFromArray(const T *const array, const int idx, const int stride, Matrix< U, N > *mat)
Definition: quda_matrix.h:759
void copyArrayToLink(Matrix< float2, 3 > *link, float *array)
Definition: quda_matrix.h:951
__device__ __host__ void setIdentity(Matrix< T, N > *m)
Definition: quda_matrix.h:543
__device__ __host__ int index(int i, int j) const
Definition: quda_matrix.h:194
__device__ __host__ T getTrace(const Matrix< T, 3 > &a)
Definition: quda_matrix.h:305
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator+(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor addition operator.
Definition: color_spinor.h:885
__device__ __host__ int index(int i, int j) const
Definition: quda_matrix.h:71
__device__ void appendMatrixToArray(const Matrix< complex< double >, 3 > &mat, const int idx, const int stride, double2 *const array)
Definition: quda_matrix.h:794
__device__ void loadMomentumFromArray(const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *mom)
Definition: quda_matrix.h:845
__device__ __host__ Matrix< T, 3 > getSubTraceUnit(const Matrix< T, 3 > &a)
Definition: quda_matrix.h:1005
__device__ __host__ T & operator()(int i, int j)
Definition: quda_matrix.h:97
__device__ __host__ void operator=(const HMatrix< U, N > &b)
Definition: quda_matrix.h:241
__device__ __host__ void computeLinkInverse(Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u)
Definition: quda_matrix.h:913
__device__ __host__ void print() const
Definition: quda_matrix.h:280
double fabs(double)
__device__ __host__ void computeMatrixInverse(const Matrix< T, 3 > &u, Matrix< T, 3 > *uinv)
Definition: quda_matrix.h:501
void size_t length
__device__ __host__ HMatrix(const T data_[])
Definition: quda_matrix.h:220
__device__ __host__ HMatrix< T, N > square() const
Hermitian matrix square.
Definition: quda_matrix.h:256
__device__ __host__ Matrix()
Definition: quda_matrix.h:76
__device__ __host__ void makeAntiHerm(Matrix< Complex, N > &m)
Definition: quda_matrix.h:636
const void * c
__device__ __host__ Matrix(const Matrix< T, N > &a)
Definition: quda_matrix.h:81
__host__ __device__ ValueType cos(ValueType x)
Definition: complex_quda.h:35
static unsigned base
__device__ __host__ HMatrix_wrapper(Hmat &mat, int i, int j, int idx)
Definition: quda_matrix.h:158
__device__ __host__ void operator+=(const complex< T > &a)
Definition: quda_matrix.h:172
__host__ __device__ float4 operator+=(float4 &x, const float4 y)
Definition: float_vector.h:96
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator*(const S &a, const ColorSpinor< Float, Nc, Ns > &x)
Compute the scalar-vector product y = a * x.
Definition: color_spinor.h:929
__host__ __device__ ValueType acos(ValueType x)
Definition: complex_quda.h:50
struct cudaExtent unsigned int cudaArray_t array
__device__ __host__ T getDeterminant(const Mat< T, 3 > &a)
Definition: quda_matrix.h:312
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:115
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaParity parity
Definition: covdev_test.cpp:53
#define a
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:82
__device__ void writeMatrixToArray(const Matrix< T, N > &mat, const int idx, const int stride, U *const array)
Definition: quda_matrix.h:785
__device__ __host__ HMatrix(const HMatrix< T, N > &a)
Definition: quda_matrix.h:215
__device__ __host__ void exponentiate_iQ(const Matrix< T, 3 > &Q, Matrix< T, 3 > *exp_iQ)
Definition: quda_matrix.h:1110
__device__ __host__ T & operator[](int i)
Definition: quda_matrix.h:675
__device__ __host__ T const & operator()(int i) const
Definition: quda_matrix.h:101