19 __device__ __host__
inline
24 __device__ __host__
inline
27 return make_float2(0.,0.);
31 __device__ __host__
inline
34 return make_double2(0.,0.);
40 __device__ __host__
inline
45 __device__ __host__
inline
47 return make_float2(1.,0.);
51 __device__ __host__
inline
53 return make_double2(1.,0.);
56 template<
typename Float,
typename T>
struct gauge_wrapper;
57 template<
typename Float,
typename T>
struct gauge_ghost_wrapper;
58 template<
typename Float,
typename T>
struct clover_wrapper;
59 template <
typename T,
int N>
class HMatrix;
61 template<
class T,
int N>
67 __device__ __host__
inline int index(
int i,
int j)
const {
return i*N + j; }
72 __device__ __host__ constexpr
int size()
const {
return N; }
79 for (
int i=0; i<N*N; i++)
data[i] = a.
data[i];
85 for (
int i = 0; i < N * N; i++)
data[i] = a.
data[i];
88 __device__ __host__
inline Matrix(
const T data_[])
91 for (
int i=0; i<N*N; i++)
data[i] = data_[i];
96 __device__ __host__
inline T
const &
operator()(
int i,
int j)
const {
97 return data[index(i,j)];
101 return data[index(i,j)];
104 __device__ __host__
inline T
const &
operator()(
int i)
const {
107 return data[index(j,k)];
113 return data[index(j,k)];
119 for (
int i=0; i<N*N; i++)
data[i] = b.
data[i];
139 __device__ __host__
inline real
L1() {
142 for (
int j=0; j<N; j++) {
145 for (
int i=0; i<N; i++) {
148 l1 = col_sum > l1 ? col_sum : l1;
158 __device__ __host__
inline real
L2() {
161 for (
int j=0; j<N; j++) {
163 for (
int i=0; i<N; i++) {
175 __device__ __host__
inline real
Linf() {
178 for (
int i=0; i<N; i++) {
181 for (
int j=0; j<N; j++) {
184 linf = row_sum > linf ? row_sum : linf;
194 __device__ __host__
inline uint64_t
checksum()
const {
196 constexpr
int length = (N*N*
sizeof(T) +
sizeof(uint64_t) - 1)/
sizeof(uint64_t);
197 uint64_t base[
length] = { };
198 memcpy(base,
data, N * N *
sizeof(T));
199 uint64_t checksum_ = base[0];
200 for (
int i=1; i<
length; i++) checksum_ ^= base[i];
204 __device__ __host__
inline bool isUnitary(
double max_error)
const
209 for (
int i=0; i<N; ++i){
210 if( fabs(
identity(i,i).real() - 1.0) > max_error ||
211 fabs(
identity(i,i).imag()) > max_error)
return false;
214 for (
int j=i+1; j<N; ++j){
215 if( fabs(
identity(i,j).real()) > max_error ||
216 fabs(
identity(i,j).imag()) > max_error ||
217 fabs(
identity(j,i).real()) > max_error ||
218 fabs(
identity(j,i).imag()) > max_error ){
225 for (
int i=0; i<N; i++) {
227 for (
int j=0; j<N; j++) {
228 if (std::isnan((*
this)(i,j).real()) ||
229 std::isnan((*
this)(i,j).imag()))
return false;
243 template <
typename T,
typename Hmat>
280 template<
class T,
int N>
286 __device__ __host__
inline int index(
int i,
int j)
const {
290 int k = N*(N-1)/2 - (N-j)*(N-j-1)/2 + i - j - 1;
294 int k = N*(N-1)/2 - (N-i)*(N-i-1)/2 + j - i - 1;
304 for (
int i = 0; i < N * N; i++)
data[i] = (T)0.0;
309 for (
int i=0; i<N*N; i++)
data[i] = a.
data[i];
312 __device__ __host__
inline HMatrix(
const T data_[]) {
314 for (
int i=0; i<N*N; i++)
data[i] = data_[i];
318 const int idx = index(i,j);
335 for (
int i=0; i<N*N; i++)
data[i] = b.
data[i];
352 for (
int i=0; i<N; i++) {
354 for (
int k=0; k<N; k++)
if (i<=k) {
355 tmp.x = (*this)(i,0).real() * (*this)(0,k).real();
356 tmp.x -= (*this)(i,0).imag() * (*this)(0,k).imag();
357 tmp.y = (*this)(i,0).real() * (*this)(0,k).imag();
358 tmp.y += (*this)(i,0).imag() * (*this)(0,k).real();
360 for (
int j=1; j<N; j++) {
361 tmp.x += (*this)(i,j).real() * (*this)(j,k).real();
362 tmp.x -= (*this)(i,j).imag() * (*this)(j,k).imag();
363 tmp.y += (*this)(i,j).real() * (*this)(j,k).imag();
364 tmp.y += (*this)(i,j).imag() * (*this)(j,k).real();
376 __device__ __host__
inline T
max()
const
379 T
max =
static_cast<T
>(0.0);
385 __device__ __host__
void print()
const {
386 for (
int i=0; i<N; i++) {
388 for (
int j=0; j<N; j++) {
389 printf(
" (%e, %e)", (*
this)(i,j).real(), (*
this)(i,j).imag());
398 template<
class T,
int N>
401 for (
int i=0; i<N; i++) {
403 for (
int j=0; j<N; j++) {
404 (*this)(i,j) = a(i,j);
412 return a(0,0) + a(1,1) + a(2,2);
416 template<
template<
typename,
int>
class Mat,
class T>
420 result = a(0,0)*(a(1,1)*a(2,2) - a(2,1)*a(1,2))
421 - a(0,1)*(a(1,0)*a(2,2) - a(1,2)*a(2,0))
422 + a(0,2)*(a(1,0)*a(2,1) - a(1,1)*a(2,0));
427 template<
template<
typename,
int>
class Mat,
class T,
int N>
428 __device__ __host__
inline Mat<T,N>
operator+(
const Mat<T,N> & a,
const Mat<T,N> & b)
432 for (
int i=0; i<N*N; i++) result.data[i] = a.data[i] + b.data[i];
437 template<
template<
typename,
int>
class Mat,
class T,
int N>
438 __device__ __host__
inline Mat<T,N>
operator+=(Mat<T,N> & a,
const Mat<T,N> & b)
441 for (
int i=0; i<N*N; i++) a.data[i] += b.data[i];
445 template<
template<
typename,
int>
class Mat,
class T,
int N>
446 __device__ __host__
inline Mat<T,N>
operator+=(Mat<T,N> & a,
const T & b)
449 for (
int i=0; i<N; i++) a(i,i) += b;
453 template<
template<
typename,
int>
class Mat,
class T,
int N>
454 __device__ __host__
inline Mat<T,N>
operator-=(Mat<T,N> & a,
const Mat<T,N> & b)
457 for (
int i=0; i<N*N; i++) a.data[i] -= b.data[i];
461 template<
template<
typename,
int>
class Mat,
class T,
int N>
462 __device__ __host__
inline Mat<T,N>
operator-(
const Mat<T,N> & a,
const Mat<T,N> & b)
466 for (
int i=0; i<N*N; i++) result.data[i] = a.data[i] - b.data[i];
470 template<
template<
typename,
int>
class Mat,
class T,
int N,
class S>
474 for (
int i=0; i<N*N; ++i) result.data[i] =
scalar*a.data[i];
478 template<
template<
typename,
int>
class Mat,
class T,
int N,
class S>
483 template<
template<
typename,
int>
class Mat,
class T,
int N,
class S>
489 template<
template<
typename,
int>
class Mat,
class T,
int N>
490 __device__ __host__
inline Mat<T,N>
operator-(
const Mat<T,N> & a){
493 for (
int i=0; i<(N*N); ++i) result.data[i] = -a.data[i];
501 template<
template<
typename,
int>
class Mat,
class T,
int N>
502 __device__ __host__
inline Mat<T,N>
operator*(
const Mat<T,N> &a,
const Mat<T,N> &b)
506 for (
int i=0; i<N; i++) {
508 for (
int k=0; k<N; k++) {
509 result(i,k) = a(i,0) * b(0,k);
511 for (
int j=1; j<N; j++) {
512 result(i,k) += a(i,j) * b(j,k);
522 template<
template<
typename>
class complex,
typename T,
int N>
527 for (
int i=0; i<N; i++) {
529 for (
int k=0; k<N; k++) {
530 result(i,k).x = a(i,0).real() * b(0,k).real();
531 result(i,k).x -= a(i,0).imag() * b(0,k).imag();
532 result(i,k).y = a(i,0).real() * b(0,k).imag();
533 result(i,k).y += a(i,0).imag() * b(0,k).real();
535 for (
int j=1; j<N; j++) {
536 result(i,k).x += a(i,j).real() * b(j,k).real();
537 result(i,k).x -= a(i,j).imag() * b(j,k).imag();
538 result(i,k).y += a(i,j).real() * b(j,k).imag();
539 result(i,k).y += a(i,j).imag() * b(j,k).real();
546 template<
class T,
int N>
556 template <
class T,
class U,
int N>
562 for (
int i=0; i<N; i++) {
564 for (
int k=0; k<N; k++) {
565 result(i,k) = a(i,0) * b(0,k);
567 for (
int j=1; j<N; j++) {
568 result(i,k) += a(i,j) * b(j,k);
576 __device__ __host__
inline
580 result(0,0) = a(0,0)*b(0,0) + a(0,1)*b(1,0);
581 result(0,1) = a(0,0)*b(0,1) + a(0,1)*b(1,1);
582 result(1,0) = a(1,0)*b(0,0) + a(1,1)*b(1,0);
583 result(1,1) = a(1,0)*b(0,1) + a(1,1)*b(1,1);
588 template<
class T,
int N>
589 __device__ __host__
inline
593 for (
int i=0; i<N; ++i){
595 for (
int j=0; j<N; ++j){
596 result(i,j) =
conj( other(j,i) );
604 __device__ __host__
inline
608 const T det_inv =
static_cast<typename T::value_type
>(1.0)/det;
613 temp = u(1,1)*u(2,2) - u(1,2)*u(2,1);
614 uinv(0,0) = (det_inv*temp);
616 temp = u(0,2)*u(2,1) - u(0,1)*u(2,2);
617 uinv(0,1) = (temp*det_inv);
619 temp = u(0,1)*u(1,2) - u(0,2)*u(1,1);
620 uinv(0,2) = (temp*det_inv);
622 temp = u(1,2)*u(2,0) - u(1,0)*u(2,2);
623 uinv(1,0) = (det_inv*temp);
625 temp = u(0,0)*u(2,2) - u(0,2)*u(2,0);
626 uinv(1,1) = (temp*det_inv);
628 temp = u(0,2)*u(1,0) - u(0,0)*u(1,2);
629 uinv(1,2) = (temp*det_inv);
631 temp = u(1,0)*u(2,1) - u(1,1)*u(2,0);
632 uinv(2,0) = (det_inv*temp);
634 temp = u(0,1)*u(2,0) - u(0,0)*u(2,1);
635 uinv(2,1) = (temp*det_inv);
637 temp = u(0,0)*u(1,1) - u(0,1)*u(1,0);
638 uinv(2,2) = (temp*det_inv);
645 template<
class T,
int N>
646 __device__ __host__
inline
650 for (
int i=0; i<N; ++i){
653 for (
int j=i+1; j<N; ++j){
654 (*m)(i,j) = (*m)(j,i) = 0;
661 __device__ __host__
inline
665 for (
int i=0; i<N; ++i){
666 (*m)(i,i) = make_float2(1,0);
668 for (
int j=i+1; j<N; ++j){
669 (*m)(i,j) = (*m)(j,i) = make_float2(0.,0.);
676 __device__ __host__
inline
680 for (
int i=0; i<N; ++i){
681 (*m)(i,i) = make_double2(1,0);
683 for (
int j=i+1; j<N; ++j){
684 (*m)(i,j) = (*m)(j,i) = make_double2(0.,0.);
691 template<
class T,
int N>
692 __device__ __host__
inline
696 for (
int i=0; i<N; ++i){
698 for (
int j=0; j<N; ++j){
706 __device__ __host__
inline
710 for (
int i=0; i<N; ++i){
712 for (
int j=0; j<N; ++j){
713 (*m)(i,j) = make_float2(0.,0.);
720 __device__ __host__
inline
724 for (
int i=0; i<N; ++i){
726 for (
int j=0; j<N; ++j){
727 (*m)(i,j) = make_double2(0.,0.);
733 template<
typename Complex,
int N>
735 typedef typename Complex::value_type real;
740 real imag_trace = 0.0;
742 for (
int i=0; i<N; i++) imag_trace += am(i,i).y;
744 for (
int i=0; i<N; i++) {
745 am(i,i).y -= imag_trace/N;
752 typedef typename Complex::value_type real;
757 real imag_trace = 0.0;
759 for (
int i = 0; i < N; i++) imag_trace += am(i, i).y;
761 for (
int i = 0; i < N; i++) { am(i, i).y -= imag_trace / N; }
773 template<
class T,
int N>
781 __device__ __host__
inline
787 __device__ __host__
inline
794 template<
class T,
int N>
795 __device__ __host__
inline
799 for (
int i=0; i<N; ++i){
806 template<
class T,
int N>
809 for (
int i=0; i<N; ++i){
811 for (
int j=0; j<N; ++j){
814 if(i<N-1) os << std::endl;
820 template<
class T,
int N>
822 for (
int i=0; i<N; ++i){
828 template<
class Cmplx>
829 __device__ __host__
inline
833 const Cmplx & det_inv =
static_cast<typename Cmplx::value_type
>(1.0)/det;
837 temp = u(1,1)*u(2,2) - u(1,2)*u(2,1);
838 (*uinv)(0,0) = (det_inv*temp);
840 temp = u(0,2)*u(2,1) - u(0,1)*u(2,2);
841 (*uinv)(0,1) = (temp*det_inv);
843 temp = u(0,1)*u(1,2) - u(0,2)*u(1,1);
844 (*uinv)(0,2) = (temp*det_inv);
846 temp = u(1,2)*u(2,0) - u(1,0)*u(2,2);
847 (*uinv)(1,0) = (det_inv*temp);
849 temp = u(0,0)*u(2,2) - u(0,2)*u(2,0);
850 (*uinv)(1,1) = (temp*det_inv);
852 temp = u(0,2)*u(1,0) - u(0,0)*u(1,2);
853 (*uinv)(1,2) = (temp*det_inv);
855 temp = u(1,0)*u(2,1) - u(1,1)*u(2,0);
856 (*uinv)(2,0) = (det_inv*temp);
858 temp = u(0,1)*u(2,0) - u(0,0)*u(2,1);
859 (*uinv)(2,1) = (temp*det_inv);
861 temp = u(0,0)*u(1,1) - u(0,1)*u(1,0);
862 (*uinv)(2,2) = (temp*det_inv);
867 for (
int i=0; i<3; ++i){
869 for (
int j=0; j<3; ++j){
870 (*link)(i,j).x = array[(i*3+j)*2];
871 (*link)(i,j).y = array[(i*3+j)*2 + 1];
876 template<
class Cmplx,
class Real>
879 for (
int i=0; i<3; ++i){
881 for (
int j=0; j<3; ++j){
882 (*link)(i,j).x = array[(i*3+j)*2];
883 (*link)(i,j).y = array[(i*3+j)*2 + 1];
892 for (
int i=0; i<3; ++i){
894 for (
int j=0; j<3; ++j){
895 array[(i*3+j)*2] = link(i,j).x;
896 array[(i*3+j)*2 + 1] = link(i,j).y;
902 template<
class Cmplx,
class Real>
905 for (
int i=0; i<3; ++i){
907 for (
int j=0; j<3; ++j){
908 array[(i*3+j)*2] = link(i,j).x;
909 array[(i*3+j)*2 + 1] = link(i,j).y;
916 T tr = (a(0,0) + a(1,1) + a(2,2)) / 3.0;
918 res(0,0) = a(0,0) - tr; res(0,1) = a(0,1); res(0,2) = a(0,2);
919 res(1,0) = a(1,0); res(1,1) = a(1,1) - tr; res(1,2) = a(1,2);
920 res(2,0) = a(2,0); res(2,1) = a(2,1); res(2,2) = a(2,2) - tr;
926 T tr = (a(0,0) + a(1,1) + a(2,2)) /
static_cast<T
>(3.0);
927 a(0,0) -= tr; a(1,1) -= tr; a(2,2) -= tr;
932 double sum = (double)(a(0,0).x * b(0,0).x + a(0,0).y * b(0,0).y);
933 sum += (double)(a(0,1).x * b(0,1).x + a(0,1).y * b(0,1).y);
934 sum += (double)(a(0,2).x * b(0,2).x + a(0,2).y * b(0,2).y);
935 sum += (double)(a(1,0).x * b(1,0).x + a(1,0).y * b(1,0).y);
936 sum += (double)(a(1,1).x * b(1,1).x + a(1,1).y * b(1,1).y);
937 sum += (double)(a(1,2).x * b(1,2).x + a(1,2).y * b(1,2).y);
938 sum += (double)(a(2,0).x * b(2,0).x + a(2,0).y * b(2,0).y);
939 sum += (double)(a(2,1).x * b(2,1).x + a(2,1).y * b(2,1).y);
940 sum += (double)(a(2,2).x * b(2,2).x + a(2,2).y * b(2,2).y);
945 template<
class Cmplx>
946 __host__ __device__
inline
948 printf(
"(%lf, %lf)\t", link(0,0).x, link(0,0).y);
949 printf(
"(%lf, %lf)\t", link(0,1).x, link(0,1).y);
950 printf(
"(%lf, %lf)\n", link(0,2).x, link(0,2).y);
951 printf(
"(%lf, %lf)\t", link(1,0).x, link(1,0).y);
952 printf(
"(%lf, %lf)\t", link(1,1).x, link(1,1).y);
953 printf(
"(%lf, %lf)\n", link(1,2).x, link(1,2).y);
954 printf(
"(%lf, %lf)\t", link(2,0).x, link(2,0).y);
955 printf(
"(%lf, %lf)\t", link(2,1).x, link(2,1).y);
956 printf(
"(%lf, %lf)\n", link(2,2).x, link(2,2).y);
960 template<
class Cmplx>
976 temp = identity_comp(i,j);
981 error +=
norm(identity_comp(i,j));
995 typedef decltype(Q(0, 0).x) matType;
997 matType inv3 = 1.0 / 3.0;
998 matType c0, c1, c0_max, Tr_re;
999 matType f0_re, f0_im, f1_re, f1_im, f2_re, f2_im;
1019 auto sqrt_c1_inv3 =
sqrt(c1 * inv3);
1020 c0_max = 2 * (c1 * inv3 * sqrt_c1_inv3);
1023 theta =
acos(c0/c0_max);
1025 sincos(theta * inv3, &w_p, &u_p);
1027 u_p *= sqrt_c1_inv3;
1033 matType u_sq = u_p * u_p;
1034 matType w_sq = w_p * w_p;
1035 matType denom_inv = 1.0 / (9 * u_sq - w_sq);
1036 matType exp_iu_re, exp_iu_im;
1037 sincos(u_p, &exp_iu_im, &exp_iu_re);
1038 matType exp_2iu_re = exp_iu_re * exp_iu_re - exp_iu_im * exp_iu_im;
1039 matType exp_2iu_im = 2 * exp_iu_re * exp_iu_im;
1040 matType cos_w =
cos(w_p);
1042 matType hj_re = 0.0;
1043 matType hj_im = 0.0;
1046 if (w_p < 0.05 && w_p > -0.05) {
1048 sinc_w = 1.0 - (w_sq/6.0)*(1 - (w_sq*0.05)*(1 - (w_sq/42.0)*(1 - (w_sq/72.0))));
1050 else sinc_w =
sin(w_p)/w_p;
1062 hj_re = (u_sq - w_sq)*exp_2iu_re + 8*u_sq*cos_w*exp_iu_re + 2*u_p*(3*u_sq + w_sq)*sinc_w*exp_iu_im;
1063 hj_im = (u_sq - w_sq)*exp_2iu_im - 8*u_sq*cos_w*exp_iu_im + 2*u_p*(3*u_sq + w_sq)*sinc_w*exp_iu_re;
1064 f0_re = hj_re*denom_inv;
1065 f0_im = hj_im*denom_inv;
1068 hj_re = 2*u_p*exp_2iu_re - 2*u_p*cos_w*exp_iu_re + (3*u_sq - w_sq)*sinc_w*exp_iu_im;
1069 hj_im = 2*u_p*exp_2iu_im + 2*u_p*cos_w*exp_iu_im + (3*u_sq - w_sq)*sinc_w*exp_iu_re;
1070 f1_re = hj_re*denom_inv;
1071 f1_im = hj_im*denom_inv;
1074 hj_re = exp_2iu_re - cos_w*exp_iu_re - 3*u_p*sinc_w*exp_iu_im;
1075 hj_im = exp_2iu_im + cos_w*exp_iu_im - 3*u_p*sinc_w*exp_iu_re;
1076 f2_re = hj_re*denom_inv;
1077 f2_im = hj_im*denom_inv;
1105 temp1 = f0_c * UnitM;
1114 temp2 = f2_c * temp1;
1128 Complex a2 = (q(3) * q(1) + q(7) * q(5) + q(6) * q(2) - (q(0) * q(4) + (q(0) + q(4)) * q(8))) / (
Float)3.0;
1129 Complex a3 = q(0) * q(4) * q(8) + q(1) * q(5) * q(6) + q(2) * q(3) * q(7) - q(6) * q(4) * q(2)
1130 - q(3) * q(1) * q(8) - q(0) * q(7) * q(5);
1142 w1[1] = r1 * cp + r2 * cm;
1143 w1[2] = r2 * cp + r1 * cm;
1144 Complex z1 = q(1) * q(6) - q(0) * q(7);
1145 Complex z2 = q(3) * q(7) - q(4) * q(6);
1148 Complex wr21 = (z1 + al * q(7)) / (z2 + al * q(6));
1149 Complex wr31 = (al - q(0) - wr21 * q(3)) / q(6);
1152 Complex wr22 = (z1 + al * q(7)) / (z2 + al * q(6));
1153 Complex wr32 = (al - q(0) - wr22 * q(3)) / q(6);
1156 Complex wr23 = (z1 + al * q(7)) / (z2 + al * q(6));
1157 Complex wr33 = (al - q(0) - wr23 * q(3)) / q(6);
1159 z1 = q(3) * q(2) - q(0) * q(5);
1160 z2 = q(1) * q(5) - q(4) * q(2);
1163 Complex wl21 =
conj((z1 + al * q(5)) / (z2 + al * q(2)));
1167 Complex wl22 =
conj((z1 + al * q(5)) / (z2 + al * q(2)));
1171 Complex wl23 =
conj((z1 + al * q(5)) / (z2 + al * q(2)));
1184 Complex y21 = wr21 * d1 / xn1;
1185 Complex y22 = wr22 * d2 / xn2;
1186 Complex y23 = wr23 * d3 / xn3;
1187 Complex y31 = wr31 * d1 / xn1;
1188 Complex y32 = wr32 * d2 / xn2;
1189 Complex y33 = wr33 * d3 / xn3;
1190 q(0) = y11 + y12 + y13;
1191 q(1) = y21 + y22 + y23;
1192 q(2) = y31 + y32 + y33;
1193 q(3) = y11 *
conj(wl21) + y12 *
conj(wl22) + y13 *
conj(wl23);
1194 q(4) = y21 *
conj(wl21) + y22 *
conj(wl22) + y23 *
conj(wl23);
1195 q(5) = y31 *
conj(wl21) + y32 *
conj(wl22) + y33 *
conj(wl23);
1196 q(6) = y11 *
conj(wl31) + y12 *
conj(wl32) + y13 *
conj(wl33);
1197 q(7) = y21 *
conj(wl31) + y22 *
conj(wl32) + y23 *
conj(wl33);
1198 q(8) = y31 *
conj(wl31) + y32 *
conj(wl32) + y33 *
conj(wl33);
__device__ __host__ T & operator[](int i)
__device__ __host__ T const & operator[](int i) const
Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices)
__device__ __host__ void print() const
__device__ __host__ T max() const
Compute the absolute max element of the Hermitian matrix.
__device__ __host__ HMatrix(const T data_[])
__device__ __host__ void operator=(const HMatrix< U, N > &b)
__device__ __host__ HMatrix(const HMatrix< T, N > &a)
__device__ __host__ HMatrix()
__device__ __host__ complex< T > const operator()(int i, int j) const
__device__ __host__ HMatrix< T, N > square() const
Hermitian matrix square.
__device__ __host__ real Linf()
Compute the matrix Linfinity norm - this is the maximum of the absolute row sums.
__device__ __host__ Matrix(const Matrix< T, N > &a)
__device__ __host__ real L2()
Compute the matrix L2 norm. We actually compute the Frobenius norm which is an upper bound on the L2 ...
__device__ __host__ void operator=(const gauge_ghost_wrapper< real, S > &s)
__device__ constexpr __host__ int size() const
__device__ __host__ Matrix()
__device__ __host__ bool isUnitary(double max_error) const
__device__ __host__ T const & operator()(int i) const
__device__ __host__ Matrix(const gauge_ghost_wrapper< real, S > &s)
__device__ __host__ void operator=(const Matrix< U, N > &b)
__device__ __host__ Matrix(const HMatrix< real, N > &a)
__device__ __host__ T & operator()(int i, int j)
__device__ __host__ Matrix(const Matrix< U, N > &a)
__device__ __host__ Matrix(const gauge_wrapper< real, S > &s)
__device__ __host__ void operator=(const gauge_wrapper< real, S > &s)
__device__ __host__ uint64_t checksum() const
__device__ __host__ real L1()
Compute the matrix L1 norm - this is the maximum of the absolute column sums.
__device__ __host__ T const & operator()(int i, int j) const
__device__ __host__ Matrix(const T data_[])
__device__ __host__ T & operator()(int i)
void Mat(sFloat *out, gFloat **link, sFloat *in, int daggerBit, int mu)
cudaColorSpinorField * tmp
__host__ __device__ ValueType conj(ValueType x)
__host__ __device__ void printLink(const Matrix< Cmplx, 3 > &link)
void copyLinkToArray(float *array, const Matrix< float2, 3 > &link)
__host__ __device__ float4 operator+=(float4 &x, const float4 &y)
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator*(const S &a, const ColorSpinor< Float, Nc, Ns > &x)
Compute the scalar-vector product y = a * x.
__host__ __device__ ValueType cos(ValueType x)
__device__ __host__ double ErrorSU3(const Matrix< Cmplx, 3 > &matrix)
__host__ __device__ ValueType acos(ValueType x)
__host__ __device__ float2 operator*=(float2 &x, const float &a)
__device__ __host__ auto exponentiate_iQ(const Matrix< T, 3 > &Q)
__device__ __host__ Matrix< T, 3 > inverse(const Matrix< T, 3 > &u)
__host__ __device__ ValueType sin(ValueType x)
__host__ __device__ ValueType log(ValueType x)
std::complex< double > Complex
void copyArrayToLink(Matrix< float2, 3 > *link, float *array)
__host__ __device__ ValueType sqrt(ValueType x)
__device__ __host__ T getTrace(const Matrix< T, 3 > &a)
__device__ __host__ Matrix< T, 3 > getSubTraceUnit(const Matrix< T, 3 > &a)
__host__ __device__ float4 operator-=(float4 &x, const float4 &y)
__device__ __host__ void makeAntiHerm(Matrix< Complex, N > &m)
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator+(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor addition operator.
__device__ __host__ void setIdentity(Matrix< T, N > *m)
__device__ __host__ T getDeterminant(const Mat< T, 3 > &a)
__device__ __host__ double getRealTraceUVdagger(const Matrix< T, 3 > &a, const Matrix< T, 3 > &b)
__host__ __device__ ValueType exp(ValueType x)
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
__device__ __host__ void copyColumn(const Matrix< T, N > &m, int c, Array< T, N > *a)
__device__ __host__ void SubTraceUnit(Matrix< T, 3 > &a)
__device__ __host__ void computeLinkInverse(Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u)
__device__ __host__ void setZero(Matrix< T, N > *m)
__device__ __host__ void makeHerm(Matrix< Complex, N > &m)
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator-(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor subtraction operator.
__host__ __device__ ValueType abs(ValueType x)
std::ostream & operator<<(std::ostream &output, const CloverFieldParam ¶m)
__device__ __host__ void expsu3(Matrix< complex< Float >, 3 > &q)
FloatingPoint< float > Float
__host__ __device__ T sum(const array< T, s > &a)
Provides precision abstractions and defines the register precision given the storage precision using ...
wrapper class that enables us to write to Hmatrices in packed format
__device__ __host__ void operator+=(const complex< T > &a)
__device__ __host__ HMatrix_wrapper(Hmat &mat, int i, int j, int idx)
__device__ __host__ void operator=(const complex< T > &a)
__device__ static __host__ T val()
__device__ static __host__ T val()
clover_wrapper is an internal class that is used to wrap instances of colorspinor accessors,...
__host__ __device__ ValueType imag() const volatile
__host__ __device__ ValueType real() const volatile
gauge_ghost_wrapper is an internal class that is used to wrap instances of gauge ghost accessors,...
gauge_wrapper is an internal class that is used to wrap instances of gauge accessors,...