1 #ifndef _QUDA_MATRIX_H_ 2 #define _QUDA_MATRIX_H_ 23 __device__ __host__
inline 28 __device__ __host__
inline 31 return make_float2(0.,0.);
35 __device__ __host__
inline 38 return make_double2(0.,0.);
46 __device__ __host__
inline 51 __device__ __host__
inline 53 return make_float2(1.,0.);
57 __device__ __host__
inline 59 return make_double2(1.,0.);
62 template<
typename Float,
typename T>
struct gauge_wrapper;
63 template<
typename Float,
typename T>
struct gauge_ghost_wrapper;
64 template<
typename Float,
typename T>
struct clover_wrapper;
65 template<
typename T,
int N>
struct HMatrix;
67 template<
class T,
int N>
71 __device__ __host__
inline int index(
int i,
int j)
const {
return i*N + j; }
76 __device__ __host__
inline Matrix() {
86 __device__ __host__
inline Matrix(
const T data_[]) {
88 for (
int i=0;
i<N*N;
i++)
data[
i] = data_[
i];
93 __device__ __host__
inline T
const &
operator()(
int i,
int j)
const {
116 for (
int i=0;
i<N*N;
i++)
data[
i] =
b.data[
i];
151 template <
typename T,
typename Hmat>
160 __device__ __host__
inline void operator=(
const complex<T> &
a) {
188 template<
class T,
int N>
194 __device__ __host__
inline int index(
int i,
int j)
const {
198 int k = N*(N-1)/2 - (N-j)*(N-j-1)/2 +
i - j - 1;
202 int k = N*(N-1)/2 - (N-
i)*(N-
i-1)/2 + j -
i - 1;
217 for (
int i=0;
i<N*N;
i++)
data[
i] =
a.data[
i];
220 __device__ __host__
inline HMatrix(
const T data_[]) {
222 for (
int i=0;
i<N*N;
i++)
data[
i] = data_[
i];
225 __device__ __host__
inline complex<T>
const operator()(
int i,
int j)
const {
228 return complex<T>(
data[
idx],0.0);
243 for (
int i=0;
i<N*N;
i++)
data[
i] =
b.data[
i];
260 for (
int i=0;
i<N;
i++) {
262 for (
int k=0; k<N; k++)
if (
i<=k) {
263 tmp.x = (*this)(
i,0).real() * (*this)(0,k).real();
264 tmp.x -= (*this)(
i,0).imag() * (*this)(0,k).imag();
265 tmp.y = (*this)(
i,0).real() * (*this)(0,k).imag();
266 tmp.y += (*this)(
i,0).imag() * (*this)(0,k).real();
268 for (
int j=1; j<N; j++) {
269 tmp.x += (*this)(
i,j).real() * (*this)(j,k).real();
270 tmp.x -= (*this)(
i,j).imag() * (*this)(j,k).imag();
271 tmp.y += (*this)(
i,j).real() * (*this)(j,k).imag();
272 tmp.y += (*this)(
i,j).imag() * (*this)(j,k).real();
280 __device__ __host__
void print()
const {
281 for (
int i=0;
i<N;
i++) {
283 for (
int j=0; j<N; j++) {
284 printf(
" (%e, %e)", (*
this)(
i,j).real(), (*
this)(
i,j).imag());
293 template<
class T,
int N>
296 for (
int i=0;
i<N;
i++) {
298 for (
int j=0; j<N; j++) {
299 (*this)(
i,j) =
a(
i,j);
307 return a(0,0) +
a(1,1) +
a(2,2);
311 template<
template<
typename,
int>
class Mat,
class T>
315 result =
a(0,0)*(
a(1,1)*
a(2,2) -
a(2,1)*
a(1,2))
316 -
a(0,1)*(
a(1,0)*
a(2,2) -
a(1,2)*
a(2,0))
317 +
a(0,2)*(
a(1,0)*
a(2,1) -
a(1,1)*
a(2,0));
322 template<
template<
typename,
int>
class Mat,
class T,
int N>
323 __device__ __host__
inline Mat<T,N>
operator+(
const Mat<T,N> &
a,
const Mat<T,N> &
b)
327 for (
int i=0;
i<N*N;
i++) result.data[
i] =
a.data[
i] +
b.data[
i];
332 template<
template<
typename,
int>
class Mat,
class T,
int N>
333 __device__ __host__
inline Mat<T,N>
operator+=(Mat<T,N> &
a,
const Mat<T,N> &
b)
336 for (
int i=0;
i<N*N;
i++)
a.data[
i] +=
b.data[
i];
340 template<
template<
typename,
int>
class Mat,
class T,
int N>
341 __device__ __host__
inline Mat<T,N>
operator+=(Mat<T,N> &
a,
const T &
b)
344 for (
int i=0;
i<N;
i++)
a(
i,
i) +=
b;
348 template<
template<
typename,
int>
class Mat,
class T,
int N>
349 __device__ __host__
inline Mat<T,N>
operator-=(Mat<T,N> &
a,
const Mat<T,N> &
b)
352 for (
int i=0;
i<N*N;
i++)
a.data[
i] -=
b.data[
i];
356 template<
template<
typename,
int>
class Mat,
class T,
int N>
357 __device__ __host__
inline Mat<T,N>
operator-(
const Mat<T,N> &
a,
const Mat<T,N> &
b)
361 for (
int i=0;
i<N*N;
i++) result.data[
i] =
a.data[
i] -
b.data[
i];
365 template<
template<
typename,
int>
class Mat,
class T,
int N,
class S>
369 for (
int i=0;
i<N*N; ++
i) result.data[
i] =
scalar*
a.data[
i];
373 template<
template<
typename,
int>
class Mat,
class T,
int N,
class S>
378 template<
template<
typename,
int>
class Mat,
class T,
int N,
class S>
384 template<
template<
typename,
int>
class Mat,
class T,
int N>
385 __device__ __host__
inline Mat<T,N>
operator-(
const Mat<T,N> &
a){
388 for (
int i=0;
i<(N*N); ++
i) result.data[
i] = -
a.data[
i];
396 template<
template<
typename,
int>
class Mat,
class T,
int N>
397 __device__ __host__
inline Mat<T,N>
operator*(
const Mat<T,N> &
a,
const Mat<T,N> &
b)
401 for (
int i=0;
i<N;
i++) {
403 for (
int k=0; k<N; k++) {
404 result(
i,k) =
a(
i,0) *
b(0,k);
406 for (
int j=1; j<N; j++) {
407 result(
i,k) +=
a(
i,j) *
b(j,k);
417 template<
template<
typename>
class complex,
typename T,
int N>
422 for (
int i=0;
i<N;
i++) {
424 for (
int k=0; k<N; k++) {
425 result(
i,k).x =
a(
i,0).real() *
b(0,k).real();
426 result(
i,k).x -=
a(
i,0).imag() *
b(0,k).imag();
427 result(
i,k).y =
a(
i,0).real() *
b(0,k).imag();
428 result(
i,k).y +=
a(
i,0).imag() *
b(0,k).real();
430 for (
int j=1; j<N; j++) {
431 result(
i,k).x +=
a(
i,j).real() *
b(j,k).real();
432 result(
i,k).x -=
a(
i,j).imag() *
b(j,k).imag();
433 result(
i,k).y +=
a(
i,j).real() *
b(j,k).imag();
434 result(
i,k).y +=
a(
i,j).imag() *
b(j,k).real();
441 template<
class T,
int N>
451 template<
class T,
class U,
int N>
452 __device__ __host__
inline 457 for (
int i=0;
i<N;
i++) {
459 for (
int k=0; k<N; k++) {
460 result(
i,k) =
a(
i,0) *
b(0,k);
462 for (
int j=1; j<N; j++) {
463 result(
i,k) +=
a(
i,j) *
b(j,k);
472 __device__ __host__
inline 476 result(0,0) =
a(0,0)*
b(0,0) +
a(0,1)*
b(1,0);
477 result(0,1) =
a(0,0)*
b(0,1) +
a(0,1)*
b(1,1);
478 result(1,0) =
a(1,0)*
b(0,0) +
a(1,1)*
b(1,0);
479 result(1,1) =
a(1,0)*
b(0,1) +
a(1,1)*
b(1,1);
484 template<
class T,
int N>
485 __device__ __host__
inline 489 for (
int i=0;
i<N; ++
i){
491 for (
int j=0; j<N; ++j){
492 result(
i,j) =
conj( other(j,
i) );
500 __device__ __host__
inline 505 const T & det_inv =
static_cast<typename T::value_type
>(1.0)/det;
509 temp = u(1,1)*u(2,2) - u(1,2)*u(2,1);
510 (*uinv)(0,0) = (det_inv*temp);
512 temp = u(0,2)*u(2,1) - u(0,1)*u(2,2);
513 (*uinv)(0,1) = (temp*det_inv);
515 temp = u(0,1)*u(1,2) - u(0,2)*u(1,1);
516 (*uinv)(0,2) = (temp*det_inv);
518 temp = u(1,2)*u(2,0) - u(1,0)*u(2,2);
519 (*uinv)(1,0) = (det_inv*temp);
521 temp = u(0,0)*u(2,2) - u(0,2)*u(2,0);
522 (*uinv)(1,1) = (temp*det_inv);
524 temp = u(0,2)*u(1,0) - u(0,0)*u(1,2);
525 (*uinv)(1,2) = (temp*det_inv);
527 temp = u(1,0)*u(2,1) - u(1,1)*u(2,0);
528 (*uinv)(2,0) = (det_inv*temp);
530 temp = u(0,1)*u(2,0) - u(0,0)*u(2,1);
531 (*uinv)(2,1) = (temp*det_inv);
533 temp = u(0,0)*u(1,1) - u(0,1)*u(1,0);
534 (*uinv)(2,2) = (temp*det_inv);
541 template<
class T,
int N>
542 __device__ __host__
inline 546 for (
int i=0;
i<N; ++
i){
549 for (
int j=
i+1; j<N; ++j){
550 (*m)(
i,j) = (*m)(j,
i) = 0;
558 __device__ __host__
inline 562 for (
int i=0;
i<N; ++
i){
563 (*m)(
i,
i) = make_float2(1,0);
565 for (
int j=
i+1; j<N; ++j){
566 (*m)(
i,j) = (*m)(j,
i) = make_float2(0.,0.);
574 __device__ __host__
inline 578 for (
int i=0;
i<N; ++
i){
579 (*m)(
i,
i) = make_double2(1,0);
581 for (
int j=
i+1; j<N; ++j){
582 (*m)(
i,j) = (*m)(j,
i) = make_double2(0.,0.);
590 template<
class T,
int N>
591 __device__ __host__
inline 595 for (
int i=0;
i<N; ++
i){
597 for (
int j=0; j<N; ++j){
606 __device__ __host__
inline 610 for (
int i=0;
i<N; ++
i){
612 for (
int j=0; j<N; ++j){
613 (*m)(
i,j) = make_float2(0.,0.);
621 __device__ __host__
inline 625 for (
int i=0;
i<N; ++
i){
627 for (
int j=0; j<N; ++j){
628 (*m)(
i,j) = make_double2(0.,0.);
635 template<
typename Complex,
int N>
637 typedef typename Complex::value_type real;
642 real imag_trace = 0.0;
644 for (
int i=0;
i<N;
i++) imag_trace += am(
i,
i).y;
646 for (
int i=0;
i<N;
i++) {
647 am(
i,
i).y -= imag_trace/N;
660 template<
class T,
int N>
668 __device__ __host__
inline 674 __device__ __host__
inline 681 template<
class T,
int N>
682 __device__ __host__
inline 686 for (
int i=0;
i<N; ++
i){
693 template<
class T,
int N>
694 __device__ __host__
inline 697 for (
int i=0;
i<N; ++
i){
698 const T conjb_i =
conj(
b[
i]);
699 for (
int j=0; j<N; ++j){
700 (*m)(j,
i) =
a[j]*conjb_i;
706 template<
class T,
int N>
707 __device__ __host__
inline 710 for (
int i=0;
i<N; ++
i){
711 const T conjb_i =
conj(
b[
i]);
713 for (
int j=0; j<N; ++j){
714 (*m)(j,
i) =
a[j]*conjb_i;
722 template<
class T,
int N>
723 std::ostream & operator << (std::ostream & os, const Matrix<T,N> & m){
725 for (
int i=0;
i<N; ++
i){
727 for (
int j=0; j<N; ++j){
730 if(
i<N-1) os << std::endl;
736 template<
class T,
int N>
737 std::ostream & operator << (std::ostream & os, const Array<T,N> &
a){
738 for (
int i=0;
i<N; ++
i){
745 template<
class T,
class U>
750 for (
int i=0;
i<9; ++
i){
757 template<
class T,
class U,
int N>
762 for (
int i=0;
i<(N*N); ++
i){
773 for (
int i=0;
i<9; ++
i){
774 single_temp =
array[
idx + (dir*9 +
i)*stride];
775 link->data[
i].x = single_temp.x;
776 link->data[
i].y = single_temp.y;
783 template<
class T,
int N,
class U>
788 for (
int i=0;
i<(N*N); ++
i){
797 for (
int i=0;
i<9; ++
i){
807 for (
int i=0;
i<9; ++
i){
814 template<
class T,
class U>
819 for (
int i=0;
i<9; ++
i){
834 for (
int i=0;
i<9; ++
i){
835 single_temp.x = link.data[
i].x;
836 single_temp.y = link.data[
i].y;
837 array[
idx + (dir*9 +
i)*stride] = single_temp;
848 temp2[0] =
array[
idx + dir*stride*5];
849 temp2[1] =
array[
idx + dir*stride*5 + stride];
850 temp2[2] =
array[
idx + dir*stride*5 + 2*stride];
851 temp2[3] =
array[
idx + dir*stride*5 + 3*stride];
852 temp2[4] =
array[
idx + dir*stride*5 + 4*stride];
855 mom->
data[0].y = temp2[3].x;
856 mom->
data[1] = temp2[0];
857 mom->
data[2] = temp2[1];
862 mom->
data[4].y = temp2[3].y;
863 mom->
data[5] = temp2[2];
872 mom->
data[8].y = temp2[4].x;
879 template<
class T,
class U>
883 typedef typename T::value_type real;
891 array[
idx + dir*stride*5 + stride] = temp2;
895 array[
idx + dir*stride*5 + stride*2] = temp2;
897 const real temp = (mom.
data[0].y + mom.
data[4].y + mom.
data[8].y)*0.3333333333333333333333333;
900 array[
idx + dir*stride*5 + stride*3] = temp2;
904 array[
idx + dir*stride*5 + stride*4] = temp2;
911 template<
class Cmplx>
912 __device__ __host__
inline 917 const Cmplx & det_inv =
static_cast<typename Cmplx::value_type
>(1.0)/det;
921 temp = u(1,1)*u(2,2) - u(1,2)*u(2,1);
922 (*uinv)(0,0) = (det_inv*temp);
924 temp = u(0,2)*u(2,1) - u(0,1)*u(2,2);
925 (*uinv)(0,1) = (temp*det_inv);
927 temp = u(0,1)*u(1,2) - u(0,2)*u(1,1);
928 (*uinv)(0,2) = (temp*det_inv);
930 temp = u(1,2)*u(2,0) - u(1,0)*u(2,2);
931 (*uinv)(1,0) = (det_inv*temp);
933 temp = u(0,0)*u(2,2) - u(0,2)*u(2,0);
934 (*uinv)(1,1) = (temp*det_inv);
936 temp = u(0,2)*u(1,0) - u(0,0)*u(1,2);
937 (*uinv)(1,2) = (temp*det_inv);
939 temp = u(1,0)*u(2,1) - u(1,1)*u(2,0);
940 (*uinv)(2,0) = (det_inv*temp);
942 temp = u(0,1)*u(2,0) - u(0,0)*u(2,1);
943 (*uinv)(2,1) = (temp*det_inv);
945 temp = u(0,0)*u(1,1) - u(0,1)*u(1,0);
946 (*uinv)(2,2) = (temp*det_inv);
953 for (
int i=0;
i<3; ++
i){
955 for (
int j=0; j<3; ++j){
957 (*link)(
i,j).
y =
array[(
i*3+j)*2 + 1];
963 template<
class Cmplx,
class Real>
966 for (
int i=0;
i<3; ++
i){
968 for (
int j=0; j<3; ++j){
970 (*link)(
i,j).
y =
array[(
i*3+j)*2 + 1];
980 for (
int i=0;
i<3; ++
i){
982 for (
int j=0; j<3; ++j){
983 array[(
i*3+j)*2] = link(
i,j).x;
984 array[(
i*3+j)*2 + 1] = link(
i,j).y;
991 template<
class Cmplx,
class Real>
994 for (
int i=0;
i<3; ++
i){
996 for (
int j=0; j<3; ++j){
997 array[(
i*3+j)*2] = link(
i,j).x;
998 array[(
i*3+j)*2 + 1] = link(
i,j).y;
1006 T tr = (
a(0,0) +
a(1,1) +
a(2,2)) / 3.0;
1008 res(0,0) =
a(0,0) - tr; res(0,1) =
a(0,1); res(0,2) =
a(0,2);
1009 res(1,0) =
a(1,0); res(1,1) =
a(1,1) - tr; res(1,2) =
a(1,2);
1010 res(2,0) =
a(2,0); res(2,1) =
a(2,1); res(2,2) =
a(2,2) - tr;
1016 T tr = (
a(0,0) +
a(1,1) +
a(2,2)) / static_cast<T>(3.0);
1017 a(0,0) -= tr;
a(1,1) -= tr;
a(2,2) -= tr;
1022 double sum = (
double)(
a(0,0).x *
b(0,0).x +
a(0,0).y *
b(0,0).y);
1037 template<
class Cmplx>
1038 __host__ __device__
inline 1040 printf(
"(%lf, %lf)\t", link(0,0).
x, link(0,0).
y);
1041 printf(
"(%lf, %lf)\t", link(0,1).
x, link(0,1).
y);
1042 printf(
"(%lf, %lf)\n", link(0,2).
x, link(0,2).
y);
1043 printf(
"(%lf, %lf)\t", link(1,0).
x, link(1,0).
y);
1044 printf(
"(%lf, %lf)\t", link(1,1).
x, link(1,1).
y);
1045 printf(
"(%lf, %lf)\n", link(1,2).
x, link(1,2).
y);
1046 printf(
"(%lf, %lf)\t", link(2,0).
x, link(2,0).
y);
1047 printf(
"(%lf, %lf)\t", link(2,1).
x, link(2,1).
y);
1048 printf(
"(%lf, %lf)\n", link(2,2).
x, link(2,2).
y);
1052 template<
class Cmplx>
1059 for (
int i=0;
i<3; ++
i){
1060 if(
fabs(identity(
i,
i).
x - 1.0) > max_error ||
fabs(identity(
i,
i).
y) > max_error)
return false;
1062 for (
int j=
i+1; j<3; ++j){
1063 if(
fabs(identity(
i,j).
x) > max_error ||
fabs(identity(
i,j).
y) > max_error
1064 ||
fabs(identity(j,
i).
x) > max_error ||
fabs(identity(j,
i).
y) > max_error ){
1071 for (
int i=0;
i<3;
i++) {
1073 for (
int j=0; j<3; j++) {
1074 if (isnan(matrix(
i,j).
x) || isnan(matrix(
i,j).
y))
return false;
1081 template<
class Cmplx>
1097 temp = identity_comp(
i,j);
1099 error +=
norm(temp);
1102 error +=
norm(identity_comp(
i,j));
1109 __device__ __host__
inline 1118 typedef decltype(Q(0,0).
x) undMatType;
1120 undMatType inv3 = 1.0/3.0;
1121 undMatType c0, c1, c0_max, Tr_re;
1122 undMatType f0_re, f0_im, f1_re, f1_im, f2_re, f2_im;
1124 undMatType u_p, w_p;
1142 c0_max = 2*
pow(c1*inv3,1.5);
1145 theta =
acos(c0/c0_max);
1148 u_p =
sqrt(c1*inv3)*
cos(theta*inv3);
1151 w_p =
sqrt(c1)*
sin(theta*inv3);
1154 undMatType u_sq = u_p*u_p;
1155 undMatType w_sq = w_p*w_p;
1156 undMatType denom_inv = 1.0/(9*u_sq - w_sq);
1157 undMatType exp_iu_re =
cos(u_p);
1158 undMatType exp_iu_im =
sin(u_p);
1159 undMatType exp_2iu_re = exp_iu_re*exp_iu_re - exp_iu_im*exp_iu_im;
1160 undMatType exp_2iu_im = 2*exp_iu_re*exp_iu_im;
1161 undMatType cos_w =
cos(w_p);
1163 undMatType hj_re = 0.0;
1164 undMatType hj_im = 0.0;
1167 if (w_p < 0.05 && w_p > -0.05) {
1169 sinc_w = 1.0 - (w_sq/6.0)*(1 - (w_sq*0.05)*(1 - (w_sq/42.0)*(1 - (w_sq/72.0))));
1171 else sinc_w =
sin(w_p)/w_p;
1184 hj_re = (u_sq - w_sq)*exp_2iu_re + 8*u_sq*cos_w*exp_iu_re + 2*u_p*(3*u_sq + w_sq)*sinc_w*exp_iu_im;
1185 hj_im = (u_sq - w_sq)*exp_2iu_im - 8*u_sq*cos_w*exp_iu_im + 2*u_p*(3*u_sq + w_sq)*sinc_w*exp_iu_re;
1186 f0_re = hj_re*denom_inv;
1187 f0_im = hj_im*denom_inv;
1190 hj_re = 2*u_p*exp_2iu_re - 2*u_p*cos_w*exp_iu_re + (3*u_sq - w_sq)*sinc_w*exp_iu_im;
1191 hj_im = 2*u_p*exp_2iu_im + 2*u_p*cos_w*exp_iu_im + (3*u_sq - w_sq)*sinc_w*exp_iu_re;
1192 f1_re = hj_re*denom_inv;
1193 f1_im = hj_im*denom_inv;
1196 hj_re = exp_2iu_re - cos_w*exp_iu_re - 3*u_p*sinc_w*exp_iu_im;
1197 hj_im = exp_2iu_im + cos_w*exp_iu_im - 3*u_p*sinc_w*exp_iu_re;
1198 f2_re = hj_re*denom_inv;
1199 f2_im = hj_im*denom_inv;
1226 temp1 = f0_c * UnitM;
1235 temp2 = f2_c * temp1;
1244 #endif // _QUDA_MATRIX_H_ __host__ __device__ float4 operator-=(float4 &x, const float4 y)
gauge_wrapper is an internal class that is used to wrap instances of gauge accessors, currying in a specific location on the field. The operator() accessors in gauge-field accessors return instances of this class, allowing us to then use operator overloading upon this class to interact with the Matrix class. As a result we can include gauge-field accessors directly in Matrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the gauge-field accessors.
__device__ __host__ double ErrorSU3(const Matrix< Cmplx, 3 > &matrix)
__device__ __host__ void setZero(Matrix< T, N > *m)
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the squared magnitude of z (i.e., |z|^2).
__device__ static __host__ T val()
__host__ __device__ ValueType sqrt(ValueType x)
cudaColorSpinorField * tmp
__device__ __host__ double getRealTraceUVdagger(const Matrix< T, 3 > &a, const Matrix< T, 3 > &b)
__device__ __host__ Matrix(const T data_[])
__device__ __host__ void operator=(const complex< T > &a)
void Mat(sFloat *out, gFloat **link, sFloat *in, int daggerBit, int mu)
__device__ void writeMomentumToArray(const Matrix< T, 3 > &mom, const int dir, const int idx, const U coeff, const int stride, T *const array)
__host__ __device__ float2 operator*=(float2 &x, const float a)
__device__ __host__ uint64_t checksum() const
__device__ __host__ void operator=(const Matrix< U, N > &b)
__device__ void loadLinkVariableFromArray(const T *const array, const int dir, const int idx, const int stride, Matrix< U, 3 > *link)
This is just a dummy structure we use for trove to define the required structure size.
__host__ __device__ void printLink(const Matrix< Cmplx, 3 > &link)
void copyLinkToArray(float *array, const Matrix< float2, 3 > &link)
bool isUnitary(const cpuGaugeField &field, double max_error)
__device__ __host__ T const & operator()(int i, int j) const
__device__ __host__ void outerProd(const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m)
__device__ __host__ T const & operator[](int i) const
Wrapper class that enables us to write to HMatrix objects in packed format.
__device__ __host__ complex< T > const operator()(int i, int j) const
int printf(const char *,...) __attribute__((__format__(__printf__
__host__ __device__ ValueType sin(ValueType x)
__host__ __device__ void sum(double &a, double &b)
__device__ __host__ HMatrix()
gauge_ghost_wrapper is an internal class that is used to wrap instances of gauge ghost accessors...
__device__ static __host__ T val()
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator-(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor subtraction operator.
clover_wrapper is an internal class that is used to wrap instances of colorspinor accessors...
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices) ...
unsigned long long uint64_t
__device__ __host__ void copyColumn(const Matrix< T, N > &m, int c, Array< T, N > *a)
Provides precision abstractions and defines the register precision given the storage precision using ...
__device__ __host__ T & operator()(int i)
__device__ void writeLinkVariableToArray(const Matrix< T, 3 > &link, const int dir, const int idx, const int stride, U *const array)
__device__ __host__ void SubTraceUnit(Matrix< T, 3 > &a)
__device__ void loadMatrixFromArray(const T *const array, const int idx, const int stride, Matrix< U, N > *mat)
void copyArrayToLink(Matrix< float2, 3 > *link, float *array)
__device__ __host__ void setIdentity(Matrix< T, N > *m)
__device__ __host__ int index(int i, int j) const
__device__ __host__ T getTrace(const Matrix< T, 3 > &a)
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator+(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor addition operator.
__device__ __host__ int index(int i, int j) const
__device__ void appendMatrixToArray(const Matrix< complex< double >, 3 > &mat, const int idx, const int stride, double2 *const array)
__device__ void loadMomentumFromArray(const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *mom)
__device__ __host__ Matrix< T, 3 > getSubTraceUnit(const Matrix< T, 3 > &a)
__device__ __host__ T & operator()(int i, int j)
__device__ __host__ void operator=(const HMatrix< U, N > &b)
__device__ __host__ void computeLinkInverse(Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u)
__device__ __host__ void print() const
__device__ __host__ void computeMatrixInverse(const Matrix< T, 3 > &u, Matrix< T, 3 > *uinv)
__device__ __host__ HMatrix(const T data_[])
__device__ __host__ HMatrix< T, N > square() const
Hermitian matrix square.
__device__ __host__ Matrix()
__device__ __host__ void makeAntiHerm(Matrix< Complex, N > &m)
__device__ __host__ Matrix(const Matrix< T, N > &a)
__host__ __device__ ValueType cos(ValueType x)
__device__ __host__ HMatrix_wrapper(Hmat &mat, int i, int j, int idx)
__device__ __host__ void operator+=(const complex< T > &a)
__host__ __device__ float4 operator+=(float4 &x, const float4 y)
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator*(const S &a, const ColorSpinor< Float, Nc, Ns > &x)
Compute the scalar-vector product y = a * x.
__host__ __device__ ValueType acos(ValueType x)
struct cudaExtent unsigned int cudaArray_t array
__device__ __host__ T getDeterminant(const Mat< T, 3 > &a)
__host__ __device__ ValueType conj(ValueType x)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
__device__ __host__ void zero(vector_type< scalar, n > &v)
__device__ void writeMatrixToArray(const Matrix< T, N > &mat, const int idx, const int stride, U *const array)
__device__ __host__ HMatrix(const HMatrix< T, N > &a)
__device__ __host__ void exponentiate_iQ(const Matrix< T, 3 > &Q, Matrix< T, 3 > *exp_iQ)
__device__ __host__ T & operator[](int i)
__device__ __host__ T const & operator()(int i) const