9 template <
typename Float,
typename GaugeOr,
typename GaugeDs>
struct GaugeSTOUTArg {
26 for (
int dir = 0; dir < 4; ++dir) {
27 border[dir] = data.
R()[dir];
28 X[dir] = data.
X()[dir] - border[dir] * 2;
35 template <
typename Float,
typename Arg,
typename Link>
41 for (
int dr = 0; dr < 4; ++dr) X[dr] = arg.X[dr];
45 for (
int dr = 0; dr < 4; ++dr) {
46 x[dr] += arg.border[dr];
47 X[dr] += 2 * arg.border[dr];
53 for (
int mu = 0;
mu < 3;
mu++) {
60 int dx[4] = {0, 0, 0, 0};
76 staple = staple + U1 * U2 *
conj(U3);
90 staple = staple +
conj(U1) * U2 * U3;
99 int idx = threadIdx.x + blockIdx.x * blockDim.x;
100 int parity = threadIdx.y + blockIdx.y * blockDim.y;
101 int dir = threadIdx.z + blockIdx.z * blockDim.z;
102 if (idx >= arg.threads)
return;
103 if (dir >= 3)
return;
104 typedef complex<Float>
Complex;
108 for (
int dr = 0; dr < 4; ++dr) X[dr] = arg.X[dr];
112 for (
int dr = 0; dr < 4; ++dr) {
113 x[dr] += arg.border[dr];
114 X[dr] += 2 * arg.border[dr];
117 int dx[4] = {0, 0, 0, 0};
120 Link U, UDag, Stap, Omega, OmegaDiff, ODT, Q, exp_iQ;
125 computeStaple<Float>(
arg, idx,
parity, dir, Stap);
143 Omega = (arg.rho * Stap) * UDag;
148 OmegaDiff =
conj(Omega) - Omega;
152 OmegaDiffTr = (1.0 / 3.0) * OmegaDiffTr;
157 Q = Q - OmegaDiffTr * ODT;
166 error = OmegaDiffTr.real();
167 printf(
"Trace test %d %d %.15e\n", idx, dir, error);
170 Link Q_diff =
conj(Q);
175 error = OmegaDiffTr.real();
176 printf(
"Herm test %d %d %.15e\n", idx, dir, error);
184 printf(
"expiQ test %d %d %.15e\n", idx, dir, error);
191 printf(
"expiQ*u test %d %d %.15e\n", idx, dir, error);
214 const Float tolerance) :
222 for (
int dir = 0; dir < 4; ++dir) {
223 border[dir] = data.
R()[dir];
224 X[dir] = data.
X()[dir] - border[dir] * 2;
231 template <
typename Float,
typename Arg,
typename Link>
237 for (
int dr = 0; dr < 4; ++dr) X[dr] = arg.X[dr];
241 for (
int dr = 0; dr < 4; ++dr) {
242 x[dr] += arg.border[dr];
243 X[dr] += 2 * arg.border[dr];
251 for (
int mu = 0;
mu < 4;
mu++) {
275 int dx[4] = {0, 0, 0, 0};
276 Link U1, U2, U3, U4, U5, U6, U7;
304 rectangle = rectangle + U1 * U2 * U3 *
conj(U4) *
conj(U5);
322 staple = staple + U1 * U2 *
conj(U5);
338 rectangle = rectangle + U1 * U2 * U3 *
conj(U4) *
conj(U6);
363 rectangle = rectangle +
conj(U7) * U4 * U3 * U2 *
conj(U5);
395 rectangle = rectangle +
conj(U1) *
conj(U2) * U3 * U4 * U5;
415 staple = staple +
conj(U1) * U2 * U5;
429 rectangle = rectangle +
conj(U1) * U2 * U3 * U4 *
conj(U6);
456 rectangle = rectangle +
conj(U7) *
conj(U4) * U3 * U2 * U5;
466 int idx = threadIdx.x + blockIdx.x * blockDim.x;
467 int parity = threadIdx.y + blockIdx.y * blockDim.y;
468 int dir = threadIdx.z + blockIdx.z * blockDim.z;
469 if (idx >= arg.threads)
return;
471 typedef complex<Float>
Complex;
475 for (
int dr = 0; dr < 4; ++dr) X[dr] = arg.X[dr];
479 for (
int dr = 0; dr < 4; ++dr) {
480 x[dr] += arg.border[dr];
481 X[dr] += 2 * arg.border[dr];
484 double staple_coeff = (5.0 - 2.0 * arg.epsilon) / 3.0;
485 double rectangle_coeff = (1.0 - arg.epsilon) / 12.0;
487 int dx[4] = {0, 0, 0, 0};
490 Link U, UDag, Stap, Rect, Omega, OmegaDiff, ODT, Q, exp_iQ;
497 computeStapleRectangle<Float>(
arg, idx,
parity, dir, Stap, Rect);
509 Omega = (arg.rho * staple_coeff) * (Stap);
512 Omega = Omega - (arg.rho * rectangle_coeff) * (Rect);
513 Omega = Omega * UDag;
518 OmegaDiff =
conj(Omega) - Omega;
522 OmegaDiffTr = (1.0 / 3.0) * OmegaDiffTr;
527 Q = Q - OmegaDiffTr * ODT;
536 error = OmegaDiffTr.real();
537 printf(
"Trace test %d %d %.15e\n", idx, dir, error);
540 Link Q_diff =
conj(Q);
545 error = OmegaDiffTr.real();
546 printf(
"Herm test %d %d %.15e\n", idx, dir, error);
554 printf(
"expiQ test %d %d %.15e\n", idx, dir, error);
561 printf(
"expiQ*u test %d %d %.15e\n", idx, dir, error);
__device__ __host__ double ErrorSU3(const Matrix< Cmplx, 3 > &matrix)
__device__ __host__ void setZero(Matrix< T, N > *m)
static __device__ __host__ int linkIndexShift(const I x[], const J dx[], const K X[4])
__global__ void computeSTOUTStep(Arg arg)
__global__ void computeOvrImpSTOUTStep(Arg arg)
__host__ __device__ void computeStapleRectangle(Arg &arg, int idx, int parity, int dir, Link &staple, Link &rectangle)
__host__ __device__ void computeStaple(Arg &arg, int idx, int parity, int dir, Link &staple)
Main header file for host and device accessors to GaugeFields.
std::complex< double > Complex
__device__ __host__ void setIdentity(Matrix< T, N > *m)
GaugeSTOUTArg(GaugeOr &origin, GaugeDs &dest, const GaugeField &data, const Float rho, const Float tolerance)
__device__ __host__ T getTrace(const Matrix< T, 3 > &a)
__device__ __host__ Matrix< T, 3 > inverse(const Matrix< T, 3 > &u)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
GaugeOvrImpSTOUTArg(GaugeOr &origin, GaugeDs &dest, const GaugeField &data, const Float rho, const Float epsilon, const Float tolerance)
__host__ __device__ ValueType conj(ValueType x)
__device__ __host__ void exponentiate_iQ(const Matrix< T, 3 > &Q, Matrix< T, 3 > *exp_iQ)
__host__ __device__ int getCoords(int coord[], const Arg &arg, int &idx, int parity, int &dim)
Compute the space-time coordinates we are at.