8 template <
typename Float,
typename Fmunu,
typename Gauge>
struct FmunuArg {
16 threads(meta.VolumeCB()),
20 for (
int dir = 0; dir < 4; ++dir) {
21 X[dir] = meta.
X()[dir];
22 border[dir] = (meta_ex.
X()[dir] - X[dir]) / 2;
27 template <
int mu,
int nu,
typename Float,
typename Arg>
36 for (
int dir = 0; dir < 4; ++dir) {
37 x[dir] += arg.border[dir];
38 X[dir] += 2 * arg.border[dir];
45 int dx[4] = {0, 0, 0, 0};
68 int dx[4] = {0, 0, 0, 0};
95 int dx[4] = {0, 0, 0, 0};
122 int dx[4] = {0, 0, 0, 0};
158 F *=
static_cast<Float
>(0.125);
162 constexpr
int munu_idx = (
mu * (
mu - 1)) / 2 + nu;
163 arg.f(munu_idx, idx, parity) = F;
168 int x_cb = threadIdx.x + blockIdx.x * blockDim.x;
169 int parity = threadIdx.y + blockIdx.y * blockDim.y;
170 int mu_nu = threadIdx.z + blockIdx.z * blockDim.z;
171 if (x_cb >= arg.threads)
return;
172 if (mu_nu >= 6)
return;
175 case 0: computeFmunuCore<1, 0, Float>(
arg, x_cb,
parity);
break;
176 case 1: computeFmunuCore<2, 0, Float>(
arg, x_cb,
parity);
break;
177 case 2: computeFmunuCore<2, 1, Float>(
arg, x_cb,
parity);
break;
178 case 3: computeFmunuCore<3, 0, Float>(
arg, x_cb,
parity);
break;
179 case 4: computeFmunuCore<3, 1, Float>(
arg, x_cb,
parity);
break;
180 case 5: computeFmunuCore<3, 2, Float>(
arg, x_cb,
parity);
break;
187 for (
int x_cb = 0; x_cb < arg.threads; x_cb++) {
188 for (
int mu = 0;
mu < 4;
mu++) {
189 for (
int nu = 0; nu <
mu; nu++) {
190 int mu_nu = (mu * (mu - 1)) / 2 + nu;
192 case 0: computeFmunuCore<1, 0, Float>(
arg, x_cb,
parity);
break;
193 case 1: computeFmunuCore<2, 0, Float>(
arg, x_cb,
parity);
break;
194 case 2: computeFmunuCore<2, 1, Float>(
arg, x_cb,
parity);
break;
195 case 3: computeFmunuCore<3, 0, Float>(
arg, x_cb,
parity);
break;
196 case 4: computeFmunuCore<3, 1, Float>(
arg, x_cb,
parity);
break;
197 case 5: computeFmunuCore<3, 2, Float>(
arg, x_cb,
parity);
break;
static __device__ __host__ int linkIndexShift(const I x[], const J dx[], const K X[4])
Main header file for host and device accessors to GaugeFields.
__device__ __host__ __forceinline__ void computeFmunuCore(Arg &arg, int idx, int parity)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
__global__ void computeFmunuKernel(Arg arg)
void computeFmunuCPU(Arg &arg)
__host__ __device__ ValueType conj(ValueType x)
FmunuArg(Fmunu &f, Gauge &gauge, const GaugeField &meta, const GaugeField &meta_ex)
__host__ __device__ int getCoords(int coord[], const Arg &arg, int &idx, int parity, int &dim)
Compute the space-time coordinates we are at.