// Fragment of a kernel-argument struct templated on the output and input
// accessor types. The struct header and member declarations are not visible
// here; the initializer list below matches the CopyGaugeArg(out, in, meta)
// constructor signature, so this is presumably that constructor.
// NOTE(review): the leading integers (11, 22, 23, 24) are listing line numbers
// that were fused into the text during extraction; they are not code.
11 template <
typename OutOrder,
typename InOrder>
// Initializer list: keeps both accessors, caches Volume(), Ndim() and
// Geometry() read from `meta`, and starts both ghost offsets at 0.
22 : out(out), in(in), volume(meta.Volume()), nDim(meta.Ndim()),
23 geometry(meta.Geometry()), out_offset(0), in_offset(0) {
// For each of the nDim dimensions, store SurfaceCB(d) * Nface() as the face
// volume used by the ghost-copy loops.
24 for (
int d=0; d<nDim; d++) faceVolumeCB[d] = meta.
SurfaceCB(d) * meta.
Nface();
// Fragment of a host-side copy routine (presumably copyGauge; the function
// signature is not visible). Template parameters: output and input floating
// types, a compile-time `length`, and the argument-struct type.
// NOTE(review): `parity` and `nColor` are used below but are defined on lines
// missing from this extract; the leading integers are listing line numbers.
31 template <
typename FloatOut,
typename FloatIn,
int length,
typename Arg>
// Serial loops: d over [0, arg.geometry), x over [0, arg.volume/2).
39 for (
int d=0; d<arg.geometry; d++) {
40 for (
int x=0; x<arg.volume/2; x++) {
// When FINE_GRAINED_ACCESS is defined, every (i, j) entry with i, j < nColor
// is assigned individually from the input accessor to the output accessor.
41 #ifdef FINE_GRAINED_ACCESS 42 for (
int i=0; i<
nColor; i++)
43 for (
int j=0; j<
nColor; j++) {
44 arg.out(d,
parity, x, i, j) = arg.in(d,
parity, x, i, j);
// Fragment of a host-side NaN scan over the input accessor (presumably
// checkNan; the function signature is not visible).
// NOTE(review): `parity` and `nColor` come from lines missing in this extract;
// the leading integers are listing line numbers, not code.
62 template <
typename Float,
int length,
typename Arg>
// Serial loops: d over [0, arg.geometry), x over [0, arg.volume/2).
69 for (
int d=0; d<arg.geometry; d++) {
70 for (
int x=0; x<arg.volume/2; x++) {
// FINE_GRAINED_ACCESS path: read one complex element per (i, j) into `u`.
// What is done with that scalar `u` is not visible in this extract.
71 #ifdef FINE_GRAINED_ACCESS 72 for (
int i=0; i<
nColor; i++)
73 for (
int j=0; j<
nColor; j++) {
74 complex<Float> u = arg.in(d,
parity, x, i, j);
// Here `u` is indexed as u(i), so it is a different object from the scalar
// above (presumably the non-fine-grained branch; its declaration is missing).
// Any NaN in the real or imaginary part of the first length/2 entries is
// reported through errorQuda with the offending parity, direction, site and
// index.
82 for (
int i=0; i<
length/2; i++)
83 if (isnan(u(i).real()) || isnan(u(i).imag()))
errorQuda(
"Nan detected at parity=%d, dir=%d, x=%d, i=%d",
parity, d, x, i);
// Fragment of a device-side copy body (presumably copyGaugeKernel, which the
// listing declares as `__global__ void copyGaugeKernel(Arg arg)`; the
// signature itself is not visible here).
// Thread mapping taken from the index computations below:
//   x-dimension -> site index x
//   z-dimension -> combined index parity_d, split into parity and direction d
//   y-dimension -> row index i (FINE_GRAINED_ACCESS only)
// NOTE(review): `nColor` and the `in`/`out` temporaries are defined on lines
// missing from this extract; the leading integers are listing line numbers.
95 template <
typename FloatOut,
typename FloatIn,
int length,
typename Arg>
101 int x = blockIdx.x * blockDim.x + threadIdx.x;
102 int parity_d = blockIdx.z * blockDim.z + threadIdx.z;
// parity_d is decoded as parity * arg.geometry + d.
103 int parity = parity_d / arg.geometry;
104 int d = parity_d % arg.geometry;
// Threads past the end of the site range or the (parity, d) range do nothing.
106 if (x >= arg.volume/2)
return;
107 if (parity_d >= 2 * arg.geometry)
return;
// FINE_GRAINED_ACCESS path: each thread copies the nColor entries of row i.
109 #ifdef FINE_GRAINED_ACCESS 110 int i = blockIdx.y * blockDim.y + threadIdx.y;
111 if (i >= nColor)
return;
112 for (
int j=0; j<
nColor; j++) arg.out(d, parity, x, i, j) = arg.in(d, parity, x, i, j);
// Other path: load the whole site into `in`, then store `out`. The step that
// fills `out` from `in` is on a line not visible here.
// Note the argument order here is (d, x, parity), unlike the fine-grained
// accessor's (d, parity, x, i, j).
116 in = arg.in(d, x, parity);
118 arg.out(d, x, parity) =
out;
// Fragment of a host-side ghost-zone copy routine (presumably copyGhost; the
// function signature is not visible).
// NOTE(review): `parity`, `nColor` and the `in`/`out` temporaries are defined
// on lines missing from this extract; the leading integers are listing line
// numbers, not code.
125 template <
typename FloatOut,
typename FloatIn,
int length,
typename Arg>
// Serial loops: d over [0, arg.nDim), x over [0, arg.faceVolumeCB[d]).
133 for (
int d=0; d<arg.nDim; d++) {
134 for (
int x=0; x<arg.faceVolumeCB[d]; x++) {
// FINE_GRAINED_ACCESS path: copy each (i, j) entry between the Ghost
// accessors. The dimension index is shifted by out_offset on the destination
// and by in_offset on the source.
135 #ifdef FINE_GRAINED_ACCESS 136 for (
int i=0; i<
nColor; i++)
137 for (
int j=0; j<
nColor; j++)
138 arg.out.Ghost(d+arg.out_offset,
parity, x, i, j) = arg.in.Ghost(d+arg.in_offset,
parity, x, i, j);
// Other path: load the whole ghost site into `in`, then store `out`. The step
// that fills `out` from `in` is on a line not visible here.
142 in = arg.in.Ghost(d+arg.in_offset, x,
parity);
144 arg.out.Ghost(d+arg.out_offset, x,
parity) =
out;
// Fragment of a device-side ghost-zone copy body (presumably copyGhostKernel,
// which the listing declares as `__global__ void copyGhostKernel(Arg arg)`;
// the signature itself is not visible here).
// Thread mapping taken from the index computations below:
//   x-dimension -> face site index x
//   z-dimension -> combined index parity_d, split into parity and dimension d
//   y-dimension -> row index i (FINE_GRAINED_ACCESS only)
// NOTE(review): `nColor` and the `in`/`out` temporaries are defined on lines
// missing from this extract; the leading integers are listing line numbers.
156 template <
typename FloatOut,
typename FloatIn,
int length,
typename Arg>
162 int x = blockIdx.x * blockDim.x + threadIdx.x;
163 int parity_d = blockIdx.z * blockDim.z + threadIdx.z;
// parity_d is decoded as parity * arg.nDim + d.
164 int parity = parity_d / arg.nDim;
165 int d = parity_d % arg.nDim;
// This check comes before faceVolumeCB[d] is read. d = parity_d % arg.nDim is
// already within [0, nDim) for any non-negative parity_d, so the check is what
// limits parity to 0 or 1.
166 if (parity_d >= 2 * arg.nDim)
return;
// The face volume differs per dimension, so the site bound is tested per d.
168 if (x < arg.faceVolumeCB[d]) {
// FINE_GRAINED_ACCESS path: each thread copies the nColor entries of row i.
169 #ifdef FINE_GRAINED_ACCESS 170 int i = blockIdx.y * blockDim.y + threadIdx.y;
171 if (i >= nColor)
return;
172 for (
int j=0; j<
nColor; j++)
173 arg.out.Ghost(d+arg.out_offset, parity, x, i, j) = arg.in.Ghost(d+arg.in_offset, parity, x, i, j);
// Other path: load the whole ghost site into `in`, then store `out`. The step
// that fills `out` from `in` is on a line not visible here.
177 in = arg.in.Ghost(d+arg.in_offset, x, parity);
179 arg.out.Ghost(d+arg.out_offset, x, parity) =
out;
__host__ __device__ constexpr int Ncolor(int length)
Return the number of colors of the accessor based on the length of the field.
const int * SurfaceCB() const
__global__ void copyGhostKernel(Arg arg)
CopyGaugeArg(const OutOrder &out, const InOrder &in, const GaugeField &meta)
Main header file for host and device accessors to GaugeFields.
cpuColorSpinorField * out
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
__global__ void copyGaugeKernel(Arg arg)
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...