// Fragment of the CopyGaugeExArg argument pack and its constructor. The
// enclosing type's header, its member declarations and several closing braces
// are absent from this listing; the leading numbers are the original file's
// line numbers.
10 template <
typename OutOrder,
typename InOrder>
23 const int *faceVolumeCB,
int nDim,
int geometry)
24 :
out(
out),
in(
in), nDim(nDim), geometry(geometry) {
// Copy the first nDim entries of each per-dimension array into the members.
25 for (
int d=0;
d<nDim;
d++) {
26 this->Xout[
d] = Xout[
d];
27 this->Xin[
d] = Xin[
d];
28 this->faceVolumeCB[
d] = faceVolumeCB[
d];
// The field with the larger volumeCB supplies volumeEx and the other one
// supplies volume (both as 2*volumeCB). regularToextended is true when `out`
// is the larger field.
31 if (
out.volumeCB >
in.volumeCB) {
32 this->volume = 2*
in.volumeCB;
33 this->volumeEx = 2*
out.volumeCB;
34 this->regularToextended =
true;
// NOTE(review): the `else` line (original line 35) is missing from this
// listing; lines 36-38 are presumably the opposite case — confirm.
36 this->volume = 2*
out.volumeCB;
37 this->volumeEx = 2*
in.volumeCB;
38 this->regularToextended =
false;
// Fragment of the per-site copy routine copyGaugeEx(arg, X, parity). Only the
// index arithmetic survives in this listing; the coordinate decomposition and
// the body of the final loop are missing.
// NOTE(review): the loops below are hard-coded to 4 dimensions, while the
// argument constructor copies nDim entries — confirm nDim is always 4 here.
47 template <
typename FloatOut,
typename FloatIn,
int length,
typename OutOrder,
typename InOrder,
bool regularToextended>
// regularToextended branch: R[d] = (Xout[d] - Xin[d]) >> 1, i.e. half of the
// extent difference in dimension d.
55 if(regularToextended){
57 for (
int d=0;
d<4;
d++)
R[
d] = (
arg.Xout[
d] -
arg.Xin[
d]) >> 1;
// Linear index of the site x shifted by R, built from the out extents, then
// halved by the final >> 1 (presumably to form a per-parity index — confirm).
66 xout = ((((
x[3]+
R[3])*
arg.Xout[2] + (
x[2]+
R[2]))*
arg.Xout[1] + (
x[1]+
R[1]))*
arg.Xout[0]+(
x[0]+
R[0])) >> 1;
// Opposite case (the `else` line is missing from this listing): here
// R[d] = (Xin[d] - Xout[d]) >> 1.
70 for (
int d=0;
d<4;
d++)
R[
d] = (
arg.Xin[
d] -
arg.Xout[
d]) >> 1;
71 int za =
X/(
arg.Xout[0]/2);
// Same shifted linear index, this time built from the in extents.
79 xin = ((((
x[3]+
R[3])*
arg.Xin[2] + (
x[2]+
R[2]))*
arg.Xin[1] + (
x[1]+
R[1]))*
arg.Xin[0]+(
x[0]+
R[0])) >> 1;
// Loop d over [0, arg.geometry); the loop body is missing from this listing.
82 for (
int d=0;
d<
arg.geometry;
d++){
// Fragment of the host-side driver: visits every index X in [0, arg.volume/2)
// in order and calls the per-site copyGaugeEx for it. The function signature
// and the code that defines `parity` are missing from this listing.
91 template <
typename FloatOut,
typename FloatIn,
int length,
typename OutOrder,
typename InOrder,
bool regularToextended>
94 for(
int X=0;
X<
arg.volume/2;
X++){
95 copyGaugeEx<FloatOut, FloatIn, length, OutOrder, InOrder, regularToextended>(
arg,
X,
parity);
// Fragment of copyGaugeExKernel: each thread derives one index X from its
// block and thread ids, returns early when X is at or past arg.volume/2, and
// otherwise calls the per-site copyGaugeEx. The __global__ signature and the
// code that defines `parity` are missing from this listing.
100 template <
typename FloatOut,
typename FloatIn,
int length,
typename OutOrder,
typename InOrder,
bool regularToextended>
103 int X = blockIdx.x *
blockDim.x + threadIdx.x;
// Guard for grids that do not divide the site count evenly.
104 if (
X >=
arg.volume/2)
return;
105 copyGaugeEx<FloatOut, FloatIn, length, OutOrder, InOrder, regularToextended>(
arg,
X,
parity);
// Fragment of the CopyGaugeEx launcher class. The class header, its members
// and most method signatures are missing from this listing.
109 template <
typename FloatOut,
typename FloatIn,
int length,
typename OutOrder,
typename InOrder>
// Constructor: stores arg, meta and location, and records the two strides and
// the geometry in the aux string.
124 :
arg(
arg), meta(meta), location(location) {
125 writeAuxString(
"out_stride=%d,in_stride=%d,geometry=%d",
arg.out.stride,
arg.in.stride,
arg.geometry);
// Call the copyGaugeEx(arg) driver instantiated for the direction recorded in
// arg.regularToextended (the enclosing condition is missing from this listing).
133 if(
arg.regularToextended) copyGaugeEx<FloatOut, FloatIn, length, OutOrder, InOrder, true>(
arg);
134 else copyGaugeEx<FloatOut, FloatIn, length, OutOrder, InOrder, false>(
arg);
// Same selection for copyGaugeExKernel; the launch configuration and arguments
// that followed each line are missing from this listing.
136 if(
arg.regularToextended) copyGaugeExKernel<FloatOut, FloatIn, length, OutOrder, InOrder, true>
138 else copyGaugeExKernel<FloatOut, FloatIn, length, OutOrder, InOrder, false>
// Reported floating-point operation count: always zero.
147 long long flops()
const {
return 0; }
// Byte-count estimate built from the in/out accessor Bytes() plus one FloatIn /
// FloatOut per field that has a phase (the method signature is missing from
// this listing).
// NOTE(review): `sites` is an int and the factor 4 is hard-coded, whereas the
// per-site routine loops over arg.geometry — confirm both are intended.
149 int sites = 4*
arg.volume/2;
150 return 2 * sites * (
arg.in.Bytes() +
arg.in.hasPhase*
sizeof(FloatIn)
151 +
arg.out.Bytes() +
arg.out.hasPhase*
sizeof(FloatOut) );
// Start of the copyGaugeEx overload that takes accessor objects (outOrder,
// inOrder, E, ...). The rest of its signature and its whole body are missing
// from this listing.
156 template <
typename FloatOut,
typename FloatIn,
int length,
typename OutOrder,
typename InOrder>
157 void copyGaugeEx(OutOrder outOrder,
const InOrder inOrder,
const int *
E,
// Fragment of the overload that builds an accessor for the `out` field and
// forwards to copyGaugeEx<FloatOut,FloatIn,length>. Its signature and every
// branch condition are missing from this listing, so only the calls and the
// error paths are visible.
167 template <
typename FloatOut,
typename FloatIn,
int length,
typename InOrder>
// Per-dimension face volume: out.SurfaceCB(i) * out.Nface(), for i in [0, 4).
172 for (
int i=0;
i<4;
i++) faceVolumeCB[
i] =
out.SurfaceCB(
i) *
out.Nface();
// Native-order outputs. The first call wraps Out as short data in a
// FloatNOrder<short,length,2,19> accessor; the remaining calls use an accessor
// type G that is defined on lines missing from this listing.
174 if (
out.isNative()) {
177 copyGaugeEx<short,FloatIn,length>
178 (
FloatNOrder<short,length,2,19>(
out, (
short*)Out), inOrder,
out.X(),
X, faceVolumeCB,
out, location);
181 copyGaugeEx<FloatOut,FloatIn,length>
182 (G(
out, Out), inOrder,
out.X(),
X, faceVolumeCB,
out, location);
186 copyGaugeEx<FloatOut,FloatIn,length>
187 (G(
out, Out), inOrder,
out.X(),
X, faceVolumeCB,
out, location);
190 copyGaugeEx<FloatOut,FloatIn,length>
191 (G(
out, Out), inOrder,
out.X(),
X, faceVolumeCB,
out, location);
192 #ifdef GPU_STAGGERED_DIRAC 195 copyGaugeEx<FloatOut,FloatIn,length>
196 (G(
out, Out), inOrder,
out.X(),
X, faceVolumeCB,
out, location);
199 copyGaugeEx<FloatOut,FloatIn,length>
200 (G(
out, Out), inOrder,
out.X(),
X, faceVolumeCB,
out, location);
203 errorQuda(
"Reconstruction %d and order %d not supported",
out.Reconstruct(),
out.Order());
// Interface-specific orders: each call is guarded by its BUILD_*_INTERFACE
// macro and is followed by an error for builds without that interface
// (the #else/#endif lines are apparently among those missing — confirm).
207 #ifdef BUILD_QDP_INTERFACE 208 copyGaugeEx<FloatOut,FloatIn,length>
211 errorQuda(
"QDP interface has not been built\n");
216 #ifdef BUILD_MILC_INTERFACE 217 copyGaugeEx<FloatOut,FloatIn,length>
220 errorQuda(
"MILC interface has not been built\n");
225 #ifdef BUILD_TIFR_INTERFACE 226 copyGaugeEx<FloatOut,FloatIn,length>
229 errorQuda(
"TIFR interface has not been built\n");
233 errorQuda(
"Gauge field %d order not supported",
out.Order());
// Fragment of the overload that builds an accessor for the `in` field and
// forwards (in-accessor, in.X(), out, location, Out) to the next copyGaugeEx
// overload. Its signature line, the accessor typedefs (G) and every branch
// condition are missing from this listing.
238 template <
typename FloatOut,
typename FloatIn,
int length>
240 FloatOut *Out, FloatIn *In) {
246 in.X(),
out, location, Out);
249 copyGaugeEx<FloatOut,FloatIn,length> (G(
in, In),
in.X(),
out, location, Out);
253 copyGaugeEx<FloatOut,FloatIn,length> (G(
in, In),
in.X(),
out, location, Out);
256 copyGaugeEx<FloatOut,FloatIn,length> (G(
in, In),
in.X(),
out, location, Out);
257 #ifdef GPU_STAGGERED_DIRAC 260 copyGaugeEx<FloatOut,FloatIn,length> (G(
in, In),
in.X(),
out, location, Out);
263 copyGaugeEx<FloatOut,FloatIn,length> (G(
in, In),
in.X(),
out, location, Out);
266 errorQuda(
"Reconstruction %d and order %d not supported",
in.Reconstruct(),
in.Order());
// Interface-specific orders: each call is guarded by its BUILD_*_INTERFACE
// macro and is followed by an error for builds without that interface
// (the #else/#endif lines are apparently among those missing — confirm).
270 #ifdef BUILD_QDP_INTERFACE 272 in.X(),
out, location, Out);
274 errorQuda(
"QDP interface has not been built\n");
279 #ifdef BUILD_MILC_INTERFACE 281 in.X(),
out, location, Out);
283 errorQuda(
"MILC interface has not been built\n");
288 #ifdef BUILD_TIFR_INTERFACE 290 in.X(),
out, location, Out);
292 errorQuda(
"TIFR interface has not been built\n");
296 errorQuda(
"Gauge field %d order not supported",
in.Order());
// Fragment of the precision-typed copyGaugeEx overload: validates the two
// fields and forwards to the length-templated overload. Its first signature
// line and the code between the geometry check and the final call are missing
// from this listing.
301 template <
typename FloatOut,
typename FloatIn>
303 FloatOut *Out, FloatIn *In) {
// Reject the copy unless BOTH fields have three colours: the call below is
// instantiated with a single length of 18 for the pair, so one field with a
// different colour count is already an error. (The previous && only fired
// when both fields were wrong, letting a mismatched pair through.)
305 if (
in.Ncolor() != 3 ||
out.Ncolor() != 3) {
306 errorQuda(
"Unsupported number of colors; out.Nc=%d, in.Nc=%d",
out.Ncolor(),
in.Ncolor());
// The two fields must report the same geometry.
309 if (
out.Geometry() !=
in.Geometry()) {
310 errorQuda(
"Field geometries %d %d do not match",
out.Geometry(),
in.Geometry());
// Forward with 18 as the `length` template argument.
315 copyGaugeEx<FloatOut,FloatIn,18>(
out,
in, location, Out, In);
// Fragment (presumably of copyExtendedGauge — its signature is missing from
// this listing): for each of the in.Ndim() dimensions the extent difference
// between out and in must be even, otherwise the copy is rejected.
324 for (
int d=0;
d<
in.Ndim();
d++) {
325 if ( (
out.X()[
d] -
in.X()[
d]) % 2 != 0)
326 errorQuda(
"Cannot copy into an asymmetrically extended gauge field");
QudaFieldLocation location
CopyGaugeExArg(const OutOrder &out, const InOrder &in, const int *Xout, const int *Xin, const int *faceVolumeCB, int nDim, int geometry)
__device__ __host__ void copyGaugeEx(CopyGaugeExArg< OutOrder, InOrder > &arg, int X, int parity)
QudaVerbosity getVerbosity()
void apply(const cudaStream_t &stream)
const char * VolString() const
QudaFieldGeometry Geometry() const
CopyGaugeExArg< OutOrder, InOrder > arg
unsigned int sharedBytesPerThread() const
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Main header file for host and device accessors to GaugeFields.
enum QudaFieldLocation_s QudaFieldLocation
cpuColorSpinorField * out
unsigned int sharedBytesPerBlock(const TuneParam &param) const
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Accessor routine for CloverFields in native field order.
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
unsigned int minThreads() const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
static __inline__ size_t size_t d
__global__ void copyGaugeExKernel(CopyGaugeExArg< OutOrder, InOrder > arg)
CopyGaugeEx(CopyGaugeExArg< OutOrder, InOrder > &arg, const GaugeField &meta, QudaFieldLocation location)
void copyExtendedGauge(GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0)