11 #if __COMPUTE_CAPABILITY__ >= 300 12 #include <trove/ptr.h> 23 #include <type_traits> 40 template <
typename Float,
typename T>
71 template <
typename T,
int N>
81 template <
typename T,
int N>
98 template <
typename Float,
typename T>
129 template <
typename T,
int N>
130 template <
typename S>
139 template <
typename T,
int N>
140 template <
typename S>
/**
   Unary functor returning the squared absolute value of a complex
   number, cast to the reduction precision.  Used as the transform in
   norm2-style reductions (e.g. with thrust::transform_reduce).
*/
template <typename ReduceType, typename Float> struct square {
  __host__ __device__ ReduceType operator()(quda::complex<Float> x)
  {
    // norm(z) returns |z|^2; promote it to the reduction type
    const auto abs2 = norm(x);
    return static_cast<ReduceType>(abs2);
  }
};
149 template<
typename Float,
int nColor, QudaGaugeFieldOrder order>
struct Accessor {
152 errorQuda(
"Not implemented for order=%d", order);
159 template<
typename Float,
int nColor, QudaGaugeFieldOrder order,
bool native_ghost>
163 errorQuda(
"Not implemented for order=%d", order);
170 template<
typename Float,
int nColor>
175 : cb_offset((U.Bytes()>>1) / (sizeof(complex<Float>)*U.Geometry())) {
177 u[
d] = gauge_ ?
static_cast<complex<Float>**
>(gauge_)[
d] :
178 static_cast<complex<Float>**
>(
const_cast<void*
>(U.
Gauge_p()))[
d];
184 __device__ __host__
inline complex<Float>&
operator()(
int d,
int parity,
int x,
int row,
int col)
const 189 typedef typename vector<Float,2>::type vec2;
191 atomicAdd(u2, (vec2&)
val);
203 template<
typename Float,
int nColor,
bool native_ghost>
205 complex<Float> *ghost[8];
208 for (
int d=0;
d<4;
d++) {
209 ghost[
d] = ghost_ ?
static_cast<complex<Float>*
>(ghost_[
d]) :
210 static_cast<complex<Float>*
>(const_cast<void*>(U.
Ghost()[
d]));
214 ghost_ ?
static_cast<complex<Float>*
>(ghost_[
d+4]) :
215 static_cast<complex<Float>*
>(
const_cast<void*
>(U.
Ghost()[
d+4]));
220 for (
int d=0;
d<8;
d++) {
221 ghost[
d] =
a.ghost[
d];
222 ghostOffset[
d] =
a.ghostOffset[
d];
225 __device__ __host__
inline complex<Float>&
operator()(
int d,
int parity,
int x,
int row,
int col)
const 229 template<
typename Float,
int nColor>
235 : u(gauge_ ? static_cast<complex<Float>*>(gauge_) :
236 static_cast<complex<Float>*>(const_cast<void *>(U.Gauge_p()))),
237 volumeCB(U.VolumeCB()), geometry(U.Geometry()) { }
239 : u(
a.u), volumeCB(
a.volumeCB), geometry(
a.geometry) { }
240 __device__ __host__
inline complex<Float>&
operator()(
int d,
int parity,
int x,
int row,
int col)
const 245 typedef typename vector<Float,2>::type vec2;
246 vec2 *u2 =
reinterpret_cast<vec2*
>(u + (((
parity*volumeCB+x_cb)*geometry +
dim)*
nColor + row)*
nColor + col);
247 atomicAdd(u2, (vec2&)
val);
259 template<
typename Float,
int nColor,
bool native_ghost>
261 complex<Float> *ghost[8];
264 for (
int d=0;
d<4;
d++) {
265 ghost[
d] = ghost_ ?
static_cast<complex<Float>*
>(ghost_[
d]) :
266 static_cast<complex<Float>*
>(const_cast<void*>(U.
Ghost()[
d]));
270 ghost_ ?
static_cast<complex<Float>*
>(ghost_[
d+4]) :
271 static_cast<complex<Float>*
>(
const_cast<void*
>(U.
Ghost()[
d+4]));
276 for (
int d=0;
d<8;
d++) {
277 ghost[
d] =
a.ghost[
d];
278 ghostOffset[
d] =
a.ghostOffset[
d];
281 __device__ __host__
inline complex<Float>&
operator()(
int d,
int parity,
int x,
int row,
int col)
const 285 template<
int nColor,
int N>
286 __device__ __host__
inline int indexFloatN(
int dim,
int parity,
int x_cb,
int row,
int col,
int stride,
int offset_cb) {
288 int j = ((row*
nColor+col)*2) / N;
289 int i = ((row*
nColor+col)*2) % N;
290 int index = ((x_cb +
dim*stride*M + j*stride)*2+
i) / 2;
295 template<
typename Float,
int nColor>
302 : u(gauge_ ? static_cast<complex<Float>*>(gauge_) :
303 static_cast<complex<Float>*>(const_cast<void*>(U.Gauge_p()))),
304 offset_cb( (U.Bytes()>>1) / sizeof(complex<Float>)), stride(U.Stride()), geometry(U.Geometry())
307 : u(
a.u), offset_cb(
a.offset_cb), stride(
a.stride), geometry(
a.geometry) { }
309 __device__ __host__
inline complex<Float>&
operator()(
int dim,
int parity,
int x_cb,
int row,
int col)
const 314 typedef typename vector<Float,2>::type vec2;
316 atomicAdd(u2, (vec2&)
val);
323 if (
dim >= geometry)
errorQuda(
"Request dimension %d exceeds dimensionality of the field %d",
dim, geometry);
325 thrust::device_ptr<complex<Float> >
ptr(u);
326 double even = thrust::transform_reduce(thrust::cuda::par(
alloc),
330 double odd = thrust::transform_reduce(thrust::cuda::par(
alloc),
339 template<
typename Float,
int nColor,
bool native_ghost>
341 complex<Float> *ghost[8];
343 int ghostVolumeCB[8];
346 : volumeCB(U.VolumeCB()), accessor(U, gauge_, ghost_)
348 if (!native_ghost) assert(ghost_ !=
nullptr);
349 for (
int d=0;
d<4;
d++) {
350 ghost[
d] = !native_ghost ?
static_cast<complex<Float>*
>(ghost_[
d]) :
nullptr;
357 : volumeCB(
a.volumeCB), accessor(
a.accessor)
359 for (
int d=0;
d<8;
d++) {
360 ghost[
d] =
a.ghost[
d];
361 ghostVolumeCB[
d] =
a.ghostVolumeCB[
d];
364 __device__ __host__
inline complex<Float>&
operator()(
int d,
int parity,
int x_cb,
int row,
int col)
const 367 return accessor(
d%4,
parity, x_cb+(
d/4)*ghostVolumeCB[
d]+volumeCB, row, col);
379 template <
typename Float,
int nColor,
int nSpinCoarse, QudaGaugeFieldOrder order,
bool native_ghost=true>
403 errorQuda(
"GaugeField ordering not supported with reconstruction");
421 __device__ __host__
const complex<Float>&
operator()(
int d,
int parity,
int x,
int row,
int col)
const 443 __device__ __host__
const complex<Float>&
Ghost(
int d,
int parity,
int x,
int row,
int col)
const 454 __device__ __host__ complex<Float>&
Ghost(
int d,
int parity,
int x,
int row,
int col)
468 int s_col,
int c_row,
int c_col)
const {
483 int s_col,
int c_row,
int c_col) {
497 __device__ __host__
inline const complex<Float>&
Ghost(
int d,
int parity,
int x,
int s_row,
498 int s_col,
int c_row,
int c_col)
const {
512 __device__ __host__
inline complex<Float>&
Ghost(
int d,
int parity,
int x,
int s_row,
513 int s_col,
int c_row,
int c_col) {
518 int c_row,
int c_col, complex<Float> &
val) {
532 __device__ __host__
inline int Ndim()
const {
return nDim; }
538 __device__ __host__
inline int NspinCoarse()
const {
return nSpinCoarse; }
556 for (
int x_cb=0; x_cb<
volumeCB; x_cb++) {
557 for (
int row=0; row<
nColor; row++)
558 for (
int col=0; col<
nColor; col++)
572 template <
int N,
typename Float>
584 const RegType phase,
const I *
X,
const int *
R)
const {
593 template <
typename Float>
602 for (
int i=0;
i<18;
i++)
out[
i] =
in[
i] / scale;
606 const RegType phase,
const I *
X,
const int *
R)
const {
608 for (
int i=0;
i<18;
i++)
out[
i] = scale *
in[
i];
623 template <
typename Float,
typename I>
625 QudaTboundary tBoundary,
bool isFirstTimeSlice,
bool isLastTimeSlice,
628 if (
idx >=
X[3]*
X[2]*
X[1]*
X[0]/2 ) {
629 return isFirstTimeSlice ?
static_cast<Float
>(tBoundary) : static_cast<Float>(1.0);
630 }
else if (
idx >= (
X[3]-1)*
X[0]*
X[1]*
X[2]/2 ) {
631 return isLastTimeSlice ?
static_cast<Float
>(tBoundary) : static_cast<Float>(1.0);
633 return static_cast<Float
>(1.0);
636 if (
idx >= (
R[3]-1)*
X[0]*
X[1]*
X[2]/2 &&
idx <
R[3]*
X[0]*
X[1]*
X[2]/2 ) {
638 return isFirstTimeSlice ?
static_cast<Float
>(tBoundary) : static_cast<Float>(1.0);
639 }
else if (
idx >= (
X[3]-
R[3]-1)*
X[0]*
X[1]*
X[2]/2 &&
idx < (
X[3]-
R[3])*
X[0]*
X[1]*
X[2]/2 ) {
641 return isLastTimeSlice ?
static_cast<Float
>(tBoundary) : static_cast<Float>(1.0);
643 return static_cast<Float
>(1.0);
649 template <
typename Float,
typename I>
652 Float
sign =
static_cast<Float
>(1.0);
654 case 0:
if ( ((
x[3] -
R[3]) & 1) != 0)
sign = -static_cast<Float>(1.0);
break;
655 case 1:
if ( ((
x[0] -
R[0] +
x[3] -
R[3]) & 1) != 0)
sign = -static_cast<Float>(1.0);
break;
656 case 2:
if ( ((
x[0] -
R[0] +
x[1] -
R[1] +
x[3] -
R[3]) & 1) != 0)
sign = -static_cast<Float>(1.0);
break;
661 template <
typename Float>
672 isFirstTimeSlice(
comm_coord(3) == 0 ? true : false),
674 ghostExchange(u.GhostExchange()) { }
677 tBoundary(recon.tBoundary), isFirstTimeSlice(recon.isFirstTimeSlice),
678 isLastTimeSlice(recon.isLastTimeSlice), ghostExchange(recon.ghostExchange) { }
687 const RegType phase,
const I *
X,
const int *
R)
const {
692 timeBoundary<RegType>(
idx,
X,
R, tBoundary,isFirstTimeSlice, isLastTimeSlice, ghostExchange);
695 for(
int i=0;
i<6; ++
i) Out[
i] = In[
i];
697 Out[6] = u0*
conj(Out[1]*Out[5] - Out[2]*Out[4]);
698 Out[7] = u0*
conj(Out[2]*Out[3] - Out[0]*Out[5]);
699 Out[8] = u0*
conj(Out[0]*Out[4] - Out[1]*Out[3]);
706 template <
typename Float>
726 const RegType phase,
const I *
X,
const int *
R)
const {
749 template <
typename Float>
758 scale(recon.scale) { }
766 const RegType phase,
const I *
X,
const int *
R)
const {
772 for(
int i=0;
i<6; ++
i) Out[
i] = In[
i];
774 Out[6] =
coeff*
conj(Out[1]*Out[5] - Out[2]*Out[4]);
775 Out[7] =
coeff*
conj(Out[2]*Out[3] - Out[0]*Out[5]);
776 Out[8] =
coeff*
conj(Out[0]*Out[4] - Out[1]*Out[3]);
781 Complex A(cos_sin[0], cos_sin[1]);
789 #if 1 // phase from cross product 792 Complex denom =
conj(In[0]*In[4] - In[1]*In[3]) / scale;
793 Complex expI3Phase = In[8] / denom;
795 #else // phase from determinant 808 template <
typename Float>
819 isFirstTimeSlice(
comm_coord(3) == 0 ? true : false),
821 ghostExchange(u.GhostExchange()) { }
824 tBoundary(recon.tBoundary), isFirstTimeSlice(recon.isFirstTimeSlice),
825 isLastTimeSlice(recon.isLastTimeSlice), ghostExchange(recon.ghostExchange) { }
836 const I *
X,
const int *
R,
const RegType scale=1.0)
const {
846 timeBoundary<RegType>(
idx,
X,
R, tBoundary,isFirstTimeSlice, isLastTimeSlice, ghostExchange);
850 RegType U00_mag =
sqrt(diff >= static_cast<RegType>(0.0) ? diff : static_cast<RegType>(0.0));
858 diff =
static_cast<RegType>(1.0)/(u0*u0) - column_sum;
859 RegType U20_mag =
sqrt(diff >= static_cast<RegType>(0.0) ? diff : static_cast<RegType>(0.0));
868 Out[4] = -(
conj(Out[6])*
conj(Out[2]) + u0*A*Out[1])*r_inv2;
869 Out[5] = (
conj(Out[6])*
conj(Out[1]) - u0*A*Out[2])*r_inv2;
871 A =
conj(Out[0])*Out[6];
872 Out[7] = (
conj(Out[3])*
conj(Out[2]) - u0*A*Out[1])*r_inv2;
873 Out[8] = -(
conj(Out[3])*
conj(Out[1]) + u0*A*Out[2])*r_inv2;
880 template <
typename Float>
890 scale(recon.scale) { }
893 #if 1 // phase from cross product 896 Complex denom =
conj(In[0]*In[4] - In[1]*In[3]) / scale;
897 Complex expI3Phase = In[8] / denom;
899 #else // phase from determinant 917 for (
int i=0; i<9; i++) su3[i] = z * reinterpret_cast<const Complex*>(
in)[
i];
918 reconstruct_8.
Pack(
out, reinterpret_cast<RegType*>(su3),
idx);
923 const RegType phase,
const I *
X,
const int *
R)
const {
929 for (
int i=0; i<9; i++) reinterpret_cast<Complex*>(
out)[
i] *=
z;
934 __host__ __device__
inline constexpr
int ct_sqrt(
int n,
int i = 1){
948 template <
typename Float,
int length,
int N,
int reconLenParam, QudaStaggeredPhase stag_phase=QUDA_STAGGERED_PHASE_NO,
bool huge_alloc=default_huge_alloc>
954 static const int reconLen = (reconLenParam == 11) ? 10 : reconLenParam;
958 #ifdef USE_TEXTURE_OBJECTS 960 cudaTextureObject_t
tex;
961 const int tex_offset;
979 #ifdef USE_TEXTURE_OBJECTS
987 errorQuda(
"This accessor does not support coarse-link fields (lacks support for bidirectional ghost zone");
990 "staggered phase only presently supported for 18 and 12 reconstruct");
991 for (
int i=0;
i<4;
i++) {
994 ghost[
i] = ghost_ ? ghost_[
i] : 0;
997 #ifdef USE_TEXTURE_OBJECTS 999 if (!huge_alloc && this->gauge != u.
Gauge_p() && !
override) {
1000 errorQuda(
"Cannot use texture read since data pointer does not equal field pointer - use with huge_alloc=true instead");
1007 #ifdef USE_TEXTURE_OBJECTS
1008 tex(order.
tex), tex_offset(order.tex_offset),
1014 for (
int i=0;
i<4;
i++) {
1028 for (
int i=0;
i<M;
i++){
1030 #if defined(USE_TEXTURE_OBJECTS) && defined(__CUDA_ARCH__) 1034 for (
int j=0; j<N; j++) copy(tmp[i*N+j], reinterpret_cast<RegType*>(&vecTmp)[j]);
1041 for (
int j=0; j<N; j++) copy(tmp[i*N+j], reinterpret_cast<Float*>(&vecTmp)[j]);
1053 Float
sign = (dir == 0 && ((
coords[3] -
R[3]) & 1) != 0) ||
1054 ( dir == 1 && ((
coords[0] -
R[0] +
coords[3] -
R[3]) & 1) != 0) ||
1058 for (
int i=12;
i<18;
i++) v[
i] *=
sign;
1069 for (
int i=0;
i<M;
i++){
1073 for (
int j=0; j<N; j++) copy(reinterpret_cast<Float*>(&vecTmp)[j],
tmp[
i*N+j]);
1123 for (
int i=0;
i<M;
i++) {
1129 for (
int j=0; j<N; j++) copy(tmp[i*N+j], reinterpret_cast<Float*>(&vecTmp)[j]);
1146 for (
int i=0;
i<M;
i++) {
1150 for (
int j=0; j<N; j++) copy(reinterpret_cast<Float*>(&vecTmp)[j],
tmp[
i*N+j]);
1194 int dim,
int g,
int parity,
const int R[])
const {
1199 for (
int i=0;
i<M;
i++) {
1205 for (
int j=0; j<N; j++) copy(tmp[i*N+j], reinterpret_cast<Float*>(&vecTmp)[j]);
1216 int dir,
int dim,
int g,
int parity,
const int R[]) {
1223 for (
int i=0;
i<M;
i++) {
1227 for (
int j=0; j<N; j++) copy(reinterpret_cast<Float*>(&vecTmp)[j],
tmp[
i*N+j]);
1235 static_cast<RegType>(phase/(2.*M_PI)));
/**
   This is just a dummy structure we use for trove to define the
   required structure size: a flat array of `length` reals, matching
   one site's worth of gauge data, so that trove::coalesced_ptr can
   perform coalesced whole-structure loads and stores.
*/
template <typename real, int length> struct S { real v[length]; };
1275 template <
typename Float,
int length>
1288 errorQuda(
"This accessor does not support coarse-link fields (lacks support for bidirectional ghost zone");
1290 for (
int i=0;
i<4;
i++) {
1298 for (
int i=0;
i<4;
i++) {
1307 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1309 trove::coalesced_ptr<structure> ghost_((structure*)
ghost[dir]);
1318 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1320 trove::coalesced_ptr<structure> ghost_((structure*)
ghost[dir]);
1322 for (
int i=0;
i<
length;
i++) v_.v[
i] = (Float)v[
i];
1330 int dim,
int g,
int parity,
const int R[])
const {
1331 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1333 trove::coalesced_ptr<structure> ghost_((structure*)
ghost[
dim]);
1344 int dir,
int dim,
int g,
int parity,
const int R[]) {
1345 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1347 trove::coalesced_ptr<structure> ghost_((structure*)
ghost[
dim]);
1349 for (
int i=0;
i<
length;
i++) v_.v[
i] = (Float)v[
i];
1371 {
for (
int i=0;
i<4;
i++)
gauge[
i] = gauge_ ? ((Float**)gauge_)[
i] : ((Float**)u.
Gauge_p())[
i]; }
1378 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1380 trove::coalesced_ptr<structure> gauge_((structure*)
gauge[dir]);
1391 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1393 trove::coalesced_ptr<structure> gauge_((structure*)
gauge[dir]);
1395 for (
int i=0;
i<
length;
i++) v_.v[
i] = (Float)v[
i];
1448 {
for (
int i=0;
i<4;
i++)
gauge[
i] = gauge_ ? ((Float**)gauge_)[
i] : ((Float**)u.
Gauge_p())[
i]; }
1522 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1524 trove::coalesced_ptr<structure> gauge_((structure*)
gauge);
1535 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1537 trove::coalesced_ptr<structure> gauge_((structure*)
gauge);
1539 for (
int i=0;
i<
length;
i++) v_.v[
i] = (Float)v[
i];
1607 offset(u.SiteOffset()),
size(u.SiteSize()) { ; }
1618 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1620 trove::coalesced_ptr<structure> gauge_((structure*)gauge0);
1621 structure v_ = gauge_[dir];
1634 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1636 trove::coalesced_ptr<structure> gauge_((structure*)gauge0);
1638 for (
int i=0;
i<
length;
i++) v_.v[
i] = (Float)v[
i];
1642 gauge0[dir*
length +
i] = (Float)v[
i];
1660 static constexpr
int Nc = 3;
1673 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1675 trove::coalesced_ptr<structure> gauge_((structure*)
gauge);
1677 for (
int i=0;
i<
Nc;
i++)
1678 for (
int j=0; j<
Nc; j++)
1679 for (
int z=0;
z<2;
z++)
1682 for (
int i=0;
i<
Nc;
i++) {
1683 for (
int j=0; j<
Nc; j++) {
1684 for (
int z=0;
z<2;
z++) {
1694 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1696 trove::coalesced_ptr<structure> gauge_((structure*)
gauge);
1698 for (
int i=0;
i<
Nc;
i++)
1699 for (
int j=0; j<
Nc; j++)
1700 for (
int z=0;
z<2;
z++)
1704 for (
int i=0;
i<
Nc;
i++) {
1705 for (
int j=0; j<
Nc; j++) {
1706 for (
int z=0;
z<2;
z++) {
1746 size_t Bytes()
const {
return Nc *
Nc * 2 *
sizeof(Float); }
1761 static constexpr
int Nc = 3;
1778 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1780 trove::coalesced_ptr<structure> gauge_((structure*)
gauge);
1782 for (
int i=0;
i<
Nc;
i++)
1783 for (
int j=0; j<
Nc; j++)
1784 for (
int z=0;
z<2;
z++)
1787 for (
int i=0;
i<
Nc;
i++) {
1788 for (
int j=0; j<
Nc; j++) {
1789 for (
int z=0;
z<2;
z++) {
1798 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1800 trove::coalesced_ptr<structure> gauge_((structure*)
gauge);
1802 for (
int i=0;
i<
Nc;
i++)
1803 for (
int j=0; j<
Nc; j++)
1804 for (
int z=0;
z<2;
z++)
1805 v_.v[(j*
Nc+
i)*2+
z] = (Float)(v[(
i*
Nc+j)*2+
z]);
1808 for (
int i=0;
i<
Nc;
i++) {
1809 for (
int j=0; j<
Nc; j++) {
1810 for (
int z=0;
z<2;
z++) {
1849 size_t Bytes()
const {
return Nc *
Nc * 2 *
sizeof(Float); }
1860 static constexpr
int Nc = 3;
1876 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1878 trove::coalesced_ptr<structure> gauge_((structure*)
gauge);
1880 for (
int i=0;
i<
Nc;
i++)
1881 for (
int j=0; j<
Nc; j++)
1882 for (
int z=0;
z<2;
z++)
1885 for (
int i=0;
i<
Nc;
i++) {
1886 for (
int j=0; j<
Nc; j++) {
1887 for (
int z=0;
z<2;
z++) {
1896 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 1898 trove::coalesced_ptr<structure> gauge_((structure*)
gauge);
1900 for (
int i=0;
i<
Nc;
i++)
1901 for (
int j=0; j<
Nc; j++)
1902 for (
int z=0;
z<2;
z++)
1906 for (
int i=0;
i<
Nc;
i++) {
1907 for (
int j=0; j<
Nc; j++) {
1908 for (
int z=0;
z<2;
z++) {
1947 size_t Bytes()
const {
return Nc *
Nc * 2 *
sizeof(Float); }
1959 static constexpr
int Nc = 3;
1966 dim{ u.
X()[0], u.
X()[1], u.
X()[2], u.
X()[3] },
1967 exDim{ u.X()[0], u.X()[1], u.X()[2] + 4, u.X()[3] } {
1978 exDim{order.exDim[0], order.exDim[1], order.exDim[2], order.exDim[3]} {
2003 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 2005 trove::coalesced_ptr<structure> gauge_((structure*)
gauge);
2007 for (
int i=0;
i<
Nc;
i++)
2008 for (
int j=0; j<
Nc; j++)
2009 for (
int z=0;
z<2;
z++)
2012 for (
int i=0;
i<
Nc;
i++) {
2013 for (
int j=0; j<
Nc; j++) {
2014 for (
int z=0;
z<2;
z++) {
2026 #if defined( __CUDA_ARCH__) && !defined(DISABLE_TROVE) 2028 trove::coalesced_ptr<structure> gauge_((structure*)
gauge);
2030 for (
int i=0;
i<
Nc;
i++)
2031 for (
int j=0; j<
Nc; j++)
2032 for (
int z=0;
z<2;
z++)
2036 for (
int i=0;
i<
Nc;
i++) {
2037 for (
int j=0; j<
Nc; j++) {
2038 for (
int z=0;
z<2;
z++) {
2077 size_t Bytes()
const {
return Nc *
Nc * 2 *
sizeof(Float); }
/**
   gauge_mapper is a metafunction that maps a storage type and a
   reconstruction type onto the concrete gauge-field accessor
   (gauge::FloatNOrder instantiation) to use.  This primary template
   is deliberately empty: only the (double/float/short) x
   (reconstruct) specializations that follow define a nested ::type.
*/
template <typename T, QudaReconstructType, int N = 18,
          QudaStaggeredPhase stag = QUDA_STAGGERED_PHASE_NO,
          bool huge_alloc = gauge::default_huge_alloc>
struct gauge_mapper {
};
// double-precision storage: the internal vector length (third FloatNOrder
// template argument) is always 2 — presumably a double2-based ordering;
// TODO(review) confirm against the FloatNOrder definition
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<double, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc> { typedef gauge::FloatNOrder<double, N, 2, N, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<double, QUDA_RECONSTRUCT_13, N, stag, huge_alloc> { typedef gauge::FloatNOrder<double, N, 2, 13, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<double, QUDA_RECONSTRUCT_12, N, stag, huge_alloc> { typedef gauge::FloatNOrder<double, N, 2, 12, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<double, QUDA_RECONSTRUCT_9, N, stag, huge_alloc> { typedef gauge::FloatNOrder<double, N, 2, 9, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<double, QUDA_RECONSTRUCT_8, N, stag, huge_alloc> { typedef gauge::FloatNOrder<double, N, 2, 8, stag, huge_alloc> type; };
// single-precision storage: unreconstructed (RECONSTRUCT_NO) fields use an
// internal vector length of 2, reconstructed fields use 4 — presumably
// float2 vs float4 orderings; TODO(review) confirm against FloatNOrder
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<float, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc> { typedef gauge::FloatNOrder<float, N, 2, N, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<float, QUDA_RECONSTRUCT_13, N, stag, huge_alloc> { typedef gauge::FloatNOrder<float, N, 4, 13, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<float, QUDA_RECONSTRUCT_12, N, stag, huge_alloc> { typedef gauge::FloatNOrder<float, N, 4, 12, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<float, QUDA_RECONSTRUCT_9, N, stag, huge_alloc> { typedef gauge::FloatNOrder<float, N, 4, 9, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<float, QUDA_RECONSTRUCT_8, N, stag, huge_alloc> { typedef gauge::FloatNOrder<float, N, 4, 8, stag, huge_alloc> type; };
// half-precision (short) storage: mirrors the single-precision mapping —
// vector length 2 for unreconstructed fields, 4 otherwise; TODO(review)
// confirm the short2/short4 interpretation against FloatNOrder
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<short, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc> { typedef gauge::FloatNOrder<short, N, 2, N, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<short, QUDA_RECONSTRUCT_13, N, stag, huge_alloc> { typedef gauge::FloatNOrder<short, N, 4, 13, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<short, QUDA_RECONSTRUCT_12, N, stag, huge_alloc> { typedef gauge::FloatNOrder<short, N, 4, 12, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<short, QUDA_RECONSTRUCT_9, N, stag, huge_alloc> { typedef gauge::FloatNOrder<short, N, 4, 9, stag, huge_alloc> type; };
template<int N, QudaStaggeredPhase stag, bool huge_alloc> struct gauge_mapper<short, QUDA_RECONSTRUCT_8, N, stag, huge_alloc> { typedef gauge::FloatNOrder<short, N, 4, 8, stag, huge_alloc> type; };
2118 #define INSTANTIATE_RECONSTRUCT(func, g, ...) \ 2120 if (!data.isNative()) \ 2121 errorQuda("Field order %d and precision %d is not native", g.Order(), g.Precision()); \ 2122 if( g.Reconstruct() == QUDA_RECONSTRUCT_NO) { \ 2123 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type Gauge; \ 2124 func(Gauge(g), g, __VA_ARGS__); \ 2125 } else if( g.Reconstruct() == QUDA_RECONSTRUCT_12){ \ 2126 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type Gauge; \ 2127 func(Gauge(g), g, __VA_ARGS__); \ 2128 } else if( g.Reconstruct() == QUDA_RECONSTRUCT_8){ \ 2129 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type Gauge; \ 2130 func(Gauge(g), g, __VA_ARGS__); \ 2132 errorQuda("Reconstruction type %d of gauge field not supported", g.Reconstruct()); \ 2136 #define INSTANTIATE_PRECISION(func, lat, ...) \ 2138 if (lat.Precision() == QUDA_DOUBLE_PRECISION) { \ 2139 func<double>(lat, __VA_ARGS__); \ 2140 } else if(lat.Precision() == QUDA_SINGLE_PRECISION) { \ 2141 func<float>(lat, __VA_ARGS__); \ 2143 errorQuda("Precision %d not supported", lat.Precision()); \ 2149 #endif // _GAUGE_ORDER_H __device__ __host__ const gauge_wrapper< Float, BQCDOrder< Float, length > > operator()(int dim, int x_cb, int parity) const
This accessor routine returns a const gauge_wrapper to this object, allowing us to overload various operators to interact with the Matrix class.
__device__ __host__ gauge_ghost_wrapper< Float, FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc > > Ghost(int dim, int ghost_idx, int parity)
This accessor routine returns a gauge_ghost_wrapper to this object, allowing us to overload various o...
__device__ __host__ complex< Float > & operator()(int d, int parity, int x, int row, int col) const
__device__ __host__ gauge_wrapper< Float, MILCOrder< Float, length > > operator()(int dim, int x_cb, int parity)
This accessor routine returns a gauge_wrapper to this object, allowing us to overload various operato...
__device__ __host__ void Unpack(RegType out[18], const RegType in[18], int idx, int dir, const RegType phase, const I *X, const int *R) const
__device__ __host__ int NcolorCoarse() const
__host__ __device__ constexpr int Ncolor(int length)
Return the number of colors of the accessor based on the length of the field.
__device__ __host__ RegType getPhase(const RegType in[18])
gauge::TIFROrder< T, 2 *Nc *Nc > type
__device__ __host__ complex< Float > & operator()(int d, int parity, int x, int s_row, int s_col, int c_row, int c_col)
gauge_wrapper is an internal class that is used to wrap instances of gauge accessors, currying in a specific location on the field. The operator() accessors in gauge-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the Matrix class. As a result we can include gauge-field accessors directly in Matrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the gauge-field accessors.
Float * ghost[QUDA_MAX_DIM]
static const int hasPhase
__device__ __host__ void Unpack(RegType out[18], const RegType in[8], int idx, int dir, const RegType phase, const I *X, const int *R) const
const Reconstruct< 12, Float > reconstruct_12
__device__ __host__ complex< Float > & Ghost(int d, int parity, int x, int s_row, int s_col, int c_row, int c_col)
QDPJITOrder(const QDPJITOrder &order)
Reconstruct(const Reconstruct< 11, Float > &recon)
__device__ __host__ const complex< Float > & Ghost(int d, int parity, int x, int row, int col) const
static __device__ __host__ int linkIndex(const int x[], const I X[4])
__device__ __host__ void loadGhost(RegType v[length], int x, int dir, int parity) const
__device__ __host__ void Pack(RegType out[8], const RegType in[18], int idx) const
__device__ __host__ void load(RegType v[18], int x, int dir, int parity) const
TIFROrder(const TIFROrder &order)
MILCSiteOrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0)
__device__ __host__ void atomicAdd(int d, int parity, int x, int s_row, int s_col, int c_row, int c_col, complex< Float > &val)
__device__ __host__ void Unpack(RegType out[N], const RegType in[N], int idx, int dir, const RegType phase, const I *X, const int *R) const
__device__ __host__ void Pack(RegType out[N], const RegType in[N], int idx) const
__device__ __host__ void operator=(const M &a)
Assignment operator with Matrix instance as input.
__device__ __host__ void Pack(RegType out[18], const RegType in[18], int idx) const
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
constexpr bool default_huge_alloc
mapper< Float >::type RegType
mapper< Float >::type RegType
__device__ __host__ void operator=(const M &a)
Assignment operator with Matrix instance as input.
FloatNOrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0, bool override=false)
__device__ __host__ void Pack(RegType out[12], const RegType in[18], int idx) const
Reconstruct(const Reconstruct< 8, Float > &recon)
mapper< Float >::type RegType
__host__ __device__ constexpr int ct_sqrt(int n, int i=1)
const GhostAccessor< Float, nColor, order, native_ghost > ghostAccessor
gauge::FloatNOrder< double, N, 2, N, stag, huge_alloc > type
__host__ __device__ ValueType sqrt(ValueType x)
static constexpr int nColorCoarse
mapper< Float >::type RegType
std::complex< double > Complex
GhostAccessor(const GhostAccessor< Float, nColor, QUDA_MILC_GAUGE_ORDER, native_ghost > &a)
gauge::FloatNOrder< short, N, 4, 13, stag, huge_alloc > type
static std::map< void *, MemAlloc > alloc[N_ALLOC_TYPE]
GhostAccessor(const GaugeField &U, void *gauge_=0, void **ghost_=0)
__device__ __host__ int NspinCoarse() const
Reconstruct(const Reconstruct< 19, Float > &recon)
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
cudaColorSpinorField * tmp
__device__ __host__ void load(RegType v[length], int x, int dir, int parity) const
Reconstruct< reconLenParam, Float > reconstruct
__device__ __host__ int Geometry() const
mapper< Float >::type RegType
__host__ __device__ void copy(T1 &a, const T2 &b)
mapper< Float >::type RegType
LegacyOrder(const LegacyOrder &order)
AllocType< huge_alloc >::type AllocInt
__device__ __host__ void load(RegType v[length], int x, int dir, int parity) const
const int * SurfaceCB() const
QudaFieldGeometry Geometry() const
complex< RegType > Complex
__device__ __host__ void loadGhostEx(RegType v[length], int buff_idx, int extended_idx, int dir, int dim, int g, int parity, const int R[]) const
VectorType< Float, N >::type Vector
gauge::BQCDOrder< T, 2 *Nc *Nc > type
Reconstruct(const GaugeField &u)
__device__ __host__ void save(const RegType v[length], int x, int dir, int parity)
__device__ __host__ const gauge_ghost_wrapper< Float, FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc > > Ghost(int dim, int ghost_idx, int parity) const
This accessor routine returns a const gauge_wrapper to this object, allowing us to overload various o...
__device__ __host__ complex< Float > & operator()(int d, int parity, int x, int row, int col) const
size_t bytes
host memory for backing up the field when tuning
__device__ __host__ gauge_wrapper< Float, TIFROrder< Float, length > > operator()(int dim, int x_cb, int parity)
This accessor routine returns a gauge_wrapper to this object, allowing us to overload various operato...
gauge::FloatNOrder< short, N, 4, 8, stag, huge_alloc > type
gauge::FloatNOrder< double, N, 2, 9, stag, huge_alloc > type
enum QudaTboundary_s QudaTboundary
__host__ double device_norm2(int dim) const
__device__ __host__ void atomic_add(int dim, int parity, int x_cb, int row, int col, complex< Float > &val) const
mapper< Float >::type RegType
QDPOrder(const QDPOrder &order)
__device__ __host__ void operator=(const Matrix< U, N > &b)
This is just a dummy structure we use for trove to define the required structure size.
FieldOrder(const FieldOrder &o)
int_fastdiv X[QUDA_MAX_DIM]
FieldOrder(GaugeField &U, void *gauge_=0, void **ghost_=0)
GhostAccessor(const GhostAccessor< Float, nColor, QUDA_FLOAT2_GAUGE_ORDER, native_ghost > &a)
__device__ __host__ RegType getPhase(const RegType in[N]) const
Accessor< Float, nColor, QUDA_FLOAT2_GAUGE_ORDER > accessor
gauge::FloatNOrder< short, N, 2, N, stag, huge_alloc > type
mapper< Float >::type RegType
__device__ __host__ void save(const RegType v[length], int x, int dir, int parity)
__device__ __host__ Float timeBoundary(int idx, const I X[QUDA_MAX_DIM], const int R[QUDA_MAX_DIM], QudaTboundary tBoundary, bool isFirstTimeSlice, bool isLastTimeSlice, QudaGhostExchange ghostExchange=QUDA_GHOST_EXCHANGE_NO)
char * index(const char *, int)
struct to define gauge fields packed into an opaque MILC site struct:
__device__ __host__ complex< Float > & operator()(int dim, int parity, int x_cb, int row, int col) const
__device__ __host__ const complex< Float > & operator()(int d, int parity, int x, int row, int col) const
complex< RegType > Complex
mapper< Float >::type RegType
Accessor(const Accessor< Float, nColor, QUDA_QDP_GAUGE_ORDER > &a)
BQCDOrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0)
static const int reconLen
mapper< Float >::type RegType
MILCOrder(const MILCOrder &order)
__device__ __host__ const gauge_wrapper< Float, TIFROrder< Float, length > > operator()(int dim, int x_cb, int parity) const
This accessor routine returns a const gauge_wrapper to this object, allowing us to overload various o...
Accessor(const GaugeField &, void *gauge_=0, void **ghost_=0)
__device__ __host__ void save(const RegType v[length], int x, int dir, int parity)
Reconstruct(const GaugeField &u)
virtual ~TIFRPaddedOrder()
Accessor(const Accessor< Float, nColor, QUDA_MILC_GAUGE_ORDER > &a)
gauge::FloatNOrder< double, N, 2, 12, stag, huge_alloc > type
const Accessor< Float, nColor, order > accessor
QDPOrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0)
__host__ double norm2(int dim) const
Returns the L2 norm squared of the field in a given dimension.
__device__ __host__ RegType getPhase(const RegType in[18]) const
gauge::QDPOrder< T, 2 *Nc *Nc > type
__device__ __host__ void save(const RegType v[18], int x, int dir, int parity)
QudaGhostExchange ghostExchange
MILCOrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0)
__device__ __host__ void save(const RegType v[18], int x, int dir, int parity)
__device__ __host__ void load(RegType v[length], int x, int dir, int parity) const
struct to define BQCD ordered gauge fields:
__device__ __host__ gauge_wrapper< Float, QDPJITOrder< Float, length > > operator()(int dim, int x_cb, int parity)
This accessor routine returns a gauge_wrapper to this object, allowing us to overload various operato...
__device__ __host__ void saveGhostEx(const RegType v[length], int x, int dummy, int dir, int dim, int g, int parity, const int R[])
enum QudaGhostExchange_s QudaGhostExchange
__device__ __host__ void vector_store(void *ptr, int idx, const VectorType &value)
gauge::FloatNOrder< double, N, 2, 8, stag, huge_alloc > type
const Reconstruct< 8, Float > reconstruct_8
MILCSiteOrder(const MILCSiteOrder &order)
gauge::FloatNOrder< float, N, 2, N, stag, huge_alloc > type
Reconstruct(const GaugeField &u)
__device__ __host__ complex< Float > & operator()(int d, int parity, int x_cb, int row, int col) const
const QudaTboundary tBoundary
__device__ __host__ void atomic_add(int dim, int parity, int x_cb, int row, int col, complex< Float > &val) const
gauge_ghost_wrapper is an internal class that is used to wrap instances of gauge ghost accessors...
gauge::FloatNOrder< float, N, 4, 12, stag, huge_alloc > type
__device__ __host__ void save(const RegType v[length], int x, int dir, int parity)
__device__ __host__ RegType getPhase(const RegType in[18]) const
__device__ __host__ void load(RegType v[18], int x, int dir, int parity) const
CPSOrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0)
Accessor(const GaugeField &U, void *gauge_=0, void **ghost_=0)
__device__ __host__ void load(RegType v[18], int x, int dir, int parity) const
mapper< Float >::type RegType
__device__ __host__ complex< Float > & Ghost(int d, int parity, int x, int row, int col)
const void ** Ghost() const
__device__ __host__ const gauge_wrapper< Float, MILCOrder< Float, length > > operator()(int dim, int x_cb, int parity) const
This accessor routine returns a const gauge_wrapper to this object, allowing us to overload various o...
complex< RegType > Complex
Float * gauge[QUDA_MAX_DIM]
Provides precision abstractions and defines the register precision given the storage precision using ...
gauge::FloatNOrder< float, N, 4, 13, stag, huge_alloc > type
TIFRPaddedOrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0)
gauge::FloatNOrder< short, N, 4, 9, stag, huge_alloc > type
__device__ __host__ void load(RegType v[length], int x, int dir, int parity) const
__device__ __host__ void Pack(RegType out[8], const RegType in[18], int idx) const
__device__ __host__ const gauge_wrapper< Float, FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc > > operator()(int dim, int x_cb, int parity) const
This accessor routine returns a const gauge_wrapper to this object, allowing us to overload various o...
__device__ __host__ void Unpack(RegType out[18], const RegType in[8], int idx, int dir, const RegType phase, const I *X, const int *R, const RegType scale=1.0) const
__device__ __host__ void save(const RegType v[length], int x, int dir, int parity)
__device__ __host__ int VolumeCB() const
__device__ __host__ void saveGhost(const RegType v[length], int x, int dir, int parity)
#define safe_malloc(size)
void load()
Restore the field from the host after tuning.
__device__ __host__ void loadGhostEx(RegType v[length], int x, int dummy, int dir, int dim, int g, int parity, const int R[]) const
Reconstruct(const GaugeField &u)
__host__ double device_norm2(int dim) const
__device__ __host__ void Unpack(RegType out[18], const RegType in[10], int idx, int dir, const RegType phase, const I *X, const int *R) const
__device__ __host__ gauge_wrapper< Float, TIFRPaddedOrder< Float, length > > operator()(int dim, int x_cb, int parity)
This accessor routine returns a gauge_wrapper to this object, allowing us to overload various operato...
__device__ __host__ void saveGhostEx(const RegType v[length], int buff_idx, int extended_idx, int dir, int dim, int g, int parity, const int R[])
QudaFieldLocation Location() const
LegacyOrder(const GaugeField &u, Float **ghost_)
__device__ __host__ gauge_wrapper< Float, CPSOrder< Float, length > > operator()(int dim, int x_cb, int parity)
This accessor routine returns a gauge_wrapper to this object, allowing us to overload various operato...
QudaGhostExchange ghostExchange
const AllocInt phaseOffset
gauge::MILCOrder< T, 2 *Nc *Nc > type
Reconstruct(const GaugeField &u)
Float * gauge[QUDA_MAX_DIM]
gauge::FloatNOrder< short, N, 4, 12, stag, huge_alloc > type
__device__ __host__ int Volume() const
mapper< Float >::type RegType
__device__ __host__ gauge_wrapper< Float, QDPOrder< Float, length > > operator()(int dim, int x_cb, int parity)
This accessor routine returns a gauge_wrapper to this object, allowing us to overload various operato...
FloatNOrder(const FloatNOrder &order)
enum QudaFieldLocation_s QudaFieldLocation
gauge::FloatNOrder< float, N, 4, 9, stag, huge_alloc > type
int faceVolumeCB[QUDA_MAX_DIM]
Reconstruct(const Reconstruct< 12, Float > &recon)
cpuColorSpinorField * out
__device__ __host__ void load(RegType v[18], int x, int dir, int parity) const
__device__ __host__ int Ncolor() const
BQCDOrder(const BQCDOrder &order)
__device__ __host__ const complex< Float > & Ghost(int d, int parity, int x, int s_row, int s_col, int c_row, int c_col) const
__device__ __host__ RegType getPhase(const RegType in[18])
Accessor(const GaugeField &U, void *gauge_=0, void **ghost_=0)
__device__ __host__ void Pack(RegType out[12], const RegType in[18], int idx) const
__device__ __host__ const gauge_wrapper< Float, QDPOrder< Float, length > > operator()(int dim, int x_cb, int parity) const
This accessor routine returns a const gauge_wrapper to this object, allowing us to overload various o...
void save()
Backup the field to the host when tuning.
complex< RegType > Complex
QudaGhostExchange ghostExchange
mapper< Float >::type RegType
__device__ __host__ gauge_wrapper< Float, BQCDOrder< Float, length > > operator()(int dim, int x_cb, int parity)
This accessor routine returns a gauge_wrapper to this object, allowing us to overload various operato...
Reconstruct(const Reconstruct< N, Float > &recon)
__device__ __host__ gauge_wrapper< Float, FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc > > operator()(int dim, int x_cb, int parity)
This accessor routine returns a gauge_wrapper to this object, allowing us to overload various operato...
__device__ __host__ void Pack(RegType out[10], const RegType in[18], int idx) const
__device__ __host__ complex< Float > & operator()(int d, int parity, int x, int row, int col) const
__device__ __host__ const gauge_wrapper< Float, TIFRPaddedOrder< Float, length > > operator()(int dim, int x_cb, int parity) const
This accessor routine returns a const gauge_wrapper to this object, allowing us to overload various o...
__device__ __host__ RegType getPhase(const RegType in[18])
__device__ __host__ RegType getPhase(const RegType in[18]) const
__device__ __host__ void saveGhost(const RegType v[length], int x, int dir, int parity)
#define QUDA_MAX_GEOMETRY
Maximum geometry supported by a field. This essentially is the maximum number of dimensions supported...
GhostAccessor(const GaugeField &, void *gauge_=0, void **ghost_=0)
TIFROrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0)
gauge::QDPJITOrder< T, 2 *Nc *Nc > type
__device__ __host__ int Ndim() const
static __inline__ dim3 dim3 void size_t cudaStream_t int enum cudaTextureReadMode readMode static __inline__ const struct texture< T, dim, readMode > & tex
__device__ __host__ void load(RegType v[length], int x, int dir, int parity) const
__device__ __host__ void atomic_add(int dim, int parity, int x_cb, int row, int col, complex< Float > &val) const
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Reconstruct(const Reconstruct< 13, Float > &recon)
GhostAccessor(const GhostAccessor< Float, nColor, QUDA_QDP_GAUGE_ORDER, native_ghost > &a)
mapper< Float >::type RegType
__device__ __host__ Matrix()
Reconstruct(const GaugeField &u)
__device__ __host__ complex< Float > & operator()(int d, int parity, int x, int row, int col) const
TIFRPaddedOrder(const TIFRPaddedOrder &order)
QDPJITOrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0)
QudaReconstructType Reconstruct() const
__device__ __host__ void save(const RegType v[18], int x, int dir, int parity)
__host__ double device_norm2(int dim) const
__device__ __host__ Float milcStaggeredPhase(int dim, const int x[], const I R[])
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
gauge::FloatNOrder< float, N, 4, 8, stag, huge_alloc > type
CPSOrder(const CPSOrder &order)
Accessor(const Accessor< Float, nColor, QUDA_FLOAT2_GAUGE_ORDER > &a)
void comm_allreduce(double *data)
Reconstruct(const Reconstruct< 9, Float > &recon)
__device__ __host__ T getDeterminant(const Mat< T, 3 > &a)
__host__ __device__ ValueType conj(ValueType x)
__device__ __host__ int indexFloatN(int dim, int parity, int x_cb, int row, int col, int stride, int offset_cb)
gauge::FloatNOrder< T, 2 *Nc *Nc, 2, 2 *Nc *Nc > type
Accessor(const GaugeField &U, void *gauge_=0, void **ghost_=0)
__device__ __host__ void Unpack(RegType out[18], const RegType in[12], int idx, int dir, const RegType phase, const I *X, const int *R) const
__device__ __host__ void Unpack(RegType out[18], const RegType in[12], int idx, int dir, const RegType phase, const I *X, const int *R) const
static __inline__ size_t size_t d
GhostAccessor(const GaugeField &U, void *gauge_=0, void **ghost_=0)
static __inline__ enum cudaRoundMode mode enum cudaRoundMode mode enum cudaRoundMode mode enum cudaRoundMode mode int val
__device__ __host__ int getPaddedIndex(int x_cb, int parity) const
Compute the index into the padded field. Assumes that parity doesn't change from unpadded to padded...
gauge::FloatNOrder< double, N, 2, 13, stag, huge_alloc > type
__device__ __host__ const gauge_wrapper< Float, CPSOrder< Float, length > > operator()(int dim, int x_cb, int parity) const
This accessor routine returns a const gauge_wrapper to this object, allowing us to overload various o...
GhostAccessor(const GaugeField &U, void *gauge_, void **ghost_=0)
__device__ __host__ const gauge_wrapper< Float, QDPJITOrder< Float, length > > operator()(int dim, int x_cb, int parity) const
This accessor routine returns a const gauge_wrapper to this object, allowing us to overload various o...
mapper< Float >::type RegType
const QudaTboundary tBoundary
__device__ __host__ const complex< Float > & operator()(int d, int parity, int x, int s_row, int s_col, int c_row, int c_col) const
QudaFieldLocation location
__device__ __host__ complex< Float > & operator()(int d, int parity, int x, int row, int col) const
mapper< Float >::type RegType
__device__ __host__ void save(const RegType v[18], int x, int dir, int parity)
Reconstruct(const GaugeField &u)
gauge::TIFRPaddedOrder< T, 2 *Nc *Nc > type
This is just a dummy structure we use for trove to define the required structure size.
__host__ __device__ ReduceType operator()(quda::complex< Float > x)
mapper< Float >::type RegType
__device__ __host__ complex< Float > & operator()(int d, int parity, int x, int row, int col) const
__device__ __host__ void loadGhost(RegType v[length], int x, int dir, int parity) const
static __device__ __host__ void getCoords(int x[], int cb_index, const I X[], int parity)