15 #ifdef GPU_GAUGE_TOOLS 17 template <
typename Float,
int Nc,
typename Order>
18 struct GaugePhaseArg {
19 static constexpr
int nColor = Nc;
25 complex<Float> i_mu_phase;
26 GaugePhaseArg(
const Order &order,
const GaugeField &u)
27 : order(order), threads(u.VolumeCB()), i_mu(u.iMu())
31 Float dir = u.StaggeredPhaseApplied() ? -1.0 : 1.0;
33 i_mu_phase = complex<Float>(
cos(M_PI * u.iMu() / (u.X()[3]*
comm_dim(3)) ),
34 dir *
sin(M_PI * u.iMu() / (u.X()[3]*
comm_dim(3))) );
36 for (
int d=0; d<4; d++) X[d] = u.X()[d];
42 bool last_node_in_t =
true;
46 GaugePhaseArg(
const GaugePhaseArg &
arg)
47 : order(arg.order), tBoundary(arg.tBoundary), threads(arg.threads),
48 i_mu(arg.i_mu), i_mu_phase(arg.i_mu_phase) {
49 for (
int d=0; d<4; d++) X[d] = arg.X[d];
56 template <
int dim,
typename Float, QudaStaggeredPhase phaseType,
typename Arg>
57 __device__ __host__ Float getPhase(
int x,
int y,
int z,
int t, Arg &
arg) {
61 phase = (1.0 - 2.0 * (t % 2) );
62 }
else if (dim == 1) {
63 phase = (1.0 - 2.0 * ((t + x) % 2) );
64 }
else if (dim == 2) {
65 phase = (1.0 - 2.0 * ((t + x + y) % 2) );
66 }
else if (dim == 3) {
67 phase = (t == arg.X[3]-1) ? arg.tBoundary : 1.0;
71 phase = (1.0 - 2.0 * ((3 + t + z + y) % 2) );
72 }
else if (dim == 1) {
73 phase = (1.0 - 2.0 * ((2 + t + z) % 2) );
74 }
else if (dim == 2) {
75 phase = (1.0 - 2.0 * ((1 + t) % 2) );
76 }
else if (dim == 3) {
77 phase = (t == arg.X[3]-1) ? arg.tBoundary : 1.0;
82 }
else if (dim == 1) {
83 phase = (1.0 - 2.0 * ((1 + x) % 2) );
84 }
else if (dim == 2) {
85 phase = (1.0 - 2.0 * ((1 + x + y) % 2) );
86 }
else if (dim == 3) {
87 phase = ((t == arg.X[3]-1) ? arg.tBoundary : 1.0) *
88 (1.0 - 2 * ((1 + x + y + z) % 2) );
94 template <
typename Float, QudaStaggeredPhase phaseType,
int dim,
typename Arg>
95 __device__ __host__
void gaugePhase(
int indexCB,
int parity, Arg &arg) {
96 typedef typename mapper<Float>::type real;
101 real phase = getPhase<dim,Float,phaseType>(x[0], x[1], x[2], x[3],
arg);
106 if (dim==3 && arg.i_mu != 0.0) u *= arg.i_mu_phase;
108 arg.order(dim, indexCB, parity) = u;
114 template <
typename Float, QudaStaggeredPhase phaseType,
typename Arg>
115 void gaugePhase(Arg &arg) {
116 for (
int parity=0; parity<2; parity++) {
117 for (
int indexCB=0; indexCB < arg.threads; indexCB++) {
118 gaugePhase<Float,phaseType,0>(indexCB,
parity,
arg);
119 gaugePhase<Float,phaseType,1>(indexCB,
parity,
arg);
120 gaugePhase<Float,phaseType,2>(indexCB,
parity,
arg);
121 gaugePhase<Float,phaseType,3>(indexCB,
parity,
arg);
129 template <
typename Float, QudaStaggeredPhase phaseType,
typename Arg>
130 __global__
void gaugePhaseKernel(Arg arg) {
131 int indexCB = blockIdx.x * blockDim.x + threadIdx.x;
132 if (indexCB >= arg.threads)
return;
133 int parity = blockIdx.y * blockDim.y + threadIdx.y;
134 gaugePhase<Float,phaseType,0>(indexCB,
parity,
arg);
135 gaugePhase<Float,phaseType,1>(indexCB,
parity,
arg);
136 gaugePhase<Float,phaseType,2>(indexCB,
parity,
arg);
137 gaugePhase<Float,phaseType,3>(indexCB,
parity,
arg);
140 template <
typename Float, QudaStaggeredPhase phaseType,
typename Arg>
141 class GaugePhase : TunableVectorY {
143 const GaugeField &meta;
146 bool tuneGridDim()
const {
return false; }
147 unsigned int minThreads()
const {
return arg.threads; }
150 GaugePhase(Arg &arg,
const GaugeField &meta)
151 : TunableVectorY(2), arg(arg), meta(meta) {
152 writeAuxString(
"stride=%d,prec=%lu",arg.order.stride,
sizeof(Float));
154 virtual ~GaugePhase() { ; }
156 void apply(
const cudaStream_t &
stream) {
159 gaugePhaseKernel<Float, phaseType, Arg>
160 <<<tp.grid, tp.block, tp.shared_bytes, stream>>>(
arg);
162 gaugePhase<Float, phaseType, Arg>(
arg);
166 TuneKey tuneKey()
const {
167 return TuneKey(meta.VolString(),
typeid(*this).name(), aux);
170 void preTune() { arg.order.save(); }
171 void postTune() { arg.order.load(); }
173 long long flops()
const {
return 0; }
174 long long bytes()
const {
return 2 * arg.threads * 2 * arg.order.Bytes(); }
178 template <
typename Float,
int Nc,
typename Order>
179 void gaugePhase(Order order,
const GaugeField &u) {
181 GaugePhaseArg<Float,Nc,Order>
arg(order, u);
183 GaugePhaseArg<Float,Nc,Order> > phase(arg, u);
186 GaugePhaseArg<Float,Nc,Order>
arg(order, u);
188 GaugePhaseArg<Float,Nc,Order> > phase(arg, u);
191 GaugePhaseArg<Float,Nc,Order>
arg(order, u);
193 GaugePhaseArg<Float,Nc,Order> > phase(arg, u);
203 template <
typename Float>
204 void gaugePhase(GaugeField &u) {
205 if (u.Ncolor() != 3)
errorQuda(
"Unsupported number of colors %d", u.Ncolor());
206 constexpr
int Nc = 3;
210 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type G;
211 gaugePhase<Float,Nc>(G(u), u);
213 errorQuda(
"Unsupported reconstruction type");
216 errorQuda(
"Gauge field %d order not supported", u.Order());
225 #ifdef GPU_GAUGE_TOOLS 227 gaugePhase<double>(u);
229 gaugePhase<float>(u);
QudaVerbosity getVerbosity()
void applyGaugePhase(GaugeField &u)
__host__ __device__ ValueType sin(ValueType x)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Main header file for host and device accessors to GaugeFields.
static int commDim[QUDA_MAX_DIM]
virtual void exchangeGhost(QudaLinkDirection=QUDA_LINK_BACKWARDS)=0
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
__host__ __device__ ValueType cos(ValueType x)
QudaGhostExchange GhostExchange() const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const
__host__ __device__ int getCoords(int coord[], const Arg &arg, int &idx, int parity, int &dim)
Compute the space-time coordinates we are at.