14 #ifdef GPU_GAUGE_TOOLS 16 template <
typename Float,
typename Order>
17 struct GaugePhaseArg {
23 complex<Float> i_mu_phase;
24 GaugePhaseArg(
const Order &order,
const GaugeField &u)
25 : order(order), threads(u.VolumeCB()), i_mu(u.iMu())
29 Float dir = u.StaggeredPhaseApplied() ? -1.0 : 1.0;
31 i_mu_phase = complex<Float>(
cos(M_PI * u.iMu() / (u.X()[3]*
comm_dim(3)) ),
32 dir *
sin(M_PI * u.iMu() / (u.X()[3]*
comm_dim(3))) );
34 for (
int d=0;
d<4;
d++)
X[
d] = u.X()[
d];
40 bool last_node_in_t =
true;
44 GaugePhaseArg(
const GaugePhaseArg &
arg)
45 : order(
arg.order), tBoundary(
arg.tBoundary), threads(
arg.threads),
46 i_mu(
arg.i_mu), i_mu_phase(
arg.i_mu_phase) {
54 template <
int dim,
typename Float, QudaStaggeredPhase phaseType,
typename Arg>
55 __device__ __host__ Float getPhase(
int x,
int y,
int z,
int t, Arg &
arg) {
59 phase = (1.0 - 2.0 * (
t % 2) );
60 }
else if (
dim == 1) {
61 phase = (1.0 - 2.0 * ((
t +
x) % 2) );
62 }
else if (
dim == 2) {
63 phase = (1.0 - 2.0 * ((
t +
x +
y) % 2) );
64 }
else if (
dim == 3) {
65 phase = (
t ==
arg.X[3]-1) ?
arg.tBoundary : 1.0;
69 phase = (1.0 - 2.0 * ((3 +
t +
z +
y) % 2) );
70 }
else if (
dim == 1) {
71 phase = (1.0 - 2.0 * ((2 +
t +
z) % 2) );
72 }
else if (
dim == 2) {
73 phase = (1.0 - 2.0 * ((1 +
t) % 2) );
74 }
else if (
dim == 3) {
75 phase = (
t ==
arg.X[3]-1) ?
arg.tBoundary : 1.0;
80 }
else if (
dim == 1) {
81 phase = (1.0 - 2.0 * ((1 +
x) % 2) );
82 }
else if (
dim == 2) {
83 phase = (1.0 - 2.0 * ((1 +
x +
y) % 2) );
84 }
else if (
dim == 3) {
85 phase = ((
t ==
arg.X[3]-1) ?
arg.tBoundary : 1.0) *
86 (1.0 - 2 * ((1 +
x +
y +
z) % 2) );
92 template <
typename Float,
int length, QudaStaggeredPhase phaseType,
int dim,
typename Arg>
93 __device__ __host__
void gaugePhase(
int indexCB,
int parity, Arg &
arg) {
94 typedef typename mapper<Float>::type RegType;
99 RegType phase = getPhase<dim,Float,phaseType>(
x[0],
x[1],
x[2],
x[3],
arg);
105 if (
dim==3 &&
arg.i_mu != 0.0) {
106 complex<RegType>* v =
reinterpret_cast<complex<RegType>*
>(u);
116 template <
typename Float,
int length, QudaStaggeredPhase phaseType,
typename Arg>
117 void gaugePhase(Arg &
arg) {
119 for (
int indexCB=0; indexCB <
arg.threads; indexCB++) {
120 gaugePhase<Float,length,phaseType,0>(indexCB,
parity,
arg);
121 gaugePhase<Float,length,phaseType,1>(indexCB,
parity,
arg);
122 gaugePhase<Float,length,phaseType,2>(indexCB,
parity,
arg);
123 gaugePhase<Float,length,phaseType,3>(indexCB,
parity,
arg);
131 template <
typename Float,
int length, QudaStaggeredPhase phaseType,
typename Arg>
132 __global__
void gaugePhaseKernel(Arg
arg) {
133 int indexCB = blockIdx.x *
blockDim.x + threadIdx.x;
134 if (indexCB >=
arg.threads)
return;
137 gaugePhase<Float,length,phaseType,0>(indexCB,
parity,
arg);
138 gaugePhase<Float,length,phaseType,1>(indexCB,
parity,
arg);
139 gaugePhase<Float,length,phaseType,2>(indexCB,
parity,
arg);
140 gaugePhase<Float,length,phaseType,3>(indexCB,
parity,
arg);
143 template <
typename Float,
int length, QudaStaggeredPhase phaseType,
typename Arg>
144 class GaugePhase : Tunable {
146 const GaugeField &meta;
150 unsigned int sharedBytesPerThread()
const {
return 0; }
151 unsigned int sharedBytesPerBlock(
const TuneParam &
param)
const {
return 0 ;}
153 bool tuneGridDim()
const {
return false; }
154 unsigned int minThreads()
const {
return arg.threads; }
158 :
arg(
arg), meta(meta), location(location) {
159 writeAuxString(
"stride=%d,prec=%lu",
arg.order.stride,
sizeof(Float));
161 virtual ~GaugePhase() { ; }
163 bool advanceBlockDim(TuneParam &
param)
const {
169 void initTuneParam(TuneParam &
param)
const {
174 void apply(
const cudaStream_t &
stream) {
178 gaugePhaseKernel<Float, length, phaseType, Arg>
179 <<<tp.grid, tp.block, tp.shared_bytes,
stream>>>(
arg);
181 gaugePhase<Float, length, phaseType, Arg>(
arg);
185 TuneKey tuneKey()
const {
186 return TuneKey(meta.VolString(),
typeid(*this).name(), aux);
189 void preTune() {
arg.order.save(); }
190 void postTune() {
arg.order.load(); }
192 long long flops()
const {
return 0; }
193 long long bytes()
const {
return 2 *
arg.threads * 2 *
arg.order.Bytes(); }
197 template <
typename Float,
int length,
typename Order>
200 GaugePhaseArg<Float,Order>
arg(order, u);
202 GaugePhaseArg<Float,Order> > phase(
arg, u, location);
205 GaugePhaseArg<Float,Order>
arg(order, u);
207 GaugePhaseArg<Float,Order> > phase(
arg, u, location);
210 GaugePhaseArg<Float,Order>
arg(order, u);
212 GaugePhaseArg<Float,Order> > phase(
arg, u, location);
222 template <
typename Float>
223 void gaugePhase(GaugeField &u) {
231 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type G;
232 gaugePhase<Float,length>(G(u), u, location);
234 errorQuda(
"Unsupported reconstruction type");
237 errorQuda(
"Gauge field %d order not supported", u.Order());
246 #ifdef GPU_GAUGE_TOOLS 248 gaugePhase<double>(u);
250 gaugePhase<float>(u);
QudaVerbosity getVerbosity()
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
void applyGaugePhase(GaugeField &u)
__host__ __device__ ValueType sin(ValueType x)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Main header file for host and device accessors to GaugeFields.
enum QudaFieldLocation_s QudaFieldLocation
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
__host__ __device__ ValueType cos(ValueType x)
virtual void initTuneParam(TuneParam ¶m) const
virtual bool advanceBlockDim(TuneParam ¶m) const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
static __inline__ size_t size_t d
QudaPrecision Precision() const
static __device__ __host__ void getCoords(int x[], int cb_index, const I X[], int parity)