QUDA v1.1.0
A library for QCD on GPUs
staggered_prolong_restrict.cu
#include <color_spinor_field.h>
#include <color_spinor_field_order.h>
#include <tune_quda.h>
#include <multigrid_helper.cuh>
#include <index_helper.cuh>

namespace quda {

#if defined(GPU_MULTIGRID) && defined(GPU_STAGGERED_DIRAC)

  enum class StaggeredTransferType {
    STAGGERED_TRANSFER_PROLONG,
    STAGGERED_TRANSFER_RESTRICT,
    STAGGERED_TRANSFER_INVALID = QUDA_INVALID_ENUM
  };

  using namespace quda::colorspinor;

  // Use a trait to define whether the "out" spin is the fine or coarse spin
  template<int fineSpin, int coarseSpin, StaggeredTransferType transferType> struct StaggeredTransferOutSpin { static constexpr int outSpin = -1; };
  template<int fineSpin, int coarseSpin> struct StaggeredTransferOutSpin<fineSpin,coarseSpin,StaggeredTransferType::STAGGERED_TRANSFER_PROLONG> { static constexpr int outSpin = fineSpin; };
  template<int fineSpin, int coarseSpin> struct StaggeredTransferOutSpin<fineSpin,coarseSpin,StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT> { static constexpr int outSpin = coarseSpin; };

  // Use a trait to define whether the "in" spin is the fine or coarse spin
  template<int fineSpin, int coarseSpin, StaggeredTransferType transferType> struct StaggeredTransferInSpin { static constexpr int inSpin = -1; };
  template<int fineSpin, int coarseSpin> struct StaggeredTransferInSpin<fineSpin,coarseSpin,StaggeredTransferType::STAGGERED_TRANSFER_PROLONG> { static constexpr int inSpin = coarseSpin; };
  template<int fineSpin, int coarseSpin> struct StaggeredTransferInSpin<fineSpin,coarseSpin,StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT> { static constexpr int inSpin = fineSpin; };

  // Use a trait to define whether the "out" color is the fine or coarse color
  template<int fineColor, int coarseColor, StaggeredTransferType transferType> struct StaggeredTransferOutColor { static constexpr int outColor = -1; };
  template<int fineColor, int coarseColor> struct StaggeredTransferOutColor<fineColor,coarseColor,StaggeredTransferType::STAGGERED_TRANSFER_PROLONG> { static constexpr int outColor = fineColor; };
  template<int fineColor, int coarseColor> struct StaggeredTransferOutColor<fineColor,coarseColor,StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT> { static constexpr int outColor = coarseColor; };

  // Use a trait to define whether the "in" color is the fine or coarse color
  template<int fineColor, int coarseColor, StaggeredTransferType transferType> struct StaggeredTransferInColor { static constexpr int inColor = -1; };
  template<int fineColor, int coarseColor> struct StaggeredTransferInColor<fineColor,coarseColor,StaggeredTransferType::STAGGERED_TRANSFER_PROLONG> { static constexpr int inColor = coarseColor; };
  template<int fineColor, int coarseColor> struct StaggeredTransferInColor<fineColor,coarseColor,StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT> { static constexpr int inColor = fineColor; };

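  // Illustrative compile-time checks (added as a sketch, not part of the original
  // interface) for the parameter values this file actually instantiates below
  // (fineSpin = 1, coarseSpin = 2, fineColor = 3, coarseColor = 24): prolongation
  // writes the fine field and reads the coarse one, restriction the reverse.
  static_assert(StaggeredTransferOutSpin<1,2,StaggeredTransferType::STAGGERED_TRANSFER_PROLONG>::outSpin == 1, "prolongation outputs fine spin");
  static_assert(StaggeredTransferInColor<3,24,StaggeredTransferType::STAGGERED_TRANSFER_PROLONG>::inColor == 24, "prolongation reads coarse color");
  static_assert(StaggeredTransferOutColor<3,24,StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT>::outColor == 24, "restriction outputs coarse color");
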
  // Function to return the fine ColorSpinorField
  template<StaggeredTransferType transferType>
  inline const ColorSpinorField& fineColorSpinorField(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    errorQuda("Invalid transfer type %d for fineColorSpinorField", (int)transferType);
    return quoteIn;
  }

  // on prolong, the out vector is the fine vector
  template<>
  inline const ColorSpinorField& fineColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_PROLONG>(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    return quoteOut;
  }

  // on restrict, the in vector is the fine vector
  template<>
  inline const ColorSpinorField& fineColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT>(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    return quoteIn;
  }

  // Function to return the coarse ColorSpinorField
  template<StaggeredTransferType transferType>
  inline const ColorSpinorField& coarseColorSpinorField(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    errorQuda("Invalid transfer type %d for coarseColorSpinorField", (int)transferType);
    return quoteIn;
  }

  // on prolong, the in vector is the coarse vector
  template<>
  inline const ColorSpinorField& coarseColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_PROLONG>(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    return quoteIn;
  }

  // on restrict, the out vector is the coarse vector
  template<>
  inline const ColorSpinorField& coarseColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT>(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    return quoteOut;
  }
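
  // Sketch of how the two selectors pair up, assuming the calling convention of the
  // entry points at the bottom of this file (prolongation maps the coarse "in" field
  // onto the fine "out" field; restriction maps the fine "in" field onto the coarse "out"):
  //
  //   fineColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_PROLONG>(in, out);    // -> out (fine)
  //   coarseColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_PROLONG>(in, out);  // -> in  (coarse)
  //   fineColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT>(in, out);   // -> in  (fine)
  //   coarseColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT>(in, out); // -> out (coarse)
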
  /**
     Kernel argument struct
  */
  template <typename Float, int fineSpin, int fineColor, int coarseSpin, int coarseColor, QudaFieldOrder order, StaggeredTransferType theTransferType>
  struct StaggeredProlongRestrictArg {
    FieldOrderCB<Float, StaggeredTransferOutSpin<fineSpin,coarseSpin,theTransferType>::outSpin, StaggeredTransferOutColor<fineColor,coarseColor,theTransferType>::outColor,1,order> out;
    const FieldOrderCB<Float, StaggeredTransferInSpin<fineSpin,coarseSpin,theTransferType>::inSpin, StaggeredTransferInColor<fineColor,coarseColor,theTransferType>::inColor,1,order> in;
    const int *geo_map; // need to make a device copy of this
    const spin_mapper<fineSpin,coarseSpin> spin_map;
    const int parity; // the parity of the output field (if single parity)
    const int nParity; // number of parities of input fine field
    const int fineX[4]; // fine lattice dimensions
    const int fineVolumeCB; // fine checkerboarded volume
    const int coarseVolumeCB; // coarse checkerboarded volume
    static constexpr StaggeredTransferType transferType = theTransferType;

    StaggeredProlongRestrictArg(ColorSpinorField &out, const ColorSpinorField &in,
                                const int *geo_map, const int parity)
      : out(out), in(in), geo_map(geo_map), spin_map(), parity(parity),
        nParity(fineColorSpinorField<transferType>(in,out).SiteSubset()),
        fineX{fineColorSpinorField<transferType>(in,out).X()[0],
              fineColorSpinorField<transferType>(in,out).X()[1],
              fineColorSpinorField<transferType>(in,out).X()[2],
              fineColorSpinorField<transferType>(in,out).X()[3]},
        fineVolumeCB(fineColorSpinorField<transferType>(in,out).VolumeCB()),
        coarseVolumeCB(coarseColorSpinorField<transferType>(in,out).VolumeCB())
    {;}

    StaggeredProlongRestrictArg(const StaggeredProlongRestrictArg<Float,fineSpin,fineColor,coarseSpin,coarseColor,order,theTransferType> &arg)
      : out(arg.out), in(arg.in), geo_map(arg.geo_map), spin_map(),
        parity(arg.parity), nParity(arg.nParity), fineX{arg.fineX[0],arg.fineX[1],arg.fineX[2],arg.fineX[3]},
        fineVolumeCB(arg.fineVolumeCB), coarseVolumeCB(arg.coarseVolumeCB)
    {;}
  };

  /**
     Performs the permutation between coarse and fine degrees of freedom:
     prolongation scatters each coarse degree of freedom onto the fine grid,
     while restriction gathers the fine degrees of freedom into the coarse vector
  */
  template <StaggeredTransferType transferType, class OutAccessor, class InAccessor, typename S>
  __device__ __host__ inline void staggeredProlongRestrict(OutAccessor& out, const InAccessor &in,
      int parity, int x_cb, int c, const int *geo_map, const S& spin_map, const int fineVolumeCB, const int coarseVolumeCB, const int X[]) {
    int x = parity*fineVolumeCB + x_cb;
    int x_coarse = geo_map[x];
    int parity_coarse = (x_coarse >= coarseVolumeCB) ? 1 : 0;
    int x_coarse_cb = x_coarse - parity_coarse*coarseVolumeCB;

    // coarse_color = 8*fine_color + corner of the hypercube
    int fineCoords[5];
    fineCoords[4] = 0;
    getCoords(fineCoords,x_cb,X,parity);
    int hyperCorner = 4*(fineCoords[3]%2)+2*(fineCoords[2]%2)+(fineCoords[1]%2);

    if (transferType == StaggeredTransferType::STAGGERED_TRANSFER_PROLONG) {
      out(parity,x_cb,0,c) = in(parity_coarse, x_coarse_cb, spin_map(0,parity), 8*c+hyperCorner);
    } else {
      out(parity_coarse, x_coarse_cb, spin_map(0,parity), 8*c+hyperCorner) = in(parity,x_cb,0,c);
    }
  }
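
  // Worked example (illustrative only): for a fine site at (x,y,z,t) = (3,2,1,0),
  //   hyperCorner = 4*(t%2) + 2*(z%2) + (y%2) = 4*0 + 2*1 + 0 = 2,
  // so fine color c of that site lands in coarse color 8*c + 2. The remaining
  // hypercube bit (the x direction) is recovered from the fine-site parity, which
  // is recorded in the coarse "spin" index through spin_map(0,parity).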

  template <typename Float, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg>
  void StaggeredProlongRestrict(Arg &arg) {
    for (int parity=0; parity<arg.nParity; parity++) {
      parity = (arg.nParity == 2) ? parity : arg.parity;

      // We don't actually have to loop over spin because fineSpin = 1, coarseSpin = fine parity
      for (int x_cb=0; x_cb<arg.fineVolumeCB; x_cb++) {
        for (int c=0; c<fineColor; c++) {
          staggeredProlongRestrict<Arg::transferType>(arg.out, arg.in, parity, x_cb, c, arg.geo_map, arg.spin_map, arg.fineVolumeCB, arg.coarseVolumeCB, arg.fineX);
        }
      }
    }
  }

  template <typename Float, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg>
  __global__ void StaggeredProlongRestrictKernel(Arg arg) {
    int x_cb = blockIdx.x*blockDim.x + threadIdx.x;
    int parity = arg.nParity == 2 ? blockDim.y*blockIdx.y + threadIdx.y : arg.parity;
    if (x_cb >= arg.fineVolumeCB) return;

    int c = blockDim.z*blockIdx.z + threadIdx.z;
    if (c >= fineColor) return;

    staggeredProlongRestrict<Arg::transferType>(arg.out, arg.in, parity, x_cb, c, arg.geo_map, arg.spin_map, arg.fineVolumeCB, arg.coarseVolumeCB, arg.fineX);
  }
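
  // Thread-geometry sketch (assumed; the actual shape is chosen by the autotuner via
  // TunableVectorYZ below): x indexes the fine checkerboard site, y the fine parity
  // when both parities are present, and z the fine color, e.g. a plain launch like
  //
  //   dim3 block(128, 1, 1);
  //   dim3 grid((arg.fineVolumeCB + block.x - 1)/block.x, arg.nParity, fineColor);
  //   StaggeredProlongRestrictKernel<Float,fineSpin,fineColor,coarseSpin,coarseColor,decltype(arg)><<<grid, block>>>(arg);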

  template <typename Float, int fineSpin, int fineColor, int coarseSpin, int coarseColor, StaggeredTransferType transferType>
  class StaggeredProlongRestrictLaunch : public TunableVectorYZ {

    ColorSpinorField &out;
    const ColorSpinorField &in;
    const int *fine_to_coarse;
    int parity;
    QudaFieldLocation location;
    char vol[TuneKey::volume_n];

    bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
    unsigned int minThreads() const { return fineColorSpinorField<transferType>(in,out).VolumeCB(); } // fine parity is the block y dimension

  public:
    StaggeredProlongRestrictLaunch(ColorSpinorField &out, const ColorSpinorField &in,
                                   const int *fine_to_coarse, int parity)
      : TunableVectorYZ(fineColorSpinorField<transferType>(in,out).SiteSubset(), fineColor), out(out), in(in),
        fine_to_coarse(fine_to_coarse), parity(parity), location(checkLocation(out, in))
    {
      strcpy(vol, fineColorSpinorField<transferType>(in,out).VolString());
      strcat(vol, ",");
      strcat(vol, coarseColorSpinorField<transferType>(in,out).VolString());

      strcpy(aux, fineColorSpinorField<transferType>(in,out).AuxString());
      strcat(aux, ",");
      strcat(aux, coarseColorSpinorField<transferType>(in,out).AuxString());
    }

    void apply(const qudaStream_t &stream) {
      if (location == QUDA_CPU_FIELD_LOCATION) {
        if (out.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
          StaggeredProlongRestrictArg<Float,fineSpin,fineColor,coarseSpin,coarseColor,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER,transferType>
            arg(out, in, fine_to_coarse, parity);
          StaggeredProlongRestrict<Float,fineSpin,fineColor,coarseSpin,coarseColor>(arg);
        } else {
          errorQuda("Unsupported field order %d", out.FieldOrder());
        }
      } else {
        if (out.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER) {
          TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());

          StaggeredProlongRestrictArg<Float,fineSpin,fineColor,coarseSpin,coarseColor,QUDA_FLOAT2_FIELD_ORDER,transferType>
            arg(out, in, fine_to_coarse, parity);
          qudaLaunchKernel(StaggeredProlongRestrictKernel<Float,fineSpin,fineColor,coarseSpin,coarseColor,decltype(arg)>, tp, stream, arg);
        } else {
          errorQuda("Unsupported field order %d", out.FieldOrder());
        }
      }
    }

    TuneKey tuneKey() const { return TuneKey(vol, typeid(*this).name(), aux); }

    long long flops() const { return 0; }

    long long bytes() const {
      return in.Bytes() + out.Bytes() + fineColorSpinorField<transferType>(in,out).SiteSubset()*fineColorSpinorField<transferType>(in,out).VolumeCB()*sizeof(int);
    }

  };

  template <typename Float, int fineSpin, int fineColor, int coarseSpin, int coarseColor, StaggeredTransferType transferType>
  void StaggeredProlongRestrict(ColorSpinorField &out, const ColorSpinorField &in,
                                const int *fine_to_coarse, int parity) {

    StaggeredProlongRestrictLaunch<Float, fineSpin, fineColor, coarseSpin, coarseColor, transferType>
      staggered_prolong_restrict(out, in, fine_to_coarse, parity);
    staggered_prolong_restrict.apply(0);
  }

  template <int fineSpin, int fineColor, int coarseSpin, int coarseColor, StaggeredTransferType transferType>
  void StaggeredProlongRestrict(ColorSpinorField &out, const ColorSpinorField &in,
                                const int *fine_to_coarse, int parity) {
    // check precision
    QudaPrecision precision = checkPrecision(out, in);

    if (precision == QUDA_DOUBLE_PRECISION) {
#ifdef GPU_MULTIGRID_DOUBLE
      StaggeredProlongRestrict<double,fineSpin,fineColor,coarseSpin,coarseColor,transferType>(out, in, fine_to_coarse, parity);
#else
      errorQuda("Double precision multigrid has not been enabled");
#endif
    } else if (precision == QUDA_SINGLE_PRECISION) {
      StaggeredProlongRestrict<float,fineSpin,fineColor,coarseSpin,coarseColor,transferType>(out, in, fine_to_coarse, parity);
    } else {
      errorQuda("Unsupported precision %d", out.Precision());
    }

  }

  template <StaggeredTransferType transferType>
  void StaggeredProlongRestrict(ColorSpinorField &out, const ColorSpinorField &in,
                                const int *fine_to_coarse, const int * const * spin_map, int parity) {

    if (out.FieldOrder() != in.FieldOrder())
      errorQuda("Field orders do not match (out=%d, in=%d)",
                out.FieldOrder(), in.FieldOrder());

    if (fineColorSpinorField<transferType>(in,out).Nspin() != 1) errorQuda("Fine spin %d is not supported", fineColorSpinorField<transferType>(in,out).Nspin());
    const int fineSpin = 1;

    if (coarseColorSpinorField<transferType>(in,out).Nspin() != 2) errorQuda("Coarse spin %d is not supported", coarseColorSpinorField<transferType>(in,out).Nspin());
    const int coarseSpin = 2;

    // first check that the spin_map matches the spin_mapper
    spin_mapper<fineSpin,coarseSpin> mapper;
    for (int s=0; s<fineSpin; s++)
      for (int p=0; p<2; p++)
        if (mapper(s,p) != spin_map[s][p]) errorQuda("Spin map does not match spin_mapper");

    if (fineColorSpinorField<transferType>(in,out).Ncolor() != 3) errorQuda("Unsupported fine nColor %d", fineColorSpinorField<transferType>(in,out).Ncolor());
    const int fineColor = 3;

    if (coarseColorSpinorField<transferType>(in,out).Ncolor() != 8*fineColor) errorQuda("Unsupported coarse nColor %d", coarseColorSpinorField<transferType>(in,out).Ncolor());
    const int coarseColor = 8*fineColor;

    StaggeredProlongRestrict<fineSpin,fineColor,coarseSpin,coarseColor,transferType>(out, in, fine_to_coarse, parity);
  }
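
  // Degree-of-freedom bookkeeping (illustrative): each coarse site aggregates a 2^4
  // hypercube of fine sites, i.e. 16 sites x 3 fine colors = 48 fine dof, matching
  // coarseSpin * coarseColor = 2 * 24 = 48 coarse dof per site.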

#endif // defined(GPU_MULTIGRID) && defined(GPU_STAGGERED_DIRAC)

  void StaggeredProlongate(ColorSpinorField &out, const ColorSpinorField &in,
                           const int *fine_to_coarse, const int * const * spin_map, int parity) {
#if defined(GPU_MULTIGRID) && defined(GPU_STAGGERED_DIRAC)

    StaggeredProlongRestrict<StaggeredTransferType::STAGGERED_TRANSFER_PROLONG>(out, in, fine_to_coarse, spin_map, parity);

#else
    errorQuda("Staggered multigrid has not been built");
#endif
  }

  void StaggeredRestrict(ColorSpinorField &out, const ColorSpinorField &in,
                         const int *fine_to_coarse, const int * const * spin_map, int parity) {
#if defined(GPU_MULTIGRID) && defined(GPU_STAGGERED_DIRAC)

    StaggeredProlongRestrict<StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT>(out, in, fine_to_coarse, spin_map, parity);

#else
    errorQuda("Staggered multigrid has not been built");
#endif
  }
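
  // Example usage (illustrative; the field and map names are hypothetical, standing in
  // for the objects normally owned by the Transfer class at this level):
  //
  //   quda::StaggeredProlongate(fine_out, coarse_in, fine_to_coarse, spin_map, parity);
  //   quda::StaggeredRestrict(coarse_out, fine_in, fine_to_coarse, spin_map, parity);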

} // end namespace quda