QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
copy_color_spinor_mg.cuh
Go to the documentation of this file.
1 /*
2  Spinor reordering and copying routines. These are implemented to
3  un on both CPU and GPU. Here we are templating on the following:
4  - input precision
5  - output precision
6  - number of colors
7  - number of spins
8  - field ordering
9 */
10 
11 #include <color_spinor_field.h>
13 #include <tune_quda.h>
14 #include <utility> // for std::swap
15 
16 namespace quda {
17 
18  using namespace colorspinor;
19 
21  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename OutOrder, typename InOrder>
22  void packSpinor(OutOrder &outOrder, const InOrder &inOrder, int volume) {
23  for (int x=0; x<volume; x++) {
24  for (int s=0; s<Ns; s++) {
25  for (int c=0; c<Nc; c++) {
26  outOrder(0, x, s, c) = inOrder(0, x, s, c);
27  }
28  }
29  }
30  }
31 
33  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename OutOrder, typename InOrder>
34  __global__ void packSpinorKernel(OutOrder outOrder, const InOrder inOrder, int volume) {
35  int x = blockIdx.x * blockDim.x + threadIdx.x;
36  if (x >= volume) return;
37 
38  for (int s=0; s<Ns; s++) {
39  for (int c=0; c<Nc; c++) {
40  outOrder(0, x, s, c) = inOrder(0, x, s, c);
41  }
42  }
43  }
44 
45  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename OutOrder, typename InOrder>
46  class CopySpinor : Tunable {
47  const InOrder &in;
48  OutOrder &out;
49  const ColorSpinorField &meta; // this reference is for meta data only
51 
52  private:
53  unsigned int sharedBytesPerThread() const { return 0; }
54 
55  unsigned int sharedBytesPerBlock(const TuneParam &param) const { return 0; }
56  bool advanceSharedBytes(TuneParam &param) const { return false; } // Don't tune shared mem
57  bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
58  unsigned int minThreads() const { return meta.VolumeCB(); }
59 
60  public:
61  CopySpinor(OutOrder &out, const InOrder &in, const ColorSpinorField &meta, QudaFieldLocation location)
62  : out(out), in(in), meta(meta), location(location) { }
63  virtual ~CopySpinor() { ; }
64 
65  void apply(const cudaStream_t &stream) {
66  if (location == QUDA_CPU_FIELD_LOCATION) {
67  packSpinor<FloatOut, FloatIn, Ns, Nc>(out, in, meta.VolumeCB());
68  } else {
69  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
70  packSpinorKernel<FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder>
71  <<<tp.grid, tp.block, tp.shared_bytes, stream>>>
72  (out, in, meta.VolumeCB());
73  }
74  }
75 
76  TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), meta.AuxString()); }
77 
78  long long flops() const { return 0; }
79  long long bytes() const { return in.Bytes() + out.Bytes(); }
80  };
81 
82 
83  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename OutOrder, typename InOrder>
84  void genericCopyColorSpinor(OutOrder &outOrder, const InOrder &inOrder,
85  const ColorSpinorField &out, QudaFieldLocation location) {
86  CopySpinor<FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder> copy(outOrder, inOrder, out, location);
87  copy.apply(0);
88  }
89 
91  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename InOrder>
92  void genericCopyColorSpinor(InOrder &inOrder, ColorSpinorField &out,
93  QudaFieldLocation location, FloatOut *Out) {
94 
95  if (out.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER) {
97  ColorSpinor outOrder(out, 1, Out);
98  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(outOrder, inOrder, out, location);
99  } else if (out.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
101  ColorSpinor outOrder(out, 1, Out);
102  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(outOrder, inOrder, out, location);
103  } else {
104  errorQuda("Order %d not defined (Ns=%d, Nc=%d)", out.FieldOrder(), Ns, Nc);
105  }
106 
107  }
108 
110  template <typename FloatOut, typename FloatIn, int Ns, int Nc>
112  QudaFieldLocation location, FloatOut *Out, FloatIn *In) {
113 
114  if (in.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER) {
116  ColorSpinor inOrder(in, 1, In);
117  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, location, Out);
118  } else if (in.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
120  ColorSpinor inOrder(in, 1, In);
121  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, location, Out);
122  } else {
123  errorQuda("Order %d not defined (Ns=%d, Nc=%d)", in.FieldOrder(), Ns, Nc);
124  }
125 
126  }
127 
128 
129  template <int Ns, int Nc, typename dstFloat, typename srcFloat>
131  QudaFieldLocation location, dstFloat *Dst, srcFloat *Src) {
132 
133  if (dst.Ndim() != src.Ndim())
134  errorQuda("Number of dimensions %d %d don't match", dst.Ndim(), src.Ndim());
135 
136  if (dst.Volume() != src.Volume())
137  errorQuda("Volumes %d %d don't match", dst.Volume(), src.Volume());
138 
139  if (!( dst.SiteOrder() == src.SiteOrder() ||
143  src.SiteOrder() == QUDA_EVEN_ODD_SITE_ORDER) ) ) {
144  errorQuda("Subset orders %d %d don't match", dst.SiteOrder(), src.SiteOrder());
145  }
146 
147  if (dst.SiteSubset() != src.SiteSubset())
148  errorQuda("Subset types do not match %d %d", dst.SiteSubset(), src.SiteSubset());
149 
150  // We currently only support parity-ordered fields; even-odd or odd-even
152  errorQuda("Copying to full fields with lexicographical ordering is not currently supported");
153  }
154 
155  if (dst.SiteSubset() == QUDA_FULL_SITE_SUBSET) { // full field
156  if (src.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER ||
158  errorQuda("QDPJIT field ordering not supported for full site fields");
159  }
160 
161  // set for the source subset ordering
162  srcFloat *srcEven = Src ? Src : (srcFloat*)src.V();
163  srcFloat *srcOdd = (srcFloat*)((char*)srcEven + src.Bytes()/2);
164  if (src.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER) {
165  std::swap<srcFloat*>(srcEven, srcOdd);
166  }
167 
168  // set for the destination subset ordering
169  dstFloat *dstEven = Dst ? Dst : (dstFloat*)dst.V();
170  dstFloat *dstOdd = (dstFloat*)((char*)dstEven + dst.Bytes()/2);
171  if (dst.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER) {
172  std::swap<dstFloat*>(dstEven, dstOdd);
173  }
174 
175  genericCopyColorSpinor<dstFloat, srcFloat, Ns, Nc>(dst, src, location, dstEven, srcEven);
176  genericCopyColorSpinor<dstFloat, srcFloat, Ns, Nc>(dst, src, location, dstOdd, srcOdd);
177  } else { // parity field
178  genericCopyColorSpinor<dstFloat, srcFloat, Ns, Nc>(dst, src, location, Dst, Src);
179  }
180 
181  }
182 
183  template <int Nc, typename dstFloat, typename srcFloat>
185  QudaFieldLocation location, dstFloat *Dst, srcFloat *Src)
186  {
187 
188  if (dst.Nspin() != src.Nspin())
189  errorQuda("source and destination spins must match");
190 
191  if (dst.Nspin() == 4) {
192 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC)
193  copyGenericColorSpinor<4,Nc>(dst, src, location, Dst, Src);
194 #else
195  errorQuda("%s has not been built for Nspin=%d fields", __func__, src.Nspin());
196 #endif
197  } else if (dst.Nspin() == 2) {
198 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) || defined(GPU_STAGGERED_DIRAC)
199  copyGenericColorSpinor<2,Nc>(dst, src, location, Dst, Src);
200 #else
201  errorQuda("%s has not been built for Nspin=%d fields", __func__, src.Nspin());
202 #endif
203  } else if (dst.Nspin() == 1) {
204 #ifdef GPU_STAGGERED_DIRAC
205  copyGenericColorSpinor<1,Nc>(dst, src, location, Dst, Src);
206 #else
207  errorQuda("%s has not been built for Nspin=%d fields", __func__, src.Nspin());
208 #endif
209  } else {
210  errorQuda("Nspin=%d unsupported", dst.Nspin());
211  }
212 
213  }
214 
215 #ifdef GPU_MULTIGRID
216 #define INSTANTIATE_COLOR \
217  switch (src.Ncolor()) { \
218  case 1: CopyGenericColorSpinor<1>(dst, src, location, dst_ptr, src_ptr); break; \
219  case 2: CopyGenericColorSpinor<2>(dst, src, location, dst_ptr, src_ptr); break; \
220  case 4: CopyGenericColorSpinor<4>(dst, src, location, dst_ptr, src_ptr); break; \
221  case 6: CopyGenericColorSpinor<6>(dst, src, location, dst_ptr, src_ptr); break; \
222  case 9: CopyGenericColorSpinor<9>(dst, src, location, dst_ptr, src_ptr); break; \
223  case 12: CopyGenericColorSpinor<12>(dst, src, location, dst_ptr, src_ptr); break; \
224  case 16: CopyGenericColorSpinor<16>(dst, src, location, dst_ptr, src_ptr); break; \
225  case 18: CopyGenericColorSpinor<18>(dst, src, location, dst_ptr, src_ptr); break; \
226  case 24: CopyGenericColorSpinor<24>(dst, src, location, dst_ptr, src_ptr); break; \
227  case 32: CopyGenericColorSpinor<32>(dst, src, location, dst_ptr, src_ptr); break; \
228  case 36: CopyGenericColorSpinor<36>(dst, src, location, dst_ptr, src_ptr); break; \
229  case 48: CopyGenericColorSpinor<48>(dst, src, location, dst_ptr, src_ptr); break; \
230  case 72: CopyGenericColorSpinor<72>(dst, src, location, dst_ptr, src_ptr); break; \
231  case 96: CopyGenericColorSpinor<96>(dst, src, location, dst_ptr, src_ptr); break; \
232  case 256: CopyGenericColorSpinor<256>(dst, src, location, dst_ptr, src_ptr); break; \
233  case 576: CopyGenericColorSpinor<576>(dst, src, location, dst_ptr, src_ptr); break; \
234  case 768: CopyGenericColorSpinor<768>(dst, src, location, dst_ptr, src_ptr); break; \
235  case 1024: CopyGenericColorSpinor<1024>(dst, src, location, dst_ptr, src_ptr); break; \
236  default: errorQuda("Ncolors=%d not supported", src.Ncolor()); \
237  }
238 #else
239 #define INSTANTIATE_COLOR
240 #endif
241 
242 
243 } // namespace quda
QudaFieldLocation location
const ColorSpinorField & meta
void CopyGenericColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm=0, float *srcNorm=0)
unsigned int sharedBytesPerThread() const
const char * AuxString() const
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define errorQuda(...)
Definition: util_quda.h:121
void genericCopyColorSpinor(Out &outOrder, const In &inOrder, const ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location)
cudaStream_t * stream
long long flops() const
const char * VolString() const
__host__ __device__ void copy(T1 &a, const T2 &b)
void copyGenericColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0)
void apply(const cudaStream_t &stream)
QudaGaugeParam param
Definition: pack_test.cpp:17
void packSpinor(OutOrder &outOrder, const InOrder &inOrder, int volume)
unsigned int minThreads() const
cpuColorSpinorField * in
QudaSiteSubset SiteSubset() const
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:643
__global__ void packSpinorKernel(OutOrder outOrder, const InOrder inOrder, int volume)
enum QudaFieldLocation_s QudaFieldLocation
cpuColorSpinorField * out
__shared__ float s[]
QudaSiteOrder SiteOrder() const
CopySpinor(OutOrder &out, const InOrder &in, const ColorSpinorField &meta, QudaFieldLocation location)
long long bytes() const
bool advanceSharedBytes(TuneParam &param) const
unsigned int sharedBytesPerBlock(const TuneParam &param) const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52
QudaFieldOrder FieldOrder() const
TuneKey tuneKey() const