QUDA v1.1.0 — a library for lattice QCD on GPUs.
File: copy_gauge_mg.cu — fine-grained gauge-field copy routines used by the multigrid solver.
1 #include <gauge_field_order.h>
2 
3 #define FINE_GRAINED_ACCESS
4 
5 #include <copy_gauge_helper.cuh>
6 
7 namespace quda {
8 
9  template <typename sFloatOut, typename FloatIn, int Nc, typename InOrder>
10  void copyGaugeMG(const InOrder &inOrder, GaugeField &out, const GaugeField &in,
11  QudaFieldLocation location, sFloatOut *Out, sFloatOut **outGhost, int type)
12  {
13  typedef typename mapper<sFloatOut>::type FloatOut;
14  constexpr int length = 2*Nc*Nc;
15 
16  if (out.Reconstruct() != QUDA_RECONSTRUCT_NO)
17  errorQuda("Reconstruct type %d not supported", out.Reconstruct());
18 
19 #ifdef FINE_GRAINED_ACCESS
20  if (out.Precision() == QUDA_HALF_PRECISION) {
21  if (in.Precision() == QUDA_HALF_PRECISION) {
22  out.Scale(in.Scale());
23  } else {
24  InOrder in_(const_cast<GaugeField&>(in));
25  out.Scale( in.abs_max() );
26  }
27  }
28 #endif
29 
30  if (out.isNative()) {
31 
32 #ifdef FINE_GRAINED_ACCESS
33  if (outGhost) {
34  typedef typename gauge::FieldOrder<FloatOut,Nc,1,QUDA_FLOAT2_GAUGE_ORDER,false,sFloatOut> G;
35  copyGauge<FloatOut,FloatIn,length>(G(out,(void*)Out,(void**)outGhost), inOrder, out, in, location, type);
36  } else {
37  typedef typename gauge::FieldOrder<FloatOut,Nc,1,QUDA_FLOAT2_GAUGE_ORDER,true,sFloatOut> G;
38  copyGauge<FloatOut,FloatIn,length>(G(out,(void*)Out,(void**)outGhost), inOrder, out, in, location, type);
39  }
40 #else
41  typedef typename gauge_mapper<FloatOut,QUDA_RECONSTRUCT_NO,length>::type G;
42  copyGauge<FloatOut,FloatIn,length>
43  (G(out,Out,outGhost), inOrder, out, in, location, type);
44 #endif
45 
46  } else if (out.Order() == QUDA_QDP_GAUGE_ORDER) {
47 
48 #ifdef FINE_GRAINED_ACCESS
49  typedef typename gauge::FieldOrder<FloatOut,Nc,1,QUDA_QDP_GAUGE_ORDER,true,sFloatOut> G;
50  copyGauge<FloatOut,FloatIn,length>(G(out,(void*)Out,(void**)outGhost), inOrder, out, in, location, type);
51 #else
52  typedef typename QDPOrder<FloatOut,length> G;
53  copyGauge<FloatOut,FloatIn,length>(G(out, Out, outGhost), inOrder, out, in, location, type);
54 #endif
55 
56  } else if (out.Order() == QUDA_MILC_GAUGE_ORDER) {
57 
58 #ifdef FINE_GRAINED_ACCESS
59  typedef typename gauge::FieldOrder<FloatOut,Nc,1,QUDA_MILC_GAUGE_ORDER,true,sFloatOut> G;
60  copyGauge<FloatOut,FloatIn,length>(G(out,(void*)Out,(void**)outGhost), inOrder, out, in, location, type);
61 #else
62  typedef typename MILCOrder<FloatOut,length> G;
63  copyGauge<FloatOut,FloatIn,length>(G(out, Out, outGhost), inOrder, out, in, location, type);
64 #endif
65 
66  } else {
67  errorQuda("Gauge field %d order not supported", out.Order());
68  }
69 
70  }
71 
  /**
     @brief Instantiate the input-side field accessor for the input
     field's order and precision, then hand off to the output-side
     overload above.  Only QUDA_RECONSTRUCT_NO input fields are
     supported.

     @param out Output gauge field
     @param in Input gauge field
     @param location Where the copy runs (host or device)
     @param Out Output data pointer
     @param In Input data pointer (cast to void* for the accessor)
     @param outGhost Output ghost-zone pointers
     @param inGhost Input ghost-zone pointers; when null a
       ghost-flag=true native accessor is selected instead
     @param type Copy type forwarded down the call chain
   */
  template <typename sFloatOut, typename sFloatIn, int Nc>
  void copyGaugeMG(GaugeField &out, const GaugeField &in, QudaFieldLocation location,
                   sFloatOut *Out, sFloatIn *In, sFloatOut **outGhost, sFloatIn **inGhost, int type)
  {
    // map the storage types to the compute types used by the accessors
    typedef typename mapper<sFloatOut>::type FloatOut;
    typedef typename mapper<sFloatIn>::type FloatIn;
#ifndef FINE_GRAINED_ACCESS
    constexpr int length = 2*Nc*Nc; // real numbers per link: complex Nc x Nc matrix
#endif

    if (in.Reconstruct() != QUDA_RECONSTRUCT_NO)
      errorQuda("Reconstruct type %d not supported", in.Reconstruct());

    if (in.isNative()) {
#ifdef FINE_GRAINED_ACCESS
      // ghost template flag differs (false vs true) depending on whether
      // explicit ghost pointers were supplied — presumably selecting a
      // ghost-inclusive accessor; confirm in gauge_field_order.h
      if (inGhost) {
        typedef typename gauge::FieldOrder<FloatIn,Nc,1,QUDA_FLOAT2_GAUGE_ORDER,false,sFloatIn> G;
        copyGaugeMG<sFloatOut,FloatIn,Nc> (G(const_cast<GaugeField&>(in),(void*)In,(void**)inGhost), out, in, location, Out, outGhost, type);
      } else {
        typedef typename gauge::FieldOrder<FloatIn,Nc,1,QUDA_FLOAT2_GAUGE_ORDER,true,sFloatIn> G;
        copyGaugeMG<sFloatOut,FloatIn,Nc> (G(const_cast<GaugeField&>(in),(void*)In,(void**)inGhost), out, in, location, Out, outGhost, type);
      }
#else
      // dead branch: FINE_GRAINED_ACCESS is defined at the top of this file.
      // NOTE(review): this branch passes the mapped FloatOut where the
      // fine-grained branches pass the storage type sFloatOut — inconsistent,
      // though unreachable while FINE_GRAINED_ACCESS remains defined
      typedef typename gauge_mapper<FloatIn,QUDA_RECONSTRUCT_NO,length>::type G;
      copyGaugeMG<FloatOut,FloatIn,Nc> (G(in, In,inGhost), out, in, location, Out, outGhost, type);
#endif
    } else if (in.Order() == QUDA_QDP_GAUGE_ORDER) {

#ifdef FINE_GRAINED_ACCESS
      typedef typename gauge::FieldOrder<FloatIn,Nc,1,QUDA_QDP_GAUGE_ORDER,true,sFloatIn> G;
      copyGaugeMG<sFloatOut,FloatIn,Nc>(G(const_cast<GaugeField&>(in),(void*)In,(void**)inGhost), out, in, location, Out, outGhost, type);
#else
      typedef typename QDPOrder<FloatIn,length> G;
      copyGaugeMG<FloatOut,FloatIn,Nc>(G(in, In, inGhost), out, in, location, Out, outGhost, type);
#endif

    } else if (in.Order() == QUDA_MILC_GAUGE_ORDER) {

#ifdef FINE_GRAINED_ACCESS
      typedef typename gauge::FieldOrder<FloatIn,Nc,1,QUDA_MILC_GAUGE_ORDER,true,sFloatIn> G;
      copyGaugeMG<sFloatOut,FloatIn,Nc>(G(const_cast<GaugeField&>(in),(void*)In,(void**)inGhost), out, in, location, Out, outGhost, type);
#else
      typedef typename MILCOrder<FloatIn,length> G;
      copyGaugeMG<FloatOut,FloatIn,Nc>(G(in, In, inGhost), out, in, location, Out, outGhost, type);
#endif

    } else {
      errorQuda("Gauge field %d order not supported", in.Order());
    }

  }
123 
  /**
     @brief Dispatch on the number of colors of the input field,
     instantiating the color-templated overload above for each supported
     Nc.  Which cases are compiled in depends on the GPU_MULTIGRID,
     NSPIN4 and NSPIN1 build options; an unsupported Nc is a runtime
     error.
   */
  template <typename FloatOut, typename FloatIn>
  void copyGaugeMG(GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out,
                   FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type)
  {
    switch (in.Ncolor()) {
#ifdef GPU_MULTIGRID
    case 48: copyGaugeMG<FloatOut,FloatIn,48>(out, in, location, Out, In, outGhost, inGhost, type); break;
#ifdef NSPIN4
    case 12: copyGaugeMG<FloatOut,FloatIn,12>(out, in, location, Out, In, outGhost, inGhost, type); break;
    case 64: copyGaugeMG<FloatOut,FloatIn,64>(out, in, location, Out, In, outGhost, inGhost, type); break;
#endif
#ifdef NSPIN1
    case 128: copyGaugeMG<FloatOut,FloatIn,128>(out, in, location, Out, In, outGhost, inGhost, type); break;
    case 192: copyGaugeMG<FloatOut,FloatIn,192>(out, in, location, Out, In, outGhost, inGhost, type); break;
#endif
#endif // GPU_MULTIGRID
    default: errorQuda("Unsupported number of colors; out.Nc=%d, in.Nc=%d", out.Ncolor(), in.Ncolor());
    }
  }
143 
144  // this is the function that is actually called, from here on down we instantiate all required templates
  // this is the function that is actually called, from here on down we instantiate all required templates
  /**
     @brief Entry point: dispatch on the (out, in) precision pair and
     instantiate the precision-templated dispatch above.  Double
     precision in either field requires the GPU_MULTIGRID_DOUBLE build
     option; half precision requires fine-grained access (enabled at the
     top of this file).
   */
  void copyGenericGaugeMG(GaugeField &out, const GaugeField &in, QudaFieldLocation location,
                          void *Out, void *In, void **ghostOut, void **ghostIn, int type)
  {
#ifndef FINE_GRAINED_ACCESS
    // dead guard here: FINE_GRAINED_ACCESS is defined at the top of this
    // file, so half precision is accepted below
    if (out.Precision() == QUDA_HALF_PRECISION || in.Precision() == QUDA_HALF_PRECISION)
      errorQuda("Precision format not supported");
#endif

    if (out.Precision() == QUDA_DOUBLE_PRECISION) {
#ifdef GPU_MULTIGRID_DOUBLE
      if (in.Precision() == QUDA_DOUBLE_PRECISION) {
        copyGaugeMG(out, in, location, (double*)Out, (double*)In, (double**)ghostOut, (double**)ghostIn, type);
      } else if (in.Precision() == QUDA_SINGLE_PRECISION) {
        copyGaugeMG(out, in, location, (double*)Out, (float*)In, (double**)ghostOut, (float**)ghostIn, type);
      } else if (in.Precision() == QUDA_HALF_PRECISION) {
        // half storage is addressed as short; scale handling happens in the accessors
        copyGaugeMG(out, in, location, (double*)Out, (short*)In, (double**)ghostOut, (short**)ghostIn, type);
      } else {
        errorQuda("Precision %d not supported", in.Precision());
      }
#else
      errorQuda("Double precision multigrid has not been enabled");
#endif
    } else if (out.Precision() == QUDA_SINGLE_PRECISION) {
      if (in.Precision() == QUDA_DOUBLE_PRECISION) {
#ifdef GPU_MULTIGRID_DOUBLE
        copyGaugeMG(out, in, location, (float*)Out, (double*)In, (float**)ghostOut, (double**)ghostIn, type);
#else
        errorQuda("Double precision multigrid has not been enabled");
#endif
      } else if (in.Precision() == QUDA_SINGLE_PRECISION) {
        copyGaugeMG(out, in, location, (float*)Out, (float*)In, (float**)ghostOut, (float**)ghostIn, type);
      } else if (in.Precision() == QUDA_HALF_PRECISION) {
        copyGaugeMG(out, in, location, (float*)Out, (short*)In, (float**)ghostOut, (short**)ghostIn, type);
      } else {
        errorQuda("Precision %d not supported", in.Precision());
      }
    } else if (out.Precision() == QUDA_HALF_PRECISION) {
      if (in.Precision() == QUDA_DOUBLE_PRECISION) {
#ifdef GPU_MULTIGRID_DOUBLE
        copyGaugeMG(out, in, location, (short*)Out, (double*)In, (short**)ghostOut, (double**)ghostIn, type);
#else
        errorQuda("Double precision multigrid has not been enabled");
#endif
      } else if (in.Precision() == QUDA_SINGLE_PRECISION) {
        copyGaugeMG(out, in, location, (short*)Out, (float*)In, (short**)ghostOut, (float**)ghostIn, type);
      } else if (in.Precision() == QUDA_HALF_PRECISION) {
        copyGaugeMG(out, in, location, (short*)Out, (short*)In, (short**)ghostOut, (short**)ghostIn, type);
      } else {
        errorQuda("Precision %d not supported", in.Precision());
      }
    } else {
      errorQuda("Precision %d not supported", out.Precision());
    }
  }
199 
200 } // namespace quda