1 #include <gauge_field_order.h>
3 #define FINE_GRAINED_ACCESS
5 #include <copy_gauge_helper.cuh>
// Inner MG gauge-copy dispatch (output side). Given an already-constructed
// input accessor `inOrder`, pick the matching output accessor for `out`
// — by field order, and under FINE_GRAINED_ACCESS also by precision — and
// hand both to copyGauge, which performs the actual element-wise copy.
//   sFloatOut : storage type of the output field (e.g. short for half)
//   Out       : raw output data pointer (may be null -> use field's own)
//   outGhost  : per-dimension ghost-zone pointers, cast through void**
//   type      : copy type forwarded verbatim to copyGauge
// NOTE(review): this excerpt has elided lines (the embedded original line
// numbers jump, so closing braces / #else / #endif partners and the
// out.isNative() branch test are not visible here); comments describe only
// the visible branches.
9 template <typename sFloatOut, typename FloatIn, int Nc, typename InOrder>
10 void copyGaugeMG(const InOrder &inOrder, GaugeField &out, const GaugeField &in,
11 QudaFieldLocation location, sFloatOut *Out, sFloatOut **outGhost, int type)
// map storage type to its compute type (mapper<short>::type is a float, etc.)
13 typedef typename mapper<sFloatOut>::type FloatOut;
// full-matrix storage: 2*Nc*Nc reals per link (complex Nc x Nc matrix)
14 constexpr int length = 2*Nc*Nc;
// MG gauge fields are stored without reconstruction; anything else is fatal
16 if (out.Reconstruct() != QUDA_RECONSTRUCT_NO)
17 errorQuda("Reconstruct type %d not supported", out.Reconstruct());
19 #ifdef FINE_GRAINED_ACCESS
// Half-precision output needs a fixed-point scale: reuse the input's scale
// when the input is also half; otherwise derive it from the input's largest
// absolute element (the temporary accessor in_ appears related to that
// computation — TODO confirm against the unelided source).
20 if (out.Precision() == QUDA_HALF_PRECISION) {
21 if (in.Precision() == QUDA_HALF_PRECISION) {
22 out.Scale(in.Scale());
24 InOrder in_(const_cast<GaugeField&>(in));
25 out.Scale( in.abs_max() );
// --- native (FLOAT2) output order ---
32 #ifdef FINE_GRAINED_ACCESS
// Two accessor variants differing only in the boolean template flag
// (false here, true below); presumably selected by output precision —
// NOTE(review): confirm the flag's meaning in gauge::FieldOrder.
34 typedef typename gauge::FieldOrder<FloatOut,Nc,1,QUDA_FLOAT2_GAUGE_ORDER,false,sFloatOut> G;
35 copyGauge<FloatOut,FloatIn,length>(G(out,(void*)Out,(void**)outGhost), inOrder, out, in, location, type);
37 typedef typename gauge::FieldOrder<FloatOut,Nc,1,QUDA_FLOAT2_GAUGE_ORDER,true,sFloatOut> G;
38 copyGauge<FloatOut,FloatIn,length>(G(out,(void*)Out,(void**)outGhost), inOrder, out, in, location, type);
// coarse-grained path: use the standard native gauge_mapper accessor
41 typedef typename gauge_mapper<FloatOut,QUDA_RECONSTRUCT_NO,length>::type G;
42 copyGauge<FloatOut,FloatIn,length>
43 (G(out,Out,outGhost), inOrder, out, in, location, type);
// --- QDP output order ---
46 } else if (out.Order() == QUDA_QDP_GAUGE_ORDER) {
48 #ifdef FINE_GRAINED_ACCESS
49 typedef typename gauge::FieldOrder<FloatOut,Nc,1,QUDA_QDP_GAUGE_ORDER,true,sFloatOut> G;
50 copyGauge<FloatOut,FloatIn,length>(G(out,(void*)Out,(void**)outGhost), inOrder, out, in, location, type);
52 typedef typename QDPOrder<FloatOut,length> G;
53 copyGauge<FloatOut,FloatIn,length>(G(out, Out, outGhost), inOrder, out, in, location, type);
// --- MILC output order ---
56 } else if (out.Order() == QUDA_MILC_GAUGE_ORDER) {
58 #ifdef FINE_GRAINED_ACCESS
59 typedef typename gauge::FieldOrder<FloatOut,Nc,1,QUDA_MILC_GAUGE_ORDER,true,sFloatOut> G;
60 copyGauge<FloatOut,FloatIn,length>(G(out,(void*)Out,(void**)outGhost), inOrder, out, in, location, type);
62 typedef typename MILCOrder<FloatOut,length> G;
63 copyGauge<FloatOut,FloatIn,length>(G(out, Out, outGhost), inOrder, out, in, location, type);
// any other field order is unsupported
67 errorQuda("Gauge field %d order not supported", out.Order());
// Middle MG gauge-copy dispatch (input side). Mirrors the output-side
// overload: selects an input accessor for `in` by field order (and, under
// FINE_GRAINED_ACCESS, precision), constructs it, and recurses into the
// inner copyGaugeMG overload which then dispatches on the output field.
//   sFloatOut/sFloatIn : storage types of the output/input fields
//   In/inGhost         : raw input data and ghost-zone pointers
// NOTE(review): excerpt has elided lines (jumping embedded line numbers);
// the #else/#endif partners and closing braces are not visible here.
72 template <typename sFloatOut, typename sFloatIn, int Nc>
73 void copyGaugeMG(GaugeField &out, const GaugeField &in, QudaFieldLocation location,
74 sFloatOut *Out, sFloatIn *In, sFloatOut **outGhost, sFloatIn **inGhost, int type)
// map both storage types to their compute types
76 typedef typename mapper<sFloatOut>::type FloatOut;
77 typedef typename mapper<sFloatIn>::type FloatIn;
78 #ifndef FINE_GRAINED_ACCESS
// length only needed by the coarse-grained gauge_mapper accessor below
79 constexpr int length = 2*Nc*Nc;
// input must also be unreconstructed
82 if (in.Reconstruct() != QUDA_RECONSTRUCT_NO)
83 errorQuda("Reconstruct type %d not supported", in.Reconstruct());
// --- native (FLOAT2) input order ---
86 #ifdef FINE_GRAINED_ACCESS
// as on the output side, two accessor variants differ only in the boolean
// template flag (false vs true) — NOTE(review): confirm selection criterion.
88 typedef typename gauge::FieldOrder<FloatIn,Nc,1,QUDA_FLOAT2_GAUGE_ORDER,false,sFloatIn> G;
89 copyGaugeMG<sFloatOut,FloatIn,Nc> (G(const_cast<GaugeField&>(in),(void*)In,(void**)inGhost), out, in, location, Out, outGhost, type);
91 typedef typename gauge::FieldOrder<FloatIn,Nc,1,QUDA_FLOAT2_GAUGE_ORDER,true,sFloatIn> G;
92 copyGaugeMG<sFloatOut,FloatIn,Nc> (G(const_cast<GaugeField&>(in),(void*)In,(void**)inGhost), out, in, location, Out, outGhost, type);
// NOTE(review): coarse-grained calls pass <FloatOut,...> where fine-grained
// calls pass <sFloatOut,...>; this asymmetry is consistent throughout the
// file and appears intentional (the inner overload's first parameter is the
// output *storage* type in the fine-grained case) — verify against callers.
95 typedef typename gauge_mapper<FloatIn,QUDA_RECONSTRUCT_NO,length>::type G;
96 copyGaugeMG<FloatOut,FloatIn,Nc> (G(in, In,inGhost), out, in, location, Out, outGhost, type);
// --- QDP input order ---
98 } else if (in.Order() == QUDA_QDP_GAUGE_ORDER) {
100 #ifdef FINE_GRAINED_ACCESS
101 typedef typename gauge::FieldOrder<FloatIn,Nc,1,QUDA_QDP_GAUGE_ORDER,true,sFloatIn> G;
102 copyGaugeMG<sFloatOut,FloatIn,Nc>(G(const_cast<GaugeField&>(in),(void*)In,(void**)inGhost), out, in, location, Out, outGhost, type);
104 typedef typename QDPOrder<FloatIn,length> G;
105 copyGaugeMG<FloatOut,FloatIn,Nc>(G(in, In, inGhost), out, in, location, Out, outGhost, type);
// --- MILC input order ---
108 } else if (in.Order() == QUDA_MILC_GAUGE_ORDER) {
110 #ifdef FINE_GRAINED_ACCESS
111 typedef typename gauge::FieldOrder<FloatIn,Nc,1,QUDA_MILC_GAUGE_ORDER,true,sFloatIn> G;
112 copyGaugeMG<sFloatOut,FloatIn,Nc>(G(const_cast<GaugeField&>(in),(void*)In,(void**)inGhost), out, in, location, Out, outGhost, type);
114 typedef typename MILCOrder<FloatIn,length> G;
115 copyGaugeMG<FloatOut,FloatIn,Nc>(G(in, In, inGhost), out, in, location, Out, outGhost, type);
// any other input field order is unsupported
119 errorQuda("Gauge field %d order not supported", in.Order());
// Color-count dispatch: turn the runtime in.Ncolor() into a compile-time
// template parameter Nc and forward to the input-side copyGaugeMG overload.
// Visible cases: 12, 48, 64, 128, 192 (coarse-grid color counts used by
// the multigrid setup); unsupported counts abort via errorQuda.
// NOTE(review): the #endif // GPU_MULTIGRID below has no visible matching
// #ifdef in this excerpt — some case lines and the guard opener appear to
// have been elided from this view.
124 template <typename FloatOut, typename FloatIn>
125 void copyGaugeMG(GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out,
126 FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type)
128 switch (in.Ncolor()) {
130 case 48: copyGaugeMG<FloatOut,FloatIn,48>(out, in, location, Out, In, outGhost, inGhost, type); break;
132 case 12: copyGaugeMG<FloatOut,FloatIn,12>(out, in, location, Out, In, outGhost, inGhost, type); break;
133 case 64: copyGaugeMG<FloatOut,FloatIn,64>(out, in, location, Out, In, outGhost, inGhost, type); break;
136 case 128: copyGaugeMG<FloatOut,FloatIn,128>(out, in, location, Out, In, outGhost, inGhost, type); break;
137 case 192: copyGaugeMG<FloatOut,FloatIn,192>(out, in, location, Out, In, outGhost, inGhost, type); break;
139 #endif // GPU_MULTIGRID
140 default: errorQuda("Unsupported number of colors; out.Nc=%d, in.Nc=%d", out.Ncolor(), in.Ncolor());
// Public entry point: dispatch on the runtime precisions of `out` and `in`
// to instantiate the fully-typed copyGaugeMG template chain. Each of the
// nine (out-precision x in-precision) combinations casts the raw void*
// data/ghost pointers to the concrete storage types (double/float/short).
// Any path involving double precision is compiled only under
// GPU_MULTIGRID_DOUBLE; half precision is only usable when
// FINE_GRAINED_ACCESS is defined (it is, at the top of this file).
// NOTE(review): elided lines (jumping embedded numbers) hide the #else /
// closing-brace partners of the visible preprocessor and if/else structure.
144 // this is the function that is actually called, from here on down we instantiate all required templates
145 void copyGenericGaugeMG(GaugeField &out, const GaugeField &in, QudaFieldLocation location,
146 void *Out, void *In, void **ghostOut, void **ghostIn, int type)
148 #ifndef FINE_GRAINED_ACCESS
// without fine-grained access there is no fixed-point scaling support,
// so half precision on either side must be rejected up front
149 if (out.Precision() == QUDA_HALF_PRECISION || in.Precision() == QUDA_HALF_PRECISION)
150 errorQuda("Precision format not supported");
// --- double-precision output ---
153 if (out.Precision() == QUDA_DOUBLE_PRECISION) {
154 #ifdef GPU_MULTIGRID_DOUBLE
155 if (in.Precision() == QUDA_DOUBLE_PRECISION) {
156 copyGaugeMG(out, in, location, (double*)Out, (double*)In, (double**)ghostOut, (double**)ghostIn, type);
157 } else if (in.Precision() == QUDA_SINGLE_PRECISION) {
158 copyGaugeMG(out, in, location, (double*)Out, (float*)In, (double**)ghostOut, (float**)ghostIn, type);
159 } else if (in.Precision() == QUDA_HALF_PRECISION) {
160 copyGaugeMG(out, in, location, (double*)Out, (short*)In, (double**)ghostOut, (short**)ghostIn, type);
162 errorQuda("Precision %d not supported", in.Precision());
165 errorQuda("Double precision multigrid has not been enabled");
// --- single-precision output ---
167 } else if (out.Precision() == QUDA_SINGLE_PRECISION) {
168 if (in.Precision() == QUDA_DOUBLE_PRECISION) {
169 #ifdef GPU_MULTIGRID_DOUBLE
170 copyGaugeMG(out, in, location, (float*)Out, (double*)In, (float**)ghostOut, (double**)ghostIn, type);
172 errorQuda("Double precision multigrid has not been enabled");
174 } else if (in.Precision() == QUDA_SINGLE_PRECISION) {
175 copyGaugeMG(out, in, location, (float*)Out, (float*)In, (float**)ghostOut, (float**)ghostIn, type);
176 } else if (in.Precision() == QUDA_HALF_PRECISION) {
177 copyGaugeMG(out, in, location, (float*)Out, (short*)In, (float**)ghostOut, (short**)ghostIn, type);
179 errorQuda("Precision %d not supported", in.Precision());
// --- half-precision output (short storage) ---
181 } else if (out.Precision() == QUDA_HALF_PRECISION) {
182 if (in.Precision() == QUDA_DOUBLE_PRECISION) {
183 #ifdef GPU_MULTIGRID_DOUBLE
184 copyGaugeMG(out, in, location, (short*)Out, (double*)In, (short**)ghostOut, (double**)ghostIn, type);
186 errorQuda("Double precision multigrid has not been enabled");
188 } else if (in.Precision() == QUDA_SINGLE_PRECISION) {
189 copyGaugeMG(out, in, location, (short*)Out, (float*)In, (short**)ghostOut, (float**)ghostIn, type);
190 } else if (in.Precision() == QUDA_HALF_PRECISION) {
191 copyGaugeMG(out, in, location, (short*)Out, (short*)In, (short**)ghostOut, (short**)ghostIn, type);
193 errorQuda("Precision %d not supported", in.Precision());
// unrecognized output precision
196 errorQuda("Precision %d not supported", out.Precision());