#include <color_spinor_field.h>
#include <color_spinor_field_order.h>
#include <tune_quda.h>
#include <multigrid_helper.cuh>
#include <index_helper.cuh>

namespace quda {

#if defined(GPU_MULTIGRID) && defined(GPU_STAGGERED_DIRAC)
  enum class StaggeredTransferType {
    STAGGERED_TRANSFER_PROLONG,
    STAGGERED_TRANSFER_RESTRICT,
    STAGGERED_TRANSFER_INVALID = QUDA_INVALID_ENUM
  };
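  // Prolongation maps a coarse vector onto a fine vector; restriction is the
  // transpose operation, mapping a fine vector onto a coarse vector.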
  using namespace quda::colorspinor;
  // Use a trait to define whether the "out" spin is the fine or coarse spin
  template<int fineSpin, int coarseSpin, StaggeredTransferType transferType> struct StaggeredTransferOutSpin { static constexpr int outSpin = -1; };
  template<int fineSpin, int coarseSpin> struct StaggeredTransferOutSpin<fineSpin,coarseSpin,StaggeredTransferType::STAGGERED_TRANSFER_PROLONG> { static constexpr int outSpin = fineSpin; };
  template<int fineSpin, int coarseSpin> struct StaggeredTransferOutSpin<fineSpin,coarseSpin,StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT> { static constexpr int outSpin = coarseSpin; };

  // Use a trait to define whether the "in" spin is the fine or coarse spin
  template<int fineSpin, int coarseSpin, StaggeredTransferType transferType> struct StaggeredTransferInSpin { static constexpr int inSpin = -1; };
  template<int fineSpin, int coarseSpin> struct StaggeredTransferInSpin<fineSpin,coarseSpin,StaggeredTransferType::STAGGERED_TRANSFER_PROLONG> { static constexpr int inSpin = coarseSpin; };
  template<int fineSpin, int coarseSpin> struct StaggeredTransferInSpin<fineSpin,coarseSpin,StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT> { static constexpr int inSpin = fineSpin; };

  // Use a trait to define whether the "out" color is the fine or coarse color
  template<int fineColor, int coarseColor, StaggeredTransferType transferType> struct StaggeredTransferOutColor { static constexpr int outColor = -1; };
  template<int fineColor, int coarseColor> struct StaggeredTransferOutColor<fineColor,coarseColor,StaggeredTransferType::STAGGERED_TRANSFER_PROLONG> { static constexpr int outColor = fineColor; };
  template<int fineColor, int coarseColor> struct StaggeredTransferOutColor<fineColor,coarseColor,StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT> { static constexpr int outColor = coarseColor; };

  // Use a trait to define whether the "in" color is the fine or coarse color
  template<int fineColor, int coarseColor, StaggeredTransferType transferType> struct StaggeredTransferInColor { static constexpr int inColor = -1; };
  template<int fineColor, int coarseColor> struct StaggeredTransferInColor<fineColor,coarseColor,StaggeredTransferType::STAGGERED_TRANSFER_PROLONG> { static constexpr int inColor = coarseColor; };
  template<int fineColor, int coarseColor> struct StaggeredTransferInColor<fineColor,coarseColor,StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT> { static constexpr int inColor = fineColor; };
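  // Summary of the traits above: for prolongation the output field carries the
  // fine spin/color and the input the coarse spin/color; for restriction the
  // roles are swapped. The unspecialized value of -1 acts as an invalid sentinel.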
  // Function to return the fine ColorSpinorField
  template<StaggeredTransferType transferType>
  inline const ColorSpinorField& fineColorSpinorField(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    errorQuda("Invalid transfer type %d for fineColorSpinorField", (int)transferType);
    return quoteIn;
  }

  // on prolong, the out vector is the fine vector
  template<>
  inline const ColorSpinorField& fineColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_PROLONG>(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    return quoteOut;
  }

  // on restrict, the in vector is the fine vector
  template<>
  inline const ColorSpinorField& fineColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT>(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    return quoteIn;
  }
  // Function to return the coarse ColorSpinorField
  template<StaggeredTransferType transferType>
  inline const ColorSpinorField& coarseColorSpinorField(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    errorQuda("Invalid transfer type %d for coarseColorSpinorField", (int)transferType);
    return quoteIn;
  }

  // on prolong, the in vector is the coarse vector
  template<>
  inline const ColorSpinorField& coarseColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_PROLONG>(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    return quoteIn;
  }

  // on restrict, the out vector is the coarse vector
  template<>
  inline const ColorSpinorField& coarseColorSpinorField<StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT>(const ColorSpinorField& quoteIn, const ColorSpinorField& quoteOut) {
    return quoteOut;
  }
  /**
     Kernel argument struct
  */
  template <typename Float, int fineSpin, int fineColor, int coarseSpin, int coarseColor, QudaFieldOrder order, StaggeredTransferType theTransferType>
  struct StaggeredProlongRestrictArg {
    FieldOrderCB<Float, StaggeredTransferOutSpin<fineSpin,coarseSpin,theTransferType>::outSpin, StaggeredTransferOutColor<fineColor,coarseColor,theTransferType>::outColor,1,order> out;
    const FieldOrderCB<Float, StaggeredTransferInSpin<fineSpin,coarseSpin,theTransferType>::inSpin, StaggeredTransferInColor<fineColor,coarseColor,theTransferType>::inColor,1,order> in;
    const int *geo_map; // fine-to-coarse site map (a device copy is needed for GPU kernels)
    const spin_mapper<fineSpin,coarseSpin> spin_map;
    const int parity; // the parity of the output field (if single parity)
    const int nParity; // number of parities of the input fine field
    const int fineX[4]; // fine lattice dimensions
    const int fineVolumeCB; // fine checkerboarded spatial volume
    const int coarseVolumeCB; // coarse checkerboarded spatial volume
    static constexpr StaggeredTransferType transferType = theTransferType;

    StaggeredProlongRestrictArg(ColorSpinorField &out, const ColorSpinorField &in,
                                const int *geo_map, const int parity)
      : out(out), in(in), geo_map(geo_map), spin_map(), parity(parity),
        nParity(fineColorSpinorField<transferType>(in,out).SiteSubset()),
        fineX{fineColorSpinorField<transferType>(in,out).X()[0],
              fineColorSpinorField<transferType>(in,out).X()[1],
              fineColorSpinorField<transferType>(in,out).X()[2],
              fineColorSpinorField<transferType>(in,out).X()[3]},
        fineVolumeCB(fineColorSpinorField<transferType>(in,out).VolumeCB()),
        coarseVolumeCB(coarseColorSpinorField<transferType>(in,out).VolumeCB())
    { }
    StaggeredProlongRestrictArg(const StaggeredProlongRestrictArg<Float,fineSpin,fineColor,coarseSpin,coarseColor,order,theTransferType> &arg)
      : out(arg.out), in(arg.in), geo_map(arg.geo_map), spin_map(),
        parity(arg.parity), nParity(arg.nParity), fineX{arg.fineX[0],arg.fineX[1],arg.fineX[2],arg.fineX[3]},
        fineVolumeCB(arg.fineVolumeCB), coarseVolumeCB(arg.coarseVolumeCB)
    { }
  };
  /**
     Performs the permutation between coarse and fine degrees of freedom:
     prolongation copies each coarse degree of freedom to the corresponding
     fine degree of freedom, while restriction performs the reverse mapping.
  */
  template <StaggeredTransferType transferType, class OutAccessor, class InAccessor, typename S>
  __device__ __host__ inline void staggeredProlongRestrict(OutAccessor &out, const InAccessor &in,
                                                           int parity, int x_cb, int c, const int *geo_map, const S &spin_map,
                                                           const int fineVolumeCB, const int coarseVolumeCB, const int X[]) {
    int x = parity*fineVolumeCB + x_cb;
    int x_coarse = geo_map[x];
    int parity_coarse = (x_coarse >= coarseVolumeCB) ? 1 : 0;
    int x_coarse_cb = x_coarse - parity_coarse*coarseVolumeCB;

    // coarse_color = 8*fine_color + corner of the hypercube
    int fineCoords[4];
    getCoords(fineCoords, x_cb, X, parity);
    int hyperCorner = 4*(fineCoords[3]%2) + 2*(fineCoords[2]%2) + (fineCoords[1]%2);

    if (transferType == StaggeredTransferType::STAGGERED_TRANSFER_PROLONG) {
      out(parity,x_cb,0,c) = in(parity_coarse, x_coarse_cb, spin_map(0,parity), 8*c+hyperCorner);
    } else {
      out(parity_coarse, x_coarse_cb, spin_map(0,parity), 8*c+hyperCorner) = in(parity,x_cb,0,c);
    }
  }
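  // Worked example of the index mapping above (the geometric aggregate is a 2^4
  // hypercube): a fine site with local coordinates (x,y,z,t) = (0,1,0,1) has
  // hyperCorner = 4*(1%2) + 2*(0%2) + (1%2) = 5, so fine color c = 2 is paired
  // with coarse color 8*2 + 5 = 21. The x-coordinate parity is not needed
  // explicitly, since it is fixed by the site parity and the other three coordinates.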
  template <typename Float, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg>
  void StaggeredProlongRestrict(Arg &arg) {
    for (int parity=0; parity<arg.nParity; parity++) {
      parity = (arg.nParity == 2) ? parity : arg.parity;

      // We don't actually have to loop over spin because fineSpin = 1, coarseSpin = fine parity
      for (int x_cb=0; x_cb<arg.fineVolumeCB; x_cb++) {
        for (int c=0; c<fineColor; c++) {
          staggeredProlongRestrict<Arg::transferType>(arg.out, arg.in, parity, x_cb, c, arg.geo_map, arg.spin_map, arg.fineVolumeCB, arg.coarseVolumeCB, arg.fineX);
        }
      }
    }
  }
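  // Note on the missing spin loop: the fine staggered field has nSpin = 1, and the
  // coarse "spin" index is just the fine parity as returned by spin_map(0,parity),
  // so the color loop above is the only internal loop required.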
  template <typename Float, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg>
  __global__ void StaggeredProlongRestrictKernel(Arg arg) {
    int x_cb = blockIdx.x*blockDim.x + threadIdx.x;
    int parity = arg.nParity == 2 ? blockDim.y*blockIdx.y + threadIdx.y : arg.parity;
    if (x_cb >= arg.fineVolumeCB) return;

    int c = blockDim.z*blockIdx.z + threadIdx.z;
    if (c >= fineColor) return;

    staggeredProlongRestrict<Arg::transferType>(arg.out, arg.in, parity, x_cb, c, arg.geo_map, arg.spin_map, arg.fineVolumeCB, arg.coarseVolumeCB, arg.fineX);
  }
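  // Thread mapping for the kernel above: the x block/thread index runs over the
  // fine checkerboarded sites, the y index over the fine parities (when the fine
  // field has both parities), and the z index over the fine colors, matching the
  // TunableVectorYZ(SiteSubset(), fineColor) configuration used below.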
  template <typename Float, int fineSpin, int fineColor, int coarseSpin, int coarseColor, StaggeredTransferType transferType>
  class StaggeredProlongRestrictLaunch : public TunableVectorYZ {

    ColorSpinorField &out;
    const ColorSpinorField &in;
    const int *fine_to_coarse;
    int parity;
    QudaFieldLocation location;
    char vol[TuneKey::volume_n];

    bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
    unsigned int minThreads() const { return fineColorSpinorField<transferType>(in,out).VolumeCB(); } // fine parity is the block y dimension

  public:
    StaggeredProlongRestrictLaunch(ColorSpinorField &out, const ColorSpinorField &in,
                                   const int *fine_to_coarse, int parity)
      : TunableVectorYZ(fineColorSpinorField<transferType>(in,out).SiteSubset(), fineColor), out(out), in(in),
        fine_to_coarse(fine_to_coarse), parity(parity), location(checkLocation(out, in))
    {
      strcpy(vol, fineColorSpinorField<transferType>(in,out).VolString());
      strcat(vol, ",");
      strcat(vol, coarseColorSpinorField<transferType>(in,out).VolString());

      strcpy(aux, fineColorSpinorField<transferType>(in,out).AuxString());
      strcat(aux, ",");
      strcat(aux, coarseColorSpinorField<transferType>(in,out).AuxString());
    }
    void apply(const qudaStream_t &stream) {
      if (location == QUDA_CPU_FIELD_LOCATION) {
        if (out.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
          StaggeredProlongRestrictArg<Float,fineSpin,fineColor,coarseSpin,coarseColor,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER,transferType>
            arg(out, in, fine_to_coarse, parity);
          StaggeredProlongRestrict<Float,fineSpin,fineColor,coarseSpin,coarseColor>(arg);
        } else {
          errorQuda("Unsupported field order %d", out.FieldOrder());
        }
      } else {
        if (out.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER) {
          TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());

          StaggeredProlongRestrictArg<Float,fineSpin,fineColor,coarseSpin,coarseColor,QUDA_FLOAT2_FIELD_ORDER,transferType>
            arg(out, in, fine_to_coarse, parity);
          qudaLaunchKernel(StaggeredProlongRestrictKernel<Float,fineSpin,fineColor,coarseSpin,coarseColor,decltype(arg)>, tp, stream, arg);
        } else {
          errorQuda("Unsupported field order %d", out.FieldOrder());
        }
      }
    }
    TuneKey tuneKey() const { return TuneKey(vol, typeid(*this).name(), aux); }

    long long flops() const { return 0; }
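    // The transfer is a pure permutation of data, hence no floating-point work;
    // bytes() below counts reading and writing the two fields plus one int per
    // fine site for the fine-to-coarse lookup table.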
    long long bytes() const {
      return in.Bytes() + out.Bytes() + fineColorSpinorField<transferType>(in,out).SiteSubset()*fineColorSpinorField<transferType>(in,out).VolumeCB()*sizeof(int);
    }
  };
  template <typename Float, int fineSpin, int fineColor, int coarseSpin, int coarseColor, StaggeredTransferType transferType>
  void StaggeredProlongRestrict(ColorSpinorField &out, const ColorSpinorField &in,
                                const int *fine_to_coarse, int parity) {
    StaggeredProlongRestrictLaunch<Float, fineSpin, fineColor, coarseSpin, coarseColor, transferType>
      staggered_prolong_restrict(out, in, fine_to_coarse, parity);
    staggered_prolong_restrict.apply(0);
  }
  template <int fineSpin, int fineColor, int coarseSpin, int coarseColor, StaggeredTransferType transferType>
  void StaggeredProlongRestrict(ColorSpinorField &out, const ColorSpinorField &in,
                                const int *fine_to_coarse, int parity) {
    QudaPrecision precision = checkPrecision(out, in);

    if (precision == QUDA_DOUBLE_PRECISION) {
#ifdef GPU_MULTIGRID_DOUBLE
      StaggeredProlongRestrict<double,fineSpin,fineColor,coarseSpin,coarseColor,transferType>(out, in, fine_to_coarse, parity);
#else
      errorQuda("Double precision multigrid has not been enabled");
#endif
    } else if (precision == QUDA_SINGLE_PRECISION) {
      StaggeredProlongRestrict<float,fineSpin,fineColor,coarseSpin,coarseColor,transferType>(out, in, fine_to_coarse, parity);
    } else {
      errorQuda("Unsupported precision %d", out.Precision());
    }
  }
  template <StaggeredTransferType transferType>
  void StaggeredProlongRestrict(ColorSpinorField &out, const ColorSpinorField &in,
                                const int *fine_to_coarse, const int * const * spin_map, int parity) {
    if (out.FieldOrder() != in.FieldOrder())
      errorQuda("Field orders do not match (out=%d, in=%d)",
                out.FieldOrder(), in.FieldOrder());

    if (fineColorSpinorField<transferType>(in,out).Nspin() != 1) errorQuda("Fine spin %d is not supported", fineColorSpinorField<transferType>(in,out).Nspin());
    const int fineSpin = 1;

    if (coarseColorSpinorField<transferType>(in,out).Nspin() != 2) errorQuda("Coarse spin %d is not supported", coarseColorSpinorField<transferType>(in,out).Nspin());
    const int coarseSpin = 2;

    // first check that the spin_map matches the spin_mapper
    spin_mapper<fineSpin,coarseSpin> mapper;
    for (int s=0; s<fineSpin; s++)
      for (int p=0; p<2; p++)
        if (mapper(s,p) != spin_map[s][p]) errorQuda("Spin map does not match spin_mapper");

    if (fineColorSpinorField<transferType>(in,out).Ncolor() != 3) errorQuda("Unsupported fine nColor %d", fineColorSpinorField<transferType>(in,out).Ncolor());
    const int fineColor = 3;

    if (coarseColorSpinorField<transferType>(in,out).Ncolor() != 8*fineColor) errorQuda("Unsupported coarse nColor %d", coarseColorSpinorField<transferType>(in,out).Ncolor());
    const int coarseColor = 8*fineColor;

    StaggeredProlongRestrict<fineSpin,fineColor,coarseSpin,coarseColor,transferType>(out, in, fine_to_coarse, parity);
  }
#endif // defined(GPU_MULTIGRID) && defined(GPU_STAGGERED_DIRAC)
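  // Public entry points. Usage sketch (per the checks enforced above): the caller,
  // typically the Transfer object, passes a fine staggered field with nSpin = 1 and
  // nColor = 3 and a coarse field with nSpin = 2 and nColor = 24 = 8*3, together
  // with the fine-to-coarse site map and spin map, plus the parity of the fine
  // field when it is a single-parity field.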
  void StaggeredProlongate(ColorSpinorField &out, const ColorSpinorField &in,
                           const int *fine_to_coarse, const int * const * spin_map, int parity) {
#if defined(GPU_MULTIGRID) && defined(GPU_STAGGERED_DIRAC)
    StaggeredProlongRestrict<StaggeredTransferType::STAGGERED_TRANSFER_PROLONG>(out, in, fine_to_coarse, spin_map, parity);
#else
    errorQuda("Staggered multigrid has not been built");
#endif
  }
  void StaggeredRestrict(ColorSpinorField &out, const ColorSpinorField &in,
                         const int *fine_to_coarse, const int * const * spin_map, int parity) {
#if defined(GPU_MULTIGRID) && defined(GPU_STAGGERED_DIRAC)
    StaggeredProlongRestrict<StaggeredTransferType::STAGGERED_TRANSFER_RESTRICT>(out, in, fine_to_coarse, spin_map, parity);
#else
    errorQuda("Staggered multigrid has not been built");
#endif
  }
} // end namespace quda