1 #include <color_spinor_field.h>
2 #include <color_spinor_field_order.h>
3 #include <index_helper.cuh>
5 #include <instantiate.h>
9 using namespace colorspinor;
12 Random number insertion over all field elements
16 for (int parity=0; parity<t.Nparity(); parity++) {
17 for (int x_cb=0; x_cb<t.VolumeCB(); x_cb++) {
18 for (int s=0; s<t.Nspin(); s++) {
19 for (int c=0; c<t.Ncolor(); c++) {
20 t(parity,x_cb,s,c).real(comm_drand());
21 t(parity,x_cb,s,c).imag(comm_drand());
29 Create a point source at spacetime point x, spin s and colour c
32 void point(T &t, int x, int s, int c) { t(x%2, x/2, s, c) = 1.0; }
35 Set all space-time real elements at spin s and color c of the
39 void constant(T &t, int k, int s, int c) {
40 for (int parity=0; parity<t.Nparity(); parity++) {
41 for (int x_cb=0; x_cb<t.VolumeCB(); x_cb++) {
42 // set all color-spin components to zero
43 for (int s2=0; s2<t.Nspin(); s2++) {
44 for (int c2=0; c2<t.Ncolor(); c2++) {
45 t(parity,x_cb,s2,c2) = 0.0;
48 t(parity,x_cb,s,c) = k; // now set the one we want
54 Insert a sinusoidal wave sin ( n * (x[d] / X[d]) * pi ) in dimension d
57 void sin(P &p, int d, int n, int offset) {
59 int X[4] = { p.X(0), p.X(1), p.X(2), p.X(3)};
60 X[0] *= (p.Nparity() == 1) ? 2 : 1; // need full lattice dims
62 for (int parity=0; parity<p.Nparity(); parity++) {
63 for (int x_cb=0; x_cb<p.VolumeCB(); x_cb++) {
64 getCoords(coord, x_cb, X, parity);
66 double mode = n * (double)coord[d] / X[d];
67 double k = (double)offset + sin (M_PI * mode);
69 for (int s=0; s<p.Nspin(); s++)
70 for (int c=0; c<p.Ncolor(); c++)
71 p(parity, x_cb, s, c) = k;
77 Create a corner source with value "v" on color "c"
78 on a single corner overloaded into "s". "s" is
79 encoded via a bitmap: 1010 -> x = 0, y = 1, z = 0, t = 1
83 void corner(T &p, int v, int s, int c) {
85 errorQuda("ERROR: lib/color_spinor_util.cu, corner() is only defined for Nspin = 1 fields.\n");
88 int X[4] = { p.X(0), p.X(1), p.X(2), p.X(3)};
89 X[0] *= (p.Nparity() == 1) ? 2 : 1; // need full lattice dims
91 for (int parity=0; parity<p.Nparity(); parity++) {
92 for (int x_cb=0; x_cb<p.VolumeCB(); x_cb++) {
95 getCoords(coord, x_cb, X, parity);
97 // Figure out corner of current site.
98 int corner = 8*(coord[3]%2)+4*(coord[2]%2)+2*(coord[1]%2)+(coord[0]%2);
100 // set all color components to zero
101 for (int c2=0; c2<p.Ncolor(); c2++) {
102 p(parity,x_cb,0,c2) = 0.0;
104 // except the corner and color we want
106 p(parity,x_cb,0,c) = (double)v;
111 // print out the vector at volume point x
112 template <typename Float, int nSpin, int nColor, QudaFieldOrder order>
113 void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
114 FieldOrderCB<Float,nSpin,nColor,1,order> A(a);
115 if (sourceType == QUDA_RANDOM_SOURCE) random(A);
116 else if (sourceType == QUDA_POINT_SOURCE) point(A, x, s, c);
117 else if (sourceType == QUDA_CONSTANT_SOURCE) constant(A, x, s, c);
118 else if (sourceType == QUDA_SINUSOIDAL_SOURCE) sin(A, x, s, c);
119 else if (sourceType == QUDA_CORNER_SOURCE) corner(A, x, s, c);
120 else errorQuda("Unsupported source type %d", sourceType);
123 template <typename Float, int nSpin, QudaFieldOrder order>
124 void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
125 if (a.Ncolor() == 3) {
126 genericSource<Float,nSpin,3,order>(a,sourceType, x, s, c);
128 } else if (a.Ncolor() == 4) {
129 genericSource<Float,nSpin,4,order>(a,sourceType, x, s, c);
130 } else if (a.Ncolor() == 6) { // for Wilson free field
131 genericSource<Float,nSpin,6,order>(a,sourceType, x, s, c);
132 } else if (a.Ncolor() == 8) {
133 genericSource<Float,nSpin,8,order>(a,sourceType, x, s, c);
134 } else if (a.Ncolor() == 12) {
135 genericSource<Float,nSpin,12,order>(a,sourceType, x, s, c);
136 } else if (a.Ncolor() == 16) {
137 genericSource<Float,nSpin,16,order>(a,sourceType, x, s, c);
138 } else if (a.Ncolor() == 20) {
139 genericSource<Float,nSpin,20,order>(a,sourceType, x, s, c);
140 } else if (a.Ncolor() == 24) {
141 genericSource<Float,nSpin,24,order>(a,sourceType, x, s, c);
143 } else if (a.Ncolor() == 32) {
144 genericSource<Float,nSpin,32,order>(a,sourceType, x, s, c);
147 } else if (a.Ncolor() == 64) {
148 genericSource<Float,nSpin,64,order>(a,sourceType, x, s, c);
149 } else if (a.Ncolor() == 96) {
150 genericSource<Float,nSpin,96,order>(a,sourceType, x, s, c);
152 #endif // GPU_MULTIGRID
154 errorQuda("Unsupported nColor=%d\n", a.Ncolor());
158 template <typename Float, QudaFieldOrder order>
159 void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
160 if (a.Nspin() == 1) {
162 genericSource<Float,1,order>(a,sourceType, x, s, c);
164 errorQuda("nSpin=1 not enabled for this build");
166 } else if (a.Nspin() == 2) {
168 genericSource<Float,2,order>(a,sourceType, x, s, c);
170 errorQuda("nSpin=2 not enabled for this build");
172 } else if (a.Nspin() == 4) {
174 genericSource<Float,4,order>(a,sourceType, x, s, c);
176 errorQuda("nSpin=4 not enabled for this build");
179 errorQuda("Unsupported nSpin=%d\n", a.Nspin());
183 template <typename Float>
184 void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
185 if (a.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
186 genericSource<Float,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a,sourceType, x, s, c);
188 errorQuda("Unsupported field order %d\n", a.FieldOrder());
193 void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
195 if (a.Precision() == QUDA_DOUBLE_PRECISION) {
196 genericSource<double>(a,sourceType, x, s, c);
197 } else if (a.Precision() == QUDA_SINGLE_PRECISION) {
198 genericSource<float>(a, sourceType, x, s, c);
200 errorQuda("Precision not supported");
206 template <class U, class V>
207 int compareSpinor(const U &u, const V &v, const int tol) {
208 int fail_check = 16*tol;
209 int *fail = new int[fail_check];
210 for (int f=0; f<fail_check; f++) fail[f] = 0;
212 int N = 2*u.Nspin()*u.Ncolor();
213 int *iter = new int[N];
214 for (int i=0; i<N; i++) iter[i] = 0;
216 for (int parity=0; parity<v.Nparity(); parity++) {
217 for (int x_cb=0; x_cb<u.VolumeCB(); x_cb++) {
219 for (int s=0; s<u.Nspin(); s++) {
220 for (int c=0; c<u.Ncolor(); c++) {
221 for (int z=0; z<2; z++) {
222 int j = (s*u.Ncolor() + c)*2+z;
224 double diff = z==0 ? fabs(u(parity,x_cb,s,c,z).real() - v(parity,x_cb,s,c,z).real()) :
225 fabs(u(parity,x_cb,s,c).imag() - v(parity,x_cb,s,c).imag());
227 for (int f=0; f<fail_check; f++) {
228 if (diff > pow(10.0,-(f+1)/(double)tol) || std::isnan(diff)) {
233 if (diff > 1e-3 || std::isnan(diff)) iter[j]++;
240 // reduce over all processes
241 for (int i=0; i<N; i++) comm_allreduce_int(&iter[i]);
242 for (int f=0; f<fail_check; f++) comm_allreduce_int(&fail[f]);
244 for (int i=0; i<N; i++) printfQuda("%d fails = %d\n", i, iter[i]);
246 int accuracy_level =0;
247 for (int f=0; f<fail_check; f++) {
248 if (fail[f] == 0) accuracy_level = f+1;
251 size_t total = u.Nparity()*u.VolumeCB()*N*comm_size();
252 for (int f=0; f<fail_check; f++) {
253 printfQuda("%e Failures: %d / %lu = %e\n", pow(10.0,-(f+1)/(double)tol),
254 fail[f], total, fail[f] / (double)total);
260 return accuracy_level;
263 template <typename oFloat, typename iFloat, QudaFieldOrder order>
264 int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
266 if (a.Ncolor() == 3) {
267 constexpr int Nc = 3;
268 if (a.Nspin() == 4) {
269 constexpr int Ns = 4;
270 FieldOrderCB<oFloat,Ns,Nc,1,order> A(a);
271 FieldOrderCB<iFloat,Ns,Nc,1,order> B(b);
273 double rescale = 1.0 / A.abs_max();
276 blas::ax(rescale, a_);
277 blas::ax(rescale, b_);
278 FieldOrderCB<oFloat, Ns, Nc, 1, order> A_(a_);
279 FieldOrderCB<iFloat, Ns, Nc, 1, order> B_(b_);
281 ret = compareSpinor(A_, B_, tol);
282 } else if (a.Nspin() == 1) {
283 constexpr int Ns = 1;
284 FieldOrderCB<oFloat,Ns,Nc,1,order> A(a);
285 FieldOrderCB<iFloat,Ns,Nc,1,order> B(b);
287 double rescale = 1.0 / A.abs_max();
290 blas::ax(rescale, a_);
291 blas::ax(rescale, b_);
292 FieldOrderCB<oFloat, Ns, Nc, 1, order> A_(a_);
293 FieldOrderCB<iFloat, Ns, Nc, 1, order> B_(b_);
295 ret = compareSpinor(A_, B_, tol);
298 errorQuda("Number of colors %d not supported", a.Ncolor());
304 template <typename oFloat, typename iFloat>
305 int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
307 if (a.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER && b.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
308 ret = genericCompare<oFloat,iFloat,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a, b, tol);
310 errorQuda("Unsupported field order %d\n", a.FieldOrder());
316 template <typename oFloat>
317 int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
319 if (b.Precision() == QUDA_DOUBLE_PRECISION) {
320 ret = genericCompare<oFloat,double>(a, b, tol);
321 } else if (b.Precision() == QUDA_SINGLE_PRECISION) {
322 ret = genericCompare<oFloat,float>(a, b, tol);
324 errorQuda("Precision not supported");
330 int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
332 if (a.Precision() == QUDA_DOUBLE_PRECISION) {
333 ret = genericCompare<double>(a, b, tol);
334 } else if (a.Precision() == QUDA_SINGLE_PRECISION) {
335 ret = genericCompare<float>(a, b, tol);
337 errorQuda("Precision not supported");
343 template <class Order>
344 void print_vector(const Order &o, unsigned int x) {
346 int x_cb = x / o.Nparity();
347 int parity = x%o.Nparity();
349 for (int s=0; s<o.Nspin(); s++) {
350 printfQuda("x = %u, s = %d, { ", x_cb, s);
351 for (int c=0; c<o.Ncolor(); c++) {
352 printfQuda("(%f,%f) ", o(parity, x_cb, s, c).real(), o(parity, x_cb, s, c).imag());
359 // print out the vector at volume point x
360 template <typename Float, QudaFieldOrder order> void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
362 if (a.Ncolor() == 3 && a.Nspin() == 1) {
363 FieldOrderCB<Float,1,3,1,order> A(a);
366 else if (a.Ncolor() == 3 && a.Nspin() == 4) {
367 FieldOrderCB<Float,4,3,1,order> A(a);
370 else if (a.Ncolor() == 3 && a.Nspin() == 1) {
371 FieldOrderCB<Float,1,3,1,order> A(a);
374 else if (a.Ncolor() == 2 && a.Nspin() == 2) {
375 FieldOrderCB<Float,2,2,1,order> A(a);
378 else if (a.Ncolor() == 24 && a.Nspin() == 2) {
379 FieldOrderCB<Float,2,24,1,order> A(a);
382 else if (a.Ncolor() == 6 && a.Nspin() == 4) {
383 FieldOrderCB<Float,4,6,1,order> A(a);
386 else if (a.Ncolor() == 72 && a.Nspin() == 4) {
387 FieldOrderCB<Float,4,72,1,order> A(a);
390 else if (a.Ncolor() == 576 && a.Nspin() == 2) {
391 FieldOrderCB<Float,2,576,1,order> A(a);
394 #ifdef GPU_STAGGERED_DIRAC
395 else if (a.Ncolor() == 64 && a.Nspin() == 2) {
396 FieldOrderCB<Float,2,64,1,order> A(a);
399 else if (a.Ncolor() == 96 && a.Nspin() == 2) {
400 FieldOrderCB<Float,2,96,1,order> A(a);
405 errorQuda("Not supported Ncolor = %d, Nspin = %d", a.Ncolor(), a.Nspin());
409 // print out the vector at volume point x
410 template <typename Float> void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
412 if (a.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
413 genericPrintVector<Float,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a,x);
415 errorQuda("Unsupported field order %d\n", a.FieldOrder());
419 // print out the vector at volume point x
420 void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
422 if (a.Precision() == QUDA_DOUBLE_PRECISION) {
423 genericPrintVector<double>(a,x);
424 } else if (a.Precision() == QUDA_SINGLE_PRECISION) {
425 genericPrintVector<float>(a,x);
427 errorQuda("Precision %d not implemented", a.Precision());
431 // Eventually we should merge the below device print function and
432 // the above host print function.
434 template <typename StoreType, int Ns, int Nc, QudaFieldOrder FieldOrder>
435 void genericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
438 typedef colorspinor::AccessorCB<StoreType, Ns, Nc, 1, FieldOrder> AccessorType;
440 AccessorType A(field);
443 typedef typename scalar<typename mapper<StoreType>::type>::type Float;
445 // Allocate a real+imag component for the storage type.
446 StoreType indiv_num[2];
448 // Allocate space for the full site.
449 Float *data_cpu = new Float[2 * Ns * Nc];
451 // Grab the pointer to the field.
452 complex<StoreType> *field_ptr = (complex<StoreType> *)field.V();
454 // Grab the pointer to the norm field. Might be ignored as appropriate.
455 float *norm_ptr = (float *)field.Norm();
458 if (isFixed<StoreType>::value) {
459 qudaMemcpy(&scale, &norm_ptr[i], sizeof(float), cudaMemcpyDeviceToHost);
460 scale *= fixedInvMaxValue<StoreType>::value;
463 for (int s = 0; s < Ns; s++) {
464 for (int c = 0; c < Nc; c++) {
465 qudaMemcpy(indiv_num, &field_ptr[A.index(i % 2, i / 2, s, c, 0)], 2 * sizeof(StoreType), cudaMemcpyDeviceToHost);
466 data_cpu[2 * (c + Nc * s)] = scale * static_cast<Float>(indiv_num[0]);
467 data_cpu[2 * (c + Nc * s) + 1] = scale * static_cast<Float>(indiv_num[1]);
471 for (int s = 0; s < Ns; s++) {
472 printfQuda("x = %u, s = %d, { ", i, s);
473 for (int c = 0; c < Nc; c++) {
474 printfQuda("(%f,%f) ", data_cpu[(s * Nc + c) * 2], data_cpu[(s * Nc + c) * 2 + 1]);
482 template <typename Float, int Ns, int Nc>
483 void genericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
485 switch (field.FieldOrder()) {
486 case QUDA_FLOAT_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT_FIELD_ORDER>(field, i); break;
487 case QUDA_FLOAT2_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT2_FIELD_ORDER>(field, i); break;
488 case QUDA_FLOAT4_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT4_FIELD_ORDER>(field, i); break;
489 case QUDA_FLOAT8_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT8_FIELD_ORDER>(field, i); break;
490 case QUDA_SPACE_SPIN_COLOR_FIELD_ORDER:
491 genericCudaPrintVector<Float, Ns, Nc, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(field, i);
493 case QUDA_SPACE_COLOR_SPIN_FIELD_ORDER:
494 genericCudaPrintVector<Float, Ns, Nc, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER>(field, i);
496 default: errorQuda("Unsupported field order %d", field.FieldOrder());
500 template <typename Float> struct GenericCudaPrintVector {
501 GenericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
503 if (field.Ncolor() == 3 && field.Nspin() == 4) {
504 genericCudaPrintVector<Float, 4, 3>(field, i);
505 } else if (field.Ncolor() == 3 && field.Nspin() == 1) {
506 genericCudaPrintVector<Float, 1, 3>(field, i);
507 } else if (field.Ncolor() == 6 && field.Nspin() == 2) { // wilson free field MG
508 genericCudaPrintVector<Float, 2, 6>(field, i);
509 } else if (field.Ncolor() == 24 && field.Nspin() == 2) { // common value for Wilson, also staggered free field
510 genericCudaPrintVector<Float, 2, 24>(field, i);
511 } else if (field.Ncolor() == 32 && field.Nspin() == 2) {
512 genericCudaPrintVector<Float, 2, 32>(field, i);
513 #ifdef GPU_STAGGERED_DIRAC
514 } else if (field.Ncolor() == 64 && field.Nspin() == 2) {
515 genericCudaPrintVector<Float, 2, 64>(field, i);
516 } else if (field.Ncolor() == 96 && field.Nspin() == 2) {
517 genericCudaPrintVector<Float, 2, 96>(field, i);
520 errorQuda("Not supported Ncolor = %d, Nspin = %d", field.Ncolor(), field.Nspin());
525 void genericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
527 instantiatePrecision<GenericCudaPrintVector>(field, i);