8 using namespace colorspinor;
16 for (
int x_cb=0; x_cb<t.VolumeCB(); x_cb++) {
17 for (
int s=0;
s<t.Nspin();
s++) {
18 for (
int c=0; c<t.Ncolor(); c++) {
31 void point(T &t,
int x,
int s,
int c) { t(x%2, x/2, s, c) = 1.0; }
40 for (
int x_cb=0; x_cb<t.VolumeCB(); x_cb++) {
42 for (
int s2=0; s2<t.Nspin(); s2++) {
43 for (
int c2=0; c2<t.Ncolor(); c2++) {
44 t(
parity,x_cb,s2,c2) = 0.0;
56 void sin(P &p,
int d,
int n,
int offset) {
58 int X[4] = { p.X(0), p.X(1), p.X(2), p.X(3)};
59 X[0] *= (p.Nparity() == 1) ? 2 : 1;
62 for (
int x_cb=0; x_cb<p.VolumeCB(); x_cb++) {
65 double mode = n * (double)coord[d] / X[d];
66 double k = (double)offset +
sin (M_PI * mode);
68 for (
int s=0;
s<p.Nspin();
s++)
69 for (
int c=0; c<p.Ncolor(); c++)
84 errorQuda(
"ERROR: lib/color_spinor_util.cu, corner() is only defined for Nspin = 1 fields.\n");
87 int X[4] = { p.X(0), p.X(1), p.X(2), p.X(3)};
88 X[0] *= (p.Nparity() == 1) ? 2 : 1;
91 for (
int x_cb=0; x_cb<p.VolumeCB(); x_cb++) {
97 int corner = 8*(coord[3]%2)+4*(coord[2]%2)+2*(coord[1]%2)+(coord[0]%2);
100 for (
int c2=0; c2<p.Ncolor(); c2++) {
101 p(
parity,x_cb,0,c2) = 0.0;
104 p(
parity,x_cb,0,c) = (double)v;
111 template <
typename Float,
int nSpin,
int nColor, QudaFieldOrder order>
119 else errorQuda(
"Unsupported source type %d", sourceType);
122 template <
typename Float,
int nSpin, QudaFieldOrder order>
125 genericSource<Float,nSpin,3,order>(a,sourceType, x,
s, c);
126 }
else if (a.
Ncolor() == 4) {
127 genericSource<Float,nSpin,4,order>(a,sourceType, x,
s, c);
128 }
else if (a.
Ncolor() == 6) {
129 genericSource<Float,nSpin,6,order>(a,sourceType, x,
s, c);
130 }
else if (a.
Ncolor() == 8) {
131 genericSource<Float,nSpin,8,order>(a,sourceType, x,
s, c);
132 }
else if (a.
Ncolor() == 12) {
133 genericSource<Float,nSpin,12,order>(a,sourceType, x,
s, c);
134 }
else if (a.
Ncolor() == 16) {
135 genericSource<Float,nSpin,16,order>(a,sourceType, x,
s, c);
136 }
else if (a.
Ncolor() == 20) {
137 genericSource<Float,nSpin,20,order>(a,sourceType, x,
s, c);
138 }
else if (a.
Ncolor() == 24) {
139 genericSource<Float,nSpin,24,order>(a,sourceType, x,
s, c);
140 }
else if (a.
Ncolor() == 32) {
141 genericSource<Float,nSpin,32,order>(a,sourceType, x,
s, c);
147 template <
typename Float, QudaFieldOrder order>
149 if (a.
Nspin() == 1) {
150 genericSource<Float,1,order>(a,sourceType, x,
s, c);
151 }
else if (a.
Nspin() == 2) {
152 genericSource<Float,2,order>(a,sourceType, x,
s, c);
153 }
else if (a.
Nspin() == 4) {
154 genericSource<Float,4,order>(a,sourceType, x,
s, c);
160 template <
typename Float>
163 genericSource<Float,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a,sourceType, x,
s, c);
173 genericSource<double>(a,sourceType, x,
s, c);
175 genericSource<float>(a, sourceType, x,
s, c);
183 template <
class U,
class V>
185 int fail_check = 16*
tol;
186 int *fail =
new int[fail_check];
187 for (
int f=0; f<fail_check; f++) fail[f] = 0;
189 int N = 2*u.Nspin()*u.Ncolor();
190 int *iter =
new int[N];
191 for (
int i=0; i<N; i++) iter[i] = 0;
194 for (
int x_cb=0; x_cb<u.VolumeCB(); x_cb++) {
196 for (
int s=0; s<u.Nspin(); s++) {
197 for (
int c=0; c<u.Ncolor(); c++) {
198 for (
int z=0; z<2; z++) {
199 int j = (s*u.Ncolor() + c)*2+z;
201 double diff = z==0 ? fabs(u(
parity,x_cb,s,c,z).real() - v(
parity,x_cb,s,c,z).real()) :
202 fabs(u(
parity,x_cb,s,c).imag() - v(
parity,x_cb,s,c).imag());
204 for (
int f=0; f<fail_check; f++) {
205 if (diff >
pow(10.0,-(f+1)/(
double)tol)) {
210 if (diff > 1e-3) iter[j]++;
221 for (
int i=0; i<N; i++)
printfQuda(
"%d fails = %d\n", i, iter[i]);
223 int accuracy_level =0;
224 for (
int f=0; f<fail_check; f++) {
225 if (fail[f] == 0) accuracy_level = f+1;
228 size_t total = u.Nparity()*u.VolumeCB()*N*
comm_size();
229 for (
int f=0; f<fail_check; f++) {
230 printfQuda(
"%e Failures: %d / %lu = %e\n",
pow(10.0,-(f+1)/(
double)tol),
231 fail[f], total, fail[f] / (
double)total);
237 return accuracy_level;
240 template <
typename oFloat,
typename iFloat, QudaFieldOrder order>
244 constexpr
int Nc = 3;
245 if (a.
Nspin() == 4) {
246 constexpr
int Ns = 4;
250 double rescale = 1.0 / A.
abs_max();
259 }
else if (a.
Nspin() == 1) {
260 constexpr
int Ns = 1;
264 double rescale = 1.0 / A.
abs_max();
281 template <
typename oFloat,
typename iFloat>
285 ret = genericCompare<oFloat,iFloat,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a, b,
tol);
293 template <
typename oFloat>
297 ret = genericCompare<oFloat,double>(a, b,
tol);
299 ret = genericCompare<oFloat,float>(a, b,
tol);
310 ret = genericCompare<double>(a, b,
tol);
312 ret = genericCompare<float>(a, b,
tol);
320 template <
class Order>
323 int x_cb = x / o.Nparity();
324 int parity = x%o.Nparity();
326 for (
int s=0; s<o.Nspin(); s++) {
328 for (
int c=0; c<o.Ncolor(); c++) {
329 printfQuda(
"(%f,%f) ", o(parity, x_cb, s, c).real(), o(parity, x_cb, s, c).imag());
375 genericPrintVector<Float,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a,x);
385 genericPrintVector<double>(a,x);
387 genericPrintVector<float>(a,x);
396 template <
typename StoreType,
int Ns,
int Nc, QudaFieldOrder FieldOrder>
402 AccessorType A(field);
408 StoreType indiv_num[2];
411 Float *data_cpu =
new Float[2 * Ns * Nc];
414 complex<StoreType> *field_ptr = (complex<StoreType> *)field.
V();
417 float *norm_ptr = (
float *)field.
Norm();
421 cudaMemcpy(&scale, &norm_ptr[i],
sizeof(
float), cudaMemcpyDeviceToHost);
425 for (
int s = 0; s < Ns; s++) {
426 for (
int c = 0; c < Nc; c++) {
427 cudaMemcpy(indiv_num, &field_ptr[A.index(i % 2, i / 2, s, c, 0)], 2 *
sizeof(StoreType), cudaMemcpyDeviceToHost);
428 data_cpu[2 * (c + Nc *
s)] = scale * static_cast<Float>(indiv_num[0]);
429 data_cpu[2 * (c + Nc *
s) + 1] = scale * static_cast<Float>(indiv_num[1]);
433 for (
int s = 0; s < Ns; s++) {
435 for (
int c = 0; c < Nc; c++) {
436 printfQuda(
"(%f,%f) ", data_cpu[(s * Nc + c) * 2], data_cpu[(s * Nc + c) * 2 + 1]);
444 template <
typename Float,
int Ns,
int Nc>
448 case QUDA_FLOAT_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT_FIELD_ORDER>(field, i);
break;
452 genericCudaPrintVector<Float, Ns, Nc, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(field, i);
455 genericCudaPrintVector<Float, Ns, Nc, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER>(field, i);
457 default:
errorQuda(
"Unsupported field order %d", field.FieldOrder());
464 genericCudaPrintVector<Float, 4, 3>(field, i);
465 }
else if (field.
Ncolor() == 3 && field.
Nspin() == 1) {
466 genericCudaPrintVector<Float, 1, 3>(field, i);
467 }
else if (field.
Ncolor() == 6 && field.
Nspin() == 2) {
468 genericCudaPrintVector<Float, 2, 6>(field, i);
469 }
else if (field.
Ncolor() == 24 && field.
Nspin() == 2) {
470 genericCudaPrintVector<Float, 2, 24>(field, i);
471 }
else if (field.
Ncolor() == 32 && field.
Nspin() == 2) {
472 genericCudaPrintVector<Float, 2, 32>(field, i);
486 default:
errorQuda(
"Unsupported precision = %d\n", field.Precision());
void ax(double a, ColorSpinorField &x)
int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol)
void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
void print_vector(const Order &o, unsigned int x)
int compareSpinor(const U &u, const V &v, const int tol)
__host__ double abs_max(bool global=true) const
enum QudaSourceType_s QudaSourceType
void corner(T &p, int v, int s, int c)
__host__ __device__ ValueType sin(ValueType x)
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c)
void point(T &t, int x, int s, int c)
void comm_allreduce_int(int *data)
QudaPrecision Precision() const
QudaFieldOrder FieldOrder() const
void constant(T &t, int k, int s, int c)
__host__ __device__ int getCoords(int coord[], const Arg &arg, int &idx, int parity, int &dim)
Compute the space-time coordinates we are at.
void genericCudaPrintVector(const cudaColorSpinorField &a, unsigned x)