// NOTE(review): only isolated statements are visible in this view; each begins
// with a bare number (66, 70, ...) that looks like the statement's line number
// in the original file, and the enclosing functions are not shown.

// Copy `bytes` bytes from src's `v` buffer into this object's `v`. src is
// downcast by reference, so a mismatched dynamic type raises std::bad_cast
// instead of producing a null pointer.
66 memcpy(
v, dynamic_cast<const cpuColorSpinorField&>(src).
v,
bytes);
// Error path: passes the runtime type name of src to errorQuda.
70 errorQuda(
"Unknown input ColorSpinorField %s",
typeid(src).name());
// Same error report as above, issued from a second location.
84 errorQuda(
"Unknown input ColorSpinorField %s",
typeid(src).name());
// Hands this object to src.saveSpinorField; presumably src writes its data
// into *this -- TODO confirm against saveSpinorField's declaration.
109 src.saveSpinorField(*
this);
// Error path for half precision (the guarding condition is not visible here).
131 errorQuda(
"Half precision not supported");
// createOrder: only the signature and one error call are visible in this view;
// the statements between them and the closing brace are not shown.
155 void cpuColorSpinorField::createOrder() {
// Reports the current `precision` value as unsupported; presumably the
// fall-through of a precision dispatch -- TODO confirm.
176 errorQuda(
"Precision %d not supported", precision);
// destroy: only the signature and one error call are visible in this view;
// the statements between them and the closing brace are not shown.
181 void cpuColorSpinorField::destroy() {
// Reports the current `precision` value as unsupported; presumably the
// fall-through of a precision dispatch -- TODO confirm.
188 errorQuda(
"Precision %d not supported", precision);
// Element-wise copy between two accessor objects of (possibly different)
// types D and S. The function signature and the closing braces are not
// visible in this view.
200 template <
class D,
class S>
// Loop bounds are all taken from dst: Volume() sites, Nspin() spins,
// Ncolor() colors, and a final index z in {0, 1}.
// NOTE(review): z presumably selects the real/imaginary part -- confirm.
203 for (
int x=0; x<dst.Volume(); x++) {
204 for (
int s=0;
s<dst.Nspin();
s++) {
205 for (
int c=0; c<dst.Ncolor(); c++) {
206 for (
int z=0; z<2; z++) {
// Assign through the accessors' four-index operator(); any conversion between
// the element types of D and S happens in this assignment.
207 dst(x,
s, c, z) = src(x,
s, c, z);
// Treats both `v` and `src.v` as arrays of pointers and copies one buffer per
// index of the last dimension (x[nDim-1] entries in total).
// NOTE(review): each iteration copies `bytes` bytes; confirm that `bytes` is
// the size of ONE such buffer rather than of the whole field, otherwise this
// reads and writes past the end of each buffer.
219 for (
int i=0; i<x[
nDim-1]; i++) memcpy(((
void**)
v)[i], ((
void**)src.
v)[i],
bytes);
// Loop nest over every (site, spin, color, z) index of accessor `t`, with z
// in {0, 1}. The innermost statement and the enclosing function are not
// visible in this view.
247 for (
int x=0; x<t.Volume(); x++) {
248 for (
int s=0;
s<t.Nspin();
s++) {
249 for (
int c=0; c<t.Ncolor(); c++) {
250 for (
int z=0; z<2; z++) {
// Writes 1.0 into component 0 of element (x, s, c) of accessor `t`; no other
// element is touched by this function.
// NOTE(review): T is presumably a template parameter declared on the
// preceding (not visible) line, and component 0 presumably is the real part.
260 void point(T &t,
const int x,
const int s,
const int c) { t(x, s, c, 0) = 1.0; }
// Tail of a parameter list (spin index s, color index c); the start of this
// signature is not visible in this view.
263 const int s,
const int c) {
// Fall-through of a precision check whose conditions are not visible here.
270 else errorQuda(
"Precision not supported");
// Second precision fall-through with the same message.
277 else errorQuda(
"Precision not supported");
// Reports the numeric value of `sourceType` as unimplemented.
281 errorQuda(
"Source type %d not implemented", sourceType);
// Element-wise comparison of two accessors `u` and `v` (types U and V).
// The function signature and several closing braces are not visible in this
// view; the name is presumably compareSpinor, matching the calls elsewhere
// in this file -- TODO confirm.
287 template <
class U,
class V>
// One failure counter per tested threshold; 16*tol thresholds in total.
// NOTE(review): tol <= 0 makes this array size non-positive and makes the
// divisor in the threshold formula below zero -- confirm callers pass tol > 0.
289 int fail_check = 16*tol;
290 int *fail =
new int[fail_check];
291 for (
int f=0; f<fail_check; f++) fail[f] = 0;
// N is the number of scalar components per site: 2 * spins * colors.
293 int N = 2*u.Nspin()*u.Ncolor();
// Per-component counter of differences larger than 1e-3.
// NOTE(review): no delete[] for `fail` or `iter` is visible in this view --
// confirm they are released before the return.
294 int *iter =
new int[N];
295 for (
int i=0; i<N; i++) iter[i] = 0;
// Visit every (site, spin, color, z) element; bounds come from u only.
297 for (
int x=0; x<u.Volume(); x++) {
298 for (
int s=0;
s<u.Nspin();
s++) {
299 for (
int c=0; c<u.Ncolor(); c++) {
300 for (
int z=0; z<2; z++) {
// Absolute difference of the two fields at this element.
301 double diff = fabs(u(x,
s,c,z) - v(x,
s,c,z));
// Threshold f is 10^(-(f+1)/tol); count the element against every threshold
// it exceeds.
303 for (
int f=0; f<fail_check; f++)
304 if (diff > pow(10.0,-(f+1)/(
double)tol)) fail[f]++;
// Flattened component index within a site: spin-major, then color, then z.
306 int j = (
s*u.Ncolor() + c)*2+z;
307 if (diff > 1e-3) iter[j]++;
// Report, per component index, how many sites differed by more than 1e-3.
313 for (
int i=0; i<N; i++)
printfQuda(
"%d fails = %d\n", i, iter[i]);
// As visible here, accuracy_level ends up as f+1 for the largest f whose
// threshold had zero failures (0 if every threshold had at least one).
315 int accuracy_level =0;
316 for (
int f=0; f<fail_check; f++) {
317 if (fail[f] == 0) accuracy_level = f+1;
// Print each threshold with its failure count and failure fraction over all
// Volume()*N compared elements.
320 for (
int f=0; f<fail_check; f++) {
321 printfQuda(
"%e Failures: %d / %d = %e\n", pow(10.0,-(f+1)/(
double)tol),
322 fail[f], u.Volume()*N, fail[f] / (double)(u.Volume()*N));
328 return accuracy_level;
// Dispatch onto compareSpinor for the four combinations of double/single
// accessors of fields a and b; the conditions selecting each branch and the
// enclosing function are not visible in this view.
// a: order_double, b: order_double
338 ret =
compareSpinor(*(a.order_double), *(b.order_double), tol);
// a: order_double, b: order_single
340 ret =
compareSpinor(*(a.order_double), *(b.order_single), tol);
// a: order_single, b: order_double
343 ret =
compareSpinor(*(a.order_single), *(b.order_double), tol);
// a: order_single, b: order_single
345 ret =
compareSpinor(*(a.order_single), *(b.order_single), tol);
// Prints all spin/color components of accessor `o` at one site `x` to
// std::cout. The function signature and closing braces are not visible in
// this view.
351 template <
class Order>
// One output line per spin index.
354 for (
int s=0;
s<o.Nspin();
s++) {
355 std::cout <<
"x = " << x <<
", s = " <<
s <<
", { ";
// Each color is printed as a pair built from components 0 and 1.
// NOTE(review): presumably (real , imaginary) -- confirm.
356 for (
int c=0; c<o.Ncolor(); c++) {
357 std::cout <<
" ( " << o(x, s, c, 0) <<
" , " ;
// A separating comma follows every pair except the last color.
358 if (c<o.Ncolor()-1) std::cout << o(x, s, c, 1) <<
" ) ," ;
359 else std::cout << o(x, s, c, 1) <<
" ) " ;
361 std::cout <<
" } " << std::endl;
// Error path reporting an unimplemented `precision` value; the enclosing
// function is not visible in this view.
377 errorQuda(
"Precision %d not implemented", precision);
// Fragments of a ghost-buffer allocation routine; the signature, guarding
// conditions and several declarations (X2..X4, num_faces, spinor_size) are
// not visible in this view.
// NOTE(review): the message spells the routine name "alllocateGhostBuffer"
// (three l's) -- likely a typo in the runtime string; left untouched here.
387 errorQuda(
"Full spinor is not supported in alllocateGhostBuffer\n");
// X1 is twice x[0].
// NOTE(review): presumably x[0] stores the checkerboarded (halved) extent.
390 int X1 = this->x[0]*2;
// Fifth-dimension extent; 1 when the field is not five-dimensional.
394 int X5 = this->
nDim == 5 ? this->x[4] : 1;
// First entry: product of the extents other than X1, halved. The remaining
// three initializers are not visible here.
396 int Vsh[4]={ X2*X3*X4*X5/2,
// nSpin == 1 switches num_faces to 3.
402 if(this->
nSpin == 1) num_faces = 3;
// Byte count for direction i: num_faces * Vsh[i] * spinor_size.
405 for (
int i=0; i<4; i++) {
406 size_t nbytes = num_faces*Vsh[i]*spinor_size;
// Second loop over the four directions; its body is not visible here.
421 for(
int i=0;i < 4; i++){
// Fragments of the CPU ghost-packing routine. The signature, the coordinate
// decomposition of site index i into (x1..x5), the switch over directions and
// most conditions/closing braces are not visible in this view.
435 errorQuda(
"Full spinor is not supported in packGhost for cpu");
// Branch taken for nSpin == 1; its body is not visible here.
443 if(this->
nSpin == 1){
// X1 is twice x[0].
// NOTE(review): presumably x[0] stores the checkerboarded (halved) extent.
448 int X1 = this->x[0]*2;
// Fifth-dimension extent; 1 when the field is not five-dimensional.
452 int X5 = this->
nDim == 5 ? this->x[4]: 1;
// Visit every site index of this field.
455 for(
int i=0;i < this->
volume;i++){
// Parity bit of the remaining coordinates plus oddBit.
468 int x1odd = (x2 + x3 + x4 + x5 + oddBit) & 1;
// Direction 1, first face: index with x1 slowest and x2 fastest, then halved
// by the right shift. The condition guarding this copy is not visible.
479 ghost_face_idx = (x1*X5*X4*X3*X2 + x5*X4*X3*X2 + x4*(X3*
X2)+x3*X2 +x2)>>1;
// Copy one spinor (spinor_size bytes) from site i of `v` into slot
// ghost_face_idx of ghost_spinor.
480 memcpy( ((
char*)ghost_spinor) + ghost_face_idx*spinor_size, ((
char*)
v)+i*spinor_size, spinor_size);
// Direction 1, last num_faces slices: x1 is rebased to start at 0.
483 if (x1 >=X1 - num_faces){
484 ghost_face_idx = ((x1-X1+num_faces)*X5*X4*X3*X2 + x5*X4*X3*X2 + x4*(X3*X2)+x3*X2 +
x2)>>1;
485 memcpy( ((
char*)ghost_spinor) + ghost_face_idx*spinor_size, ((
char*)
v)+i*spinor_size, spinor_size);
// Direction 2, first face: x2 slowest, x1 fastest.
493 ghost_face_idx = (x2*X5*X4*X3*X1 +x5*X4*X3*X1 + x4*X3*X1+x3*X1+
x1)>>1;
494 memcpy( ((
char*)ghost_spinor) + ghost_face_idx*spinor_size, ((
char*)
v)+i*spinor_size, spinor_size);
// Direction 2, last num_faces slices: x2 is rebased to start at 0.
497 if (x2 >= X2 - num_faces){
498 ghost_face_idx = ((x2-X2+num_faces)*X5*X4*X3*X1 +x5*X4*X3*X1+ x4*X3*X1+x3*X1+x1)>>1;
499 memcpy( ((
char*)ghost_spinor) + ghost_face_idx*spinor_size, ((
char*)
v)+i*spinor_size, spinor_size);
// Direction 3, first face: x3 slowest, x1 fastest.
507 ghost_face_idx = (x3*X5*X4*X2*X1 + x5*X4*X2*X1 + x4*X2*X1+x2*X1+
x1)>>1;
508 memcpy( ((
char*)ghost_spinor) + ghost_face_idx*spinor_size, ((
char*)
v)+i*spinor_size, spinor_size);
// Direction 3, last num_faces slices: x3 is rebased to start at 0.
511 if (x3 >= X3 - num_faces){
512 ghost_face_idx = ((x3-X3+num_faces)*X5*X4*X2*X1 + x5*X4*X2*X1 + x4*X2*X1 + x2*X1 + x1)>>1;
513 memcpy( ((
char*)ghost_spinor) + ghost_face_idx*spinor_size, ((
char*)
v)+i*spinor_size, spinor_size);
// Direction 4, first face: x4 slowest, x1 fastest.
521 ghost_face_idx = (x4*X5*X3*X2*X1 + x5*X3*X2*X1 + x3*X2*X1+x2*X1+
x1)>>1;
522 memcpy( ((
char*)ghost_spinor) + ghost_face_idx*spinor_size, ((
char*)
v)+i*spinor_size, spinor_size);
// Direction 4, last num_faces slices: x4 is rebased to start at 0.
525 if (x4 >= X4 - num_faces){
526 ghost_face_idx = ((x4-X4+num_faces)*X5*X3*X2*X1 + x5*X3*X2*X1 + x3*X2*X1+x2*X1+x1)>>1;
527 memcpy( ((
char*)ghost_spinor) + ghost_face_idx*spinor_size, ((
char*)
v)+i*spinor_size, spinor_size);
// Error path of the CPU ghost-unpacking routine; the guarding condition and
// the rest of the routine are not visible in this view.
542 errorQuda(
"Full spinor is not supported in unpackGhost for cpu");