26 extern void usage(
char** argv);
354 qudaGaugeParam.
X[0] =
xdim;
355 qudaGaugeParam.
X[1] =
ydim;
356 qudaGaugeParam.
X[2] =
zdim;
357 qudaGaugeParam.
X[3] =
tdim;
372 qudaGaugeParam.
ga_pad = 0;
377 void* sitelink =
nullptr;
378 void* sitelink_1d =
nullptr;
384 void* sitelink_2d[4];
392 for(
int dir = 0; dir < 4; dir++){
393 for(
int i=0; i <
V; i++){
394 char* src = ((
char*)sitelink_2d[dir]) + i * gaugeSiteSize* qudaGaugeParam.
cpu_prec;
395 char* dst = ((
char*)sitelink_1d) + (4*i+dir)*gaugeSiteSize*qudaGaugeParam.
cpu_prec ;
396 memcpy(dst, src, gaugeSiteSize*qudaGaugeParam.
cpu_prec);
400 sitelink = sitelink_1d;
402 sitelink = (
void**)sitelink_2d;
408 void* sitelink_ex_2d[4];
409 void* sitelink_ex_1d;
412 for(
int i=0;i < 4;i++) sitelink_ex_2d[i] =
pinned_malloc(
V_ex*gaugeSiteSize*gSize);
419 for(
int i=0; i <
V_ex; i++){
428 int x1h = sid - za*
E1h;
433 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
434 int x1 = 2*x1h + x1odd;
436 if( x1< 2 || x1 >= X1 +2
437 || x2< 2 || x2 >= X2 +2
438 || x3< 2 || x3 >= X3 +2
439 || x4< 2 || x4 >= X4 +2){
443 x1 = (x1 - 2 +
X1) % X1;
444 x2 = (x2 - 2 +
X2) % X2;
445 x3 = (x3 - 2 +
X3) % X3;
446 x4 = (x4 - 2 +
X4) % X4;
448 int idx = (x4*X3*X2*X1+x3*X2*X1+x2*X1+x1)>>1;
452 for(
int dir= 0; dir < 4; dir++){
453 char* src = (
char*)sitelink_2d[dir];
454 char* dst = (
char*)sitelink_ex_2d[dir];
455 memcpy(dst+i*gaugeSiteSize*gSize, src+idx*gaugeSiteSize*gSize, gaugeSiteSize*gSize);
460 for(
int dir = 0; dir < 4; dir++){
461 for(
int i=0; i <
V_ex; i++){
462 char* src = ((
char*)sitelink_ex_2d[dir]) + i * gaugeSiteSize* qudaGaugeParam.
cpu_prec;
463 char* dst = ((
char*)sitelink_ex_1d) + (4*i+dir)*gaugeSiteSize*qudaGaugeParam.
cpu_prec ;
464 memcpy(dst, src, gaugeSiteSize*qudaGaugeParam.
cpu_prec);
478 double loop_coeff_d[
sizeof(
loop_coeff_f)/
sizeof(
float)];
479 for(
unsigned int i=0;i <
sizeof(
loop_coeff_f)/
sizeof(
float); i++){
487 loop_coeff = loop_coeff_d;
492 int** input_path_buf[4];
493 for(
int dir =0; dir < 4; dir++){
494 input_path_buf[dir] = (
int**)
safe_malloc(num_paths*
sizeof(
int*));
495 for(
int i=0;i < num_paths;i++){
497 if(dir == 0) memcpy(input_path_buf[dir][i],
path_dir_x[i],
length[i]*
sizeof(
int));
498 else if(dir ==1) memcpy(input_path_buf[dir][i],
path_dir_y[i],
length[i]*
sizeof(
int));
499 else if(dir ==2) memcpy(input_path_buf[dir][i],
path_dir_z[i],
length[i]*
sizeof(
int));
500 else if(dir ==3) memcpy(input_path_buf[dir][i],
path_dir_t[i],
length[i]*
sizeof(
int));
508 loop_coeff_d, num_paths, max_length, eb3,
513 struct timeval t0, t1;
514 double total_time = 0.0;
516 for (
int i =0; i<
niter; i++){
518 gettimeofday(&t0, NULL);
520 loop_coeff_d, num_paths, max_length, eb3,
522 gettimeofday(&t1, NULL);
523 total_time += t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec);
533 int R[4] = {2, 2, 2, 2};
537 input_path_buf,
length, loop_coeff, num_paths);
540 input_path_buf,
length, loop_coeff, num_paths);
548 printfQuda(
"Test %s\n",(1 == res) ?
"PASSED" :
"FAILED");
551 double perf = 1.0*niter*flops*
V/(total_time*1e+9);
552 printfQuda(
"total time =%.2f ms\n", total_time*1e+3);
553 printfQuda(
"overall performance : %.2f GFLOPS\n",perf);
555 for(
int dir = 0; dir < 4; dir++){
556 for(
int i=0;i < num_paths; i++)
host_free(input_path_buf[dir][i]);
561 for(
int dir=0;dir < 4;dir++)
host_free(sitelink_2d[dir]);
565 for(
int dir=0; dir < 4; dir++)
host_free(sitelink_ex_2d[dir]);
580 printfQuda(
"link_precision link_reconstruct space_dim(x/y/z) T_dimension Gauge_order niter\n");
595 printfQuda(
" --gauge-order <qdp/milc> # Gauge storing order in CPU\n");
603 for (i =1;i < argc; i++){
609 if( strcmp(argv[i],
"--gauge-order") == 0){
614 if(strcmp(argv[i+1],
"milc") == 0){
616 }
else if(strcmp(argv[i+1],
"qdp") == 0){
619 fprintf(stderr,
"Error: unsupported gauge-field order\n");
626 if( strcmp(argv[i],
"--verify") == 0){
631 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[i]);
static QudaGaugeParam qudaGaugeParam
QudaReconstructType reconstruct_sloppy
int main(int argc, char **argv)
void usage_extra(char **argv)
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile)
#define pinned_malloc(size)
enum QudaPrecision_s QudaPrecision
void createMomCPU(void *mom, QudaPrecision precision)
static void gauge_force_test(void)
int process_command_line_option(int argc, char **argv, int *idx)
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
QudaGaugeFieldOrder gauge_order
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
const char * get_prec_str(QudaPrecision prec)
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
QudaReconstructType link_recon
void gauge_force_reference(void *refMom, double eb3, void **sitelink, void **sitelink_ex_2d, QudaPrecision prec, int ***path_dir, int *length, void *loop_coeff, int num_paths)
int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam)
void initQuda(int device)
static void display_test_info()
const char * get_recon_str(QudaReconstructType recon)
QudaPrecision cuda_prec_sloppy
int gridsize_from_cmdline[]
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
QudaReconstructType reconstruct
int strong_check_mom(void *momA, void *momB, int len, QudaPrecision prec)
#define safe_malloc(size)
void * memset(void *s, int c, size_t n)
QudaGaugeFieldOrder gauge_order
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
void initComms(int argc, char **argv, int *const commDims)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
void exchange_cpu_sitelink_ex(int *X, int *R, void **sitelink, QudaGaugeFieldOrder cpu_order, QudaPrecision gPrecision, int optflag, int geometry)
QudaGaugeParam newQudaGaugeParam(void)