30 extern void usage(
char** argv);
352 gauge_force_test(
void)
361 qudaGaugeParam.
X[0] =
xdim;
362 qudaGaugeParam.
X[1] =
ydim;
363 qudaGaugeParam.
X[2] =
zdim;
364 qudaGaugeParam.
X[3] =
tdim;
379 qudaGaugeParam.
ga_pad = 0;
382 size_t gSize = qudaGaugeParam.
cpu_prec;
395 void* sitelink_2d[4];
407 for(
int dir = 0; dir < 4; dir++){
408 for(
int i=0; i <
V; i++){
409 char* src = ((
char*)sitelink_2d[dir]) + i * gaugeSiteSize* qudaGaugeParam.
cpu_prec;
410 char* dst = ((
char*)sitelink_1d) + (4*i+dir)*gaugeSiteSize*qudaGaugeParam.
cpu_prec ;
411 memcpy(dst, src, gaugeSiteSize*qudaGaugeParam.
cpu_prec);
415 sitelink = sitelink_1d;
417 sitelink = (
void**)sitelink_2d;
423 void* sitelink_ex_2d[4];
424 void* sitelink_ex_1d;
427 for(
int i=0;i < 4;i++) sitelink_ex_2d[i] =
pinned_malloc(
V_ex*gaugeSiteSize*gSize);
434 for(
int i=0; i <
V_ex; i++){
451 if( x1< 2 || x1 >= X1 +2
452 || x2< 2 || x2 >= X2 +2
453 || x3< 2 || x3 >= X3 +2
454 || x4< 2 || x4 >= X4 +2){
458 x1 = (x1 - 2 +
X1) % X1;
459 x2 = (x2 - 2 +
X2) % X2;
460 x3 = (x3 - 2 +
X3) % X3;
461 x4 = (x4 - 2 +
X4) % X4;
463 int idx = (x4*X3*X2*X1+x3*X2*X1+x2*X1+
x1)>>1;
467 for(
int dir= 0; dir < 4; dir++){
468 char* src = (
char*)sitelink_2d[dir];
469 char* dst = (
char*)sitelink_ex_2d[dir];
470 memcpy(dst+i*gaugeSiteSize*gSize, src+idx*gaugeSiteSize*gSize, gaugeSiteSize*gSize);
475 for(
int dir = 0; dir < 4; dir++){
476 for(
int i=0; i <
V_ex; i++){
477 char* src = ((
char*)sitelink_ex_2d[dir]) + i * gaugeSiteSize* qudaGaugeParam.
cpu_prec;
478 char* dst = ((
char*)sitelink_ex_1d) + (4*i+dir)*gaugeSiteSize*qudaGaugeParam.
cpu_prec ;
479 memcpy(dst, src, gaugeSiteSize*qudaGaugeParam.
cpu_prec);
494 double loop_coeff_d[
sizeof(
loop_coeff_f)/
sizeof(
float)];
495 for(
unsigned int i=0;i <
sizeof(
loop_coeff_f)/
sizeof(
float); i++){
503 loop_coeff = loop_coeff_d;
508 int** input_path_buf[4];
509 for(
int dir =0; dir < 4; dir++){
510 input_path_buf[dir] = (
int**)
safe_malloc(num_paths*
sizeof(
int*));
511 for(
int i=0;i < num_paths;i++){
513 if(dir == 0) memcpy(input_path_buf[dir][i],
path_dir_x[i],
length[i]*
sizeof(
int));
514 else if(dir ==1) memcpy(input_path_buf[dir][i],
path_dir_y[i],
length[i]*
sizeof(
int));
515 else if(dir ==2) memcpy(input_path_buf[dir][i],
path_dir_z[i],
length[i]*
sizeof(
int));
516 else if(dir ==3) memcpy(input_path_buf[dir][i],
path_dir_t[i],
length[i]*
sizeof(
int));
525 struct timeval t0, t1;
529 gettimeofday(&t0, NULL);
531 loop_coeff_d, num_paths, max_length, eb3,
532 &qudaGaugeParam, timeinfo);
533 gettimeofday(&t1, NULL);
536 double total_time = t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec);
545 int R[4] = {2, 2, 2, 2};
549 input_path_buf,
length, loop_coeff, num_paths);
552 input_path_buf,
length, loop_coeff, num_paths);
561 printf(
"Test %s\n",(1 == res) ?
"PASSED" :
"FAILED");
564 double perf = 1.0* flops*V/(total_time*1e+9);
565 double kernel_perf = 1.0*flops*V/(timeinfo[1]*1e+9);
566 printf(
"init and cpu->gpu time: %.2f ms, kernel time: %.2f ms, gpu->cpu and cleanup time: %.2f total time =%.2f ms\n",
567 timeinfo[0]*1e+3, timeinfo[1]*1e+3, timeinfo[2]*1e+3, total_time*1e+3);
568 printf(
"kernel performance: %.2f GFLOPS, overall performance : %.2f GFLOPS\n", kernel_perf, perf);
570 for(
int dir = 0; dir < 4; dir++){
571 for(
int i=0;i < num_paths; i++)
host_free(input_path_buf[dir][i]);
576 for(
int dir=0;dir < 4;dir++)
host_free(sitelink_2d[dir]);
580 for(
int dir=0; dir < 4; dir++)
host_free(sitelink_ex_2d[dir]);
593 printf(
"running the following test:\n");
595 printf(
"link_precision link_reconstruct space_dim(x/y/z) T_dimension Gauge_order Attempts\n");
596 printf(
"%s %s %d/%d/%d %d %s %d\n",
609 printf(
"Extra options:\n");
610 printf(
" --gauge-order <qdp/milc> # Gauge storing order in CPU\n");
611 printf(
" --attempts <n> # Number of tests\n");
619 for (i =1;i < argc; i++){
625 if( strcmp(argv[i],
"--gauge-order") == 0){
630 if(strcmp(argv[i+1],
"milc") == 0){
632 }
else if(strcmp(argv[i+1],
"qdp") == 0){
635 fprintf(stderr,
"Error: unsupported gauge-field order\n");
641 if( strcmp(argv[i],
"--attempts") == 0){
646 attempts = atoi(argv[i+1]);
648 printf(
"ERROR: invalid number of attempts(%d)\n", attempts);
654 if( strcmp(argv[i],
"--verify") == 0){
659 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[i]);
QudaReconstructType reconstruct_sloppy
int main(int argc, char **argv)
void usage_extra(char **argv)
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile)
#define pinned_malloc(size)
enum QudaPrecision_s QudaPrecision
void createMomCPU(void *mom, QudaPrecision precision)
int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam, double *timeinfo)
int process_command_line_option(int argc, char **argv, int *idx)
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
QudaGaugeFieldOrder gauge_order
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
const char * get_prec_str(QudaPrecision prec)
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
QudaReconstructType link_recon
void gauge_force_reference(void *refMom, double eb3, void **sitelink, void **sitelink_ex_2d, QudaPrecision prec, int ***path_dir, int *length, void *loop_coeff, int num_paths)
void setTuning(QudaTune tune)
void initQuda(int device)
void exchange_cpu_sitelink_ex(int *X, int *R, void **sitelink, QudaGaugeFieldOrder cpu_order, QudaPrecision gPrecision, int optflag, int geometry)
const char * get_recon_str(QudaReconstructType recon)
QudaPrecision cuda_prec_sloppy
int gridsize_from_cmdline[]
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
QudaReconstructType reconstruct
int strong_check_mom(void *momA, void *momB, int len, QudaPrecision prec)
#define safe_malloc(size)
void * memset(void *s, int c, size_t n)
QudaGaugeFieldOrder gauge_order
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
void initComms(int argc, char **argv, const int *commDims)
QudaGaugeParam newQudaGaugeParam(void)