// Elapsed time in seconds from timeval `a` to timeval `b` (i.e. b - a),
// combining the whole-second (tv_sec) and microsecond (tv_usec) fields.
// Every use of a parameter is parenthesized so that arguments which are not
// plain identifiers (e.g. `*start`, `times[i]`) expand with the intended
// precedence. Note: each argument is evaluated twice, so avoid side effects.
#define TDIFF(a,b) (((b).tv_sec - (a).tv_sec) + 0.000001*((b).tv_usec - (a).tv_usec))
22 static void llfat_test()
26 void* ghost_sitelink[4];
27 void* ghost_sitelink_diag[16];
38 qudaGaugeParam.
X[0] =
xdim;
39 qudaGaugeParam.
X[1] =
ydim;
40 qudaGaugeParam.
X[2] =
zdim;
41 qudaGaugeParam.
X[3] =
tdim;
67 void* milc_sitelink_ex;
73 for(
int i=0; i<
V; ++i){
74 for(
int dir=0; dir<4; ++dir){
75 char* src = (
char*)sitelink[dir];
87 for(
int i=0; i <
V_ex; i++){
96 int x1h = sid - za*
E1h;
101 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
102 int x1 = 2*x1h + x1odd;
105 if( x1< 2 || x1 >= X1 +2
106 || x2< 2 || x2 >= X2 +2
107 || x3< 2 || x3 >= X3 +2
108 || x4< 2 || x4 >= X4 +2){
114 x1 = (x1 - 2 + X1) % X1;
115 x2 = (x2 - 2 + X2) % X2;
116 x3 = (x3 - 2 + X3) % X3;
117 x4 = (x4 - 2 + X4) % X4;
119 int idx = (x4*X3*X2*X1+x3*X2*X1+x2*X1+x1)>>1;
123 for(
int dir= 0; dir < 4; dir++){
124 char* src = (
char*)sitelink[dir];
125 char* dst = (
char*)sitelink_ex[dir];
136 double act_path_coeff[6];
137 for(
int i=0;i < 6;i++){
138 act_path_coeff[i]= 0.1*i;
144 struct timeval t0, t1;
146 void* longlink_ptr = longlink;
149 computeKSLinkQuda(fatlink, longlink_ptr, NULL, milc_sitelink, act_path_coeff, &qudaGaugeParam);
153 gettimeofday(&t0, NULL);
154 for (
int i=0; i<
niter; i++)
155 computeKSLinkQuda(fatlink, longlink_ptr, NULL, milc_sitelink, act_path_coeff, &qudaGaugeParam);
156 gettimeofday(&t1, NULL);
158 double secs =
TDIFF(t0,t1);
160 void* fat_reflink[4];
161 void* long_reflink[4];
162 for(
int i=0;i < 4;i++){
174 for (
int i=0; i < 6;i++) coeff_sp[i] = coeff_dp[i] = act_path_coeff[i];
182 for (
int i = 0; i < 4; i++)
191 for(
int nu=0;nu < 4;nu++){
192 for(
int mu=0;
mu < 4;
mu++){
194 ghost_sitelink_diag[nu*4+
mu] = NULL;
198 for(dir1= 0; dir1 < 4; dir1++){
199 if(dir1 !=nu && dir1 !=
mu){
203 for(dir2=0; dir2 < 4; dir2++){
204 if(dir2 != nu && dir2 !=
mu && dir2 != dir1){
219 int R[4] = {2,2,2,2};
233 for(
int i=0; i < 4; i++){
240 for(
int i=0; i <
V; i++){
241 for(
int dir=0; dir< 4; dir++){
255 for(
int dir=0; dir<4; dir++){
260 fat_reflink,
"CPU reference results:",
263 printfQuda(
"Fat-link test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
267 for(
int dir=0; dir<4; ++dir){
272 long_reflink,
"CPU reference results:",
275 printfQuda(
"Long-link test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
278 int volume = qudaGaugeParam.
X[0]*qudaGaugeParam.
X[1]*qudaGaugeParam.
X[2]*qudaGaugeParam.
X[3];
282 double perf =
flops*volume/(secs*1024*1024*1024);
283 printfQuda(
"link computation time =%.2f ms, flops= %.2f Gflops\n", (secs*1000)/
niter, perf);
285 for (
int i=0; i < 4; i++) {
292 for(
int i=0; i<4; i++){
294 for(
int j=0;j <4; j++){
302 for(
int i=0; i < 4; i++){
310 if(milc_sitelink)
host_free(milc_sitelink);
311 if(milc_sitelink_ex)
host_free(milc_sitelink_ex);
322 printfQuda(
"link_precision link_reconstruct space_dimension T_dimension Ordering\n");
341 int main(
int argc,
char **argv)
356 app->add_option(
"--gauge-order",
gauge_order,
"")->transform(CLI::QUDACheckedTransformer(gauge_order_map));
358 app->parse(argc, argv);
359 }
catch (
const CLI::ParseError &e) {
std::shared_ptr< QUDAApp > make_app(std::string app_description, std::string app_name)
QudaReconstructType link_recon
std::array< int, 4 > gridsize_from_cmdline
void * memset(void *s, int c, size_t n)
@ QUDA_STAGGERED_PHASE_MILC
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
void exchange_cpu_sitelink_ex(int *X, int *R, void **sitelink, QudaGaugeFieldOrder cpu_order, QudaPrecision gPrecision, int optflag, int geometry)
void exchange_llfat_cleanup(void)
void exchange_cpu_sitelink(int *X, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision gPrecision, QudaGaugeParam *param, int optflag)
QudaGaugeFieldOrder gauge_order
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
size_t host_gauge_data_type_size
int dimPartitioned(int dim)
void initComms(int argc, char **argv, std::array< int, 4 > &commDims)
int strong_check_link(void **linkA, const char *msgA, void **linkB, const char *msgB, int len, QudaPrecision prec)
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
void computeLongLinkCPU(void **longlink, void **sitelink, QudaPrecision prec, void *act_path_coeff)
int main(int argc, char **argv)
void llfat_reference(void **fatlink, void **sitelink, QudaPrecision prec, void *act_path_coeff)
void llfat_reference_mg(void **fatlink, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision prec, void *act_path_coeff)
#define safe_malloc(size)
#define pinned_malloc(size)
const char * get_prec_str(QudaPrecision prec)
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
const char * get_recon_str(QudaReconstructType recon)
Main header file for the QUDA library.
QudaGaugeParam newQudaGaugeParam(void)
void initQuda(int device)
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
QudaReconstructType reconstruct
QudaPrecision cuda_prec_sloppy
QudaReconstructType reconstruct_sloppy
QudaGaugeFieldOrder gauge_order
QudaStaggeredPhase staggered_phase_type