6 #include <cuda_runtime.h> 23 #define TDIFF(a,b) (b.tv_sec - a.tv_sec + 0.000001*(b.tv_usec - a.tv_usec)) 27 extern void usage(
char** argv);
78 errorQuda(
"Precision %d is unsupported in some link fattening routines\n",
prec);
92 qudaGaugeParam.
X[0] =
xdim;
93 qudaGaugeParam.
X[1] =
ydim;
94 qudaGaugeParam.
X[2] =
zdim;
95 qudaGaugeParam.
X[3] =
tdim;
111 qudaGaugeParam.
ga_pad = 0;
133 double act_path_coeff_1[6] = {
137 u4*( 1.0/8.0)*0.25*0.5,
138 u6*(-1.0/8.0)*0.125*(1.0/6.0),
143 double act_path_coeff_2[6] = {
144 (( 1.0/8.0)+(2.0*6.0/16.0)+(1.0/8.0)),
149 (-1.0/8.0)*0.125*(1.0/6.0),
154 double act_path_coeff_3[6] = {
188 for(
int i=0; i<
V; ++i){
189 for(
int dir=0; dir<4; ++dir){
190 char* src = (
char*)sitelink[dir];
208 void* fatlink_eps =
nullptr;
209 void* longlink_eps =
nullptr;
218 computeKSLinkQuda(vlink , longlink, wlink, milc_sitelink, act_path_coeff_2, &qudaGaugeParam);
221 struct timeval t0, t1;
223 gettimeofday(&t0, NULL);
224 for (
int n = 0; n <
niter; n++) {
229 computeKSLinkQuda(vlink,
nullptr, wlink, milc_sitelink, act_path_coeff_1, &qudaGaugeParam);
233 computeKSLinkQuda(fatlink, longlink,
nullptr, wlink, act_path_coeff_3, &qudaGaugeParam);
244 computeKSLinkQuda(fatlink, longlink,
nullptr, wlink, act_path_coeff_2, &qudaGaugeParam);
249 cpu_xpy(
prec, longlink, longlink_eps, V*4*gaugeSiteSize);
252 gettimeofday(&t1, NULL);
254 double secs =
TDIFF(t0,t1);
260 void* long_reflink[4];
261 void* fat_reflink[4];
262 for(
int i=0;i < 4;i++) {
267 void* long_reflink_eps[4];
268 void* fat_reflink_eps[4];
270 for(
int i=0;i < 4;i++) {
278 double* act_paths[3] = { act_path_coeff_1, act_path_coeff_2, act_path_coeff_3 };
281 fat_reflink_eps, long_reflink_eps,
282 sitelink, &qudaGaugeParam, act_paths,
eps_naik);
291 void* mylonglink [4];
292 void* myfatlink_eps [4];
293 void* mylonglink_eps [4];
294 for(
int i=0; i < 4; i++) {
309 for(
int i=0; i <
V; i++){
310 for(
int dir=0; dir< 4; dir++){
311 char* src = ((
char*)fatlink )+ (4*i+dir)*
gaugeSiteSize*gSize;
338 for(
int dir=0; dir<4; dir++){
343 fat_reflink,
"CPU reference results:",
346 printfQuda(
"Fat-link test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
352 for(
int dir=0; dir<4; ++dir){
357 long_reflink,
"CPU reference results:",
360 printfQuda(
"Long-link test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
364 printfQuda(
"Checking fat eps_naik links...\n");
366 for(
int dir=0; dir<4; dir++){
371 fat_reflink_eps,
"CPU reference results:",
374 printfQuda(
"Fat-link eps_naik test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
377 printfQuda(
"Checking long eps_naik links...\n");
379 for(
int dir=0; dir<4; ++dir){
384 long_reflink_eps,
"CPU reference results:",
387 printfQuda(
"Long-link eps_naik test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
392 int volume = qudaGaugeParam.
X[0]*qudaGaugeParam.
X[1]*qudaGaugeParam.
X[2]*qudaGaugeParam.
X[3];
393 long long flops = 61632 * (
long long)niter;
396 flops += 61632 * (
long long)niter;
397 flops += (252*4)*(
long long)
niter;
399 double perf = flops*volume/(secs*1024*1024*1024);
400 printfQuda(
"link computation time =%.2f ms, flops= %.2f Gflops\n", (secs*1000)/niter, perf);
402 for (
int i=0; i < 4; i++) {
412 for(
int i=0; i < 4; i++){
433 if(milc_sitelink)
host_free(milc_sitelink);
444 printfQuda(
"link_precision link_reconstruct space_dimension T_dimension Ordering\n");
465 int main(
int argc,
char **argv)
474 for (
int i = 1; i < argc; i++){
479 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[i]);
static QudaGaugeParam qudaGaugeParam
static bool reunit_allow_svd
int dimPartitioned(int dim)
QudaReconstructType link_recon
QudaReconstructType reconstruct_sloppy
QudaGhostExchange ghostExchange
#define pinned_malloc(size)
enum QudaPrecision_s QudaPrecision
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
int process_command_line_option(int argc, char **argv, int *idx)
QudaStaggeredPhase staggered_phase_type
void exchange_llfat_cleanup(void)
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
QudaGaugeFieldOrder gauge_order
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
const char * get_prec_str(QudaPrecision prec)
static QudaGaugeFieldOrder gauge_order
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
int gridsize_from_cmdline[]
static double svd_rel_error
void initQuda(int device)
static void display_test_info()
static bool reunit_svd_only
void cpu_xpy(QudaPrecision prec, void *x, void *y, int size)
const char * get_recon_str(QudaReconstructType recon)
QudaGaugeFieldOrder order
QudaPrecision cuda_prec_sloppy
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
static double svd_abs_error
void computeHISQLinksCPU(void **fatlink, void **longlink, void **fatlink_eps, void **longlink_eps, void **sitelink, void *qudaGaugeParamPtr, double **act_path_coeffs, double eps_naik)
QudaReconstructType reconstruct
#define safe_malloc(size)
int strong_check_link(void **linkA, const char *msgA, void **linkB, const char *msgB, int len, QudaPrecision prec)
void * memset(void *s, int c, size_t n)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
static double unitarize_eps
void cpu_axy(QudaPrecision prec, double a, void *x, void *y, int size)
static double max_allowed_error
int main(int argc, char **argv)
void initComms(int argc, char **argv, int *const commDims)
static QudaPrecision cpu_prec
QudaGaugeParam newQudaGaugeParam(void)