6 #include <cuda_runtime.h> 19 #define TDIFF(a,b) (b.tv_sec - a.tv_sec + 0.000001*(b.tv_usec - a.tv_usec)) 21 extern void usage(
char** argv);
43 void* ghost_sitelink[4];
44 void* ghost_sitelink_diag[16];
56 qudaGaugeParam.
X[0] =
xdim;
57 qudaGaugeParam.
X[1] =
ydim;
58 qudaGaugeParam.
X[2] =
zdim;
59 qudaGaugeParam.
X[3] =
tdim;
85 void* milc_sitelink_ex;
91 for(
int i=0; i<
V; ++i){
92 for(
int dir=0; dir<4; ++dir){
93 char* src = (
char*)sitelink[dir];
104 for(
int i=0; i <
V_ex; i++){
113 int x1h = sid - za*
E1h;
118 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
119 int x1 = 2*x1h + x1odd;
122 if( x1< 2 || x1 >= X1 +2
123 || x2< 2 || x2 >= X2 +2
124 || x3< 2 || x3 >= X3 +2
125 || x4< 2 || x4 >= X4 +2){
133 x1 = (x1 - 2 +
X1) % X1;
134 x2 = (x2 - 2 +
X2) % X2;
135 x3 = (x3 - 2 +
X3) % X3;
136 x4 = (x4 - 2 +
X4) % X4;
138 int idx = (x4*X3*X2*X1+x3*X2*X1+x2*X1+x1)>>1;
142 for(
int dir= 0; dir < 4; dir++){
143 char* src = (
char*)sitelink[dir];
144 char* dst = (
char*)sitelink_ex[dir];
153 double act_path_coeff[6];
154 for(
int i=0;i < 6;i++){
155 act_path_coeff[i]= 0.1*i;
161 struct timeval t0, t1;
166 computeKSLinkQuda(fatlink, longlink_ptr, NULL, milc_sitelink, act_path_coeff, &qudaGaugeParam);
170 gettimeofday(&t0, NULL);
171 for (
int i=0; i<
niter; i++)
172 computeKSLinkQuda(fatlink, longlink_ptr, NULL, milc_sitelink, act_path_coeff, &qudaGaugeParam);
173 gettimeofday(&t1, NULL);
175 double secs =
TDIFF(t0,t1);
177 void* fat_reflink[4];
178 void* long_reflink[4];
179 for(
int i=0;i < 4;i++){
191 for (
int i=0; i < 6;i++) coeff_sp[i] = coeff_dp[i] = act_path_coeff[i];
207 for(
int nu=0;nu < 4;nu++){
208 for(
int mu=0;
mu < 4;
mu++){
210 ghost_sitelink_diag[nu*4+
mu] = NULL;
214 for(dir1= 0; dir1 < 4; dir1++){
215 if(dir1 !=nu && dir1 !=
mu){
219 for(dir2=0; dir2 < 4; dir2++){
220 if(dir2 != nu && dir2 !=
mu && dir2 != dir1){
235 int R[4] = {2,2,2,2};
249 for(
int i=0; i < 4; i++){
256 for(
int i=0; i <
V; i++){
257 for(
int dir=0; dir< 4; dir++){
271 for(
int dir=0; dir<4; dir++){
276 fat_reflink,
"CPU reference results:",
279 printfQuda(
"Fat-link test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
283 for(
int dir=0; dir<4; ++dir){
288 long_reflink,
"CPU reference results:",
291 printfQuda(
"Long-link test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
294 int volume = qudaGaugeParam.
X[0]*qudaGaugeParam.
X[1]*qudaGaugeParam.
X[2]*qudaGaugeParam.
X[3];
295 long long flops= 61632 * (
long long)niter;
296 flops += (252*4)*(
long long)
niter;
298 double perf = flops*volume/(secs*1024*1024*1024);
299 printfQuda(
"link computation time =%.2f ms, flops= %.2f Gflops\n", (secs*1000)/niter, perf);
301 for (
int i=0; i < 4; i++) {
308 for(
int i=0; i<4; i++){
310 for(
int j=0;j <4; j++){
318 for(
int i=0; i < 4; i++){
326 if(milc_sitelink)
host_free(milc_sitelink);
327 if(milc_sitelink_ex)
host_free(milc_sitelink_ex);
338 printfQuda(
"link_precision link_reconstruct space_dimension T_dimension Ordering\n");
359 printfQuda(
" --gauge-order <qdp/milc> # ordering of the input gauge-field\n");
363 int main(
int argc,
char **argv)
371 for (
int i = 1; i < argc; i++){
377 if( strcmp(argv[i],
"--gauge-order") == 0){
382 if(strcmp(argv[i+1],
"milc") == 0){
384 }
else if(strcmp(argv[i+1],
"qdp") == 0){
387 fprintf(stderr,
"Error: unsupported gauge-field order\n");
394 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[i]);
static QudaGaugeParam qudaGaugeParam
int dimPartitioned(int dim)
QudaReconstructType reconstruct_sloppy
#define pinned_malloc(size)
enum QudaPrecision_s QudaPrecision
static QudaGaugeFieldOrder gauge_order
int gridsize_from_cmdline[]
int process_command_line_option(int argc, char **argv, int *idx)
QudaStaggeredPhase staggered_phase_type
void exchange_llfat_cleanup(void)
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
void exchange_cpu_sitelink(int *X, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision gPrecision, QudaGaugeParam *param, int optflag)
QudaGaugeFieldOrder gauge_order
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
const char * get_prec_str(QudaPrecision prec)
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
void initQuda(int device)
static QudaPrecision cpu_prec
const char * get_recon_str(QudaReconstructType recon)
QudaPrecision cuda_prec_sloppy
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
QudaReconstructType reconstruct
#define safe_malloc(size)
int strong_check_link(void **linkA, const char *msgA, void **linkB, const char *msgB, int len, QudaPrecision prec)
void * memset(void *s, int c, size_t n)
void usage_extra(char **argv)
QudaReconstructType link_recon
void computeLongLinkCPU(void **longlink, su3_matrix **sitelink, Float *act_path_coeff)
static void display_test_info()
int main(int argc, char **argv)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
void llfat_reference_mg(void **fatlink, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision prec, void *act_path_coeff)
void initComms(int argc, char **argv, int *const commDims)
void exchange_cpu_sitelink_ex(int *X, int *R, void **sitelink, QudaGaugeFieldOrder cpu_order, QudaPrecision gPrecision, int optflag, int geometry)
void llfat_reference(void **fatlink, void **sitelink, QudaPrecision prec, void *act_path_coeff)
QudaGaugeParam newQudaGaugeParam(void)