6 #include <cuda_runtime.h> 19 #define TDIFF(a,b) (b.tv_sec - a.tv_sec + 0.000001*(b.tv_usec - a.tv_usec)) 21 extern void usage(
char** argv);
43 void* ghost_sitelink[4];
44 void* ghost_sitelink_diag[16];
85 void* milc_sitelink_ex;
91 for(
int i=0;
i<
V; ++
i){
92 for(
int dir=0; dir<4; ++dir){
93 char*
src = (
char*)sitelink[dir];
118 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
119 int x1 = 2*x1h + x1odd;
122 if( x1< 2 || x1 >= X1 +2
123 || x2< 2 || x2 >= X2 +2
124 || x3< 2 || x3 >= X3 +2
125 || x4< 2 || x4 >= X4 +2){
133 x1 = (x1 - 2 + X1) % X1;
134 x2 = (x2 - 2 + X2) % X2;
135 x3 = (x3 - 2 + X3) % X3;
136 x4 = (x4 - 2 + X4) % X4;
138 int idx = (x4*X3*X2*X1+x3*X2*X1+x2*X1+x1)>>1;
142 for(
int dir= 0; dir < 4; dir++){
143 char*
src = (
char*)sitelink[dir];
144 char* dst = (
char*)sitelink_ex[dir];
153 double act_path_coeff[6];
154 for(
int i=0;
i < 6;
i++){
155 act_path_coeff[
i]= 0.1*
i;
170 gettimeofday(&t0, NULL);
173 gettimeofday(&t1, NULL);
175 double secs =
TDIFF(t0,t1);
177 void* fat_reflink[4];
178 void* long_reflink[4];
179 for(
int i=0;
i < 4;
i++){
191 for (
int i=0;
i < 6;
i++) coeff_sp[
i] = coeff_dp[
i] = act_path_coeff[
i];
207 for(
int nu=0;nu < 4;nu++){
208 for(
int mu=0;
mu < 4;
mu++){
210 ghost_sitelink_diag[nu*4+
mu] = NULL;
214 for(dir1= 0; dir1 < 4; dir1++){
215 if(dir1 !=nu && dir1 !=
mu){
219 for(dir2=0; dir2 < 4; dir2++){
220 if(dir2 != nu && dir2 !=
mu && dir2 != dir1){
235 int R[4] = {2,2,2,2};
249 for(
int i=0;
i < 4;
i++){
256 for(
int i=0;
i <
V;
i++){
257 for(
int dir=0; dir< 4; dir++){
271 for(
int dir=0; dir<4; dir++){
276 fat_reflink,
"CPU reference results:",
279 printfQuda(
"Fat-link test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
283 for(
int dir=0; dir<4; ++dir){
288 long_reflink,
"CPU reference results:",
291 printfQuda(
"Long-link test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
298 double perf =
flops*volume/(secs*1024*1024*1024);
299 printfQuda(
"link computation time =%.2f ms, flops= %.2f Gflops\n", (secs*1000)/
niter, perf);
301 for (
int i=0;
i < 4;
i++) {
308 for(
int i=0;
i<4;
i++){
310 for(
int j=0;j <4; j++){
318 for(
int i=0;
i < 4;
i++){
326 if(milc_sitelink)
host_free(milc_sitelink);
327 if(milc_sitelink_ex)
host_free(milc_sitelink_ex);
338 printfQuda(
"link_precision link_reconstruct space_dimension T_dimension Ordering\n");
359 printfQuda(
" --gauge-order <qdp/milc> # ordering of the input gauge-field\n");
363 int main(
int argc,
char **argv)
371 for (
int i = 1;
i < argc;
i++){
377 if(
strcmp(argv[
i],
"--gauge-order") == 0){
382 if(
strcmp(argv[
i+1],
"milc") == 0){
384 }
else if(
strcmp(argv[
i+1],
"qdp") == 0){
387 fprintf(stderr,
"Error: unsupported gauge-field order\n");
394 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[
i]);
static QudaGaugeParam qudaGaugeParam
int dimPartitioned(int dim)
QudaReconstructType reconstruct_sloppy
void exchange_cpu_sitelink_ex(int *X, int *R, void **sitelink, QudaGaugeFieldOrder cpu_order, QudaPrecision gPrecision, int optflag, int geometry)
#define pinned_malloc(size)
enum QudaPrecision_s QudaPrecision
static QudaGaugeFieldOrder gauge_order
void exchange_cpu_sitelink(int *X, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision gPrecision, QudaGaugeParam *param, int optflag)
int gridsize_from_cmdline[]
int process_command_line_option(int argc, char **argv, int *idx)
QudaStaggeredPhase staggered_phase_type
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
QudaGaugeFieldOrder gauge_order
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
const char * get_prec_str(QudaPrecision prec)
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
void exit(int) __attribute__((noreturn))
else return(__swbuf(_c, _p))
int strcmp(const char *__s1, const char *__s2)
void initQuda(int device)
static QudaPrecision cpu_prec
const char * get_recon_str(QudaReconstructType recon)
QudaPrecision cuda_prec_sloppy
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
QudaReconstructType reconstruct
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
void * memcpy(void *__dst, const void *__src, size_t __n)
#define safe_malloc(size)
void exchange_llfat_cleanup(void)
int strong_check_link(void **linkA, const char *msgA, void **linkB, const char *msgB, int len, QudaPrecision prec)
void usage_extra(char **argv)
QudaReconstructType link_recon
void computeLongLinkCPU(void **longlink, su3_matrix **sitelink, Float *act_path_coeff)
void * memset(void *__b, int __c, size_t __len)
static void display_test_info()
int main(int argc, char **argv)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
void llfat_reference_mg(void **fatlink, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision prec, void *act_path_coeff)
void initComms(int argc, char **argv, const int *commDims)
void llfat_reference(void **fatlink, void **sitelink, QudaPrecision prec, void *act_path_coeff)
QudaGaugeParam newQudaGaugeParam(void)