6 #include <cuda_runtime.h>
19 #define TDIFF(a,b) (b.tv_sec - a.tv_sec + 0.000001*(b.tv_usec - a.tv_usec))
21 extern void usage(
char** argv);
42 void* ghost_sitelink[4];
43 void* ghost_sitelink_diag[16];
55 qudaGaugeParam.
X[0] =
xdim;
56 qudaGaugeParam.
X[1] =
ydim;
57 qudaGaugeParam.
X[2] =
zdim;
58 qudaGaugeParam.
X[3] =
tdim;
74 if (cudaMallocHost((
void**)&fatlink, 4*
V*
gaugeSiteSize*gSize) == cudaErrorMemoryAllocation) {
75 errorQuda(
"ERROR: cudaMallocHost failed for fatlink\n");
79 if (cudaMallocHost((
void**)&longlink, 4*
V*
gaugeSiteSize*gSize) == cudaErrorMemoryAllocation) {
80 errorQuda(
"ERROR: cudaMallocHost failed for longlink\n");
84 for(
int i=0;i < 4;i++){
85 if (cudaMallocHost((
void**)&sitelink[i],
V*
gaugeSiteSize*gSize) == cudaErrorMemoryAllocation) {
86 errorQuda(
"ERROR: cudaMallocHost failed for sitelink\n");
91 for(
int i=0;i < 4;i++){
92 if (cudaMallocHost((
void**)&sitelink_ex[i],
V_ex*
gaugeSiteSize*gSize) == cudaErrorMemoryAllocation) {
93 errorQuda(
"ERROR: cudaMallocHost failed for sitelink_ex\n");
100 if(milc_sitelink == NULL){
101 errorQuda(
"ERROR: allocating milc_sitelink failed\n");
104 void* milc_sitelink_ex;
106 if(milc_sitelink_ex == NULL){
107 errorQuda(
"Error: allocating milc_sitelink failed\n");
115 for(
int i=0; i<
V; ++i){
116 for(
int dir=0; dir<4; ++dir){
117 char* src = (
char*)sitelink[dir];
128 for(
int i=0; i <
V_ex; i++){
146 if( x1< 2 || x1 >= X1 +2
147 || x2< 2 || x2 >= X2 +2
148 || x3< 2 || x3 >= X3 +2
149 || x4< 2 || x4 >= X4 +2){
157 x1 = (x1 - 2 +
X1) % X1;
158 x2 = (x2 - 2 +
X2) % X2;
159 x3 = (x3 - 2 +
X3) % X3;
160 x4 = (x4 - 2 +
X4) % X4;
162 int idx = (x4*X3*X2*X1+x3*X2*X1+x2*X1+
x1)>>1;
166 for(
int dir= 0; dir < 4; dir++){
167 char* src = (
char*)sitelink[dir];
168 char* dst = (
char*)sitelink_ex[dir];
177 double act_path_coeff[6];
178 for(
int i=0;i < 6;i++){
179 act_path_coeff[i]= 0.1*i;
185 struct timeval t0, t1;
191 sitelink_ptr = (
test) ? (
void**)sitelink_ex : (
void**)sitelink;
193 sitelink_ptr = (
test) ? (
void**)milc_sitelink_ex : (
void**)milc_sitelink;
197 if(!test) longlink_ptr = NULL;
200 gettimeofday(&t0, NULL);
201 computeKSLinkQuda(fatlink, longlink_ptr, NULL, milc_sitelink, act_path_coeff, &qudaGaugeParam, method);
202 gettimeofday(&t1, NULL);
205 double secs =
TDIFF(t0,t1);
207 void* fat_reflink[4];
208 void* long_reflink[4];
209 for(
int i=0;i < 4;i++){
211 if(fat_reflink[i] == NULL){
212 errorQuda(
"ERROR; allocate fat_reflink[%d] failed\n", i);
215 if(long_reflink[i] == NULL)
errorQuda(
"ERROR; allocate long_reflink[%d] failed\n", i);
225 for(
int i=0;i < 6;i++){
226 coeff_sp[i] = coeff_dp[i] = act_path_coeff[i];
238 for(
int i=0;i < 4; i++){
240 if (ghost_sitelink[i] == NULL){
241 printf(
"ERROR: malloc failed for ghost_sitelink[%d] \n",i);
252 for(
int nu=0;nu < 4;nu++){
253 for(
int mu=0;
mu < 4;
mu++){
255 ghost_sitelink_diag[nu*4+
mu] = NULL;
259 for(dir1= 0; dir1 < 4; dir1++){
260 if(dir1 !=nu && dir1 !=
mu){
264 for(dir2=0; dir2 < 4; dir2++){
265 if(dir2 != nu && dir2 !=
mu && dir2 != dir1){
270 if(ghost_sitelink_diag[nu*4+
mu] == NULL){
271 errorQuda(
"malloc failed for ghost_sitelink_diag\n");
285 int R[4] = {2,2,2,2};
299 for(
int i=0;i < 4;i++){
301 if(myfatlink[i] == NULL){
302 printf(
"Error: malloc failed for myfatlink[%d]\n", i);
306 if(mylonglink[i] == NULL){
307 printf(
"Error: malloc failed for mylonglink[%d]\n", i);
314 for(
int i=0;i <
V; i++){
315 for(
int dir=0; dir< 4; dir++){
329 for(
int dir=0; dir<4; dir++){
334 fat_reflink,
"CPU reference results:",
337 printfQuda(
"Fat-link test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
343 for(
int dir=0; dir<4; ++dir){
348 long_reflink,
"CPU reference results:",
351 printfQuda(
"Long-link test %s\n\n",(1 == res) ?
"PASSED" :
"FAILED");
355 printfQuda(
"Extended volume is required for multi-GPU long-link construction\n");
360 int volume = qudaGaugeParam.
X[0]*qudaGaugeParam.
X[1]*qudaGaugeParam.
X[2]*qudaGaugeParam.
X[3];
363 if(test) flops += (252*4);
368 double perf = 1.0* flops*volume/(secs*1024*1024*1024);
369 printfQuda(
"link computation time =%.2f ms, flops= %.2f Gflops\n", secs*1000, perf);
372 for(
int i=0;i < 4;i++){
380 free(ghost_sitelink[i]);
383 for(
int j=0;j <4; j++){
387 free(ghost_sitelink_diag[i*4+j]);
393 for(
int i=0;i < 4; i++){
394 cudaFreeHost(sitelink[i]);
395 cudaFreeHost(sitelink_ex[i]);
396 free(fat_reflink[i]);
398 cudaFreeHost(fatlink);
399 cudaFreeHost(longlink);
400 if(milc_sitelink) free(milc_sitelink);
401 if(milc_sitelink_ex) free(milc_sitelink_ex);
413 printfQuda(
"link_precision link_reconstruct space_dimension T_dimension Test Ordering\n");
440 printfQuda(
" --gauge-order <qdp/milc> # ordering of the input gauge-field\n");
459 for (i =1;i < argc; i++){
465 if( strcmp(argv[i],
"--gauge-order") == 0){
470 if(strcmp(argv[i+1],
"milc") == 0){
472 }
else if(strcmp(argv[i+1],
"qdp") == 0){
475 fprintf(stderr,
"Error: unsupported gauge-field order\n");
482 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[i]);
489 errorQuda(
"ERROR: milc format for multi-gpu with test0 is not supported yet!\n");
int dimPartitioned(int dim)
void exchange_llfat_cleanup(void)
enum QudaPrecision_s QudaPrecision
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param, QudaComputeFatMethod method)
int gridsize_from_cmdline[]
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int int mu
int process_command_line_option(int argc, char **argv, int *idx)
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
QudaGaugeFieldOrder gauge_order
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
const char * get_prec_str(QudaPrecision prec)
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
void initQuda(int device)
void exchange_cpu_sitelink_ex(int *X, int *R, void **sitelink, QudaGaugeFieldOrder cpu_order, QudaPrecision gPrecision, int optflag, int geometry)
const char * get_recon_str(QudaReconstructType recon)
__constant__ double coeff
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
QudaReconstructType reconstruct
int strong_check_link(void **linkA, const char *msgA, void **linkB, const char *msgB, int len, QudaPrecision prec)
void usage_extra(char **argv)
QudaReconstructType link_recon
void computeLongLinkCPU(void **longlink, su3_matrix **sitelink, Float *act_path_coeff)
void * memset(void *s, int c, size_t n)
int main(int argc, char **argv)
QudaGaugeFieldOrder gauge_order
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
void llfat_reference_mg(void **fatlink, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision prec, void *act_path_coeff)
enum QudaComputeFatMethod_s QudaComputeFatMethod
void exchange_cpu_sitelink(int *X, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision gPrecision, QudaGaugeParam *param, int optflag)
void initComms(int argc, char **argv, const int *commDims)
void llfat_reference(void **fatlink, void **sitelink, QudaPrecision prec, void *act_path_coeff)
QudaGaugeParam newQudaGaugeParam(void)