QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
llfat_quda_itf.cpp
Go to the documentation of this file.
1 
2 #include <stdio.h>
3 #include <cuda_runtime.h>
4 #include <cuda.h>
5 
6 #include <quda_internal.h>
7 #include <read_gauge.h>
8 #include "gauge_field.h"
9 #include <force_common.h>
10 #include "llfat_quda.h"
11 #include <face_quda.h>
12 
13 #define BLOCK_DIM 64
14 #define MIN_COEFF 1e-7
15 
16 extern void exchange_gpu_staple_start(int* X, void* _cudaStaple, int dir, int whichway, cudaStream_t * stream);
17 extern void exchange_gpu_staple_comms(int* X, void* _cudaStaple, int dir, int whichway, cudaStream_t * stream);
18 extern void exchange_gpu_staple_wait(int* X, void* _cudaStaple, int dir, int whichway, cudaStream_t * stream);
19 
20 namespace quda {
21 
// llfat_cuda: drives the GPU computation of the fat links (and, in the
// single-GPU build only, the long links) for improved staggered fermions.
// Inputs are the thin site links; cudaStaple/cudaStaple1 are scratch staple
// fields; act_path_coeff holds the fattening path coefficients
// (act_path_coeff[1] is passed to computeLongLinkCuda as the long-link
// coefficient; the others presumably follow the asqtad ordering
// [2]=3-staple, [3]=5-staple, [4]=7-staple, [5]=Lepage -- TODO confirm
// against the caller).
//
// NOTE(review): this listing is a Doxygen HTML capture; hyperlinked source
// lines were lost in extraction. Missing here: the function-name line
// ("llfat_cuda(cudaGaugeField* cudaFatLink," at original line 23), the
// declaration "llfat_kernel_param_t kparam;" (original line 53), the
// ktype[8] initializer body (original lines 86-93, presumably the
// LLFAT_EXTERIOR_KERNEL_{BACK,FWD}_{X,Y,Z,T} pairs -- confirm against
// llfat_quda.h), and original line 130 (presumably
// "kparam.kernel_type = LLFAT_INTERIOR_KERNEL;" -- confirm against the
// repository source).
22  void
24  cudaGaugeField* cudaLongLink,   // may be NULL: long-link computation is then skipped
25  cudaGaugeField& cudaSiteLink,   // input thin links
26  cudaGaugeField& cudaStaple, cudaGaugeField& cudaStaple1,  // scratch staple fields
27  QudaGaugeParam* param, double* act_path_coeff)  // local lattice dims X[4] and path coefficients
28  {
  // Local 4-volume and its checkerboard half (even/odd parity sites).
29  int volume = param->X[0]*param->X[1]*param->X[2]*param->X[3];
30  int Vh = volume/2;
31  dim3 gridDim((volume + BLOCK_DIM-1)/BLOCK_DIM,1,1);
32  dim3 halfGridDim((Vh + BLOCK_DIM-1)/BLOCK_DIM,1,1);
33  dim3 blockDim(BLOCK_DIM , 1, 1);
34 
35  QudaPrecision prec = cudaSiteLink.Precision();
36  QudaReconstructType recon = cudaSiteLink.Reconstruct();
37 
  // 12-reconstruct with an odd local dimension is explicitly unsupported.
38  if( ((param->X[0] % 2 != 0)
39  ||(param->X[1] % 2 != 0)
40  ||(param->X[2] % 2 != 0)
41  ||(param->X[3] % 2 != 0))
42  && (recon == QUDA_RECONSTRUCT_12)){
43  errorQuda("12 reconstruct and odd dimensionsize is not supported by link fattening code (yet)\n");
44 
45  }
46 
  // Nine streams: streams 2k and 2k+1 serve the backward/forward faces of
  // dimension k (k = 0..3); stream nStream-1 (index 8) runs the interior
  // kernels. They are destroyed at the end of this function.
47  int nStream=9;
48  cudaStream_t stream[nStream];
49  for(int i = 0;i < nStream; i++){
50  cudaStreamCreate(&stream[i]);
51  }
52 
  // Fill the kernel launch parameters from the local lattice dimensions.
  // (The declaration "llfat_kernel_param_t kparam;" at original line 53 was
  // lost in extraction.)
54  kparam.blockDim = blockDim;
55  kparam.threads = Vh;
56  kparam.halfGridDim = halfGridDim;
57  kparam.D1 = param->X[0];
58  kparam.D2 = param->X[1];
59  kparam.D3 = param->X[2];
60  kparam.D4 = param->X[3];
61  kparam.D1h = param->X[0]/2;
62 
  // Record which dimensions are partitioned across ranks; the kernels need
  // this to decide where ghost-zone accesses are required.
63  for(int i=0;i < 4;i++){
64  kparam.ghostDim[i] = commDimPartitioned(i);
65  }
66 
  // One-link contribution to the fat links.
67  llfatOneLinkKernel(*cudaFatLink, cudaSiteLink,cudaStaple, cudaStaple1,
68  param, act_path_coeff);
69 #ifdef MULTI_GPU
  // Multi-GPU long links need the extended-field path (llfat_cuda_ex).
70  if(cudaLongLink)
71  errorQuda("Multi-GPU long-link calculation requires extended gauge field\n");
72 #else
73  if(cudaLongLink)
74  computeLongLinkCuda((void*)cudaLongLink->Even_p(), (void*)cudaLongLink->Odd_p(),
75  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
76  act_path_coeff[1], recon, prec, halfGridDim, kparam);
77 #endif
78 
  // If every higher-order path coefficient is negligible, only the one-link
  // (and long-link) terms contribute and we are done.
79  if(fabs(act_path_coeff[2]) < MIN_COEFF &&
80  fabs(act_path_coeff[3]) < MIN_COEFF &&
81  fabs(act_path_coeff[4]) < MIN_COEFF &&
82  fabs(act_path_coeff[5]) < MIN_COEFF) return;
83 
84 #ifdef MULTI_GPU
  // Kernel types for the eight faces (back/forward per dimension); indexed
  // as ktype[2*k] (backward) and ktype[2*k+1] (forward). The initializer
  // body (original lines 86-93) was lost in extraction -- presumably the
  // LLFAT_EXTERIOR_KERNEL_* macros; confirm against llfat_quda.h.
85  int ktype[8] = {
94  };
95 #endif
96 
97 
98 
  // Staple accumulation: for each link direction 'dir' and each orthogonal
  // direction 'nu', compute the staple into cudaStaple, then the higher
  // staples via cudaStaple1. The recurring multi-GPU pattern is: launch
  // boundary kernels on the face streams and kick off the halo exchange
  // (start), run the interior kernel on stream nStream-1, then post the
  // comms, wait, and synchronize the face streams before the staple is
  // consumed by the next step.
99  for(int dir = 0;dir < 4; dir++){
100  for(int nu = 0; nu < 4; nu++){
101  if (nu != dir){
102 
103 #ifdef MULTI_GPU
104  //start of one call
105  for(int k=3; k >= 0 ;k--){
106  if(!commDimPartitioned(k)) continue;
107 
  // Backward face of dimension k on stream 2k.
108  kparam.kernel_type = ktype[2*k];
109  siteComputeGenStapleParityKernel((void*)cudaStaple.Even_p(), (void*)cudaStaple.Odd_p(),
110  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
111  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
112  dir, nu,
113  act_path_coeff[2],
114  recon, prec, halfGridDim,
115  kparam, &stream[2*k]);
116 
117  exchange_gpu_staple_start(param->X, &cudaStaple, k, (int)QUDA_BACKWARDS, &stream[2*k]);
118 
  // Forward face of dimension k on stream 2k+1.
119  kparam.kernel_type = ktype[2*k+1];
120  siteComputeGenStapleParityKernel((void*)cudaStaple.Even_p(), (void*)cudaStaple.Odd_p(),
121  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
122  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
123  dir, nu,
124  act_path_coeff[2],
125  recon, prec, halfGridDim,
126  kparam, &stream[2*k+1]);
127  exchange_gpu_staple_start(param->X, &cudaStaple, k, (int)QUDA_FORWARDS, &stream[2*k+1]);
128  }
129 #endif
  // Interior staple kernel, overlapped with the face exchanges above.
  // (Original line 130 is missing here -- presumably
  // "kparam.kernel_type = LLFAT_INTERIOR_KERNEL;".)
131  siteComputeGenStapleParityKernel((void*)cudaStaple.Even_p(), (void*)cudaStaple.Odd_p(),
132  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
133  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
134  dir, nu,
135  act_path_coeff[2],
136  recon, prec, halfGridDim,
137  kparam, &stream[nStream-1]);
138 
139 #ifdef MULTI_GPU
  // Complete the cudaStaple halo exchange before it is read below.
140  for(int k=3; k >= 0 ;k--){
141  if(!commDimPartitioned(k)) continue;
142  exchange_gpu_staple_comms(param->X, &cudaStaple, k, (int)QUDA_BACKWARDS, &stream[2*k]);
143  exchange_gpu_staple_comms(param->X, &cudaStaple, k, (int)QUDA_FORWARDS, &stream[2*k+1]);
144  }
145  for(int k=3; k >= 0 ;k--){
146  if(!commDimPartitioned(k)) continue;
147  exchange_gpu_staple_wait(param->X, &cudaStaple, k, (int)QUDA_BACKWARDS, &stream[2*k]);
148  exchange_gpu_staple_wait(param->X, &cudaStaple, k, (int)QUDA_FORWARDS, &stream[2*k+1]);
149  }
150  for(int k=3; k >= 0 ;k--){
151  if(!commDimPartitioned(k)) continue;
152  cudaStreamSynchronize(stream[2*k]);
153  cudaStreamSynchronize(stream[2*k+1]);
154  }
155  cudaStreamSynchronize(stream[nStream-1]);
156 #endif
157  //end
158 
  //start of one call
  // Lepage-term contribution built from the freshly exchanged cudaStaple.
159 
160  kparam.kernel_type = LLFAT_INTERIOR_KERNEL;
161  if(act_path_coeff[5] != 0.0){
162  computeGenStapleFieldParityKernel((void*)NULL, (void*)NULL,
163  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
164  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
165  (const void*)cudaStaple.Even_p(), (const void*)cudaStaple.Odd_p(),
166  dir, nu, 0,
167  act_path_coeff[5],
168  recon, prec, halfGridDim, kparam, &stream[nStream-1]);
169  }
170 
171 #ifdef MULTI_GPU
172  cudaStreamSynchronize(stream[nStream-1]);
173 #endif
174  //end
175 
  // Next-level staples: accumulate into cudaStaple1 from cudaStaple.
176  for(int rho = 0; rho < 4; rho++){
177  if (rho != dir && rho != nu){
178 
179  //start of one call
180 #ifdef MULTI_GPU
  // Same boundary/interior/exchange pattern as above, now for cudaStaple1.
181  for(int k=3; k >= 0 ;k--){
182  if(!commDimPartitioned(k)) continue;
183  kparam.kernel_type = ktype[2*k];
184  computeGenStapleFieldParityKernel((void*)cudaStaple1.Even_p(), (void*)cudaStaple1.Odd_p(),
185  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
186  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
187  (const void*)cudaStaple.Even_p(), (const void*)cudaStaple.Odd_p(),
188  dir, rho, 1,
189  act_path_coeff[3],
190  recon, prec, halfGridDim, kparam, &stream[2*k]);
191  exchange_gpu_staple_start(param->X, &cudaStaple1, k, (int)QUDA_BACKWARDS, &stream[2*k]);
192  kparam.kernel_type = ktype[2*k+1];
193  computeGenStapleFieldParityKernel((void*)cudaStaple1.Even_p(), (void*)cudaStaple1.Odd_p(),
194  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
195  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
196  (const void*)cudaStaple.Even_p(), (const void*)cudaStaple.Odd_p(),
197  dir, rho, 1,
198  act_path_coeff[3],
199  recon, prec, halfGridDim, kparam, &stream[2*k+1]);
200  exchange_gpu_staple_start(param->X, &cudaStaple1, k, (int)QUDA_FORWARDS, &stream[2*k+1]);
201  }
202 #endif
203 
204  kparam.kernel_type = LLFAT_INTERIOR_KERNEL;
205  computeGenStapleFieldParityKernel((void*)cudaStaple1.Even_p(), (void*)cudaStaple1.Odd_p(),
206  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
207  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
208  (const void*)cudaStaple.Even_p(), (const void*)cudaStaple.Odd_p(),
209  dir, rho, 1,
210  act_path_coeff[3],
211  recon, prec, halfGridDim, kparam, &stream[nStream-1]);
212 
213 #ifdef MULTI_GPU
214  for(int k=3; k >= 0 ;k--){
215  if(!commDimPartitioned(k)) continue;
216  exchange_gpu_staple_comms(param->X, &cudaStaple1, k, (int)QUDA_BACKWARDS, &stream[2*k]);
217  exchange_gpu_staple_comms(param->X, &cudaStaple1, k, (int)QUDA_FORWARDS, &stream[2*k+1]);
218  }
219  for(int k=3; k >= 0 ;k--){
220  if(!commDimPartitioned(k)) continue;
  // NOTE(review): these two calls omit the (int) cast on
  // QUDA_BACKWARDS/QUDA_FORWARDS used everywhere else in this file --
  // cosmetic inconsistency only, the enum converts implicitly.
221  exchange_gpu_staple_wait(param->X, &cudaStaple1, k, QUDA_BACKWARDS, &stream[2*k]);
222  exchange_gpu_staple_wait(param->X, &cudaStaple1, k, QUDA_FORWARDS, &stream[2*k+1]);
223  }
224  for(int k=3; k >= 0 ;k--){
225  if(!commDimPartitioned(k)) continue;
226  cudaStreamSynchronize(stream[2*k]);
227  cudaStreamSynchronize(stream[2*k+1]);
228  }
229  cudaStreamSynchronize(stream[nStream-1]);
230 #endif
231  //end
232 
233 
  // Highest-level staples built from cudaStaple1; interior-only (no
  // further halo exchange of the result is performed here).
234  for(int sig = 0; sig < 4; sig++){
235  if (sig != dir && sig != nu && sig != rho){
236 
237  //start of one call
238  kparam.kernel_type = LLFAT_INTERIOR_KERNEL;
239  computeGenStapleFieldParityKernel((void*)NULL, (void*)NULL,
240  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
241  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
242  (const void*)cudaStaple1.Even_p(), (const void*)cudaStaple1.Odd_p(),
243  dir, sig, 0,
244  act_path_coeff[4],
245  recon, prec, halfGridDim, kparam, &stream[nStream-1]);
246 
247  //end
248 
249  }
250  }//sig
251 
252 #ifdef MULTI_GPU
253  cudaStreamSynchronize(stream[nStream-1]);
254 #endif
255 
256  }
257  }//rho
258  }
259  }//nu
260  }//dir
261 
262 
  // Drain all outstanding work, surface any deferred kernel errors, and
  // release the streams created above.
263  cudaDeviceSynchronize();
264  checkCudaError();
265 
266  for(int i=0;i < nStream; i++){
267  cudaStreamDestroy(stream[i]);
268  }
269 
270  return;
271  }
272 
273 
274 
// llfat_cuda_ex: extended-field variant of the fat-link computation. The
// site links carry a ghost border (the +2/+4 padded volumes below), so all
// staple terms can be computed locally without the per-face stream/exchange
// machinery used by llfat_cuda; long links can therefore also be computed
// in the multi-GPU build.
//
// NOTE(review): Doxygen HTML capture -- the function-name line
// ("llfat_cuda_ex(cudaGaugeField* cudaFatLink, cudaGaugeField* cudaLongLink,"
// at original line 276) and the declaration "llfat_kernel_param_t kparam;"
// (original line 311) were lost in extraction.
275  void
277  cudaGaugeField& cudaSiteLink,   // input thin links, with extended (ghost) border
278  cudaGaugeField& cudaStaple, cudaGaugeField& cudaStaple1,  // scratch staple fields
279  QudaGaugeParam* param, double* act_path_coeff)  // local lattice dims X[4] and path coefficients
280  {
281 
282  dim3 blockDim(BLOCK_DIM, 1,1);
283 
  // Interior volume and its checkerboard half.
284  int volume = (param->X[0])*(param->X[1])*(param->X[2])*(param->X[3]);
285  int Vh = volume/2;
286  dim3 halfGridDim((Vh+blockDim.x-1)/blockDim.x,1,1);
287 
  // Volume extended by a 1-site border in every dimension.
288  int volume_1g = (param->X[0]+2)*(param->X[1]+2)*(param->X[2]+2)*(param->X[3]+2);
289  int Vh_1g = volume_1g/2;
290  dim3 halfGridDim_1g((Vh_1g+blockDim.x-1)/blockDim.x,1,1);
291 
  // Volume extended by a 2-site border in every dimension.
292  int volume_2g = (param->X[0]+4)*(param->X[1]+4)*(param->X[2]+4)*(param->X[3]+4);
293  int Vh_2g = volume_2g/2;
294  dim3 halfGridDim_2g((Vh_2g+blockDim.x-1)/blockDim.x,1,1);
295 
296  QudaPrecision prec = cudaSiteLink.Precision();
297  QudaReconstructType recon = cudaSiteLink.Reconstruct();
298 
  // 12-reconstruct with an odd local dimension is explicitly unsupported.
299  if( ((param->X[0] % 2 != 0)
300  ||(param->X[1] % 2 != 0)
301  ||(param->X[2] % 2 != 0)
302  ||(param->X[3] % 2 != 0))
303  && (recon == QUDA_RECONSTRUCT_12)){
304  errorQuda("12 reconstruct and odd dimensionsize is not supported by link fattening code (yet)\n");
305 
306  }
307 
  // Separate (wider) launch geometry for the long-link kernel.
  // NOTE(review): the ceil-division rounds with blockDim.x but divides by
  // blockDim_ll.x (= 2*BLOCK_DIM); a conventional ceil-div would use
  // (Vh+blockDim_ll.x-1)/blockDim_ll.x. This under-provisions the grid by
  // one block for some Vh unless the kernel grid-strides -- verify against
  // computeLongLinkCuda.
308  dim3 blockDim_ll(2*BLOCK_DIM, 1, 1);
309  dim3 halfGridDim_ll((Vh+blockDim.x-1)/blockDim_ll.x,1,1);
310 
  // Launch-parameter sets: kparam (interior, base_idx 2), kparam_1g
  // (1-deep border, base_idx 1), kparam_2g (2-deep border, base_idx 0),
  // kparam_ll (long links). ("llfat_kernel_param_t kparam;" at original
  // line 311 is missing from this capture.)
312  llfat_kernel_param_t kparam_1g;
313  llfat_kernel_param_t kparam_2g;
314  llfat_kernel_param_t kparam_ll; // for the long-link calculation
315 
316  kparam.threads= Vh;
317  kparam.halfGridDim = halfGridDim;
318  kparam.D1 = param->X[0];
319  kparam.D2 = param->X[1];
320  kparam.D3 = param->X[2];
321  kparam.D4 = param->X[3];
322  kparam.D1h = param->X[0]/2;
323  kparam.base_idx = 2;
324 
325  kparam_ll.threads = Vh;
326  kparam_ll.halfGridDim = halfGridDim_ll;
327  kparam_ll.D1 = param->X[0];
328  kparam_ll.D2 = param->X[1];
329  kparam_ll.D3 = param->X[2];
330  kparam_ll.D4 = param->X[3];
331  kparam_ll.D1h = param->X[0]/2;
332  kparam_ll.base_idx = 2;
333  kparam_ll.blockDim = blockDim_ll;
334 
335  kparam_1g.threads= Vh_1g;
336  kparam_1g.halfGridDim = halfGridDim_1g;
337  kparam_1g.D1 = param->X[0] + 2;
338  kparam_1g.D2 = param->X[1] + 2;
339  kparam_1g.D3 = param->X[2] + 2;
340  kparam_1g.D4 = param->X[3] + 2;
341  kparam_1g.D1h = (param->X[0] + 2)/2;
342  kparam_1g.base_idx = 1;
343 
  // NOTE(review): kparam_2g is fully initialized but never used in the
  // visible code -- either dead, or consumed by a line lost in extraction;
  // verify against the repository source.
344  kparam_2g.threads= Vh_2g;
345  kparam_2g.halfGridDim = halfGridDim_2g;
346  kparam_2g.D1 = param->X[0] + 4;
347  kparam_2g.D2 = param->X[1] + 4;
348  kparam_2g.D3 = param->X[2] + 4;
349  kparam_2g.D4 = param->X[3] + 4;
350  kparam_2g.D1h = (param->X[0] + 4)/2;
351  kparam_2g.base_idx = 0;
352 
353  kparam_1g.blockDim = kparam_2g.blockDim = kparam.blockDim = blockDim;
354 
  // One-link contribution to the fat links.
355  llfatOneLinkKernel_ex(*cudaFatLink, cudaSiteLink,cudaStaple, cudaStaple1,
356  param, act_path_coeff, kparam);
357 
358  if(cudaLongLink) // if this pointer is not NULL, compute the long link
359  computeLongLinkCuda((void*)cudaLongLink->Even_p(), (void*)cudaLongLink->Odd_p(),
360  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
361  act_path_coeff[1], recon, prec, kparam_ll.halfGridDim, kparam_ll);
362 
363 
364  // Check the coefficients. If all of the following are zero, return.
365  if(fabs(act_path_coeff[2]) < MIN_COEFF &&
366  fabs(act_path_coeff[3]) < MIN_COEFF &&
367  fabs(act_path_coeff[4]) < MIN_COEFF &&
368  fabs(act_path_coeff[5]) < MIN_COEFF) return;
369 
370 
371 
  // Staple accumulation over link direction 'dir' and orthogonal
  // directions nu/rho/sig. The first-level staple is computed over the
  // 1-site-extended region (kparam_1g) so later interior-only kernels have
  // valid neighbor data; no explicit inter-GPU exchange is needed.
372 
373  for(int dir = 0;dir < 4; dir++){
374  for(int nu = 0; nu < 4; nu++){
375  if (nu != dir){
376 
377 
378  siteComputeGenStapleParityKernel_ex((void*)cudaStaple.Even_p(), (void*)cudaStaple.Odd_p(),
379  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
380  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
381  dir, nu,
382  act_path_coeff[2],
383  recon, prec, kparam_1g);
384 
  // NOTE(review): this term is guarded by "!= 0.0" while the sig-loop
  // below uses "fabs(...) > MIN_COEFF" -- inconsistent thresholding,
  // behaviorally benign for typical coefficients.
385  if(act_path_coeff[5] != 0.0){
386  computeGenStapleFieldParityKernel_ex((void*)NULL, (void*)NULL,
387  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
388  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
389  (const void*)cudaStaple.Even_p(), (const void*)cudaStaple.Odd_p(),
390  dir, nu, 0,
391  act_path_coeff[5],
392  recon, prec, kparam);
393  }
394 
395  for(int rho = 0; rho < 4; rho++){
396  if (rho != dir && rho != nu){
397 
  // Next-level staple into cudaStaple1, again over the extended region.
398  computeGenStapleFieldParityKernel_ex((void*)cudaStaple1.Even_p(), (void*)cudaStaple1.Odd_p(),
399  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
400  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
401  (const void*)cudaStaple.Even_p(), (const void*)cudaStaple.Odd_p(),
402  dir, rho, 1,
403  act_path_coeff[3],
404  recon, prec, kparam_1g);
405 
406 
  // Highest-level term, skipped entirely when its coefficient is negligible.
407  if(fabs(act_path_coeff[4]) > MIN_COEFF){
408  for(int sig = 0; sig < 4; sig++){
409  if (sig != dir && sig != nu && sig != rho){
410 
411  computeGenStapleFieldParityKernel_ex((void*)NULL, (void*)NULL,
412  (const void*)cudaSiteLink.Even_p(), (const void*)cudaSiteLink.Odd_p(),
413  (void*)cudaFatLink->Even_p(), (void*)cudaFatLink->Odd_p(),
414  (const void*)cudaStaple1.Even_p(), (const void*)cudaStaple1.Odd_p(),
415  dir, sig, 0,
416  act_path_coeff[4],
417  recon, prec, kparam);
418 
419  }
420  }//sig
421  } // MIN_COEFF
422  }
423  }//rho
424  }
425  }//nu
426  }//dir
427 
428 
  // Drain all work and surface any deferred kernel launch/execution errors.
429  cudaDeviceSynchronize();
430  checkCudaError();
431 
432  return;
433  }
434 
435 } // namespace quda
436 
437 #undef BLOCK_DIM
#define LLFAT_EXTERIOR_KERNEL_BACK_X
Definition: llfat_quda.h:9
__constant__ int Vh
#define MIN_COEFF
void llfat_cuda(cudaGaugeField *cudaFatLink, cudaGaugeField *cudaLongLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff)
enum QudaPrecision_s QudaPrecision
void exchange_gpu_staple_start(int *X, void *_cudaStaple, int dir, int whichway, cudaStream_t *stream)
int commDimPartitioned(int dir)
#define LLFAT_EXTERIOR_KERNEL_FWD_X
Definition: llfat_quda.h:8
#define errorQuda(...)
Definition: util_quda.h:73
void exchange_gpu_staple_wait(int *X, void *_cudaStaple, int dir, int whichway, cudaStream_t *stream)
cudaStream_t * stream
void computeLongLinkCuda(void *outEven, void *outOdd, const void *const inEven, const void *const inOdd, double coeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam)
#define LLFAT_EXTERIOR_KERNEL_FWD_Z
Definition: llfat_quda.h:12
#define LLFAT_EXTERIOR_KERNEL_BACK_T
Definition: llfat_quda.h:15
#define LLFAT_EXTERIOR_KERNEL_FWD_Y
Definition: llfat_quda.h:10
QudaGaugeParam param
Definition: pack_test.cpp:17
QudaPrecision Precision() const
#define LLFAT_INTERIOR_KERNEL
Definition: llfat_quda.h:7
void siteComputeGenStapleParityKernel_ex(void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam)
void llfatOneLinkKernel(cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff)
Definition: llfat_quda.cu:1187
void computeGenStapleFieldParityKernel_ex(void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, const void *mulink_even, const void *mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam)
void llfat_cuda_ex(cudaGaugeField *cudaFatLink, cudaGaugeField *cudaLongLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff)
#define LLFAT_EXTERIOR_KERNEL_BACK_Y
Definition: llfat_quda.h:11
cudaGaugeField * cudaFatLink
void exchange_gpu_staple_comms(int *X, void *_cudaStaple, int dir, int whichway, cudaStream_t *stream)
QudaReconstructType Reconstruct() const
Definition: gauge_field.h:168
void computeGenStapleFieldParityKernel(void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, const void *mulink_even, const void *mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream)
int X[4]
Definition: quda.h:29
void siteComputeGenStapleParityKernel(void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream)
#define LLFAT_EXTERIOR_KERNEL_BACK_Z
Definition: llfat_quda.h:13
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int int RealTypeId< RealA >::Type RealA *const RealA *const RealA *const RealA *const RealA *const RealA *const RealA *const RealA *const hisq_kernel_param_t kparam
#define LLFAT_EXTERIOR_KERNEL_FWD_T
Definition: llfat_quda.h:14
enum QudaReconstructType_s QudaReconstructType
#define checkCudaError()
Definition: util_quda.h:110
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int sig
QudaPrecision prec
Definition: test_util.cpp:1551
void llfatOneLinkKernel_ex(cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff, llfat_kernel_param_t kparam)
Definition: llfat_quda.cu:1232
#define BLOCK_DIM