QUDA
v0.7.0
A library for QCD on GPUs
Main Page
Namespaces
Classes
Files
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Pages
quda
lib
staggered_dslash_def.h
Go to the documentation of this file.
1
// staggered_dslash_def.h - staggered Dslash kernel definitions
2
//
3
// See comments in wilson_dslash_def.h
4
5
// initialize on first iteration
6
7
#ifndef DD_LOOP
8
#define DD_LOOP
9
10
#define DD_AXPY 0
11
#define DD_RECON 8
12
#define DD_PREC 0
13
#endif
14
15
// set options for current iteration
16
17
#if (DD_IMPROVED==1)
18
#define DD_FNAME improvedStaggeredDslash
19
#else
20
#define DD_FNAME staggeredDslash
21
#endif
22
23
#if (DD_AXPY==0) // no axpy
24
#define DD_AXPY_F
25
#else // axpy
26
#define DD_AXPY_F Axpy
27
#define DSLASH_AXPY
28
#endif
29
30
#if (DD_PREC == 0)
31
#define DD_PARAM_AXPY const double2 *x, const float *xNorm, const double a, const DslashParam param
32
#elif (DD_PREC == 1)
33
#define DD_PARAM_AXPY const float2 *x, const float *xNorm, const float a, const DslashParam param
34
#else
35
#define DD_PARAM_AXPY const short2 *x, const float *xNorm, const float a, const DslashParam param
36
#endif
37
38
39
#define READ_LONG_PHASE(phase, dir, idx, stride) // May be a problem below with redefinitions
40
41
#if (DD_RECON==8) // reconstruct from 8 reals
42
#define DD_RECON_F 8
43
44
#if (DD_PREC==0) // DOUBLE PRECISION
45
46
#if (DD_IMPROVED==1)
47
#if (__COMPUTE_CAPABILITY__ >= 200)
48
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1, const double2* longGauge0, const double2* longGauge1, const double* longPhase0, const double* longPhase1
49
#else
50
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1, const double2* longGauge0, const double2* longGauge1
51
#endif
52
#else
53
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1
54
#endif
55
56
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE
57
58
#ifdef DIRECT_ACCESS_FAT_LINK
59
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride)
60
#else
61
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
62
#endif // DIRECT_ACCESS_FAT_LINK
63
#ifdef DIRECT_ACCESS_LONG_LINK
64
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(LONG, gauge, dir, idx, stride)
65
#else
66
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(LONG, gauge, dir, idx, stride)
67
#endif // DIRECT_ACCESS_LONG_LINK
68
69
#elif (DD_PREC==1) // SINGLE PRECISION
70
#if (DD_IMPROVED==1)
71
#if (__COMPUTE_CAPABILITY__ >= 200)
72
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1, const float4* longGauge0, const float4* longGauge1, const float* longPhase0, const float* longPhase1
73
#else
74
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1, const float4* longGauge0, const float4* longGauge1
75
#endif
76
#else
77
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1
78
#endif
79
80
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
81
82
#ifdef DIRECT_ACCESS_FAT_LINK
83
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride)
84
#else
85
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride)
86
#endif // DIRECT_ACCESS_FAT_LINK
87
#ifdef DIRECT_ACCESS_LONG_LINK
88
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(LONG, gauge, dir, idx, stride)
89
#else
90
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(LONG, gauge, dir, idx, stride)
91
#endif // DIRECT_ACCESS_LONG_LINK
92
93
#else // HALF PRECISION
94
#if (DD_IMPROVED==1)
95
#if (__COMPUTE_CAPABILITY__ >= 200)
96
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2* fatGauge1, const short4* longGauge0, const short4* longGauge1, const short* longPhase0, const short* longPhase1
97
#else
98
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2* fatGauge1, const short4* longGauge0, const short4* longGauge1
99
#endif
100
#else
101
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2* fatGauge1
102
#endif
103
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
104
105
/*#ifdef DIRECT_ACCESS_FAT_LINK
106
#define READ_FAT_MATRIX(gauge, dir, idx) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, fat_ga_stride); RESCALE2(FAT, fat_link_max);
107
#else*/
108
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
109
/*#endif // DIRECT_ACCESS_FAT_LINK
110
#ifdef DIRECT_ACCESS_LONG_LINK
111
#define READ_LONG_MATRIX(gauge, dir, idx) READ_GAUGE_MATRIX_8_SHORT4(LONG, gauge, dir, idx, long_ga_stride)
112
#else*/
113
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(LONG, gauge, dir, idx, stride)
114
//#endif // DIRECT_ACCESS_LONG_LINK
115
116
#endif // DD_PREC
117
118
#elif (DD_RECON == 9) // reconstruct from 9 reals
119
120
#define DD_RECON_F 9
121
122
#if (DD_PREC==0) // DOUBLE PRECISION
123
#if (DD_IMPROVED==1)
124
#if (__COMPUTE_CAPABILITY__ >= 200)
125
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1, const double2* longGauge0, const double2* longGauge1, const double* longPhase0, const double* longPhase1
126
#else
127
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1, const double2* longGauge0, const double2* longGauge1
128
#endif
129
#else
130
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1
131
#endif
132
133
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_9_DOUBLE
134
135
#ifdef DIRECT_ACCESS_FAT_LINK
136
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride)
137
#else
138
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
139
#endif // DIRECT_ACCESS_FAT_LINK
140
#undef READ_LONG_PHASE
141
#ifdef DIRECT_ACCESS_LONG_LINK
142
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(LONG, gauge, dir, idx, stride)
143
#define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE(PHASE, phase, dir, idx, stride);
144
#else
145
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(LONG, gauge, dir, idx, stride)
146
#define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE_TEX(PHASE, phase, dir, idx, stride);
147
#endif // DIRECT_ACCESS_LONG_LINK
148
149
#elif (DD_PREC==1) // SINGLE PRECISION
150
#if (DD_IMPROVED==1)
151
#if (__COMPUTE_CAPABILITY__ >= 200)
152
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1, const float4* longGauge0, const float4* longGauge1, const float* longPhase0, const float* longPhase1
153
#else
154
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1, const float4* longGauge0, const float4* longGauge1
155
#endif
156
#else
157
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1
158
#endif
159
160
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_9_SINGLE
161
162
#ifdef DIRECT_ACCESS_FAT_LINK
163
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride)
164
#else
165
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride)
166
#endif // DIRECT_ACCESS_FAT_LINK
167
#undef READ_LONG_PHASE
168
#ifdef DIRECT_ACCESS_LONG_LINK
169
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(LONG, gauge, dir, idx, stride)
170
#define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT(PHASE, phase, dir, idx, stride);
171
#else
172
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(LONG, gauge, dir, idx, stride)
173
#define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT_TEX(PHASE, phase, dir, idx, stride);
174
#endif // DIRECT_ACCESS_LONG_LINK
175
176
#else // HALF PRECISION
177
#if (DD_IMPROVED==1)
178
#if (__COMPUTE_CAPABILITY__ >= 200)
179
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2* fatGauge1, const short4* longGauge0, const short4* longGauge1, const short* longPhase0, const short* longPhase1
180
#else
181
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2* fatGauge1, const short4* longGauge0, const short4* longGauge1
182
#endif
183
#else
184
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2* fatGauge1
185
#endif
186
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_9_SINGLE
187
188
/*#ifdef DIRECT_ACCESS_FAT_LINK
189
#define READ_FAT_MATRIX(gauge, dir, idx) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, fat_ga_stride); RESCALE2(FAT, fat_link_max);
190
#else*/
191
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
192
/*#endif // DIRECT_ACCESS_FAT_LINK
193
#ifdef DIRECT_ACCESS_LONG_LINK
194
#define READ_LONG_MATRIX(gauge, dir, idx) READ_GAUGE_MATRIX_8_SHORT4(LONG, gauge, dir, idx, long_ga_stride)
195
#else*/
196
#undef READ_LONG_PHASE
197
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(LONG, gauge, dir, idx, stride)
198
#define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_SHORT_TEX(PHASE, phase, dir, idx, stride);
199
//#endif // DIRECT_ACCESS_LONG_LINK
200
201
#endif // DD_PREC
202
203
#elif (DD_RECON == 12)// reconstruct from 12 reals
204
205
#define DD_RECON_F 12
206
207
#if (DD_PREC==0) // DOUBLE PRECISION
208
#if (DD_IMPROVED==1)
209
#if (__COMPUTE_CAPABILITY__ >= 200)
210
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1, const double2* longGauge0, const double2* longGauge1, const double* longPhase0, const double* longPhase1
211
#else
212
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1, const double2* longGauge0, const double2* longGauge1
213
#endif
214
#else
215
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1
216
#endif
217
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE
218
219
#ifdef DIRECT_ACCESS_FAT_LINK
220
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride)
221
#else
222
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
223
#endif // DIRECT_ACCESS_FAT_LINK
224
#ifdef DIRECT_ACCESS_LONG_LINK
225
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(LONG, gauge, dir, idx, stride)
226
#else
227
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(LONG, gauge, dir, idx, stride)
228
#endif // DIRECT_ACCESS_LONG_LINK
229
230
#elif (DD_PREC==1) // SINGLE PRECISION
231
#if (DD_IMPROVED==1)
232
#if (__COMPUTE_CAPABILITY__ >= 200)
233
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1, const float4* longGauge0, const float4* longGauge1, const float* longPhase0, const float* longPhase1
234
#else
235
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1, const float4* longGauge0, const float4* longGauge1
236
#endif
237
#else
238
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1
239
#endif
240
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
241
242
#ifdef DIRECT_ACCESS_FAT_LINK
243
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride)
244
#else
245
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride)
246
#endif // DIRECT_ACCESS_FAT_LINK
247
#ifdef DIRECT_ACCESS_LONG_LINK
248
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(LONG, gauge, dir, idx, stride)
249
#else
250
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(LONG, gauge, dir, idx, stride)
251
#endif // DIRECT_ACCESS_LONG_LINK
252
253
#else // HALF PRECISION
254
#if (DD_IMPROVED==1)
255
#if (__COMPUTE_CAPABILITY__ >= 200)
256
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2 *fatGauge1, const short4* longGauge0, const short4* longGauge1, const short* longPhase0, const short* longPhase1
257
#else
258
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2 *fatGauge1, const short4* longGauge0, const short4* longGauge1
259
#endif
260
#else
261
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2 *fatGauge1
262
#endif
263
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
264
265
/*#ifdef DIRECT_ACCESS_FAT_LINK
266
#define READ_FAT_MATRIX(gauge, dir, idx) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, fat_ga_stride); RESCALE2(FAT, fat_link_max);
267
#else*/
268
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
269
/*#endif // DIRECT_ACCCESS_FAT_LINK
270
#ifdef DIRECT_ACCESS_LONG_LINK
271
#define READ_LONG_MATRIX(gauge, dir, idx) READ_GAUGE_MATRIX_12_SHORT4(LONG, gauge, dir, idx, long_ga_stride)
272
#else*/
273
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(LONG, gauge, dir, idx, stride)
274
//#endif // DIRECT_ACCCESS_LONG_LINK
275
276
#endif // DD_PREC
277
278
#elif (DD_RECON == 13)
279
#define DD_RECON_F 13
280
281
#if (DD_PREC==0) // DOUBLE PRECISION
282
#if (DD_IMPROVED==1)
283
#if (__COMPUTE_CAPABILITY__ >= 200)
284
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1, const double2* longGauge0, const double2* longGauge1, const double* longPhase0, const double* longPhase1
285
#else
286
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1, const double2* longGauge0, const double2* longGauge1
287
#endif
288
#else
289
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1
290
#endif
291
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_13_DOUBLE
292
293
#ifdef DIRECT_ACCESS_FAT_LINK
294
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride)
295
#else
296
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
297
#endif // DIRECT_ACCESS_FAT_LINK
298
#undef READ_LONG_PHASE
299
#ifdef DIRECT_ACCESS_LONG_LINK
300
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(LONG, gauge, dir, idx, stride)
301
#define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE(PHASE, phase, dir, idx, stride);
302
#else
303
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(LONG, gauge, dir, idx, stride)
304
#define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE_TEX(PHASE, phase, dir, idx, stride);
305
#endif // DIRECT_ACCESS_LONG_LINK
306
307
#elif (DD_PREC==1) // SINGLE PRECISION
308
#if (DD_IMPROVED==1)
309
#if (__COMPUTE_CAPABILITY__ >= 200)
310
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1, const float4* longGauge0, const float4* longGauge1, const float* longPhase0, const float* longPhase1
311
#else
312
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1, const float4* longGauge0, const float4* longGauge1
313
#endif
314
#else
315
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1
316
#endif
317
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_13_SINGLE
318
319
#ifdef DIRECT_ACCESS_FAT_LINK
320
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride)
321
#else
322
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride)
323
#endif // DIRECT_ACCESS_FAT_LINK
324
#undef READ_LONG_PHASE
325
#ifdef DIRECT_ACCESS_LONG_LINK
326
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(LONG, gauge, dir, idx, stride)
327
#define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT(PHASE, phase, dir, idx, stride);
328
#else
329
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(LONG, gauge, dir, idx, stride)
330
#define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT_TEX(PHASE, phase, dir, idx, stride);
331
#endif // DIRECT_ACCESS_LONG_LINK
332
333
#else // HALF PRECISION
334
#if (DD_IMPROVED==1)
335
#if (__COMPUTE_CAPABILITY__ >= 200)
336
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2 *fatGauge1, const short4* longGauge0, const short4* longGauge1, const short* longPhase0, const short* longPhase1
337
#else
338
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2 *fatGauge1, const short4* longGauge0, const short4* longGauge1
339
#endif
340
#else
341
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2 *fatGauge1
342
#endif
343
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_13_SINGLE
344
345
/*#ifdef DIRECT_ACCESS_FAT_LINK
346
#define READ_FAT_MATRIX(gauge, dir, idx) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, fat_ga_stride); RESCALE2(FAT, fat_link_max);
347
#else*/
348
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
349
/*#endif // DIRECT_ACCCESS_FAT_LINK
350
#ifdef DIRECT_ACCESS_LONG_LINK
351
#define READ_LONG_MATRIX(gauge, dir, idx) READ_GAUGE_MATRIX_12_SHORT4(LONG, gauge, dir, idx, long_ga_stride)
352
#else*/
353
#undef READ_LONG_PHASE
354
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(LONG, gauge, dir, idx, stride)
355
#define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_SHORT_TEX(PHASE, phase, dir, idx, stride);
356
//#endif // DIRECT_ACCCESS_LONG_LINK
357
358
#endif // DD_PREC
359
360
#else //18 reconstruct
361
#define DD_RECON_F 18
362
#define RECONSTRUCT_GAUGE_MATRIX(dir, gauge, idx, sign)
363
364
#if (DD_PREC==0) // DOUBLE PRECISION
365
#if (DD_IMPROVED==1)
366
#if (__COMPUTE_CAPABILITY__ >= 200)
367
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1, const double2* longGauge0, const double2* longGauge1, const double* longPhase0, const double* longPhase1
368
#else
369
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1, const double2* longGauge0, const double2* longGauge1
370
#endif
371
#else
372
#define DD_PARAM_GAUGE const double2 *fatGauge0, const double2 *fatGauge1
373
#endif
374
#ifdef DIRECT_ACCESS_FAT_LINK
375
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride)
376
#else
377
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
378
#endif // DIRECT_ACCCESS_FAT_LINK
379
#ifdef DIRECT_ACCESS_LONG_LINK
380
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(LONG, gauge, dir, idx, stride)
381
#else
382
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(LONG, gauge, dir, idx, stride)
383
#endif // DIRECT_ACCCESS_LONG_LINK
384
385
#elif (DD_PREC==1) // SINGLE PRECISION
386
387
#if (DD_IMPROVED==1)
388
#if (__COMPUTE_CAPABILITY__ >= 200)
389
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1, const float4* longGauge0, const float4* longGauge1, const float* longPhase0, const float* longPhase1
390
#else
391
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1, const float4* longGauge0, const float4* longGauge1
392
#endif
393
#else
394
#define DD_PARAM_GAUGE const float2 *fatGauge0, const float2 *fatGauge1
395
#endif
396
397
#ifdef DIRECT_ACCESS_FAT_LINK
398
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride)
399
#else
400
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride)
401
#endif // DIRECT_ACCCESS_FAT_LINK
402
#ifdef DIRECT_ACCESS_LONG_LINK
403
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(LONG, gauge, dir, idx, stride)
404
#else
405
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(LONG, gauge, dir, idx, stride)
406
#endif // DIRECT_ACCCESS_LONG_LINK
407
408
#else // HALF PRECISION
409
410
#if (DD_IMPROVED==1)
411
#if (__COMPUTE_CAPABILITY__ >= 200)
412
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2 *fatGauge1, const short4* longGauge0, const short4* longGauge1, const short* longPhase0, const short* longPhase1
413
#else // Tesla doesn't support reconstruct 9/13
414
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2 *fatGauge1, const short4* longGauge0, const short4* longGauge1
415
#endif
416
#else
417
#define DD_PARAM_GAUGE const short2 *fatGauge0, const short2 *fatGauge1
418
#endif
419
420
/*#ifdef DIRECT_ACCESS_FAT_LINK
421
#define READ_FAT_MATRIX(gauge, dir, idx) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, fat_ga_stride); RESCALE2(FAT, fat_link_max);
422
#else*/
423
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
424
/*#endif // DIRECT_ACCESS_FAT_LINK
425
#ifdef DIRECT_ACCESS_LONG_LINK
426
#define READ_LONG_MATRIX(gauge, dir, idx) READ_GAUGE_MATRIX_18_SHORT2(LONG, gauge, dir, idx, long_ga_stride)
427
#else*/
428
#define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(LONG, gauge, dir, idx, stride)
429
//#endif // DIRECT_ACCCESS_LONG_LINK
430
431
#endif // DD_PREC
432
433
#endif // DD_RECON
434
435
#if (DD_PREC==0) // double-precision fields
436
437
// gauge field
438
#define DD_PREC_F D
439
#if (defined DIRECT_ACCESS_FAT_LINK) || (defined FERMI_NO_DBLE_TEX)
440
#define FATLINK0TEX fatGauge0
441
#define FATLINK1TEX fatGauge1
442
#else
443
#ifdef USE_TEXTURE_OBJECTS
444
#define FATLINK0TEX param.gauge0Tex
445
#define FATLINK1TEX param.gauge1Tex
446
#else
447
#define FATLINK0TEX fatGauge0TexDouble
448
#define FATLINK1TEX fatGauge1TexDouble
449
#endif // USE_TEXTURE_OBJECTS
450
#endif
451
452
#if (defined DIRECT_ACCESS_LONG_LINK) || (defined FERMI_NO_DBLE_TEX)
453
#define LONGLINK0TEX longGauge0
454
#define LONGLINK1TEX longGauge1
455
#define LONGPHASE0TEX longPhase0
456
#define LONGPHASE1TEX longPhase1
457
#else
458
#ifdef USE_TEXTURE_OBJECTS
459
#define LONGLINK0TEX param.longGauge0Tex
460
#define LONGLINK1TEX param.longGauge1Tex
461
#define LONGPHASE0TEX param.longPhase0Tex
462
#define LONGPHASE1TEX param.longPhase1Tex
463
#else
464
#define LONGLINK0TEX longGauge0TexDouble
465
#define LONGLINK1TEX longGauge1TexDouble
466
#define LONGPHASE0TEX longPhase0TexDouble
467
#define LONGPHASE1TEX longPhase1TexDouble
468
#endif // USE_TEXTURE_OBJECTS
469
#endif
470
471
#define GAUGE_DOUBLE
472
473
// spinor fields
474
#define DD_PARAM_OUT double2* out, float *null1
475
#define DD_PARAM_IN const double2* in, const float *null4
476
#if (defined DIRECT_ACCESS_SPINOR) || (defined FERMI_NO_DBLE_TEX)
477
#define SPINORTEX in
478
#define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_DOUBLE
479
#define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_DOUBLE
480
#else
481
#ifdef USE_TEXTURE_OBJECTS
482
#define SPINORTEX param.inTex
483
#else
484
#define SPINORTEX spinorTexDouble
485
#endif // USE_TEXTURE_OBJECTS
486
#define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_DOUBLE_TEX
487
#define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_DOUBLE_TEX
488
#endif
489
#if (defined DIRECT_ACCESS_INTER) || (defined FERMI_NO_DBLE_TEX)
490
#define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR
491
#define INTERTEX out
492
#else
493
#define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_DOUBLE_TEX
494
#ifdef USE_TEXTURE_OBJECTS
495
#define INTERTEX param.outTex
496
#else
497
#define INTERTEX interTexDouble
498
#endif
499
#endif
500
#define WRITE_SPINOR WRITE_ST_SPINOR_DOUBLE2
501
#define SPINOR_DOUBLE
502
#if (DD_AXPY==1)
503
#if (defined DIRECT_ACCESS_ACCUM) || (defined FERMI_NO_DBLE_TEX)
504
#define ACCUMTEX x
505
#define READ_ACCUM READ_ST_ACCUM_DOUBLE
506
#else
507
#ifdef USE_TEXTURE_OBJECTS
508
#define ACCUMTEX param.xTex
509
#else
510
#define ACCUMTEX accumTexDouble
511
#endif // USE_TEXTURE_OBJECTS
512
#define READ_ACCUM READ_ST_ACCUM_DOUBLE_TEX
513
#endif
514
#endif // DD_AXPY
515
516
517
#elif (DD_PREC==1) // single-precision fields
518
519
// gauge fields
520
#define DD_PREC_F S
521
522
#ifndef DIRECT_ACCESS_FAT_LINK
523
#ifdef USE_TEXTURE_OBJECTS
524
#define FATLINK0TEX param.gauge0Tex
525
#define FATLINK1TEX param.gauge1Tex
526
#else
527
#define FATLINK0TEX fatGauge0TexSingle
528
#define FATLINK1TEX fatGauge1TexSingle
529
#endif
530
#else
531
#define FATLINK0TEX fatGauge0
532
#define FATLINK1TEX fatGauge1
533
#endif
534
535
#ifndef DIRECT_ACCESS_LONG_LINK //longlink access
536
#ifdef USE_TEXTURE_OBJECTS
537
#define LONGLINK0TEX param.longGauge0Tex
538
#define LONGLINK1TEX param.longGauge1Tex
539
#define LONGPHASE0TEX param.longPhase0Tex
540
#define LONGPHASE1TEX param.longPhase1Tex
541
#else
542
#if (DD_RECON ==18)
543
#define LONGLINK0TEX longGauge0TexSingle_norecon
544
#define LONGLINK1TEX longGauge1TexSingle_norecon
545
#else
546
#define LONGLINK0TEX longGauge0TexSingle
547
#define LONGLINK1TEX longGauge1TexSingle
548
#define LONGPHASE0TEX longPhase0TexSingle
549
#define LONGPHASE1TEX longPhase1TexSingle
550
#endif
551
#endif // USE_TEXTURE_OBJECTS
552
#else
553
#define LONGLINK0TEX longGauge0
554
#define LONGLINK1TEX longGauge1
555
#define LONGPHASE0TEX longPhase0
556
#define LONGPHASE1TEX longPhase1
557
#endif
558
559
// spinor fields
560
#define DD_PARAM_OUT float2* out, float *null1
561
#define DD_PARAM_IN const float2* in, const float *null4
562
#ifndef DIRECT_ACCESS_SPINOR
563
#ifdef USE_TEXTURE_OBJECTS
564
#define SPINORTEX param.inTex
565
#else
566
#define SPINORTEX spinorTexSingle2
567
#endif // USE_TEXTURE_OBJECTS
568
#define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_SINGLE_TEX
569
#define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_SINGLE_TEX
570
#else
571
#define SPINORTEX in
572
#define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_SINGLE
573
#define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_SINGLE
574
#endif
575
#if (defined DIRECT_ACCESS_INTER)
576
#define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR
577
#define INTERTEX out
578
#else
579
#define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_SINGLE_TEX
580
#ifdef USE_TEXTURE_OBJECTS
581
#define INTERTEX param.outTex
582
#else
583
#define INTERTEX interTexSingle2
584
#endif // USE_TEXTURE_OBJECTS
585
#endif
586
#define WRITE_SPINOR WRITE_ST_SPINOR_FLOAT2
587
#if (DD_AXPY==1)
588
#if (defined DIRECT_ACCESS_ACCUM)
589
#define ACCUMTEX x
590
#define READ_ACCUM READ_ST_ACCUM_SINGLE
591
#else
592
#ifdef USE_TEXTURE_OBJECTS
593
#define ACCUMTEX param.xTex
594
#else
595
#define ACCUMTEX accumTexSingle2
596
#endif // USE_TEXTURE_OBJECTS
597
#define READ_ACCUM READ_ST_ACCUM_SINGLE_TEX
598
#endif
599
#endif // DD_AXPY
600
601
602
#else // half-precision fields
603
604
// all reads done through texture cache regardless
605
606
// gauge fields
607
#define DD_PREC_F H
608
#ifdef USE_TEXTURE_OBJECTS
609
#define FATLINK0TEX param.gauge0Tex
610
#define FATLINK1TEX param.gauge1Tex
611
#define LONGLINK0TEX param.longGauge0Tex
612
#define LONGLINK1TEX param.longGauge1Tex
613
#define LONGPHASE0TEX param.longPhase0Tex
614
#define LONGPHASE1TEX param.longPhase1Tex
615
#else
616
#define FATLINK0TEX fatGauge0TexHalf
617
#define FATLINK1TEX fatGauge1TexHalf
618
#if (DD_RECON ==18)
619
#define LONGLINK0TEX longGauge0TexHalf_norecon
620
#define LONGLINK1TEX longGauge1TexHalf_norecon
621
#else
622
#define LONGLINK0TEX longGauge0TexHalf
623
#define LONGLINK1TEX longGauge1TexHalf
624
#define LONGPHASE0TEX longPhase0TexHalf
625
#define LONGPHASE1TEX longPhase1TexHalf
626
#endif
627
#endif // USE_TEXTURE_OBJECTS
628
629
#define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_HALF_TEX
630
#define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_HALF_TEX
631
#ifdef USE_TEXTURE_OBJECTS
632
#define SPINORTEX param.inTex
633
#else
634
#define SPINORTEX spinorTexHalf2
635
#endif // USE_TEXTURE_OBJECTS
636
#define DD_PARAM_OUT short2* out, float *outNorm
637
#define DD_PARAM_IN const short2* in, const float *inNorm
638
#if (defined DIRECT_ACCESS_INTER)
639
#define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_HALF
640
#define INTERTEX out
641
#else
642
#define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_HALF_TEX
643
#ifdef USE_TEXTURE_OBJECTS
644
#define INTERTEX param.outTex
645
#else
646
#define INTERTEX interTexHalf2
647
#endif // USE_TEXTURE_OBJECTS
648
#endif
649
#define WRITE_SPINOR WRITE_ST_SPINOR_SHORT2
650
#if (DD_AXPY==1)
651
#ifdef USE_TEXTURE_OBJECTS
652
#define ACCUMTEX param.xTex
653
#else
654
#define ACCUMTEX accumTexHalf2
655
#endif // USE_TEXTURE_OBJECTS
656
#define READ_ACCUM READ_ST_ACCUM_HALF_TEX
657
#endif // DD_AXPY
658
659
#endif
660
661
#ifdef GPU_STAGGERED_DIRAC
662
663
// only build double precision if supported
664
#if !(__COMPUTE_CAPABILITY__ < 130 && DD_PREC == 0)
665
666
#define DD_CONCAT(n,r,x) n ## r ## x ## Kernel
667
#define DD_FUNC(n,r,x) DD_CONCAT(n,r,x)
668
669
// define the kernel
670
671
#if (DD_IMPROVED==1)
672
673
template
<KernelType kernel_type>
674
__global__
void
DD_FUNC
(
DD_FNAME
,
DD_RECON_F
,
DD_AXPY_F
)
675
(
DD_PARAM_OUT
,
DD_PARAM_GAUGE
,
DD_PARAM_IN
,
DD_PARAM_AXPY
) {
676
#ifdef GPU_STAGGERED_DIRAC
677
#include "
staggered_dslash_core.h
"
678
#endif
679
}
680
681
template
<>
682
__global__
void
DD_FUNC
(
DD_FNAME
,
DD_RECON_F
,
DD_AXPY_F
)<
EXTERIOR_KERNEL_ALL
>
683
(
DD_PARAM_OUT
,
DD_PARAM_GAUGE
,
DD_PARAM_IN
,
DD_PARAM_AXPY
) {
684
#ifdef GPU_STAGGERED_DIRAC
685
#include "
staggered_fused_exterior_dslash_core.h
"
686
#endif
687
}
688
689
#else // naive staggered kernel
690
691
#undef READ_LONG_MATRIX
692
#define READ_LONG_MATRIX(gauge, dir, idx, stride)
693
694
#undef READ_LONG_PHASE
695
#define READ_LONG_PHASE(phase, dir, idx, stride)
696
697
template
<KernelType kernel_type>
698
__global__
void
DD_FUNC
(
DD_FNAME
,
DD_RECON_F
,
DD_AXPY_F
)
699
(
DD_PARAM_OUT
,
DD_PARAM_GAUGE
,
DD_PARAM_IN
,
DD_PARAM_AXPY
) {
700
#ifdef GPU_STAGGERED_DIRAC
701
#include "
staggered_dslash_core.h
"
702
#endif
703
}
704
705
template
<>
706
__global__
void
DD_FUNC
(
DD_FNAME
,
DD_RECON_F
,
DD_AXPY_F
)<
EXTERIOR_KERNEL_ALL
>
707
(
DD_PARAM_OUT
,
DD_PARAM_GAUGE
,
DD_PARAM_IN
,
DD_PARAM_AXPY
) {
708
#ifdef GPU_STAGGERED_DIRAC
709
#include "
staggered_fused_exterior_dslash_core.h
"
710
#endif
711
}
712
713
#endif
714
715
#endif // !(__COMPUTE_CAPABILITY__ < 130 && DD_PREC == 0)
716
717
#endif // ! GPU_STAGGERED_DIRAC
718
719
// clean up
720
721
#undef DD_PREC_F
722
#undef DD_RECON_F
723
#undef DD_AXPY_F
724
#undef DD_PARAM_OUT
725
#undef DD_PARAM_GAUGE
726
#undef DD_PARAM_IN
727
#undef DD_PARAM_AXPY
728
#undef DD_FNAME
729
#undef DD_CONCAT
730
#undef DD_FUNC
731
732
#undef DSLASH_AXPY
733
#undef READ_GAUGE_MATRIX
734
#undef RECONSTRUCT_GAUGE_MATRIX
735
#undef FATLINK0TEX
736
#undef FATLINK1TEX
737
#undef LONGLINK0TEX
738
#undef LONGLINK1TEX
739
#undef LONGPHASE0TEX
740
#undef LONGPHASE1TEX
741
#undef SPINORTEX
742
#undef WRITE_SPINOR
743
#undef READ_AND_SUM_SPINOR
744
#undef INTERTEX
745
#undef ACCUMTEX
746
#undef READ_ACCUM
747
#undef CLOVERTEX
748
#undef READ_CLOVER
749
#undef DSLASH_CLOVER
750
#undef GAUGE_DOUBLE
751
#undef SPINOR_DOUBLE
752
#undef CLOVER_DOUBLE
753
#undef READ_FAT_MATRIX
754
#undef READ_LONG_MATRIX
755
#undef READ_LONG_PHASE
756
#undef READ_1ST_NBR_SPINOR
757
#undef READ_3RD_NBR_SPINOR
758
759
760
// prepare next set of options, or clean up after final iteration
761
762
#if (DD_AXPY==0)
763
#undef DD_AXPY
764
#define DD_AXPY 1
765
#else
766
#undef DD_AXPY
767
#define DD_AXPY 0
768
769
#if (DD_RECON==8)
770
#undef DD_RECON
771
#define DD_RECON 9
772
#elif (DD_RECON==9)
773
#undef DD_RECON
774
#define DD_RECON 12
775
#elif (DD_RECON==12)
776
#undef DD_RECON
777
#define DD_RECON 13
778
#elif (DD_RECON==13)
779
#undef DD_RECON
780
#define DD_RECON 18
781
#else
782
#undef DD_RECON
783
784
#define DD_RECON 8
785
786
#if (DD_PREC==0)
787
#undef DD_PREC
788
#define DD_PREC 1
789
#elif (DD_PREC==1)
790
#undef DD_PREC
791
#define DD_PREC 2
792
#else
793
#undef DD_PREC
794
#define DD_PREC 0
795
796
#undef DD_LOOP
797
#undef DD_AXPY
798
#undef DD_RECON
799
#undef DD_PREC
800
801
#endif // DD_PREC
802
#endif // DD_RECON
803
#endif // DD_AXPY
804
805
#ifdef DD_LOOP
806
#include "
staggered_dslash_def.h
"
807
#endif
DD_PARAM_GAUGE
#define DD_PARAM_GAUGE
Definition:
staggered_dslash_def.h:53
DD_AXPY_F
#define DD_AXPY_F
Definition:
staggered_dslash_def.h:24
DD_RECON_F
#define DD_RECON_F
Definition:
staggered_dslash_def.h:42
DD_PARAM_IN
#define DD_PARAM_IN
Definition:
staggered_dslash_def.h:475
DD_FUNC
#define DD_FUNC(x)
Definition:
clover_def.h:141
staggered_dslash_core.h
DD_FNAME
#define DD_FNAME
Definition:
staggered_dslash_def.h:20
DD_PARAM_OUT
#define DD_PARAM_OUT
Definition:
staggered_dslash_def.h:474
EXTERIOR_KERNEL_ALL
Definition:
dslash_constants.h:3
DD_PARAM_AXPY
#define DD_PARAM_AXPY
Definition:
staggered_dslash_def.h:31
staggered_dslash_def.h
staggered_fused_exterior_dslash_core.h
Generated on Wed Feb 4 2015 17:00:12 for QUDA by
1.8.6