QUDA
v0.7.0
A library for QCD on GPUs
Main Page
Namespaces
Classes
Files
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Pages
quda
lib
read_clover.h
Go to the documentation of this file.
1
/**
 * Declare double2 locals C0..C17 in the enclosing scope and initialize each
 * one from a strided array: Ck = clover[sid + (18*chi + k) * param.cl_stride].
 *
 * @param clover  Indexable expression yielding double2 values.
 * @param chi     Selects which run of 18 strided elements is read
 *                (presumably a chirality-block index; confirm with callers).
 *
 * `sid` and `param.cl_stride` are not parameters; they must be visible where
 * the macro is expanded.  Both arguments are parenthesized so that a compound
 * argument such as `a+b` indexes correctly.  The expansion is a list of
 * declarations that already ends in ';', so it cannot be wrapped in
 * do { } while (0) and may be invoked without a trailing semicolon.
 */
#define READ_CLOVER_DOUBLE(clover, chi) \
  double2 C0 = (clover)[sid + (18*(chi)+0)*param.cl_stride]; \
  double2 C1 = (clover)[sid + (18*(chi)+1)*param.cl_stride]; \
  double2 C2 = (clover)[sid + (18*(chi)+2)*param.cl_stride]; \
  double2 C3 = (clover)[sid + (18*(chi)+3)*param.cl_stride]; \
  double2 C4 = (clover)[sid + (18*(chi)+4)*param.cl_stride]; \
  double2 C5 = (clover)[sid + (18*(chi)+5)*param.cl_stride]; \
  double2 C6 = (clover)[sid + (18*(chi)+6)*param.cl_stride]; \
  double2 C7 = (clover)[sid + (18*(chi)+7)*param.cl_stride]; \
  double2 C8 = (clover)[sid + (18*(chi)+8)*param.cl_stride]; \
  double2 C9 = (clover)[sid + (18*(chi)+9)*param.cl_stride]; \
  double2 C10 = (clover)[sid + (18*(chi)+10)*param.cl_stride]; \
  double2 C11 = (clover)[sid + (18*(chi)+11)*param.cl_stride]; \
  double2 C12 = (clover)[sid + (18*(chi)+12)*param.cl_stride]; \
  double2 C13 = (clover)[sid + (18*(chi)+13)*param.cl_stride]; \
  double2 C14 = (clover)[sid + (18*(chi)+14)*param.cl_stride]; \
  double2 C15 = (clover)[sid + (18*(chi)+15)*param.cl_stride]; \
  double2 C16 = (clover)[sid + (18*(chi)+16)*param.cl_stride]; \
  double2 C17 = (clover)[sid + (18*(chi)+17)*param.cl_stride];
20
21
/**
 * Declare double2 locals C0..C17 in the enclosing scope, then fill each one
 * through load_streaming_double2(Ck, &clover[sid + (18*chi + k) * param.cl_stride]).
 *
 * @param clover  Indexable lvalue expression whose elements are double2.
 * @param chi     Selects which run of 18 strided elements is read
 *                (presumably a chirality-block index; confirm with callers).
 *
 * `sid`, `param.cl_stride` and `load_streaming_double2` are not supplied by
 * this macro; they must be visible where it is expanded.  The first argument
 * of load_streaming_double2 is passed as a plain name and is expected to be
 * written, so it is presumably a reference parameter or a macro; confirm
 * against its definition.  Both macro arguments are parenthesized so that a
 * compound argument such as `a+b` indexes correctly.  The expansion contains
 * declarations and already ends in ';', so it cannot be wrapped in
 * do { } while (0).
 */
#define READ_CLOVER_DOUBLE_STR(clover, chi) \
  double2 C0, C1, C2, C3, C4, C5, C6, C7, C8, C9; \
  double2 C10, C11, C12, C13, C14, C15, C16, C17; \
  load_streaming_double2(C0, &(clover)[sid + (18*(chi)+0)*param.cl_stride]); \
  load_streaming_double2(C1, &(clover)[sid + (18*(chi)+1)*param.cl_stride]); \
  load_streaming_double2(C2, &(clover)[sid + (18*(chi)+2)*param.cl_stride]); \
  load_streaming_double2(C3, &(clover)[sid + (18*(chi)+3)*param.cl_stride]); \
  load_streaming_double2(C4, &(clover)[sid + (18*(chi)+4)*param.cl_stride]); \
  load_streaming_double2(C5, &(clover)[sid + (18*(chi)+5)*param.cl_stride]); \
  load_streaming_double2(C6, &(clover)[sid + (18*(chi)+6)*param.cl_stride]); \
  load_streaming_double2(C7, &(clover)[sid + (18*(chi)+7)*param.cl_stride]); \
  load_streaming_double2(C8, &(clover)[sid + (18*(chi)+8)*param.cl_stride]); \
  load_streaming_double2(C9, &(clover)[sid + (18*(chi)+9)*param.cl_stride]); \
  load_streaming_double2(C10, &(clover)[sid + (18*(chi)+10)*param.cl_stride]); \
  load_streaming_double2(C11, &(clover)[sid + (18*(chi)+11)*param.cl_stride]); \
  load_streaming_double2(C12, &(clover)[sid + (18*(chi)+12)*param.cl_stride]); \
  load_streaming_double2(C13, &(clover)[sid + (18*(chi)+13)*param.cl_stride]); \
  load_streaming_double2(C14, &(clover)[sid + (18*(chi)+14)*param.cl_stride]); \
  load_streaming_double2(C15, &(clover)[sid + (18*(chi)+15)*param.cl_stride]); \
  load_streaming_double2(C16, &(clover)[sid + (18*(chi)+16)*param.cl_stride]); \
  load_streaming_double2(C17, &(clover)[sid + (18*(chi)+17)*param.cl_stride]);
42
43
/**
 * Declare float4 locals C0..C8 in the enclosing scope and initialize each
 * one from a strided array: Ck = clover[sid + (9*chi + k) * param.cl_stride].
 *
 * @param clover  Indexable expression yielding float4 values.
 * @param chi     Selects which run of 9 strided elements is read
 *                (presumably a chirality-block index; confirm with callers).
 *
 * `sid` and `param.cl_stride` are not parameters; they must be visible where
 * the macro is expanded.  Both arguments are parenthesized so that a compound
 * argument such as `a+b` indexes correctly.  The expansion is a list of
 * declarations that already ends in ';', so it cannot be wrapped in
 * do { } while (0).
 */
#define READ_CLOVER_SINGLE(clover, chi) \
  float4 C0 = (clover)[sid + (9*(chi)+0)*param.cl_stride]; \
  float4 C1 = (clover)[sid + (9*(chi)+1)*param.cl_stride]; \
  float4 C2 = (clover)[sid + (9*(chi)+2)*param.cl_stride]; \
  float4 C3 = (clover)[sid + (9*(chi)+3)*param.cl_stride]; \
  float4 C4 = (clover)[sid + (9*(chi)+4)*param.cl_stride]; \
  float4 C5 = (clover)[sid + (9*(chi)+5)*param.cl_stride]; \
  float4 C6 = (clover)[sid + (9*(chi)+6)*param.cl_stride]; \
  float4 C7 = (clover)[sid + (9*(chi)+7)*param.cl_stride]; \
  float4 C8 = (clover)[sid + (9*(chi)+8)*param.cl_stride];
53
54
/**
 * Declare float4 locals C0..C8 and a float K in the enclosing scope.
 * Each Ck is short42float4(clover[sid + (9*chi + k) * param.cl_stride]);
 * K is cloverNorm[sid + chi * param.cl_stride]; every component of every Ck
 * is then multiplied by K.
 *
 * @param clover  Indexable expression whose elements are accepted by
 *                short42float4 (presumably short4; confirm).
 * @param chi     Selects which run of 9 strided elements, and which norm
 *                entry, is read (presumably a chirality-block index; confirm).
 *
 * `sid`, `param.cl_stride`, `cloverNorm` and `short42float4` are not supplied
 * by this macro; they must be visible where it is expanded.  `chi` is
 * parenthesized everywhere, including the norm index, so that a compound
 * argument such as `a+b` is multiplied as a whole.  The expansion contains
 * declarations and already ends in ';', so it cannot be wrapped in
 * do { } while (0).
 */
#define READ_CLOVER_HALF(clover, chi) \
  float4 C0 = short42float4((clover)[sid + (9*(chi)+0)*param.cl_stride]); \
  float4 C1 = short42float4((clover)[sid + (9*(chi)+1)*param.cl_stride]); \
  float4 C2 = short42float4((clover)[sid + (9*(chi)+2)*param.cl_stride]); \
  float4 C3 = short42float4((clover)[sid + (9*(chi)+3)*param.cl_stride]); \
  float4 C4 = short42float4((clover)[sid + (9*(chi)+4)*param.cl_stride]); \
  float4 C5 = short42float4((clover)[sid + (9*(chi)+5)*param.cl_stride]); \
  float4 C6 = short42float4((clover)[sid + (9*(chi)+6)*param.cl_stride]); \
  float4 C7 = short42float4((clover)[sid + (9*(chi)+7)*param.cl_stride]); \
  float4 C8 = short42float4((clover)[sid + (9*(chi)+8)*param.cl_stride]); \
  float K = cloverNorm[sid + (chi)*param.cl_stride]; \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K; \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K; \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K; \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K; \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K; \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K; \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K; \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K; \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;
74
75
/**
 * Declare double2 locals C0..C17 in the enclosing scope and initialize each
 * one with fetch_double2(clover, sid + (18*chi + k) * param.cl_stride).
 *
 * @param clover  First argument forwarded to fetch_double2 (presumably a
 *                texture handle; confirm against fetch_double2).
 * @param chi     Selects which run of 18 strided elements is fetched
 *                (presumably a chirality-block index; confirm with callers).
 *
 * `sid`, `param.cl_stride` and `fetch_double2` are not supplied by this
 * macro; they must be visible where it is expanded.  `chi` is parenthesized
 * so that a compound argument such as `a+b` indexes correctly.  The
 * expansion is a list of declarations that already ends in ';', so it cannot
 * be wrapped in do { } while (0).
 */
#define READ_CLOVER_DOUBLE_TEX(clover, chi) \
  double2 C0 = fetch_double2((clover), sid + (18*(chi)+0)*param.cl_stride); \
  double2 C1 = fetch_double2((clover), sid + (18*(chi)+1)*param.cl_stride); \
  double2 C2 = fetch_double2((clover), sid + (18*(chi)+2)*param.cl_stride); \
  double2 C3 = fetch_double2((clover), sid + (18*(chi)+3)*param.cl_stride); \
  double2 C4 = fetch_double2((clover), sid + (18*(chi)+4)*param.cl_stride); \
  double2 C5 = fetch_double2((clover), sid + (18*(chi)+5)*param.cl_stride); \
  double2 C6 = fetch_double2((clover), sid + (18*(chi)+6)*param.cl_stride); \
  double2 C7 = fetch_double2((clover), sid + (18*(chi)+7)*param.cl_stride); \
  double2 C8 = fetch_double2((clover), sid + (18*(chi)+8)*param.cl_stride); \
  double2 C9 = fetch_double2((clover), sid + (18*(chi)+9)*param.cl_stride); \
  double2 C10 = fetch_double2((clover), sid + (18*(chi)+10)*param.cl_stride); \
  double2 C11 = fetch_double2((clover), sid + (18*(chi)+11)*param.cl_stride); \
  double2 C12 = fetch_double2((clover), sid + (18*(chi)+12)*param.cl_stride); \
  double2 C13 = fetch_double2((clover), sid + (18*(chi)+13)*param.cl_stride); \
  double2 C14 = fetch_double2((clover), sid + (18*(chi)+14)*param.cl_stride); \
  double2 C15 = fetch_double2((clover), sid + (18*(chi)+15)*param.cl_stride); \
  double2 C16 = fetch_double2((clover), sid + (18*(chi)+16)*param.cl_stride); \
  double2 C17 = fetch_double2((clover), sid + (18*(chi)+17)*param.cl_stride);
94
95
//#endif // USE_TEXTURE_OBJECTS
96
97
/**
 * Declare float4 locals C0..C8 in the enclosing scope and initialize each
 * one with TEX1DFETCH(float4, clover, sid + (9*chi + k) * param.cl_stride).
 *
 * @param clover  Second argument forwarded to TEX1DFETCH (presumably a
 *                texture handle; confirm against TEX1DFETCH).
 * @param chi     Selects which run of 9 strided elements is fetched
 *                (presumably a chirality-block index; confirm with callers).
 *
 * `sid`, `param.cl_stride` and `TEX1DFETCH` are not supplied by this macro;
 * they must be visible where it is expanded.  `chi` is parenthesized so that
 * a compound argument such as `a+b` indexes correctly.  The expansion is a
 * list of declarations that already ends in ';', so it cannot be wrapped in
 * do { } while (0).
 */
#define READ_CLOVER_SINGLE_TEX(clover, chi) \
  float4 C0 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+0)*param.cl_stride); \
  float4 C1 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+1)*param.cl_stride); \
  float4 C2 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+2)*param.cl_stride); \
  float4 C3 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+3)*param.cl_stride); \
  float4 C4 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+4)*param.cl_stride); \
  float4 C5 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+5)*param.cl_stride); \
  float4 C6 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+6)*param.cl_stride); \
  float4 C7 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+7)*param.cl_stride); \
  float4 C8 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+8)*param.cl_stride);
107
108
/**
 * Declare float4 locals C0..C8 and a float K in the enclosing scope.
 * Each Ck is TEX1DFETCH(float4, clover, sid + (9*chi + k) * param.cl_stride);
 * K is TEX1DFETCH(float, CLOVERTEXNORM, sid + chi * param.cl_stride); every
 * component of every Ck is then multiplied by K.
 *
 * @param clover  Second argument forwarded to TEX1DFETCH (presumably a
 *                texture handle; confirm against TEX1DFETCH).
 * @param chi     Selects which run of 9 strided elements, and which norm
 *                entry, is fetched (presumably a chirality-block index;
 *                confirm with callers).
 *
 * `sid`, `param.cl_stride`, `TEX1DFETCH` and `CLOVERTEXNORM` are not supplied
 * by this macro; they must be visible where it is expanded.  The norm source
 * is the fixed name CLOVERTEXNORM, not a macro parameter.
 * NOTE(review): ASSN_CLOVER_HALF_TEX and PACK_CLOVER_HALF_TEX in this file
 * read TMCLOVERTEXNORM instead; confirm the difference is intended.
 *
 * `chi` is parenthesized everywhere, including the norm index, so that a
 * compound argument such as `a+b` is multiplied as a whole.  The expansion
 * contains declarations and already ends in ';', so it cannot be wrapped in
 * do { } while (0).
 */
#define READ_CLOVER_HALF_TEX(clover, chi) \
  float4 C0 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+0)*param.cl_stride); \
  float4 C1 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+1)*param.cl_stride); \
  float4 C2 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+2)*param.cl_stride); \
  float4 C3 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+3)*param.cl_stride); \
  float4 C4 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+4)*param.cl_stride); \
  float4 C5 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+5)*param.cl_stride); \
  float4 C6 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+6)*param.cl_stride); \
  float4 C7 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+7)*param.cl_stride); \
  float4 C8 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+8)*param.cl_stride); \
  float K = TEX1DFETCH(float, (CLOVERTEXNORM), sid + (chi)*param.cl_stride); \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K; \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K; \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K; \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K; \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K; \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K; \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K; \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K; \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;
128
129
/**
 * Assign to the existing variables C0..C17 from a strided array:
 * Ck = clover[sid + (18*chi + k) * param.cl_stride].
 *
 * @param clover  Indexable expression yielding values assignable to Ck.
 * @param chi     Selects which run of 18 strided elements is read
 *                (presumably a chirality-block index; confirm with callers).
 *
 * C0..C17, `sid` and `param.cl_stride` are not supplied by this macro; they
 * must already be visible where it is expanded.  Both arguments are
 * parenthesized so that a compound argument such as `a+b` indexes correctly.
 * The expansion is a bare statement list that already ends in ';' (kept so
 * that existing call sites without a trailing semicolon still compile), so
 * do not use it as the unbraced body of an if/else or loop.
 */
#define ASSN_CLOVER_DOUBLE(clover, chi) \
  C0 = (clover)[sid + (18*(chi)+0)*param.cl_stride]; \
  C1 = (clover)[sid + (18*(chi)+1)*param.cl_stride]; \
  C2 = (clover)[sid + (18*(chi)+2)*param.cl_stride]; \
  C3 = (clover)[sid + (18*(chi)+3)*param.cl_stride]; \
  C4 = (clover)[sid + (18*(chi)+4)*param.cl_stride]; \
  C5 = (clover)[sid + (18*(chi)+5)*param.cl_stride]; \
  C6 = (clover)[sid + (18*(chi)+6)*param.cl_stride]; \
  C7 = (clover)[sid + (18*(chi)+7)*param.cl_stride]; \
  C8 = (clover)[sid + (18*(chi)+8)*param.cl_stride]; \
  C9 = (clover)[sid + (18*(chi)+9)*param.cl_stride]; \
  C10 = (clover)[sid + (18*(chi)+10)*param.cl_stride]; \
  C11 = (clover)[sid + (18*(chi)+11)*param.cl_stride]; \
  C12 = (clover)[sid + (18*(chi)+12)*param.cl_stride]; \
  C13 = (clover)[sid + (18*(chi)+13)*param.cl_stride]; \
  C14 = (clover)[sid + (18*(chi)+14)*param.cl_stride]; \
  C15 = (clover)[sid + (18*(chi)+15)*param.cl_stride]; \
  C16 = (clover)[sid + (18*(chi)+16)*param.cl_stride]; \
  C17 = (clover)[sid + (18*(chi)+17)*param.cl_stride];
148
149
/**
 * Fill the existing variables C0..C17 by calling
 * load_streaming_double2(Ck, &clover[sid + (18*chi + k) * param.cl_stride]).
 *
 * @param clover  Indexable lvalue expression; the address of each selected
 *                element is passed to load_streaming_double2.
 * @param chi     Selects which run of 18 strided elements is read
 *                (presumably a chirality-block index; confirm with callers).
 *
 * C0..C17, `sid`, `param.cl_stride` and `load_streaming_double2` are not
 * supplied by this macro; they must already be visible where it is expanded.
 * Both arguments are parenthesized so that a compound argument such as `a+b`
 * indexes correctly.  The expansion is a bare statement list that already
 * ends in ';' (kept so that existing call sites without a trailing semicolon
 * still compile), so do not use it as the unbraced body of an if/else or
 * loop.
 */
#define ASSN_CLOVER_DOUBLE_STR(clover, chi) \
  load_streaming_double2(C0, &(clover)[sid + (18*(chi)+0)*param.cl_stride]); \
  load_streaming_double2(C1, &(clover)[sid + (18*(chi)+1)*param.cl_stride]); \
  load_streaming_double2(C2, &(clover)[sid + (18*(chi)+2)*param.cl_stride]); \
  load_streaming_double2(C3, &(clover)[sid + (18*(chi)+3)*param.cl_stride]); \
  load_streaming_double2(C4, &(clover)[sid + (18*(chi)+4)*param.cl_stride]); \
  load_streaming_double2(C5, &(clover)[sid + (18*(chi)+5)*param.cl_stride]); \
  load_streaming_double2(C6, &(clover)[sid + (18*(chi)+6)*param.cl_stride]); \
  load_streaming_double2(C7, &(clover)[sid + (18*(chi)+7)*param.cl_stride]); \
  load_streaming_double2(C8, &(clover)[sid + (18*(chi)+8)*param.cl_stride]); \
  load_streaming_double2(C9, &(clover)[sid + (18*(chi)+9)*param.cl_stride]); \
  load_streaming_double2(C10, &(clover)[sid + (18*(chi)+10)*param.cl_stride]); \
  load_streaming_double2(C11, &(clover)[sid + (18*(chi)+11)*param.cl_stride]); \
  load_streaming_double2(C12, &(clover)[sid + (18*(chi)+12)*param.cl_stride]); \
  load_streaming_double2(C13, &(clover)[sid + (18*(chi)+13)*param.cl_stride]); \
  load_streaming_double2(C14, &(clover)[sid + (18*(chi)+14)*param.cl_stride]); \
  load_streaming_double2(C15, &(clover)[sid + (18*(chi)+15)*param.cl_stride]); \
  load_streaming_double2(C16, &(clover)[sid + (18*(chi)+16)*param.cl_stride]); \
  load_streaming_double2(C17, &(clover)[sid + (18*(chi)+17)*param.cl_stride]);
168
169
/**
 * Assign to the existing variables C0..C8 from a strided array:
 * Ck = clover[sid + (9*chi + k) * param.cl_stride].
 *
 * @param clover  Indexable expression yielding values assignable to Ck.
 * @param chi     Selects which run of 9 strided elements is read
 *                (presumably a chirality-block index; confirm with callers).
 *
 * C0..C8, `sid` and `param.cl_stride` are not supplied by this macro; they
 * must already be visible where it is expanded.  Both arguments are
 * parenthesized so that a compound argument such as `a+b` indexes correctly.
 * The expansion is a bare statement list that already ends in ';' (kept so
 * that existing call sites without a trailing semicolon still compile), so
 * do not use it as the unbraced body of an if/else or loop.
 */
#define ASSN_CLOVER_SINGLE(clover, chi) \
  C0 = (clover)[sid + (9*(chi)+0)*param.cl_stride]; \
  C1 = (clover)[sid + (9*(chi)+1)*param.cl_stride]; \
  C2 = (clover)[sid + (9*(chi)+2)*param.cl_stride]; \
  C3 = (clover)[sid + (9*(chi)+3)*param.cl_stride]; \
  C4 = (clover)[sid + (9*(chi)+4)*param.cl_stride]; \
  C5 = (clover)[sid + (9*(chi)+5)*param.cl_stride]; \
  C6 = (clover)[sid + (9*(chi)+6)*param.cl_stride]; \
  C7 = (clover)[sid + (9*(chi)+7)*param.cl_stride]; \
  C8 = (clover)[sid + (9*(chi)+8)*param.cl_stride];
179
180
/**
 * Assign to the existing variables C0..C8 and K.
 * Each Ck becomes short42float4(clover[sid + (9*chi + k) * param.cl_stride]);
 * K becomes cloverNorm[sid + chi * param.cl_stride]; every component of
 * every Ck is then multiplied by K.
 *
 * @param clover  Indexable expression whose elements are accepted by
 *                short42float4 (presumably short4; confirm).
 * @param chi     Selects which run of 9 strided elements, and which norm
 *                entry, is read (presumably a chirality-block index; confirm).
 *
 * C0..C8, K, `sid`, `param.cl_stride`, `cloverNorm` and `short42float4` are
 * not supplied by this macro; they must already be visible where it is
 * expanded.  `chi` is parenthesized everywhere, including the norm index, so
 * that a compound argument such as `a+b` is multiplied as a whole.  The
 * expansion is a bare statement list that already ends in ';' (kept so that
 * existing call sites without a trailing semicolon still compile), so do not
 * use it as the unbraced body of an if/else or loop.
 */
#define ASSN_CLOVER_HALF(clover, chi) \
  C0 = short42float4((clover)[sid + (9*(chi)+0)*param.cl_stride]); \
  C1 = short42float4((clover)[sid + (9*(chi)+1)*param.cl_stride]); \
  C2 = short42float4((clover)[sid + (9*(chi)+2)*param.cl_stride]); \
  C3 = short42float4((clover)[sid + (9*(chi)+3)*param.cl_stride]); \
  C4 = short42float4((clover)[sid + (9*(chi)+4)*param.cl_stride]); \
  C5 = short42float4((clover)[sid + (9*(chi)+5)*param.cl_stride]); \
  C6 = short42float4((clover)[sid + (9*(chi)+6)*param.cl_stride]); \
  C7 = short42float4((clover)[sid + (9*(chi)+7)*param.cl_stride]); \
  C8 = short42float4((clover)[sid + (9*(chi)+8)*param.cl_stride]); \
  K = cloverNorm[sid + (chi)*param.cl_stride]; \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K; \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K; \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K; \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K; \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K; \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K; \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K; \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K; \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;
200
201
/**
 * Assign to the existing variables C0..C17 with
 * Ck = fetch_double2(clover, sid + (18*chi + k) * param.cl_stride).
 *
 * @param clover  First argument forwarded to fetch_double2 (presumably a
 *                texture handle; confirm against fetch_double2).
 * @param chi     Selects which run of 18 strided elements is fetched
 *                (presumably a chirality-block index; confirm with callers).
 *
 * C0..C17, `sid`, `param.cl_stride` and `fetch_double2` are not supplied by
 * this macro; they must already be visible where it is expanded.  `chi` is
 * parenthesized so that a compound argument such as `a+b` indexes correctly.
 * The expansion is a bare statement list that already ends in ';' (kept so
 * that existing call sites without a trailing semicolon still compile), so
 * do not use it as the unbraced body of an if/else or loop.
 */
#define ASSN_CLOVER_DOUBLE_TEX(clover, chi) \
  C0 = fetch_double2((clover), sid + (18*(chi)+0)*param.cl_stride); \
  C1 = fetch_double2((clover), sid + (18*(chi)+1)*param.cl_stride); \
  C2 = fetch_double2((clover), sid + (18*(chi)+2)*param.cl_stride); \
  C3 = fetch_double2((clover), sid + (18*(chi)+3)*param.cl_stride); \
  C4 = fetch_double2((clover), sid + (18*(chi)+4)*param.cl_stride); \
  C5 = fetch_double2((clover), sid + (18*(chi)+5)*param.cl_stride); \
  C6 = fetch_double2((clover), sid + (18*(chi)+6)*param.cl_stride); \
  C7 = fetch_double2((clover), sid + (18*(chi)+7)*param.cl_stride); \
  C8 = fetch_double2((clover), sid + (18*(chi)+8)*param.cl_stride); \
  C9 = fetch_double2((clover), sid + (18*(chi)+9)*param.cl_stride); \
  C10 = fetch_double2((clover), sid + (18*(chi)+10)*param.cl_stride); \
  C11 = fetch_double2((clover), sid + (18*(chi)+11)*param.cl_stride); \
  C12 = fetch_double2((clover), sid + (18*(chi)+12)*param.cl_stride); \
  C13 = fetch_double2((clover), sid + (18*(chi)+13)*param.cl_stride); \
  C14 = fetch_double2((clover), sid + (18*(chi)+14)*param.cl_stride); \
  C15 = fetch_double2((clover), sid + (18*(chi)+15)*param.cl_stride); \
  C16 = fetch_double2((clover), sid + (18*(chi)+16)*param.cl_stride); \
  C17 = fetch_double2((clover), sid + (18*(chi)+17)*param.cl_stride);
220
221
//#endif // USE_TEXTURE_OBJECTS
222
223
/**
 * Assign to the existing variables C0..C8 with
 * Ck = TEX1DFETCH(float4, clover, sid + (9*chi + k) * param.cl_stride).
 *
 * @param clover  Second argument forwarded to TEX1DFETCH (presumably a
 *                texture handle; confirm against TEX1DFETCH).
 * @param chi     Selects which run of 9 strided elements is fetched
 *                (presumably a chirality-block index; confirm with callers).
 *
 * C0..C8, `sid`, `param.cl_stride` and `TEX1DFETCH` are not supplied by this
 * macro; they must already be visible where it is expanded.  `chi` is
 * parenthesized so that a compound argument such as `a+b` indexes correctly.
 * The expansion is a bare statement list that already ends in ';' (kept so
 * that existing call sites without a trailing semicolon still compile), so
 * do not use it as the unbraced body of an if/else or loop.
 */
#define ASSN_CLOVER_SINGLE_TEX(clover, chi) \
  C0 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+0)*param.cl_stride); \
  C1 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+1)*param.cl_stride); \
  C2 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+2)*param.cl_stride); \
  C3 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+3)*param.cl_stride); \
  C4 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+4)*param.cl_stride); \
  C5 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+5)*param.cl_stride); \
  C6 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+6)*param.cl_stride); \
  C7 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+7)*param.cl_stride); \
  C8 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+8)*param.cl_stride);
233
234
/**
 * Assign to the existing variables C0..C8 and K.
 * Each Ck becomes TEX1DFETCH(float4, clover, sid + (9*chi + k) * param.cl_stride);
 * K becomes TEX1DFETCH(float, TMCLOVERTEXNORM, sid + chi * param.cl_stride);
 * every component of every Ck is then multiplied by K.
 *
 * @param clover  Second argument forwarded to TEX1DFETCH (presumably a
 *                texture handle; confirm against TEX1DFETCH).
 * @param chi     Selects which run of 9 strided elements, and which norm
 *                entry, is fetched (presumably a chirality-block index;
 *                confirm with callers).
 *
 * C0..C8, K, `sid`, `param.cl_stride`, `TEX1DFETCH` and `TMCLOVERTEXNORM` are
 * not supplied by this macro; they must already be visible where it is
 * expanded.  The norm source is the fixed name TMCLOVERTEXNORM, not a macro
 * parameter.  `chi` is parenthesized everywhere, including the norm index,
 * so that a compound argument such as `a+b` is multiplied as a whole.  The
 * expansion is a bare statement list that already ends in ';' (kept so that
 * existing call sites without a trailing semicolon still compile), so do not
 * use it as the unbraced body of an if/else or loop.
 */
#define ASSN_CLOVER_HALF_TEX(clover, chi) \
  C0 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+0)*param.cl_stride); \
  C1 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+1)*param.cl_stride); \
  C2 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+2)*param.cl_stride); \
  C3 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+3)*param.cl_stride); \
  C4 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+4)*param.cl_stride); \
  C5 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+5)*param.cl_stride); \
  C6 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+6)*param.cl_stride); \
  C7 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+7)*param.cl_stride); \
  C8 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+8)*param.cl_stride); \
  K = TEX1DFETCH(float, (TMCLOVERTEXNORM), sid + (chi)*param.cl_stride); \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K; \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K; \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K; \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K; \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K; \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K; \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K; \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K; \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;
254
255
/**
 * Declare double2 locals C0..C17 in the enclosing scope and initialize each
 * one from a strided array: Ck = clover[idx + (18*chi + k) * param.cl_stride].
 *
 * @param clover  Indexable expression yielding double2 values.
 * @param chi     Selects which run of 18 strided elements is read
 *                (presumably a chirality-block index; confirm with callers).
 *
 * The base offset is `idx` (not `sid`); `idx` and `param.cl_stride` are not
 * parameters and must be visible where the macro is expanded.  Both
 * arguments are parenthesized so that a compound argument such as `a+b`
 * indexes correctly.  The expansion is a list of declarations that already
 * ends in ';', so it cannot be wrapped in do { } while (0).
 */
#define PACK_CLOVER_DOUBLE(clover, chi) \
  double2 C0 = (clover)[idx + (18*(chi)+0)*param.cl_stride]; \
  double2 C1 = (clover)[idx + (18*(chi)+1)*param.cl_stride]; \
  double2 C2 = (clover)[idx + (18*(chi)+2)*param.cl_stride]; \
  double2 C3 = (clover)[idx + (18*(chi)+3)*param.cl_stride]; \
  double2 C4 = (clover)[idx + (18*(chi)+4)*param.cl_stride]; \
  double2 C5 = (clover)[idx + (18*(chi)+5)*param.cl_stride]; \
  double2 C6 = (clover)[idx + (18*(chi)+6)*param.cl_stride]; \
  double2 C7 = (clover)[idx + (18*(chi)+7)*param.cl_stride]; \
  double2 C8 = (clover)[idx + (18*(chi)+8)*param.cl_stride]; \
  double2 C9 = (clover)[idx + (18*(chi)+9)*param.cl_stride]; \
  double2 C10 = (clover)[idx + (18*(chi)+10)*param.cl_stride]; \
  double2 C11 = (clover)[idx + (18*(chi)+11)*param.cl_stride]; \
  double2 C12 = (clover)[idx + (18*(chi)+12)*param.cl_stride]; \
  double2 C13 = (clover)[idx + (18*(chi)+13)*param.cl_stride]; \
  double2 C14 = (clover)[idx + (18*(chi)+14)*param.cl_stride]; \
  double2 C15 = (clover)[idx + (18*(chi)+15)*param.cl_stride]; \
  double2 C16 = (clover)[idx + (18*(chi)+16)*param.cl_stride]; \
  double2 C17 = (clover)[idx + (18*(chi)+17)*param.cl_stride];
274
275
/**
 * Declare float4 locals C0..C8 in the enclosing scope and initialize each
 * one from a strided array: Ck = clover[idx + (9*chi + k) * param.cl_stride].
 *
 * @param clover  Indexable expression yielding float4 values.
 * @param chi     Selects which run of 9 strided elements is read
 *                (presumably a chirality-block index; confirm with callers).
 *
 * The base offset is `idx` (not `sid`); `idx` and `param.cl_stride` are not
 * parameters and must be visible where the macro is expanded.  Both
 * arguments are parenthesized so that a compound argument such as `a+b`
 * indexes correctly.  The expansion is a list of declarations that already
 * ends in ';', so it cannot be wrapped in do { } while (0).
 */
#define PACK_CLOVER_SINGLE(clover, chi) \
  float4 C0 = (clover)[idx + (9*(chi)+0)*param.cl_stride]; \
  float4 C1 = (clover)[idx + (9*(chi)+1)*param.cl_stride]; \
  float4 C2 = (clover)[idx + (9*(chi)+2)*param.cl_stride]; \
  float4 C3 = (clover)[idx + (9*(chi)+3)*param.cl_stride]; \
  float4 C4 = (clover)[idx + (9*(chi)+4)*param.cl_stride]; \
  float4 C5 = (clover)[idx + (9*(chi)+5)*param.cl_stride]; \
  float4 C6 = (clover)[idx + (9*(chi)+6)*param.cl_stride]; \
  float4 C7 = (clover)[idx + (9*(chi)+7)*param.cl_stride]; \
  float4 C8 = (clover)[idx + (9*(chi)+8)*param.cl_stride];
285
286
/**
 * Declare float4 locals C0..C8 and a float K in the enclosing scope.
 * Each Ck is short42float4(clover[idx + (9*chi + k) * param.cl_stride]);
 * K is cloverNorm[idx + chi * param.cl_stride]; every component of every Ck
 * is then multiplied by K.
 *
 * @param clover  Indexable expression whose elements are accepted by
 *                short42float4 (presumably short4; confirm).
 * @param chi     Selects which run of 9 strided elements, and which norm
 *                entry, is read (presumably a chirality-block index; confirm).
 *
 * The base offset is `idx` (not `sid`).  `idx`, `param.cl_stride`,
 * `cloverNorm` and `short42float4` are not supplied by this macro; they must
 * be visible where it is expanded.  `chi` is parenthesized everywhere,
 * including the norm index, so that a compound argument such as `a+b` is
 * multiplied as a whole.  The expansion contains declarations and already
 * ends in ';', so it cannot be wrapped in do { } while (0).
 */
#define PACK_CLOVER_HALF(clover, chi) \
  float4 C0 = short42float4((clover)[idx + (9*(chi)+0)*param.cl_stride]); \
  float4 C1 = short42float4((clover)[idx + (9*(chi)+1)*param.cl_stride]); \
  float4 C2 = short42float4((clover)[idx + (9*(chi)+2)*param.cl_stride]); \
  float4 C3 = short42float4((clover)[idx + (9*(chi)+3)*param.cl_stride]); \
  float4 C4 = short42float4((clover)[idx + (9*(chi)+4)*param.cl_stride]); \
  float4 C5 = short42float4((clover)[idx + (9*(chi)+5)*param.cl_stride]); \
  float4 C6 = short42float4((clover)[idx + (9*(chi)+6)*param.cl_stride]); \
  float4 C7 = short42float4((clover)[idx + (9*(chi)+7)*param.cl_stride]); \
  float4 C8 = short42float4((clover)[idx + (9*(chi)+8)*param.cl_stride]); \
  float K = cloverNorm[idx + (chi)*param.cl_stride]; \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K; \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K; \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K; \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K; \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K; \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K; \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K; \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K; \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;
306
307
/**
 * Declare double2 locals C0..C17 in the enclosing scope and initialize each
 * one with fetch_double2(clover, idx + (18*chi + k) * param.cl_stride).
 *
 * @param clover  First argument forwarded to fetch_double2 (presumably a
 *                texture handle; confirm against fetch_double2).
 * @param chi     Selects which run of 18 strided elements is fetched
 *                (presumably a chirality-block index; confirm with callers).
 *
 * The base offset is `idx` (not `sid`).  `idx`, `param.cl_stride` and
 * `fetch_double2` are not supplied by this macro; they must be visible where
 * it is expanded.  `chi` is parenthesized so that a compound argument such
 * as `a+b` indexes correctly.  The expansion is a list of declarations that
 * already ends in ';', so it cannot be wrapped in do { } while (0).
 */
#define PACK_CLOVER_DOUBLE_TEX(clover, chi) \
  double2 C0 = fetch_double2((clover), idx + (18*(chi)+0)*param.cl_stride); \
  double2 C1 = fetch_double2((clover), idx + (18*(chi)+1)*param.cl_stride); \
  double2 C2 = fetch_double2((clover), idx + (18*(chi)+2)*param.cl_stride); \
  double2 C3 = fetch_double2((clover), idx + (18*(chi)+3)*param.cl_stride); \
  double2 C4 = fetch_double2((clover), idx + (18*(chi)+4)*param.cl_stride); \
  double2 C5 = fetch_double2((clover), idx + (18*(chi)+5)*param.cl_stride); \
  double2 C6 = fetch_double2((clover), idx + (18*(chi)+6)*param.cl_stride); \
  double2 C7 = fetch_double2((clover), idx + (18*(chi)+7)*param.cl_stride); \
  double2 C8 = fetch_double2((clover), idx + (18*(chi)+8)*param.cl_stride); \
  double2 C9 = fetch_double2((clover), idx + (18*(chi)+9)*param.cl_stride); \
  double2 C10 = fetch_double2((clover), idx + (18*(chi)+10)*param.cl_stride); \
  double2 C11 = fetch_double2((clover), idx + (18*(chi)+11)*param.cl_stride); \
  double2 C12 = fetch_double2((clover), idx + (18*(chi)+12)*param.cl_stride); \
  double2 C13 = fetch_double2((clover), idx + (18*(chi)+13)*param.cl_stride); \
  double2 C14 = fetch_double2((clover), idx + (18*(chi)+14)*param.cl_stride); \
  double2 C15 = fetch_double2((clover), idx + (18*(chi)+15)*param.cl_stride); \
  double2 C16 = fetch_double2((clover), idx + (18*(chi)+16)*param.cl_stride); \
  double2 C17 = fetch_double2((clover), idx + (18*(chi)+17)*param.cl_stride);
326
327
/**
 * Declare float4 locals C0..C8 in the enclosing scope and initialize each
 * one with TEX1DFETCH(float4, clover, idx + (9*chi + k) * param.cl_stride).
 *
 * @param clover  Second argument forwarded to TEX1DFETCH (presumably a
 *                texture handle; confirm against TEX1DFETCH).
 * @param chi     Selects which run of 9 strided elements is fetched
 *                (presumably a chirality-block index; confirm with callers).
 *
 * The base offset is `idx` (not `sid`).  `idx`, `param.cl_stride` and
 * `TEX1DFETCH` are not supplied by this macro; they must be visible where it
 * is expanded.  `chi` is parenthesized so that a compound argument such as
 * `a+b` indexes correctly.  The expansion is a list of declarations that
 * already ends in ';', so it cannot be wrapped in do { } while (0).
 */
#define PACK_CLOVER_SINGLE_TEX(clover, chi) \
  float4 C0 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+0)*param.cl_stride); \
  float4 C1 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+1)*param.cl_stride); \
  float4 C2 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+2)*param.cl_stride); \
  float4 C3 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+3)*param.cl_stride); \
  float4 C4 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+4)*param.cl_stride); \
  float4 C5 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+5)*param.cl_stride); \
  float4 C6 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+6)*param.cl_stride); \
  float4 C7 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+7)*param.cl_stride); \
  float4 C8 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+8)*param.cl_stride);
337
338
/**
 * Declare float4 locals C0..C8 and a float K in the enclosing scope.
 * Each Ck is TEX1DFETCH(float4, clover, idx + (9*chi + k) * param.cl_stride);
 * K is TEX1DFETCH(float, TMCLOVERTEXNORM, idx + chi * param.cl_stride); every
 * component of every Ck is then multiplied by K.
 *
 * @param clover  Second argument forwarded to TEX1DFETCH (presumably a
 *                texture handle; confirm against TEX1DFETCH).
 * @param chi     Selects which run of 9 strided elements, and which norm
 *                entry, is fetched (presumably a chirality-block index;
 *                confirm with callers).
 *
 * The base offset is `idx` (not `sid`).  `idx`, `param.cl_stride`,
 * `TEX1DFETCH` and `TMCLOVERTEXNORM` are not supplied by this macro; they
 * must be visible where it is expanded.  The norm source is the fixed name
 * TMCLOVERTEXNORM, not a macro parameter.  `chi` is parenthesized
 * everywhere, including the norm index, so that a compound argument such as
 * `a+b` is multiplied as a whole.  The expansion contains declarations and
 * already ends in ';', so it cannot be wrapped in do { } while (0).
 */
#define PACK_CLOVER_HALF_TEX(clover, chi) \
  float4 C0 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+0)*param.cl_stride); \
  float4 C1 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+1)*param.cl_stride); \
  float4 C2 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+2)*param.cl_stride); \
  float4 C3 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+3)*param.cl_stride); \
  float4 C4 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+4)*param.cl_stride); \
  float4 C5 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+5)*param.cl_stride); \
  float4 C6 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+6)*param.cl_stride); \
  float4 C7 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+7)*param.cl_stride); \
  float4 C8 = TEX1DFETCH(float4, (clover), idx + (9*(chi)+8)*param.cl_stride); \
  float K = TEX1DFETCH(float, (TMCLOVERTEXNORM), idx + (chi)*param.cl_stride); \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K; \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K; \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K; \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K; \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K; \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K; \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K; \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K; \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;
358
Generated on Wed Feb 4 2015 17:00:12 for QUDA by Doxygen 1.8.6