QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
read_clover.h
Go to the documentation of this file.
/**
 * Declare eighteen double2 locals, C0 through C17, in the invoking scope and
 * initialise each one with a plain indexed read from `clover`.
 *
 * Element k (k = 0..17) is read from index  sid + (18*chi + k)*cl_stride.
 *
 * @param clover  Indexable expression yielding double2 values.
 * @param chi     Integer block selector; each block spans 18 double2 elements.
 *                NOTE(review): presumably the chirality index (0 or 1) --
 *                confirm against the callers.
 *
 * The identifiers `sid` and `cl_stride` are not parameters: they must already
 * be visible where the macro is expanded.
 *
 * Both parameters are parenthesised so that compound arguments (for example
 * `chi = a + b` or `clover = base + offset`) index the intended element.
 * The macro cannot be wrapped in do { ... } while (0) because the declared
 * variables have to outlive the expansion; the trailing semicolon is kept so
 * that existing call sites written without one continue to compile.
 */
#define READ_CLOVER_DOUBLE(clover, chi) \
  double2 C0 = (clover)[sid + (18*(chi)+0)*cl_stride]; \
  double2 C1 = (clover)[sid + (18*(chi)+1)*cl_stride]; \
  double2 C2 = (clover)[sid + (18*(chi)+2)*cl_stride]; \
  double2 C3 = (clover)[sid + (18*(chi)+3)*cl_stride]; \
  double2 C4 = (clover)[sid + (18*(chi)+4)*cl_stride]; \
  double2 C5 = (clover)[sid + (18*(chi)+5)*cl_stride]; \
  double2 C6 = (clover)[sid + (18*(chi)+6)*cl_stride]; \
  double2 C7 = (clover)[sid + (18*(chi)+7)*cl_stride]; \
  double2 C8 = (clover)[sid + (18*(chi)+8)*cl_stride]; \
  double2 C9 = (clover)[sid + (18*(chi)+9)*cl_stride]; \
  double2 C10 = (clover)[sid + (18*(chi)+10)*cl_stride]; \
  double2 C11 = (clover)[sid + (18*(chi)+11)*cl_stride]; \
  double2 C12 = (clover)[sid + (18*(chi)+12)*cl_stride]; \
  double2 C13 = (clover)[sid + (18*(chi)+13)*cl_stride]; \
  double2 C14 = (clover)[sid + (18*(chi)+14)*cl_stride]; \
  double2 C15 = (clover)[sid + (18*(chi)+15)*cl_stride]; \
  double2 C16 = (clover)[sid + (18*(chi)+16)*cl_stride]; \
  double2 C17 = (clover)[sid + (18*(chi)+17)*cl_stride];
20 
/**
 * Declare eighteen double2 locals, C0 through C17, in the invoking scope and
 * fill each one by passing it, together with the address of the matching
 * `clover` element, to load_streaming_double2().
 *
 * Element k (k = 0..17) is taken from index  sid + (18*chi + k)*cl_stride.
 *
 * @param clover  Indexable lvalue expression whose elements are double2; the
 *                address of each element is handed to load_streaming_double2().
 * @param chi     Integer block selector; each block spans 18 double2 elements.
 *                NOTE(review): presumably the chirality index (0 or 1) --
 *                confirm against the callers.
 *
 * `sid`, `cl_stride` and load_streaming_double2 are not parameters: they must
 * already be visible where the macro is expanded.
 *
 * Both parameters are parenthesised so that compound arguments are indexed as
 * intended. The variables are declared first and loaded afterwards, so the
 * macro cannot be wrapped in do { ... } while (0) without hiding them; the
 * trailing semicolon is kept for call sites written without one.
 */
#define READ_CLOVER_DOUBLE_STR(clover, chi) \
  double2 C0, C1, C2, C3, C4, C5, C6, C7, C8, C9; \
  double2 C10, C11, C12, C13, C14, C15, C16, C17; \
  load_streaming_double2(C0, &(clover)[sid + (18*(chi)+0)*cl_stride]); \
  load_streaming_double2(C1, &(clover)[sid + (18*(chi)+1)*cl_stride]); \
  load_streaming_double2(C2, &(clover)[sid + (18*(chi)+2)*cl_stride]); \
  load_streaming_double2(C3, &(clover)[sid + (18*(chi)+3)*cl_stride]); \
  load_streaming_double2(C4, &(clover)[sid + (18*(chi)+4)*cl_stride]); \
  load_streaming_double2(C5, &(clover)[sid + (18*(chi)+5)*cl_stride]); \
  load_streaming_double2(C6, &(clover)[sid + (18*(chi)+6)*cl_stride]); \
  load_streaming_double2(C7, &(clover)[sid + (18*(chi)+7)*cl_stride]); \
  load_streaming_double2(C8, &(clover)[sid + (18*(chi)+8)*cl_stride]); \
  load_streaming_double2(C9, &(clover)[sid + (18*(chi)+9)*cl_stride]); \
  load_streaming_double2(C10, &(clover)[sid + (18*(chi)+10)*cl_stride]); \
  load_streaming_double2(C11, &(clover)[sid + (18*(chi)+11)*cl_stride]); \
  load_streaming_double2(C12, &(clover)[sid + (18*(chi)+12)*cl_stride]); \
  load_streaming_double2(C13, &(clover)[sid + (18*(chi)+13)*cl_stride]); \
  load_streaming_double2(C14, &(clover)[sid + (18*(chi)+14)*cl_stride]); \
  load_streaming_double2(C15, &(clover)[sid + (18*(chi)+15)*cl_stride]); \
  load_streaming_double2(C16, &(clover)[sid + (18*(chi)+16)*cl_stride]); \
  load_streaming_double2(C17, &(clover)[sid + (18*(chi)+17)*cl_stride]);
42 
/**
 * Declare nine float4 locals, C0 through C8, in the invoking scope and
 * initialise each one with a plain indexed read from `clover`.
 *
 * Element k (k = 0..8) is read from index  sid + (9*chi + k)*cl_stride.
 *
 * @param clover  Indexable expression yielding float4 values.
 * @param chi     Integer block selector; each block spans 9 float4 elements.
 *                NOTE(review): presumably the chirality index (0 or 1) --
 *                confirm against the callers.
 *
 * `sid` and `cl_stride` are not parameters: they must already be visible
 * where the macro is expanded.
 *
 * Both parameters are parenthesised so that compound arguments are indexed as
 * intended. The declarations must stay in the caller's scope, so the macro is
 * not wrapped in do { ... } while (0); the trailing semicolon is kept for
 * call sites written without one.
 */
#define READ_CLOVER_SINGLE(clover, chi) \
  float4 C0 = (clover)[sid + (9*(chi)+0)*cl_stride]; \
  float4 C1 = (clover)[sid + (9*(chi)+1)*cl_stride]; \
  float4 C2 = (clover)[sid + (9*(chi)+2)*cl_stride]; \
  float4 C3 = (clover)[sid + (9*(chi)+3)*cl_stride]; \
  float4 C4 = (clover)[sid + (9*(chi)+4)*cl_stride]; \
  float4 C5 = (clover)[sid + (9*(chi)+5)*cl_stride]; \
  float4 C6 = (clover)[sid + (9*(chi)+6)*cl_stride]; \
  float4 C7 = (clover)[sid + (9*(chi)+7)*cl_stride]; \
  float4 C8 = (clover)[sid + (9*(chi)+8)*cl_stride];
53 
/**
 * Declare nine float4 locals, C0 through C8, plus a float K in the invoking
 * scope. Each Ck is produced by short42float4() from the `clover` element at
 * index  sid + (9*chi + k)*cl_stride,  and every component of every Ck is
 * then multiplied by K.
 *
 * K is read from cloverNorm[sid + chi*cl_stride].
 *
 * @param clover  Indexable expression whose elements are accepted by
 *                short42float4().
 * @param chi     Integer block selector; each block spans 9 elements and has
 *                one entry in cloverNorm.
 *                NOTE(review): presumably the chirality index (0 or 1) --
 *                confirm against the callers.
 *
 * `sid`, `cl_stride`, `cloverNorm` and short42float4 are not parameters: they
 * must already be visible where the macro is expanded.
 *
 * Both parameters are parenthesised, including `chi` in the norm index, so
 * that compound arguments are indexed as intended. The declarations must stay
 * in the caller's scope, so the macro is not wrapped in do { ... } while (0);
 * the trailing semicolon is kept for call sites written without one.
 */
#define READ_CLOVER_HALF(clover, chi) \
  float4 C0 = short42float4((clover)[sid + (9*(chi)+0)*cl_stride]); \
  float4 C1 = short42float4((clover)[sid + (9*(chi)+1)*cl_stride]); \
  float4 C2 = short42float4((clover)[sid + (9*(chi)+2)*cl_stride]); \
  float4 C3 = short42float4((clover)[sid + (9*(chi)+3)*cl_stride]); \
  float4 C4 = short42float4((clover)[sid + (9*(chi)+4)*cl_stride]); \
  float4 C5 = short42float4((clover)[sid + (9*(chi)+5)*cl_stride]); \
  float4 C6 = short42float4((clover)[sid + (9*(chi)+6)*cl_stride]); \
  float4 C7 = short42float4((clover)[sid + (9*(chi)+7)*cl_stride]); \
  float4 C8 = short42float4((clover)[sid + (9*(chi)+8)*cl_stride]); \
  float K = cloverNorm[sid + (chi)*cl_stride]; \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K; \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K; \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K; \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K; \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K; \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K; \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K; \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K; \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;
74 
/**
 * Declare eighteen double2 locals, C0 through C17, in the invoking scope and
 * initialise each one from fetch_double2(clover, index).
 *
 * Element k (k = 0..17) uses index  sid + (18*chi + k)*cl_stride.
 *
 * @param clover  First argument forwarded to fetch_double2().
 *                NOTE(review): presumably a texture reference or texture
 *                object -- confirm against fetch_double2's definition.
 * @param chi     Integer block selector; each block spans 18 double2 elements.
 *                NOTE(review): presumably the chirality index (0 or 1) --
 *                confirm against the callers.
 *
 * `sid`, `cl_stride` and fetch_double2 are not parameters: they must already
 * be visible where the macro is expanded.
 *
 * `chi` is parenthesised so that a compound argument such as `a + b` is
 * multiplied as a whole. The declarations must stay in the caller's scope, so
 * the macro is not wrapped in do { ... } while (0); the trailing semicolon is
 * kept for call sites written without one.
 */
#define READ_CLOVER_DOUBLE_TEX(clover, chi) \
  double2 C0 = fetch_double2((clover), sid + (18*(chi)+0)*cl_stride); \
  double2 C1 = fetch_double2((clover), sid + (18*(chi)+1)*cl_stride); \
  double2 C2 = fetch_double2((clover), sid + (18*(chi)+2)*cl_stride); \
  double2 C3 = fetch_double2((clover), sid + (18*(chi)+3)*cl_stride); \
  double2 C4 = fetch_double2((clover), sid + (18*(chi)+4)*cl_stride); \
  double2 C5 = fetch_double2((clover), sid + (18*(chi)+5)*cl_stride); \
  double2 C6 = fetch_double2((clover), sid + (18*(chi)+6)*cl_stride); \
  double2 C7 = fetch_double2((clover), sid + (18*(chi)+7)*cl_stride); \
  double2 C8 = fetch_double2((clover), sid + (18*(chi)+8)*cl_stride); \
  double2 C9 = fetch_double2((clover), sid + (18*(chi)+9)*cl_stride); \
  double2 C10 = fetch_double2((clover), sid + (18*(chi)+10)*cl_stride); \
  double2 C11 = fetch_double2((clover), sid + (18*(chi)+11)*cl_stride); \
  double2 C12 = fetch_double2((clover), sid + (18*(chi)+12)*cl_stride); \
  double2 C13 = fetch_double2((clover), sid + (18*(chi)+13)*cl_stride); \
  double2 C14 = fetch_double2((clover), sid + (18*(chi)+14)*cl_stride); \
  double2 C15 = fetch_double2((clover), sid + (18*(chi)+15)*cl_stride); \
  double2 C16 = fetch_double2((clover), sid + (18*(chi)+16)*cl_stride); \
  double2 C17 = fetch_double2((clover), sid + (18*(chi)+17)*cl_stride);
94 
95 //#endif // USE_TEXTURE_OBJECTS
96 
/**
 * Declare nine float4 locals, C0 through C8, in the invoking scope and
 * initialise each one from TEX1DFETCH(float4, clover, index).
 *
 * Element k (k = 0..8) uses index  sid + (9*chi + k)*cl_stride.
 *
 * @param clover  Second argument forwarded to TEX1DFETCH().
 *                NOTE(review): presumably a texture reference or texture
 *                object -- confirm against TEX1DFETCH's definition.
 * @param chi     Integer block selector; each block spans 9 float4 elements.
 *                NOTE(review): presumably the chirality index (0 or 1) --
 *                confirm against the callers.
 *
 * `sid`, `cl_stride` and TEX1DFETCH are not parameters: they must already be
 * visible where the macro is expanded.
 *
 * `chi` is parenthesised so that a compound argument such as `a + b` is
 * multiplied as a whole. The declarations must stay in the caller's scope, so
 * the macro is not wrapped in do { ... } while (0); the trailing semicolon is
 * kept for call sites written without one.
 */
#define READ_CLOVER_SINGLE_TEX(clover, chi) \
  float4 C0 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+0)*cl_stride); \
  float4 C1 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+1)*cl_stride); \
  float4 C2 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+2)*cl_stride); \
  float4 C3 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+3)*cl_stride); \
  float4 C4 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+4)*cl_stride); \
  float4 C5 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+5)*cl_stride); \
  float4 C6 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+6)*cl_stride); \
  float4 C7 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+7)*cl_stride); \
  float4 C8 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+8)*cl_stride);
107 
/**
 * Declare nine float4 locals, C0 through C8, plus a float K in the invoking
 * scope. Each Ck is initialised from TEX1DFETCH(float4, clover, index) with
 * index  sid + (9*chi + k)*cl_stride,  and every component of every Ck is
 * then multiplied by K.
 *
 * K is fetched with TEX1DFETCH(float, CLOVERTEXNORM, sid + chi*cl_stride).
 * The norm source is the fixed name CLOVERTEXNORM, not a macro parameter.
 *
 * @param clover  Second argument forwarded to TEX1DFETCH() for the nine
 *                float4 fetches.
 *                NOTE(review): presumably a half-precision texture whose
 *                fetches already return float4 -- confirm against the
 *                texture declaration.
 * @param chi     Integer block selector; each block spans 9 float4 elements
 *                and has one norm entry.
 *                NOTE(review): presumably the chirality index (0 or 1) --
 *                confirm against the callers.
 *
 * `sid`, `cl_stride`, CLOVERTEXNORM and TEX1DFETCH are not parameters: they
 * must already be visible where the macro is expanded.
 *
 * `chi` is parenthesised, including in the norm index, so that a compound
 * argument such as `a + b` is multiplied as a whole. The declarations must
 * stay in the caller's scope, so the macro is not wrapped in
 * do { ... } while (0); the trailing semicolon is kept for call sites written
 * without one.
 */
#define READ_CLOVER_HALF_TEX(clover, chi) \
  float4 C0 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+0)*cl_stride); \
  float4 C1 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+1)*cl_stride); \
  float4 C2 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+2)*cl_stride); \
  float4 C3 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+3)*cl_stride); \
  float4 C4 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+4)*cl_stride); \
  float4 C5 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+5)*cl_stride); \
  float4 C6 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+6)*cl_stride); \
  float4 C7 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+7)*cl_stride); \
  float4 C8 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+8)*cl_stride); \
  float K = TEX1DFETCH(float, (CLOVERTEXNORM), sid + (chi)*cl_stride); \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K; \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K; \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K; \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K; \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K; \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K; \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K; \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K; \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;