QUDA
v0.5.0
A library for QCD on GPUs
Main Page
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Pages
quda
lib
read_clover.h
Go to the documentation of this file.
1
/*
 * READ_CLOVER_DOUBLE(clover, chi)
 *
 * Declares eighteen double2 locals, C0 through C17, in the scope where the
 * macro is expanded, and initialises C<i> with
 *
 *     clover[sid + (18*chi + i)*cl_stride]      for i = 0..17
 *
 * clover : anything that can be indexed with [] and yields a double2.
 * chi    : integer expression selecting which group of 18 elements to read
 *          (presumably the chirality block - confirm against callers).
 *
 * `sid` and `cl_stride` are not parameters; they must already be visible at
 * the expansion site.
 *
 * Both arguments are parenthesised so that an expression argument such as
 * `base + 1` is multiplied as a whole. The expansion is intentionally NOT
 * wrapped in a block because the declared variables have to stay visible to
 * the code that follows, and it already ends with a semicolon.
 */
#define READ_CLOVER_DOUBLE(clover, chi)                         \
  double2 C0 = (clover)[sid + (18*(chi)+0)*cl_stride];          \
  double2 C1 = (clover)[sid + (18*(chi)+1)*cl_stride];          \
  double2 C2 = (clover)[sid + (18*(chi)+2)*cl_stride];          \
  double2 C3 = (clover)[sid + (18*(chi)+3)*cl_stride];          \
  double2 C4 = (clover)[sid + (18*(chi)+4)*cl_stride];          \
  double2 C5 = (clover)[sid + (18*(chi)+5)*cl_stride];          \
  double2 C6 = (clover)[sid + (18*(chi)+6)*cl_stride];          \
  double2 C7 = (clover)[sid + (18*(chi)+7)*cl_stride];          \
  double2 C8 = (clover)[sid + (18*(chi)+8)*cl_stride];          \
  double2 C9 = (clover)[sid + (18*(chi)+9)*cl_stride];          \
  double2 C10 = (clover)[sid + (18*(chi)+10)*cl_stride];        \
  double2 C11 = (clover)[sid + (18*(chi)+11)*cl_stride];        \
  double2 C12 = (clover)[sid + (18*(chi)+12)*cl_stride];        \
  double2 C13 = (clover)[sid + (18*(chi)+13)*cl_stride];        \
  double2 C14 = (clover)[sid + (18*(chi)+14)*cl_stride];        \
  double2 C15 = (clover)[sid + (18*(chi)+15)*cl_stride];        \
  double2 C16 = (clover)[sid + (18*(chi)+16)*cl_stride];        \
  double2 C17 = (clover)[sid + (18*(chi)+17)*cl_stride];
20
21
/*
 * READ_CLOVER_DOUBLE_STR(clover, chi)
 *
 * Declares eighteen uninitialised double2 locals, C0 through C17, in the
 * scope where the macro is expanded, then fills each one by calling
 *
 *     load_streaming_double2(C<i>, &clover[sid + (18*chi + i)*cl_stride])
 *
 * for i = 0..17.
 *
 * clover : anything that can be indexed with [] so that the address of an
 *          element can be taken.
 * chi    : integer expression selecting which group of 18 elements to read
 *          (presumably the chirality block - confirm against callers).
 *
 * `sid`, `cl_stride` and `load_streaming_double2` are not parameters; they
 * must already be visible at the expansion site.
 *
 * Both arguments are parenthesised so that an expression argument is used as
 * a single operand. The expansion is intentionally not wrapped in a block
 * because the declared variables have to stay visible afterwards, and it
 * already ends with a semicolon.
 */
#define READ_CLOVER_DOUBLE_STR(clover, chi)                                   \
  double2 C0, C1, C2, C3, C4, C5, C6, C7, C8, C9;                             \
  double2 C10, C11, C12, C13, C14, C15, C16, C17;                             \
  load_streaming_double2(C0, &(clover)[sid + (18*(chi)+0)*cl_stride]);        \
  load_streaming_double2(C1, &(clover)[sid + (18*(chi)+1)*cl_stride]);        \
  load_streaming_double2(C2, &(clover)[sid + (18*(chi)+2)*cl_stride]);        \
  load_streaming_double2(C3, &(clover)[sid + (18*(chi)+3)*cl_stride]);        \
  load_streaming_double2(C4, &(clover)[sid + (18*(chi)+4)*cl_stride]);        \
  load_streaming_double2(C5, &(clover)[sid + (18*(chi)+5)*cl_stride]);        \
  load_streaming_double2(C6, &(clover)[sid + (18*(chi)+6)*cl_stride]);        \
  load_streaming_double2(C7, &(clover)[sid + (18*(chi)+7)*cl_stride]);        \
  load_streaming_double2(C8, &(clover)[sid + (18*(chi)+8)*cl_stride]);        \
  load_streaming_double2(C9, &(clover)[sid + (18*(chi)+9)*cl_stride]);        \
  load_streaming_double2(C10, &(clover)[sid + (18*(chi)+10)*cl_stride]);      \
  load_streaming_double2(C11, &(clover)[sid + (18*(chi)+11)*cl_stride]);      \
  load_streaming_double2(C12, &(clover)[sid + (18*(chi)+12)*cl_stride]);      \
  load_streaming_double2(C13, &(clover)[sid + (18*(chi)+13)*cl_stride]);      \
  load_streaming_double2(C14, &(clover)[sid + (18*(chi)+14)*cl_stride]);      \
  load_streaming_double2(C15, &(clover)[sid + (18*(chi)+15)*cl_stride]);      \
  load_streaming_double2(C16, &(clover)[sid + (18*(chi)+16)*cl_stride]);      \
  load_streaming_double2(C17, &(clover)[sid + (18*(chi)+17)*cl_stride]);
42
43
/*
 * READ_CLOVER_SINGLE(clover, chi)
 *
 * Declares nine float4 locals, C0 through C8, in the scope where the macro
 * is expanded, and initialises C<i> with
 *
 *     clover[sid + (9*chi + i)*cl_stride]       for i = 0..8
 *
 * clover : anything that can be indexed with [] and yields a float4.
 * chi    : integer expression selecting which group of 9 elements to read
 *          (presumably the chirality block - confirm against callers).
 *
 * `sid` and `cl_stride` are not parameters; they must already be visible at
 * the expansion site.
 *
 * Both arguments are parenthesised so that an expression argument is used as
 * a single operand. The expansion is intentionally not wrapped in a block
 * because the declared variables have to stay visible afterwards, and it
 * already ends with a semicolon.
 */
#define READ_CLOVER_SINGLE(clover, chi)                       \
  float4 C0 = (clover)[sid + (9*(chi)+0)*cl_stride];          \
  float4 C1 = (clover)[sid + (9*(chi)+1)*cl_stride];          \
  float4 C2 = (clover)[sid + (9*(chi)+2)*cl_stride];          \
  float4 C3 = (clover)[sid + (9*(chi)+3)*cl_stride];          \
  float4 C4 = (clover)[sid + (9*(chi)+4)*cl_stride];          \
  float4 C5 = (clover)[sid + (9*(chi)+5)*cl_stride];          \
  float4 C6 = (clover)[sid + (9*(chi)+6)*cl_stride];          \
  float4 C7 = (clover)[sid + (9*(chi)+7)*cl_stride];          \
  float4 C8 = (clover)[sid + (9*(chi)+8)*cl_stride];
53
54
/*
 * READ_CLOVER_HALF(clover, chi)
 *
 * Declares nine float4 locals, C0 through C8, plus a float K, in the scope
 * where the macro is expanded.
 *
 *   C<i> = short42float4(clover[sid + (9*chi + i)*cl_stride])   for i = 0..8
 *   K    = cloverNorm[sid + chi*cl_stride]
 *
 * Every component (x, y, z, w) of every C<i> is then multiplied by K.
 *
 * clover : anything that can be indexed with [] and whose elements are
 *          accepted by short42float4.
 * chi    : integer expression selecting which group of 9 elements (and which
 *          norm entry) to read (presumably the chirality block - confirm
 *          against callers).
 *
 * `sid`, `cl_stride`, `cloverNorm` and `short42float4` are not parameters;
 * they must already be visible at the expansion site. Note in particular
 * that the norm array is always the one named `cloverNorm`.
 *
 * Both arguments are parenthesised so that an expression argument is used as
 * a single operand. The expansion is intentionally not wrapped in a block
 * because the declared variables have to stay visible afterwards, and it
 * already ends with a semicolon.
 */
#define READ_CLOVER_HALF(clover, chi)                                      \
  float4 C0 = short42float4((clover)[sid + (9*(chi)+0)*cl_stride]);        \
  float4 C1 = short42float4((clover)[sid + (9*(chi)+1)*cl_stride]);        \
  float4 C2 = short42float4((clover)[sid + (9*(chi)+2)*cl_stride]);        \
  float4 C3 = short42float4((clover)[sid + (9*(chi)+3)*cl_stride]);        \
  float4 C4 = short42float4((clover)[sid + (9*(chi)+4)*cl_stride]);        \
  float4 C5 = short42float4((clover)[sid + (9*(chi)+5)*cl_stride]);        \
  float4 C6 = short42float4((clover)[sid + (9*(chi)+6)*cl_stride]);        \
  float4 C7 = short42float4((clover)[sid + (9*(chi)+7)*cl_stride]);        \
  float4 C8 = short42float4((clover)[sid + (9*(chi)+8)*cl_stride]);        \
  float K = cloverNorm[sid + (chi)*cl_stride];                             \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K;                              \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K;                              \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K;                              \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K;                              \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K;                              \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K;                              \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K;                              \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K;                              \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;
74
75
/*
 * READ_CLOVER_DOUBLE_TEX(clover, chi)
 *
 * Declares eighteen double2 locals, C0 through C17, in the scope where the
 * macro is expanded, and initialises C<i> with
 *
 *     fetch_double2(clover, sid + (18*chi + i)*cl_stride)   for i = 0..17
 *
 * clover : first argument handed to fetch_double2 (presumably a texture
 *          reference or object - confirm against fetch_double2).
 * chi    : integer expression selecting which group of 18 elements to read
 *          (presumably the chirality block - confirm against callers).
 *
 * `sid`, `cl_stride` and `fetch_double2` are not parameters; they must
 * already be visible at the expansion site.
 *
 * Both arguments are parenthesised so that an expression argument is used as
 * a single operand. The expansion is intentionally not wrapped in a block
 * because the declared variables have to stay visible afterwards, and it
 * already ends with a semicolon.
 */
#define READ_CLOVER_DOUBLE_TEX(clover, chi)                                \
  double2 C0 = fetch_double2((clover), sid + (18*(chi)+0)*cl_stride);      \
  double2 C1 = fetch_double2((clover), sid + (18*(chi)+1)*cl_stride);      \
  double2 C2 = fetch_double2((clover), sid + (18*(chi)+2)*cl_stride);      \
  double2 C3 = fetch_double2((clover), sid + (18*(chi)+3)*cl_stride);      \
  double2 C4 = fetch_double2((clover), sid + (18*(chi)+4)*cl_stride);      \
  double2 C5 = fetch_double2((clover), sid + (18*(chi)+5)*cl_stride);      \
  double2 C6 = fetch_double2((clover), sid + (18*(chi)+6)*cl_stride);      \
  double2 C7 = fetch_double2((clover), sid + (18*(chi)+7)*cl_stride);      \
  double2 C8 = fetch_double2((clover), sid + (18*(chi)+8)*cl_stride);      \
  double2 C9 = fetch_double2((clover), sid + (18*(chi)+9)*cl_stride);      \
  double2 C10 = fetch_double2((clover), sid + (18*(chi)+10)*cl_stride);    \
  double2 C11 = fetch_double2((clover), sid + (18*(chi)+11)*cl_stride);    \
  double2 C12 = fetch_double2((clover), sid + (18*(chi)+12)*cl_stride);    \
  double2 C13 = fetch_double2((clover), sid + (18*(chi)+13)*cl_stride);    \
  double2 C14 = fetch_double2((clover), sid + (18*(chi)+14)*cl_stride);    \
  double2 C15 = fetch_double2((clover), sid + (18*(chi)+15)*cl_stride);    \
  double2 C16 = fetch_double2((clover), sid + (18*(chi)+16)*cl_stride);    \
  double2 C17 = fetch_double2((clover), sid + (18*(chi)+17)*cl_stride);
94
95
//#endif // USE_TEXTURE_OBJECTS
96
97
/*
 * READ_CLOVER_SINGLE_TEX(clover, chi)
 *
 * Declares nine float4 locals, C0 through C8, in the scope where the macro
 * is expanded, and initialises C<i> with
 *
 *     TEX1DFETCH(float4, clover, sid + (9*chi + i)*cl_stride)   for i = 0..8
 *
 * clover : second argument handed to TEX1DFETCH (presumably a texture
 *          reference or object - confirm against TEX1DFETCH).
 * chi    : integer expression selecting which group of 9 elements to read
 *          (presumably the chirality block - confirm against callers).
 *
 * `sid`, `cl_stride` and `TEX1DFETCH` are not parameters; they must already
 * be visible at the expansion site.
 *
 * Both arguments are parenthesised so that an expression argument is used as
 * a single operand. The expansion is intentionally not wrapped in a block
 * because the declared variables have to stay visible afterwards, and it
 * already ends with a semicolon.
 */
#define READ_CLOVER_SINGLE_TEX(clover, chi)                                  \
  float4 C0 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+0)*cl_stride);     \
  float4 C1 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+1)*cl_stride);     \
  float4 C2 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+2)*cl_stride);     \
  float4 C3 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+3)*cl_stride);     \
  float4 C4 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+4)*cl_stride);     \
  float4 C5 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+5)*cl_stride);     \
  float4 C6 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+6)*cl_stride);     \
  float4 C7 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+7)*cl_stride);     \
  float4 C8 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+8)*cl_stride);
107
108
/*
 * READ_CLOVER_HALF_TEX(clover, chi)
 *
 * Declares nine float4 locals, C0 through C8, plus a float K, in the scope
 * where the macro is expanded.
 *
 *   C<i> = TEX1DFETCH(float4, clover, sid + (9*chi + i)*cl_stride)  i = 0..8
 *   K    = TEX1DFETCH(float, CLOVERTEXNORM, sid + chi*cl_stride)
 *
 * Every component (x, y, z, w) of every C<i> is then multiplied by K.
 *
 * clover : second argument handed to TEX1DFETCH (presumably a texture
 *          reference or object - confirm against TEX1DFETCH).
 * chi    : integer expression selecting which group of 9 elements (and which
 *          norm entry) to read (presumably the chirality block - confirm
 *          against callers).
 *
 * `sid`, `cl_stride`, `TEX1DFETCH` and `CLOVERTEXNORM` are not parameters;
 * they must already be visible at the expansion site.
 *
 * NOTE(review): no explicit short-to-float conversion appears here, unlike
 * READ_CLOVER_HALF; presumably the fetch itself already yields floats -
 * confirm against the TEX1DFETCH definition and texture setup.
 *
 * Both arguments are parenthesised so that an expression argument is used as
 * a single operand. The expansion is intentionally not wrapped in a block
 * because the declared variables have to stay visible afterwards, and it
 * already ends with a semicolon.
 */
#define READ_CLOVER_HALF_TEX(clover, chi)                                    \
  float4 C0 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+0)*cl_stride);     \
  float4 C1 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+1)*cl_stride);     \
  float4 C2 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+2)*cl_stride);     \
  float4 C3 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+3)*cl_stride);     \
  float4 C4 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+4)*cl_stride);     \
  float4 C5 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+5)*cl_stride);     \
  float4 C6 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+6)*cl_stride);     \
  float4 C7 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+7)*cl_stride);     \
  float4 C8 = TEX1DFETCH(float4, (clover), sid + (9*(chi)+8)*cl_stride);     \
  float K = TEX1DFETCH(float, (CLOVERTEXNORM), sid + (chi)*cl_stride);       \
  C0.x *= K; C0.y *= K; C0.z *= K; C0.w *= K;                                \
  C1.x *= K; C1.y *= K; C1.z *= K; C1.w *= K;                                \
  C2.x *= K; C2.y *= K; C2.z *= K; C2.w *= K;                                \
  C3.x *= K; C3.y *= K; C3.z *= K; C3.w *= K;                                \
  C4.x *= K; C4.y *= K; C4.z *= K; C4.w *= K;                                \
  C5.x *= K; C5.y *= K; C5.z *= K; C5.w *= K;                                \
  C6.x *= K; C6.y *= K; C6.z *= K; C6.w *= K;                                \
  C7.x *= K; C7.y *= K; C7.z *= K; C7.w *= K;                                \
  C8.x *= K; C8.y *= K; C8.z *= K; C8.w *= K;
Generated on Wed Mar 20 2013 12:52:17 for QUDA by Doxygen 1.8.2