29 threads(x.VolumeCB()),
34 for (
int dir = 0; dir < 4; dir++) X[dir] = x.
X()[dir];
40 int x_cb = threadIdx.x + blockIdx.x * blockDim.x;
41 int parity = threadIdx.y + blockIdx.y * blockDim.y;
42 if (x_cb >= arg.threads)
return;
44 constexpr
int nSpin = Arg::nSpin;
48 Vector
x = arg.x(x_cb, parity);
49 Vector
y = arg.y(x_cb, parity);
55 for (
int nu = 0; nu <
nSpin; nu++) {
62 arg.s.save(A, x_cb, parity);
67 int x_cb = threadIdx.x + blockIdx.x * blockDim.x;
68 int parity = threadIdx.y + blockIdx.y * blockDim.y;
69 const int nSpin = arg.nSpin;
70 const int nColor = arg.nColor;
72 if (x_cb >= arg.threads)
return;
76 Vector
x = arg.x(x_cb, parity);
77 Vector
y = arg.y(x_cb, parity);
79 complex<real> I(0.0, 1.0);
81 complex<real> result_local(0.0, 0.0);
89 complex<real> A[nSpin *
nSpin];
101 result_local += spin_elem[0][0];
102 result_local += spin_elem[1][1];
103 result_local += spin_elem[2][2];
104 result_local += spin_elem[3][3];
105 A[G_idx++] = result_local;
110 result_local += I * spin_elem[0][3];
111 result_local += I * spin_elem[1][2];
112 result_local -= I * spin_elem[2][1];
113 result_local -= I * spin_elem[3][0];
114 A[G_idx++] = result_local;
118 result_local -= spin_elem[0][3];
119 result_local += spin_elem[1][2];
120 result_local += spin_elem[2][1];
121 result_local -= spin_elem[3][0];
122 A[G_idx++] = result_local;
126 result_local += I * spin_elem[0][2];
127 result_local -= I * spin_elem[1][3];
128 result_local -= I * spin_elem[2][0];
129 result_local += I * spin_elem[3][1];
130 A[G_idx++] = result_local;
134 result_local += spin_elem[0][2];
135 result_local += spin_elem[1][3];
136 result_local += spin_elem[2][0];
137 result_local += spin_elem[3][1];
138 A[G_idx++] = result_local;
143 result_local += spin_elem[0][0];
144 result_local += spin_elem[1][1];
145 result_local -= spin_elem[2][2];
146 result_local -= spin_elem[3][3];
147 A[G_idx++] = result_local;
153 result_local += I * spin_elem[0][3];
154 result_local += I * spin_elem[1][2];
155 result_local += I * spin_elem[2][1];
156 result_local += I * spin_elem[3][0];
157 A[G_idx++] = result_local;
161 result_local -= spin_elem[0][3];
162 result_local += spin_elem[1][2];
163 result_local -= spin_elem[2][1];
164 result_local += spin_elem[3][0];
165 A[G_idx++] = result_local;
169 result_local += I * spin_elem[0][2];
170 result_local -= I * spin_elem[1][3];
171 result_local += I * spin_elem[2][0];
172 result_local -= I * spin_elem[3][1];
173 A[G_idx++] = result_local;
177 result_local += spin_elem[0][2];
178 result_local += spin_elem[1][3];
179 result_local -= spin_elem[2][0];
180 result_local -= spin_elem[3][1];
181 A[G_idx++] = result_local;
186 result_local += spin_elem[0][0];
187 result_local -= spin_elem[1][1];
188 result_local += spin_elem[2][2];
189 result_local -= spin_elem[3][3];
190 A[G_idx++] = result_local;
194 result_local -= I * spin_elem[0][2];
195 result_local -= I * spin_elem[1][3];
196 result_local += I * spin_elem[2][0];
197 result_local += I * spin_elem[3][1];
198 A[G_idx++] = result_local;
202 result_local -= spin_elem[0][1];
203 result_local -= spin_elem[1][0];
204 result_local += spin_elem[2][3];
205 result_local += spin_elem[3][2];
206 A[G_idx++] = result_local;
210 result_local += spin_elem[0][1];
211 result_local += spin_elem[1][0];
212 result_local += spin_elem[2][3];
213 result_local += spin_elem[3][2];
214 A[G_idx++] = result_local;
218 result_local -= I * spin_elem[0][1];
219 result_local += I * spin_elem[1][0];
220 result_local += I * spin_elem[2][3];
221 result_local -= I * spin_elem[3][2];
222 A[G_idx++] = result_local;
226 result_local -= spin_elem[0][0];
227 result_local -= spin_elem[1][1];
228 result_local += spin_elem[2][2];
229 result_local += spin_elem[3][3];
230 A[G_idx++] = result_local;
232 arg.s.save(A, x_cb, parity);
__global__ void computeColorContraction(Arg arg)
matrix_field< complex< real >, nSpin > s
static constexpr bool spinor_direct_load
colorspinor_mapper< real, nSpin, nColor, spin_project, spinor_direct_load >::type F
__global__ void computeDegrandRossiContraction(Arg arg)
static constexpr int nSpin
__device__ __host__ complex< Float > innerProduct(const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)
Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
static constexpr int nColor
static constexpr bool spin_project
ContractionArg(const ColorSpinorField &x, const ColorSpinorField &y, complex< real > *s)