12 #if (CUDA_VERSION < 8000) 13 #define MAX_NVECTOR 1 // multi-vector code doesn't seem to work well with CUDA 7.x 18 template <
typename Float,
typename Output,
typename InputA,
typename InputB>
struct CloverSigmaOprodArg {
26 CloverSigmaOprodArg(Output &oprod, InputA *inA_, InputB *inB_,
const std::vector<std::vector<double>> &coeff_,
29 length(meta.VolumeCB()),
32 for (
int i = 0; i <
nvector; i++) {
35 coeff[i][0] = coeff_[i][0];
36 coeff[i][1] = coeff_[i][1];
41 template <
typename real,
int nvector,
int mu,
int nu,
int parity,
typename Arg>
48 for (
int i = 0; i <
nvector; i++) {
51 arg.inA[i].load(static_cast<Complex *>(A.
data), idx,
parity);
52 arg.inB[i].load(static_cast<Complex *>(B.
data), idx,
parity);
59 result -=
conj(result);
63 arg.oprod((
mu - 1) *
mu / 2 + nu, idx,
parity) = temp;
69 int idx = blockIdx.x * blockDim.x + threadIdx.x;
70 int parity = blockIdx.y * blockDim.y + threadIdx.y;
71 int mu_nu = blockIdx.z * blockDim.z + threadIdx.z;
73 if (idx >= arg.length)
return;
74 if (mu_nu >= 6)
return;
79 case 0: sigmaOprod<real, nvector, 1, 0, 0>(
arg, idx);
break;
80 case 1: sigmaOprod<real, nvector, 2, 0, 0>(
arg, idx);
break;
81 case 2: sigmaOprod<real, nvector, 2, 1, 0>(
arg, idx);
break;
82 case 3: sigmaOprod<real, nvector, 3, 0, 0>(
arg, idx);
break;
83 case 4: sigmaOprod<real, nvector, 3, 1, 0>(
arg, idx);
break;
84 case 5: sigmaOprod<real, nvector, 3, 2, 0>(
arg, idx);
break;
89 case 0: sigmaOprod<real, nvector, 1, 0, 1>(
arg, idx);
break;
90 case 1: sigmaOprod<real, nvector, 2, 0, 1>(
arg, idx);
break;
91 case 2: sigmaOprod<real, nvector, 2, 1, 1>(
arg, idx);
break;
92 case 3: sigmaOprod<real, nvector, 3, 0, 1>(
arg, idx);
break;
93 case 4: sigmaOprod<real, nvector, 3, 1, 1>(
arg, idx);
break;
94 case 5: sigmaOprod<real, nvector, 3, 2, 1>(
arg, idx);
break;
complex< Float > data[size]
__device__ __host__ Matrix< complex< Float >, Nc > outerProdSpinTrace(const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)
__device__ void sigmaOprod(Arg &arg, int idx)
__global__ void sigmaOprodKernel(Arg arg)
CloverSigmaOprodArg(Output &oprod, InputA *inA_, InputB *inB_, const std::vector< std::vector< double >> &coeff_, const GaugeField &meta, int nvector)
Main header file for host and device accessors to GaugeFields.
std::complex< double > Complex
Float coeff[MAX_NVECTOR][2]
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
__host__ __device__ ValueType conj(ValueType x)