13 template <
typename sFloat>
20 unsigned int sharedBytesPerThread()
const {
return 0; }
21 unsigned int sharedBytesPerBlock(
const TuneParam &
param)
const {
return 0; }
22 bool tuneGridDim()
const {
return false; }
23 unsigned int minThreads()
const {
return in->
X(0) * in->
X(1) * in->
X(2) * in->
X(3); }
25 char *saveOut, *saveOutNorm;
29 out(out), in(in) { bindSpinorTex<sFloat>(in, out); strcpy(
aux,
"gamma5");}
46 saveOut =
new char[out->
Bytes()];
47 cudaMemcpy(saveOut, out->
V(), out->
Bytes(), cudaMemcpyDeviceToHost);
49 if (
typeid(sFloat) ==
typeid(short4))
52 cudaMemcpy(saveOutNorm, out->
Norm(), out->
NormBytes(), cudaMemcpyDeviceToHost);
58 cudaMemcpy(out->
V(), saveOut, out->
Bytes(), cudaMemcpyHostToDevice);
61 if (
typeid(sFloat) ==
typeid(short4))
63 cudaMemcpy(out->
Norm(), saveOutNorm, out->
NormBytes(), cudaMemcpyHostToDevice);
71 ps <<
"block=(" << param.
block.x <<
"," << param.
block.y <<
"," << param.
block.z <<
"), ";
87 dslashParam.threads = in->
Volume();
93 #if (__COMPUTE_CAPABILITY__ >= 130)
96 errorQuda(
"Double precision not supported on this GPU");
101 errorQuda(
"Half precision not supported for gamma5 kernel yet");
114 #ifndef _TWIST_QUDA_CONTRACT
115 #error "Contraction core undefined"
118 #ifndef _TWIST_QUDA_CONTRACT_PLUS
119 #error "Contraction core (plus) undefined"
122 #ifndef _TWIST_QUDA_CONTRACT_MINUS
123 #error "Contraction core (minus) undefined"
126 #define checkSpinor(a, b) \
128 if (a.Precision() != b.Precision()) \
129 errorQuda("precisions do not match: %d %d", a.Precision(), b.Precision()); \
130 if (a.Length() != b.Length()) \
131 errorQuda("lengths do not match: %d %d", a.Length(), b.Length()); \
132 if (a.Stride() != b.Stride()) \
133 errorQuda("strides do not match: %d %d", a.Stride(), b.Stride()); \
141 template <
typename Float2,
typename rFloat>
175 unsigned int sharedBytesPerThread()
const {
return 16*
sizeof(rFloat); }
176 unsigned int sharedBytesPerBlock(
const TuneParam &
param)
const {
return 0; }
177 bool tuneGridDim()
const {
return false; }
178 unsigned int minThreads()
const {
return x.
X(0) * x.
X(1) * x.
X(2) * x.
X(3); }
180 char *saveOut, *saveOutNorm;
182 void fillAux(
QudaContractType contract_type,
const char *contract_str) { strcpy(aux[contract_type], contract_str); }
186 x(x), y(y), result(result), parity(parity), contract_type(contract_type), nTSlice(-1) {
197 bindSpinorTex<Float2>(&x, &y);
201 x(x), y(y), result(result), parity(parity), contract_type(contract_type), nTSlice(tSlice) {
212 bindSpinorTex<Float2>(&x, &y);
227 switch (contract_type)
274 std::stringstream ps;
275 ps <<
"block=(" << param.
block.x <<
"," << param.
block.y <<
"," << param.
block.z <<
"), ";
293 errorQuda(
"No time-slice specified for contraction\n");
297 dslashParam.threads = x.
Volume();
303 #if (__COMPUTE_CAPABILITY__ >= 130)
306 errorQuda(
"Double precision not supported on this GPU");
311 errorQuda(
"Half precision not supported for gamma5 kernel yet");
328 errorQuda(
"No time-slice input allowed for volume contractions\n");
332 dslashParam.threads = x.
X(0)*x.
X(1)*x.
X(2);
338 #if (__COMPUTE_CAPABILITY__ >= 130)
341 errorQuda(
"Double precision not supported on this GPU");
346 errorQuda(
"Half precision not supported for gamma5 kernel yet");
QudaVerbosity getVerbosity()
__global__ void contractTsliceKernel(float2 *out, float4 *in1, float4 *in2, int myStride, const int Tslice, const int Parity, const DslashParam param)
__global__ void contractGamma5MinusKernel(float2 *out, float4 *in1, float4 *in2, int myStride, const int Parity, const DslashParam param)
__global__ void contractTslicePlusKernel(float2 *out, float4 *in1, float4 *in2, int myStride, const int Tslice, const int Parity, const DslashParam param)
__global__ void contractMinusKernel(float2 *out, float4 *in1, float4 *in2, int myStride, const int Parity, const DslashParam param)
__global__ void contractGamma5PlusKernel(float2 *out, float4 *in1, float4 *in2, int myStride, const int Parity, const DslashParam param)
std::string paramString(const TuneParam ¶m) const
ContractCuda(const cudaColorSpinorField &x, const cudaColorSpinorField &y, void *result, const QudaParity parity, const QudaContractType contract_type, const int tSlice)
__global__ void contractKernel(float2 *out, float4 *in1, float4 *in2, int myStride, const int Parity, const DslashParam param)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
void apply(const cudaStream_t &stream)
const char * VolString() const
enum QudaParity_s QudaParity
QudaContractType ContractType() const
__global__ void gamma5Kernel(float4 *out, float *outNorm, float4 *in, float *inNorm, DslashParam param, int myStride)
void apply(const cudaStream_t &stream)
Gamma5Cuda(cudaColorSpinorField *out, const cudaColorSpinorField *in)
cpuColorSpinorField * out
void contractCuda(const cudaColorSpinorField &x, const cudaColorSpinorField &y, void *result, const QudaContractType contract_type, const QudaParity parity)
QudaPrecision Precision() const
std::string paramString(const TuneParam ¶m) const
enum QudaContractType_s QudaContractType
__global__ void contractPlusKernel(float2 *out, float4 *in1, float4 *in2, int myStride, const int Parity, const DslashParam param)
__global__ void contractGamma5Kernel(float2 *out, float4 *in1, float4 *in2, int myStride, const int Parity, const DslashParam param)
void gamma5Cuda(cudaColorSpinorField *out, const cudaColorSpinorField *in)
ContractCuda(const cudaColorSpinorField &x, const cudaColorSpinorField &y, void *result, const QudaParity parity, const QudaContractType contract_type)
virtual void apply(const cudaStream_t &stream)=0
__global__ void contractTsliceMinusKernel(float2 *out, float4 *in1, float4 *in2, int myStride, const int Tslice, const int Parity, const DslashParam param)