QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
dslash_twisted_mass.cu
Go to the documentation of this file.
1 #include <cstdlib>
2 #include <cstdio>
3 #include <string>
4 #include <iostream>
5 
6 #include <color_spinor_field.h>
7 #include <clover_field.h>
8 
9 // these control the Wilson-type actions
10 #ifdef GPU_WILSON_DIRAC
11 //#define DIRECT_ACCESS_LINK
12 //#define DIRECT_ACCESS_WILSON_SPINOR
13 //#define DIRECT_ACCESS_WILSON_ACCUM
14 //#define DIRECT_ACCESS_WILSON_INTER
15 //#define DIRECT_ACCESS_WILSON_PACK_SPINOR
16 //#define DIRECT_ACCESS_CLOVER
17 #endif // GPU_WILSON_DIRAC
18 
19 
20 #include <quda_internal.h>
21 #include <dslash_quda.h>
22 #include <sys/time.h>
23 #include <blas_quda.h>
24 #include <face_quda.h>
25 
26 #include <inline_ptx.h>
27 
28 namespace quda {
29 
30  namespace twisted {
31 
32 #undef GPU_STAGGERED_DIRAC
33 #include <dslash_constants.h>
34 #include <dslash_textures.h>
35 #include <dslash_index.cuh>
36 
37  // Enable shared memory dslash for Fermi architecture
38  //#define SHARED_WILSON_DSLASH
39  //#define SHARED_8_BYTE_WORD_SIZE // 8-byte shared memory access
40 
41 #ifdef GPU_TWISTED_MASS_DIRAC
42 #include <tm_dslash_def.h> // Twisted Mass kernels
43 #endif
44 
45 #ifndef DSLASH_SHARED_FLOATS_PER_THREAD
46 #define DSLASH_SHARED_FLOATS_PER_THREAD 0
47 #endif
48 
49 #include <dslash_quda.cuh>
50 
51  } // end namespace twisted
52 
53  // declare the dslash events
54 #include <dslash_events.cuh>
55 
56  using namespace twisted;
57 
58 #ifdef GPU_TWISTED_MASS_DIRAC
59  template <typename sFloat, typename gFloat>
60  class TwistedDslashCuda : public SharedDslashCuda {
61 
62  private:
63  const gFloat *gauge0, *gauge1;
64  const QudaTwistDslashType dslashType;
65  double a, b, c, d;
66 
67  protected:
68  unsigned int sharedBytesPerThread() const
69  {
70 #if (__COMPUTE_CAPABILITY__ >= 200)
71  if (dslashParam.kernel_type == INTERIOR_KERNEL) {
72  int reg_size = (typeid(sFloat)==typeid(double2) ? sizeof(double) : sizeof(float));
73  return DSLASH_SHARED_FLOATS_PER_THREAD * reg_size;
74  } else {
75  return 0;
76  }
77 #else
78  int reg_size = (typeid(sFloat)==typeid(double2) ? sizeof(double) : sizeof(float));
79  return DSLASH_SHARED_FLOATS_PER_THREAD * reg_size;
80 #endif
81  }
82 
83  public:
84  TwistedDslashCuda(cudaColorSpinorField *out, const gFloat *gauge0, const gFloat *gauge1,
85  const QudaReconstructType reconstruct, const cudaColorSpinorField *in, const cudaColorSpinorField *x,
86  const QudaTwistDslashType dslashType, const double kappa, const double mu,
87  const double epsilon, const double k, const int dagger)
88  : SharedDslashCuda(out, in, x, reconstruct, dagger), gauge0(gauge0), gauge1(gauge1), dslashType(dslashType)
89  {
90  bindSpinorTex<sFloat>(in, out, x);
91  a = kappa;
92  b = mu;
93  c = epsilon;
94  d = k;
95  if (dslashType == QUDA_NONDEG_DSLASH) errorQuda("Invalid dslashType for twisted-mass Dslash");
96  dslashParam.fl_stride = in->VolumeCB();
97  }
98  virtual ~TwistedDslashCuda() { unbindSpinorTex<sFloat>(in, out, x); }
99 
100  TuneKey tuneKey() const
101  {
102  TuneKey key = DslashCuda::tuneKey();
103  switch(dslashType){
105  strcat(key.aux,",TwistInvDslash");
106  break;
108  strcat(key.aux,",");
109  break;
111  strcat(key.aux,",DslashTwist");
112  break;
113  }
114  return key;
115  }
116 
117  void apply(const cudaStream_t &stream)
118  {
119 #ifdef SHARED_WILSON_DSLASH
120  if (dslashParam.kernel_type == EXTERIOR_KERNEL_X)
121  errorQuda("Shared dslash does not yet support X-dimension partitioning");
122 #endif
123  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
124 
125  switch(dslashType){
127  DSLASH(twistedMassTwistInvDslash, tp.grid, tp.block, tp.shared_bytes, stream, dslashParam,
128  (sFloat*)out->V(), (float*)out->Norm(), gauge0, gauge1,
129  (sFloat*)in->V(), (float*)in->Norm(), a, b, (sFloat*)(x ? x->V() : 0), (float*)(x ? x->Norm() : 0));
130  break;
132  DSLASH(twistedMassDslash, tp.grid, tp.block, tp.shared_bytes, stream, dslashParam,
133  (sFloat*)out->V(), (float*)out->Norm(), gauge0, gauge1,
134  (sFloat*)in->V(), (float*)in->Norm(), a, b, (sFloat*)(x ? x->V() : 0), (float*)(x ? x->Norm() : 0));
135  break;
137  DSLASH(twistedMassDslashTwist, tp.grid, tp.block, tp.shared_bytes, stream, dslashParam,
138  (sFloat*)out->V(), (float*)out->Norm(), gauge0, gauge1,
139  (sFloat*)in->V(), (float*)in->Norm(), a, b, (sFloat*)x->V(), (float*)x->Norm());
140  break;
141  default: errorQuda("Invalid twisted mass dslash type");
142  }
143  }
144 
145  long long flops() const { return (x ? 1416ll : 1392ll) * in->VolumeCB(); } // FIXME for multi-GPU
146  };
147 #endif // GPU_TWISTED_MASS_DIRAC
148 
149 #include <dslash_policy.cuh>
150 
152  const cudaColorSpinorField *in, const int parity, const int dagger,
153  const cudaColorSpinorField *x, const QudaTwistDslashType type,
154  const double &kappa, const double &mu, const double &epsilon,
155  const double &k, const int *commOverride, TimeProfile &profile,
156  const QudaDslashPolicy &dslashPolicy)
157  {
158  inSpinor = (cudaColorSpinorField*)in; // EVIL
159 #ifdef GPU_TWISTED_MASS_DIRAC
160  int Npad = (in->Ncolor()*in->Nspin()*2)/in->FieldOrder(); // SPINOR_HOP in old code
161 
162  int ghost_threads[4] = {0};
163  int bulk_threads = in->Volume();
164 
165  for(int i=0;i<4;i++){
166  dslashParam.ghostDim[i] = commDimPartitioned(i); // determines whether to use regular or ghost indexing at boundary
167  dslashParam.ghostOffset[i] = Npad*(in->GhostOffset(i) + in->Stride());
168  dslashParam.ghostNormOffset[i] = in->GhostNormOffset(i) + in->Stride();
169  dslashParam.commDim[i] = (!commOverride[i]) ? 0 : commDimPartitioned(i); // switch off comms if override = 0
170  ghost_threads[i] = in->GhostFace()[i];
171  }
172 
173 #ifdef MULTI_GPU
174  if(type == QUDA_DEG_TWIST_INV_DSLASH){
175  setTwistPack(true);
176  twist_a = kappa;
177  twist_b = mu;
178  }
179 #endif
180 
181  void *gauge0, *gauge1;
182  bindGaugeTex(gauge, parity, &gauge0, &gauge1);
183 
184  if (in->Precision() != gauge.Precision())
185  errorQuda("Mixing gauge and spinor precision not supported");
186 
187  DslashCuda *dslash = 0;
188  size_t regSize = sizeof(float);
189 
190  if (in->Precision() == QUDA_DOUBLE_PRECISION) {
191 #if (__COMPUTE_CAPABILITY__ >= 130)
192  dslash = new TwistedDslashCuda<double2,double2>(out, (double2*)gauge0,(double2*)gauge1, gauge.Reconstruct(), in, x, type, kappa, mu, epsilon, k, dagger);
193  regSize = sizeof(double);
194 #else
195  errorQuda("Double precision not supported on this GPU");
196 #endif
197  } else if (in->Precision() == QUDA_SINGLE_PRECISION) {
198  dslash = new TwistedDslashCuda<float4,float4>(out, (float4*)gauge0,(float4*)gauge1, gauge.Reconstruct(), in, x, type, kappa, mu, epsilon, k, dagger);
199 
200  } else if (in->Precision() == QUDA_HALF_PRECISION) {
201  dslash = new TwistedDslashCuda<short4,short4>(out, (short4*)gauge0,(short4*)gauge1, gauge.Reconstruct(), in, x, type, kappa, mu, epsilon, k, dagger);
202  }
203 
204 
205 #ifndef GPU_COMMS
206  DslashPolicyImp* dslashImp = DslashFactory::create(dslashPolicy);
207 #else
208  DslashPolicyImp* dslashImp = DslashFactory::create(QUDA_GPU_COMMS_DSLASH);
209 #endif
210 
211  (*dslashImp)(*dslash, const_cast<cudaColorSpinorField*>(in), regSize, parity, dagger, bulk_threads, ghost_threads, profile);
212  delete dslashImp;
213 
214  delete dslash;
215 #ifdef MULTI_GPU
216  if(type == QUDA_DEG_TWIST_INV_DSLASH){
217  setTwistPack(false);
218  twist_a = 0.0;
219  twist_b = 0.0;
220  }
221 #endif
222 
223  unbindGaugeTex(gauge);
224 
225  checkCudaError();
226 #else
227  errorQuda("Twisted mass dslash has not been built");
228 #endif
229  }
230 
231 }
void unbindGaugeTex(const cudaGaugeField &gauge)
int commDimPartitioned(int dir)
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
#define errorQuda(...)
Definition: util_quda.h:73
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int int mu
cudaStream_t * stream
int GhostNormOffset(const int i) const
QudaDagType dagger
Definition: test_util.cpp:1558
QudaPrecision Precision() const
VOLATILE spinorFloat kappa
cpuColorSpinorField * in
enum QudaDslashPolicy_s QudaDslashPolicy
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:271
QudaReconstructType Reconstruct() const
Definition: gauge_field.h:168
void setTwistPack(bool pack)
Definition: dslash_quda.cu:90
int x[4]
QudaFieldOrder FieldOrder() const
enum QudaTwistDslashType_s QudaTwistDslashType
#define DSLASH_SHARED_FLOATS_PER_THREAD
cpuColorSpinorField * out
enum QudaReconstructType_s QudaReconstructType
QudaPrecision Precision() const
void bindGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
#define checkCudaError()
Definition: util_quda.h:110
QudaTune getTuning()
Definition: util_quda.cpp:32
int GhostOffset(const int i) const
void twistedMassDslashCuda(cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
const QudaParity parity
Definition: dslash_test.cpp:29
void * gauge[4]
Definition: su3_test.cpp:15
const int * GhostFace() const