QUDA  0.9.0
dslash_twisted_mass.cu
Go to the documentation of this file.
1 #include <cstdlib>
2 #include <cstdio>
3 #include <string>
4 #include <iostream>
5 
6 #include <color_spinor_field.h>
7 #include <clover_field.h>
8 
9 // these control the Wilson-type actions
10 #ifdef GPU_WILSON_DIRAC
11 //#define DIRECT_ACCESS_LINK
12 //#define DIRECT_ACCESS_WILSON_SPINOR
13 //#define DIRECT_ACCESS_WILSON_ACCUM
14 //#define DIRECT_ACCESS_WILSON_INTER
15 //#define DIRECT_ACCESS_WILSON_PACK_SPINOR
16 //#define DIRECT_ACCESS_CLOVER
17 #endif // GPU_WILSON_DIRAC
18 
19 
20 #include <quda_internal.h>
21 #include <dslash_quda.h>
22 #include <sys/time.h>
23 #include <blas_quda.h>
24 
25 #include <inline_ptx.h>
26 
27 namespace quda {
28 
29  namespace twisted {
30 
31 #undef GPU_STAGGERED_DIRAC
32 #include <dslash_constants.h>
33 #include <dslash_textures.h>
34 #include <dslash_index.cuh>
35 
36  // Enable shared memory dslash for Fermi architecture
37  //#define SHARED_WILSON_DSLASH
38  //#define SHARED_8_BYTE_WORD_SIZE // 8-byte shared memory access
39 
40 #ifdef GPU_TWISTED_MASS_DIRAC
41 #include <tm_dslash_def.h> // Twisted Mass kernels
42 #endif
43 
44 #ifndef DSLASH_SHARED_FLOATS_PER_THREAD
45 #define DSLASH_SHARED_FLOATS_PER_THREAD 0
46 #endif
47 
48 #include <dslash_quda.cuh>
49 
50  } // end namespace twisted
51 
52  // declare the dslash events
53 #include <dslash_events.cuh>
54 
55  using namespace twisted;
56 
57 #ifdef GPU_TWISTED_MASS_DIRAC
58  template <typename sFloat, typename gFloat>
59  class TwistedDslashCuda : public SharedDslashCuda {
60 
61  private:
62  const QudaTwistDslashType dslashType;
63  double a, b, c, d;
64 
65  protected:
66  unsigned int sharedBytesPerThread() const
67  {
68  if (dslashParam.kernel_type == INTERIOR_KERNEL) {
69  int reg_size = (typeid(sFloat)==typeid(double2) ? sizeof(double) : sizeof(float));
70  return DSLASH_SHARED_FLOATS_PER_THREAD * reg_size;
71  } else {
72  return 0;
73  }
74  }
75 
76  public:
77  TwistedDslashCuda(cudaColorSpinorField *out, const GaugeField &gauge,
78  const cudaColorSpinorField *in, const cudaColorSpinorField *x,
79  const QudaTwistDslashType dslashType, const double kappa, const double mu,
80  const double epsilon, const double k, const int parity, const int dagger,
81  const int *commOverride)
82  : SharedDslashCuda(out, in, x, gauge, parity, dagger, commOverride), dslashType(dslashType)
83  {
84  a = kappa;
85  b = mu;
86  c = epsilon;
87  d = k;
88  if (dslashType == QUDA_NONDEG_DSLASH) errorQuda("Invalid dslashType for twisted-mass Dslash");
89 
90  dslashParam.twist_a = (dslashType == QUDA_DEG_TWIST_INV_DSLASH) ? kappa : 0.0;
91  dslashParam.twist_b = (dslashType == QUDA_DEG_TWIST_INV_DSLASH) ? mu : 0.0;
92  dslashParam.a = kappa;
93  dslashParam.a_f = kappa;
94  dslashParam.b = mu;
95  dslashParam.b_f = mu;
96  dslashParam.fl_stride = in->VolumeCB();
97  }
98  virtual ~TwistedDslashCuda() { unbindSpinorTex<sFloat>(in, out, x); }
99 
100  TuneKey tuneKey() const
101  {
102  TuneKey key = DslashCuda::tuneKey();
103  switch(dslashType){
105  strcat(key.aux,",TwistInvDslash");
106  break;
108  strcat(key.aux,",");
109  break;
111  strcat(key.aux,",DslashTwist");
112  break;
113  default:
114  errorQuda("Unsupported twisted-dslash type %d", dslashType);
115  }
116  return key;
117  }
118 
119  void apply(const cudaStream_t &stream)
120  {
121 #ifdef SHARED_WILSON_DSLASH
122  if (dslashParam.kernel_type == EXTERIOR_KERNEL_X) errorQuda("Shared dslash does not yet support X-dimension partitioning");
123 #endif
124 #ifndef USE_TEXTURE_OBJECTS
125  if (dslashParam.kernel_type == INTERIOR_KERNEL) bindSpinorTex<sFloat>(in, out, x);
126 #endif // USE_TEXTURE_OBJECTS
127  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
128  setParam();
129  dslashParam.block[0] = tp.aux.x; dslashParam.block[1] = tp.aux.y; dslashParam.block[2] = tp.aux.z; dslashParam.block[3] = tp.aux.w;
130  for (int i=0; i<4; i++) dslashParam.grid[i] = ( (i==0 ? 2 : 1) * in->X(i)) / dslashParam.block[i];
131 
132  switch(dslashType){
134  DSLASH(twistedMassTwistInvDslash, tp.grid, tp.block, tp.shared_bytes, stream, dslashParam);
135  break;
137  DSLASH(twistedMassDslash, tp.grid, tp.block, tp.shared_bytes, stream, dslashParam);
138  break;
140  DSLASH(twistedMassDslashTwist, tp.grid, tp.block, tp.shared_bytes, stream, dslashParam);
141  break;
142  default: errorQuda("Invalid twisted mass dslash type");
143  }
144  }
145 
146  long long flops() const {
147  int twisted_flops = 48;
148  long long flops = DslashCuda::flops();
149  switch(dslashParam.kernel_type) {
150  case EXTERIOR_KERNEL_X:
151  case EXTERIOR_KERNEL_Y:
152  case EXTERIOR_KERNEL_Z:
153  case EXTERIOR_KERNEL_T:
154  case EXTERIOR_KERNEL_ALL:
155  break;
156  case INTERIOR_KERNEL:
157  case KERNEL_POLICY:
158  // twisted mass flops are done in the interior kernel
159  flops += twisted_flops * in->VolumeCB();
160  break;
161  }
162  return flops;
163  }
164  };
165 #endif // GPU_TWISTED_MASS_DIRAC
166 
167 #include <dslash_policy.cuh>
168 
170  const cudaColorSpinorField *in, const int parity, const int dagger,
171  const cudaColorSpinorField *x, const QudaTwistDslashType type,
172  const double &kappa, const double &mu, const double &epsilon,
173  const double &k, const int *commOverride, TimeProfile &profile)
174  {
175 #ifdef GPU_TWISTED_MASS_DIRAC
176  const_cast<cudaColorSpinorField*>(in)->createComms(1);
177 
178  if (type == QUDA_DEG_TWIST_INV_DSLASH) setKernelPackT(true);
179 
180  DslashCuda *dslash = nullptr;
181  if (in->Precision() == QUDA_DOUBLE_PRECISION) {
182  dslash = new TwistedDslashCuda<double2,double2>(out, gauge, in, x, type, kappa, mu, epsilon, k, parity, dagger, commOverride);
183  } else if (in->Precision() == QUDA_SINGLE_PRECISION) {
184  dslash = new TwistedDslashCuda<float4,float4>(out, gauge, in, x, type, kappa, mu, epsilon, k, parity, dagger, commOverride);
185  } else if (in->Precision() == QUDA_HALF_PRECISION) {
186  dslash = new TwistedDslashCuda<short4,short4>(out, gauge, in, x, type, kappa, mu, epsilon, k, parity, dagger, commOverride);
187  }
188 
189  DslashPolicyTune dslash_policy(*dslash, const_cast<cudaColorSpinorField*>(in), in->Volume(), in->GhostFace(), profile);
190  dslash_policy.apply(0);
191 
192  delete dslash;
193 
194  if (type == QUDA_DEG_TWIST_INV_DSLASH) setKernelPackT(false);
195 #else
196  errorQuda("Twisted mass dslash has not been built");
197 #endif
198  }
199 
200 }
double mu
Definition: test_util.cpp:1643
void setParam(int kernel, int prec, int threads, int blocks)
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
#define errorQuda(...)
Definition: util_quda.h:90
cudaStream_t * stream
char * strcat(char *__s1, const char *__s2)
#define b
VOLATILE spinorFloat kappa
cpuColorSpinorField * in
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:603
enum QudaTwistDslashType_s QudaTwistDslashType
#define DSLASH_SHARED_FLOATS_PER_THREAD
#define DSLASH(FUNC, gridDim, blockDim, shared, stream, param)
cpuColorSpinorField * out
void twistedMassDslashCuda(cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile)
unsigned long long flops
Definition: blas_quda.cu:42
virtual TuneKey tuneKey() const
void setKernelPackT(bool pack)
Definition: dslash_quda.cu:59
const void * c
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:51
virtual long long flops() const
static __inline__ size_t size_t d
QudaParity parity
Definition: covdev_test.cpp:53
#define a