QUDA  0.9.0
dslash_ndeg_twisted_mass.cu
Go to the documentation of this file.
1 #include <cstdlib>
2 #include <cstdio>
3 #include <string>
4 #include <iostream>
5 
6 #include <color_spinor_field.h>
7 #include <clover_field.h>
8 
9 // these control the Wilson-type actions
10 #ifdef GPU_WILSON_DIRAC
11 //#define DIRECT_ACCESS_LINK
12 //#define DIRECT_ACCESS_WILSON_SPINOR
13 //#define DIRECT_ACCESS_WILSON_ACCUM
14 //#define DIRECT_ACCESS_WILSON_INTER
15 //#define DIRECT_ACCESS_WILSON_PACK_SPINOR
16 //#define DIRECT_ACCESS_CLOVER
17 #endif // GPU_WILSON_DIRAC
18 
19 
20 #include <quda_internal.h>
21 #include <dslash_quda.h>
22 #include <sys/time.h>
23 #include <blas_quda.h>
24 
25 #include <inline_ptx.h>
26 
27 namespace quda {
28 
29  namespace ndegtwisted {
30 
31 #include <dslash_constants.h>
32 #include <dslash_textures.h>
33 #include <dslash_index.cuh>
34 
35  // Enable shared memory dslash for Fermi architecture
36  //#define SHARED_WILSON_DSLASH
37  //#define SHARED_8_BYTE_WORD_SIZE // 8-byte shared memory access
38 
39 #ifdef GPU_NDEG_TWISTED_MASS_DIRAC
40 #include <tm_ndeg_dslash_def.h> // Non-degenerate twisted Mass
41 #endif
42 
43 #ifndef NDEGTM_SHARED_FLOATS_PER_THREAD
44 #define NDEGTM_SHARED_FLOATS_PER_THREAD 0
45 #endif
46 
47 #include <dslash_quda.cuh>
48 
49  } // end namespace ndegtwisted
50 
51  // declare the dslash events
52 #include <dslash_events.cuh>
53 
54  using namespace ndegtwisted;
55 
56 #ifdef GPU_NDEG_TWISTED_MASS_DIRAC
57  template <typename sFloat, typename gFloat>
58  class NdegTwistedDslashCuda : public SharedDslashCuda {
59 
60  private:
61  const QudaTwistDslashType dslashType;
62  double a, b, c, d;
63 
64  protected:
65  unsigned int sharedBytesPerThread() const
66  {
67  if (dslashParam.kernel_type == INTERIOR_KERNEL) {
68  int reg_size = (typeid(sFloat)==typeid(double2) ? sizeof(double) : sizeof(float));
69  return NDEGTM_SHARED_FLOATS_PER_THREAD * reg_size;
70  } else {
71  return 0;
72  }
73  }
74 
75  public:
76  NdegTwistedDslashCuda(cudaColorSpinorField *out, const GaugeField &gauge,
77  const cudaColorSpinorField *in, const cudaColorSpinorField *x,
78  const QudaTwistDslashType dslashType, const double kappa, const double mu,
79  const double epsilon, const double k, const int parity, const int dagger, const int *commOverride)
80  : SharedDslashCuda(out, in, x, gauge, parity, dagger, commOverride), dslashType(dslashType)
81  {
82  a = kappa;
83  b = mu;
84  c = epsilon;
85  d = k;
86  dslashParam.a = kappa;
87  dslashParam.a_f = kappa;
88  dslashParam.b = mu;
89  dslashParam.b_f = mu;
90  dslashParam.c = epsilon;
91  dslashParam.c_f = epsilon;
92  dslashParam.d = k;
93  dslashParam.d_f = k;
94 
95  if (dslashType != QUDA_NONDEG_DSLASH) errorQuda("Invalid dslashType for non-degenerate twisted-mass Dslash");
96  dslashParam.fl_stride = in->VolumeCB()/2;
97  }
98  virtual ~NdegTwistedDslashCuda() { unbindSpinorTex<sFloat>(in, out, x); }
99 
100  TuneKey tuneKey() const
101  {
102  TuneKey key = DslashCuda::tuneKey();
103  strcat(key.aux,",NdegDslash");
104  return key;
105  }
106 
107  void apply(const cudaStream_t &stream)
108  {
109 #ifdef SHARED_WILSON_DSLASH
110  if (dslashParam.kernel_type == EXTERIOR_KERNEL_X) errorQuda("Shared dslash does not yet support X-dimension partitioning");
111 #endif
112 #ifndef USE_TEXTURE_OBJECTS
113  if (dslashParam.kernel_type == INTERIOR_KERNEL) bindSpinorTex<sFloat>(in, out, x);
114 #endif // USE_TEXTURE_OBJECTS
115  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
116  setParam();
117  NDEG_TM_DSLASH(twistedNdegMassDslash, tp.grid, tp.block, tp.shared_bytes, stream, dslashParam);
118  }
119 
120  long long flops() const {
121  int twisted_flops = 48;
122  long long flops = DslashCuda::flops();
123  switch(dslashParam.kernel_type) {
124  case EXTERIOR_KERNEL_X:
125  case EXTERIOR_KERNEL_Y:
126  case EXTERIOR_KERNEL_Z:
127  case EXTERIOR_KERNEL_T:
128  case EXTERIOR_KERNEL_ALL:
129  break;
130  case INTERIOR_KERNEL:
131  case KERNEL_POLICY:
132  // twisted-mass flops are done in the interior kernel
133  flops += twisted_flops * in->VolumeCB();
134  break;
135  }
136  return flops;
137  }
138  };
139 #endif // GPU_NDEG_TWISTED_MASS_DIRAC
140 
141 
142 #include <dslash_policy.cuh>
143 
145  const cudaColorSpinorField *in, const int parity, const int dagger,
146  const cudaColorSpinorField *x, const QudaTwistDslashType type,
147  const double &kappa, const double &mu, const double &epsilon,
148  const double &k, const int *commOverride, TimeProfile &profile)
149  {
150 #ifdef GPU_NDEG_TWISTED_MASS_DIRAC
151  const_cast<cudaColorSpinorField*>(in)->createComms(1);
152 
153  DslashCuda *dslash = nullptr;
154  if (in->Precision() == QUDA_DOUBLE_PRECISION) {
155  dslash = new NdegTwistedDslashCuda<double2,double2>(out, gauge, in, x, type, kappa, mu, epsilon, k, parity, dagger, commOverride);
156  } else if (in->Precision() == QUDA_SINGLE_PRECISION) {
157  dslash = new NdegTwistedDslashCuda<float4,float4>(out, gauge, in, x, type, kappa, mu, epsilon, k, parity, dagger, commOverride);
158  } else if (in->Precision() == QUDA_HALF_PRECISION) {
159  dslash = new NdegTwistedDslashCuda<short4,short4>(out, gauge, in, x, type, kappa, mu, epsilon, k, parity, dagger, commOverride);
160  }
161 
162  int bulk_threads = in->Volume() / 2;
163  int ghost_threads[4] = {0};
164  for(int i=0;i<4;i++) ghost_threads[i] = in->GhostFace()[i] / 2;
165  DslashPolicyTune dslash_policy(*dslash, const_cast<cudaColorSpinorField*>(in), bulk_threads, ghost_threads, profile);
166  dslash_policy.apply(0);
167 
168  delete dslash;
169 #else
170  errorQuda("Non-degenerate twisted mass dslash has not been built");
171 #endif
172  }
173 
174 }
#define NDEGTM_SHARED_FLOATS_PER_THREAD
double mu
Definition: test_util.cpp:1643
void setParam(int kernel, int prec, int threads, int blocks)
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
#define errorQuda(...)
Definition: util_quda.h:90
cudaStream_t * stream
char * strcat(char *__s1, const char *__s2)
#define b
VOLATILE spinorFloat kappa
cpuColorSpinorField * in
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:603
enum QudaTwistDslashType_s QudaTwistDslashType
cpuColorSpinorField * out
void ndegTwistedMassDslashCuda(cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile)
unsigned long long flops
Definition: blas_quda.cu:42
virtual TuneKey tuneKey() const
const void * c
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:51
virtual long long flops() const
static __inline__ size_t size_t d
#define NDEG_TM_DSLASH(FUNC, gridDim, blockDim, shared, stream, param)
QudaParity parity
Definition: covdev_test.cpp:53
#define a