QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
texture.h
Go to the documentation of this file.
1 #pragma once
2 
3 #ifdef USE_TEXTURE_OBJECTS
4 
5 #include <texture_helper.cuh>
6 
/**
   Accessor that reads a cudaColorSpinorField through a CUDA texture
   object.  Loads are performed in the storage register type (RegType)
   via tex1Dfetch_ and converted to OutputType with copyFloatN.
 */
template <typename OutputType, typename InputType> class Texture
{

  typedef typename quda::mapper<InputType>::type RegType;

 private:
  cudaTextureObject_t spinor; // texture object bound to the field (or its ghost)

 public:
  Texture() : spinor(0) {}

  // Bind to the body texture of x, or to its ghost texture when requested
  Texture(const cudaColorSpinorField *x, bool use_ghost = false) :
    spinor(use_ghost ? x->GhostTex() : x->Tex())
  {
  }

  Texture(const Texture &tex) : spinor(tex.spinor) {}
  ~Texture() {}

  Texture &operator=(const Texture &tex)
  {
    if (this == &tex) return *this;
    spinor = tex.spinor;
    return *this;
  }

  // Fetch element idx through the texture unit, converting to OutputType
  __device__ inline OutputType fetch(unsigned int idx) const
  {
    OutputType out;
    copyFloatN(out, tex1Dfetch_<RegType>(spinor, idx));
    return out;
  }

  __device__ inline OutputType operator[](unsigned int idx) const { return fetch(idx); }
};
36 
// Reassemble one double from the two 32-bit words of an int2 texture read
// (v.y holds the high word, v.x the low word).
__device__ inline double fetch_double(int2 v) { return __hiloint2double(v.y, v.x); }
39 
// Reassemble a double2 from the four 32-bit words of an int4 texture read:
// (y,x) form the first component, (w,z) the second.
__device__ inline double2 fetch_double2(int4 v)
{
  return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}
42 
// double2 fields cannot be fetched directly through a texture: read the
// raw bits as int4 and reassemble the two doubles in the multiprocessor.
template <> __device__ inline double2 Texture<double2, double2>::fetch(unsigned int idx) const
{
  double2 out;
  copyFloatN(out, fetch_double2(tex1Dfetch_<int4>(spinor, idx)));
  return out;
}
45 
// Same int4 reassembly for double2 storage, with a final truncation of the
// reconstructed doubles down to float2.
template <> __device__ inline float2 Texture<float2, double2>::fetch(unsigned int idx) const
{
  float2 out;
  copyFloatN(out, fetch_double2(tex1Dfetch_<int4>(spinor, idx)));
  return out;
}
48 
49 #else // !USE_TEXTURE_OBJECTS - use direct reads
50 
/**
   Fallback accessor used when texture objects are disabled: presents the
   same interface as the texture path but reads the field directly from
   global memory.
 */
template <typename OutputType, typename InputType> class Texture
{

  // Reconstructed: this typedef (original texture.h:54) was dropped by the
  // page extraction; it mirrors the texture-object variant of this class.
  typedef typename quda::mapper<InputType>::type RegType;

 private:
  const InputType *spinor; // used when textures are disabled

 public:
  Texture() : spinor(0) {}

  // Point at the body of x, or at its ghost zone when requested
  Texture(const cudaColorSpinorField *x, bool use_ghost = false) :
    spinor(use_ghost ? (const InputType *)(x->Ghost2()) : (const InputType *)(x->V()))
  {
  }

  Texture(const Texture &tex) : spinor(tex.spinor) {}
  ~Texture() {}

  Texture &operator=(const Texture &tex)
  {
    if (this != &tex) spinor = tex.spinor;
    return *this;
  }

  // Load element idx directly from memory, converting to OutputType
  __device__ __host__ inline OutputType operator[](unsigned int idx) const
  {
    OutputType out;
    copyFloatN(out, spinor[idx]);
    return out;
  }
};
80 
81 #endif
82 
/**
   Sanity-check that the register, intermediate and storage vector types
   used by a Spinor accessor are mutually compatible; aborts via errorQuda
   on any mismatch.
 */
template <typename RegType, typename InterType, typename StoreType> void checkTypes()
{

  const size_t reg_size = sizeof(((RegType *)0)->x);
  const size_t inter_size = sizeof(((InterType *)0)->x);
  const size_t store_size = sizeof(((StoreType *)0)->x);

  // NOTE(review): the guard also exempts 1/2-byte (fixed-point) stores and
  // 4-byte intermediates from the width check — presumably the fixed-point
  // promotion paths; verify intent against the original header.
  const bool width_mismatch = reg_size != inter_size && store_size != 2 && store_size != 1 && inter_size != 4;
  if (width_mismatch)
    errorQuda("Precision of register (%lu) and intermediate (%lu) types must match\n", (unsigned long)reg_size,
              (unsigned long)inter_size);

  if (vecLength<InterType>() != vecLength<StoreType>()) {
    errorQuda("Vector lengths intermediate and register types must match\n");
  }

  if (vecLength<RegType>() == 0) errorQuda("Vector type not supported\n");
  if (vecLength<InterType>() == 0) errorQuda("Vector type not supported\n");
  if (vecLength<StoreType>() == 0) errorQuda("Vector type not supported\n");
}
113 
// Compute the maximum absolute component over the M vectors in x, record it
// in norm[i], and return the scale factor mapping values into the
// fixed-point range of FixedType (fast-math division is acceptable here).
template <int M, typename FloatN, typename FixedType>
__device__ inline float store_norm(float *norm, FloatN x[M], int i)
{
  float site_max[M];
#pragma unroll
  for (int j = 0; j < M; j++) site_max[j] = max_fabs(x[j]);
#pragma unroll
  for (int j = 1; j < M; j++) site_max[0] = fmaxf(site_max[j], site_max[0]);
  norm[i] = site_max[0];
  return __fdividef(fixedMaxValue<FixedType>::value, site_max[0]);
}
125 
/**
   Read-only accessor for a spinor field: loads sites (and ghost-zone
   sites) through Texture wrappers in the storage order, rescales
   fixed-point data by the per-site norm, and converts into the register
   order requested by the kernel.

   NOTE(review): the page extraction dropped several declaration lines
   (members tex/ghostTex, the copy-constructor and operator= signatures,
   one line of set(), and the Precision() signature); they are
   reconstructed here from the Doxygen member index (texture.h:138-139,
   167, 180, 198, 261-263).
 */
template <typename RegType, typename StoreType, int N> class SpinorTexture
{

  typedef typename bridge_mapper<RegType,StoreType>::type InterType;

  protected:
  Texture<InterType, StoreType> tex;      // accessor for the field body (reconstructed decl)
  Texture<InterType, StoreType> ghostTex; // accessor for the ghost zone (reconstructed decl)
  float *norm; // always use direct reads for norm

  int stride;                  // padded site stride of the field
  unsigned int cb_offset;      // checkerboard (parity) offset, in StoreType elements
  unsigned int cb_norm_offset; // checkerboard offset into the norm array, in floats
#ifndef BLAS_SPINOR
  int ghost_stride[4]; // per-dimension ghost strides (nFace * surface)
#endif

  public:
  SpinorTexture() : tex(), ghostTex(), norm(0), stride(0), cb_offset(0), cb_norm_offset(0) {} // default constructor

  // Spinor must only ever called with cudaColorSpinorField references!!!!
  SpinorTexture(const ColorSpinorField &x, int nFace = 1) :
    tex(&(static_cast<const cudaColorSpinorField &>(x))),
    ghostTex(&(static_cast<const cudaColorSpinorField &>(x)), true),
    norm((float *)x.Norm()),
    stride(x.Stride()),
    cb_offset(x.Bytes() / (2 * sizeof(StoreType))),
    cb_norm_offset(x.NormBytes() / (2 * sizeof(float)))
  {
    checkTypes<RegType, InterType, StoreType>();
#ifndef BLAS_SPINOR
    for (int d = 0; d < 4; d++) ghost_stride[d] = nFace * x.SurfaceCB(d);
#endif
  }

  SpinorTexture(const SpinorTexture &st) : // reconstructed signature
    tex(st.tex),
    ghostTex(st.ghostTex),
    norm(st.norm),
    stride(st.stride),
    cb_offset(st.cb_offset),
    cb_norm_offset(st.cb_norm_offset)
  {
#ifndef BLAS_SPINOR
    for (int d = 0; d < 4; d++) ghost_stride[d] = st.ghost_stride[d];
#endif
  }

  SpinorTexture &operator=(const SpinorTexture &src) // reconstructed signature
  {
    if (&src != this) {
      tex = src.tex;
      ghostTex = src.ghostTex;
      norm = src.norm;
      stride = src.stride;
      cb_offset = src.cb_offset;
      cb_norm_offset = src.cb_norm_offset;
#ifndef BLAS_SPINOR
      for (int d = 0; d < 4; d++) ghost_stride[d] = src.ghost_stride[d];
#endif
    }
    return *this;
  }

  // Rebind this accessor to a different field
  void set(const cudaColorSpinorField &x, int nFace = 1)
  {
    tex = Texture<InterType, StoreType>(&x); // reconstructed (mirrors ghostTex line below)
    ghostTex = Texture<InterType, StoreType>(&x, true);
    norm = (float *)x.Norm();
    stride = x.Stride();
    cb_offset = x.Bytes() / (2 * sizeof(StoreType));
    cb_norm_offset = x.NormBytes() / (2 * sizeof(float));
#ifndef BLAS_SPINOR
    for (int d = 0; d < 4; d++) ghost_stride[d] = nFace * x.SurfaceCB(d);
#endif
    checkTypes<RegType, InterType, StoreType>();
  }

  virtual ~SpinorTexture() {}

  /**
     Load site i of the given parity into registers x[], converting from
     the storage order (and rescaling by the site norm when the storage
     type is fixed-point).
   */
  __device__ inline void load(RegType x[], const int i, const int parity = 0) const
  {
    // load data into registers first using the storage order
    constexpr int M = (N * vec_length<RegType>::value) / vec_length<InterType>::value;
    InterType y[M];

    // fixed precision
    if (isFixed<StoreType>::value) {
      float xN = norm[cb_norm_offset * parity + i];
#pragma unroll
      for (int j = 0; j < M; j++) y[j] = xN * tex[cb_offset * parity + i + j * stride];
    } else { // other types
#pragma unroll
      for (int j = 0; j < M; j++) copyFloatN(y[j], tex[cb_offset * parity + i + j * stride]);
    }

    // now convert into desired register order
    convert<RegType, InterType>(x, y, N);
  }

#ifndef BLAS_SPINOR

  /**
     Load ghost-zone site i of dimension dim into registers x[].  For
     Wilson (Nspin == 4) fields only half the components are exchanged,
     so only half as many loads are issued.
   */
  __device__ inline void loadGhost(RegType x[], const int i, const int dim) const
  {
    // load data into registers first using the storage order
    const int Nspin = (N * vec_length<RegType>::value) / (3 * 2);
    // if Wilson, then load only half the number of components
    constexpr int M = ((N * vec_length<RegType>::value ) / vec_length<InterType>::value) / ((Nspin == 4) ? 2 : 1);

    InterType y[M];

    // fixed precision types (FIXME - these don't look correct?)
    if (isFixed<StoreType>::value) {
      float xN = norm[i];
#pragma unroll
      for (int j = 0; j < M; j++) y[j] = xN * ghostTex[i + j * ghost_stride[dim]];
    } else { // other types
#pragma unroll
      for (int j = 0; j < M; j++) copyFloatN(y[j], ghostTex[i + j * ghost_stride[dim]]);
    }

    // now convert into desired register order
    convert<RegType, InterType>(x, y, N);
  }
#endif

  // Map the storage component width onto the QUDA precision enum
  QudaPrecision Precision() const // reconstructed signature
  {
    QudaPrecision precision = QUDA_INVALID_PRECISION;
    if (sizeof(((StoreType *)0)->x) == sizeof(double))
      precision = QUDA_DOUBLE_PRECISION;
    else if (sizeof(((StoreType *)0)->x) == sizeof(float))
      precision = QUDA_SINGLE_PRECISION;
    else if (sizeof(((StoreType *)0)->x) == sizeof(short))
      precision = QUDA_HALF_PRECISION;
    else if (sizeof(((StoreType *)0)->x) == sizeof(char))
      precision = QUDA_QUARTER_PRECISION;
    else
      errorQuda("Unknown precision type\n");
    return precision;
  }

  int Stride() const { return stride; }
  int Bytes() const { return N * sizeof(RegType); }
};
280 
/**
   Read/write accessor for a spinor field: inherits the texture-based
   load path from SpinorTexture and adds direct-pointer save(), plus
   host-side backup()/restore() of the raw field data.  The `write`
   template flag statically disables all stores when zero.
 */
template <typename RegType, typename StoreType, int N, int write>
class Spinor : public SpinorTexture<RegType, StoreType, N>
{

  typedef typename bridge_mapper<RegType,StoreType>::type InterType;
  // Reconstructed: this typedef (original texture.h:292) was dropped by the
  // page extraction; the body below refers to the base class as ST.
  typedef SpinorTexture<RegType, StoreType, N> ST;

  private:
  StoreType *spinor;       // raw device pointer to the field body
  StoreType *ghost_spinor; // raw device pointer to the ghost zone

  public:
  Spinor() : ST(), spinor(0), ghost_spinor(0) {} // default constructor

  // Spinor must only ever called with cudaColorSpinorField references!!!!
  Spinor(const ColorSpinorField &x, int nFace = 1) :
    ST(x, nFace),
    spinor((StoreType *)x.V()),
    ghost_spinor((StoreType *)x.Ghost2())
  {
  }

  Spinor(const Spinor &st) : ST(st), spinor(st.spinor), ghost_spinor(st.ghost_spinor) {}

  Spinor &operator=(const Spinor &src)
  {
    ST::operator=(src);
    if (&src != this) {
      spinor = src.spinor;
      ghost_spinor = src.ghost_spinor;
    }
    return *this;
  }

  // Rebind this accessor to a different field
  void set(const cudaColorSpinorField &x)
  {
    ST::set(x);
    spinor = (StoreType *)x.V();
    ghost_spinor = (StoreType *)x.Ghost2();
  }

  ~Spinor() {}

  // default store used for simple fields: convert registers x[] into the
  // storage order and write site i of the given parity (fixed-point data
  // is first normalized via store_norm)
  __device__ inline void save(RegType x[], int i, const int parity = 0)
  {
    if (write) {
      constexpr int M = (N * vec_length<RegType>::value) / vec_length<InterType>::value;
      InterType y[M];
      convert<InterType, RegType>(y, x, M);

      if (isFixed<StoreType>::value) {
        float C = store_norm<M, InterType, StoreType>(ST::norm, y, ST::cb_norm_offset * parity + i);
#pragma unroll
        for (int j = 0; j < M; j++) copyFloatN(spinor[ST::cb_offset * parity + i + j * ST::stride], C * y[j]);
      } else {
#pragma unroll
        for (int j = 0; j < M; j++) copyFloatN(spinor[ST::cb_offset * parity + i + j * ST::stride], y[j]);
      }
    }
  }

  // used to backup the field to the host (allocates *spinor_h/*norm_h;
  // restore() releases them)
  void backup(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
  {
    if (write) {
      *spinor_h = new char[bytes];
      cudaMemcpy(*spinor_h, spinor, bytes, cudaMemcpyDeviceToHost);
      if (norm_bytes > 0) {
        *norm_h = new char[norm_bytes];
        cudaMemcpy(*norm_h, ST::norm, norm_bytes, cudaMemcpyDeviceToHost);
      }
      checkCudaError();
    }
  }

  // restore the field from the host and free the host buffers
  void restore(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
  {
    if (write) {
      cudaMemcpy(spinor, *spinor_h, bytes, cudaMemcpyHostToDevice);
      if (norm_bytes > 0) {
        cudaMemcpy(ST::norm, *norm_h, norm_bytes, cudaMemcpyHostToDevice);
        delete[] * norm_h;
        *norm_h = 0;
      }
      delete[] * spinor_h;
      *spinor_h = 0;
      checkCudaError();
    }
  }

  void *V() { return (void *)spinor; }
  float *Norm() { return ST::norm; }
};
__host__ __device__ double set(double &x)
Definition: blas_helper.cuh:58
~Texture()
Definition: texture.h:66
unsigned int cb_offset
Definition: texture.h:143
Texture(const cudaColorSpinorField *x, bool use_ghost=false)
Definition: texture.h:61
enum QudaPrecision_s QudaPrecision
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
int Bytes() const
Definition: texture.h:278
SpinorTexture & operator=(const SpinorTexture &src)
Definition: texture.h:180
void backup(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
Definition: texture.h:350
bridge_mapper< RegType, StoreType >::type InterType
Definition: texture.h:291
#define errorQuda(...)
Definition: util_quda.h:121
~Spinor()
Definition: texture.h:328
SpinorTexture(const ColorSpinorField &x, int nFace=1)
Definition: texture.h:153
unsigned int cb_norm_offset
Definition: texture.h:144
StoreType * ghost_spinor
Definition: texture.h:296
void restore(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
Definition: texture.h:364
int Nspin
Definition: blas_test.cu:45
Texture & operator=(const Texture &tex)
Definition: texture.h:68
StoreType * spinor
Definition: texture.h:295
Spinor()
Definition: texture.h:299
const InputType * spinor
Definition: texture.h:57
Spinor & operator=(const Spinor &src)
Definition: texture.h:311
void * V()
Definition: texture.h:379
__device__ void loadGhost(RegType x[], const int i, const int dim) const
Definition: texture.h:237
quda::mapper< InputType >::type RegType
Definition: texture.h:54
__device__ float store_norm(float *norm, FloatN x[M], int i)
Definition: texture.h:115
Spinor(const ColorSpinorField &x, int nFace=1)
Definition: texture.h:302
SpinorTexture(const SpinorTexture &st)
Definition: texture.h:167
Texture(const Texture &tex)
Definition: texture.h:65
SpinorTexture< RegType, StoreType, N > ST
Definition: texture.h:292
void checkTypes()
Definition: texture.h:94
float * norm
Definition: texture.h:140
__device__ void save(RegType x[], int i, const int parity=0)
Definition: texture.h:331
__device__ __host__ OutputType operator[](unsigned int idx) const
Definition: texture.h:73
__device__ void copyFloatN(FloatN &a, const FloatN &b)
Definition: convert.h:61
int V
Definition: test_util.cpp:27
Spinor(const Spinor &st)
Definition: texture.h:309
cpuColorSpinorField * out
float * Norm()
Definition: texture.h:380
__device__ void load(RegType x[], const int i, const int parity=0) const
Definition: texture.h:212
__forceinline__ __host__ __device__ float max_fabs(const float4 &c)
Definition: float_vector.h:198
Texture< InterType, StoreType > ghostTex
Definition: texture.h:139
QudaPrecision Precision() const
Definition: texture.h:261
Texture< InterType, StoreType > tex
Definition: texture.h:138
bridge_mapper< RegType, StoreType >::type InterType
Definition: texture.h:135
#define checkCudaError()
Definition: util_quda.h:161
QudaParity parity
Definition: covdev_test.cpp:54
Texture()
Definition: texture.h:60
int Stride() const
Definition: texture.h:277
int ghost_stride[4]
Definition: texture.h:146
unsigned long long bytes
Definition: blas_quda.cu:23
virtual ~SpinorTexture()
Definition: texture.h:210