QUDA  0.9.0
texture.h
Go to the documentation of this file.
1 #ifndef _TEXTURE_H
2 #define _TEXTURE_H
3 
4 // FIXME - it would not be too hard to get this working on the host as well
5 
6 #include <convert.h>
7 
8 #ifdef USE_TEXTURE_OBJECTS
9 
// Read-only accessor for a colour-spinor field.  By default reads go through
// a CUDA texture object owned by the cudaColorSpinorField; when
// DIRECT_ACCESS_BLAS is defined, plain global-memory loads are used instead.
// OutputType is the type handed back to the caller; InputType is the
// storage type of the field.
template<typename OutputType, typename InputType>
class Texture {

  // type returned by the texture fetch for this storage type
  // (quda::mapper maps the storage type to its fetch/register type)
  typedef typename quda::mapper<InputType>::type RegType;

private:
#ifndef DIRECT_ACCESS_BLAS
  cudaTextureObject_t spinor; // texture object owned by the field
#else
  const InputType *spinor; // used when textures are disabled
#endif

public:
  Texture() : spinor(0) { }
#ifndef DIRECT_ACCESS_BLAS
  // Wrap the field's texture object (the ghost-zone object if requested).
  Texture(const cudaColorSpinorField *x, bool use_ghost = false)
  : spinor(use_ghost ? x->GhostTex() : x->Tex()) { }
#else
  // Direct-access path: keep a raw pointer to the field (or its ghost buffer).
  Texture(const cudaColorSpinorField *x, bool use_ghost = false)
  : spinor(use_ghost ? (const InputType*)(x->Ghost2()) : (const InputType*)(x->V())) { }
#endif
  Texture(const Texture &tex) : spinor(tex.spinor) { }
  // Non-owning: the field owns the texture object / device memory.
  ~Texture() { }

  Texture& operator=(const Texture &tex) {
    if (this != &tex) spinor = tex.spinor;
    return *this;
  }

#ifndef DIRECT_ACCESS_BLAS
  // Fetch element idx through the texture, converting the fetched RegType
  // value to OutputType via copyFloatN.
  __device__ inline OutputType fetch(unsigned int idx)
  {
    OutputType rtn;
    copyFloatN(rtn, tex1Dfetch<RegType>(spinor, idx));
    return rtn;
  }
#else
  // Fetch element idx with a plain global-memory load.
  __device__ inline OutputType fetch(unsigned int idx)
  { OutputType out; copyFloatN(out, spinor[idx]); return out; }
#endif

  __device__ inline OutputType operator[](unsigned int idx) { return fetch(idx); }
};
53 
#ifndef DIRECT_ACCESS_BLAS
// Doubles cannot be fetched from textures directly: the bits are fetched as
// int2/int4 and reassembled with __hiloint2double.
__device__ inline double fetch_double(int2 v)
{ return __hiloint2double(v.y, v.x); }

__device__ inline double2 fetch_double2(int4 v)
{ return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z)); }

// Specialization for double2 storage: fetch as int4 and reassemble.
template<> __device__ inline double2 Texture<double2,double2>::fetch(unsigned int idx)
{ double2 out; copyFloatN(out, fetch_double2(tex1Dfetch<int4>(spinor, idx))); return out; }

// Mixed-precision read: double2 storage returned truncated to float2.
template<> __device__ inline float2 Texture<float2,double2>::fetch(unsigned int idx)
{ float2 out; copyFloatN(out, fetch_double2(tex1Dfetch<int4>(spinor, idx))); return out; }
#endif
67 
68 #else
69 
70 // legacy Texture references
71 
#if (__COMPUTE_CAPABILITY__ >= 130)

 // Fetch a double through an int2 texture reference, reassembling the bits.
 __inline__ __device__ double fetch_double(texture<int2, 1> t, int i)
 {
   int2 v = tex1Dfetch(t,i);
   return __hiloint2double(v.y, v.x);
 }

 // Fetch a double2 through an int4 texture reference.
 __inline__ __device__ double2 fetch_double2(texture<int4, 1> t, int i)
 {
   int4 v = tex1Dfetch(t,i);
   return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
 }
#else
 // Pre-sm_13 devices lack double-precision support: return zero stubs.
 __inline__ __device__ double fetch_double(texture<int2, 1> t, int i){ return 0.0; }

 __inline__ __device__ double2 fetch_double2(texture<int4, 1> t, int i)
 {
   // do nothing
   return make_double2(0.0, 0.0);
 }
#endif
94 
95 #define MAX_TEXELS (1<<27)
96 
97 #define MAX_TEX_ID 4
98 
99 // dynamically keep track of texture references we've already bound to
101 
102 template<typename OutputType, typename InputType, int tex_id>
103  class Texture {
104 
105  private:
106 #ifdef DIRECT_ACCESS_BLAS
107  const InputType *spinor; // used when textures are disabled
108  size_t bytes;
109 #endif
110  static bool bound;
111  static int count;
112 
113  public:
115 #ifdef DIRECT_ACCESS_BLAS
116  : spinor(0), bytes(0)
117 #endif
118  { count++; }
119 
 // Bind the field's device memory (or its ghost buffer) to the texture
 // reference identified by the tex_id template parameter.  tex_id_table
 // tracks which ids are already in use; double-binding is an error.
 // NOTE(review): under DIRECT_ACCESS_BLAS the trailing "{ }" after the
 // initializer would give this constructor two bodies — this looks like a
 // documentation-extraction artifact; confirm against the real header.
 Texture(const cudaColorSpinorField *x, bool use_ghost = false)
#ifdef DIRECT_ACCESS_BLAS
 : spinor( use_ghost ? (const InputType*)(x->Ghost2()) : (const InputType*)(x->V())) { }
#endif
 {
 // only bind if bytes > 0
 if (x->Bytes()) {
 if (tex_id >= 0 && tex_id < MAX_TEX_ID) {
 // NOTE(review): the ternary re-checks a range the enclosing if already
 // guarantees, so the ": 0" fallback can never be taken here.
 if (tex_id_table[(tex_id >= 0 && tex_id < MAX_TEX_ID) ? tex_id : 0]) {
 errorQuda("Already bound to this texture reference");
 } else {
 tex_id_table[(tex_id >= 0 && tex_id < MAX_TEX_ID) ? tex_id : 0] = true;
 }
 }
 // bind either the ghost buffer or the body; bound is set on both paths
 if (use_ghost) bind((const InputType*)(x->Ghost2()), x->GhostBytes());
 else bind((const InputType*)x->V(), x->Bytes()); bound = true;
 }
 count++; // reference count of live Texture instances for this tex_id
 }
139 
 // Copy construction shares the existing binding; only the instance
 // reference count is bumped (unbinding happens when count reaches zero).
 Texture(const Texture &tex)
#ifdef DIRECT_ACCESS_BLAS
 : spinor(tex.spinor), bytes(tex.bytes)
#endif
 { count++; }
145 
147  if (bound && !--count) {
148  unbind(); bound = false; tex_id_table[(tex_id >= 0 && tex_id < MAX_TEX_ID) ? tex_id : 0]=false;
149  }
150  }
151 
153 #ifdef DIRECT_ACCESS_BLAS
154  spinor = tex.spinor;
155  bytes = tex.bytes;
156 #endif
157  return *this;
158  }
159 
 // Primary-template fall-backs: the real implementations are the per-tex_id
 // specializations generated by the DEF_* macros below.  These defaults are
 // only reached when tex_id is out of the predeclared range.
 inline void bind(const InputType*, size_t bytes){ /*errorQuda("Texture id is out of range");*/ }
 inline void unbind() { /*errorQuda("Texture id is out of range");*/ }

 //default should only be called if a tex_id is out of range
 __device__ inline OutputType fetch(unsigned int idx) { OutputType x; x.x =0; return x; };
 __device__ inline OutputType operator[](unsigned int idx) { return fetch(idx); }
166 };
167 
168  template<typename OutputType, typename InputType, int tex_id>
170 
171  template<typename OutputType, typename InputType, int tex_id>
173 
// Legacy texture-reference machinery: for each texture id we declare one
// texture reference per storage type, then generate the matching
// bind/unbind/fetch specializations of the Texture class template.

// Declare the per-id texture references.  Doubles are declared as int4 and
// reassembled via fetch_double2; shorts use normalized-float read mode.
#define DECL_TEX(id) \
  texture<short2,1,cudaReadModeNormalizedFloat> tex_short2_##id; \
  texture<short4,1,cudaReadModeNormalizedFloat> tex_short4_##id; \
  texture<float,1> tex_float_##id; \
  texture<float2,1> tex_float2_##id; \
  texture<float4,1> tex_float4_##id; \
  texture<int4,1> tex_double2_##id;


// bind()/unbind() specializations attaching spinor memory to the matching
// texture reference for this id.
#define DEF_BIND_UNBIND(outtype, intype, id) \
  template<> inline void Texture<outtype,intype,id>::bind(const intype *ptr, size_t bytes) \
  { cudaBindTexture(0,tex_##intype##_##id, ptr, bytes); } \
  template<> inline void Texture<outtype,intype,id>::unbind() { cudaUnbindTexture(tex_##intype##_##id); }


// fetch() through the texture path.
#define DEF_FETCH_TEX(outtype, intype, id) \
  template<> __device__ inline outtype Texture<outtype,intype,id>::fetch(unsigned int idx) \
  { return tex1Dfetch(tex_##intype##_##id,idx); }


// fetch() via a direct global-memory load (textures disabled).
#define DEF_FETCH_DIRECT(outtype, intype, id) \
  template<> __device__ inline outtype Texture<outtype,intype,id>::fetch(unsigned int idx) \
  { outtype out; copyFloatN(out, spinor[idx]); return out; }


// Select direct vs texture fetch for the non-double types.
#if defined(DIRECT_ACCESS_BLAS)
#define DEF_FETCH DEF_FETCH_DIRECT
#else
#define DEF_FETCH DEF_FETCH_TEX
#endif


// Double-precision fetch: direct loads when textures are disabled (or Fermi
// double-texture workaround active), otherwise int4 fetch + reassembly.
#if defined(DIRECT_ACCESS_BLAS) || defined(FERMI_NO_DBLE_TEX)
#define DEF_FETCH_DBLE DEF_FETCH_DIRECT
#else
#define DEF_FETCH_DBLE(outtype, intype, id) \
  template<> __device__ inline outtype Texture<outtype,double2,id>::fetch(unsigned int idx) \
  { outtype out; copyFloatN(out, fetch_double2(tex_double2_##id,idx)); return out; }
#endif

// Mixed-precision fetch (e.g. double registers from float/short storage).
#if defined(DIRECT_ACCESS_BLAS) || defined(FERMI_NO_DBLE_TEX)
#define DEF_FETCH_DBLE_MIXED DEF_FETCH_DIRECT
#else
#define DEF_FETCH_DBLE_MIXED(outtype, intype, id) \
  template<> __device__ inline outtype Texture<outtype,intype,id>::fetch(unsigned int idx) \
  { outtype out; copyFloatN(out, tex1Dfetch(tex_##intype##_##id,idx)); return out; }
#endif


#define DEF_BIND_UNBIND_FETCH(outtype, intype, id) \
  DEF_BIND_UNBIND(outtype, intype, id) \
  DEF_FETCH(outtype, intype, id)


// All supported (register, storage) combinations for one texture id.
#define DEF_ALL(id) \
  DECL_TEX(id) \
  DEF_BIND_UNBIND_FETCH(float2, short2, id) \
  DEF_BIND_UNBIND_FETCH(float4, short4, id) \
  DEF_BIND_UNBIND_FETCH(float, float, id) \
  DEF_BIND_UNBIND_FETCH(float2, float2, id) \
  DEF_BIND_UNBIND_FETCH(float4, float4, id) \
  DEF_BIND_UNBIND(double2, double2, id) \
  DEF_BIND_UNBIND(float2, double2, id) \
  DEF_FETCH_DBLE(double2, double2, id) \
  DEF_FETCH_DBLE(float2, double2, id) \
  DEF_BIND_UNBIND(double2, float2, id) \
  DEF_BIND_UNBIND(double4, float4, id) \
  DEF_BIND_UNBIND(double2, short2, id) \
  DEF_BIND_UNBIND(double4, short4, id) \
  DEF_FETCH_DBLE_MIXED(double2, float2, id) \
  DEF_FETCH_DBLE_MIXED(double4, float4, id) \
  DEF_FETCH_DBLE_MIXED(double2, short2, id) \
  DEF_FETCH_DBLE_MIXED(double4, short4, id)

 // Declare the textures and define the member functions of the corresponding templated classes.
 // NOTE(review): DEF_ALL(4) instantiates id 4 even though MAX_TEX_ID is 4 and
 // tex_id_table only covers indices 0..3 — confirm this is intentional.
 DEF_ALL(0)
 DEF_ALL(1)
 DEF_ALL(2)
 DEF_ALL(3)
 DEF_ALL(4)

// NOTE(review): DEF_FETCH_DBLE_MIXED is never #undef'd here, unlike its
// siblings — it leaks into any file including this header.
#undef DECL_TEX
#undef DEF_BIND_UNBIND
#undef DEF_FETCH_DIRECT
#undef DEF_FETCH_TEX
#undef DEF_FETCH
#undef DEF_FETCH_DBLE
#undef DEF_BIND_UNBIND_FETCH
#undef DEF_ALL
263 
264 #endif // USE_TEXTURE_OBJECTS
265 
266 
// Sanity-check that the register / intermediate / storage type triple used by
// a Spinor accessor is consistent.  Calls errorQuda (fatal) on mismatch.
template <typename RegType, typename InterType, typename StoreType>
void checkTypes() {

  // size of the .x member gives the scalar precision of each vector type
  const size_t reg_size = sizeof(((RegType*)0)->x);
  const size_t inter_size = sizeof(((InterType*)0)->x);
  const size_t store_size = sizeof(((StoreType*)0)->x);

  // Register and intermediate precision must agree, except for the
  // half-precision case: 2-byte storage promoted to a 4-byte intermediate.
  if (reg_size != inter_size && store_size != 2 && inter_size != 4)
    errorQuda("Precision of register (%lu) and intermediate (%lu) types must match\n",
	      (unsigned long)reg_size, (unsigned long)inter_size);

  // The intermediate type must have the same vector length as the storage
  // type (the message previously blamed the register type, which is not
  // what is being compared here).
  if (vecLength<InterType>() != vecLength<StoreType>()) {
    errorQuda("Vector lengths of intermediate and storage types must match\n");
  }

  // vecLength() returns 0 for unsupported vector types
  if (vecLength<RegType>() == 0) errorQuda("Vector type not supported\n");
  if (vecLength<InterType>() == 0) errorQuda("Vector type not supported\n");
  if (vecLength<StoreType>() == 0) errorQuda("Vector type not supported\n");

}
298 
// Write the per-site norm for site i — the maximum absolute component over
// the M vector elements x[] — into norm[i], and return the scale factor
// (MAX_SHORT / norm) used to quantize the data to shorts.
template <typename FloatN, int M>
__device__ inline float store_norm(float *norm, FloatN x[M], int i) {
  // single-pass running maximum over all vector elements
  float site_max = max_fabs(x[0]);
#pragma unroll
  for (int j=1; j<M; j++) site_max = fmaxf(max_fabs(x[j]), site_max);
  norm[i] = site_max;
  // fast (approximate) reciprocal-style division is sufficient here
  return __fdividef(MAX_SHORT, site_max);
}
309 
// NOTE(review): the `class SpinorTexture {` declaration line (and a few
// member declarations) were elided by the documentation extraction; the
// fragments below are annotated as-is.
template <typename RegType, typename StoreType, int N, int tex_id=-1>

 // intermediate type used between storage-order loads and register conversion
 typedef typename bridge_mapper<RegType,StoreType>::type InterType;

 protected:
#ifdef USE_TEXTURE_OBJECTS // texture objects
 // NOTE(review): the texture-object member declarations are elided here.
#else
 StoreType *spinor;       // raw device pointer, used when tex_id is out of range
 StoreType *ghost_spinor; // raw pointer to the ghost (halo) buffer
 // NOTE(review): the `Texture<InterType, StoreType, tex_id> tex;` member
 // declaration appears to be elided here (it is referenced below).
 Texture<InterType, StoreType, -1> ghostTex; // ghost zone uses the default (-1) id
#endif
 float *norm; // always use direct reads for norm

 int stride;                  // field stride, in StoreType elements
 unsigned int cb_offset;      // parity (checkerboard) offset, in StoreType elements
 unsigned int cb_norm_offset; // parity offset into the norm array, in floats
#ifndef BLAS_SPINOR
 int ghost_stride[4]; // per-dimension stride of the ghost zone
#endif
342  public:
344 #ifndef USE_TEXTURE_OBJECTS
345  spinor(0), ghost_spinor(0),
346 #endif
347  tex(), ghostTex(), norm(0), stride(0), cb_offset(0), cb_norm_offset(0) { } // default constructor
348 
 // Spinor must only ever called with cudaColorSpinorField references!!!!
 // Wrap an existing field: capture the data/ghost pointers and textures plus
 // the geometry (stride and parity offsets) needed for indexed loads.
 SpinorTexture(const ColorSpinorField &x, int nFace=1) :
#ifndef USE_TEXTURE_OBJECTS
 spinor((StoreType*)x.V()), ghost_spinor((StoreType*)x.Ghost2()),
#endif
 tex(&(static_cast<const cudaColorSpinorField&>(x))),
 ghostTex(&(static_cast<const cudaColorSpinorField&>(x)), true),
 norm((float*)x.Norm()), stride(x.Stride()),
 cb_offset(x.Bytes()/(2*sizeof(StoreType))),     // half the field, in StoreType units
 cb_norm_offset(x.NormBytes()/(2*sizeof(float))) // half the norm array, in floats
 {
 checkTypes<RegType,InterType,StoreType>();
#ifndef BLAS_SPINOR
 // ghost zone holds nFace faces per dimension
 for (int d=0; d<4; d++) ghost_stride[d] = nFace*x.SurfaceCB(d);
#endif
 }
365 
367 #ifndef USE_TEXTURE_OBJECTS
369 #endif
370  tex(st.tex), ghostTex(st.ghostTex), norm(st.norm), stride(st.stride),
372  {
373 #ifndef BLAS_SPINOR
374  for (int d=0; d<4; d++) ghost_stride[d] = st.ghost_stride[d];
375 #endif
376  }
377 
379  if (&src != this) {
380 #ifndef USE_TEXTURE_OBJECTS
381  spinor = src.spinor;
382  ghost_spinor = src.ghost_spinor;
383 #endif
384  tex = src.tex;
385  ghostTex = src.ghostTex;
386  norm = src.norm;
387  stride = src.stride;
388  cb_offset = src.cb_offset;
389  cb_norm_offset = src.cb_norm_offset;
390 #ifndef BLAS_SPINOR
391  for (int d=0; d<4; d++) ghost_stride[d] = src.ghost_stride[d];
392 #endif
393  }
394  return *this;
395  }
396 
397  void set(const cudaColorSpinorField &x, int nFace=1){
398 #ifdef USE_TEXTURE_OBJECTS
401 #else
402  spinor = (StoreType*)x.V();
403  ghost_spinor = (StoreType*)x.Ghost2();
405  ghostTex = Texture<InterType, StoreType, -1>(&x,true);
406 #endif
407  norm = (float*)x.Norm();
408  stride = x.Stride();
409  cb_offset = x.Bytes()/(2*sizeof(StoreType));
410  cb_norm_offset = x.NormBytes()/(2*sizeof(float));
411 #ifndef BLAS_SPINOR
412  for (int d=0; d<4; d++) ghost_stride[d] = nFace*x.SurfaceCB(d);
413 #endif
414  checkTypes<RegType,InterType,StoreType>();
415  }
416 
 // Non-owning view of the field: nothing to release.
 virtual ~SpinorTexture() { }
418 
 // Load site i (of the given parity) into registers x[]: read M InterType
 // words in storage order — rescaling by the per-site norm for half
 // precision — then convert them to the register order/type.
 __device__ inline void load(RegType x[], const int i, const int parity=0) {
 // load data into registers first using the storage order
 constexpr int M = (N * vec_length<RegType>::value ) / vec_length<InterType>::value;
 InterType y[M];

 // If we are using tex references, then we can only use the predeclared texture ids
#ifndef USE_TEXTURE_OBJECTS
 // NOTE(review): this test uses <= MAX_TEX_ID while the binding constructor
 // uses < MAX_TEX_ID — confirm whether the off-by-one is intentional.
 if (tex_id >= 0 && tex_id <= MAX_TEX_ID) {
#endif
 // half precision types
 if ( isHalf<StoreType>::value ) {
 float xN = norm[cb_norm_offset*parity + i]; // per-site norm rescales short data
#pragma unroll
 for (int j=0; j<M; j++) y[j] = xN*tex[cb_offset*parity + i + j*stride];
 } else { // other types
#pragma unroll
 for (int j=0; j<M; j++) copyFloatN(y[j], tex[cb_offset*parity + i + j*stride]);
 }
#ifndef USE_TEXTURE_OBJECTS
 } else { // default load when out of tex_id range

 if ( isHalf<StoreType>::value ) {
 float xN = norm[cb_norm_offset*parity + i];
#pragma unroll
 for (int j=0; j<M; j++) {
 copyFloatN(y[j], spinor[cb_offset*parity + i + j*stride]);
 y[j] *= xN; // direct path: rescale after the raw load
 }
 } else { // other types
#pragma unroll
 for (int j=0; j<M; j++) copyFloatN(y[j],spinor[cb_offset*parity + i + j*stride]);
 }
 }
#endif // !USE_TEXTURE_OBJECTS

 // now convert into desired register order
 convert<RegType, InterType>(x, y, N);
 }
457 
#ifndef BLAS_SPINOR

 // Load ghost-zone (halo) element i for dimension `dim` into registers x[].
 // When the field looks like a 4-spinor (Nspin == 4) only half the
 // components are stored in the ghost zone, so M is halved.
 __device__ inline void loadGhost(RegType x[], const int i, const int dim) {
 // load data into registers first using the storage order
 const int Nspin = (N * vec_length<RegType>::value) / (3 * 2);
 // if Wilson, then load only half the number of components
 constexpr int M = ((N * vec_length<RegType>::value ) / vec_length<InterType>::value) / ((Nspin == 4) ? 2 : 1);

 InterType y[M];

 // If we are using tex references, then we can only use the predeclared texture ids
#ifndef USE_TEXTURE_OBJECTS
 if (tex_id >= 0 && tex_id <= MAX_TEX_ID) {
#endif
 // half precision types (FIXME - these don't look correct?)
 if ( isHalf<StoreType>::value ) {
 float xN = norm[i]; // NOTE(review): unlike load(), no parity offset is applied here
#pragma unroll
 for (int j=0; j<M; j++) y[j] = xN*ghostTex[i + j*ghost_stride[dim]];
 } else { // other types
#pragma unroll
 for (int j=0; j<M; j++) copyFloatN(y[j], ghostTex[i + j*ghost_stride[dim]]);
 }
#ifndef USE_TEXTURE_OBJECTS
 } else { // default load when out of tex_id range

 if ( isHalf<StoreType>::value ) {
 float xN = norm[i];
#pragma unroll
 for (int j=0; j<M; j++) {
 // NOTE(review): the line that actually loads y[j] (a copyFloatN from
 // ghost_spinor) appears to have been elided by the extraction.
 y[j] *= xN;
 }
 } else { // other types
#pragma unroll
 for (int j=0; j<M; j++) copyFloatN(y[j],ghost_spinor[i + j*ghost_stride[dim]]);
 }
 }
#endif // !USE_TEXTURE_OBJECTS

 // now convert into desired register order
 convert<RegType, InterType>(x, y, N);
 }
#endif
505 
508  if (sizeof(((StoreType*)0)->x) == sizeof(double)) precision = QUDA_DOUBLE_PRECISION;
509  else if (sizeof(((StoreType*)0)->x) == sizeof(float)) precision = QUDA_SINGLE_PRECISION;
510  else if (sizeof(((StoreType*)0)->x) == sizeof(short)) precision = QUDA_HALF_PRECISION;
511  else errorQuda("Unknown precision type\n");
512  return precision;
513  }
514 
 int Stride() const { return stride; }           // field stride in elements
 int Bytes() const { return N*sizeof(RegType); } // bytes per site in register form
517  };
518 
527 template <typename RegType, typename StoreType, int N, int write, int tex_id=-1>
528  class Spinor : public SpinorTexture<RegType,StoreType,N,tex_id> {
529 
 // intermediate type used between registers and storage
 typedef typename bridge_mapper<RegType,StoreType>::type InterType;
 // NOTE(review): a `typedef SpinorTexture<RegType,StoreType,N,tex_id> ST;`
 // line was elided by the extraction; ST is used throughout below.

 private:
#ifdef USE_TEXTURE_OBJECTS
 // with texture objects the writable pointers live here, not in the base class
 StoreType *spinor;
 StoreType *ghost_spinor;
#define SPINOR spinor
#else
#define SPINOR ST::spinor
#endif
 public:
 Spinor() : ST()
#ifdef USE_TEXTURE_OBJECTS
 , spinor(0), ghost_spinor(0)
#endif
 {} // default constructor
547 
 // Spinor must only ever called with cudaColorSpinorField references!!!!
 // Wrap field x for reading and writing; the base class handles the
 // read-side setup, while the writable raw pointers are captured here
 // when texture objects are in use.
 Spinor(const ColorSpinorField &x, int nFace=1) : ST(x, nFace)
#ifdef USE_TEXTURE_OBJECTS
 , spinor((StoreType*)x.V()), ghost_spinor((StoreType*)x.Ghost2())
#endif
 {}
554 
555  Spinor(const Spinor &st) : ST(st)
556 #ifdef USE_TEXTURE_OBJECTS
558 #endif
559  {}
560 
563  if (&src != this) {
564 #ifdef USE_TEXTURE_OBJECTS
565  spinor = src.spinor;
566  ghost_spinor = src.ghost_spinor;
567 #endif
568  }
569  return *this;
570  }
571 
 // Re-point this accessor at a different field.
 // NOTE(review): unlike the base-class set(), no nFace argument is taken or
 // forwarded here, so ST::set() uses its default (nFace=1) — confirm callers
 // never need a different face count through this path.
 void set(const cudaColorSpinorField &x){
 ST::set(x);
#ifdef USE_TEXTURE_OBJECTS
 spinor = (StoreType*)x.V();
 ghost_spinor = (StoreType*)x.Ghost2();
#endif
 }

 // Non-owning: nothing to release.
 ~Spinor() { }
581 
 // default store used for simple fields
 // Convert registers x[] back to storage order and write site i of the given
 // parity.  For half precision, store_norm writes the per-site norm and
 // returns the quantization scale applied before the short store.
 // Compiles to a no-op when the `write` template parameter is zero.
 __device__ inline void save(RegType x[], int i, const int parity = 0) {
 if (write) {
 constexpr int M = (N * vec_length<RegType>::value ) / vec_length<InterType>::value;
 InterType y[M];
 convert<InterType, RegType>(y, x, M);

 if ( isHalf<StoreType>::value ) {
 float C = store_norm<InterType, M>(ST::norm, y, ST::cb_norm_offset*parity + i);
#pragma unroll
 for (int j=0; j<M; j++) copyFloatN(SPINOR[ST::cb_offset*parity + i + j*ST::stride], C*y[j]);
 } else {
#pragma unroll
 for (int j=0; j<M; j++) copyFloatN(SPINOR[ST::cb_offset*parity + i + j*ST::stride], y[j]);
 }
 }
 }
599 
600  // used to backup the field to the host
601  void backup(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes) {
602  if (write) {
603  *spinor_h = new char[bytes];
604  cudaMemcpy(*spinor_h, SPINOR, bytes, cudaMemcpyDeviceToHost);
605  if (norm_bytes > 0) {
606  *norm_h = new char[norm_bytes];
607  cudaMemcpy(*norm_h, ST::norm, norm_bytes, cudaMemcpyDeviceToHost);
608  }
609  checkCudaError();
610  }
611  }
612 
613  // restore the field from the host
614  void restore(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes) {
615  if (write) {
616  cudaMemcpy(SPINOR, *spinor_h, bytes, cudaMemcpyHostToDevice);
617  if (norm_bytes > 0) {
618  cudaMemcpy(ST::norm, *norm_h, norm_bytes, cudaMemcpyHostToDevice);
619  delete []*norm_h;
620  *norm_h = 0;
621  }
622  delete []*spinor_h;
623  *spinor_h = 0;
624  checkCudaError();
625  }
626  }
627 
 void* V() { return (void*)SPINOR; }  // raw device pointer to the field data
 float* Norm() { return ST::norm; }   // raw device pointer to the norm array
630  };
631 
632 
633 #ifndef USE_TEXTURE_OBJECTS
634 #undef MAX_TEX_ID
635 #endif
636 
637 #endif // _TEXTURE_H
int Bytes() const
Definition: texture.h:516
enum QudaPrecision_s QudaPrecision
#define SPINOR
Definition: texture.h:539
float * Norm()
Definition: texture.h:629
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:896
~Texture()
Definition: texture.h:146
const void * src
#define errorQuda(...)
Definition: util_quda.h:90
unsigned int cb_offset
Definition: texture.h:336
Texture(const cudaColorSpinorField *x, bool use_ghost=false)
Definition: texture.h:120
~Spinor()
Definition: texture.h:580
SpinorTexture(const SpinorTexture &st)
Definition: texture.h:366
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
#define DEF_ALL(id)
Definition: texture.h:228
void * V()
Definition: texture.h:628
bridge_mapper< RegType, StoreType >::type InterType
Definition: texture.h:530
Texture< InterType, StoreType, -1 > ghostTex
Definition: texture.h:331
bridge_mapper< RegType, StoreType >::type InterType
Definition: texture.h:321
int Nspin
Definition: blas_test.cu:45
__inline__ __device__ double fetch_double(texture< int2, 1 > t, int i)
Definition: texture.h:86
__device__ void load(RegType x[], const int i, const int parity=0)
Definition: texture.h:419
void unbind()
Definition: texture.h:161
StoreType * ghost_spinor
Definition: texture.h:329
QudaPrecision Precision() const
Definition: texture.h:506
SpinorTexture< RegType, StoreType, N, tex_id > ST
Definition: texture.h:531
__device__ void copyFloatN(FloatN &a, const FloatN &b)
Definition: convert.h:33
StoreType * spinor
Definition: texture.h:328
Spinor(const ColorSpinorField &x, int nFace=1)
Definition: texture.h:549
int Stride() const
Definition: texture.h:515
int V
Definition: test_util.cpp:28
int ghost_stride[4]
Definition: texture.h:339
float fmaxf(float, float)
__device__ float store_norm(float *norm, FloatN x[M], int i)
Definition: texture.h:300
__device__ void loadGhost(RegType x[], const int i, const int dim)
Definition: texture.h:463
void checkTypes()
Definition: texture.h:279
__inline__ __device__ double2 fetch_double2(texture< int4, 1 > t, int i)
Definition: texture.h:88
Texture(const Texture &tex)
Definition: texture.h:140
static bool bound
Definition: texture.h:110
void restore(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
Definition: texture.h:614
void backup(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
Definition: texture.h:601
static int count
Definition: texture.h:111
Texture()
Definition: texture.h:114
Spinor & operator=(const Spinor &src)
Definition: texture.h:561
cpuColorSpinorField * out
float * norm
Definition: texture.h:333
unsigned int cb_norm_offset
Definition: texture.h:337
Spinor(const Spinor &st)
Definition: texture.h:555
SpinorTexture(const ColorSpinorField &x, int nFace=1)
Definition: texture.h:350
static __inline__ dim3 dim3 void size_t cudaStream_t int enum cudaTextureReadMode readMode static __inline__ const struct texture< T, dim, readMode > & tex
__forceinline__ __host__ __device__ float max_fabs(const float4 &c)
Definition: float_vector.h:198
void set(const cudaColorSpinorField &x, int nFace=1)
Definition: texture.h:397
const void * c
#define MAX_SHORT
Definition: quda_internal.h:29
Texture & operator=(const Texture &tex)
Definition: texture.h:152
__device__ OutputType operator[](unsigned int idx)
Definition: texture.h:165
#define checkCudaError()
Definition: util_quda.h:129
virtual ~SpinorTexture()
Definition: texture.h:417
Spinor()
Definition: texture.h:542
static __inline__ size_t size_t d
Texture< InterType, StoreType, tex_id > tex
Definition: texture.h:330
QudaParity parity
Definition: covdev_test.cpp:53
__device__ OutputType fetch(unsigned int idx)
Definition: texture.h:164
cpuColorSpinorField * spinor
Definition: covdev_test.cpp:41
unsigned long long bytes
Definition: blas_quda.cu:43
bool tex_id_table[MAX_TEX_ID]
Definition: texture.h:100
void bind(const InputType *, size_t bytes)
Definition: texture.h:160
#define MAX_TEX_ID
Definition: texture.h:97
SpinorTexture & operator=(const SpinorTexture &src)
Definition: texture.h:378
__device__ void save(RegType x[], int i, const int parity=0)
Definition: texture.h:583