8 #ifdef USE_TEXTURE_OBJECTS 10 template<
typename OutputType,
typename InputType>
16 #ifndef DIRECT_ACCESS_BLAS 17 cudaTextureObject_t
spinor;
24 #ifndef DIRECT_ACCESS_BLAS 25 Texture(
const cudaColorSpinorField *
x,
bool use_ghost =
false)
26 :
spinor(use_ghost ?
x->GhostTex() :
x->Tex()) { }
28 Texture(
const cudaColorSpinorField *
x,
bool use_ghost =
false)
29 :
spinor(use_ghost ? (const InputType*)(
x->Ghost2()) : (const InputType*)(
x->
V())) { }
39 #ifndef DIRECT_ACCESS_BLAS 40 __device__
inline OutputType
fetch(
unsigned int idx)
47 __device__
inline OutputType
fetch(
unsigned int idx)
54 #ifndef DIRECT_ACCESS_BLAS 56 {
return __hiloint2double(v.y, v.x); }
59 {
return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z)); }
72 #if (__COMPUTE_CAPABILITY__ >= 130) 74 __inline__ __device__
double fetch_double(texture<int2, 1>
t,
int i)
76 int2 v = tex1Dfetch(
t,
i);
77 return __hiloint2double(v.y, v.x);
82 int4 v = tex1Dfetch(
t,
i);
83 return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
// Fallback double-precision texture fetch. This variant is the stub
// compiled when the architecture-specific implementation above is not
// available (NOTE(review): the guarding #if at original line 72 tests
// __COMPUTE_CAPABILITY__ >= 130, so this presumably covers pre-1.3
// devices — confirm against the full header). It ignores the texture
// and index and trivially yields zero.
__inline__ __device__ double fetch_double(texture<int2, 1> t, int i)
{
  return 0.0;
}
91 return make_double2(0.0, 0.0);
95 #define MAX_TEXELS (1<<27) 102 template<
typename OutputType,
typename InputType,
int tex_
id>
106 #ifdef DIRECT_ACCESS_BLAS 115 #ifdef DIRECT_ACCESS_BLAS 120 Texture(
const cudaColorSpinorField *
x,
bool use_ghost =
false)
121 #ifdef DIRECT_ACCESS_BLAS 122 :
spinor( use_ghost ? (
const InputType*)(
x->Ghost2()) : (
const InputType*)(
x->V())) { }
129 errorQuda(
"Already bound to this texture reference");
134 if (use_ghost)
bind((
const InputType*)(
x->Ghost2()),
x->GhostBytes());
135 else bind((
const InputType*)
x->V(),
x->Bytes());
bound =
true;
141 #ifdef DIRECT_ACCESS_BLAS 153 #ifdef DIRECT_ACCESS_BLAS 164 __device__
inline OutputType
fetch(
unsigned int idx) { OutputType
x;
x.x =0;
return x; };
168 template<
typename OutputType,
typename InputType,
int tex_
id>
171 template<
typename OutputType,
typename InputType,
int tex_
id>
174 #define DECL_TEX(id) \ 175 texture<short2,1,cudaReadModeNormalizedFloat> tex_short2_##id; \ 176 texture<short4,1,cudaReadModeNormalizedFloat> tex_short4_##id; \ 177 texture<float,1> tex_float_##id; \ 178 texture<float2,1> tex_float2_##id; \ 179 texture<float4,1> tex_float4_##id; \ 180 texture<int4,1> tex_double2_##id; 183 #define DEF_BIND_UNBIND(outtype, intype, id) \ 184 template<> inline void Texture<outtype,intype,id>::bind(const intype *ptr, size_t bytes) \ 185 { cudaBindTexture(0,tex_##intype##_##id, ptr, bytes); } \ 186 template<> inline void Texture<outtype,intype,id>::unbind() { cudaUnbindTexture(tex_##intype##_##id); } 189 #define DEF_FETCH_TEX(outtype, intype, id) \ 190 template<> __device__ inline outtype Texture<outtype,intype,id>::fetch(unsigned int idx) \ 191 { return tex1Dfetch(tex_##intype##_##id,idx); } 194 #define DEF_FETCH_DIRECT(outtype, intype, id) \ 195 template<> __device__ inline outtype Texture<outtype,intype,id>::fetch(unsigned int idx) \ 196 { outtype out; copyFloatN(out, spinor[idx]); return out; } 199 #if defined(DIRECT_ACCESS_BLAS) 200 #define DEF_FETCH DEF_FETCH_DIRECT 202 #define DEF_FETCH DEF_FETCH_TEX 206 #if defined(DIRECT_ACCESS_BLAS) || defined(FERMI_NO_DBLE_TEX) 207 #define DEF_FETCH_DBLE DEF_FETCH_DIRECT 209 #define DEF_FETCH_DBLE(outtype, intype, id) \ 210 template<> __device__ inline outtype Texture<outtype,double2,id>::fetch(unsigned int idx) \ 211 { outtype out; copyFloatN(out, fetch_double2(tex_double2_##id,idx)); return out; } 214 #if defined(DIRECT_ACCESS_BLAS) || defined(FERMI_NO_DBLE_TEX) 215 #define DEF_FETCH_DBLE_MIXED DEF_FETCH_DIRECT 217 #define DEF_FETCH_DBLE_MIXED(outtype, intype, id) \ 218 template<> __device__ inline outtype Texture<outtype,intype,id>::fetch(unsigned int idx) \ 219 { outtype out; copyFloatN(out, tex1Dfetch(tex_##intype##_##id,idx)); return out; } 223 #define DEF_BIND_UNBIND_FETCH(outtype, intype, id) \ 224 DEF_BIND_UNBIND(outtype, intype, id) \ 225 DEF_FETCH(outtype, intype, id) 228 
#define DEF_ALL(id) \ 230 DEF_BIND_UNBIND_FETCH(float2, short2, id) \ 231 DEF_BIND_UNBIND_FETCH(float4, short4, id) \ 232 DEF_BIND_UNBIND_FETCH(float, float, id) \ 233 DEF_BIND_UNBIND_FETCH(float2, float2, id) \ 234 DEF_BIND_UNBIND_FETCH(float4, float4, id) \ 235 DEF_BIND_UNBIND(double2, double2, id) \ 236 DEF_BIND_UNBIND(float2, double2, id) \ 237 DEF_FETCH_DBLE(double2, double2, id) \ 238 DEF_FETCH_DBLE(float2, double2, id) \ 239 DEF_BIND_UNBIND(double2, float2, id) \ 240 DEF_BIND_UNBIND(double4, float4, id) \ 241 DEF_BIND_UNBIND(double2, short2, id) \ 242 DEF_BIND_UNBIND(double4, short4, id) \ 243 DEF_FETCH_DBLE_MIXED(double2, float2, id) \ 244 DEF_FETCH_DBLE_MIXED(double4, float4, id) \ 245 DEF_FETCH_DBLE_MIXED(double2, short2, id) \ 246 DEF_FETCH_DBLE_MIXED(double4, short4, id) 256 #undef DEF_BIND_UNBIND 257 #undef DEF_FETCH_DIRECT 260 #undef DEF_FETCH_DBLE 261 #undef DEF_BIND_UNBIND_FETCH 264 #endif // USE_TEXTURE_OBJECTS 278 template <
typename RegType,
typename InterType,
typename StoreType>
281 const size_t reg_size =
sizeof(((RegType*)0)->x);
282 const size_t inter_size =
sizeof(((InterType*)0)->x);
283 const size_t store_size =
sizeof(((StoreType*)0)->x);
285 if (reg_size != inter_size && store_size != 2 && inter_size != 4)
286 errorQuda(
"Precision of register (%lu) and intermediate (%lu) types must match\n",
287 (
unsigned long)reg_size, (
unsigned long)inter_size);
289 if (vecLength<InterType>() != vecLength<StoreType>()) {
290 errorQuda(
"Vector lengths intermediate and register types must match\n");
293 if (vecLength<RegType>() == 0)
errorQuda(
"Vector type not supported\n");
294 if (vecLength<InterType>() == 0)
errorQuda(
"Vector type not supported\n");
295 if (vecLength<StoreType>() == 0)
errorQuda(
"Vector type not supported\n");
299 template <
typename FloatN,
int M>
303 for (
int j=0; j<M; j++)
c[j] =
max_fabs(
x[j]);
305 for (
int j=1; j<M; j++)
c[0] =
fmaxf(
c[j],
c[0]);
318 template <
typename RegType,
typename StoreType,
int N,
int tex_id=-1>
321 typedef typename bridge_mapper<RegType,StoreType>::type
InterType;
324 #ifdef USE_TEXTURE_OBJECTS // texture objects 344 #ifndef USE_TEXTURE_OBJECTS
351 #ifndef USE_TEXTURE_OBJECTS
354 tex(&(static_cast<const cudaColorSpinorField&>(
x))),
355 ghostTex(&(static_cast<const cudaColorSpinorField&>(
x)), true),
360 checkTypes<RegType,InterType,StoreType>();
367 #ifndef USE_TEXTURE_OBJECTS
380 #ifndef USE_TEXTURE_OBJECTS 397 void set(
const cudaColorSpinorField &
x,
int nFace=1){
398 #ifdef USE_TEXTURE_OBJECTS 407 norm = (
float*)
x.Norm();
414 checkTypes<RegType,InterType,StoreType>();
419 __device__
inline void load(RegType
x[],
const int i,
const int parity=0) {
425 #ifndef USE_TEXTURE_OBJECTS 437 #ifndef USE_TEXTURE_OBJECTS 443 for (
int j=0; j<M; j++) {
452 #endif // !USE_TEXTURE_OBJECTS 455 convert<RegType, InterType>(
x,
y, N);
472 #ifndef USE_TEXTURE_OBJECTS 484 #ifndef USE_TEXTURE_OBJECTS 490 for (
int j=0; j<M; j++) {
499 #endif // !USE_TEXTURE_OBJECTS 502 convert<RegType, InterType>(
x,
y, N);
511 else errorQuda(
"Unknown precision type\n");
516 int Bytes()
const {
return N*
sizeof(RegType); }
527 template <
typename RegType,
typename StoreType,
int N,
int write,
int tex_id=-1>
530 typedef typename bridge_mapper<RegType,StoreType>::type
InterType;
534 #ifdef USE_TEXTURE_OBJECTS 537 #define SPINOR spinor 539 #define SPINOR ST::spinor 543 #ifdef USE_TEXTURE_OBJECTS
549 Spinor(
const ColorSpinorField &
x,
int nFace=1) :
ST(
x, nFace)
550 #ifdef USE_TEXTURE_OBJECTS
556 #ifdef USE_TEXTURE_OBJECTS
564 #ifdef USE_TEXTURE_OBJECTS 572 void set(
const cudaColorSpinorField &
x){
574 #ifdef USE_TEXTURE_OBJECTS 583 __device__
inline void save(RegType
x[],
int i,
const int parity = 0) {
587 convert<InterType, RegType>(
y,
x, M);
601 void backup(
char **spinor_h,
char **norm_h,
size_t bytes,
size_t norm_bytes) {
603 *spinor_h =
new char[
bytes];
604 cudaMemcpy(*spinor_h,
SPINOR,
bytes, cudaMemcpyDeviceToHost);
605 if (norm_bytes > 0) {
606 *norm_h =
new char[norm_bytes];
607 cudaMemcpy(*norm_h,
ST::norm, norm_bytes, cudaMemcpyDeviceToHost);
614 void restore(
char **spinor_h,
char **norm_h,
size_t bytes,
size_t norm_bytes) {
616 cudaMemcpy(
SPINOR, *spinor_h,
bytes, cudaMemcpyHostToDevice);
617 if (norm_bytes > 0) {
618 cudaMemcpy(
ST::norm, *norm_h, norm_bytes, cudaMemcpyHostToDevice);
633 #ifndef USE_TEXTURE_OBJECTS
enum QudaPrecision_s QudaPrecision
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the squared magnitude of z (|z|²).
Texture(const cudaColorSpinorField *x, bool use_ghost=false)
SpinorTexture(const SpinorTexture &st)
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
bridge_mapper< RegType, StoreType >::type InterType
Texture< InterType, StoreType, -1 > ghostTex
bridge_mapper< RegType, StoreType >::type InterType
__inline__ __device__ double fetch_double(texture< int2, 1 > t, int i)
__device__ void load(RegType x[], const int i, const int parity=0)
QudaPrecision Precision() const
SpinorTexture< RegType, StoreType, N, tex_id > ST
__device__ void copyFloatN(FloatN &a, const FloatN &b)
Spinor(const ColorSpinorField &x, int nFace=1)
float fmaxf(float, float)
__device__ float store_norm(float *norm, FloatN x[M], int i)
__device__ void loadGhost(RegType x[], const int i, const int dim)
__inline__ __device__ double2 fetch_double2(texture< int4, 1 > t, int i)
Texture(const Texture &tex)
void restore(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
void backup(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
Spinor & operator=(const Spinor &src)
cpuColorSpinorField * out
unsigned int cb_norm_offset
SpinorTexture(const ColorSpinorField &x, int nFace=1)
static __inline__ dim3 dim3 void size_t cudaStream_t int enum cudaTextureReadMode readMode static __inline__ const struct texture< T, dim, readMode > & tex
__forceinline__ __host__ __device__ float max_fabs(const float4 &c)
void set(const cudaColorSpinorField &x, int nFace=1)
Texture & operator=(const Texture &tex)
__device__ OutputType operator[](unsigned int idx)
static __inline__ size_t size_t d
Texture< InterType, StoreType, tex_id > tex
__device__ OutputType fetch(unsigned int idx)
cpuColorSpinorField * spinor
bool tex_id_table[MAX_TEX_ID]
void bind(const InputType *, size_t bytes)
SpinorTexture & operator=(const SpinorTexture &src)
__device__ void save(RegType x[], int i, const int parity=0)