#ifdef USE_TEXTURE_OBJECTS

template <typename OutputType, typename InputType> class Texture
{
  typedef typename quda::mapper<InputType>::type RegType;

private:
  cudaTextureObject_t spinor;

public:
  Texture() : spinor(0) {}
  Texture(const cudaColorSpinorField *x, bool use_ghost = false)
    : spinor(use_ghost ? x->GhostTex() : x->Tex()) { }
  Texture(const Texture &tex) : spinor(tex.spinor) {}
  ~Texture() {}
  Texture &operator=(const Texture &tex)
  {
    if (this != &tex) spinor = tex.spinor;
    return *this;
  }
  __device__ inline OutputType fetch(unsigned int idx) const
  {
    OutputType rtn;
    copyFloatN(rtn, tex1Dfetch_<RegType>(spinor, idx));
    return rtn;
  }
  __device__ inline OutputType operator[](unsigned int idx) const { return fetch(idx); }
};
// 64-bit texture fetches are not supported natively, so doubles are
// fetched as pairs of 32-bit integers and recombined on the fly
__device__ inline double fetch_double(int2 v) { return __hiloint2double(v.y, v.x); }

__device__ inline double2 fetch_double2(int4 v)
{
  return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}
// specializations that route double-precision fetches through int4
template <>
__device__ inline double2 Texture<double2, double2>::fetch(unsigned int idx) const
{
  double2 out;
  copyFloatN(out, fetch_double2(tex1Dfetch_<int4>(spinor, idx)));
  return out;
}

template <>
__device__ inline float2 Texture<float2, double2>::fetch(unsigned int idx) const
{
  float2 out;
  copyFloatN(out, fetch_double2(tex1Dfetch_<int4>(spinor, idx)));
  return out;
}
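/*
   Why the detour through int4: CUDA's texture hardware has no 64-bit channel
   format, so a double2 field is bound with a 32-bit x4 channel descriptor and
   each fetch is reassembled from integer halves, exactly as fetch_double2
   does above. A minimal standalone sketch of the same trick (the variable
   names d_buf, obj, and n below are illustrative, not part of this header):

     // the channel descriptor deliberately says int4, not double2
     cudaChannelFormatDesc desc = cudaCreateChannelDesc<int4>();
     cudaResourceDesc res = {};
     res.resType = cudaResourceTypeLinear;
     res.res.linear.devPtr = d_buf;                    // device double2 array
     res.res.linear.desc = desc;
     res.res.linear.sizeInBytes = n * sizeof(double2);
     cudaTextureDesc td = {};
     td.readMode = cudaReadModeElementType;
     cudaTextureObject_t obj;
     cudaCreateTextureObject(&obj, &res, &td, nullptr);

     // in a kernel: one int4 fetch yields one double2
     int4 v = tex1Dfetch<int4>(obj, idx);
     double2 val = make_double2(__hiloint2double(v.y, v.x),
                                __hiloint2double(v.w, v.z));
*/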
#else // !USE_TEXTURE_OBJECTS - use direct reads

template <typename OutputType, typename InputType> class Texture
{
private:
  const InputType *spinor; // direct-access pointer into the field

public:
  Texture() : spinor(0) {}
  Texture(const cudaColorSpinorField *x, bool use_ghost = false) :
    spinor(use_ghost ? (const InputType *)(x->Ghost2()) : (const InputType *)(x->V())) { }
  Texture(const Texture &tex) : spinor(tex.spinor) {}
  ~Texture() {}
  Texture &operator=(const Texture &tex)
  {
    if (this != &tex) spinor = tex.spinor;
    return *this;
  }
  __device__ __host__ inline OutputType operator[](unsigned int idx) const
  {
    OutputType out;
    copyFloatN(out, spinor[idx]);
    return out;
  }
};

#endif // USE_TEXTURE_OBJECTS

/**
   Sanity check on the (RegType, InterType, StoreType) triple: register and
   intermediate precision must match unless the storage is fixed point or the
   intermediate is single precision, and all three must be supported vector types.
*/
template <typename RegType, typename InterType, typename StoreType> void checkTypes()
{
  const size_t reg_size = sizeof(((RegType *)0)->x);
  const size_t inter_size = sizeof(((InterType *)0)->x);
  const size_t store_size = sizeof(((StoreType *)0)->x);
  if (reg_size != inter_size && store_size != 2 && store_size != 1 && inter_size != 4)
    errorQuda("Precision of register (%lu) and intermediate (%lu) types must match\n",
              (unsigned long)reg_size, (unsigned long)inter_size);
  if (vecLength<InterType>() != vecLength<StoreType>()) {
    errorQuda("Vector lengths of intermediate and store types must match\n");
  }
  if (vecLength<RegType>() == 0) errorQuda("Vector type not supported\n");
  if (vecLength<InterType>() == 0) errorQuda("Vector type not supported\n");
  if (vecLength<StoreType>() == 0) errorQuda("Vector type not supported\n");
}
/**
   Compute the per-site norm of x[] and return the scale factor that maps the
   site's largest component onto the fixed-point range.
*/
template <int M, typename FloatN, typename FixedType>
__device__ inline float store_norm(float *norm, FloatN x[M], int i)
{
  float c[M];
#pragma unroll
  for (int j = 0; j < M; j++) c[j] = max_fabs(x[j]);
#pragma unroll
  for (int j = 1; j < M; j++) c[0] = fmaxf(c[j], c[0]);
  norm[i] = c[0];
  return __fdividef(fixedMaxValue<FixedType>::value, c[0]);
}
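/*
   Round-trip arithmetic for the fixed-point normalization (a worked example,
   assuming short storage with fixedMaxValue = 32767 and normalized texture
   reads that map a short s to s / 32767):

     site max  c[0] = 0.5             -> norm[i] = 0.5
     scale     C    = 32767 / 0.5
     store     x = 0.25 stores as (short)(0.25 * C) = 16384 (approx)
     load      reads back 16384 / 32767 ~ 0.5, then multiplies by
               norm[i] = 0.5 to recover x ~ 0.25

   Each site is quantized relative to its own largest component, which is
   what makes half/quarter-precision spinor storage usable.
*/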
/**
   SpinorTexture wraps a ColorSpinorField for read-only kernel access,
   converting StoreType -> InterType -> RegType on load.
*/
template <typename RegType, typename StoreType, int N> class SpinorTexture
{
protected:
  typedef typename bridge_mapper<RegType, StoreType>::type InterType;

  Texture<InterType, StoreType> tex;
  Texture<InterType, StoreType> ghostTex;
  float *norm;
  int stride;
  unsigned int cb_offset;
  unsigned int cb_norm_offset;
  int ghost_stride[4];

public:
  SpinorTexture() : tex(), ghostTex(), norm(0), stride(0), cb_offset(0), cb_norm_offset(0) {}
  SpinorTexture(const ColorSpinorField &x, int nFace = 1) :
    tex(&(static_cast<const cudaColorSpinorField &>(x))),
    ghostTex(&(static_cast<const cudaColorSpinorField &>(x)), true),
    norm((float *)x.Norm()),
    stride(x.Stride()),
    cb_offset(x.Bytes() / (2 * sizeof(StoreType))),
    cb_norm_offset(x.NormBytes() / (2 * sizeof(float)))
  {
    checkTypes<RegType, InterType, StoreType>();
    for (int d = 0; d < 4; d++) ghost_stride[d] = nFace * x.SurfaceCB(d);
  }
  SpinorTexture(const SpinorTexture &st) :
    tex(st.tex),
    ghostTex(st.ghostTex),
    norm(st.norm),
    stride(st.stride),
    cb_offset(st.cb_offset),
    cb_norm_offset(st.cb_norm_offset)
  {
    for (int d = 0; d < 4; d++) ghost_stride[d] = st.ghost_stride[d];
  }
  SpinorTexture &operator=(const SpinorTexture &src)
  {
    if (&src != this) {
      tex = src.tex; ghostTex = src.ghostTex; norm = src.norm; stride = src.stride;
      cb_offset = src.cb_offset; cb_norm_offset = src.cb_norm_offset;
      for (int d = 0; d < 4; d++) ghost_stride[d] = src.ghost_stride[d];
    }
    return *this;
  }
  void set(const cudaColorSpinorField &x, int nFace = 1)
  {
    // rebind the textures to the new field
    tex = Texture<InterType, StoreType>(&x);
    ghostTex = Texture<InterType, StoreType>(&x, true);
    norm = (float *)x.Norm();
    stride = x.Stride();
    cb_offset = x.Bytes() / (2 * sizeof(StoreType));
    cb_norm_offset = x.NormBytes() / (2 * sizeof(float));
    for (int d = 0; d < 4; d++) ghost_stride[d] = nFace * x.SurfaceCB(d);
    checkTypes<RegType, InterType, StoreType>();
  }
  /**
     Load the spinor at site i (and parity), converting to RegType registers.
  */
  __device__ inline void load(RegType x[], const int i, const int parity = 0) const
  {
    // M is the number of InterType-sized reads needed per site
    constexpr int M = (N * vec_length<RegType>::value) / vec_length<InterType>::value;
    InterType y[M];

    if (isFixed<StoreType>::value) {
      // fixed-point storage: rescale by the per-site norm
      float xN = norm[cb_norm_offset * parity + i];
#pragma unroll
      for (int j = 0; j < M; j++) y[j] = xN * tex[cb_offset * parity + i + j * stride];
    } else {
#pragma unroll
      for (int j = 0; j < M; j++) copyFloatN(y[j], tex[cb_offset * parity + i + j * stride]);
    }

    convert<RegType, InterType>(x, y, N);
  }
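/*
   Example of the M computation above (assuming a Wilson spinor of 24 reals
   per site with RegType = float4, N = 6, and short4 storage, for which the
   bridge type InterType is float4): N * vec_length<RegType> = 24 reals, so
   M = 24 / 4 = 6 strided reads per site, all scaled by the same norm.
*/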
  /**
     Load the ghost spinor for dimension dim. For Wilson fermions (Nspin == 4)
     the ghost is assumed spin projected, so only half the components load.
  */
  __device__ inline void loadGhost(RegType x[], const int i, const int dim) const
  {
    const int Nspin = (N * vec_length<RegType>::value) / (3 * 2);
    // if Wilson, then load only half the number of components
    constexpr int M = ((N * vec_length<RegType>::value) / vec_length<InterType>::value) / ((Nspin == 4) ? 2 : 1);
    InterType y[M];

    if (isFixed<StoreType>::value) {
      float xN = norm[i]; // per-site ghost norm (the exact offset is an assumption here)
#pragma unroll
      for (int j = 0; j < M; j++) y[j] = xN * ghostTex[i + j * ghost_stride[dim]];
    } else {
#pragma unroll
      for (int j = 0; j < M; j++) copyFloatN(y[j], ghostTex[i + j * ghost_stride[dim]]);
    }

    convert<RegType, InterType>(x, y, N);
  }
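/*
   Worked example of the spin projection above (assuming a double-precision
   Wilson spinor: RegType = InterType = double2, N = 12): the site holds
   N * vec_length<RegType> = 24 reals, so Nspin = 24 / (3 * 2) = 4 and
   M = (24 / 2) / 2 = 6 double2 loads, i.e. only the 12 reals of the
   spin-projected half spinor cross the halo.
*/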
  QudaPrecision Precision() const
  {
    QudaPrecision precision = QUDA_INVALID_PRECISION;
    if (sizeof(((StoreType *)0)->x) == sizeof(double))
      precision = QUDA_DOUBLE_PRECISION;
    else if (sizeof(((StoreType *)0)->x) == sizeof(float))
      precision = QUDA_SINGLE_PRECISION;
    else if (sizeof(((StoreType *)0)->x) == sizeof(short))
      precision = QUDA_HALF_PRECISION;
    else if (sizeof(((StoreType *)0)->x) == sizeof(char))
      precision = QUDA_QUARTER_PRECISION;
    else
      errorQuda("Unknown precision type\n");
    return precision;
  }
};
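/*
   Example: Precision() reports the storage precision, not the register
   precision, so (assuming the short4 storage used for half precision)
   SpinorTexture<float4, short4, 6> reports QUDA_HALF_PRECISION even though
   the kernel computes in 32-bit floats.
*/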
/**
   Spinor extends SpinorTexture with a writable pointer; the write template
   parameter compiles save/backup/restore away for read-only instances.
*/
template <typename RegType, typename StoreType, int N, int write>
class Spinor : public SpinorTexture<RegType, StoreType, N>
{
  typedef typename bridge_mapper<RegType, StoreType>::type InterType;
  typedef SpinorTexture<RegType, StoreType, N> ST;

private:
  StoreType *spinor;
  StoreType *ghost_spinor;

public:
  Spinor() : ST(), spinor(0), ghost_spinor(0) {}
  Spinor(const ColorSpinorField &x, int nFace = 1) :
    ST(x, nFace),
    spinor((StoreType *)x.V()),
    ghost_spinor((StoreType *)x.Ghost2())
  { }

  Spinor(const Spinor &st) : ST(st), spinor(st.spinor), ghost_spinor(st.ghost_spinor) {}

  Spinor &operator=(const Spinor &src)
  {
    if (&src != this) {
      ST::operator=(src);
      spinor = src.spinor;
      ghost_spinor = src.ghost_spinor;
    }
    return *this;
  }
  void set(const cudaColorSpinorField &x)
  {
    ST::set(x);
    spinor = (StoreType *)x.V();
    ghost_spinor = (StoreType *)x.Ghost2();
  }
  /**
     Save the RegType registers for site i (and parity) back to storage order.
  */
  __device__ inline void save(RegType x[], int i, const int parity = 0)
  {
    if (write) {
      constexpr int M = (N * vec_length<RegType>::value) / vec_length<InterType>::value;
      InterType y[M];
      convert<InterType, RegType>(y, x, M);

      if (isFixed<StoreType>::value) {
        // compute and store the per-site norm, then scale into the fixed range
        float C = store_norm<M, InterType, StoreType>(ST::norm, y, ST::cb_norm_offset * parity + i);
#pragma unroll
        for (int j = 0; j < M; j++) copyFloatN(spinor[ST::cb_offset * parity + i + j * ST::stride], C * y[j]);
      } else {
#pragma unroll
        for (int j = 0; j < M; j++) copyFloatN(spinor[ST::cb_offset * parity + i + j * ST::stride], y[j]);
      }
    }
  }
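/*
   Sketch of how load()/save() pair up in a grid-stride kernel (the kernel
   name, `length` parameter, and launch layout are illustrative assumptions,
   not part of this header):

     template <typename RegType, typename StoreType, int N>
     __global__ void copySpinor(Spinor<RegType, StoreType, N, 1> dst,
                                Spinor<RegType, StoreType, N, 0> src,
                                int length, int parity)
     {
       for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < length;
            i += gridDim.x * blockDim.x) {
         RegType x[N];
         src.load(x, i, parity); // StoreType -> InterType -> RegType
         dst.save(x, i, parity); // RegType -> InterType -> StoreType
       }
     }

   With write = 0 the save() call compiles to nothing, so the same kernel
   body can be instantiated read-only.
*/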
  void backup(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
  {
    if (write) {
      *spinor_h = new char[bytes];
      cudaMemcpy(*spinor_h, spinor, bytes, cudaMemcpyDeviceToHost);
      if (norm_bytes > 0) {
        *norm_h = new char[norm_bytes];
        cudaMemcpy(*norm_h, ST::norm, norm_bytes, cudaMemcpyDeviceToHost);
      }
      checkCudaError();
    }
  }
  void restore(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
  {
    if (write) {
      cudaMemcpy(spinor, *spinor_h, bytes, cudaMemcpyHostToDevice);
      if (norm_bytes > 0) {
        cudaMemcpy(ST::norm, *norm_h, norm_bytes, cudaMemcpyHostToDevice);
        delete[] *norm_h;
        *norm_h = 0;
      }
      delete[] *spinor_h;
      *spinor_h = 0;
      checkCudaError();
    }
  }
};
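/*
   Typical backup()/restore() use: preserving an output field while trial
   kernel launches (e.g. during autotuning) are allowed to clobber it. A
   sketch, assuming a writable Spinor `out` over a field `x` whose Bytes()
   and NormBytes() are the sizes used above:

     char *spinor_h = nullptr, *norm_h = nullptr;
     out.backup(&spinor_h, &norm_h, x.Bytes(), x.NormBytes());
     // ... launch trial kernels that write through out.save() ...
     out.restore(&spinor_h, &norm_h, x.Bytes(), x.NormBytes());

   backup() allocates the host copies with new[]; restore() deletes them and
   nulls the pointers, so each backup pairs with exactly one restore.
*/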