QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
texture_helper.cuh
Go to the documentation of this file.
1 #pragma once
2 
/**
 * Generic texture fetch helper.
 *
 * Forwards to the CUDA intrinsic tex1Dfetch<T>, reading the element at
 * integer index i through the texture object tex.
 *
 * @tparam T   Element type requested from the texture
 * @param tex  Texture object to read from
 * @param i    Integer element index passed straight to tex1Dfetch
 * @return     The value produced by tex1Dfetch<T>(tex, i)
 */
template <typename T>
__device__ __forceinline__ T tex1Dfetch_(cudaTextureObject_t tex, int i)
{
  T fetched = tex1Dfetch<T>(tex, i);
  return fetched;
}
8 
9 // clang-cuda seems incompatible with the CUDA texture headers, so we must resort to PTX
10 #if defined(__clang__) && defined(__CUDA__)
11 
/**
 * float specialization of tex1Dfetch_, implemented with inline PTX.
 *
 * The tex.1d.v4.f32.s32 instruction writes four float outputs; only the
 * first one is returned, the remaining three are discarded.
 *
 * @param tex  Texture object to read from (passed as a 64-bit operand)
 * @param i    Signed 32-bit element index
 * @return     First component of the fetched 4-vector
 */
template <>
__device__ __forceinline__ float tex1Dfetch_<float>(cudaTextureObject_t tex, int i)
{
  // The instruction always yields four components, so provide a
  // destination for each of them even though only x is used.
  float x, y, z, w;
  asm("tex.1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5}];"
      : "=f"(x), "=f"(y), "=f"(z), "=f"(w)
      : "l"(tex), "r"(i));
  return x;
}
20 
/**
 * float2 specialization of tex1Dfetch_, implemented with inline PTX.
 *
 * The tex.1d.v4.f32.s32 instruction writes four float outputs; the first
 * two are packed into the returned float2, the last two are discarded.
 *
 * @param tex  Texture object to read from (passed as a 64-bit operand)
 * @param i    Signed 32-bit element index
 * @return     float2 built from the first two fetched components
 */
template <>
__device__ __forceinline__ float2 tex1Dfetch_<float2>(cudaTextureObject_t tex, int i)
{
  // All four outputs of the instruction need a destination register.
  float x, y, z, w;
  asm("tex.1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5}];"
      : "=f"(x), "=f"(y), "=f"(z), "=f"(w)
      : "l"(tex), "r"(i));
  return make_float2(x, y);
}
29 
/**
 * float4 specialization of tex1Dfetch_, implemented with inline PTX.
 *
 * The four float outputs of tex.1d.v4.f32.s32 are written directly into
 * the x, y, z and w members of the returned float4.
 *
 * @param tex  Texture object to read from (passed as a 64-bit operand)
 * @param i    Signed 32-bit element index
 * @return     The fetched 4-vector
 */
template <>
__device__ __forceinline__ float4 tex1Dfetch_<float4>(cudaTextureObject_t tex, int i)
{
  float4 texel;
  asm("tex.1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5}];"
      : "=f"(texel.x), "=f"(texel.y), "=f"(texel.z), "=f"(texel.w)
      : "l"(tex), "r"(i));
  return texel;
}
38 
/**
 * int4 specialization of tex1Dfetch_, implemented with inline PTX.
 *
 * Uses the signed-integer form of the fetch (tex.1d.v4.s32.s32); its four
 * 32-bit outputs are written directly into the returned int4.
 *
 * @param tex  Texture object to read from (passed as a 64-bit operand)
 * @param i    Signed 32-bit element index
 * @return     The fetched integer 4-vector
 */
template <>
__device__ __forceinline__ int4 tex1Dfetch_<int4>(cudaTextureObject_t tex, int i)
{
  int4 texel;
  asm("tex.1d.v4.s32.s32 {%0, %1, %2, %3}, [%4, {%5}];"
      : "=r"(texel.x), "=r"(texel.y), "=r"(texel.z), "=r"(texel.w)
      : "l"(tex), "r"(i));
  return texel;
}
47 
48 #endif
__device__ __forceinline__ T tex1Dfetch_(cudaTextureObject_t tex, int i)