8 #if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__)
14 #if (__COMPUTE_CAPABILITY__ >= 200)
/**
 * Load a double2 from global memory using the PTX "cache streaming" (.cs)
 * qualifier, which marks the data as likely to be accessed only once.
 *
 * @param[out] a     Receives the two loaded components.
 * @param[in]  addr  Global-memory address to read. The vectorized 128-bit load
 *                   requires 16-byte alignment (the natural alignment of
 *                   double2).
 */
__device__ inline void load_streaming_double2(double2 &a, const double2 *addr)
{
  double x, y;
  // volatile + "memory": the read goes through a pointer operand the compiler
  // cannot see into, so it must not be moved across surrounding memory accesses.
  // __PTR is the pointer-register constraint macro defined outside this block.
  asm volatile("ld.cs.global.v2.f64 {%0, %1}, [%2+0];"
               : "=d"(x), "=d"(y)
               : __PTR(addr)
               : "memory");
  a.x = x;
  a.y = y;
}
/**
 * Load a float4 from global memory using the PTX "cache streaming" (.cs)
 * qualifier, which marks the data as likely to be accessed only once.
 *
 * @param[out] a     Receives the four loaded components.
 * @param[in]  addr  Global-memory address to read. The vectorized 128-bit load
 *                   requires 16-byte alignment (the natural alignment of
 *                   float4).
 */
__device__ inline void load_streaming_float4(float4 &a, const float4 *addr)
{
  float x, y, z, w;
  // volatile + "memory": the read goes through a pointer operand the compiler
  // cannot see into, so it must not be moved across surrounding memory accesses.
  // __PTR is the pointer-register constraint macro defined outside this block.
  asm volatile("ld.cs.global.v4.f32 {%0, %1, %2, %3}, [%4+0];"
               : "=f"(x), "=f"(y), "=f"(z), "=f"(w)
               : __PTR(addr)
               : "memory");
  a.x = x;
  a.y = y;
  a.z = z;
  a.w = w;
}
/**
 * Load a float4 from global memory using the PTX "cache global" (.cg)
 * qualifier, i.e. cached at the global (L2) level and bypassing L1.
 *
 * @param[out] a     Receives the four loaded components.
 * @param[in]  addr  Global-memory address to read. The vectorized 128-bit load
 *                   requires 16-byte alignment (the natural alignment of
 *                   float4).
 */
__device__ inline void load_global_float4(float4 &a, const float4 *addr)
{
  float x, y, z, w;
  // volatile + "memory": the read goes through a pointer operand the compiler
  // cannot see into, so it must not be moved across surrounding memory accesses.
  // __PTR is the pointer-register constraint macro defined outside this block.
  asm volatile("ld.cg.global.v4.f32 {%0, %1, %2, %3}, [%4+0];"
               : "=f"(x), "=f"(y), "=f"(z), "=f"(w)
               : __PTR(addr)
               : "memory");
  a.x = x;
  a.y = y;
  a.z = z;
  a.w = w;
}
/**
 * Store four floats to global memory as a single vectorized write using the
 * PTX "cache streaming" (.cs) qualifier, which marks the line as likely to be
 * accessed only once.
 *
 * @param[out] addr  Global-memory destination. The 128-bit vector store
 *                   requires 16-byte alignment (the natural alignment of
 *                   float4).
 * @param[in]  x,y,z,w  Components written to addr->x, .y, .z, .w respectively.
 */
__device__ inline void store_streaming_float4(float4 *addr, float x, float y, float z, float w)
{
  // The asm has no output operands and writes memory through a pointer the
  // compiler cannot see into: volatile keeps it from being dropped or moved,
  // and the "memory" clobber declares the hidden write.
  // __PTR is the pointer-register constraint macro defined outside this block.
  asm volatile("st.cs.global.v4.f32 [%0+0], {%1, %2, %3, %4};"
               :
               : __PTR(addr), "f"(x), "f"(y), "f"(z), "f"(w)
               : "memory");
}
/**
 * Store four 16-bit integers to global memory as a single vectorized write
 * using the PTX "cache streaming" (.cs) qualifier, which marks the line as
 * likely to be accessed only once.
 *
 * @param[out] addr  Global-memory destination. The 64-bit vector store
 *                   requires 8-byte alignment (the natural alignment of
 *                   short4).
 * @param[in]  x,y,z,w  Components written to addr->x, .y, .z, .w respectively.
 */
__device__ inline void store_streaming_short4(short4 *addr, short x, short y, short z, short w)
{
  // The asm has no output operands and writes memory through a pointer the
  // compiler cannot see into: volatile keeps it from being dropped or moved,
  // and the "memory" clobber declares the hidden write.
  // __PTR is the pointer-register constraint macro defined outside this block.
  asm volatile("st.cs.global.v4.s16 [%0+0], {%1, %2, %3, %4};"
               :
               : __PTR(addr), "h"(x), "h"(y), "h"(z), "h"(w)
               : "memory");
}
/**
 * Store two doubles to global memory as a single vectorized write using the
 * PTX "cache streaming" (.cs) qualifier, which marks the line as likely to be
 * accessed only once.
 *
 * @param[out] addr  Global-memory destination. The 128-bit vector store
 *                   requires 16-byte alignment (the natural alignment of
 *                   double2).
 * @param[in]  x,y   Components written to addr->x and addr->y respectively.
 */
__device__ inline void store_streaming_double2(double2 *addr, double x, double y)
{
  // The asm has no output operands and writes memory through a pointer the
  // compiler cannot see into: volatile keeps it from being dropped or moved,
  // and the "memory" clobber declares the hidden write.
  // __PTR is the pointer-register constraint macro defined outside this block.
  asm volatile("st.cs.global.v2.f64 [%0+0], {%1, %2};"
               :
               : __PTR(addr), "d"(x), "d"(y)
               : "memory");
}
/**
 * Store two floats to global memory as a single vectorized write using the
 * PTX "cache streaming" (.cs) qualifier, which marks the line as likely to be
 * accessed only once.
 *
 * @param[out] addr  Global-memory destination. The 64-bit vector store
 *                   requires 8-byte alignment (the natural alignment of
 *                   float2).
 * @param[in]  x,y   Components written to addr->x and addr->y respectively.
 */
__device__ inline void store_streaming_float2(float2 *addr, float x, float y)
{
  // The asm has no output operands and writes memory through a pointer the
  // compiler cannot see into: volatile keeps it from being dropped or moved,
  // and the "memory" clobber declares the hidden write.
  // __PTR is the pointer-register constraint macro defined outside this block.
  asm volatile("st.cs.global.v2.f32 [%0+0], {%1, %2};"
               :
               : __PTR(addr), "f"(x), "f"(y)
               : "memory");
}
/**
 * Store two 16-bit integers to global memory as a single vectorized write
 * using the PTX "cache streaming" (.cs) qualifier, which marks the line as
 * likely to be accessed only once.
 *
 * @param[out] addr  Global-memory destination. The 32-bit vector store
 *                   requires 4-byte alignment (the natural alignment of
 *                   short2).
 * @param[in]  x,y   Components written to addr->x and addr->y respectively.
 */
__device__ inline void store_streaming_short2(short2 *addr, short x, short y)
{
  // The asm has no output operands and writes memory through a pointer the
  // compiler cannot see into: volatile keeps it from being dropped or moved,
  // and the "memory" clobber declares the hidden write.
  // __PTR is the pointer-register constraint macro defined outside this block.
  asm volatile("st.cs.global.v2.s16 [%0+0], {%1, %2};"
               :
               : __PTR(addr), "h"(x), "h"(y)
               : "memory");
}
66 #endif // COMPUTE_CAPABILITY