// Fragment: a pointer-width preprocessor test fused with an inline-PTX load.
// NOTE(review): the leading `10` / `21` look like line numbers left behind by
// an extraction tool rather than source tokens -- confirm against the real file.
//
// The #if condition holds for 64-bit MSVC builds (_MSC_VER && _WIN64) or for
// LP64 targets. What it selects is not visible here; presumably it picks the
// address-register constraint behind __PTR -- TODO confirm.
//
// The asm issues `ld.cs.global.v2.f64`: a single vectorized load of two f64
// values from the global state space with the cache-streaming (.cs) hint.
//   %0, %1 -> x, y   ("=d": write-only f64 register outputs)
//   %2     -> addr   (constraint supplied by __PTR)
// A v2.f64 access is 16 bytes wide, so PTX requires addr to be 16-byte aligned.
// NOTE(review): there is no "memory" clobber, so the compiler is not told that
// this statement reads memory -- verify ordering against nearby stores.
10 #if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) 21 asm(
"ld.cs.global.v2.f64 {%0, %1}, [%2+0];" :
"=d"(
x),
"=d"(
y) :
__PTR(addr));
// Inline PTX: `ld.cs.global.v4.f32` -- one vectorized load of four f32 values
// from the global state space using the cache-streaming (.cs) hint.
//   outputs %0..%3 -> x, y, z, w   ("=f": write-only f32 registers)
//   input   %4     -> addr         (constraint supplied by __PTR)
// The access is 16 bytes wide; PTX requires addr to be 16-byte aligned.
28 asm(
"ld.cs.global.v4.f32 {%0, %1, %2, %3}, [%4+0];"
: "=f"(x), "=f"(y), "=f"(z), "=f"(w)
: __PTR(addr));
// Inline PTX: `ld.cg.global.v4.f32` -- one vectorized load of four f32 values
// from the global state space using the .cg cache operator (cache at the
// global level, i.e. L2, bypassing L1).
//   outputs %0..%3 -> x, y, z, w   ("=f": write-only f32 registers)
//   input   %4     -> addr         (constraint supplied by __PTR)
// The access is 16 bytes wide; PTX requires addr to be 16-byte aligned.
35 asm(
"ld.cg.global.v4.f32 {%0, %1, %2, %3}, [%4+0];"
: "=f"(x), "=f"(y), "=f"(z), "=f"(w)
: __PTR(addr));
// Inline PTX: `st.cs.global.v4.f32` -- one vectorized store of four f32 values
// to the global state space using the cache-streaming (.cs) hint.
//   input %0     -> addr         (constraint supplied by __PTR)
//   input %1..%4 -> x, y, z, w   ("f": f32 registers)
// The access is 16 bytes wide; PTX requires addr to be 16-byte aligned.
//
// The statement has no output operands, yet it writes memory through `addr`.
// `volatile` keeps the compiler from deleting or moving it, and the "memory"
// clobber declares the hidden write so surrounding loads/stores are not
// reordered across it or served from stale register copies.
41 asm volatile(
"st.cs.global.v4.f32 [%0+0], {%1, %2, %3, %4};"
:
: __PTR(addr), "f"(x), "f"(y), "f"(z), "f"(w)
: "memory");
// Inline PTX: `st.cs.global.v4.s16` -- one vectorized store of four 16-bit
// signed integers to the global state space using the cache-streaming (.cs)
// hint.
//   input %0     -> addr         (constraint supplied by __PTR)
//   input %1..%4 -> x, y, z, w   ("h": 16-bit registers)
// The access is 8 bytes wide; PTX requires addr to be 8-byte aligned.
//
// The statement has no output operands, yet it writes memory through `addr`.
// `volatile` keeps the compiler from deleting or moving it, and the "memory"
// clobber declares the hidden write so surrounding loads/stores are not
// reordered across it or served from stale register copies.
46 asm volatile(
"st.cs.global.v4.s16 [%0+0], {%1, %2, %3, %4};"
:
: __PTR(addr), "h"(x), "h"(y), "h"(z), "h"(w)
: "memory");
// Inline PTX: `st.cs.global.v2.f64` -- one vectorized store of two f64 values
// to the global state space using the cache-streaming (.cs) hint.
//   input %0     -> addr   (constraint supplied by __PTR)
//   input %1, %2 -> x, y   ("d": f64 registers)
// The access is 16 bytes wide; PTX requires addr to be 16-byte aligned.
//
// The statement has no output operands, yet it writes memory through `addr`.
// `volatile` keeps the compiler from deleting or moving it, and the "memory"
// clobber declares the hidden write so surrounding loads/stores are not
// reordered across it or served from stale register copies.
51 asm volatile(
"st.cs.global.v2.f64 [%0+0], {%1, %2};"
:
: __PTR(addr), "d"(x), "d"(y)
: "memory");
// Inline PTX: `st.cs.global.v2.f32` -- one vectorized store of two f32 values
// to the global state space using the cache-streaming (.cs) hint.
//   input %0     -> addr   (constraint supplied by __PTR)
//   input %1, %2 -> x, y   ("f": f32 registers)
// The access is 8 bytes wide; PTX requires addr to be 8-byte aligned.
//
// The statement has no output operands, yet it writes memory through `addr`.
// `volatile` keeps the compiler from deleting or moving it, and the "memory"
// clobber declares the hidden write so surrounding loads/stores are not
// reordered across it or served from stale register copies.
56 asm volatile(
"st.cs.global.v2.f32 [%0+0], {%1, %2};"
:
: __PTR(addr), "f"(x), "f"(y)
: "memory");
// Inline PTX: `st.cs.global.v2.s16` -- one vectorized store of two 16-bit
// signed integers to the global state space using the cache-streaming (.cs)
// hint.
//   input %0     -> addr   (constraint supplied by __PTR)
//   input %1, %2 -> x, y   ("h": 16-bit registers)
// The access is 4 bytes wide; PTX requires addr to be 4-byte aligned.
//
// The statement has no output operands, yet it writes memory through `addr`.
// `volatile` keeps the compiler from deleting or moving it, and the "memory"
// clobber declares the hidden write so surrounding loads/stores are not
// reordered across it or served from stale register copies.
61 asm volatile(
"st.cs.global.v2.s16 [%0+0], {%1, %2};"
:
: __PTR(addr), "h"(x), "h"(y)
: "memory");
// The lines below are bare __device__ function signatures: no bodies and no
// terminating semicolons are present in this excerpt, so each comment only
// describes what the signature itself shows. Any pairing with a specific PTX
// instruction is inferred from the names -- TODO confirm against the full file.

// Takes an output reference `a` and a read-only float4 pointer `addr`.
// Presumably loads *addr into `a` via `ld.cg.global.v4.f32` -- TODO confirm.
__device__ void load_global_float4(float4 &a, const float4 *addr)
// Takes a destination double2 pointer and two double components (x, y).
// Presumably stores them via `st.cs.global.v2.f64` -- TODO confirm.
__device__ void store_streaming_double2(double2 *addr, double x, double y)
// Takes a destination float4 pointer and four float components (x, y, z, w).
// Presumably stores them via `st.cs.global.v4.f32` -- TODO confirm.
__device__ void store_streaming_float4(float4 *addr, float x, float y, float z, float w)
// Takes an output reference `a` and a read-only double2 pointer `addr`.
// Presumably loads *addr into `a` via `ld.cs.global.v2.f64` -- TODO confirm.
__device__ void load_streaming_double2(double2 &a, const double2 *addr)
// Takes a destination float2 pointer and two float components (x, y).
// Presumably stores them via `st.cs.global.v2.f32` -- TODO confirm.
__device__ void store_streaming_float2(float2 *addr, float x, float y)
// Takes a destination short2 pointer and two short components (x, y).
// Presumably stores them via `st.cs.global.v2.s16` -- TODO confirm.
__device__ void store_streaming_short2(short2 *addr, short x, short y)
// Takes a destination short4 pointer and four short components (x, y, z, w).
// Presumably stores them via `st.cs.global.v4.s16` -- TODO confirm.
__device__ void store_streaming_short4(short4 *addr, short x, short y, short z, short w)
// Takes an output reference `a` and a read-only float4 pointer `addr`.
// Presumably loads *addr into `a` via `ld.cs.global.v4.f32` -- TODO confirm.
__device__ void load_streaming_float4(float4 &a, const float4 *addr)