3 #include <thrust/detail/static_assert.h>
5 #if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
11 __device__ __forceinline__
12 static void impl(
unsigned int mask, array<int, s>& d,
const int& i) {
13 d.head = __shfl_sync(mask, d.head, i);
20 __device__ __forceinline__
21 static void impl(
unsigned int mask, array<int, 1>& d,
const int& i) {
22 d.head = __shfl_sync(mask, d.head, i);
28 __device__ __forceinline__
29 static void impl(
unsigned int mask, array<int, s>& d,
const int& i) {
30 d.head = __shfl_down_sync(mask, d.head, i);
36 struct shuffle_down<1> {
37 __device__ __forceinline__
38 static void impl(
unsigned int mask, array<int, 1>& d,
const int& i) {
39 d.head = __shfl_down_sync(mask, d.head, i);
45 __device__ __forceinline__
46 static void impl(
unsigned int mask, array<int, s>& d,
const int& i) {
47 d.head = __shfl_up_sync(mask, d.head, i);
53 struct shuffle_up<1> {
54 __device__ __forceinline__
55 static void impl(
unsigned int mask, array<int, 1>& d,
const int& i) {
56 d.head = __shfl_up_sync(mask, d.head, i);
62 __device__ __forceinline__
63 static void impl(
unsigned int mask, array<int, s>& d,
const int& i) {
64 d.head = __shfl_xor_sync(mask, d.head, i);
70 struct shuffle_xor<1> {
71 __device__ __forceinline__
72 static void impl(
unsigned int mask, array<int, 1>& d,
const int& i) {
73 d.head = __shfl_xor_sync(mask, d.head, i);
81 __device__ __forceinline__
82 T __shfl_sync(
unsigned int mask,
const T& t,
const int& i) {
89 aliased lysed = detail::lyse<int>(t);
91 return detail::fuse<T>(lysed);
95 __device__ __forceinline__
96 T __shfl_down_sync(
unsigned int mask,
const T& t,
const int& i) {
103 aliased lysed = detail::lyse<int>(t);
105 return detail::fuse<T>(lysed);
109 __device__ __forceinline__
110 T __shfl_up_sync(
unsigned int mask,
const T& t,
const int& i) {
117 aliased lysed = detail::lyse<int>(t);
119 return detail::fuse<T>(lysed);
123 __device__ __forceinline__
124 T __shfl_xor_sync(
unsigned int mask,
const T& t,
const int& i) {
131 aliased lysed = detail::lyse<int>(t);
133 return detail::fuse<T>(lysed);
143 __device__ __forceinline__
152 __device__ __forceinline__
160 __device__ __forceinline__
169 __device__ __forceinline__
177 __device__ __forceinline__
186 __device__ __forceinline__
194 __device__ __forceinline__
203 __device__ __forceinline__
213 __device__ __forceinline__
222 aliased lysed = detail::lyse<int>(t);
224 return detail::fuse<T>(lysed);
228 __device__ __forceinline__
236 aliased lysed = detail::lyse<int>(t);
238 return detail::fuse<T>(lysed);
242 __device__ __forceinline__
250 aliased lysed = detail::lyse<int>(t);
252 return detail::fuse<T>(lysed);
256 __device__ __forceinline__
265 aliased lysed = detail::lyse<int>(t);
267 return detail::fuse<T>(lysed);
__device__ __forceinline__ T __shfl(const T &t, const int &i)
__device__ __forceinline__ T __shfl_down(const T &t, const int &i)
__device__ __forceinline__ T __shfl_xor(const T &t, const int &i)
__device__ __forceinline__ T __shfl_up(const T &t, const int &i)
__device__ static __forceinline__ void impl(array< int, 1 > &d, const int &i)
__device__ static __forceinline__ void impl(array< int, 1 > &d, const int &i)
__device__ static __forceinline__ void impl(array< int, s > &d, const int &i)
__device__ static __forceinline__ void impl(array< int, 1 > &d, const int &i)
__device__ static __forceinline__ void impl(array< int, s > &d, const int &i)
__device__ static __forceinline__ void impl(array< int, 1 > &d, const int &i)
__device__ static __forceinline__ void impl(array< int, s > &d, const int &i)
__device__ static __forceinline__ void impl(array< int, s > &d, const int &i)