QUDA  v1.1.0
A library for QCD on GPUs
memory.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2013, NVIDIA Corporation
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the name of the <organization> nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
28 #pragma once
29 #include <trove/utility.h>
30 #include <trove/array.h>
31 
32 namespace trove {
33 namespace detail {
34 
35 template<typename Array>
36 struct warp_store_array {};
37 
38 template<typename T, int s>
39 struct warp_store_array<array<T, s> > {
40  __host__ __device__ static void impl(
41  const array<T, s>& d,
42  T* ptr, int offset, int stride) {
43  ptr[offset] = d.head;
44  warp_store_array<array<T, s-1> >::impl(
45  d.tail, ptr, offset + stride, stride);
46  }
47 };
48 
49 template<typename T>
50 struct warp_store_array<array<T, 1> > {
51  __host__ __device__ static void impl(
52  const array<T, 1>& d,
53  T* ptr, int offset, int stride) {
54  ptr[offset] = d.head;
55  }
56 };
57 
58 template<typename Array>
60 
61 template<typename T, int s>
63  __host__ __device__ static void impl(
64  const array<T, s>& d,
65  T* ptr,
66  int offset=0,
67  int stride=1) {
68  ptr[offset] = d.head;
69  uncoalesced_store_array<array<T, s-1> >::impl(d.tail, ptr, offset+1,
70  stride);
71  }
72  __host__ __device__ static void impl(
73  const array<T, s>& d,
74  volatile T* ptr,
75  int offset=0,
76  int stride=1) {
77  ptr[offset] = d.head;
78  uncoalesced_store_array<array<T, s-1> >::impl(d.tail, ptr, offset+1,
79  stride);
80  }
81 };
82 
83 template<typename T>
85  __host__ __device__ static void impl(
86  const array<T, 1>& d,
87  T* ptr,
88  int offset=0,
89  int stride=1) {
90  ptr[offset] = d.head;
91  }
92  __host__ __device__ static void impl(
93  const array<T, 1>& d,
94  volatile T* ptr,
95  int offset=0,
96  int stride=1) {
97  ptr[offset] = d.head;
98  }
99 };
100 
101 template<typename Array>
103 
104 template<typename T, int s>
105 struct warp_load_array<array<T, s> > {
106  __host__ __device__ static array<T, s> impl(const T* ptr,
107  int offset,
108  int stride=32) {
109  return array<T, s>(ptr[offset],
110  warp_load_array<array<T, s-1> >::impl(ptr, offset+stride, stride));
111  }
112  __host__ __device__ static array<T, s> impl(const volatile T* ptr,
113  int offset,
114  int stride=32) {
115  return array<T, s>(ptr[offset],
116  warp_load_array<array<T, s-1> >::impl(ptr, offset+stride, stride));
117  }
118 };
119 
120 template<typename T>
121 struct warp_load_array<array<T, 1> > {
122  __host__ __device__ static array<T, 1> impl(const T* ptr,
123  int offset,
124  int stride=32) {
125  return array<T, 1>(ptr[offset]);
126  }
127  __host__ __device__ static array<T, 1> impl(const volatile T* ptr,
128  int offset,
129  int stride=32) {
130  return array<T, 1>(ptr[offset]);
131  }
132 };
133 
134 } //end namespace detail
135 
136 template<typename Array>
137 __host__ __device__ void warp_store(const Array& t,
138  typename Array::head_type* ptr,
139  int offset, int stride=32) {
140  detail::warp_store_array<Array>::impl(t, ptr, offset, stride);
141 }
142 
143 template<typename Array>
144 __host__ __device__ Array warp_load(const typename Array::head_type* ptr,
145  int offset, int stride=32) {
146  return detail::warp_load_array<Array>::impl(ptr, offset, stride);
147 }
148 
149 template<typename Array>
150 __host__ __device__ Array warp_load(
151  const volatile typename Array::head_type* ptr,
152  int offset, int stride=32) {
153  return detail::warp_load_array<Array>::impl(ptr, offset, stride);
154 }
155 
156 template<typename Array>
157 __host__ __device__ void uncoalesced_store(const Array& t,
158  typename Array::head_type* ptr,
159  int stride=1) {
161 }
162 
163 template<typename Array>
164 __host__ __device__ void uncoalesced_store(const Array& t,
165  volatile typename Array::head_type* ptr,
166  int stride=1) {
168 }
169 
170 } //end namespace trove
Definition: alias.h:4
Definition: aos.h:38
__host__ __device__ void uncoalesced_store(const Array &t, typename Array::head_type *ptr, int stride=1)
Definition: memory.h:157
__host__ __device__ void warp_store(const Array &t, typename Array::head_type *ptr, int offset, int stride=32)
Definition: memory.h:137
__host__ __device__ Array warp_load(const typename Array::head_type *ptr, int offset, int stride=32)
Definition: memory.h:144
head_type head
Definition: array.h:67
head_type head
Definition: array.h:38
tail_type tail
Definition: array.h:39
__host__ static __device__ void impl(const array< T, 1 > &d, volatile T *ptr, int offset=0, int stride=1)
Definition: memory.h:92
__host__ static __device__ void impl(const array< T, 1 > &d, T *ptr, int offset=0, int stride=1)
Definition: memory.h:85
__host__ static __device__ void impl(const array< T, s > &d, volatile T *ptr, int offset=0, int stride=1)
Definition: memory.h:72
__host__ static __device__ void impl(const array< T, s > &d, T *ptr, int offset=0, int stride=1)
Definition: memory.h:63
__host__ static __device__ array< T, 1 > impl(const volatile T *ptr, int offset, int stride=32)
Definition: memory.h:127
__host__ static __device__ array< T, 1 > impl(const T *ptr, int offset, int stride=32)
Definition: memory.h:122
__host__ static __device__ array< T, s > impl(const volatile T *ptr, int offset, int stride=32)
Definition: memory.h:112
__host__ static __device__ array< T, s > impl(const T *ptr, int offset, int stride=32)
Definition: memory.h:106
__host__ static __device__ void impl(const array< T, 1 > &d, T *ptr, int offset, int stride)
Definition: memory.h:51
__host__ static __device__ void impl(const array< T, s > &d, T *ptr, int offset, int stride)
Definition: memory.h:40