QUDA  v1.1.0
A library for QCD on GPUs
fast_intdiv.h
Go to the documentation of this file.
1 /*
2  * Copyright 2014 Maxim Milakov
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef _INT_FASTDIV_KJGIUHFG
18 #define _INT_FASTDIV_KJGIUHFG
19 
21 {
22  public:
23  // divisor != 0
24  __host__ __device__ __forceinline__
25  int_fastdiv(int divisor = 0)
26  : d(divisor)
27  {
28  update_magic_numbers();
29  }
30 
31  __host__ __device__ __forceinline__
32  int_fastdiv& operator =(int divisor)
33  {
34  this->d = divisor;
35  update_magic_numbers();
36  return *this;
37  }
38 
39  __host__ __device__ __forceinline__
40  operator int() const
41  {
42  return d;
43  }
44 
45  private:
46  int d;
47  int M;
48  int s;
49  int n_add_sign;
50 
51  // Hacker's Delight, Second Edition, Chapter 10, Integer Division By Constants
52  __host__ __device__ __forceinline__
53  void update_magic_numbers()
54  {
55  if (d == 1)
56  {
57  M = 0;
58  s = -1;
59  n_add_sign = 1;
60  return;
61  }
62  else if (d == -1)
63  {
64  M = 0;
65  s = -1;
66  n_add_sign = -1;
67  return;
68  }
69 
70  int p;
71  unsigned int ad, anc, delta, q1, r1, q2, r2, t;
72  const unsigned two31 = 0x80000000;
73  ad = (d == 0) ? 1 : abs(d);
74  t = two31 + ((unsigned int)d >> 31);
75  anc = t - 1 - t % ad;
76  p = 31;
77  q1 = two31 / anc;
78  r1 = two31 - q1 * anc;
79  q2 = two31 / ad;
80  r2 = two31 - q2 * ad;
81  do
82  {
83  ++p;
84  q1 = 2 * q1;
85  r1 = 2 * r1;
86  if (r1 >= anc)
87  {
88  ++q1;
89  r1 -= anc;
90  }
91  q2 = 2 * q2;
92  r2 = 2 * r2;
93  if (r2 >= ad)
94  {
95  ++q2;
96  r2 -= ad;
97  }
98  delta = ad - r2;
99  } while (q1 < delta || (q1 == delta && r1 == 0));
100  this->M = q2 + 1;
101  if (d < 0)
102  this->M = -this->M;
103  this->s = p - 32;
104 
105  if ((d > 0) && (M < 0))
106  n_add_sign = 1;
107  else if ((d < 0) && (M > 0))
108  n_add_sign = -1;
109  else
110  n_add_sign = 0;
111  }
112 
113  __host__ __device__ __forceinline__
114  friend int operator/(const int divident, const int_fastdiv& divisor);
115 };
116 
117 __host__ __device__ __forceinline__
118 int operator/(const int n, const int_fastdiv& divisor)
119 {
120  int q;
121 #ifdef __CUDA_ARCH__
122  asm("mul.hi.s32 %0, %1, %2;" : "=r"(q) : "r"(divisor.M), "r"(n));
123 #else
124  q = (((unsigned long long)((long long)divisor.M * (long long)n)) >> 32);
125 #endif
126  q += n * divisor.n_add_sign;
127  if (divisor.s >= 0)
128  {
129  q >>= divisor.s; // we rely on this to be implemented as arithmetic shift
130  q += (((unsigned int)q) >> 31);
131  }
132  return q;
133 }
134 
135 __host__ __device__ __forceinline__
136 int operator%(const int n, const int_fastdiv& divisor)
137 {
138  int quotient = n / divisor;
139  int remainder = n - quotient * divisor;
140  return remainder;
141 }
142 
143 __host__ __device__ __forceinline__
144 int operator/(const unsigned int n, const int_fastdiv& divisor)
145 {
146  return ((int)n) / divisor;
147 }
148 
149 __host__ __device__ __forceinline__
150 int operator%(const unsigned int n, const int_fastdiv& divisor)
151 {
152  return ((int)n) % divisor;
153 }
154 
155 __host__ __device__ __forceinline__
156 int operator/(const short n, const int_fastdiv& divisor)
157 {
158  return ((int)n) / divisor;
159 }
160 
161 __host__ __device__ __forceinline__
162 int operator%(const short n, const int_fastdiv& divisor)
163 {
164  return ((int)n) % divisor;
165 }
166 
167 __host__ __device__ __forceinline__
168 int operator/(const unsigned short n, const int_fastdiv& divisor)
169 {
170  return ((int)n) / divisor;
171 }
172 
173 __host__ __device__ __forceinline__
174 int operator%(const unsigned short n, const int_fastdiv& divisor)
175 {
176  return ((int)n) % divisor;
177 }
178 
179 __host__ __device__ __forceinline__
180 int operator/(const char n, const int_fastdiv& divisor)
181 {
182  return ((int)n) / divisor;
183 }
184 
185 __host__ __device__ __forceinline__
186 int operator%(const char n, const int_fastdiv& divisor)
187 {
188  return ((int)n) % divisor;
189 }
190 
191 __host__ __device__ __forceinline__
192 int operator/(const unsigned char n, const int_fastdiv& divisor)
193 {
194  return ((int)n) / divisor;
195 }
196 
197 __host__ __device__ __forceinline__
198 int operator%(const unsigned char n, const int_fastdiv& divisor)
199 {
200  return ((int)n) % divisor;
201 }
202 
203 #endif
__host__ __device__ __forceinline__ int_fastdiv(int divisor=0)
Definition: fast_intdiv.h:25
__host__ __device__ __forceinline__ friend int operator/(const int divident, const int_fastdiv &divisor)
Definition: fast_intdiv.h:118
__host__ __device__ __forceinline__ int_fastdiv & operator=(int divisor)
Definition: fast_intdiv.h:32
__host__ __device__ __forceinline__ int operator%(const int n, const int_fastdiv &divisor)
Definition: fast_intdiv.h:136
__host__ __device__ __forceinline__ int operator/(const int n, const int_fastdiv &divisor)
Definition: fast_intdiv.h:118
__host__ __device__ ValueType abs(ValueType x)
Definition: complex_quda.h:125