46 #if !defined(DBLDBL_H_)
49 #if defined(__cplusplus)
/* Adds the doubles a and b and recovers the rounding error of that sum
 * without branching: z.y receives the rounded sum, z.x the correction term.
 * Every step uses the round-to-nearest add intrinsic __dadd_rn.
 * NOTE(review): the enclosing function header and the declarations of z, t1
 * and t2 are outside this excerpt - presumably add_double_to_dbldbl; confirm. */
93 z.y = __dadd_rn (a, b);
/* t1: rounded sum minus a, i.e. the share of the sum attributed to b. */
94 t1 = __dadd_rn (z.y, -a);
/* t2: rounded sum minus t1, i.e. the share of the sum attributed to a. */
95 t2 = __dadd_rn (z.y, -t1);
/* t1 and t2 are overwritten with what was lost from b and from a. */
96 t1 = __dadd_rn (b, -t1);
97 t2 = __dadd_rn (a, -t2);
/* The two losses added together form the low-order word of the result. */
98 z.x = __dadd_rn (t1, t2);
/* Multiplies the doubles a and b into a two-word result: z.y is the rounded
 * product, z.x the residual a*b - z.y evaluated by a single fused
 * multiply-add, so the part of the product that rounding dropped from z.y
 * is captured in z.x.
 * NOTE(review): function header and declaration of z are outside this
 * excerpt - presumably mul_double_to_dbldbl; confirm. */
106 z.y = __dmul_rn (a, b);
107 z.x = __fma_rn (a, b, -z.y);
/* Adds the two-word values a and b (fields .y and .x; .y is the component
 * that head() returns elsewhere in this file) and stores the two-word sum
 * in z. Statement order matters: each temporary is reused several times.
 * NOTE(review): function header and declaration of z are outside this
 * excerpt - presumably add_dbldbl; confirm. */
130 double t1, t2, t3, t4, t5, e;
/* t1 = rounded sum of the .y words; t3 = the rounding error of that sum. */
131 t1 = __dadd_rn (a.y, b.y);
132 t2 = __dadd_rn (t1, -a.y);
133 t3 = __dadd_rn (__dadd_rn (a.y, t2 - t1), __dadd_rn (b.y, -t2));
/* t4 = rounded sum of the .x words; t5 = the rounding error of that sum. */
134 t4 = __dadd_rn (a.x, b.x);
135 t2 = __dadd_rn (t4, -a.x);
136 t5 = __dadd_rn (__dadd_rn (a.x, t2 - t4), __dadd_rn (b.x, -t2));
/* Fold the .x sum into the error of the .y sum, then re-split t1 + t3 into
 * a leading part (t4) and a remainder (t3). */
137 t3 = __dadd_rn (t3, t4);
138 t4 = __dadd_rn (t1, t3);
139 t3 = __dadd_rn (t1 - t4, t3);
/* Add the error of the .x sum to the remainder. */
140 t3 = __dadd_rn (t3, t5);
/* Final split: z.y is the rounded total, z.x what is left over of t3 after
 * that rounding. */
141 z.y = e = __dadd_rn (t4, t3);
142 z.x = __dadd_rn (t4 - e, t3);
/* Subtracts the two-word value b from a (fields .y and .x) and stores the
 * two-word difference in z. Same sequence as the addition excerpt, with the
 * b operands negated. Statement order matters: temporaries are reused.
 * NOTE(review): function header and declaration of z are outside this
 * excerpt - presumably sub_dbldbl; confirm. */
156 double t1, t2, t3, t4, t5, e;
/* t1 = rounded difference of the .y words; t3 = its rounding error. */
157 t1 = __dadd_rn (a.y, -b.y);
158 t2 = __dadd_rn (t1, -a.y);
159 t3 = __dadd_rn (__dadd_rn (a.y, t2 - t1), - __dadd_rn (b.y, t2));
/* t4 = rounded difference of the .x words; t5 = its rounding error. */
160 t4 = __dadd_rn (a.x, -b.x);
161 t2 = __dadd_rn (t4, -a.x);
162 t5 = __dadd_rn (__dadd_rn (a.x, t2 - t4), - __dadd_rn (b.x, t2));
/* Fold the .x difference into the .y error, then re-split t1 + t3 into a
 * leading part (t4) and a remainder (t3). */
163 t3 = __dadd_rn (t3, t4);
164 t4 = __dadd_rn (t1, t3);
165 t3 = __dadd_rn (t1 - t4, t3);
/* Add the error of the .x difference to the remainder. */
166 t3 = __dadd_rn (t3, t5);
/* Final split: z.y is the rounded total, z.x what is left over of t3. */
167 z.y = e = __dadd_rn (t4, t3);
168 z.x = __dadd_rn (t4 - e, t3);
/* Multiplies the two-word values a and b (fields .y and .x) and stores the
 * two-word product in z, using t as scratch.
 * NOTE(review): function header and declarations of t, z and e are outside
 * this excerpt - presumably mul_dbldbl; confirm. */
/* t.y = rounded product of the .y words; t.x = a.y*b.y - t.y via one FMA. */
181 t.y = __dmul_rn (a.y, b.y);
182 t.x = __fma_rn (a.y, b.y, -t.y);
/* Accumulate the remaining partial products into t.x: a.x*b.x first, then
 * the two cross terms a.y*b.x and a.x*b.y. */
183 t.x = __fma_rn (a.x, b.x, t.x);
184 t.x = __fma_rn (a.y, b.x, t.x);
185 t.x = __fma_rn (a.x, b.y, t.x);
/* Split t.y + t.x into the rounded total (z.y) and the leftover (z.x). */
186 z.y = e = __dadd_rn (t.y, t.x);
187 z.x = __dadd_rn (t.y - e, t.x);
/* Builds a two-word quotient of a by b (fields .y and .x) in z, using only
 * multiplies, adds and FMAs with a scale factor r.
 * NOTE(review): r is assigned before this excerpt; since it multiplies where
 * a division by b.y would be expected, it is presumably an approximate
 * reciprocal of b.y - confirm. Function header and declarations of t, z, e
 * and r are also outside this excerpt (presumably div_dbldbl). */
/* First quotient estimate from the .y words. */
204 t.y = __dmul_rn (a.y, r);
/* e = a.y - b.y*t.y: residual of that estimate, used to correct t.y. */
205 e = __fma_rn (b.y, -t.y, a.y);
206 t.y = __fma_rn (r, e, t.y);
/* t.x = residual of the corrected estimate, extended with the .x words of
 * both operands: a.y - b.y*t.y + a.x - b.x*t.y. */
207 t.x = __fma_rn (b.y, -t.y, a.y);
208 t.x = __dadd_rn (a.x, t.x);
209 t.x = __fma_rn (b.x, -t.y, t.x);
/* Scale the residual by r (e), then apply one correction step to e in the
 * same way t.y was corrected above; the result becomes the low-order term. */
210 e = __dmul_rn (r, t.x);
211 t.x = __fma_rn (b.y, -e, t.x);
212 t.x = __fma_rn (r, t.x, e);
/* Split t.y + t.x into the rounded total (z.y) and the leftover (z.x). */
213 z.y = e = __dadd_rn (t.y, t.x);
214 z.x = __dadd_rn (t.y - e, t.x);
/* Refines a square-root estimate of the two-word value a (fields .y, .x)
 * into the two-word result z.
 * NOTE(review): r is assigned before this excerpt; from its use (y = a.y*r,
 * later halved) it is presumably an approximate reciprocal square root of
 * a.y - confirm. Function header and declarations of r, y, s, e, t and z
 * are outside this excerpt (presumably sqrt_dbldbl). */
/* Force r to zero when a.y is exactly zero, which makes y below zero too.
 * NOTE(review): presumably guards against a non-finite r for a zero
 * argument - confirm against the line that sets r. */
230 if (a.y == 0.0) r = 0.0;
/* y = first estimate; s = a.y - y*y, the residual of that estimate. */
231 y = __dmul_rn (a.y, r);
232 s = __fma_rn (y, -y, a.y);
233 r = __dmul_rn (0.5, r);
/* z = (s + a.x) as a two-word value: the residual including a's .x word. */
234 z.y = e = __dadd_rn (s, a.x);
235 z.x = __dadd_rn (s - e, a.x);
/* t = r * z as a two-word product (r already halved): the correction. */
236 t.y = __dmul_rn (r, z.y);
237 t.x = __fma_rn (r, z.y, -t.y);
238 t.x = __fma_rn (r, z.x, t.x);
/* Add the correction to y: r holds the rounded sum y + t.y, s the part
 * lost in that sum plus the low-order correction t.x. */
239 r = __dadd_rn (y, t.y);
240 s = __dadd_rn (y - r, t.y);
241 s = __dadd_rn (s, t.x);
/* Split r + s into the rounded total (z.y) and the leftover (z.x). */
242 z.y = e = __dadd_rn (r, s);
243 z.x = __dadd_rn (r - e, s);
/* Refines an estimate r into a two-word result z for the two-word input a
 * (fields .y, .x).
 * NOTE(review): r is assigned before this excerpt; the residual 1 - a*r*r
 * formed below suggests r approximates the reciprocal square root of a.y -
 * confirm. Function header and declarations are outside this excerpt
 * (presumably rsqrt_dbldbl). */
/* s = 1 - (a.y*r)*r, built in pieces: the rounded product e = a.y*r, then
 * the part of a.y*r that rounding dropped from e, then the a.x*r term. */
257 e = __dmul_rn (a.y, r);
258 s = __fma_rn (e, -r, 1.0);
259 e = __fma_rn (a.y, r, -e);
260 s = __fma_rn (e, -r, s);
261 e = __dmul_rn (a.x, r);
262 s = __fma_rn (e, -r, s);
/* NOTE(review): the embedded numbering jumps from 262 to 264, so one
 * original line is missing here; e may be reassigned on it. Do not assume
 * the e used below is still a.x*r - check the full file. */
/* z = e * s as a two-word product (rounded product plus FMA residual). */
264 z.y = __dmul_rn (e, s);
265 z.x = __fma_rn (e, s, -z.y);
/* Add that product to r: s becomes the rounded sum r + z.y, r the part
 * lost in that sum plus the low-order term z.x. */
266 s = __dadd_rn (r, z.y);
267 r = __dadd_rn (r, -s);
268 r = __dadd_rn (r, z.y);
269 r = __dadd_rn (r, z.x);
/* Split s + r into the rounded total (z.y) and the leftover (z.x). */
270 z.y = e = __dadd_rn (s, r);
271 z.x = __dadd_rn (s - e, r);
275 #if defined(__cplusplus)
/* Returns the y component of the member a; callable from host and device
 * code and does not modify the object.
 * NOTE(review): the enclosing class and the type of a are outside this
 * excerpt - presumably doubledouble wrapping a dbldbl; confirm. */
303 __device__ __host__
double head()
const {
return a.y; }
/* Returns the x component of the member a; callable from host and device
 * code and does not modify the object.
 * NOTE(review): the enclosing class and the type of a are outside this
 * excerpt - presumably doubledouble wrapping a dbldbl; confirm. */
304 __device__ __host__
double tail()
const {
return a.x; }
/* Prints this value on one line as "scalar: <head> + <tail>", each part in
 * exponent notation with 14 digits after the decimal point. Uses printf and
 * is callable from host and device code; the object is not modified. */
306 __device__ __host__
void print()
const { printf(
"scalar: %16.14e + %16.14e\n",
head(),
tail()); }
/* Prints both members x and y on one line as
 * "vec2: (<x head> + <x tail>) (<y head> + <y tail>)", each part in exponent
 * notation with 14 digits after the decimal point. Uses printf and is
 * callable from host and device code; the object is not modified.
 * NOTE(review): the enclosing class is outside this excerpt - presumably
 * doubledouble2; confirm. */
353 __device__ __host__
void print()
const { printf(
"vec2: (%16.14e + %16.14e) (%16.14e + %16.14e)\n",
x.
head(),
x.
tail(),
y.
head(),
y.
tail()); }
__device__ doubledouble operator+(const doubledouble &a, const doubledouble &b)
__device__ doubledouble operator/(const doubledouble &a, const doubledouble &b)
__device__ __forceinline__ dbldbl neg_dbldbl(dbldbl a)
__device__ __forceinline__ dbldbl make_dbldbl(double head, double tail)
__device__ doubledouble add_double_to_doubledouble(const double &a, const double &b)
__device__ __forceinline__ dbldbl sub_dbldbl(dbldbl a, dbldbl b)
__device__ doubledouble mul_double_to_doubledouble(const double &a, const double &b)
__device__ bool operator>(const doubledouble &a, const double &b)
__device__ __forceinline__ double get_dbldbl_head(dbldbl a)
__device__ __forceinline__ dbldbl mul_dbldbl(dbldbl a, dbldbl b)
__device__ __forceinline__ dbldbl add_dbldbl(dbldbl a, dbldbl b)
__device__ doubledouble operator*(const doubledouble &a, const doubledouble &b)
__device__ __forceinline__ dbldbl mul_double_to_dbldbl(double a, double b)
__device__ __forceinline__ dbldbl rsqrt_dbldbl(dbldbl a)
__device__ __forceinline__ dbldbl sqrt_dbldbl(dbldbl a)
__device__ doubledouble operator-(const doubledouble &a, const doubledouble &b)
__device__ __forceinline__ dbldbl div_dbldbl(dbldbl a, dbldbl b)
__device__ __forceinline__ double get_dbldbl_tail(dbldbl a)
__device__ __forceinline__ dbldbl add_double_to_dbldbl(double a, double b)
__device__ __host__ doubledouble2(const doubledouble2 &a)
__device__ __host__ doubledouble2(const doubledouble &x, const doubledouble &y)
__device__ __host__ void print() const
__device__ __host__ doubledouble2(const double2 &a)
__device__ __host__ doubledouble2()
__device__ doubledouble2 & operator+=(const doubledouble2 &a)
__device__ __host__ doubledouble3(const doubledouble3 &a)
__device__ doubledouble3 & operator+=(const doubledouble3 &a)
__device__ __host__ doubledouble3()
__device__ __host__ doubledouble3(const doubledouble &x, const doubledouble &y, const doubledouble &z)
__device__ __host__ doubledouble3(const double3 &a)
__device__ __host__ void print() const
__device__ __host__ double tail() const
__device__ doubledouble & operator+=(const doubledouble &a)
__device__ __host__ void print() const
__device__ __host__ doubledouble(const doubledouble &a)
__device__ __host__ doubledouble(const double &head, const double &tail)
__device__ __host__ doubledouble()
__device__ __host__ doubledouble & operator=(const double &head)
__device__ __host__ doubledouble(const double &head)
__device__ __host__ doubledouble(const dbldbl &a)
__device__ __host__ double head() const