QUDA  v1.1.0
A library for QCD on GPUs
svd_quda.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <float.h>
4 
5 #define DEVICEHOST __device__ __host__
6 #define SVDPREC 1e-11
7 #define LOG2 0.69314718055994530942
8 #define INVALID_DOUBLE (-DBL_MAX)
9 
10 //namespace quda{
11 
12 // FIXME replace this with hypot
13 template<class Cmplx>
14 inline DEVICEHOST
15 typename std::remove_reference<decltype(Cmplx::x)>::type cabs(const Cmplx & z)
16 {
17  typedef typename std::remove_reference<decltype(Cmplx::x)>::type real;
18  real max, ratio, square;
19  if(fabs(z.x) > fabs(z.y)){ max = z.x; ratio = z.y/max; }else{ max=z.y; ratio = z.x/max; }
20  square = (max != 0.0) ? max*max*(1.0 + ratio*ratio) : 0.0;
21  return sqrt(square);
22 }
23 
24 template <class T, class U> inline DEVICEHOST typename PromoteTypeId<T, U>::type quadSum(const T &a, const U &b)
25 {
26  typename PromoteTypeId<T, U>::type ratio, square, max;
27  if (fabs(a) > fabs(b)) { max = a; ratio = b/a; } else { max=b; ratio = a/b; }
28  square = (max != 0.0) ? max*max*(1.0 + ratio*ratio) : 0.0;
29  return sqrt(square);
30 }
31 
32 // In future iterations of the code, I would like to use templates to compute the norm
34 inline float getNorm(const Array<complex<float>,3>& a){
35  float temp1, temp2, temp3;
36  temp1 = cabs(a[0]);
37  temp2 = cabs(a[1]);
38  temp3 = quadSum(temp1,temp2);
39  temp1 = cabs(a[2]);
40  return quadSum(temp1,temp3);
41 }
42 
43 
45 inline double getNorm(const Array<complex<double>,3>& a){
46  double temp1, temp2, temp3;
47  temp1 = cabs(a[0]);
48  temp2 = cabs(a[1]);
49  temp3 = quadSum(temp1,temp2);
50  temp1 = cabs(a[2]);
51  return quadSum(temp1,temp3);
52 }
53 
54 template <class T, class V> inline DEVICEHOST auto constructHHMat(const T &tau, const V &v)
55 {
56  Matrix<T, 3> hh, temp1, temp2;
57 
58  ColorSpinor<decltype(tau.real()), 3, 1> v_;
59  for (int i = 0; i < 3; i++) v_(0, i) = v[i];
60  temp1 = outerProduct(v_, v_);
61 
62  temp2 = conj(tau)*temp1;
63 
64  setIdentity(&temp1);
65 
66  hh = temp1 - temp2;
67  return hh;
68 }
69 
70 
71 template<class Real>
72 inline DEVICEHOST
73 void getLambdaMax(const Matrix<Real,3> & b, Real & lambda_max){
74 
75  Real m11 = b(1,1)*b(1,1) + b(0,1)*b(0,1);
76  Real m22 = b(2,2)*b(2,2) + b(1,2)*b(1,2);
77  Real m12 = b(1,1)*b(1,2);
78  Real dm = (m11 - m22) * 0.5;
79 
80  Real norm1 = quadSum(dm, m12);
81  if( dm >= 0.0 ){
82  lambda_max = m22 - (m12*m12)/(norm1 + dm);
83  }else{
84  lambda_max = m22 + (m12*m12)/(norm1 - dm);
85  }
86 }
87 
88 
89 template<class Real>
90 inline DEVICEHOST
91 void getGivensRotation(const Real & alpha, const Real & beta, Real & c, Real & s)
92 {
93  Real ratio;
94  if( beta == 0.0 ){
95  c = 1.0; s = 0.0;
96  }else if( fabs(beta) > fabs(alpha) ){
97  ratio = -alpha/beta;
98  s = rsqrt(1.0 + ratio*ratio);
99  c = ratio*s;
100  }else{
101  ratio = -beta/alpha;
102  c = rsqrt(1.0 + ratio*ratio);
103  s = ratio*c;
104  }
105  return;
106 }
107 
108 template<class Real>
109 inline DEVICEHOST
110 void accumGivensRotation(int index, const Real & c, const Real & s, Matrix<Real,3> & m){
111  int index_p1 = index+1;
112  Real alpha, beta;
113  for(int i=0; i<3; ++i){
114  alpha = m(i,index);
115  beta = m(i,index_p1);
116  m(i,index) = alpha*c - beta*s;
117  m(i,index_p1) = alpha*s + beta*c;
118  }
119  return;
120 }
121 
122 template<class Real>
123 inline DEVICEHOST
124 void assignGivensRotation(const Real & c, const Real & s, Matrix<Real,2> & m){
125  m(0,0) = c;
126  m(0,1) = s;
127  m(1,0) = -s;
128  m(1,1) = c;
129  return;
130 }
131 
132 template<class Real>
133 inline DEVICEHOST
134 void swap(Real & a, Real & b){
135  Real temp = a;
136  a = b;
137  b = temp;
138  return;
139 }
140 
141 template<class Real>
142 inline DEVICEHOST
144  // set u and v to the 2x2 identity matrix
145  setIdentity(&u);
146  setIdentity(&v);
147 
148  Real c, s;
149 
150  if( m(0,0) == 0.0 ){
151 
152  getGivensRotation(m(0,1), m(1,1), c, s);
153 
154  m(0,0) = m(0,1)*c - m(1,1)*s;
155  m(0,1) = 0.0;
156  m(1,1) = 0.0;
157 
158  // exchange columns in v
159  v(0,0) = 0.0;
160  v(0,1) = 1.0;
161  v(1,0) = 1.0;
162  v(1,1) = 0.0;
163 
164  // u is a Givens rotation
165  assignGivensRotation(c, s, u);
166 
167  }else if( m(1,1) == 0.0 ){
168 
169  getGivensRotation(m(0,0), m(0,1), c, s);
170 
171  m(0,0) = m(0,0)*c - m(0,1)*s;
172  m(0,1) = 0.0;
173  m(1,1) = 0.0;
174 
175  assignGivensRotation(c,s,v);
176 
177  }else if( m(0,1) != 0.0 ){
178 
179  // need to calculate (m(1,1)**2 + m(0,1)**2 - m(0,0)**2)/(2*m(0,0)*m(0,1))
180  Real abs01 = fabs(m(0,1));
181  Real abs11 = fabs(m(1,1));
182  Real min, max;
183  if( abs01 > abs11 ){ min = abs11; max = abs01; }
184  else { min = abs01; max = abs11; }
185 
186 
187  Real ratio = min/max;
188  Real alpha = 2.0*log(max) + log(1.0 + ratio*ratio);
189 
190  Real abs00 = fabs(m(0,0));
191  Real beta = 2.0*log(abs00);
192 
193  int sign;
194  Real temp;
195 
196  if( alpha > beta ){
197  sign = 1;
198  temp = alpha + log(1.0 - exp(beta-alpha));
199  }else{
200  sign = -1;
201  temp = beta + log(1.0 - exp(alpha-beta));
202  }
203  temp -= LOG2 + log(abs00) + log(abs01);
204  temp = sign*exp(temp);
205 
206  if( m(0,0) < 0.0 ){ temp *= -1.0; }
207  if( m(0,1) < 0.0 ){ temp *= -1.0; }
208 
209  beta = (temp >= 0.0) ? quadSum(1.0, temp) : -quadSum(1.0, temp);
210  temp = 1.0/(temp + beta);
211 
212  // Calculate beta = sqrt(1 + temp**2)
213  beta = quadSum(1.0, temp);
214 
215  c = 1.0/beta;
216  s = temp*c;
217 
218  Matrix<Real,2> p;
219 
220  p(0,0) = c*m(0,0) - s*m(0,1);
221  p(1,0) = - s*m(1,1);
222  p(0,1) = s*m(0,0) + c*m(0,1);
223  p(1,1) = c*m(1,1);
224 
225  assignGivensRotation(c, s, v);
226 
227  // Make the column with the largest norm the first column
228  alpha = quadSum(p(0,0),p(1,0));
229  beta = quadSum(p(0,1),p(1,1));
230 
231  if(alpha < beta){
232  swap(p(0,0),p(0,1));
233  swap(p(1,0),p(1,1));
234 
235  swap(v(0,0),v(0,1));
236  swap(v(1,0),v(1,1));
237  }
238 
239  getGivensRotation(p(0,0), p(1,0), c, s);
240 
241  m(0,0) = p(0,0)*c - s*p(1,0);
242  m(1,1) = p(0,1)*s + c*p(1,1);
243  m(0,1) = 0.0;
244 
245  assignGivensRotation(c,s,u);
246  }
247 
248  return;
249 }
250 
251 // Change this so that the bidiagonal terms are not returned
252 // as a complex matrix
253 template<class Float>
254  DEVICEHOST
255 void getRealBidiagMatrix(const Matrix<complex<Float>,3> &mat, Matrix<complex<Float>,3> &u, Matrix<complex<Float>,3> &v)
256 {
257  typedef complex<Float> Complex;
258 
259  Matrix<Complex, 3> p, temp;
260  Array<Complex,3> vec;
261 
262  const Complex COMPLEX_UNITY(1.0,0.0);
263  const Complex COMPLEX_ZERO = 0.0;
264  // Step 1: build the first left reflector v,
265  // calculate the first left rotation
266  // apply to the original matrix
267  Float x = cabs(mat(1,0));
268  Float y = cabs(mat(2,0));
269  Float norm1 = quadSum(x,y);
270  Float beta;
271  Complex w, tau, z;
272 
273  // Set them to values that would never be set. If at the end of the
274  // below algorithmic flow, these values have been preserved then we
275  // know that the input matrix was the unit matrix
276  u(0,0).x = INVALID_DOUBLE;
277  v(0,0).x = INVALID_DOUBLE;
278 
279  if(norm1 == 0 && mat(0,0).y == 0){
280  p = mat;
281  }else{
282  Array<Complex,3> temp_vec;
283  copyColumn(mat, 0, &temp_vec);
284 
285  beta = (mat(0,0).x > 0.0) ? -getNorm(temp_vec) : getNorm(temp_vec);
286 
287  w.x = mat(0,0).x - beta; // work around for LLVM
288  w.y = mat(0,0).y;
289  Float norm1_inv = 1.0/cabs(w);
290  w = conj(w)*norm1_inv;
291 
292  // Assign the vector elements
293  vec[0] = COMPLEX_UNITY;
294  vec[1] = mat(1,0)*w*norm1_inv;
295  vec[2] = mat(2,0)*w*norm1_inv;
296 
297  // Now compute tau
298  tau.x = (beta - mat(0,0).x)/beta;
299  tau.y = mat(0,0).y/beta;
300  // construct the Householder matrix
301  temp = constructHHMat(tau, vec);
302  p = conj(temp)*mat;
303  u = temp;
304  }
305 
306  // Step 2: build the first right reflector
307  Float norm2 = cabs(p(0,2));
308  if(norm2 != 0.0 || p(0,1).y != 0.0){
309  norm1 = cabs(p(0,1));
310  beta = (p(0,1).x > 0.0) ? -quadSum(norm1,norm2) : quadSum(norm1,norm2);
311  vec[0] = COMPLEX_ZERO;
312  vec[1] = COMPLEX_UNITY;
313 
314  w.x = p(0,1).x-beta; // work around for LLVM
315  w.y = p(0,1).y;
316  Float norm1_inv = 1.0/cabs(w);
317  w = conj(w)*norm1_inv;
318  z = conj(p(0,2))*norm1_inv;
319  vec[2] = z*conj(w);
320 
321  tau.x = (beta - p(0,1).x)/beta; // work around for LLVM
322  tau.y = (- p(0,1).y)/beta;
323  // construct the Householder matrix
324  temp = constructHHMat(tau, vec);
325  p = p*temp;
326  v = temp;
327  }
328 
329  // Step 3: build the second left reflector
330  norm2 = cabs(p(2,1));
331  if(norm2 != 0.0 || p(1,1).y != 0.0){
332  norm1 = cabs(p(1,1));
333  beta = (p(1,1).x > 0) ? -quadSum(norm1,norm2) : quadSum(norm1,norm2);
334 
335  // Set the first two elements of the vector
336  vec[0] = COMPLEX_ZERO;
337  vec[1] = COMPLEX_UNITY;
338 
339  w.x = p(1,1).x - beta; // work around for LLVM
340  w.y = p(1,1).y;
341  Float norm1_inv = 1.0/cabs(w);
342  w = conj(w)*norm1_inv;
343  z = p(2,1)*norm1_inv;
344  vec[2] = z*w;
345 
346  tau.x = (beta - p(1,1).x)/beta;
347  tau.y = p(1,1).y/beta;
348  // I could very easily change the definition of tau
349  // so that we wouldn't need to call conj(temp) below.
350  // I would have to be careful to make sure this change
351  // is consistent with the other parts of the code.
352  temp = constructHHMat(tau, vec);
353  p = conj(temp)*p;
354  u = u*temp;
355  }
356 
357  // Step 4: build the second right reflector
358  setIdentity(&temp);
359  if( p(1,2).y != 0.0 ){
360  beta = p(1,2).x > 0.0 ? -cabs(p(1,2)) : cabs(p(1,2));
361  temp(2,2) = conj(p(1,2))/beta;
362  p(2,2) = p(2,2)*temp(2,2); // This is the only element of p needed below
363  v = v*temp;
364  }
365 
366  // Step 5: build the third left reflector
367  if( p(2,2).y != 0.0 ){
368  beta = p(2,2).x > 0.0 ? -cabs(p(2,2)) : cabs(p(2,2));
369  temp(2,2) = p(2,2)/beta;
370  u = u*temp;
371  }
372 
373  // unit matrix
374  if (u(0,0).x == INVALID_DOUBLE && v(0,0).x == INVALID_DOUBLE) {
375  u = mat;
376  v = mat;
377  }
378 
379  return;
380 }
381 
382 
383 template<class Real>
384  DEVICEHOST
385 void bdSVD(Matrix<Real,3>& u, Matrix<Real,3>& v, Matrix<Real,3>& b, int max_it)
386 {
387 
388  Real c,s;
389 
390  // set u and v matrices equal to the identity
391  setIdentity(&u);
392  setIdentity(&v);
393 
394  Real alpha, beta;
395 
396  int it=0;
397  do{
398 
399  if( fabs(b(0,1)) < SVDPREC*( fabs(b(0,0)) + fabs(b(1,1)) ) ){ b(0,1) = 0.0; }
400  if( fabs(b(1,2)) < SVDPREC*( fabs(b(0,0)) + fabs(b(2,2)) ) ){ b(1,2) = 0.0; }
401 
402  if( b(0,1) != 0.0 && b(1,2) != 0.0 ){
403  if( b(0,0) == 0.0 ){
404 
405  getGivensRotation(-b(0,1), b(1,1), s, c);
406 
407  for(int i=0; i<3; ++i){
408  alpha = u(i,0);
409  beta = u(i,1);
410  u(i,0) = alpha*c - beta*s;
411  u(i,1) = alpha*s + beta*c;
412  }
413 
414  b(1,1) = b(0,1)*s + b(1,1)*c;
415  b(0,1) = 0.0;
416 
417  b(0,2) = -b(1,2)*s;
418  b(1,2) *= c;
419 
420  getGivensRotation(-b(0,2), b(2,2), s, c);
421 
422  for(int i=0; i<3; ++i){
423  alpha = u(i,0);
424  beta = u(i,2);
425  u(i,0) = alpha*c - beta*s;
426  u(i,2) = alpha*s + beta*c;
427  }
428  b(2,2) = b(0,2)*s + b(2,2)*c;
429  b(0,2) = 0.0;
430 
431  }else if( b(1,1) == 0.0 ){
432  // same block
433  getGivensRotation(-b(1,2), b(2,2), s, c);
434  for(int i=0; i<3; ++i){
435  alpha = u(i,1);
436  beta = u(i,2);
437  u(i,1) = alpha*c - beta*s;
438  u(i,2) = alpha*s + beta*c;
439  }
440  b(2,2) = b(1,2)*s + b(2,2)*c;
441  b(1,2) = 0.0;
442  // end block
443  }else if( b(2,2) == 0.0 ){
444 
445  getGivensRotation(b(1,1), -b(1,2), c, s);
446  for(int i=0; i<3; ++i){
447  alpha = v(i,1);
448  beta = v(i,2);
449  v(i,1) = alpha*c + beta*s;
450  v(i,2) = -alpha*s + beta*c;
451  }
452  b(1,1) = b(1,1)*c + b(1,2)*s;
453  b(1,2) = 0.0;
454 
455  b(0,2) = -b(0,1)*s;
456  b(0,1) *= c;
457 
458  // apply second rotation, to remove b02
459  getGivensRotation(b(0,0), -b(0,2), c, s);
460  for(int i=0; i<3; ++i){
461  alpha = v(i,0);
462  beta = v(i,2);
463  v(i,0) = alpha*c + beta*s;
464  v(i,2) = -alpha*s + beta*c;
465  }
466  b(0,0) = b(0,0)*c + b(0,2)*s;
467  b(0,2) = 0.0;
468 
469  }else{ // Else entering normal QR iteration
470 
471  Real lambda_max;
472  getLambdaMax(b, lambda_max); // defined above
473 
474  alpha = b(0,0)*b(0,0) - lambda_max;
475  beta = b(0,0)*b(0,1);
476 
477  // c*beta + s*alpha = 0
478  getGivensRotation(alpha, beta, c, s);
479  // Multiply right v matrix
480  accumGivensRotation(0, c, s, v);
481  // apply to bidiagonal matrix, this generates b(1,0)
482  alpha = b(0,0);
483  beta = b(0,1);
484 
485  b(0,0) = alpha*c - beta*s;
486  b(0,1) = alpha*s + beta*c;
487  b(1,0) = -b(1,1)*s;
488  b(1,1) = b(1,1)*c;
489 
490  // Calculate the second Givens rotation (this time on the left)
491  getGivensRotation(b(0,0), b(1,0), c, s);
492 
493  // Multiply left u matrix
494  accumGivensRotation(0, c, s, u);
495 
496  b(0,0) = b(0,0)*c - b(1,0)*s;
497  alpha = b(0,1);
498  beta = b(1,1);
499  b(0,1) = alpha*c - beta*s;
500  b(1,1) = alpha*s + beta*c;
501  b(0,2) = -b(1,2)*s;
502  b(1,2) = b(1,2)*c;
503  // Following from the definition of the Givens rotation, b(1,0) should be equal to zero.
504  b(1,0) = 0.0;
505 
506  // Calculate the third Givens rotation (on the right)
507  getGivensRotation(b(0,1), b(0,2), c, s);
508 
509  // Multiply the right v matrix
510  accumGivensRotation(1, c, s, v);
511 
512  b(0,1) = b(0,1)*c - b(0,2)*s;
513  alpha = b(1,1);
514  beta = b(1,2);
515 
516  b(1,1) = alpha*c - beta*s;
517  b(1,2) = alpha*s + beta*c;
518  b(2,1) = -b(2,2)*s;
519  b(2,2) *= c;
520  b(0,2) = 0.0;
521 
522 
523  // Calculate the fourth Givens rotation (on the left)
524  getGivensRotation(b(1,1), b(2,1), c, s);
525  // Multiply left u matrix
526  accumGivensRotation(1, c, s, u);
527  // Eliminate b(2,1)
528  b(1,1) = b(1,1)*c - b(2,1)*s;
529  alpha = b(1,2);
530  beta = b(2,2);
531  b(1,2) = alpha*c - beta*s;
532  b(2,2) = alpha*s + beta*c;
533  b(2,1) = 0.0;
534 
535  } // end of normal QR iteration
536 
537 
538  }else if( b(0,1) == 0.0 ){
539  Matrix<Real,2> m_small, u_small, v_small;
540 
541  m_small(0,0) = b(1,1);
542  m_small(0,1) = b(1,2);
543  m_small(1,0) = b(2,1);
544  m_small(1,1) = b(2,2);
545 
546  smallSVD(u_small, v_small, m_small);
547 
548  b(1,1) = m_small(0,0);
549  b(1,2) = m_small(0,1);
550  b(2,1) = m_small(1,0);
551  b(2,2) = m_small(1,1);
552 
553 
554  for(int i=0; i<3; ++i){
555  alpha = u(i,1);
556  beta = u(i,2);
557  u(i,1) = alpha*u_small(0,0) + beta*u_small(1,0);
558  u(i,2) = alpha*u_small(0,1) + beta*u_small(1,1);
559 
560  alpha = v(i,1);
561  beta = v(i,2);
562  v(i,1) = alpha*v_small(0,0) + beta*v_small(1,0);
563  v(i,2) = alpha*v_small(0,1) + beta*v_small(1,1);
564  }
565 
566 
567  }else if( b(1,2) == 0.0 ){
568  Matrix<Real,2> m_small, u_small, v_small;
569 
570  m_small(0,0) = b(0,0);
571  m_small(0,1) = b(0,1);
572  m_small(1,0) = b(1,0);
573  m_small(1,1) = b(1,1);
574 
575  smallSVD(u_small, v_small, m_small);
576 
577  b(0,0) = m_small(0,0);
578  b(0,1) = m_small(0,1);
579  b(1,0) = m_small(1,0);
580  b(1,1) = m_small(1,1);
581 
582  for(int i=0; i<3; ++i){
583  alpha = u(i,0);
584  beta = u(i,1);
585  u(i,0) = alpha*u_small(0,0) + beta*u_small(1,0);
586  u(i,1) = alpha*u_small(0,1) + beta*u_small(1,1);
587 
588  alpha = v(i,0);
589  beta = v(i,1);
590  v(i,0) = alpha*v_small(0,0) + beta*v_small(1,0);
591  v(i,1) = alpha*v_small(0,1) + beta*v_small(1,1);
592  }
593 
594  } // end if b(1,2) == 0
595  it++;
596  } while( (b(0,1) != 0.0 || b(1,2) != 0.0) && it < max_it);
597 
598  for(int i=0; i<3; ++i){
599  if( b(i,i) < 0.0) {
600  b(i,i) *= -1;
601  for(int j=0; j<3; ++j){
602  v(j,i) *= -1;
603  }
604  }
605  }
606  return;
607 }
608 
609 template <class Float>
610 DEVICEHOST void computeSVD(const Matrix<complex<Float>, 3> &m, Matrix<complex<Float>, 3> &u,
611  Matrix<complex<Float>, 3> &v, Float singular_values[3])
612 {
613  getRealBidiagMatrix<Float>(m, u, v);
614 
615  Matrix<Float,3> bd, u_real, v_real;
616  // Make real
617  for(int i=0; i<3; ++i){
618  for(int j=0; j<3; ++j){
619  bd(i,j) = (conj(u)*m*(v))(i,j).real();
620  }
621  }
622 
623  bdSVD(u_real, v_real, bd, 500);
624  for(int i=0; i<3; ++i){
625  singular_values[i] = bd(i,i);
626  }
627 
628  u = u*u_real;
629  v = v*v_real;
630 }
631 
632 //} // end namespace quda
633 
634 #undef INVALID_DOUBLE
int V
Definition: host_utils.cpp:37
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
Matrix< N, std::complex< T > > conj(const Matrix< N, std::complex< T > > &mat)
double norm1(const ColorSpinorField &b)
double norm2(const ColorSpinorField &a)
__device__ __host__ Matrix< complex< Float >, Nc > outerProduct(const ColorSpinor< Float, Nc, 1 > &a, const ColorSpinor< Float, Nc, 1 > &b)
__host__ __device__ ValueType log(ValueType x)
Definition: complex_quda.h:101
std::complex< double > Complex
Definition: quda_internal.h:86
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
__device__ __host__ void setIdentity(Matrix< T, N > *m)
Definition: quda_matrix.h:647
__host__ __device__ ValueType exp(ValueType x)
Definition: complex_quda.h:96
__device__ __host__ void copyColumn(const Matrix< T, N > &m, int c, Array< T, N > *a)
Definition: quda_matrix.h:796
FloatingPoint< float > Float
DEVICEHOST void getLambdaMax(const Matrix< Real, 3 > &b, Real &lambda_max)
Definition: svd_quda.h:73
#define LOG2
Definition: svd_quda.h:7
#define DEVICEHOST
Definition: svd_quda.h:5
DEVICEHOST float getNorm(const Array< complex< float >, 3 > &a)
Definition: svd_quda.h:34
DEVICEHOST void computeSVD(const Matrix< complex< Float >, 3 > &m, Matrix< complex< Float >, 3 > &u, Matrix< complex< Float >, 3 > &v, Float singular_values[3])
Definition: svd_quda.h:610
DEVICEHOST void getGivensRotation(const Real &alpha, const Real &beta, Real &c, Real &s)
Definition: svd_quda.h:91
DEVICEHOST PromoteTypeId< T, U >::type quadSum(const T &a, const U &b)
Definition: svd_quda.h:24
#define SVDPREC
Definition: svd_quda.h:6
DEVICEHOST void getRealBidiagMatrix(const Matrix< complex< Float >, 3 > &mat, Matrix< complex< Float >, 3 > &u, Matrix< complex< Float >, 3 > &v)
Definition: svd_quda.h:255
#define INVALID_DOUBLE
Definition: svd_quda.h:8
DEVICEHOST std::remove_reference< decltype(Cmplx::x)>::type cabs(const Cmplx &z)
Definition: svd_quda.h:15
DEVICEHOST void smallSVD(Matrix< Real, 2 > &u, Matrix< Real, 2 > &v, Matrix< Real, 2 > &m)
Definition: svd_quda.h:143
DEVICEHOST void swap(Real &a, Real &b)
Definition: svd_quda.h:134
DEVICEHOST void assignGivensRotation(const Real &c, const Real &s, Matrix< Real, 2 > &m)
Definition: svd_quda.h:124
DEVICEHOST void accumGivensRotation(int index, const Real &c, const Real &s, Matrix< Real, 3 > &m)
Definition: svd_quda.h:110
DEVICEHOST void bdSVD(Matrix< Real, 3 > &u, Matrix< Real, 3 > &v, Matrix< Real, 3 > &b, int max_it)
Definition: svd_quda.h:385
DEVICEHOST auto constructHHMat(const T &tau, const V &v)
Definition: svd_quda.h:54