QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
svd_quda.h
Go to the documentation of this file.
1 #ifndef _SVD_QUDA_H_
2 #define _SVD_QUDA_H_
3 
4 #define DEVICEHOST __device__ __host__
5 #define SVDPREC 1e-11
6 #define LOG2 0.69314718055994530942
7 
8 
9 //namespace quda{
10 
11 template<class Cmplx>
12  inline DEVICEHOST
13 typename RealTypeId<Cmplx>::Type cabs(const Cmplx & z)
14 {
15  typename RealTypeId<Cmplx>::Type max, ratio, square;
16  if(fabs(z.x) > fabs(z.y)){ max = z.x; ratio = z.y/max; }else{ max=z.y; ratio = z.x/max; }
17  square = max*max*(1.0 + ratio*ratio);
18  return sqrt(square);
19 }
20 
21 
22 /*
23  template<class Cmplx>
24  inline DEVICEHOST
25  typename RealTypeId<Cmplx>::Type cabs(const Cmplx & z)
26  {
27  return sqrt(z.x*z.x + z.y*z.y);
28  }
29  */
30 
31 template<class T, class U>
32 inline DEVICEHOST typename PromoteTypeId<T,U>::Type quadSum(const T & a, const U & b){
33  typename PromoteTypeId<T,U>::Type ratio, square, max;
34  if(fabs(a) > fabs(b)){ max = a; ratio = b/a; }else{ max=b; ratio = a/b; }
35  square = max*max*(1.0 + ratio*ratio);
36  return sqrt(square);
37 }
38 
39 /*
40  template<class T, class U>
41  inline DEVICEHOST
42  T quadSum(const T & a, const U & b)
43  {
44  return sqrt(a*a + b*b);
45  }
46  */
47 
48 
49 
50 // In future iterations of the code, I would like to use templates to compute the norm
52 float getNorm(const Array<float2,3>& a){
53  float temp1, temp2, temp3;
54  temp1 = cabs(a[0]);
55  temp2 = cabs(a[1]);
56  temp3 = quadSum(temp1,temp2);
57  temp1 = cabs(a[2]);
58  return quadSum(temp1,temp3);
59 }
60 
61 
63 double getNorm(const Array<double2,3>& a){
64  double temp1, temp2, temp3;
65  temp1 = cabs(a[0]);
66  temp2 = cabs(a[1]);
67  temp3 = quadSum(temp1,temp2);
68  temp1 = cabs(a[2]);
69  return quadSum(temp1,temp3);
70 }
71 
72 
73 template<class T>
74  DEVICEHOST
75 void constructHHMat(const T & tau, const Array<T,3> & v, Matrix<T,3> & hh)
76 {
77  Matrix<T,3> temp1, temp2;
78  outerProd(v,v,&temp1);
79 
80  temp2 = Conj(tau)*temp1;
81 
82  setIdentity(&temp1);
83 
84  hh = temp1 - temp2;
85  return;
86 }
87 
88 
89 template<class Real>
91 void getLambdaMax(const Matrix<Real,3> & b, Real & lambda_max){
92 
93  Real m11 = b(1,1)*b(1,1) + b(0,1)*b(0,1);
94  Real m22 = b(2,2)*b(2,2) + b(1,2)*b(1,2);
95  Real m12 = b(1,1)*b(1,2);
96  Real dm = (m11 - m22)/2.0;
97 
98  Real norm1 = quadSum(dm, m12);
99  if( dm >= 0.0 ){
100  lambda_max = m22 - (m12*m12)/(norm1 + dm);
101  }else{
102  lambda_max = m22 + (m12*m12)/(norm1 - dm);
103  }
104  return;
105 }
106 
107 
108 template<class Real>
109  DEVICEHOST
110 void getGivensRotation(const Real & alpha, const Real & beta, Real & c, Real & s)
111 {
112  Real ratio;
113  if( beta == 0.0 ){
114  c = 1.0; s = 0.0;
115  }else if( fabs(beta) > fabs(alpha) ){
116  ratio = -alpha/beta;
117  s = rsqrt(1.0 + ratio*ratio);
118  c = ratio*s;
119  }else{
120  ratio = -beta/alpha;
121  c = rsqrt(1.0 + ratio*ratio);
122  s = ratio*c;
123  }
124  return;
125 }
126 
127 template<class Real>
128 inline DEVICEHOST
129 void accumGivensRotation(int index, const Real & c, const Real & s, Matrix<Real,3> & m){
130  int index_p1 = index+1;
131  Real alpha, beta;
132  for(int i=0; i<3; ++i){
133  alpha = m(i,index);
134  beta = m(i,index_p1);
135  m(i,index) = alpha*c - beta*s;
136  m(i,index_p1) = alpha*s + beta*c;
137  }
138  return;
139 }
140 
141 template<class Real>
142 inline DEVICEHOST
143 void assignGivensRotation(const Real & c, const Real & s, Matrix<Real,2> & m){
144  m(0,0) = c;
145  m(0,1) = s;
146  m(1,0) = -s;
147  m(1,1) = c;
148  return;
149 }
150 
151 template<class Real>
152 inline DEVICEHOST
153 void swap(Real & a, Real & b){
154  Real temp = a;
155  a = b;
156  b = temp;
157  return;
158 }
159 
160 template<class Real>
161 inline DEVICEHOST
163  // set u and v to the 2x2 identity matrix
164  setIdentity(&u);
165  setIdentity(&v);
166 
167  Real c, s;
168 
169  if( m(0,0) == 0.0 ){
170 
171  getGivensRotation(m(0,1), m(1,1), c, s);
172 
173  m(0,0) = m(0,1)*c - m(1,1)*s;
174  m(0,1) = 0.0;
175  m(1,1) = 0.0;
176 
177  // exchange columns in v
178  v(0,0) = 0.0;
179  v(0,1) = 1.0;
180  v(1,0) = 1.0;
181  v(1,1) = 0.0;
182 
183  // u is a Givens rotation
184  assignGivensRotation(c, s, u);
185 
186  }else if( m(1,1) == 0.0 ){
187 
188  getGivensRotation(m(0,0), m(0,1), c, s);
189 
190  m(0,0) = m(0,0)*c - m(0,1)*s;
191  m(0,1) = 0.0;
192  m(1,1) = 0.0;
193 
194  assignGivensRotation(c,s,v);
195 
196  }else if( m(0,1) != 0.0 ){
197 
198  // need to calculate (m(1,1)**2 + m(0,1)**2 - m(0,0)**2)/(2*m(0,0)*m(0,1))
199  Real abs01 = fabs(m(0,1));
200  Real abs11 = fabs(m(1,1));
201  Real min, max;
202  if( abs01 > abs11 ){ min = abs11; max = abs01; }
203  else { min = abs01; max = abs11; }
204 
205 
206  Real ratio = min/max;
207  Real alpha = 2.0*log(max) + log(1.0 + ratio*ratio);
208 
209  Real abs00 = fabs(m(0,0));
210  Real beta = 2.0*log(abs00);
211 
212  int sign;
213  Real temp;
214 
215  if( alpha > beta ){
216  sign = 1;
217  temp = alpha + log(1.0 - exp(beta-alpha));
218  }else{
219  sign = -1;
220  temp = beta + log(1.0 - exp(alpha-beta));
221  }
222  temp -= LOG2 + log(abs00) + log(abs01);
223  temp = sign*exp(temp);
224 
225  if( m(0,0) < 0.0 ){ temp *= -1.0; }
226  if( m(0,1) < 0.0 ){ temp *= -1.0; }
227 
228  beta = quadSum(1.0, temp);
229 
230  if( temp >= 0.0 ){
231  temp = 1.0/(temp + beta);
232  }else{
233  temp = 1.0/(temp - beta);
234  }
235 
236  // Calculate beta = sqrt(1 + temp**2)
237  beta = quadSum(1.0, temp);
238 
239  c = 1.0/beta;
240  s = temp*c;
241 
242  Matrix<Real,2> p;
243 
244  p(0,0) = c*m(0,0) - s*m(0,1);
245  p(1,0) = - s*m(1,1);
246  p(0,1) = s*m(0,0) + c*m(0,1);
247  p(1,1) = c*m(1,1);
248 
249  assignGivensRotation(c, s, v);
250 
251  // Make the column with the largest norm the first column
252  alpha = quadSum(p(0,0),p(1,0));
253  beta = quadSum(p(0,1),p(1,1));
254 
255  if(alpha < beta){
256  swap(p(0,0),p(0,1));
257  swap(p(1,0),p(1,1));
258 
259  swap(v(0,0),v(0,1));
260  swap(v(1,0),v(1,1));
261  }
262 
263  getGivensRotation(p(0,0), p(1,0), c, s);
264 
265  m(0,0) = p(0,0)*c - s*p(1,0);
266  m(1,1) = p(0,1)*s + c*p(1,1);
267  m(0,1) = 0.0;
268 
269  assignGivensRotation(c,s,u);
270  }
271 
272  return;
273 }
274 
275 // Change this so that the bidiagonal terms are not returned
276 // as a complex matrix
277 template<class Cmplx>
278  DEVICEHOST
280  Matrix<Cmplx,3> & u,
281  Matrix<Cmplx,3> & v)
282 {
283  Matrix<Cmplx,3> p;
284  Array<Cmplx,3> vec;
285  Matrix<Cmplx,3> temp;
286 
287 
288  const Cmplx COMPLEX_UNITY = makeComplex<Cmplx>(1,0);
289  const Cmplx COMPLEX_ZERO = makeComplex<Cmplx>(0,0);
290  // Step 1: build the first left reflector v,
291  // calculate the first left rotation
292  // apply to the original matrix
293  typename RealTypeId<Cmplx>::Type x = cabs(mat(1,0));
294  typename RealTypeId<Cmplx>::Type y = cabs(mat(2,0));
295  typename RealTypeId<Cmplx>::Type norm1 = quadSum(x,y);
296  typename RealTypeId<Cmplx>::Type beta;
297  Cmplx w, tau, z;
298 
299  if(norm1 == 0 && mat(0,0).y == 0){
300  p = mat;
301  }else{
302  Array<Cmplx,3> temp_vec;
303  copyColumn(mat, 0, &temp_vec);
304 
305  beta = getNorm(temp_vec);
306 
307  if(mat(0,0).x > 0.0){ beta = -beta; }
308 
309  w.x = mat(0,0).x - beta; // work around for LLVM
310  w.y = mat(0,0).y;
311  norm1 = cabs(w);
312  w = Conj(w)/norm1;
313 
314  // Assign the vector elements
315  vec[0] = COMPLEX_UNITY;
316  vec[1] = mat(1,0)*w/norm1;
317  vec[2] = mat(2,0)*w/norm1;
318 
319  // Now compute tau
320  tau.x = (beta - mat(0,0).x)/beta;
321  tau.y = mat(0,0).y/beta;
322  // construct the Householder matrix
323  constructHHMat(tau, vec, temp);
324  p = conj(temp)*mat;
325  u = temp;
326  }
327 
328  // Step 2: build the first right reflector
329  typename RealTypeId<Cmplx>::Type norm2 = cabs(p(0,2));
330  if(norm2 != 0.0 || p(0,1).y != 0.0){
331  norm1 = cabs(p(0,1));
332  beta = quadSum(norm1,norm2);
333  vec[0] = COMPLEX_ZERO;
334  vec[1] = COMPLEX_UNITY;
335 
336  if( p(0,1).x > 0.0 ){ beta = -beta; }
337  w.x = p(0,1).x-beta; // work around for LLVM
338  w.y = p(0,1).y;
339  norm1 = cabs(w);
340  w = Conj(w)/norm1;
341  z = Conj(p(0,2))/norm1;
342  vec[2] = z*Conj(w);
343 
344  tau.x = (beta - p(0,1).x)/beta; // work around for LLVM
345  tau.y = (- p(0,1).y)/beta;
346  // construct the Householder matrix
347  constructHHMat(tau, vec, temp);
348  p = p*temp;
349  v = temp;
350  }
351 
352  // Step 3: build the second left reflector
353  norm2 = cabs(p(2,1));
354  if(norm2 != 0.0 || p(1,1).y != 0.0){
355  norm1 = cabs(p(1,1));
356  beta = quadSum(norm1,norm2);
357 
358  // Set the first two elements of the vector
359  vec[0] = COMPLEX_ZERO;
360  vec[1] = COMPLEX_UNITY;
361 
362  if( p(1,1).x > 0 ){ beta = -beta; }
363  w.x = p(1,1).x - beta; // work around for LLVM
364  w.y = p(1,1).y;
365  norm1 = cabs(w);
366  w = Conj(w)/norm1;
367  z = p(2,1)/norm1;
368  vec[2] = z*w;
369 
370  tau.x = (beta - p(1,1).x)/beta;
371  tau.y = p(1,1).y/beta;
372  // I could very easily change the definition of tau
373  // so that we wouldn't need to call conj(temp) below.
374  // I would have to be careful to make sure this change
375  // is consistent with the other parts of the code.
376  constructHHMat(tau, vec, temp);
377  p = conj(temp)*p;
378  u = u*temp;
379  }
380 
381  // Step 4: build the second right reflector
382  setIdentity(&temp);
383  if( p(1,2).y != 0.0 ){
384  beta = cabs(p(1,2));
385  if( p(1,2).x > 0.0 ){ beta = -beta; }
386  temp(2,2) = Conj(p(1,2))/beta;
387  p(2,2) = p(2,2)*temp(2,2); // This is the only element of p needed below
388  v = v*temp;
389  }
390 
391 
392  // Step 5: build the third left reflector
393  if( p(2,2).y != 0.0 ){
394  beta = cabs(p(2,2));
395  if( p(2,2).x > 0.0 ){ beta = -beta; }
396  temp(2,2) = p(2,2)/beta;
397  u = u*temp;
398  }
399  return;
400 }
401 
402 
403 
404 template<class Real>
405  DEVICEHOST
406 void bdSVD(Matrix<Real,3>& u, Matrix<Real,3>& v, Matrix<Real,3>& b, int max_it)
407 {
408 
409  Real c,s;
410 
411  // set u and v matrices equal to the identity
412  setIdentity(&u);
413  setIdentity(&v);
414 
415  Real alpha, beta;
416 
417  int it=0;
418  do{
419 
420  if( fabs(b(0,1)) < SVDPREC*( fabs(b(0,0)) + fabs(b(1,1)) ) ){ b(0,1) = 0.0; }
421  if( fabs(b(1,2)) < SVDPREC*( fabs(b(0,0)) + fabs(b(2,2)) ) ){ b(1,2) = 0.0; }
422 
423  if( b(0,1) != 0.0 && b(1,2) != 0.0 ){
424  if( b(0,0) == 0.0 ){
425 
426  getGivensRotation(-b(0,1), b(1,1), s, c);
427 
428  for(int i=0; i<3; ++i){
429  alpha = u(i,0);
430  beta = u(i,1);
431  u(i,0) = alpha*c - beta*s;
432  u(i,1) = alpha*s + beta*c;
433  }
434 
435  b(1,1) = b(0,1)*s + b(1,1)*c;
436  b(0,1) = 0.0;
437 
438  b(0,2) = -b(1,2)*s;
439  b(1,2) *= c;
440 
441  getGivensRotation(-b(0,2), b(2,2), s, c);
442 
443  for(int i=0; i<3; ++i){
444  alpha = u(i,0);
445  beta = u(i,2);
446  u(i,0) = alpha*c - beta*s;
447  u(i,2) = alpha*s + beta*c;
448  }
449  b(2,2) = b(0,2)*s + b(2,2)*c;
450  b(0,2) = 0.0;
451 
452  }else if( b(1,1) == 0.0 ){
453  // same block
454  getGivensRotation(-b(1,2), b(2,2), s, c);
455  for(int i=0; i<3; ++i){
456  alpha = u(i,1);
457  beta = u(i,2);
458  u(i,1) = alpha*c - beta*s;
459  u(i,2) = alpha*s + beta*c;
460  }
461  b(2,2) = b(1,2)*s + b(2,2)*c;
462  b(1,2) = 0.0;
463  // end block
464  }else if( b(2,2) == 0.0 ){
465 
466  getGivensRotation(b(1,1), -b(1,2), c, s);
467  for(int i=0; i<3; ++i){
468  alpha = v(i,1);
469  beta = v(i,2);
470  v(i,1) = alpha*c + beta*s;
471  v(i,2) = -alpha*s + beta*c;
472  }
473  b(1,1) = b(1,1)*c + b(1,2)*s;
474  b(1,2) = 0.0;
475 
476  b(0,2) = -b(0,1)*s;
477  b(0,1) *= c;
478 
479  // apply second rotation, to remove b02
480  getGivensRotation(b(0,0), -b(0,2), c, s);
481  for(int i=0; i<3; ++i){
482  alpha = v(i,0);
483  beta = v(i,2);
484  v(i,0) = alpha*c + beta*s;
485  v(i,2) = -alpha*s + beta*c;
486  }
487  b(0,0) = b(0,0)*c + b(0,2)*s;
488  b(0,2) = 0.0;
489 
490  }else{ // Else entering normal QR iteration
491 
492  Real lambda_max;
493  getLambdaMax(b, lambda_max); // defined above
494 
495  alpha = b(0,0)*b(0,0) - lambda_max;
496  beta = b(0,0)*b(0,1);
497 
498  // c*beta + s*alpha = 0
499  getGivensRotation(alpha, beta, c, s);
500  // Multiply right v matrix
501  accumGivensRotation(0, c, s, v);
502  // apply to bidiagonal matrix, this generates b(1,0)
503  alpha = b(0,0);
504  beta = b(0,1);
505 
506  b(0,0) = alpha*c - beta*s;
507  b(0,1) = alpha*s + beta*c;
508  b(1,0) = -b(1,1)*s;
509  b(1,1) = b(1,1)*c;
510 
511  // Calculate the second Givens rotation (this time on the left)
512  getGivensRotation(b(0,0), b(1,0), c, s);
513 
514  // Multiply left u matrix
515  accumGivensRotation(0, c, s, u);
516 
517  b(0,0) = b(0,0)*c - b(1,0)*s;
518  alpha = b(0,1);
519  beta = b(1,1);
520  b(0,1) = alpha*c - beta*s;
521  b(1,1) = alpha*s + beta*c;
522  b(0,2) = -b(1,2)*s;
523  b(1,2) = b(1,2)*c;
524  // Following from the definition of the Givens rotation, b(1,0) should be equal to zero.
525  b(1,0) = 0.0;
526 
527  // Calculate the third Givens rotation (on the right)
528  getGivensRotation(b(0,1), b(0,2), c, s);
529 
530  // Multiply the right v matrix
531  accumGivensRotation(1, c, s, v);
532 
533  b(0,1) = b(0,1)*c - b(0,2)*s;
534  alpha = b(1,1);
535  beta = b(1,2);
536 
537  b(1,1) = alpha*c - beta*s;
538  b(1,2) = alpha*s + beta*c;
539  b(2,1) = -b(2,2)*s;
540  b(2,2) *= c;
541  b(0,2) = 0.0;
542 
543 
544  // Calculate the fourth Givens rotation (on the left)
545  getGivensRotation(b(1,1), b(2,1), c, s);
546  // Multiply left u matrix
547  accumGivensRotation(1, c, s, u);
548  // Eliminate b(2,1)
549  b(1,1) = b(1,1)*c - b(2,1)*s;
550  alpha = b(1,2);
551  beta = b(2,2);
552  b(1,2) = alpha*c - beta*s;
553  b(2,2) = alpha*s + beta*c;
554  b(2,1) = 0.0;
555 
556  } // end of normal QR iteration
557 
558 
559  }else if( b(0,1) == 0.0 ){
560  Matrix<Real,2> m_small, u_small, v_small;
561 
562  m_small(0,0) = b(1,1);
563  m_small(0,1) = b(1,2);
564  m_small(1,0) = b(2,1);
565  m_small(1,1) = b(2,2);
566 
567  smallSVD(u_small, v_small, m_small);
568 
569  b(1,1) = m_small(0,0);
570  b(1,2) = m_small(0,1);
571  b(2,1) = m_small(1,0);
572  b(2,2) = m_small(1,1);
573 
574 
575  for(int i=0; i<3; ++i){
576  alpha = u(i,1);
577  beta = u(i,2);
578  u(i,1) = alpha*u_small(0,0) + beta*u_small(1,0);
579  u(i,2) = alpha*u_small(0,1) + beta*u_small(1,1);
580 
581  alpha = v(i,1);
582  beta = v(i,2);
583  v(i,1) = alpha*v_small(0,0) + beta*v_small(1,0);
584  v(i,2) = alpha*v_small(0,1) + beta*v_small(1,1);
585  }
586 
587 
588  }else if( b(1,2) == 0.0 ){
589  Matrix<Real,2> m_small, u_small, v_small;
590 
591  m_small(0,0) = b(0,0);
592  m_small(0,1) = b(0,1);
593  m_small(1,0) = b(1,0);
594  m_small(1,1) = b(1,1);
595 
596  smallSVD(u_small, v_small, m_small);
597 
598  b(0,0) = m_small(0,0);
599  b(0,1) = m_small(0,1);
600  b(1,0) = m_small(1,0);
601  b(1,1) = m_small(1,1);
602 
603  for(int i=0; i<3; ++i){
604  alpha = u(i,0);
605  beta = u(i,1);
606  u(i,0) = alpha*u_small(0,0) + beta*u_small(1,0);
607  u(i,1) = alpha*u_small(0,1) + beta*u_small(1,1);
608 
609  alpha = v(i,0);
610  beta = v(i,1);
611  v(i,0) = alpha*v_small(0,0) + beta*v_small(1,0);
612  v(i,1) = alpha*v_small(0,1) + beta*v_small(1,1);
613  }
614 
615  } // end if b(1,2) == 0
616  it++;
617  } while( (b(0,1) != 0.0 || b(1,2) != 0.0) && it < max_it);
618 
619 
620  for(int i=0; i<3; ++i){
621  if( b(i,i) < 0.0) {
622  b(i,i) *= -1;
623  for(int j=0; j<3; ++j){
624  v(j,i) *= -1;
625  }
626  }
627  }
628  return;
629 }
630 
631 
632 
633 template<class Cmplx>
634  DEVICEHOST
635 void computeSVD(const Matrix<Cmplx,3> & m,
636  Matrix<Cmplx,3>& u,
637  Matrix<Cmplx,3>& v,
638  typename RealTypeId<Cmplx>::Type singular_values[3])
639 {
640 
641  getRealBidiagMatrix<Cmplx>(m, u, v);
642  Matrix<typename RealTypeId<Cmplx>::Type,3> bd, u_real, v_real;
643  // Make real
644  for(int i=0; i<3; ++i){
645  for(int j=0; j<3; ++j){
646  bd(i,j) = (conj(u)*m*(v))(i,j).x;
647  }
648  }
649 
650  bdSVD(u_real, v_real, bd, 500);
651  for(int i=0; i<3; ++i){
652  singular_values[i] = bd(i,i);
653  }
654 
655  u = u*u_real;
656  v = v*v_real;
657 
658  return;
659 }
660 
661 //} // end namespace quda
662 
663 
664 #endif // _SVD_QUDA_H
int y[4]
DEVICEHOST float getNorm(const Array< float2, 3 > &a)
Definition: svd_quda.h:52
Matrix< N, std::complex< T > > conj(const Matrix< N, std::complex< T > > &mat)
__host__ __device__ ValueType exp(ValueType x)
Definition: complex_quda.h:85
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
DEVICEHOST void getGivensRotation(const Real &alpha, const Real &beta, Real &c, Real &s)
Definition: svd_quda.h:110
DEVICEHOST void getLambdaMax(const Matrix< Real, 3 > &b, Real &lambda_max)
Definition: svd_quda.h:91
void mat(void *out, void **fatlink, void **longlink, void *in, double kappa, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision)
__device__ __host__ void outerProd(const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m)
Definition: quda_matrix.h:720
DEVICEHOST void smallSVD(Matrix< Real, 2 > &u, Matrix< Real, 2 > &v, Matrix< Real, 2 > &m)
Definition: svd_quda.h:162
__device__ __host__ Cmplx Conj(const Cmplx &a)
Definition: quda_matrix.h:267
#define DEVICEHOST
Definition: svd_quda.h:4
__device__ __host__ void copyColumn(const Matrix< T, N > &m, int c, Array< T, N > *a)
Definition: quda_matrix.h:709
double norm2(Float *v, int len)
int x[4]
__device__ __host__ void setIdentity(Matrix< T, N > *m)
Definition: quda_matrix.h:597
DEVICEHOST PromoteTypeId< T, U >::Type quadSum(const T &a, const U &b)
Definition: svd_quda.h:32
#define SVDPREC
Definition: svd_quda.h:5
__host__ __device__ ValueType log(ValueType x)
Definition: complex_quda.h:90
DEVICEHOST void swap(Real &a, Real &b)
Definition: svd_quda.h:153
DEVICEHOST void assignGivensRotation(const Real &c, const Real &s, Matrix< Real, 2 > &m)
Definition: svd_quda.h:143
DEVICEHOST RealTypeId< Cmplx >::Type cabs(const Cmplx &z)
Definition: svd_quda.h:13
DEVICEHOST void constructHHMat(const T &tau, const Array< T, 3 > &v, Matrix< T, 3 > &hh)
Definition: svd_quda.h:75
DEVICEHOST void computeSVD(const Matrix< Cmplx, 3 > &m, Matrix< Cmplx, 3 > &u, Matrix< Cmplx, 3 > &v, typename RealTypeId< Cmplx >::Type singular_values[3])
Definition: svd_quda.h:635
#define LOG2
Definition: svd_quda.h:6
DEVICEHOST void bdSVD(Matrix< Real, 3 > &u, Matrix< Real, 3 > &v, Matrix< Real, 3 > &b, int max_it)
Definition: svd_quda.h:406
VOLATILE spinorFloat * s
DEVICEHOST void getRealBidiagMatrix(const Matrix< Cmplx, 3 > &mat, Matrix< Cmplx, 3 > &u, Matrix< Cmplx, 3 > &v)
Definition: svd_quda.h:279
DEVICEHOST void accumGivensRotation(int index, const Real &c, const Real &s, Matrix< Real, 3 > &m)
Definition: svd_quda.h:129