QUDA v0.4.0
A library for QCD on GPUs
|
00001 #ifndef _SVD_QUDA_H_ 00002 #define _SVD_QUDA_H_ 00003 00004 #include "quda_matrix.h" 00005 00006 #include <iostream> 00007 #include <fstream> 00008 #include <string> 00009 #include <sstream> 00010 00011 #define DEVICEHOST __device__ __host__ 00012 #define SVDPREC 1e-10 00013 #define LOG2 0.69314718055994530942 00014 00015 00016 namespace hisq{ 00017 00018 template<class Cmplx> 00019 inline DEVICEHOST 00020 typename RealTypeId<Cmplx>::Type cabs(const Cmplx & z) 00021 { 00022 typename RealTypeId<Cmplx>::Type max, ratio, square; 00023 if(fabs(z.x) > fabs(z.y)){ max = z.x; ratio = z.y/max; }else{ max=z.y; ratio = z.x/max; } 00024 square = max*max*(1.0 + ratio*ratio); 00025 return sqrt(square); 00026 } 00027 00028 00029 /* 00030 template<class Cmplx> 00031 inline DEVICEHOST 00032 typename RealTypeId<Cmplx>::Type cabs(const Cmplx & z) 00033 { 00034 return sqrt(z.x*z.x + z.y*z.y); 00035 } 00036 */ 00037 00038 template<class T, class U> 00039 inline DEVICEHOST typename PromoteTypeId<T,U>::Type quadSum(const T & a, const U & b){ 00040 typename PromoteTypeId<T,U>::Type ratio, square, max; 00041 if(fabs(a) > fabs(b)){ max = a; ratio = b/a; }else{ max=b; ratio = a/b; } 00042 square = max*max*(1.0 + ratio*ratio); 00043 return sqrt(square); 00044 } 00045 00046 /* 00047 template<class T, class U> 00048 inline DEVICEHOST 00049 T quadSum(const T & a, const U & b) 00050 { 00051 return sqrt(a*a + b*b); 00052 } 00053 */ 00054 00055 00056 00057 // In future iterations of the code, I would like to use templates to compute the norm 00058 DEVICEHOST 00059 float getNorm(const Array<float2,3>& a){ 00060 float temp1, temp2, temp3; 00061 temp1 = cabs(a[0]); 00062 temp2 = cabs(a[1]); 00063 temp3 = quadSum(temp1,temp2); 00064 temp1 = cabs(a[2]); 00065 return quadSum(temp1,temp3); 00066 } 00067 00068 00069 DEVICEHOST 00070 double getNorm(const Array<double2,3>& a){ 00071 double temp1, temp2, temp3; 00072 temp1 = cabs(a[0]); 00073 temp2 = cabs(a[1]); 00074 temp3 = quadSum(temp1,temp2); 00075 temp1 = cabs(a[2]); 00076 return quadSum(temp1,temp3); 00077 } 00078 00079 00080 template<class T> 00081 DEVICEHOST 00082 void constructHHMat(const T & tau, const Array<T,3> & v, Matrix<T,3> & hh) 00083 { 00084 Matrix<T,3> temp1, temp2; 00085 outerProd(v,v,&temp1); 00086 00087 temp2 = conj(tau)*temp1; 00088 00089 setIdentity(&temp1); 00090 00091 hh = temp1 - temp2; 00092 return; 00093 } 00094 00095 00096 template<class Real> 00097 DEVICEHOST 00098 void getLambdaMax(const Matrix<Real,3> & b, Real & lambda_max){ 00099 00100 Real m11 = b(1,1)*b(1,1) + b(0,1)*b(0,1); 00101 Real m22 = b(2,2)*b(2,2) + b(1,2)*b(1,2); 00102 Real m12 = b(1,1)*b(1,2); 00103 Real dm = (m11 - m22)/2.0; 00104 00105 Real norm1 = quadSum(dm, m12); 00106 if( dm >= 0.0 ){ 00107 lambda_max = m22 - (m12*m12)/(norm1 + dm); 00108 }else{ 00109 lambda_max = m22 + (m12*m12)/(norm1 - dm); 00110 } 00111 return; 00112 } 00113 00114 00115 template<class Real> 00116 DEVICEHOST 00117 void getGivensRotation(const Real & alpha, const Real & beta, Real & c, Real & s) 00118 { 00119 Real ratio; 00120 if( beta == 0.0 ){ 00121 c = 1.0; s = 0.0; 00122 }else if( fabs(beta) > fabs(alpha) ){ 00123 ratio = -alpha/beta; 00124 s = rsqrt(1.0 + ratio*ratio); 00125 c = ratio*s; 00126 }else{ 00127 ratio = -beta/alpha; 00128 c = rsqrt(1.0 + ratio*ratio); 00129 s = ratio*c; 00130 } 00131 return; 00132 } 00133 00134 template<class Real> 00135 inline DEVICEHOST 00136 void accumGivensRotation(int index, const Real & c, const Real & s, Matrix<Real,3> & m){ 00137 int index_p1 = index+1; 00138 Real alpha, beta; 00139 for(int i=0; i<3; ++i){ 00140 alpha = m(i,index); 00141 beta = m(i,index_p1); 00142 m(i,index) = alpha*c - beta*s; 00143 m(i,index_p1) = alpha*s + beta*c; 00144 } 00145 return; 00146 } 00147 00148 template<class Real> 00149 inline DEVICEHOST 00150 void assignGivensRotation(const Real & c, const Real & s, Matrix<Real,2> & m){ 00151 m(0,0) = c; 00152 m(0,1) = s; 00153 m(1,0) = -s; 00154 m(1,1) = c; 00155 return; 00156 } 00157 00158 template<class Real> 00159 inline DEVICEHOST 00160 void swap(Real & a, Real & b){ 00161 Real temp = a; 00162 a = b; 00163 b = temp; 00164 return; 00165 } 00166 00167 template<class Real> 00168 inline DEVICEHOST 00169 void smallSVD(Matrix<Real,2> & u, Matrix<Real,2> & v, Matrix<Real,2> & m){ 00170 // set u and v to the 2x2 identity matrix 00171 setIdentity(&u); 00172 setIdentity(&v); 00173 00174 Real c, s; 00175 00176 if( m(0,0) == 0.0 ){ 00177 00178 getGivensRotation(m(0,1), m(1,1), c, s); 00179 00180 m(0,0) = m(0,1)*c - m(1,1)*s; 00181 m(0,1) = 0.0; 00182 m(1,1) = 0.0; 00183 00184 // exchange columns in v 00185 v(0,0) = 0.0; 00186 v(0,1) = 1.0; 00187 v(1,0) = 1.0; 00188 v(1,1) = 0.0; 00189 00190 // u is a Givens rotation 00191 assignGivensRotation(c, s, u); 00192 00193 }else if( m(1,1) == 0.0 ){ 00194 00195 getGivensRotation(m(0,0), m(0,1), c, s); 00196 00197 m(0,0) = m(0,0)*c - m(0,1)*s; 00198 m(0,1) = 0.0; 00199 m(1,1) = 0.0; 00200 00201 assignGivensRotation(c,s,v); 00202 00203 }else if( m(0,1) != 0.0 ){ 00204 00205 // need to calculate (m(1,1)**2 + m(0,1)**2 - m(0,0)**2)/(2*m(0,0)*m(0,1)) 00206 Real abs01 = fabs(m(0,1)); 00207 Real abs11 = fabs(m(1,1)); 00208 Real min, max; 00209 if( abs01 > abs11 ){ min = abs11; max = abs01; } 00210 else { min = abs01; max = abs11; } 00211 00212 00213 Real ratio = min/max; 00214 Real alpha = 2.0*log(max) + log(1.0 + ratio*ratio); 00215 00216 Real abs00 = fabs(m(0,0)); 00217 Real beta = 2.0*log(abs00); 00218 00219 int sign; 00220 Real temp; 00221 00222 if( alpha > beta ){ 00223 sign = 1; 00224 temp = alpha + log(1.0 - exp(beta-alpha)); 00225 }else{ 00226 sign = -1; 00227 temp = beta + log(1.0 - exp(alpha-beta)); 00228 } 00229 temp -= LOG2 + log(abs00) + log(abs01); 00230 temp = sign*exp(temp); 00231 00232 if( m(0,0) < 0.0 ){ temp *= -1.0; } 00233 if( m(0,1) < 0.0 ){ temp *= -1.0; } 00234 00235 beta = quadSum(1.0, temp); 00236 00237 if( temp >= 0.0 ){ 00238 temp = 1.0/(temp + beta); 00239 }else{ 00240 temp = 1.0/(temp - beta); 00241 } 00242 00243 // Calculate beta = sqrt(1 + temp**2) 00244 beta = quadSum(1.0, temp); 00245 00246 c = 1.0/beta; 00247 s = temp*c; 00248 00249 Matrix<Real,2> p; 00250 00251 p(0,0) = c*m(0,0) - s*m(0,1); 00252 p(1,0) = - s*m(1,1); 00253 p(0,1) = s*m(0,0) + c*m(0,1); 00254 p(1,1) = c*m(1,1); 00255 00256 assignGivensRotation(c, s, v); 00257 00258 // Make the column with the largest norm the first column 00259 alpha = quadSum(p(0,0),p(1,0)); 00260 beta = quadSum(p(0,1),p(1,1)); 00261 00262 if(alpha < beta){ 00263 swap(p(0,0),p(0,1)); 00264 swap(p(1,0),p(1,1)); 00265 00266 swap(v(0,0),v(0,1)); 00267 swap(v(1,0),v(1,1)); 00268 } 00269 00270 getGivensRotation(p(0,0), p(1,0), c, s); 00271 00272 m(0,0) = p(0,0)*c - s*p(1,0); 00273 m(1,1) = p(0,1)*s + c*p(1,1); 00274 m(0,1) = 0.0; 00275 00276 assignGivensRotation(c,s,u); 00277 } 00278 00279 return; 00280 } 00281 00282 // Change this so that the bidiagonal terms are not returned 00283 // as a complex matrix 00284 template<class Cmplx> 00285 DEVICEHOST 00286 void getRealBidiagMatrix(const Matrix<Cmplx,3> & mat, 00287 Matrix<Cmplx,3> & u, 00288 Matrix<Cmplx,3> & v) 00289 { 00290 Matrix<Cmplx,3> p; 00291 Array<Cmplx,3> vec; 00292 Matrix<Cmplx,3> temp; 00293 00294 00295 const Cmplx COMPLEX_UNITY = makeComplex<Cmplx>(1,0); 00296 const Cmplx COMPLEX_ZERO = makeComplex<Cmplx>(0,0); 00297 // Step 1: build the first left reflector v, 00298 // calculate the first left rotation 00299 // apply to the original matrix 00300 typename RealTypeId<Cmplx>::Type x = cabs(mat(1,0)); 00301 typename RealTypeId<Cmplx>::Type y = cabs(mat(2,0)); 00302 typename RealTypeId<Cmplx>::Type norm1 = quadSum(x,y); 00303 typename RealTypeId<Cmplx>::Type beta; 00304 Cmplx w, tau, z; 00305 00306 if(norm1 == 0 && mat(0,0).y == 0){ 00307 p = mat; 00308 }else{ 00309 Array<Cmplx,3> temp_vec; 00310 copyColumn(mat, 0, &temp_vec); 00311 00312 beta = getNorm(temp_vec); 00313 00314 if(mat(0,0).x > 0.0){ beta = -beta; } 00315 00316 w = mat(0,0) - beta; 00317 norm1 = cabs(w); 00318 w = conj(w)/norm1; 00319 00320 // Assign the vector elements 00321 vec[0] = COMPLEX_UNITY; 00322 vec[1] = mat(1,0)*w/norm1; 00323 vec[2] = mat(2,0)*w/norm1; 00324 00325 // Now compute tau 00326 tau.x = (beta - mat(0,0).x)/beta; 00327 tau.y = mat(0,0).y/beta; 00328 // construct the Householder matrix 00329 constructHHMat(tau, vec, temp); 00330 p = conj(temp)*mat; 00331 u = temp; 00332 } 00333 00334 // Step 2: build the first right reflector 00335 typename RealTypeId<Cmplx>::Type norm2 = cabs(p(0,2)); 00336 if(norm2 != 0.0 || p(0,1).y != 0.0){ 00337 norm1 = cabs(p(0,1)); 00338 beta = quadSum(norm1,norm2); 00339 vec[0] = COMPLEX_ZERO; 00340 vec[1] = COMPLEX_UNITY; 00341 00342 if( p(0,1).x > 0.0 ){ beta = -beta; } 00343 w = p(0,1)-beta; 00344 norm1 = cabs(w); 00345 w = conj(w)/norm1; 00346 z = conj(p(0,2))/norm1; 00347 vec[2] = z*conj(w); 00348 00349 tau = (beta - p(0,1))/beta; 00350 // construct the Householder matrix 00351 constructHHMat(tau, vec, temp); 00352 p = p*temp; 00353 v = temp; 00354 } 00355 00356 // Step 3: build the second left reflector 00357 norm2 = cabs(p(2,1)); 00358 if(norm2 != 0.0 || p(1,1).y != 0.0){ 00359 norm1 = cabs(p(1,1)); 00360 beta = quadSum(norm1,norm2); 00361 00362 // Set the first two elements of the vector 00363 vec[0] = COMPLEX_ZERO; 00364 vec[1] = COMPLEX_UNITY; 00365 00366 if( p(1,1).x > 0 ){ beta = -beta; } 00367 w = p(1,1) - beta; 00368 norm1 = cabs(w); 00369 w = conj(w)/norm1; 00370 z = p(2,1)/norm1; 00371 vec[2] = z*w; 00372 00373 tau.x = (beta - p(1,1).x)/beta; 00374 tau.y = p(1,1).y/beta; 00375 // I could very easily change the definition of tau 00376 // so that we wouldn't need to call conj(temp) below. 00377 // I would have to be careful to make sure this change 00378 // is consistent with the other parts of the code. 00379 constructHHMat(tau, vec, temp); 00380 p = conj(temp)*p; 00381 u = u*temp; 00382 } 00383 00384 // Step 4: build the second right reflector 00385 setIdentity(&temp); 00386 if( p(1,2).y != 0.0 ){ 00387 beta = cabs(p(1,2)); 00388 if( p(1,2).x > 0.0 ){ beta = -beta; } 00389 temp(2,2) = conj(p(1,2))/beta; 00390 p(2,2) = p(2,2)*temp(2,2); // This is the only element of p needed below 00391 v = v*temp; 00392 } 00393 00394 00395 // Step 5: build the third left reflector 00396 if( p(2,2).y != 0.0 ){ 00397 beta = cabs(p(2,2)); 00398 if( p(2,2).x > 0.0 ){ beta = -beta; } 00399 temp(2,2) = p(2,2)/beta; 00400 u = u*temp; 00401 } 00402 return; 00403 } 00404 00405 00406 00407 template<class Real> 00408 DEVICEHOST 00409 void bdSVD(Matrix<Real,3>& u, Matrix<Real,3>& v, Matrix<Real,3>& b, int max_it) 00410 { 00411 00412 Real c,s; 00413 00414 // set u and v matrices equal to the identity 00415 setIdentity(&u); 00416 setIdentity(&v); 00417 00418 Real alpha, beta; 00419 00420 int it=0; 00421 do{ 00422 00423 if( fabs(b(0,1)) < SVDPREC*( fabs(b(0,0)) + fabs(b(1,1)) ) ){ b(0,1) = 0.0; } 00424 if( fabs(b(1,2)) < SVDPREC*( fabs(b(0,0)) + fabs(b(2,2)) ) ){ b(1,2) = 0.0; } 00425 00426 if( b(0,1) != 0.0 && b(1,2) != 0.0 ){ 00427 if( b(0,0) == 0.0 ){ 00428 00429 getGivensRotation(-b(0,1), b(1,1), s, c); 00430 00431 for(int i=0; i<3; ++i){ 00432 alpha = u(i,0); 00433 beta = u(i,1); 00434 u(i,0) = alpha*c - beta*s; 00435 u(i,1) = alpha*s + beta*c; 00436 } 00437 00438 b(1,1) = b(0,1)*s + b(1,1)*c; 00439 b(0,1) = 0.0; 00440 00441 b(0,2) = -b(1,2)*s; 00442 b(1,2) *= c; 00443 00444 getGivensRotation(-b(0,2), b(2,2), s, c); 00445 00446 for(int i=0; i<3; ++i){ 00447 alpha = u(i,0); 00448 beta = u(i,2); 00449 u(i,0) = alpha*c - beta*s; 00450 u(i,2) = alpha*s + beta*c; 00451 } 00452 b(2,2) = b(0,2)*s + b(2,2)*c; 00453 b(0,2) = 0.0; 00454 00455 }else if( b(1,1) == 0.0 ){ 00456 // same block 00457 getGivensRotation(-b(1,2), b(2,2), s, c); 00458 for(int i=0; i<3; ++i){ 00459 alpha = u(i,1); 00460 beta = u(i,2); 00461 u(i,1) = alpha*c - beta*s; 00462 u(i,2) = alpha*s + beta*c; 00463 } 00464 b(2,2) = b(1,2)*s + b(2,2)*c; 00465 b(1,2) = 0.0; 00466 // end block 00467 }else if( b(2,2) == 0.0 ){ 00468 00469 getGivensRotation(b(1,1), -b(1,2), c, s); 00470 for(int i=0; i<3; ++i){ 00471 alpha = v(i,1); 00472 beta = v(i,2); 00473 v(i,1) = alpha*c + beta*s; 00474 v(i,2) = -alpha*s + beta*c; 00475 } 00476 b(1,1) = b(1,1)*c + b(1,2)*s; 00477 b(1,2) = 0.0; 00478 00479 b(0,2) = -b(0,1)*s; 00480 b(0,1) *= c; 00481 00482 // apply second rotation, to remove b02 00483 getGivensRotation(b(0,0), -b(0,2), c, s); 00484 for(int i=0; i<3; ++i){ 00485 alpha = v(i,0); 00486 beta = v(i,2); 00487 v(i,0) = alpha*c + beta*s; 00488 v(i,2) = -alpha*s + beta*c; 00489 } 00490 b(0,0) = b(0,0)*c + b(0,2)*s; 00491 b(0,2) = 0.0; 00492 00493 }else{ // Else entering normal QR iteration 00494 00495 Real lambda_max; 00496 getLambdaMax(b, lambda_max); // defined above 00497 00498 alpha = b(0,0)*b(0,0) - lambda_max; 00499 beta = b(0,0)*b(0,1); 00500 00501 // c*beta + s*alpha = 0 00502 getGivensRotation(alpha, beta, c, s); 00503 // Multiply right v matrix 00504 accumGivensRotation(0, c, s, v); 00505 // apply to bidiagonal matrix, this generates b(1,0) 00506 alpha = b(0,0); 00507 beta = b(0,1); 00508 00509 b(0,0) = alpha*c - beta*s; 00510 b(0,1) = alpha*s + beta*c; 00511 b(1,0) = -b(1,1)*s; 00512 b(1,1) = b(1,1)*c; 00513 00514 // Calculate the second Givens rotation (this time on the left) 00515 getGivensRotation(b(0,0), b(1,0), c, s); 00516 00517 // Multiply left u matrix 00518 accumGivensRotation(0, c, s, u); 00519 00520 b(0,0) = b(0,0)*c - b(1,0)*s; 00521 alpha = b(0,1); 00522 beta = b(1,1); 00523 b(0,1) = alpha*c - beta*s; 00524 b(1,1) = alpha*s + beta*c; 00525 b(0,2) = -b(1,2)*s; 00526 b(1,2) = b(1,2)*c; 00527 // Following from the definition of the Givens rotation, b(1,0) should be equal to zero. 00528 b(1,0) = 0.0; 00529 00530 // Calculate the third Givens rotation (on the right) 00531 getGivensRotation(b(0,1), b(0,2), c, s); 00532 00533 // Multiply the right v matrix 00534 accumGivensRotation(1, c, s, v); 00535 00536 b(0,1) = b(0,1)*c - b(0,2)*s; 00537 alpha = b(1,1); 00538 beta = b(1,2); 00539 00540 b(1,1) = alpha*c - beta*s; 00541 b(1,2) = alpha*s + beta*c; 00542 b(2,1) = -b(2,2)*s; 00543 b(2,2) *= c; 00544 b(0,2) = 0.0; 00545 00546 00547 // Calculate the fourth Givens rotation (on the left) 00548 getGivensRotation(b(1,1), b(2,1), c, s); 00549 // Multiply left u matrix 00550 accumGivensRotation(1, c, s, u); 00551 // Eliminate b(2,1) 00552 b(1,1) = b(1,1)*c - b(2,1)*s; 00553 alpha = b(1,2); 00554 beta = b(2,2); 00555 b(1,2) = alpha*c - beta*s; 00556 b(2,2) = alpha*s + beta*c; 00557 b(2,1) = 0.0; 00558 00559 } // end of normal QR iteration 00560 00561 00562 }else if( b(0,1) == 0.0 ){ 00563 Matrix<Real,2> m_small, u_small, v_small; 00564 00565 m_small(0,0) = b(1,1); 00566 m_small(0,1) = b(1,2); 00567 m_small(1,0) = b(2,1); 00568 m_small(1,1) = b(2,2); 00569 00570 smallSVD(u_small, v_small, m_small); 00571 00572 b(1,1) = m_small(0,0); 00573 b(1,2) = m_small(0,1); 00574 b(2,1) = m_small(1,0); 00575 b(2,2) = m_small(1,1); 00576 00577 00578 for(int i=0; i<3; ++i){ 00579 alpha = u(i,1); 00580 beta = u(i,2); 00581 u(i,1) = alpha*u_small(0,0) + beta*u_small(1,0); 00582 u(i,2) = alpha*u_small(0,1) + beta*u_small(1,1); 00583 00584 alpha = v(i,1); 00585 beta = v(i,2); 00586 v(i,1) = alpha*v_small(0,0) + beta*v_small(1,0); 00587 v(i,2) = alpha*v_small(0,1) + beta*v_small(1,1); 00588 } 00589 00590 00591 }else if( b(1,2) == 0.0 ){ 00592 Matrix<Real,2> m_small, u_small, v_small; 00593 00594 m_small(0,0) = b(0,0); 00595 m_small(0,1) = b(0,1); 00596 m_small(1,0) = b(1,0); 00597 m_small(1,1) = b(1,1); 00598 00599 smallSVD(u_small, v_small, m_small); 00600 00601 b(0,0) = m_small(0,0); 00602 b(0,1) = m_small(0,1); 00603 b(1,0) = m_small(1,0); 00604 b(1,1) = m_small(1,1); 00605 00606 for(int i=0; i<3; ++i){ 00607 alpha = u(i,0); 00608 beta = u(i,1); 00609 u(i,0) = alpha*u_small(0,0) + beta*u_small(1,0); 00610 u(i,1) = alpha*u_small(0,1) + beta*u_small(1,1); 00611 00612 alpha = v(i,0); 00613 beta = v(i,1); 00614 v(i,0) = alpha*v_small(0,0) + beta*v_small(1,0); 00615 v(i,1) = alpha*v_small(0,1) + beta*v_small(1,1); 00616 } 00617 00618 } // end if b(1,2) == 0 00619 } while( (b(0,1) != 0.0 || b(1,2) != 0.0) && it < max_it); 00620 00621 00622 for(int i=0; i<3; ++i){ 00623 if( b(i,i) < 0.0) { 00624 b(i,i) *= -1; 00625 for(int j=0; j<3; ++j){ 00626 v(j,i) *= -1; 00627 } 00628 } 00629 } 00630 return; 00631 } 00632 00633 00634 00635 template<class Cmplx> 00636 DEVICEHOST 00637 void computeSVD(const Matrix<Cmplx,3> & m, 00638 Matrix<Cmplx,3>& u, 00639 Matrix<Cmplx,3>& v, 00640 typename RealTypeId<Cmplx>::Type singular_values[3]) 00641 { 00642 00643 getRealBidiagMatrix<Cmplx>(m, u, v); 00644 Matrix<typename RealTypeId<Cmplx>::Type,3> bd, u_real, v_real; 00645 // Make real 00646 for(int i=0; i<3; ++i){ 00647 for(int j=0; j<3; ++j){ 00648 bd(i,j) = (conj(u)*m*(v))(i,j).x; 00649 } 00650 } 00651 00652 bdSVD(u_real, v_real, bd, 40); 00653 for(int i=0; i<3; ++i){ 00654 singular_values[i] = bd(i,i); 00655 } 00656 00657 u = u*u_real; 00658 v = v*v_real; 00659 00660 return; 00661 } 00662 00663 } // end namespace hisq 00664 00665 00666 #endif // _SVD_QUDA_H