37 template <
template<
typename,
int>
class Mat,
typename T,
int N,
bool fast=
true>
48 __device__ __host__
inline Cholesky(
const Mat<T,N> &A) {
49 const Mat<T,N> &L =
L_;
52 for (
int i=0; i<N; i++) {
54 for (
int j=0; j<N; j++)
if (j<i+1) {
57 for (
int k=0; k<N; k++) {
59 s.x = L(i,k).real()*L(j,k).real();
60 s.x += L(i,k).imag()*L(j,k).imag();
61 s.y = L(i,k).imag()*L(j,k).real();
62 s.y -= L(i,k).real()*L(j,k).imag();
64 s.x += L(i,k).real()*L(j,k).real();
65 s.x += L(i,k).imag()*L(j,k).imag();
66 s.y += L(i,k).imag()*L(j,k).real();
67 s.y -= L(i,k).real()*L(j,k).imag();
71 L_(i,j) = (i == j) ?
sqrt((A(i,i)-s).real()) : (A(i,j) -
s) / L(j,j).real();
73 L_(i,j) = (i == j) ? rsqrt((A(i,i)-s).real()) : (A(i,j)-
s) * L(j,j).real();
84 __device__ __host__
inline const T
D(
int i)
const {
86 if (!fast)
return L(i,i).real();
87 else return static_cast<T
>(1.0) / L(i,i).real();
96 template <
class Vector>
98 const Mat<T,N> &L =
L_;
101 for (
int i=0; i<N; i++) {
104 for (
int j=0; j<N; j++)
if (j<i) {
105 x(i).x -= L(i,j).real()*x(j).real();
106 x(i).x += L(i,j).imag()*x(j).imag();
107 x(i).y -= L(i,j).real()*x(j).imag();
108 x(i).y -= L(i,j).imag()*x(j).real();
110 if (!fast) x(i) /= L(i,i).real();
111 else x(i) *= L(i,i).real();
122 template <
class Vector>
124 const Mat<T,N> &L =
L_;
127 for (
int i=N-1; i>=0; i--) {
130 for (
int j=0; j<N; j++) if (j>=i+1) {
131 x(i).x -= L(i,j).real()*x(j).real();
132 x(i).x += L(i,j).imag()*x(j).imag();
133 x(i).y -= L(i,j).real()*x(j).imag();
134 x(i).y -= L(i,j).imag()*x(j).real();
136 if (!fast) x(i) /=L(i,i).real();
137 else x(i) *= L(i,i).real();
146 __device__ __host__
inline Mat<T,N>
invert() {
147 const Mat<T,N> &L =
L_;
152 for (
int k=0;k<N;k++) {
155 if (!fast) v(k) = complex<T>(
static_cast<T
>(1.0)/L(k,k).real());
156 else v(k) = L(k,k).real();
159 for (
int i=0; i<N; i++) if (i>k) {
160 v(i) = complex<T>(0.0);
162 for (
int j=0; j<N; j++) if (j>=k && j<i) {
163 v(i).x -= L(i,j).real() * v(j).real();
164 v(i).x += L(i,j).imag() * v(j).imag();
165 v(i).y -= L(i,j).real() * v(j).imag();
166 v(i).y -= L(i,j).imag() * v(j).real();
168 if (!fast) v(i) *=
static_cast<T
>(1.0) / L(i,i);
173 if (!fast) v(N-1) *=
static_cast<T
>(1.0) / L(N-1,N-1);
174 else v(N-1) *= L(N-1,N-1);
177 for (
int i=N-2; i>=0; i--)
if (i>=k) {
179 for (
int j=0; j<N; j++) if (j>i) {
180 v(i).x -= L(i,j).real() * v(j).real();
181 v(i).x += L(i,j).imag() * v(j).imag();
182 v(i).y -= L(i,j).real() * v(j).imag();
183 v(i).y -= L(i,j).imag() * v(j).real();
185 if (!fast) v(i) *=
static_cast<T
>(1.0) / L(i,i);
193 for(
int i=0;i<N;i++) if (i>k) Ainv(i,k) = v(i);
__device__ __host__ const T D(int i) const
Return the diagonal element of the Cholesky decomposition L(i,i)
__device__ __host__ Vector backward(const Vector &b)
Backward substition to solve L^dagger x = b.
__device__ __host__ Cholesky(const Mat< T, N > &A)
Constructor that computes the Cholesky decomposition.
__host__ __device__ ValueType sqrt(ValueType x)
Compute Cholesky decomposition of A. By default, we use a modified Cholesky which avoids the division...
void Mat(sFloat *out, gFloat **link, sFloat *in, int daggerBit, int mu)
__device__ __host__ Vector forward(const Vector &b)
Forward substition to solve Lx = b.
__device__ __host__ Mat< T, N > invert()
Compute the inverse of A (the matrix used to construct the Cholesky decomposition).
Mat< T, N > L_
The Cholesky factorization.