5 #define MAX(a, b) (a > b) ? a : b;
8 #define MAGMA_15 //version of the MAGMA library
13 #if (defined MAGMA_14)
24 #elif (defined MAGMA_15)
27 #define _cU MagmaUpper
29 #define _cR MagmaRight
32 #define _cC MagmaConjTrans
33 #define _cN MagmaNoTrans
42 magma_int_t err = magma_init();
44 if(err != MAGMA_SUCCESS) printf(
"\nError: cannot initialize MAGMA library\n");
46 int major, minor, micro;
48 magma_version( &major, &minor, µ);
49 printf(
"\nMAGMA library version: %d.%d\n\n", major, minor);
51 printf(
"\nError: MAGMA library was not compiled, check your compilation options...\n");
61 magma_int_t err = magma_finalize();
62 if(magma_finalize() != MAGMA_SUCCESS) printf(
"\nError: cannot close MAGMA library\n");
64 printf(
"\nError: MAGMA library was not compiled, check your compilation options...\n");
72 lrwork(0), liwork(0), sideLR(0), htsize(0), dtsize(0), lwork_max(0), W(0), W2(0),
73 hTau(0), dTau(0), lwork(0), rwork(0), iwork(0)
77 magma_int_t dev_info = magma_getdevice_arch();
78 if(dev_info == 0) exit(-1);
80 printf(
"\nMAGMA will use device architecture %d.\n", dev_info);
85 printf(
"\nError: MAGMA library was not compiled, check your compilation options...\n");
94 : m(m), nev(0), prec(prec), ldm(ldm), info(-1), sideLR(0), htsize(0), dtsize(0),
95 W(0), hTau(0), dTau(0)
100 magma_int_t dev_info = magma_getdevice_arch();
102 if(dev_info == 0) exit(-1);
104 printf(
"\nMAGMA will use device architecture %d.\n", dev_info);
106 const int complex_prec = 2*prec;
108 magma_int_t nbtrd = prec == 4 ? magma_get_chetrd_nb(m) : magma_get_zhetrd_nb(m);
110 llwork =
MAX(m + m*nbtrd, 2*m + m*m);
111 lrwork = 1 + 5*m + 2*m*m;
114 magma_malloc_pinned((
void**)&W2, ldm*m*complex_prec);
115 magma_malloc_pinned((
void**)&lwork, llwork*complex_prec);
116 magma_malloc_cpu((
void**)&rwork, lrwork*prec);
117 magma_malloc_cpu((
void**)&iwork, liwork*
sizeof(magma_int_t));
123 printf(
"\nError: MAGMA library was not compiled, check your compilation options...\n");
133 : m(m), nev(nev), prec(prec), ldm(ldm), info(-1)
138 magma_int_t dev_info = magma_getdevice_arch();
140 if(dev_info == 0) exit(-1);
142 printf(
"\nMAGMA will use device architecture %d.\n", dev_info);
144 const int complex_prec = 2*prec;
146 magma_int_t nbtrd = prec == 4 ? magma_get_chetrd_nb(m) : magma_get_zhetrd_nb(m);
147 magma_int_t nbqrf = prec == 4 ? magma_get_cgeqrf_nb(m) : magma_get_zgeqrf_nb(m);
149 llwork =
MAX(m + m*nbtrd, 2*m + m*m);
150 lrwork = 1 + 5*m + 2*m*m;
154 dtsize = ( 2*htsize + ((htsize + 31)/32)*32 )*nbqrf;
156 sideLR = (m - 2*nev + nbqrf)*(m + nbqrf) + m*nbqrf;
158 magma_malloc_pinned((
void**)&W, sideLR*complex_prec);
159 magma_malloc_pinned((
void**)&W2, ldm*m*complex_prec);
160 magma_malloc_pinned((
void**)&hTau, htsize*complex_prec);
161 magma_malloc((
void**)&dTau, dtsize*complex_prec);
163 magma_malloc_pinned((
void**)&lwork, llwork*complex_prec);
164 magma_malloc_cpu((
void**)&rwork, lrwork*prec);
165 magma_malloc_cpu((
void**)&iwork, liwork*
sizeof(magma_int_t));
171 printf(
"\nError: MAGMA library was not compiled, check your compilation options...\n");
184 if(dTau) magma_free(dTau);
185 if(hTau) magma_free_pinned(hTau);
187 if(W) magma_free_pinned(W);
188 magma_free_pinned(W2);
189 magma_free_pinned(lwork);
191 magma_free_cpu(rwork);
192 magma_free_cpu(iwork);
205 if(prob_size > m) printf(
"\nError in MagmaHEEVD (problem size cannot exceed given search space %d), exit ...\n", m), exit(-1);
208 cudaPointerAttributes ptr_attr;
213 cudaPointerGetAttributes(&ptr_attr, dTvecm);
215 if(ptr_attr.memoryType != cudaMemoryTypeDevice || ptr_attr.devicePointer == NULL ) printf(
"Error in MagmaHEEVD, no device pointer found."), exit(-1);
219 magma_cheevd_gpu(_cV, _cU, prob_size, (magmaFloatComplex*)dTvecm, ldm, (
float*)hTvalm, (magmaFloatComplex*)W2, ldm, (magmaFloatComplex*)lwork, llwork, (
float*)rwork, lrwork, iwork, liwork, &info);
220 if(info != 0) printf(
"\nError in MagmaHEEVD (magma_cheevd_gpu), exit ...\n"), exit(-1);
224 magma_zheevd_gpu(_cV, _cU, prob_size, (magmaDoubleComplex*)dTvecm, ldm, (
double*)hTvalm, (magmaDoubleComplex*)W2, ldm, (magmaDoubleComplex*)lwork, llwork, (
double*)rwork, lrwork, iwork, liwork, &info);
225 if(info != 0) printf(
"\nError in MagmaHEEVD (magma_zheevd_gpu), exit ...\n"), exit(-1);
231 cudaPointerGetAttributes(&ptr_attr, dTvecm);
233 if(ptr_attr.memoryType != cudaMemoryTypeHost || ptr_attr.hostPointer == NULL ) printf(
"Error in MagmaHEEVD, no host pointer found."), exit(-1);
237 magma_cheevd(_cV, _cU, prob_size, (magmaFloatComplex*)dTvecm, ldm, (
float*)hTvalm, (magmaFloatComplex*)lwork, llwork, (
float*)rwork, lrwork, iwork, liwork, &info);
238 if(info != 0) printf(
"\nError in MagmaHEEVD (magma_cheevd_gpu), exit ...\n"), exit(-1);
242 magma_zheevd(_cV, _cU, prob_size, (magmaDoubleComplex*)dTvecm, ldm, (
double*)hTvalm, (magmaDoubleComplex*)lwork, llwork, (
double*)rwork, lrwork, iwork, liwork, &info);
243 if(info != 0) printf(
"\nError in MagmaHEEVD (magma_zheevd_gpu), exit ...\n"), exit(-1);
257 magma_int_t nb = magma_get_cgeqrf_nb(m);
259 magma_cgeqrf_gpu(m, l, (magmaFloatComplex *)dTvecm, ldm, (magmaFloatComplex *)hTau, (magmaFloatComplex *)dTau, &info);
260 if(info != 0) printf(
"\nError in MagmaORTH_2nev (magma_cgeqrf_gpu), exit ...\n"), exit(-1);
264 magma_cunmqr_gpu(_cR, _cN, m, m, l, (magmaFloatComplex *)dTvecm, ldm, (magmaFloatComplex *)hTau, (magmaFloatComplex *)dTm, ldm, (magmaFloatComplex *)W, sideLR, (magmaFloatComplex *)dTau, nb, &info);
265 if(info != 0) printf(
"\nError in MagmaORTH_2nev (magma_cunmqr_gpu), exit ...\n"), exit(-1);
268 magma_cunmqr_gpu(_cL, _cC, m, l, l, (magmaFloatComplex *)dTvecm, ldm, (magmaFloatComplex *)hTau, (magmaFloatComplex *)dTm, ldm, (magmaFloatComplex *)W, sideLR, (magmaFloatComplex *)dTau, nb, &info);
269 if(info != 0) printf(
"\nError in MagmaORTH_2nev (magma_cunmqr_gpu), exit ...\n"), exit(-1);
273 magma_int_t nb = magma_get_zgeqrf_nb(m);
275 magma_zgeqrf_gpu(m, l, (magmaDoubleComplex *)dTvecm, ldm, (magmaDoubleComplex *)hTau, (magmaDoubleComplex *)dTau, &info);
276 if(info != 0) printf(
"\nError in MagmaORTH_2nev (magma_zgeqrf_gpu), exit ...\n"), exit(-1);
280 magma_zunmqr_gpu(_cR, _cN, m, m, l, (magmaDoubleComplex *)dTvecm, ldm, (magmaDoubleComplex *)hTau, (magmaDoubleComplex *)dTm, ldm, (magmaDoubleComplex *)W, sideLR, (magmaDoubleComplex *)dTau, nb, &info);
281 if(info != 0) printf(
"\nError in MagmaORTH_2nev (magma_zunmqr_gpu), exit ...\n"), exit(-1);
284 magma_zunmqr_gpu(_cL, _cC, m, l, l, (magmaDoubleComplex *)dTvecm, ldm, (magmaDoubleComplex *)hTau, (magmaDoubleComplex *)dTm, ldm, (magmaDoubleComplex *)W, sideLR, (magmaDoubleComplex *)dTau, nb, &info);
285 if(info != 0) printf(
"\nError in MagmaORTH_2nev (magma_zunmqr_gpu), exit ...\n"), exit(-1);
296 const int cprec = 2*prec;
304 const int bufferSize = 2*vld+l*l;
306 int bufferBlock = bufferSize / l;
309 magma_malloc(&buffer, bufferSize*cprec);
310 cudaMemset(buffer, 0, bufferSize*cprec);
315 magma_int_t nb = magma_get_cgeqrf_nb(m);
316 magma_cunmqr_gpu(_cL, _cN, m, l, l, (magmaFloatComplex*)dTevecm, ldm, (magmaFloatComplex*)hTau, (magmaFloatComplex*)dTm, ldm, (magmaFloatComplex*)W, sideLR, (magmaFloatComplex*)dTau, nb, &info);
318 if(info != 0) printf(
"\nError in RestartV (magma_cunmqr_gpu), exit ...\n"), exit(-1);
322 magma_int_t nb = magma_get_zgeqrf_nb(m);
323 magma_zunmqr_gpu(_cL, _cN, m, l, l, (magmaDoubleComplex*)dTevecm, ldm, (magmaDoubleComplex*)hTau, (magmaDoubleComplex*)dTm, ldm, (magmaDoubleComplex*)W, sideLR, (magmaDoubleComplex*)dTau, nb, &info);
325 if(info != 0) printf(
"\nError in RestartV (magma_zunmqr_gpu), exit ...\n"), exit(-1);
330 magmaFloatComplex *dtm;
334 magma_malloc((
void**)&dtm, ldm*l*prec);
339 magma_malloc_pinned((
void**)&hbuff1, ldm*l*cprec);
340 magma_malloc_pinned((
void**)&hbuff2, ldm*l*prec);
342 cudaMemcpy(hbuff1, dTm, ldm*l*cprec, cudaMemcpyDefault);
343 for(
int i = 0; i < ldm*l; i++) hbuff2[i] = (
float)hbuff1[i];
345 cudaMemcpy(dtm, hbuff2, ldm*l*prec, cudaMemcpyDefault);
347 magma_free_pinned(hbuff1);
348 magma_free_pinned(hbuff2);
352 dtm = (magmaFloatComplex *)dTm;
357 for (
int blockOffset = 0; blockOffset < vlen; blockOffset += bufferBlock)
359 if (bufferBlock > (vlen-blockOffset)) bufferBlock = (vlen-blockOffset);
361 magmaFloatComplex *ptrV = &(((magmaFloatComplex*)dV)[blockOffset]);
363 magmablas_cgemm(_cN, _cN, bufferBlock, l, m, MAGMA_C_ONE, ptrV, vld, dtm, ldm, MAGMA_C_ZERO, (magmaFloatComplex*)buffer, bufferBlock);
365 cudaMemcpy2D(ptrV, vld*cprec, buffer, bufferBlock*cprec, bufferBlock*cprec, l, cudaMemcpyDefault);
369 if(prec == 8) magma_free(dtm);
376 for (
int blockOffset = 0; blockOffset < vlen; blockOffset += bufferBlock)
378 if (bufferBlock > (vlen-blockOffset)) bufferBlock = (vlen-blockOffset);
380 magmaDoubleComplex *ptrV = &(((magmaDoubleComplex*)dV)[blockOffset]);
382 magmablas_zgemm(_cN, _cN, bufferBlock, l, m, MAGMA_Z_ONE, ptrV, vld, (magmaDoubleComplex*)dTm, ldm, MAGMA_Z_ZERO, (magmaDoubleComplex*)buffer, bufferBlock);
384 cudaMemcpy2D(ptrV, vld*cprec, buffer, bufferBlock*cprec, bufferBlock*cprec, l, cudaMemcpyDefault);
401 const int complex_prec = 2*prec;
406 magma_malloc_pinned((
void**)&tmp, ldH*n*complex_prec);
407 magma_malloc_pinned((
void**)&ipiv, n*
sizeof(magma_int_t));
409 memcpy(tmp, H, ldH*n*complex_prec);
413 err = magma_cgesv(n, 1, (magmaFloatComplex*)tmp, ldH, ipiv, (magmaFloatComplex*)rhs, ldn, &info);
414 if(err != 0) printf(
"\nError in SolveProjMatrix (magma_cgesv), exit ...\n"), exit(-1);
418 err = magma_zgesv(n, 1, (magmaDoubleComplex*)tmp, ldH, ipiv, (magmaDoubleComplex*)rhs, ldn, &info);
419 if(err != 0) printf(
"\nError in SolveProjMatrix (magma_zgesv), exit ...\n"), exit(-1);
422 magma_free_pinned(tmp);
423 magma_free_pinned(ipiv);
431 const int complex_prec = 2*prec;
436 magma_malloc((
void**)&tmp, ldH*n*complex_prec);
437 magma_malloc_pinned((
void**)&ipiv, n*
sizeof(magma_int_t));
439 cudaMemcpy(tmp, H, ldH*n*complex_prec, cudaMemcpyDefault);
443 err = magma_cgesv_gpu(n, 1, (magmaFloatComplex*)tmp, ldH, ipiv, (magmaFloatComplex*)rhs, ldn, &info);
444 if(err != 0) printf(
"\nError in SolveGPUProjMatrix (magma_cgesv), exit ...\n"), exit(-1);
448 err = magma_zgesv_gpu(n, 1, (magmaDoubleComplex*)tmp, ldH, ipiv, (magmaDoubleComplex*)rhs, ldn, &info);
449 if(err != 0) printf(
"\nError in SolveGPUProjMatrix (magma_zgesv), exit ...\n"), exit(-1);
453 magma_free_pinned(ipiv);
459 (
void *
spinorOut,
const void *spinorSetIn,
const int sld,
const int slen,
const void *vec,
const int vlen)
464 magmaFloatComplex *spmat = (magmaFloatComplex*)spinorSetIn;
465 magmaFloatComplex *spout = (magmaFloatComplex*)spinorOut;
467 magmablas_cgemv(_cN, slen, vlen, MAGMA_C_ONE, spmat, sld, (magmaFloatComplex*)vec, 1, MAGMA_C_ZERO, spout, 1);
471 magmaDoubleComplex *spmat = (magmaDoubleComplex*)spinorSetIn;
472 magmaDoubleComplex *spout = (magmaDoubleComplex*)spinorOut;
474 magmablas_zgemv(_cN, slen, vlen, MAGMA_Z_ONE, spmat, sld, (magmaDoubleComplex*)vec, 1, MAGMA_Z_ZERO, spout, 1);
void RestartV(void *dV, const int vld, const int vlen, const int vprec, void *dTevecm, void *dTm)
void SolveProjMatrix(void *rhs, const int ldn, const int n, void *H, const int ldH)
int MagmaORTH_2nev(void *dTvecm, void *dTm)
cudaColorSpinorField * tmp
void SolveGPUProjMatrix(void *rhs, const int ldn, const int n, void *H, const int ldH)
void SpinorMatVec(void *spinorOut, const void *spinorSetIn, const int sld, const int slen, const void *vec, const int vlen)
cpuColorSpinorField * spinorOut
void MagmaHEEVD(void *dTvecm, void *hTvalm, const int problem_size, bool host=false)